// mirror of https://github.com/PCSX2/pcsx2.git (1778 lines, 42 KiB, C++)
// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team
|
|
// SPDX-License-Identifier: GPL-3.0+
|
|
|
|
#include "iR3000A.h"
|
|
#include "R3000A.h"
|
|
#include "BaseblockEx.h"
|
|
#include "R5900OpcodeTables.h"
|
|
#include "IopBios.h"
|
|
#include "IopHw.h"
|
|
#include "Common.h"
|
|
#include "VMManager.h"
|
|
|
|
#include <time.h>
|
|
|
|
#ifndef _WIN32
|
|
#include <sys/types.h>
|
|
#endif
|
|
|
|
#include "iCore.h"
|
|
|
|
#include "Config.h"
|
|
|
|
#include "common/AlignedMalloc.h"
|
|
#include "common/FileSystem.h"
|
|
#include "common/Path.h"
|
|
#include "common/Perf.h"
|
|
#include "DebugTools/Breakpoints.h"
|
|
|
|
#include "fmt/core.h"
|
|
|
|
// #define DUMP_BLOCKS 1
|
|
// #define TRACE_BLOCKS 1
|
|
|
|
#ifdef DUMP_BLOCKS
|
|
#include "Zydis/Zydis.h"
|
|
#include "Zycore/Format.h"
|
|
#include "Zycore/Status.h"
|
|
#endif
|
|
|
|
#ifdef TRACE_BLOCKS
|
|
#include <zlib.h>
|
|
#endif
|
|
|
|
using namespace x86Emitter;
|
|
|
|
extern void psxBREAK();
|
|
|
|
u32 g_psxMaxRecMem = 0;
|
|
|
|
uptr psxRecLUT[0x10000];
|
|
u32 psxhwLUT[0x10000];
|
|
|
|
// Adds the psxhwLUT entry selected by the upper 16 bits of mem to mem itself.
static __fi u32 HWADDR(u32 mem)
{
	const u32 page_offset = psxhwLUT[mem >> 16];
	return page_offset + mem;
}
|
|
|
|
static BASEBLOCK* recRAM = nullptr; // and the ptr to the blocks here
|
|
static BASEBLOCK* recROM = nullptr; // and here
|
|
static BASEBLOCK* recROM1 = nullptr; // also here
|
|
static BASEBLOCK* recROM2 = nullptr; // also here
|
|
static BaseBlocks recBlocks;
|
|
static u8* recPtr = nullptr;
|
|
static u8* recPtrEnd = nullptr;
|
|
u32 psxpc; // recompiler psxpc
|
|
int psxbranch; // set for branch
|
|
u32 g_iopCyclePenalty;
|
|
|
|
static EEINST* s_pInstCache = nullptr;
|
|
static u32 s_nInstCacheSize = 0;
|
|
|
|
static BASEBLOCK* s_pCurBlock = nullptr;
|
|
static BASEBLOCKEX* s_pCurBlockEx = nullptr;
|
|
|
|
static u32 s_nEndBlock = 0; // what psxpc the current block ends
|
|
static u32 s_branchTo;
|
|
static bool s_nBlockFF;
|
|
|
|
static u32 s_saveConstRegs[32];
|
|
static u32 s_saveHasConstReg = 0, s_saveFlushedConstReg = 0;
|
|
static EEINST* s_psaveInstInfo = nullptr;
|
|
|
|
u32 s_psxBlockCycles = 0; // cycles of current block recompiling
|
|
static u32 s_savenBlockCycles = 0;
|
|
static bool s_recompilingDelaySlot = false;
|
|
|
|
static void iPsxBranchTest(u32 newpc, u32 cpuBranch);
|
|
void psxRecompileNextInstruction(int delayslot);
|
|
|
|
extern void (*rpsxBSC[64])();
|
|
void rpsxpropBSC(EEINST* prev, EEINST* pinst);
|
|
|
|
static void iopClearRecLUT(BASEBLOCK* base, int count);
|
|
|
|
// Fetches the BASEBLOCK for address x through psxRecLUT.
#define PSX_GETBLOCK(x) PC_GETBLOCK_(x, psxRecLUT)

// Calls psxRecClearMem(mem) when mem is below g_psxMaxRecMem and its psxRecLUT-adjusted
// value is non-zero; otherwise evaluates to 4. Callers add the result to their pc.
#define PSXREC_CLEARM(mem) \
	(((mem) < g_psxMaxRecMem && (psxRecLUT[(mem) >> 16] + (mem))) ? psxRecClearMem(mem) : 4)
|
|
|
|
#ifdef DUMP_BLOCKS
// Formatter callback that is used as the fallback for addresses we cannot name.
static ZydisFormatterFunc s_old_print_address;

// Address-printing callback for block dumps. Resolves the operand's absolute address and,
// when it falls inside IOP memory or matches a known psxRegs field / g_nextEventCycle,
// appends a symbolic "&name" token instead of the raw number. Any other address is
// forwarded to s_old_print_address.
static ZyanStatus ZydisFormatterPrintAddressAbsolute(const ZydisFormatter* formatter,
	ZydisFormatterBuffer* buffer, ZydisFormatterContext* context)
{
	ZyanU64 address;
	ZYAN_CHECK(ZydisCalcAbsoluteAddress(context->instruction, context->operand,
		context->runtime_address, &address));

	char buf[128];
	// snprintf returns int and may be negative on error; keep it signed so that an error
	// is not mistaken for "a name was produced".
	int len = 0;

#define A(x) ((u64)(x))

	if (address >= A(iopMem->Main) && address < A(iopMem->P))
	{
		len = snprintf(buf, sizeof(buf), "iopMem+0x%08X", static_cast<u32>(address - A(iopMem->Main)));
	}
	else if (address >= A(&psxRegs.GPR) && address < A(&psxRegs.CP0))
	{
		// Index relative to the GPR array itself, i.e. the same base the range check uses.
		len = snprintf(buf, sizeof(buf), "psxRegs.GPR.%s", R3000A::disRNameGPR[static_cast<u32>(address - A(&psxRegs.GPR)) / 4u]);
	}
	else if (address == A(&psxRegs.pc))
	{
		len = snprintf(buf, sizeof(buf), "psxRegs.pc");
	}
	else if (address == A(&psxRegs.cycle))
	{
		len = snprintf(buf, sizeof(buf), "psxRegs.cycle");
	}
	else if (address == A(&g_nextEventCycle))
	{
		len = snprintf(buf, sizeof(buf), "g_nextEventCycle");
	}

#undef A

	if (len > 0)
	{
		ZYAN_CHECK(ZydisFormatterBufferAppend(buffer, ZYDIS_TOKEN_SYMBOL));
		ZyanString* string;
		ZYAN_CHECK(ZydisFormatterBufferGetString(buffer, &string));
		return ZyanStringAppendFormat(string, "&%s", buf);
	}

	return s_old_print_address(formatter, buffer, context);
}
#endif
|
|
|
|
// =====================================================================================================
|
|
// Dynamically Compiled Dispatchers - R3000A style
|
|
// =====================================================================================================
|
|
|
|
static void iopRecRecompile(u32 startpc);

// Addresses inside the generated dispatcher code. All are filled in by _DynGen_Dispatchers(),
// except iopExitRecompiledCode which is recorded while _DynGen_EnterRecompiledCode() emits its stub.
static const void* iopDispatcherEvent = nullptr;
static const void* iopDispatcherReg = nullptr;
static const void* iopJITCompile = nullptr;
static const void* iopJITCompileInBlock = nullptr;
static const void* iopEnterRecompiledCode = nullptr;
static const void* iopExitRecompiledCode = nullptr;
|
|
|
|
static void recEventTest()
|
|
{
|
|
_cpuEventTest_Shared();
|
|
}
|
|
|
|
// The address for all cleared blocks. It recompiles the current pc and then
|
|
// dispatches to the recompiled block address.
|
|
static const void* _DynGen_JITCompile()
|
|
{
|
|
pxAssertMsg(iopDispatcherReg != NULL, "Please compile the DispatcherReg subroutine *before* JITComple. Thanks.");
|
|
|
|
u8* retval = xGetPtr();
|
|
|
|
xFastCall((void*)iopRecRecompile, ptr32[&psxRegs.pc]);
|
|
|
|
xMOV(eax, ptr[&psxRegs.pc]);
|
|
xMOV(ebx, eax);
|
|
xSHR(eax, 16);
|
|
xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]);
|
|
xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);
|
|
|
|
return retval;
|
|
}
|
|
|
|
static const void* _DynGen_JITCompileInBlock()
|
|
{
|
|
u8* retval = xGetPtr();
|
|
xJMP((void*)iopJITCompile);
|
|
return retval;
|
|
}
|
|
|
|
// called when jumping to variable pc address
|
|
static const void* _DynGen_DispatcherReg()
|
|
{
|
|
u8* retval = xGetPtr();
|
|
|
|
xMOV(eax, ptr[&psxRegs.pc]);
|
|
xMOV(ebx, eax);
|
|
xSHR(eax, 16);
|
|
xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]);
|
|
xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);
|
|
|
|
return retval;
|
|
}
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
// EnterRecompiledCode - dynamic compilation stub!
|
|
// --------------------------------------------------------------------------------------
|
|
static const void* _DynGen_EnterRecompiledCode()
|
|
{
|
|
// Optimization: The IOP never uses stack-based parameter invocation, so we can avoid
|
|
// allocating any room on the stack for it (which is important since the IOP's entry
|
|
// code gets invoked quite a lot).
|
|
|
|
u8* retval = xGetPtr();
|
|
|
|
{ // Properly scope the frame prologue/epilogue
|
|
#ifdef ENABLE_VTUNE
|
|
xScopedStackFrame frame(true, true);
|
|
#else
|
|
xScopedStackFrame frame(false, true);
|
|
#endif
|
|
|
|
xJMP((void*)iopDispatcherReg);
|
|
|
|
// Save an exit point
|
|
iopExitRecompiledCode = xGetPtr();
|
|
}
|
|
|
|
xRET();
|
|
|
|
return retval;
|
|
}
|
|
|
|
static void _DynGen_Dispatchers()
|
|
{
|
|
const u8* start = xGetAlignedCallTarget();
|
|
|
|
// Place the EventTest and DispatcherReg stuff at the top, because they get called the
|
|
// most and stand to benefit from strong alignment and direct referencing.
|
|
iopDispatcherEvent = xGetPtr();
|
|
xFastCall((void*)recEventTest);
|
|
iopDispatcherReg = _DynGen_DispatcherReg();
|
|
|
|
iopJITCompile = _DynGen_JITCompile();
|
|
iopJITCompileInBlock = _DynGen_JITCompileInBlock();
|
|
iopEnterRecompiledCode = _DynGen_EnterRecompiledCode();
|
|
|
|
recBlocks.SetJITCompile(iopJITCompile);
|
|
|
|
Perf::any.Register(start, xGetPtr() - start, "IOP Dispatcher");
|
|
}
|
|
|
|
////////////////////////////////////////////////////
|
|
using namespace R3000A;
|
|
|
|
void _psxFlushConstReg(int reg)
|
|
{
|
|
if (PSX_IS_CONST1(reg) && !(g_psxFlushedConstReg & (1 << reg)))
|
|
{
|
|
xMOV(ptr32[&psxRegs.GPR.r[reg]], g_psxConstRegs[reg]);
|
|
g_psxFlushedConstReg |= (1 << reg);
|
|
}
|
|
}
|
|
|
|
void _psxFlushConstRegs()
|
|
{
|
|
// TODO: Combine flushes
|
|
|
|
int i;
|
|
|
|
// flush constants
|
|
|
|
// ignore r0
|
|
for (i = 1; i < 32; ++i)
|
|
{
|
|
if (g_psxHasConstReg & (1 << i))
|
|
{
|
|
|
|
if (!(g_psxFlushedConstReg & (1 << i)))
|
|
{
|
|
xMOV(ptr32[&psxRegs.GPR.r[i]], g_psxConstRegs[i]);
|
|
g_psxFlushedConstReg |= 1 << i;
|
|
}
|
|
|
|
if (g_psxHasConstReg == g_psxFlushedConstReg)
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void _psxDeleteReg(int reg, int flush)
|
|
{
|
|
if (!reg)
|
|
return;
|
|
if (flush && PSX_IS_CONST1(reg))
|
|
_psxFlushConstReg(reg);
|
|
|
|
PSX_DEL_CONST(reg);
|
|
_deletePSXtoX86reg(reg, flush ? DELETE_REG_FREE : DELETE_REG_FREE_NO_WRITEBACK);
|
|
}
|
|
|
|
void _psxMoveGPRtoR(const xRegister32& to, int fromgpr)
|
|
{
|
|
if (PSX_IS_CONST1(fromgpr))
|
|
{
|
|
xMOV(to, g_psxConstRegs[fromgpr]);
|
|
}
|
|
else
|
|
{
|
|
const int reg = EEINST_USEDTEST(fromgpr) ? _allocX86reg(X86TYPE_PSX, fromgpr, MODE_READ) : _checkX86reg(X86TYPE_PSX, fromgpr, MODE_READ);
|
|
if (reg >= 0)
|
|
xMOV(to, xRegister32(reg));
|
|
else
|
|
xMOV(to, ptr[&psxRegs.GPR.r[fromgpr]]);
|
|
}
|
|
}
|
|
|
|
// Emits a 32-bit store of IOP register fromgpr to the host address `to`. When the value
// is neither constant nor held in a host register it is staged through eax.
void _psxMoveGPRtoM(uptr to, int fromgpr)
{
	u32* const dest = (u32*)(to);

	if (PSX_IS_CONST1(fromgpr))
	{
		xMOV(ptr32[dest], g_psxConstRegs[fromgpr]);
		return;
	}

	int hostreg;
	if (EEINST_USEDTEST(fromgpr))
		hostreg = _allocX86reg(X86TYPE_PSX, fromgpr, MODE_READ);
	else
		hostreg = _checkX86reg(X86TYPE_PSX, fromgpr, MODE_READ);

	if (hostreg < 0)
	{
		xMOV(eax, ptr[&psxRegs.GPR.r[fromgpr]]);
		xMOV(ptr32[dest], eax);
		return;
	}

	xMOV(ptr32[dest], xRegister32(hostreg));
}
|
|
|
|
void _psxFlushCall(int flushtype)
|
|
{
|
|
// Free registers that are not saved across function calls (x86-32 ABI):
|
|
for (u32 i = 0; i < iREGCNT_GPR; i++)
|
|
{
|
|
if (!x86regs[i].inuse)
|
|
continue;
|
|
|
|
if (xRegisterBase::IsCallerSaved(i) ||
|
|
((flushtype & FLUSH_FREE_NONTEMP_X86) && x86regs[i].type != X86TYPE_TEMP) ||
|
|
((flushtype & FLUSH_FREE_TEMP_X86) && x86regs[i].type == X86TYPE_TEMP))
|
|
{
|
|
_freeX86reg(i);
|
|
}
|
|
}
|
|
|
|
if (flushtype & FLUSH_ALL_X86)
|
|
_flushX86regs();
|
|
|
|
if (flushtype & FLUSH_CONSTANT_REGS)
|
|
_psxFlushConstRegs();
|
|
|
|
if ((flushtype & FLUSH_PC) /*&& !g_cpuFlushedPC*/)
|
|
{
|
|
xMOV(ptr32[&psxRegs.pc], psxpc);
|
|
//g_cpuFlushedPC = true;
|
|
}
|
|
}
|
|
|
|
void _psxFlushAllDirty()
|
|
{
|
|
// TODO: Combine flushes
|
|
for (u32 i = 0; i < 32; ++i)
|
|
{
|
|
if (PSX_IS_CONST1(i))
|
|
_psxFlushConstReg(i);
|
|
}
|
|
|
|
_flushX86regs();
|
|
}
|
|
|
|
void psxSaveBranchState()
|
|
{
|
|
s_savenBlockCycles = s_psxBlockCycles;
|
|
memcpy(s_saveConstRegs, g_psxConstRegs, sizeof(g_psxConstRegs));
|
|
s_saveHasConstReg = g_psxHasConstReg;
|
|
s_saveFlushedConstReg = g_psxFlushedConstReg;
|
|
s_psaveInstInfo = g_pCurInstInfo;
|
|
|
|
// save all regs
|
|
memcpy(s_saveX86regs, x86regs, sizeof(x86regs));
|
|
}
|
|
|
|
void psxLoadBranchState()
|
|
{
|
|
s_psxBlockCycles = s_savenBlockCycles;
|
|
|
|
memcpy(g_psxConstRegs, s_saveConstRegs, sizeof(g_psxConstRegs));
|
|
g_psxHasConstReg = s_saveHasConstReg;
|
|
g_psxFlushedConstReg = s_saveFlushedConstReg;
|
|
g_pCurInstInfo = s_psaveInstInfo;
|
|
|
|
// restore all regs
|
|
memcpy(x86regs, s_saveX86regs, sizeof(x86regs));
|
|
}
|
|
|
|
////////////////////
|
|
// Code Templates //
|
|
////////////////////
|
|
|
|
void _psxOnWriteReg(int reg)
|
|
{
|
|
PSX_DEL_CONST(reg);
|
|
}
|
|
|
|
// Decides whether the delay-slot instruction `opcode` conflicts with the branch that owns it.
// rs/rt are registers the branch reads, rd is a register the branch writes (0 = none).
// Returns true when compiling the delay slot ahead of the branch would be unsafe.
static bool psxDelaySlotHasHazard(u32 opcode, u32 rs, u32 rt, u32 rd)
{
	const u32 op_rs = (opcode >> 21) & 0x1F;
	const u32 op_rt = (opcode >> 16) & 0x1F;
	const u32 op_rd = (opcode >> 11) & 0x1F;

	// The delay slot writes `dest`, which the branch reads (rs/rt) or also writes (rd).
	const auto writes = [=](u32 dest) { return dest != 0 && (dest == rs || dest == rt || dest == rd); };
	// The delay slot reads `src`, which the branch writes (rd).
	const auto reads = [=](u32 src) { return src != 0 && src == rd; };

	switch (opcode >> 26)
	{
		case 8: // ADDI
		case 9: // ADDIU
		case 10: // SLTI
		case 11: // SLTIU
		case 12: // ANDI
		case 13: // ORI
		case 14: // XORI
		case 15: // LUI
		case 32: // LB
		case 33: // LH
		case 34: // LWL
		case 35: // LW
		case 36: // LBU
		case 37: // LHU
		case 38: // LWR
		case 39: // LWU
		case 40: // SB
		case 41: // SH
		case 42: // SWL
		case 43: // SW
		case 46: // SWR
			// rt is treated as the destination for the whole group (conservative for stores).
			return writes(op_rt) || reads(op_rs);

		case 50: // LWC2
		case 58: // SWC2
			// No GPR is written, but the base register is read.
			return reads(op_rs);

		case 0: // SPECIAL
		{
			switch (opcode & 0x3F)
			{
				case 0: // SLL
				case 2: // SRL
				case 3: // SRA
				case 4: // SLLV
				case 6: // SRLV
				case 7: // SRAV
				case 32: // ADD
				case 33: // ADDU
				case 34: // SUB
				case 35: // SUBU
				case 36: // AND
				case 37: // OR
				case 38: // XOR
				case 39: // NOR
				case 42: // SLT
				case 43: // SLTU
					return writes(op_rd) || reads(op_rs) || reads(op_rt);

				case 15: // SYNC
					return false;

				case 24: // MULT
				case 25: // MULTU
				case 26: // DIV
				case 27: // DIVU
					// Only HI/LO are written, but both source GPRs are read.
					return reads(op_rs) || reads(op_rt);

				default:
					return true;
			}
		}

		case 16: // COP0
		case 17: // COP1
		case 18: // COP2
		case 19: // COP3
		{
			switch (op_rs)
			{
				case 0: // MFC0
				case 2: // CFC0
					return writes(op_rt);

				case 4: // MTC0
				case 6: // CTC0
					return reads(op_rt);

				default:
					// swap when it's GTE
					return (opcode >> 26) != 18;
			}
		}

		default:
			return true;
	}
}

// Tries to compile the branch delay slot at psxpc *before* the branch itself.
// rs/rt: registers read by the branch; rd: register written by the branch (0 when unused).
// Returns true when the delay slot was compiled here (swapped), false when the caller
// must compile it in the normal order. Never swaps while already inside a delay slot.
bool psxTrySwapDelaySlot(u32 rs, u32 rt, u32 rd)
{
#if 1
	if (s_recompilingDelaySlot)
		return false;

	const u32 opcode_encoded = iopMemRead32(psxpc);

	// An all-zero word (NOP) never conflicts with anything.
	if (opcode_encoded == 0)
	{
		psxRecompileNextInstruction(true, true);
		return true;
	}

	if (psxDelaySlotHasHazard(opcode_encoded, rs, rt, rd))
	{
		RALOG("NOT SWAPPING delay slot %08X %s\n", psxpc, disR3000AF(iopMemRead32(psxpc), psxpc));
		return false;
	}

	RALOG("Swapping delay slot %08X %s\n", psxpc, disR3000AF(iopMemRead32(psxpc), psxpc));
	psxRecompileNextInstruction(true, true);
	return true;
#else
	return false;
#endif
}
|
|
|
|
int psxTryRenameReg(int to, int from, int fromx86, int other, int xmminfo)
|
|
{
|
|
// can't rename when in form Rd = Rs op Rt and Rd == Rs or Rd == Rt
|
|
if ((xmminfo & XMMINFO_NORENAME) || fromx86 < 0 || to == from || to == other || !EEINST_RENAMETEST(from))
|
|
return -1;
|
|
|
|
RALOG("Renaming %s to %s\n", R3000A::disRNameGPR[from], R3000A::disRNameGPR[to]);
|
|
|
|
// flush back when it's been modified
|
|
if (x86regs[fromx86].mode & MODE_WRITE && EEINST_LIVETEST(from))
|
|
_writebackX86Reg(fromx86);
|
|
|
|
// remove all references to renamed-to register
|
|
_deletePSXtoX86reg(to, DELETE_REG_FREE_NO_WRITEBACK);
|
|
PSX_DEL_CONST(to);
|
|
|
|
// and do the actual rename, new register has been modified.
|
|
x86regs[fromx86].reg = to;
|
|
x86regs[fromx86].mode |= MODE_READ | MODE_WRITE;
|
|
return fromx86;
|
|
}
|
|
|
|
// rd = rs op rt
|
|
void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode, int xmminfo)
|
|
{
|
|
if (!_Rd_)
|
|
return;
|
|
|
|
if (PSX_IS_CONST2(_Rs_, _Rt_))
|
|
{
|
|
_deletePSXtoX86reg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
|
|
PSX_SET_CONST(_Rd_);
|
|
constcode();
|
|
return;
|
|
}
|
|
|
|
// we have to put these up here, because the register allocator below will wipe out const flags
|
|
// for the destination register when/if it switches it to write mode.
|
|
const bool s_is_const = PSX_IS_CONST1(_Rs_);
|
|
const bool t_is_const = PSX_IS_CONST1(_Rt_);
|
|
const bool d_is_const = PSX_IS_CONST1(_Rd_);
|
|
const bool s_is_used = EEINST_USEDTEST(_Rs_);
|
|
const bool t_is_used = EEINST_USEDTEST(_Rt_);
|
|
|
|
if (!s_is_const)
|
|
_addNeededGPRtoX86reg(_Rs_);
|
|
if (!t_is_const)
|
|
_addNeededGPRtoX86reg(_Rt_);
|
|
if (!d_is_const)
|
|
_addNeededGPRtoX86reg(_Rd_);
|
|
|
|
u32 info = 0;
|
|
int regs = _checkX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
|
|
if (regs < 0 && ((!s_is_const && s_is_used) || _Rs_ == _Rd_))
|
|
regs = _allocX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
|
|
if (regs >= 0)
|
|
info |= PROCESS_EE_SET_S(regs);
|
|
|
|
int regt = _checkX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
|
|
if (regt < 0 && ((!t_is_const && t_is_used) || _Rt_ == _Rd_))
|
|
regt = _allocX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
|
|
if (regt >= 0)
|
|
info |= PROCESS_EE_SET_T(regt);
|
|
|
|
// If S is no longer live, swap D for S. Saves the move.
|
|
int regd = psxTryRenameReg(_Rd_, _Rs_, regs, _Rt_, xmminfo);
|
|
if (regd < 0)
|
|
{
|
|
// TODO: If not live, write direct to memory.
|
|
regd = _allocX86reg(X86TYPE_PSX, _Rd_, MODE_WRITE);
|
|
}
|
|
if (regd >= 0)
|
|
info |= PROCESS_EE_SET_D(regd);
|
|
|
|
_validateRegs();
|
|
|
|
if (s_is_const && regs < 0)
|
|
{
|
|
// This *must* go inside the if, because of when _Rs_ = _Rd_
|
|
PSX_DEL_CONST(_Rd_);
|
|
constscode(info /*| PROCESS_CONSTS*/);
|
|
return;
|
|
}
|
|
|
|
if (t_is_const && regt < 0)
|
|
{
|
|
PSX_DEL_CONST(_Rd_);
|
|
consttcode(info /*| PROCESS_CONSTT*/);
|
|
return;
|
|
}
|
|
|
|
PSX_DEL_CONST(_Rd_);
|
|
noconstcode(info);
|
|
}
|
|
|
|
static void psxRecompileIrxImport()
|
|
{
|
|
u32 import_table = irxImportTableAddr(psxpc - 4);
|
|
u16 index = psxRegs.code & 0xffff;
|
|
if (!import_table)
|
|
return;
|
|
|
|
const std::string libname = iopMemReadString(import_table + 12, 8);
|
|
|
|
irxHLE hle = irxImportHLE(libname, index);
|
|
#ifdef PCSX2_DEVBUILD
|
|
const irxDEBUG debug = irxImportDebug(libname, index);
|
|
const char* funcname = irxImportFuncname(libname, index);
|
|
#else
|
|
const irxDEBUG debug = 0;
|
|
const char* funcname = nullptr;
|
|
#endif
|
|
|
|
if (!hle && !debug && (!TraceActive(IOP.Bios) || !funcname))
|
|
return;
|
|
|
|
xMOV(ptr32[&psxRegs.code], psxRegs.code);
|
|
xMOV(ptr32[&psxRegs.pc], psxpc);
|
|
_psxFlushCall(FLUSH_NODESTROY);
|
|
|
|
if (TraceActive(IOP.Bios))
|
|
{
|
|
xMOV64(arg3reg, (uptr)funcname);
|
|
|
|
xFastCall((void*)irxImportLog_rec, import_table, index);
|
|
}
|
|
|
|
if (debug)
|
|
xFastCall((void*)debug);
|
|
|
|
if (hle)
|
|
{
|
|
xFastCall((void*)hle);
|
|
xTEST(eax, eax);
|
|
xJNZ(iopDispatcherReg);
|
|
}
|
|
}
|
|
|
|
// rt = rs op imm16
|
|
void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode, int xmminfo)
|
|
{
|
|
if (!_Rt_)
|
|
{
|
|
// check for iop module import table magic
|
|
if (psxRegs.code >> 16 == 0x2400)
|
|
psxRecompileIrxImport();
|
|
return;
|
|
}
|
|
|
|
if (PSX_IS_CONST1(_Rs_))
|
|
{
|
|
_deletePSXtoX86reg(_Rt_, DELETE_REG_FREE_NO_WRITEBACK);
|
|
PSX_SET_CONST(_Rt_);
|
|
constcode();
|
|
return;
|
|
}
|
|
|
|
_addNeededPSXtoX86reg(_Rs_);
|
|
_addNeededPSXtoX86reg(_Rt_);
|
|
|
|
u32 info = 0;
|
|
|
|
const bool s_is_used = EEINST_USEDTEST(_Rs_);
|
|
const int regs = s_is_used ? _allocX86reg(X86TYPE_PSX, _Rs_, MODE_READ) : _checkX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
|
|
if (regs >= 0)
|
|
info |= PROCESS_EE_SET_S(regs);
|
|
|
|
int regt = psxTryRenameReg(_Rt_, _Rs_, regs, 0, xmminfo);
|
|
if (regt < 0)
|
|
{
|
|
regt = _allocX86reg(X86TYPE_PSX, _Rt_, MODE_WRITE);
|
|
}
|
|
if (regt >= 0)
|
|
info |= PROCESS_EE_SET_T(regt);
|
|
|
|
_validateRegs();
|
|
|
|
PSX_DEL_CONST(_Rt_);
|
|
noconstcode(info);
|
|
}
|
|
|
|
// rd = rt op sa
|
|
void psxRecompileCodeConst2(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode, int xmminfo)
|
|
{
|
|
if (!_Rd_)
|
|
return;
|
|
|
|
if (PSX_IS_CONST1(_Rt_))
|
|
{
|
|
_deletePSXtoX86reg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
|
|
PSX_SET_CONST(_Rd_);
|
|
constcode();
|
|
return;
|
|
}
|
|
|
|
_addNeededPSXtoX86reg(_Rt_);
|
|
_addNeededPSXtoX86reg(_Rd_);
|
|
|
|
u32 info = 0;
|
|
const bool s_is_used = EEINST_USEDTEST(_Rt_);
|
|
const int regt = s_is_used ? _allocX86reg(X86TYPE_PSX, _Rt_, MODE_READ) : _checkX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
|
|
if (regt >= 0)
|
|
info |= PROCESS_EE_SET_T(regt);
|
|
|
|
int regd = psxTryRenameReg(_Rd_, _Rt_, regt, 0, xmminfo);
|
|
if (regd < 0)
|
|
{
|
|
regd = _allocX86reg(X86TYPE_PSX, _Rd_, MODE_WRITE);
|
|
}
|
|
if (regd >= 0)
|
|
info |= PROCESS_EE_SET_D(regd);
|
|
|
|
_validateRegs();
|
|
|
|
PSX_DEL_CONST(_Rd_);
|
|
noconstcode(info);
|
|
}
|
|
|
|
// rd = rt MULT rs (SPECIAL)
|
|
void psxRecompileCodeConst3(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode, int LOHI)
|
|
{
|
|
if (PSX_IS_CONST2(_Rs_, _Rt_))
|
|
{
|
|
if (LOHI)
|
|
{
|
|
_deletePSXtoX86reg(PSX_LO, DELETE_REG_FREE_NO_WRITEBACK);
|
|
_deletePSXtoX86reg(PSX_HI, DELETE_REG_FREE_NO_WRITEBACK);
|
|
}
|
|
|
|
constcode();
|
|
return;
|
|
}
|
|
|
|
// we have to put these up here, because the register allocator below will wipe out const flags
|
|
// for the destination register when/if it switches it to write mode.
|
|
const bool s_is_const = PSX_IS_CONST1(_Rs_);
|
|
const bool t_is_const = PSX_IS_CONST1(_Rt_);
|
|
const bool s_is_used = EEINST_USEDTEST(_Rs_);
|
|
const bool t_is_used = EEINST_USEDTEST(_Rt_);
|
|
|
|
if (!s_is_const)
|
|
_addNeededGPRtoX86reg(_Rs_);
|
|
if (!t_is_const)
|
|
_addNeededGPRtoX86reg(_Rt_);
|
|
if (LOHI)
|
|
{
|
|
if (EEINST_LIVETEST(PSX_LO))
|
|
_addNeededPSXtoX86reg(PSX_LO);
|
|
if (EEINST_LIVETEST(PSX_HI))
|
|
_addNeededPSXtoX86reg(PSX_HI);
|
|
}
|
|
|
|
u32 info = 0;
|
|
int regs = _checkX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
|
|
if (regs < 0 && !s_is_const && s_is_used)
|
|
regs = _allocX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
|
|
if (regs >= 0)
|
|
info |= PROCESS_EE_SET_S(regs);
|
|
|
|
// need at least one in a register
|
|
int regt = _checkX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
|
|
if (regs < 0 || (regt < 0 && !t_is_const && t_is_used))
|
|
regt = _allocX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
|
|
if (regt >= 0)
|
|
info |= PROCESS_EE_SET_T(regt);
|
|
|
|
if (LOHI)
|
|
{
|
|
// going to destroy lo/hi, so invalidate if we're writing it back to state
|
|
const bool lo_is_used = EEINST_USEDTEST(PSX_LO);
|
|
const int reglo = lo_is_used ? _allocX86reg(X86TYPE_PSX, PSX_LO, MODE_WRITE) : -1;
|
|
if (reglo >= 0)
|
|
info |= PROCESS_EE_SET_LO(reglo) | PROCESS_EE_LO;
|
|
else
|
|
_deletePSXtoX86reg(PSX_LO, DELETE_REG_FREE_NO_WRITEBACK);
|
|
|
|
const bool hi_is_live = EEINST_USEDTEST(PSX_HI);
|
|
const int reghi = hi_is_live ? _allocX86reg(X86TYPE_PSX, PSX_HI, MODE_WRITE) : -1;
|
|
if (reghi >= 0)
|
|
info |= PROCESS_EE_SET_HI(reghi) | PROCESS_EE_HI;
|
|
else
|
|
_deletePSXtoX86reg(PSX_HI, DELETE_REG_FREE_NO_WRITEBACK);
|
|
}
|
|
|
|
_validateRegs();
|
|
|
|
if (s_is_const && regs < 0)
|
|
{
|
|
// This *must* go inside the if, because of when _Rs_ = _Rd_
|
|
constscode(info /*| PROCESS_CONSTS*/);
|
|
return;
|
|
}
|
|
|
|
if (t_is_const && regt < 0)
|
|
{
|
|
consttcode(info /*| PROCESS_CONSTT*/);
|
|
return;
|
|
}
|
|
|
|
noconstcode(info);
|
|
}
|
|
|
|
static u8* m_recBlockAlloc = NULL;
|
|
|
|
static const uint m_recBlockAllocSize =
|
|
(((Ps2MemSize::IopRam + Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2) / 4) * sizeof(BASEBLOCK));
|
|
|
|
static void recReserve()
|
|
{
|
|
recPtr = SysMemory::GetIOPRec();
|
|
recPtrEnd = SysMemory::GetIOPRecEnd() - _64kb;
|
|
|
|
// Goal: Allocate BASEBLOCKs for every possible branch target in IOP memory.
|
|
// Any 4-byte aligned address makes a valid branch target as per MIPS design (all instructions are
|
|
// always 4 bytes long).
|
|
|
|
if (!m_recBlockAlloc)
|
|
{
|
|
// We're on 64-bit, if these memory allocations fail, we're in real trouble.
|
|
m_recBlockAlloc = (u8*)_aligned_malloc(m_recBlockAllocSize, 4096);
|
|
if (!m_recBlockAlloc)
|
|
pxFailRel("Failed to allocate R3000A BASEBLOCK lookup tables");
|
|
}
|
|
|
|
u8* curpos = m_recBlockAlloc;
|
|
recRAM = (BASEBLOCK*)curpos;
|
|
curpos += (Ps2MemSize::IopRam / 4) * sizeof(BASEBLOCK);
|
|
recROM = (BASEBLOCK*)curpos;
|
|
curpos += (Ps2MemSize::Rom / 4) * sizeof(BASEBLOCK);
|
|
recROM1 = (BASEBLOCK*)curpos;
|
|
curpos += (Ps2MemSize::Rom1 / 4) * sizeof(BASEBLOCK);
|
|
recROM2 = (BASEBLOCK*)curpos;
|
|
curpos += (Ps2MemSize::Rom2 / 4) * sizeof(BASEBLOCK);
|
|
|
|
pxAssertRel(!s_pInstCache, "InstCache not allocated");
|
|
s_nInstCacheSize = 128;
|
|
s_pInstCache = (EEINST*)malloc(sizeof(EEINST) * s_nInstCacheSize);
|
|
if (!s_pInstCache)
|
|
pxFailRel("Failed to allocate R3000 InstCache array.");
|
|
}
|
|
|
|
void recResetIOP()
|
|
{
|
|
DevCon.WriteLn("iR3000A Recompiler reset.");
|
|
|
|
xSetPtr(SysMemory::GetIOPRec());
|
|
_DynGen_Dispatchers();
|
|
recPtr = xGetPtr();
|
|
|
|
iopClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
|
|
(((Ps2MemSize::IopRam + Ps2MemSize::Rom + Ps2MemSize::Rom1 + Ps2MemSize::Rom2) / 4)));
|
|
|
|
for (int i = 0; i < 0x10000; i++)
|
|
recLUT_SetPage(psxRecLUT, 0, 0, 0, i, 0);
|
|
|
|
// IOP knows 64k pages, hence for the 0x10000's
|
|
|
|
// The bottom 2 bits of PC are always zero, so we <<14 to "compress"
|
|
// the pc indexer into it's lower common denominator.
|
|
|
|
// We're only mapping 20 pages here in 4 places.
|
|
// 0x80 comes from : (Ps2MemSize::IopRam / 0x10000) * 4
|
|
|
|
for (int i = 0; i < 0x80; i++)
|
|
{
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recRAM, 0x0000, i, i & 0x1f);
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recRAM, 0x8000, i, i & 0x1f);
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recRAM, 0xa000, i, i & 0x1f);
|
|
}
|
|
|
|
for (int i = 0x1fc0; i < 0x2000; i++)
|
|
{
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM, 0x0000, i, i - 0x1fc0);
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM, 0x8000, i, i - 0x1fc0);
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM, 0xa000, i, i - 0x1fc0);
|
|
}
|
|
|
|
for (int i = 0x1e00; i < 0x1e40; i++)
|
|
{
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM1, 0x0000, i, i - 0x1e00);
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM1, 0x8000, i, i - 0x1e00);
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM1, 0xa000, i, i - 0x1e00);
|
|
}
|
|
|
|
for (int i = 0x1e40; i < 0x1e48; i++)
|
|
{
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM2, 0x0000, i, i - 0x1e40);
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM2, 0x8000, i, i - 0x1e40);
|
|
recLUT_SetPage(psxRecLUT, psxhwLUT, recROM2, 0xa000, i, i - 0x1e40);
|
|
}
|
|
|
|
if (s_pInstCache)
|
|
memset(s_pInstCache, 0, sizeof(EEINST) * s_nInstCacheSize);
|
|
|
|
recBlocks.Reset();
|
|
g_psxMaxRecMem = 0;
|
|
|
|
psxbranch = 0;
|
|
}
|
|
|
|
static void recShutdown()
|
|
{
|
|
safe_aligned_free(m_recBlockAlloc);
|
|
|
|
safe_free(s_pInstCache);
|
|
s_nInstCacheSize = 0;
|
|
|
|
recPtr = nullptr;
|
|
recPtrEnd = nullptr;
|
|
}
|
|
|
|
// Points `count` consecutive BASEBLOCKs starting at `base` at the JIT-compile stub.
static void iopClearRecLUT(BASEBLOCK* base, int count)
{
	const uptr compile_stub = (uptr)iopJITCompile;

	BASEBLOCK* cur = base;
	for (int remaining = count; remaining > 0; --remaining, ++cur)
		cur->SetFnptr(compile_stub);
}
|
|
|
|
// Runs recompiled IOP code with a budget of eeCycles and returns
// psxRegs.iopBreak + psxRegs.iopCycleEE as left behind by the generated code.
static __noinline s32 recExecuteBlock(s32 eeCycles)
{
	psxRegs.iopBreak = 0;
	psxRegs.iopCycleEE = eeCycles;

	// [TODO] recExecuteBlock could be replaced by a direct call to the iopEnterRecompiledCode()
	//   (by assigning its address to the psxRec structure).  But for that to happen, we need
	//   to move iopBreak/iopCycleEE update code to emitted assembly code. >_<  --air

	using EntryFn = void (*)();
	const EntryFn enter = (EntryFn)iopEnterRecompiledCode;
	enter();

	return psxRegs.iopBreak + psxRegs.iopCycleEE;
}
|
|
|
|
// Returns the offset to the next instruction after any cleared memory
|
|
static __fi u32 psxRecClearMem(u32 pc)
|
|
{
|
|
BASEBLOCK* pblock;
|
|
|
|
pblock = PSX_GETBLOCK(pc);
|
|
// if ((u8*)iopJITCompile == pblock->GetFnptr())
|
|
if (pblock->GetFnptr() == (uptr)iopJITCompile)
|
|
return 4;
|
|
|
|
pc = HWADDR(pc);
|
|
|
|
u32 lowerextent = pc, upperextent = pc + 4;
|
|
int blockidx = recBlocks.Index(pc);
|
|
pxAssert(blockidx != -1);
|
|
|
|
while (BASEBLOCKEX* pexblock = recBlocks[blockidx - 1])
|
|
{
|
|
if (pexblock->startpc + pexblock->size * 4 <= lowerextent)
|
|
break;
|
|
|
|
lowerextent = std::min(lowerextent, pexblock->startpc);
|
|
blockidx--;
|
|
}
|
|
|
|
int toRemoveFirst = blockidx;
|
|
|
|
while (BASEBLOCKEX* pexblock = recBlocks[blockidx])
|
|
{
|
|
if (pexblock->startpc >= upperextent)
|
|
break;
|
|
|
|
lowerextent = std::min(lowerextent, pexblock->startpc);
|
|
upperextent = std::max(upperextent, pexblock->startpc + pexblock->size * 4);
|
|
|
|
blockidx++;
|
|
}
|
|
|
|
if (toRemoveFirst != blockidx)
|
|
{
|
|
recBlocks.Remove(toRemoveFirst, (blockidx - 1));
|
|
}
|
|
|
|
blockidx = 0;
|
|
while (BASEBLOCKEX* pexblock = recBlocks[blockidx++])
|
|
{
|
|
if (pc >= pexblock->startpc && pc < pexblock->startpc + pexblock->size * 4) [[unlikely]]
|
|
{
|
|
DevCon.Error("[IOP] Impossible block clearing failure");
|
|
pxFail("[IOP] Impossible block clearing failure");
|
|
}
|
|
}
|
|
|
|
iopClearRecLUT(PSX_GETBLOCK(lowerextent), (upperextent - lowerextent) / 4);
|
|
|
|
return upperextent - pc;
|
|
}
|
|
|
|
// Clears recompiled code for Size 32-bit words starting at Addr, advancing by whatever
// PSXREC_CLEARM reports for each step.
static __fi void recClearIOP(u32 Addr, u32 Size)
{
	const u32 end = Addr + Size * 4;

	for (u32 pc = Addr; pc < end;)
		pc += PSXREC_CLEARM(pc);
}
|
|
|
|
// Ends the block with a branch to the address held in IOP register `reg`
// (0xffffffff skips the pc store and delay-slot handling). Compiles the delay
// slot, stores the target to psxRegs.pc, emits the branch test and jumps to the dispatcher.
void psxSetBranchReg(u32 reg)
{
	psxbranch = 1;

	if (reg != 0xffffffff)
	{
		if (psxTrySwapDelaySlot(reg, 0, 0))
		{
			// Delay slot already compiled: store the register straight into pc.
			if (PSX_IS_DIRTY_CONST(reg) || _hasX86reg(X86TYPE_PSX, reg, 0))
			{
				const int hostreg = _allocX86reg(X86TYPE_PSX, reg, MODE_READ);
				xMOV(ptr32[&psxRegs.pc], xRegister32(hostreg));
			}
			else
			{
				_psxMoveGPRtoM((uptr)&psxRegs.pc, reg);
			}
		}
		else
		{
			// Copy the target into a callee-saved writeback register before the delay
			// slot is compiled.
			const int wbreg = _allocX86reg(X86TYPE_PCWRITEBACK, 0, MODE_WRITE | MODE_CALLEESAVED);
			_psxMoveGPRtoR(xRegister32(wbreg), reg);

			psxRecompileNextInstruction(true, false);

			const bool still_held = x86regs[wbreg].inuse && x86regs[wbreg].type == X86TYPE_PCWRITEBACK;
			if (still_held)
			{
				xMOV(ptr32[&psxRegs.pc], xRegister32(wbreg));
				x86regs[wbreg].inuse = 0;
			}
			else
			{
				// The writeback register no longer holds the target: reload from psxRegs.pcWriteback.
				xMOV(eax, ptr32[&psxRegs.pcWriteback]);
				xMOV(ptr32[&psxRegs.pc], eax);
			}
		}
	}

	_psxFlushCall(FLUSH_EVERYTHING);
	iPsxBranchTest(0xffffffff, 1);

	JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr + 5));
}
|
|
|
|
// Ends the block with a branch to the fixed address imm: stores it to psxRegs.pc,
// flushes state, emits the branch test and links the jump through recBlocks.
void psxSetBranchImm(u32 imm)
{
	psxbranch = 1;
	pxAssert(imm);

	// end the current block
	xMOV(ptr32[&psxRegs.pc], imm);
	_psxFlushCall(FLUSH_EVERYTHING);

	// Second argument is non-zero when the target is not ahead of the current psxpc.
	const bool not_forward = (imm <= psxpc);
	iPsxBranchTest(imm, not_forward);

	recBlocks.Link(HWADDR(imm), xJcc32());
}
|
|
|
|
// Cycle count charged for the current block; currently s_psxBlockCycles unmodified.
static __fi u32 psxScaleBlockCycles()
{
	const u32 cycles = s_psxBlockCycles;
	return cycles;
}
|
|
|
|
// Emits code subtracting the block's cost from psxRegs.iopCycleEE.
// blockCycles == 0xFFFFFFFF means the count is already in eax at run time.
// With bit 3 of HW_ICFG clear the cost is blockCycles * 8 (or eax as-is); otherwise it
// is scaled by 1280/147 with the remainder carried in psxRegs.iopCycleEECarry.
static void iPsxAddEECycles(u32 blockCycles)
{
	const bool count_is_known = (blockCycles != 0xFFFFFFFF);

	if (!(psxHu32(HW_ICFG) & (1 << 3))) [[likely]]
	{
		if (count_is_known)
			xSUB(ptr32[&psxRegs.iopCycleEE], blockCycles * 8);
		else
			xSUB(ptr32[&psxRegs.iopCycleEE], eax);
		return;
	}

	// F = gcd(PS2CLK, PSXCLK) = 230400
	const u32 cnum = 1280; // PS2CLK / F
	const u32 cdenom = 147; // PSXCLK / F

	if (count_is_known)
		xMOV(eax, blockCycles * cnum);

	// eax = (eax + carry) / cdenom, remainder (edx) becomes the new carry.
	xADD(eax, ptr32[&psxRegs.iopCycleEECarry]);
	xMOV(ecx, cdenom);
	xXOR(edx, edx);
	xUDIV(ecx);
	xMOV(ptr32[&psxRegs.iopCycleEECarry], edx);
	xSUB(ptr32[&psxRegs.iopCycleEE], eax);
}
|
|
|
|
// Emits the end-of-block bookkeeping: advances psxRegs.cycle, charges the EE
// timeslice, leaves the recompiled code when the timeslice is used up and
// otherwise runs the event test.
//
// newpc:     pc the block continues at, or 0xffffffff when it is not known at
//            compile time (then no "pc changed" check is emitted).
// cpuBranch: not used by this function.
static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
{
	// Shared tail: call iopEventTest and, for a known target, go back to the
	// dispatcher if the event handling moved psxRegs.pc elsewhere.
	const auto emitEventTest = [newpc]() {
		xFastCall((void*)iopEventTest);

		if (newpc != 0xffffffff)
		{
			xCMP(ptr32[&psxRegs.pc], newpc);
			xJNE(iopDispatcherReg);
		}
	};

	const bool waitLoop = EmuConfig.Speedhacks.WaitLoop && s_nBlockFF && newpc == s_branchTo;
	if (waitLoop)
	{
		// eax = cycle + ((iopCycleEE + 7) >> 3), with ecx keeping the old
		// cycle; the CMOVNS then substitutes iopNextEventCycle when the
		// comparison result is non-negative.
		xMOV(eax, ptr32[&psxRegs.cycle]);
		xMOV(ecx, eax);
		xMOV(edx, ptr32[&psxRegs.iopCycleEE]);
		xADD(edx, 7);
		xSHR(edx, 3);
		xADD(eax, edx);
		xCMP(eax, ptr32[&psxRegs.iopNextEventCycle]);
		xCMOVNS(eax, ptr32[&psxRegs.iopNextEventCycle]);
		xMOV(ptr32[&psxRegs.cycle], eax);

		// eax = (cycles skipped) * 8, charged via the "amount in eax" sentinel.
		xSUB(eax, ecx);
		xSHL(eax, 3);
		iPsxAddEECycles(0xFFFFFFFF);
		xJLE(iopExitRecompiledCode);

		emitEventTest();
		return;
	}

	const u32 blockCycles = psxScaleBlockCycles();

	xMOV(ebx, ptr32[&psxRegs.cycle]);
	xADD(ebx, blockCycles);
	xMOV(ptr32[&psxRegs.cycle], ebx); // update cycles

	// jump if iopCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE)
	iPsxAddEECycles(blockCycles);
	xJLE(iopExitRecompiledCode);

	// check if an event is pending: skip the test while cycle - iopNextEventCycle is negative
	xSUB(ebx, ptr32[&psxRegs.iopNextEventCycle]);
	xForwardJS<u8> nointerruptpending;

	emitEventTest();

	nointerruptpending.SetTarget();
}
|
|
|
|
#if 0
//static const int *s_pCode;

// Disabled debugging aid: copies the current value of eax into a local and
// logs it as the address of IOP code that changed.
#if !defined(_MSC_VER)
static void checkcodefn()
{
	int pctemp;

#ifdef _MSC_VER
	// Unreachable while the enclosing guard excludes MSVC; kept for reference.
	__asm mov pctemp, eax;
#else
	// An output operand constraint must start with '=' ("=m", not "m=").
	__asm__ __volatile__("movl %%eax, %[pctemp]" : [pctemp]"=m"(pctemp) );
#endif
	Console.WriteLn("iop code changed! %x", pctemp);
}
#endif
#endif
|
|
|
|
void rpsxSYSCALL()
|
|
{
|
|
xMOV(ptr32[&psxRegs.code], psxRegs.code);
|
|
xMOV(ptr32[&psxRegs.pc], psxpc - 4);
|
|
_psxFlushCall(FLUSH_NODESTROY);
|
|
|
|
//xMOV( ecx, 0x20 ); // exception code
|
|
//xMOV( edx, psxbranch==1 ); // branch delay slot?
|
|
xFastCall((void*)psxException, 0x20, psxbranch == 1);
|
|
|
|
xCMP(ptr32[&psxRegs.pc], psxpc - 4);
|
|
j8Ptr[0] = JE8(0);
|
|
|
|
xADD(ptr32[&psxRegs.cycle], psxScaleBlockCycles());
|
|
iPsxAddEECycles(psxScaleBlockCycles());
|
|
JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr + 5));
|
|
|
|
// jump target for skipping blockCycle updates
|
|
x86SetJ8(j8Ptr[0]);
|
|
|
|
//if (!psxbranch) psxbranch = 2;
|
|
}
|
|
|
|
void rpsxBREAK()
|
|
{
|
|
xMOV(ptr32[&psxRegs.code], psxRegs.code);
|
|
xMOV(ptr32[&psxRegs.pc], psxpc - 4);
|
|
_psxFlushCall(FLUSH_NODESTROY);
|
|
|
|
//xMOV( ecx, 0x24 ); // exception code
|
|
//xMOV( edx, psxbranch==1 ); // branch delay slot?
|
|
xFastCall((void*)psxException, 0x24, psxbranch == 1);
|
|
|
|
xCMP(ptr32[&psxRegs.pc], psxpc - 4);
|
|
j8Ptr[0] = JE8(0);
|
|
xADD(ptr32[&psxRegs.cycle], psxScaleBlockCycles());
|
|
iPsxAddEECycles(psxScaleBlockCycles());
|
|
JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr + 5));
|
|
x86SetJ8(j8Ptr[0]);
|
|
|
|
//if (!psxbranch) psxbranch = 2;
|
|
}
|
|
|
|
static bool psxDynarecCheckBreakpoint()
|
|
{
|
|
u32 pc = psxRegs.pc;
|
|
if (CBreakPoints::CheckSkipFirst(BREAKPOINT_IOP, pc) == pc)
|
|
return false;
|
|
|
|
int bpFlags = psxIsBreakpointNeeded(pc);
|
|
bool hit = false;
|
|
//check breakpoint at current pc
|
|
if (bpFlags & 1)
|
|
{
|
|
auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_IOP, pc);
|
|
if (cond == NULL || cond->Evaluate())
|
|
{
|
|
hit = true;
|
|
}
|
|
}
|
|
//check breakpoint in delay slot
|
|
if (bpFlags & 2)
|
|
{
|
|
auto cond = CBreakPoints::GetBreakPointCondition(BREAKPOINT_IOP, pc + 4);
|
|
if (cond == NULL || cond->Evaluate())
|
|
hit = true;
|
|
}
|
|
|
|
if (!hit)
|
|
return false;
|
|
|
|
CBreakPoints::SetBreakpointTriggered(true, BREAKPOINT_IOP);
|
|
VMManager::SetPaused(true);
|
|
|
|
// Exit the EE too.
|
|
Cpu->ExitExecution();
|
|
return true;
|
|
}
|
|
|
|
// Run-time helper called from recompiled code when a memory access overlaps
// memcheck number `i` of the IOP memcheck list.
//
// Returns false when the current pc is the one to be skipped once or when the
// memcheck's condition evaluates to false. Otherwise optionally logs the hit,
// flags the breakpoint as triggered, pauses the VM, asks the EE to leave its
// execution loop and returns true.
static bool psxDynarecMemcheck(size_t i)
{
	const u32 pc = psxRegs.pc;
	const u32 instruction = iopMemRead32(pc);
	const R5900::OPCODE& opcode = R5900::GetInstruction(instruction);
	// Taken by value on purpose: the list is indexed directly off the call.
	auto mc = CBreakPoints::GetMemChecks(BREAKPOINT_IOP)[i];

	if (CBreakPoints::CheckSkipFirst(BREAKPOINT_IOP, pc) == pc)
		return false;

	if (mc.hasCond && !mc.cond.Evaluate())
		return false;

	if (mc.result & MEMCHECK_LOG)
	{
		const bool isStore = (opcode.flags & IS_STORE) != 0;
		if (!isStore)
			DevCon.WriteLn("Hit R3000 load breakpoint @0x%x", pc);
		else
			DevCon.WriteLn("Hit R3000 store breakpoint @0x%x", pc);
	}

	CBreakPoints::SetBreakpointTriggered(true, BREAKPOINT_IOP);
	VMManager::SetPaused(true);

	// Exit the EE too.
	Cpu->ExitExecution();
	return true;
}
|
|
|
|
// Emits the run-time range checks for a load/store so that matching IOP
// memchecks can break execution.
//
// op:    the load/store opcode (base register in bits 21..25, signed 16-bit
//        offset in the low half).
// bits:  access width in bits.
// store: true for a store, false for a load.
static void psxRecMemcheck(u32 op, u32 bits, bool store)
{
	_psxFlushCall(FLUSH_EVERYTHING | FLUSH_PC);

	// compute accessed address: ecx = GPR[base] + offset
	const u32 baseReg = (op >> 21) & 0x1F;
	const s16 offset = (s16)op;
	_psxMoveGPRtoR(ecx, baseReg);
	if (offset != 0)
		xADD(ecx, offset);

	// edx = one past the last accessed byte
	xMOV(edx, ecx);
	xADD(edx, bits / 8);

	// ecx = access address
	// edx = access address+size

	auto memchecks = CBreakPoints::GetMemChecks(BREAKPOINT_IOP);
	for (size_t i = 0; i < memchecks.size(); i++)
	{
		// Skip checks that do nothing or do not watch this access direction.
		const bool inactive = (memchecks[i].result == 0);
		const u32 wantedCond = store ? MEMCHECK_WRITE : MEMCHECK_READ;
		const bool wrongDirection = (memchecks[i].memCond & wantedCond) == 0;
		if (inactive || wrongDirection)
			continue;

		// logic: memAddress < bpEnd && bpStart < memAddress+memSize

		xMOV(eax, memchecks[i].end);
		xCMP(ecx, eax); // address < end
		xForwardJGE8 next1; // if address >= end then goto next1

		xMOV(eax, memchecks[i].start);
		xCMP(eax, edx); // start < address+size
		xForwardJGE8 next2; // if start >= address+size then goto next2

		// hit the breakpoint

		if (memchecks[i].result & MEMCHECK_BREAK)
		{
			// Pass the memcheck index to the helper; a true result leaves
			// the recompiled code.
			xMOV(eax, i);
			xFastCall((void*)psxDynarecMemcheck, eax);
			xTEST(al, al);
			xJNZ(iopExitRecompiledCode);
		}

		next1.SetTarget();
		next2.SetTarget();
	}
}
|
|
|
|
static void psxEncodeBreakpoint()
|
|
{
|
|
if (psxIsBreakpointNeeded(psxpc) != 0)
|
|
{
|
|
_psxFlushCall(FLUSH_EVERYTHING | FLUSH_PC);
|
|
xFastCall((void*)psxDynarecCheckBreakpoint);
|
|
xTEST(al, al);
|
|
xJNZ(iopExitRecompiledCode);
|
|
}
|
|
}
|
|
|
|
static void psxEncodeMemcheck()
|
|
{
|
|
int needed = psxIsMemcheckNeeded(psxpc);
|
|
if (needed == 0)
|
|
return;
|
|
|
|
u32 op = iopMemRead32(needed == 2 ? psxpc + 4 : psxpc);
|
|
const R5900::OPCODE& opcode = R5900::GetInstruction(op);
|
|
|
|
bool store = (opcode.flags & IS_STORE) != 0;
|
|
switch (opcode.flags & MEMTYPE_MASK)
|
|
{
|
|
case MEMTYPE_BYTE:
|
|
psxRecMemcheck(op, 8, store);
|
|
break;
|
|
case MEMTYPE_HALF:
|
|
psxRecMemcheck(op, 16, store);
|
|
break;
|
|
case MEMTYPE_WORD:
|
|
psxRecMemcheck(op, 32, store);
|
|
break;
|
|
case MEMTYPE_DWORD:
|
|
psxRecMemcheck(op, 64, store);
|
|
break;
|
|
}
|
|
}
|
|
|
|
void psxRecompileNextInstruction(bool delayslot, bool swapped_delayslot)
|
|
{
|
|
#ifdef DUMP_BLOCKS
|
|
const bool dump_block = true;
|
|
|
|
const u8* instStart = x86Ptr;
|
|
ZydisDecoder disas_decoder;
|
|
ZydisFormatter disas_formatter;
|
|
ZydisDecodedInstruction disas_instruction;
|
|
|
|
if (dump_block)
|
|
{
|
|
fprintf(stderr, "Compiling %s%s\n", delayslot ? "delay slot " : "", disR3000AF(iopMemRead32(psxpc), psxpc));
|
|
if (!delayslot)
|
|
{
|
|
ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64);
|
|
ZydisFormatterInit(&disas_formatter, ZYDIS_FORMATTER_STYLE_INTEL);
|
|
s_old_print_address = (ZydisFormatterFunc)&ZydisFormatterPrintAddressAbsolute;
|
|
ZydisFormatterSetHook(&disas_formatter, ZYDIS_FORMATTER_FUNC_PRINT_ADDRESS_ABS, (const void**)&s_old_print_address);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
const int old_code = psxRegs.code;
|
|
EEINST* old_inst_info = g_pCurInstInfo;
|
|
s_recompilingDelaySlot = delayslot;
|
|
|
|
// add breakpoint
|
|
if (!delayslot)
|
|
{
|
|
// Broken on x64
|
|
psxEncodeBreakpoint();
|
|
psxEncodeMemcheck();
|
|
}
|
|
else
|
|
{
|
|
_clearNeededX86regs();
|
|
}
|
|
|
|
psxRegs.code = iopMemRead32(psxpc);
|
|
s_psxBlockCycles++;
|
|
psxpc += 4;
|
|
|
|
g_pCurInstInfo++;
|
|
|
|
g_iopCyclePenalty = 0;
|
|
rpsxBSC[psxRegs.code >> 26]();
|
|
s_psxBlockCycles += g_iopCyclePenalty;
|
|
|
|
if (!swapped_delayslot)
|
|
_clearNeededX86regs();
|
|
|
|
if (swapped_delayslot)
|
|
{
|
|
psxRegs.code = old_code;
|
|
g_pCurInstInfo = old_inst_info;
|
|
}
|
|
|
|
#ifdef DUMP_BLOCKS
|
|
if (dump_block && !delayslot)
|
|
{
|
|
const u8* instPtr = instStart;
|
|
ZyanUSize instLength = static_cast<ZyanUSize>(x86Ptr - instStart);
|
|
while (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&disas_decoder, instPtr, instLength, &disas_instruction)))
|
|
{
|
|
char buffer[256];
|
|
if (ZYAN_SUCCESS(ZydisFormatterFormatInstruction(&disas_formatter, &disas_instruction, buffer, sizeof(buffer), (ZyanU64)instPtr)))
|
|
std::fprintf(stderr, " %016" PRIX64 " %s\n", (u64)instPtr, buffer);
|
|
|
|
instPtr += disas_instruction.length;
|
|
instLength -= disas_instruction.length;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
#ifdef TRACE_BLOCKS
// Tracing hook that recompiled blocks call on entry when TRACE_BLOCKS is
// defined. Both bodies below are compiled out with `#if 0`, so as written the
// function does nothing; enable a section by hand when debugging.
static void PreBlockCheck(u32 blockpc)
{
#if 0
	// Logs pc, cycle count, a CRC of the register file up to `pc`, and
	// optionally every register value, to a fixed file path.
	static FILE* logFile = nullptr;
	static bool openAttempted = false;
	if (!openAttempted && psxRegs.cycle >= 0)
	{
		logFile = std::fopen("C:\\Dumps\\comp\\ioplog.txt", "wb");
		openAttempted = true;
	}
	if (logFile)
	{
		u32 stateHash = crc32(0, (Bytef*)&psxRegs, offsetof(psxRegisters, pc));

#if 1
		std::fprintf(logFile, "%08X (%u; %08X):", psxRegs.pc, psxRegs.cycle, stateHash);
		for (int reg = 0; reg < 34; reg++)
		{
			std::fprintf(logFile, " %s: %08X", R3000A::disRNameGPR[reg], psxRegs.GPR.r[reg]);
		}
		std::fprintf(logFile, "\n");
#else
		std::fprintf(logFile, "%08X (%u): %08X\n", psxRegs.pc, psxRegs.cycle, stateHash);
#endif
		// std::fflush(logFile);
	}
#endif
#if 0
	// Break into the debugger at a chosen cycle count.
	if (psxRegs.cycle == 0)
		__debugbreak();
#endif
}
#endif
|
|
|
|
static void iopRecRecompile(const u32 startpc)
|
|
{
|
|
u32 i;
|
|
u32 willbranch3 = 0;
|
|
|
|
// When upgrading the IOP, there are two resets, the second of which is a 'fake' reset
|
|
// This second 'reset' involves UDNL calling SYSMEM and LOADCORE directly, resetting LOADCORE's modules
|
|
// This detects when SYSMEM is called and clears the modules then
|
|
if(startpc == 0x890)
|
|
{
|
|
DevCon.WriteLn(Color_Gray, "[R3000 Debugger] Branch to 0x890 (SYSMEM). Clearing modules.");
|
|
R3000SymbolGuardian.ClearIrxModules();
|
|
}
|
|
|
|
// Inject IRX hack
|
|
if (startpc == 0x1630 && EmuConfig.CurrentIRX.length() > 3)
|
|
{
|
|
if (iopMemRead32(0x20018) == 0x1F)
|
|
{
|
|
// FIXME do I need to increase the module count (0x1F -> 0x20)
|
|
iopMemWrite32(0x20094, 0xbffc0000);
|
|
}
|
|
}
|
|
|
|
pxAssert(startpc);
|
|
|
|
// if recPtr reached the mem limit reset whole mem
|
|
if (recPtr >= recPtrEnd)
|
|
{
|
|
recResetIOP();
|
|
}
|
|
|
|
xSetPtr(recPtr);
|
|
recPtr = xGetAlignedCallTarget();
|
|
|
|
s_pCurBlock = PSX_GETBLOCK(startpc);
|
|
|
|
pxAssert(s_pCurBlock->GetFnptr() == (uptr)iopJITCompile || s_pCurBlock->GetFnptr() == (uptr)iopJITCompileInBlock);
|
|
|
|
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
|
|
|
|
if (!s_pCurBlockEx || s_pCurBlockEx->startpc != HWADDR(startpc))
|
|
s_pCurBlockEx = recBlocks.New(HWADDR(startpc), (uptr)recPtr);
|
|
|
|
psxbranch = 0;
|
|
|
|
s_pCurBlock->SetFnptr((uptr)x86Ptr);
|
|
s_psxBlockCycles = 0;
|
|
|
|
// reset recomp state variables
|
|
psxpc = startpc;
|
|
g_psxHasConstReg = g_psxFlushedConstReg = 1;
|
|
|
|
_initX86regs();
|
|
|
|
if ((psxHu32(HW_ICFG) & 8) && (HWADDR(startpc) == 0xa0 || HWADDR(startpc) == 0xb0 || HWADDR(startpc) == 0xc0))
|
|
{
|
|
xFastCall((void*)psxBiosCall);
|
|
xTEST(al, al);
|
|
xJNZ(iopDispatcherReg);
|
|
}
|
|
|
|
#ifdef TRACE_BLOCKS
|
|
xFastCall((void*)PreBlockCheck, psxpc);
|
|
#endif
|
|
|
|
// go until the next branch
|
|
i = startpc;
|
|
s_nEndBlock = 0xffffffff;
|
|
s_branchTo = -1;
|
|
|
|
while (1)
|
|
{
|
|
BASEBLOCK* pblock = PSX_GETBLOCK(i);
|
|
if (i != startpc && pblock->GetFnptr() != (uptr)iopJITCompile && pblock->GetFnptr() != (uptr)iopJITCompileInBlock)
|
|
{
|
|
// branch = 3
|
|
willbranch3 = 1;
|
|
s_nEndBlock = i;
|
|
break;
|
|
}
|
|
|
|
psxRegs.code = iopMemRead32(i);
|
|
|
|
switch (psxRegs.code >> 26)
|
|
{
|
|
case 0: // special
|
|
if (_Funct_ == 8 || _Funct_ == 9)
|
|
{ // JR, JALR
|
|
s_nEndBlock = i + 8;
|
|
goto StartRecomp;
|
|
}
|
|
break;
|
|
|
|
case 1: // regimm
|
|
if (_Rt_ == 0 || _Rt_ == 1 || _Rt_ == 16 || _Rt_ == 17)
|
|
{
|
|
s_branchTo = _Imm_ * 4 + i + 4;
|
|
if (s_branchTo > startpc && s_branchTo < i)
|
|
s_nEndBlock = s_branchTo;
|
|
else
|
|
s_nEndBlock = i + 8;
|
|
goto StartRecomp;
|
|
}
|
|
break;
|
|
|
|
case 2: // J
|
|
case 3: // JAL
|
|
s_branchTo = (_InstrucTarget_ << 2) | ((i + 4) & 0xf0000000);
|
|
s_nEndBlock = i + 8;
|
|
goto StartRecomp;
|
|
|
|
// branches
|
|
case 4:
|
|
case 5:
|
|
case 6:
|
|
case 7:
|
|
s_branchTo = _Imm_ * 4 + i + 4;
|
|
if (s_branchTo > startpc && s_branchTo < i)
|
|
s_nEndBlock = s_branchTo;
|
|
else
|
|
s_nEndBlock = i + 8;
|
|
goto StartRecomp;
|
|
}
|
|
|
|
i += 4;
|
|
}
|
|
|
|
StartRecomp:
|
|
|
|
s_nBlockFF = false;
|
|
if (s_branchTo == startpc)
|
|
{
|
|
s_nBlockFF = true;
|
|
for (i = startpc; i < s_nEndBlock; i += 4)
|
|
{
|
|
if (i != s_nEndBlock - 8)
|
|
{
|
|
switch (iopMemRead32(i))
|
|
{
|
|
case 0: // nop
|
|
break;
|
|
default:
|
|
s_nBlockFF = false;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// rec info //
|
|
{
|
|
EEINST* pcur;
|
|
|
|
if (s_nInstCacheSize < (s_nEndBlock - startpc) / 4 + 1)
|
|
{
|
|
free(s_pInstCache);
|
|
s_nInstCacheSize = (s_nEndBlock - startpc) / 4 + 10;
|
|
s_pInstCache = (EEINST*)malloc(sizeof(EEINST) * s_nInstCacheSize);
|
|
pxAssert(s_pInstCache != NULL);
|
|
}
|
|
|
|
pcur = s_pInstCache + (s_nEndBlock - startpc) / 4;
|
|
_recClearInst(pcur);
|
|
pcur->info = 0;
|
|
|
|
for (i = s_nEndBlock; i > startpc; i -= 4)
|
|
{
|
|
psxRegs.code = iopMemRead32(i - 4);
|
|
pcur[-1] = pcur[0];
|
|
rpsxpropBSC(pcur - 1, pcur);
|
|
pcur--;
|
|
}
|
|
}
|
|
|
|
g_pCurInstInfo = s_pInstCache;
|
|
while (!psxbranch && psxpc < s_nEndBlock)
|
|
{
|
|
psxRecompileNextInstruction(false, false);
|
|
}
|
|
|
|
pxAssert((psxpc - startpc) >> 2 <= 0xffff);
|
|
s_pCurBlockEx->size = (psxpc - startpc) >> 2;
|
|
|
|
for (i = 1; i < (u32)s_pCurBlockEx->size; ++i)
|
|
{
|
|
if (s_pCurBlock[i].GetFnptr() == (uptr)iopJITCompile)
|
|
s_pCurBlock[i].SetFnptr((uptr)iopJITCompileInBlock);
|
|
}
|
|
|
|
if (!(psxpc & 0x10000000))
|
|
g_psxMaxRecMem = std::max((psxpc & ~0xa0000000), g_psxMaxRecMem);
|
|
|
|
if (psxbranch == 2)
|
|
{
|
|
_psxFlushCall(FLUSH_EVERYTHING);
|
|
|
|
iPsxBranchTest(0xffffffff, 1);
|
|
|
|
JMP32((uptr)iopDispatcherReg - ((uptr)x86Ptr + 5));
|
|
}
|
|
else
|
|
{
|
|
if (psxbranch)
|
|
pxAssert(!willbranch3);
|
|
else
|
|
{
|
|
xADD(ptr32[&psxRegs.cycle], psxScaleBlockCycles());
|
|
iPsxAddEECycles(psxScaleBlockCycles());
|
|
}
|
|
|
|
if (willbranch3 || !psxbranch)
|
|
{
|
|
pxAssert(psxpc == s_nEndBlock);
|
|
_psxFlushCall(FLUSH_EVERYTHING);
|
|
xMOV(ptr32[&psxRegs.pc], psxpc);
|
|
recBlocks.Link(HWADDR(s_nEndBlock), xJcc32());
|
|
psxbranch = 3;
|
|
}
|
|
}
|
|
|
|
pxAssert(xGetPtr() < recPtrEnd);
|
|
|
|
pxAssert(xGetPtr() - recPtr < _64kb);
|
|
s_pCurBlockEx->x86size = xGetPtr() - recPtr;
|
|
|
|
Perf::iop.RegisterPC((void*)s_pCurBlockEx->fnptr, s_pCurBlockEx->x86size, s_pCurBlockEx->startpc);
|
|
|
|
recPtr = xGetPtr();
|
|
|
|
pxAssert((g_psxHasConstReg & g_psxFlushedConstReg) == g_psxHasConstReg);
|
|
|
|
s_pCurBlock = NULL;
|
|
s_pCurBlockEx = NULL;
|
|
}
|
|
|
|
// R3000Acpu instance backed by the recompiler's entry points. The initializer
// is positional, so the order of the entries must follow the member order
// declared in R3000Acpu.
R3000Acpu psxRec = {recReserve, recResetIOP, recExecuteBlock, recClearIOP, recShutdown};
|