From 5bd531e2126a2a092c4f605cddfe180ddb74d2a2 Mon Sep 17 00:00:00 2001 From: sudonim1 Date: Tue, 14 Jul 2009 21:33:38 +0000 Subject: [PATCH] Made the EE recompiler 64-bit constant buffer (was called a "stack" despite not being used as one) reuse recent constants rather than duplicating for every instance, resulting in less recompiler resets (e.g. espgaluda resetting every couple of seconds). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1509 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iR5900.h | 2 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 76 +++++++++++++++++++---------- pcsx2/x86/ix86-32/iR5900Arit.cpp | 12 +---- pcsx2/x86/ix86-32/iR5900AritImm.cpp | 51 ++++--------------- pcsx2/x86/ix86-32/iR5900Move.cpp | 26 ++++------ pcsx2/x86/ix86-32/iR5900MultDiv.cpp | 22 +++------ 6 files changed, 77 insertions(+), 112 deletions(-) diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index edd4cf446d..6ddb049751 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -140,7 +140,7 @@ void _eeOnWriteReg(int reg, int signext); void _deleteEEreg(int reg, int flush); // allocates memory on the instruction size and returns the pointer -u32* recAllocStackMem(int size, int align); +u32* recGetImm64(u32 hi, u32 lo); void _vuRegsCOP22(VURegs * VU, _VURegsNum *VUregsn); diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 88a06b57eb..4d36fd7253 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -75,17 +75,18 @@ u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0; // Static Private Variables - R5900 Dynarec #define X86 -static const int RECSTACK_SIZE = 0x00020000; +static const int RECCONSTBUF_SIZE = 16384 * 2; // 64 bit consts in 32 bit units static u8 *recMem = NULL; // the recompiled blocks will be here -static u8* recStack = NULL; // stack mem +static u32* recConstBuf = NULL; // 64-bit pseudo-immediates static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here static BASEBLOCK *recROM = NULL; // and here static BASEBLOCK *recROM1 = NULL; // also here static u32 *recRAMCopy = NULL; void JITCompile(); static BaseBlocks recBlocks((uptr)JITCompile); -static u8* recPtr = NULL, *recStackPtr = NULL; +static u8* recPtr = NULL; +static u32 *recConstBufPtr = NULL; EEINST* s_pInstCache = NULL; static u32 s_nInstCacheSize = 0; @@ -209,13 +210,8 @@ u32* _eeGetConstReg(int reg) return &cpuRegs.GPR.r[ reg ].UL[0]; // if written in the future, don't flush - if( _recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-pc)/4, XMMTYPE_GPRREG, reg) ) { - u32* ptempmem; - ptempmem = recAllocStackMem(8, 4); - ptempmem[0] = g_cpuConstRegs[ reg ].UL[0]; - ptempmem[1] = g_cpuConstRegs[ reg ].UL[1]; - return ptempmem; - } + if( _recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-pc)/4, XMMTYPE_GPRREG, reg) ) + return recGetImm64(g_cpuConstRegs[reg].UL[1], g_cpuConstRegs[reg].UL[0]); _flushConstReg(reg); return &cpuRegs.GPR.r[ reg ].UL[0]; @@ -341,19 +337,44 @@ int _flushUnusedConstReg() return 0; } -// ------------------------------------------------------------------------ -// recAllocStackMem -- an optimization trick to write data to a location so that -// recompiled code can reference it later on during execution. -// -// Intended use is for setting up 64/128 bit SSE immediates, primarily. -// -u32* recAllocStackMem(int size, int align) +// Some of the generated MMX code needs 64-bit immediates but x86 doesn't +// provide this. One of the reasons we are probably better off not doing +// MMX register allocation for the EE. +u32* recGetImm64(u32 hi, u32 lo) { - jASSUME( align == 4 || align == 8 || align == 16 ); + u32 *imm64; // returned pointer + static u32 *imm64_cache[509]; + int cacheidx = lo % (sizeof imm64_cache / sizeof *imm64_cache); + //static int count; count++; - recStackPtr = (u8*) ( (((uptr)recStackPtr) + (align-1)) & ~(align-1) ); - recStackPtr += size; - return (u32*)(recStackPtr-size); + imm64 = imm64_cache[cacheidx]; + if (imm64 && imm64[0] == lo && imm64[1] == hi) + return imm64; + + if (recConstBufPtr >= recConstBuf + RECCONSTBUF_SIZE) { + // TODO: flag an error in recompilation which would reset the recompiler + // immediately and recompile the current block again. There is currently + // no way to do this, so have a last ditch attempt at making things sane + // and return some nonsense if that fails. + for (u32 *p = recConstBuf; p < recConstBuf + RECCONSTBUF_SIZE; p += 2) + if (p[0] == lo && p[1] == hi) { + imm64_cache[cacheidx] = p; + return p; + } + + return recConstBuf; + } + + imm64 = recConstBufPtr; + recConstBufPtr += 2; + imm64_cache[cacheidx] = imm64; + + imm64[0] = lo; + imm64[1] = hi; + + //Console::Notice("Consts allocated: %d of %u", params (recConstBufPtr - recConstBuf) / 2, ++count); + + return imm64; } ////////////////////////////////////////////////////////////////////////////////////////// @@ -366,7 +387,7 @@ static u8* m_recBlockAlloc = NULL; static const uint m_recBlockAllocSize = (((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4) * sizeof(BASEBLOCK)) -+ RECSTACK_SIZE + Ps2MemSize::Base; ++ RECCONSTBUF_SIZE * sizeof(u32) + Ps2MemSize::Base; static void recAlloc() { @@ -408,7 +429,7 @@ static void recAlloc() recRAM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Base / 4) * sizeof(BASEBLOCK); recROM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom / 4) * sizeof(BASEBLOCK); recROM1 = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom1 / 4) * sizeof(BASEBLOCK); - recStack = (u8*)curpos; curpos += RECSTACK_SIZE; + recConstBuf = (u32*)curpos; curpos += RECCONSTBUF_SIZE * sizeof(u32); recRAMCopy = (u32*)curpos; if( s_pInstCache == NULL ) @@ -439,6 +460,7 @@ void recResetEE( void ) memset_8<0xcc, REC_CACHEMEM>(recMem); // 0xcc is INT3 memzero_ptr( m_recBlockAlloc ); + memzero_ptr(recConstBuf); memzero_obj( manual_page ); memzero_obj( manual_counter ); ClearRecLUT((BASEBLOCK*)m_recBlockAlloc, @@ -490,7 +512,7 @@ void recResetEE( void ) x86SetPtr(recMem); recPtr = recMem; - recStackPtr = recStack; + recConstBufPtr = recConstBuf; x86FpuState = FPU_STATE; branch = 0; @@ -505,7 +527,7 @@ static void recShutdown( void ) SafeSysMunmap( recMem, REC_CACHEMEM ); safe_aligned_free( m_recBlockAlloc ); recRAM = recROM = recROM1 = NULL; - recStack = NULL; + recConstBuf = NULL; recRAMCopy = NULL; safe_free( s_pInstCache ); @@ -1247,7 +1269,7 @@ void recRecompile( const u32 startpc ) if ( ( (uptr)recPtr - (uptr)recMem ) >= REC_CACHEMEM-0x40000 || dumplog == 0xffffffff) { recResetEE(); } - if ( ( (uptr)recStackPtr - (uptr)recStack ) >= RECSTACK_SIZE-0x100 ) { + if ( (recConstBufPtr - recConstBuf) >= RECCONSTBUF_SIZE - 64 ) { DevCon::WriteLn("EE recompiler stack reset"); recResetEE(); } @@ -1646,7 +1668,7 @@ StartRecomp: } assert( x86Ptr < recMem+REC_CACHEMEM ); - assert( recStackPtr < recStack+RECSTACK_SIZE ); + assert( recConstBufPtr < recConstBuf + RECCONSTBUF_SIZE ); assert( x86FpuState == 0 ); assert(x86Ptr - recPtr < 0x10000); diff --git a/pcsx2/x86/ix86-32/iR5900Arit.cpp b/pcsx2/x86/ix86-32/iR5900Arit.cpp index 2106347b60..90ac00fd46 100644 --- a/pcsx2/x86/ix86-32/iR5900Arit.cpp +++ b/pcsx2/x86/ix86-32/iR5900Arit.cpp @@ -1306,10 +1306,6 @@ void recSLTs_consts(int info, int sign) PSRLQItoR(EEREC_D, 63); } else { - u32* ptempmem = recAllocStackMem(8,4); - ptempmem[0] = g_cpuConstRegs[_Rs_].UL[0]^0x80000000; - ptempmem[1] = 0; - if( EEREC_D != EEREC_T ) { MOVDMtoMMX(EEREC_D, (u32)&s_sltconst); PXORRtoR(EEREC_D, EEREC_T); @@ -1318,7 +1314,7 @@ void recSLTs_consts(int info, int sign) PXORMtoR(EEREC_D, (u32)&s_sltconst); } - PCMPGTDMtoR(EEREC_D, (u32)ptempmem); + PCMPGTDMtoR(EEREC_D, (uptr)recGetImm64(0, g_cpuConstRegs[_Rs_].UL[0] ^ 0x80000000)); PUNPCKLDQRtoR(EEREC_D, EEREC_D); PSRLQItoR(EEREC_D, 63); @@ -1439,11 +1435,7 @@ void recSLTs_constt(int info, int sign) recSLTmemconstt(EEREC_D, EEREC_S, (u32)_eeGetConstReg(_Rt_), 1); } else { - u32* ptempmem = recAllocStackMem(8,4); - ptempmem[0] = g_cpuConstRegs[_Rt_].UL[0]^0x80000000; - ptempmem[1] = 0; - - recSLTmemconstt(EEREC_D, EEREC_S, (u32)ptempmem, 0); + recSLTmemconstt(EEREC_D, EEREC_S, (uptr)recGetImm64(0, g_cpuConstRegs[_Rt_].UL[0] ^ 0x80000000), 0); } return; diff --git a/pcsx2/x86/ix86-32/iR5900AritImm.cpp b/pcsx2/x86/ix86-32/iR5900AritImm.cpp index 7ae89bfe0a..5cb828d561 100644 --- a/pcsx2/x86/ix86-32/iR5900AritImm.cpp +++ b/pcsx2/x86/ix86-32/iR5900AritImm.cpp @@ -63,13 +63,8 @@ void recADDI_(int info) if ( info & PROCESS_EE_MMX ) { if ( _Imm_ != 0 ) { - - u32* ptempmem = recAllocStackMem(8, 8); - ptempmem[0] = (s32)_Imm_; - ptempmem[1] = 0; - if ( EEREC_T != EEREC_S ) MOVQRtoR(EEREC_T, EEREC_S); - PADDDMtoR(EEREC_T, (u32)ptempmem); + PADDDMtoR(EEREC_T, (uptr)recGetImm64(0, _Imm_)); if ( EEINST_ISLIVE1(_Rt_) ) _signExtendGPRtoMMX(EEREC_T, _Rt_, 0); else EEINST_RESETHASLIVE1(_Rt_); } @@ -89,12 +84,8 @@ void recADDI_(int info) SetMMXstate(); if ( _Imm_ != 0 ) { - u32* ptempmem = recAllocStackMem(8, 8); - ptempmem[0] = (s32)_Imm_; - ptempmem[1] = 0; - MOVDMtoMMX(rtreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]); - PADDDMtoR(rtreg, (u32)ptempmem); + PADDDMtoR(rtreg, (uptr)recGetImm64(0, _Imm_)); if ( EEINST_ISLIVE1(_Rt_) ) _signExtendGPRtoMMX(rtreg, _Rt_, 0); else EEINST_RESETHASLIVE1(_Rt_); @@ -162,13 +153,8 @@ void recDADDI_(int info) if( info & PROCESS_EE_MMX ) { if( _Imm_ != 0 ) { - - // flush - u32* ptempmem = recAllocStackMem(8, 8); - ptempmem[0] = _Imm_; - ptempmem[1] = _Imm_ >= 0 ? 0 : 0xffffffff; if( EEREC_T != EEREC_S ) MOVQRtoR(EEREC_T, EEREC_S); - PADDQMtoR(EEREC_T, (u32)ptempmem); + PADDQMtoR(EEREC_T, (uptr)recGetImm64(-(_Imm_ < 0), _Imm_)); } else { if( EEREC_T != EEREC_S ) MOVQRtoR(EEREC_T, EEREC_S); @@ -178,15 +164,12 @@ void recDADDI_(int info) if( (g_pCurInstInfo->regs[_Rt_]&EEINST_MMX) ) { int rtreg; - u32* ptempmem = recAllocStackMem(8, 8); - ptempmem[0] = _Imm_; - ptempmem[1] = _Imm_ >= 0 ? 0 : 0xffffffff; rtreg = _allocMMXreg(-1, MMX_GPR+_Rt_, MODE_WRITE); SetMMXstate(); MOVQMtoR(rtreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]); - PADDQMtoR(rtreg, (u32)ptempmem); + PADDQMtoR(rtreg, (uptr)recGetImm64(-(_Imm_ < 0), _Imm_)); } else { if( _Rt_ == _Rs_ ) { @@ -239,10 +222,7 @@ void recSLTIU_(int info) { if( info & PROCESS_EE_MMX ) { if( EEINST_ISSIGNEXT(_Rs_) ) { - u32* ptempmem = recAllocStackMem(8,4); - ptempmem[0] = ((s32)(_Imm_))^0x80000000; - ptempmem[1] = 0; - recSLTmemconstt(EEREC_T, EEREC_S, (u32)ptempmem, 0); + recSLTmemconstt(EEREC_T, EEREC_S, (uptr)recGetImm64(0, ((s32)_Imm_)^0x80000000), 0); EEINST_SETSIGNEXT(_Rt_); return; } @@ -292,10 +272,7 @@ void recSLTI_(int info) if( info & PROCESS_EE_MMX) { if( EEINST_ISSIGNEXT(_Rs_) ) { - u32* ptempmem = recAllocStackMem(8,4); - ptempmem[0] = _Imm_; - ptempmem[1] = 0; - recSLTmemconstt(EEREC_T, EEREC_S, (u32)ptempmem, 1); + recSLTmemconstt(EEREC_T, EEREC_S, (uptr)recGetImm64(0, _Imm_), 1); EEINST_SETSIGNEXT(_Rt_); return; } @@ -347,12 +324,8 @@ void recLogicalOpI(int info, int op) SetMMXstate(); if( _ImmU_ != 0 ) { - u32* ptempmem = recAllocStackMem(8, 8); - ptempmem[0] = _ImmU_; - ptempmem[1] = 0; - if( EEREC_T != EEREC_S ) MOVQRtoR(EEREC_T, EEREC_S); - LogicalOpMtoR(EEREC_T, (u32)ptempmem, op); + LogicalOpMtoR(EEREC_T, (uptr)recGetImm64(0, _ImmU_), op); } else { if( op == 0 ) PXORRtoR(EEREC_T, EEREC_T); @@ -367,21 +340,15 @@ void recLogicalOpI(int info, int op) if( op == 0 ) { if ( _ImmU_ != 0 ) { - u32* ptempmem = recAllocStackMem(8, 8); - ptempmem[0] = _ImmU_; - ptempmem[1] = 0; MOVDMtoMMX(rtreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]); - PANDMtoR(rtreg, (u32)ptempmem); + PANDMtoR(rtreg, (uptr)recGetImm64(0, _ImmU_)); } else PXORRtoR(rtreg, rtreg); } else { MOVQMtoR(rtreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]); if ( _ImmU_ != 0 ) { - u32* ptempmem = recAllocStackMem(8, 8); - ptempmem[0] = _ImmU_; - ptempmem[1] = 0; - LogicalOpMtoR(rtreg, (u32)ptempmem, op); + LogicalOpMtoR(rtreg, (uptr)recGetImm64(0, _ImmU_), op); } } } diff --git a/pcsx2/x86/ix86-32/iR5900Move.cpp b/pcsx2/x86/ix86-32/iR5900Move.cpp index 0d1878e26b..7175c567d2 100644 --- a/pcsx2/x86/ix86-32/iR5900Move.cpp +++ b/pcsx2/x86/ix86-32/iR5900Move.cpp @@ -442,15 +442,12 @@ void recMOVZtemp_consts(int info) CMP8ItoR(EAX, 0xff); j8Ptr[ 0 ] = JNE8( 0 ); - if( g_cpuFlushedConstReg & (1<<_Rs_) ) mem = &cpuRegs.GPR.r[_Rs_].UL[0]; - else { - mem = recAllocStackMem(8,8); + if( g_cpuFlushedConstReg & (1<<_Rs_) ) + mem = &cpuRegs.GPR.r[_Rs_].UL[0]; + else + mem = _eeGetConstReg(_Rs_); - mem[0] = g_cpuConstRegs[_Rs_].UL[0]; - mem[1] = g_cpuConstRegs[_Rs_].UL[1]; - } - - MOVQMtoR(EEREC_D, (u32)mem); + MOVQMtoR(EEREC_D, (uptr)mem); x86SetJ8( j8Ptr[ 0 ] ); _freeMMXreg(t0reg); @@ -566,15 +563,12 @@ void recMOVNtemp_consts(int info) CMP8ItoR(EAX, 0xff); j8Ptr[ 0 ] = JE8( 0 ); - if( g_cpuFlushedConstReg & (1<<_Rs_) ) mem = &cpuRegs.GPR.r[_Rs_].UL[0]; - else { - mem = recAllocStackMem(8,8); + if( g_cpuFlushedConstReg & (1<<_Rs_) ) + mem = &cpuRegs.GPR.r[_Rs_].UL[0]; + else + mem = _eeGetConstReg(_Rs_); - mem[0] = g_cpuConstRegs[_Rs_].UL[0]; - mem[1] = g_cpuConstRegs[_Rs_].UL[1]; - } - - MOVQMtoR(EEREC_D, (u32)mem); + MOVQMtoR(EEREC_D, (uptr)mem); x86SetJ8( j8Ptr[ 0 ] ); _freeMMXreg(t0reg); diff --git a/pcsx2/x86/ix86-32/iR5900MultDiv.cpp b/pcsx2/x86/ix86-32/iR5900MultDiv.cpp index 95917a74d2..ec549f24c2 100644 --- a/pcsx2/x86/ix86-32/iR5900MultDiv.cpp +++ b/pcsx2/x86/ix86-32/iR5900MultDiv.cpp @@ -266,20 +266,15 @@ void recWritebackConstHILO(u64 res, int writed, int upper) if( g_pCurInstInfo->regs[XMMGPR_LO] & testlive ) { if( !upper && (reglo = _allocCheckGPRtoMMX(g_pCurInstInfo, XMMGPR_LO, MODE_WRITE)) >= 0 ) { - u32* ptr = recAllocStackMem(8, 8); - ptr[0] = res & 0xffffffff; - ptr[1] = (res&0x80000000)?0xffffffff:0; - MOVQMtoR(reglo, (u32)ptr); + MOVQMtoR(reglo, (uptr)recGetImm64(res & 0x80000000 ? -1 : 0, (u32)res)); } else { reglo = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_LO, MODE_WRITE|MODE_READ); if( reglo >= 0 ) { - u32* ptr = recAllocStackMem(8, 8); - ptr[0] = res & 0xffffffff; - ptr[1] = (res&0x80000000)?0xffffffff:0; - if( upper ) SSE_MOVHPS_M64_to_XMM(reglo, (u32)ptr); - else SSE_MOVLPS_M64_to_XMM(reglo, (u32)ptr); + u32* ptr = recGetImm64(res & 0x80000000 ? -1 : 0, (u32)res); + if( upper ) SSE_MOVHPS_M64_to_XMM(reglo, (uptr)ptr); + else SSE_MOVLPS_M64_to_XMM(reglo, (uptr)ptr); } else { MOV32ItoM(loaddr, res & 0xffffffff); @@ -291,18 +286,13 @@ void recWritebackConstHILO(u64 res, int writed, int upper) if( g_pCurInstInfo->regs[XMMGPR_HI] & testlive ) { if( !upper && (reghi = _allocCheckGPRtoMMX(g_pCurInstInfo, XMMGPR_HI, MODE_WRITE)) >= 0 ) { - u32* ptr = recAllocStackMem(8, 8); - ptr[0] = res >> 32; - ptr[1] = (res>>63)?0xffffffff:0; - MOVQMtoR(reghi, (u32)ptr); + MOVQMtoR(reghi, (uptr)recGetImm64(res >> 63 ? -1 : 0, res >> 32)); } else { reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_HI, MODE_WRITE|MODE_READ); if( reghi >= 0 ) { - u32* ptr = recAllocStackMem(8, 8); - ptr[0] = res >> 32; - ptr[1] = (res>>63)?0xffffffff:0; + u32* ptr = recGetImm64(res >> 63 ? -1 : 0, res >> 32); if( upper ) SSE_MOVHPS_M64_to_XMM(reghi, (u32)ptr); else SSE_MOVLPS_M64_to_XMM(reghi, (u32)ptr); }