From 47759b0a463814ed921e645fce2abcb6d4d47607 Mon Sep 17 00:00:00 2001 From: refraction Date: Sat, 4 Apr 2009 19:09:02 +0000 Subject: [PATCH 001/143] Found a small typo in IPU, don't know what videos it will effect, but it could have potentially stopped them working :P git-svn-id: http://pcsx2.googlecode.com/svn/trunk@910 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/IPU/IPU.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index 3c481ccb0a..748aa2fcb4 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -369,7 +369,7 @@ __forceinline void ipuWrite64(u32 mem, u64 value) switch (mem) { - case 0x10: + case 0x00: IPU_LOG("Ipu write64: IPU_CMD=0x%08X", value); IPUCMD_WRITE((u32)value); break; From 7cf7a118b7577d41e6edd675b48077da4982fbbb Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Sat, 4 Apr 2009 19:50:34 +0000 Subject: [PATCH 002/143] Revered Misc.cpp and vssprintf.cpp from r904. See r904 comments for details. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@911 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Misc.cpp | 260 +++++++++++---------- pcsx2/vssprintf.cpp | 2 +- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 3 - 3 files changed, 135 insertions(+), 130 deletions(-) diff --git a/pcsx2/Misc.cpp b/pcsx2/Misc.cpp index 6df6b6535a..626830295f 100644 --- a/pcsx2/Misc.cpp +++ b/pcsx2/Misc.cpp @@ -420,7 +420,7 @@ char *ParseLang(char *id) { char* mystrlwr( char* string ) { assert( string != NULL ); - while ( 0 != ( *string++ = (char)tolower( *string ) ) ) ; + while ( 0 != ( *string++ = (char)tolower( *string ) ) ); return string; } @@ -493,99 +493,102 @@ void CycleFrameLimit(int dir) void ProcessFKeys(int fkey, struct KeyModifiers *keymod) { - assert(fkey >= 1 && fkey <= 12 ); + assert(fkey >= 1 && fkey <= 12 ); - switch(fkey) { - case 1: - try - { - gzSavingState( SaveState::GetFilename( StatesC ) ).FreezeAll(); - HostGui::ResetMenuSlots(); - } - catch( Exception::BaseException& ex ) - { - // 99% of the time this is a file permission error and the - // cpu state is intact so just display a passive msg to console. + switch(fkey) + { + case 1: + try + { + gzSavingState( SaveState::GetFilename( StatesC ) ).FreezeAll(); + HostGui::ResetMenuSlots(); + } + catch( Exception::BaseException& ex ) + { + // 99% of the time this is a file permission error and the + // cpu state is intact so just display a passive msg to console. 
- Console::Error( _( "Error > Could not save state to slot %d" ), params StatesC ); - Console::Error( ex.cMessage() ); - } - break; + Console::Error( _( "Error > Could not save state to slot %d" ), params StatesC ); + Console::Error( ex.cMessage() ); + } + break; - case 2: - if( keymod->shift ) - StatesC = (StatesC+NUM_STATES-1) % NUM_STATES; - else - StatesC = (StatesC+1) % NUM_STATES; + case 2: + if( keymod->shift ) + StatesC = (StatesC+NUM_STATES-1) % NUM_STATES; + else + StatesC = (StatesC+1) % NUM_STATES; - Console::Notice( _( " > Selected savestate slot %d" ), params StatesC); + Console::Notice( _( " > Selected savestate slot %d" ), params StatesC); - if( GSchangeSaveState != NULL ) - GSchangeSaveState(StatesC, SaveState::GetFilename(StatesC).c_str()); - break; + if( GSchangeSaveState != NULL ) + GSchangeSaveState(StatesC, SaveState::GetFilename(StatesC).c_str()); + break; - case 3: - try - { - gzLoadingState joe( SaveState::GetFilename( StatesC ) ); // throws exception on version mismatch - cpuReset(); - SysClearExecutionCache(); - joe.FreezeAll(); - } - catch( Exception::StateLoadError_Recoverable& ) - { - // At this point the cpu hasn't been reset, so we can return - // control to the user safely... (and silently) - } - catch( Exception::FileNotFound& ) - { - Console::Notice( _("Saveslot %d cannot be loaded; slot does not exist (file not found)"), params StatesC ); - } - catch( Exception::RuntimeError& ex ) - { - // This is the bad one. Chances are the cpu has been reset, so emulation has - // to be aborted. Sorry user! We'll give you some info for your trouble: + case 3: + try + { + gzLoadingState joe( SaveState::GetFilename( StatesC ) ); // throws exception on version mismatch + cpuReset(); + SysClearExecutionCache(); + joe.FreezeAll(); + } + catch( Exception::StateLoadError_Recoverable& ) + { + // At this point the cpu hasn't been reset, so we can return + // control to the user safely... (and silently) + } + catch( Exception::FileNotFound& ) + { + Console::Notice( _("Saveslot %d cannot be loaded; slot does not exist (file not found)"), params StatesC ); + } + catch( Exception::RuntimeError& ex ) + { + // This is the bad one. Chances are the cpu has been reset, so emulation has + // to be aborted. Sorry user! We'll give you some info for your trouble: - Console::Error( _("An error occured while trying to load saveslot %d"), params StatesC ); - Console::Error( ex.cMessage() ); - Msgbox::Alert( - "Pcsx2 encountered an error while trying to load the savestate\n" - "and emulation had to be aborted." ); + Console::Error( _("An error occured while trying to load saveslot %d"), params StatesC ); + Console::Error( ex.cMessage() ); + Msgbox::Alert( + "Pcsx2 encountered an error while trying to load the savestate\n" + "and emulation had to be aborted." ); - ClosePlugins( true ); + ClosePlugins( true ); - throw Exception::CpuStateShutdown( - "Saveslot load failed; PS2 emulated state had to be shut down." ); // let the GUI handle the error "gracefully" - } - break; + throw Exception::CpuStateShutdown( + "Saveslot load failed; PS2 emulated state had to be shut down." ); // let the GUI handle the error "gracefully" + } + break; - case 4: - CycleFrameLimit(keymod->shift ? -1 : 1); - break; + case 4: + CycleFrameLimit(keymod->shift ? 
-1 : 1); + break; - // note: VK_F5-VK_F7 are reserved for GS - case 8: - GSmakeSnapshot( SNAPSHOTS_DIR "/" ); - break; - - case 9: //gsdx "on the fly" renderer switching - if (!renderswitch) { - StateRecovery::MakeGsOnly(); - g_EmulationInProgress = false; - CloseGS(); - renderswitch = true; //go to dx9 sw - StateRecovery::Recover(); - HostGui::BeginExecution(); //also sets g_EmulationInProgress to true later - } - else { - StateRecovery::MakeGsOnly(); - g_EmulationInProgress = false; - CloseGS(); - renderswitch = false; //return to default renderer - StateRecovery::Recover(); - HostGui::BeginExecution(); //also sets g_EmulationInProgress to true later - } - break; + // note: VK_F5-VK_F7 are reserved for GS + case 8: + GSmakeSnapshot( SNAPSHOTS_DIR "/" ); + break; + + case 9: //gsdx "on the fly" renderer switching + if (!renderswitch) + { + StateRecovery::MakeGsOnly(); + g_EmulationInProgress = false; + CloseGS(); + renderswitch = true; //go to dx9 sw + StateRecovery::Recover(); + HostGui::BeginExecution(); //also sets g_EmulationInProgress to true later + } + else + { + StateRecovery::MakeGsOnly(); + g_EmulationInProgress = false; + CloseGS(); + renderswitch = false; //return to default renderer + StateRecovery::Recover(); + HostGui::BeginExecution(); //also sets g_EmulationInProgress to true later + } + break; #ifdef PCSX2_DEVBUILD case 10: // There's likely a better way to implement this, but this seemed useful. @@ -599,55 +602,60 @@ void ProcessFKeys(int fkey, struct KeyModifiers *keymod) GSprintf(10,"Logging Disabled."); break; - - case 11: - if( mtgsThread != NULL ) { - Console::Notice( "Cannot make gsstates in MTGS mode" ); - } - else - { - string Text; - if( strgametitle[0] != 0 ) { - // only take the first two words - char name[256], *tok; - string gsText; + case 11: + if( mtgsThread != NULL ) + { + Console::Notice( "Cannot make gsstates in MTGS mode" ); + } + else + { + string Text; + if( strgametitle[0] != 0 ) + { + // only take the first two words + char name[256], *tok; + string gsText; - tok = strtok(strgametitle, " "); - sprintf(name, "%s_", mystrlwr(tok)); - tok = strtok(NULL, " "); - if( tok != NULL ) strcat(name, tok); + tok = strtok(strgametitle, " "); + sprintf(name, "%s_", mystrlwr(tok)); + + tok = strtok(NULL, " "); + if( tok != NULL ) strcat(name, tok); - - ssprintf( gsText, "%s.%d.gs", name, StatesC); - Text = Path::Combine( SSTATES_DIR, gsText ); - } - else - Text = GetGSStateFilename(); - - SaveGSState(Text); - } - break; + ssprintf( gsText, "%s.%d.gs", name, StatesC); + Text = Path::Combine( SSTATES_DIR, gsText ); + } + else + { + Text = GetGSStateFilename(); + } + + SaveGSState(Text); + } + break; #endif - case 12: - if( keymod->shift ) { + case 12: + if( keymod->shift ) + { #ifdef PCSX2_DEVBUILD - iDumpRegisters(cpuRegs.pc, 0); - Console::Notice("hardware registers dumped EE:%x, IOP:%x\n", params cpuRegs.pc, psxRegs.pc); + iDumpRegisters(cpuRegs.pc, 0); + Console::Notice("hardware registers dumped EE:%x, IOP:%x\n", params cpuRegs.pc, psxRegs.pc); #endif - } - else { - g_Pcsx2Recording ^= 1; - if( mtgsThread != NULL ) { - mtgsThread->SendSimplePacket(GS_RINGTYPE_RECORD, g_Pcsx2Recording, 0, 0); - } - else { - if( GSsetupRecording != NULL ) GSsetupRecording(g_Pcsx2Recording, NULL); - } - if( SPU2setupRecording != NULL ) SPU2setupRecording(g_Pcsx2Recording, NULL); - } - break; - } + } + else + { + g_Pcsx2Recording ^= 1; + + if( mtgsThread != NULL ) + mtgsThread->SendSimplePacket(GS_RINGTYPE_RECORD, g_Pcsx2Recording, 0, 0); + else if( GSsetupRecording != NULL ) + 
GSsetupRecording(g_Pcsx2Recording, NULL); + + if( SPU2setupRecording != NULL ) SPU2setupRecording(g_Pcsx2Recording, NULL); + } + break; + } } void _memset16_unaligned( void* dest, u16 data, size_t size ) diff --git a/pcsx2/vssprintf.cpp b/pcsx2/vssprintf.cpp index 7657683e54..234017f0e1 100644 --- a/pcsx2/vssprintf.cpp +++ b/pcsx2/vssprintf.cpp @@ -460,7 +460,7 @@ static void cropzeros(char *buffer) stop = buffer--; while (*buffer == '0') buffer--; if (*buffer == '.') buffer--; - while (*++buffer = *stop++) ; + while (*++buffer = *stop++); } } diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 9056cb375f..a11a52c051 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -135,12 +135,9 @@ FavorSizeOrSpeed="1" AdditionalIncludeDirectories="" PreprocessorDefinitions="NDEBUG" - StringPooling="true" - MinimalRebuild="true" ExceptionHandling="2" SmallerTypeCheck="false" BufferSecurityCheck="false" - EnableFunctionLevelLinking="false" UsePrecompiledHeader="2" PrecompiledHeaderThrough="PrecompiledHeader.h" PrecompiledHeaderFile="$(IntDir)\$(TargetName).pch" From 87412c8d5d3c56540b8881a90e427ea7aac1a19d Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sun, 5 Apr 2009 10:29:54 +0000 Subject: [PATCH 003/143] Fix the breakage on Ape Escape 3 from r900. Clean up the unpacking code some more while I'm at it. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@912 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 449 ++++++++++++++++++++++++-------------------------- 1 file changed, 216 insertions(+), 233 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index d4ece5c0d3..a2ebe6f961 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -42,9 +42,9 @@ bool mfifodmairq = FALSE; enum UnpackOffset { OFFSET_X = 0, - OFFSET_Y, - OFFSET_Z, - OFFSET_W + OFFSET_Y = 1, + OFFSET_Z = 2, + OFFSET_W =3 }; #define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) @@ -54,7 +54,7 @@ __forceinline static int _limit(int a, int max) return (a > max) ? 
max : a; } -static __releaseinline void writeX(u32 *dest, u32 data) +static __releaseinline void writeX(u32 &dest, u32 data) { int n; @@ -83,39 +83,39 @@ static __releaseinline void writeX(u32 *dest, u32 data) case 0: if ((_vif->cmd & 0x6F) == 0x6f) { - *dest = data; + dest = data; } else if (_vifRegs->mode == 1) { - *dest = data + _vifRegs->r0; + dest = data + _vifRegs->r0; } else if (_vifRegs->mode == 2) { _vifRegs->r0 += data; - *dest = _vifRegs->r0; + dest = _vifRegs->r0; } else { - *dest = data; + dest = data; } break; case 1: - *dest = _vifRegs->r0; + dest = _vifRegs->r0; break; case 2: switch (_vif->cl) { case 0: - *dest = _vifRegs->c0; + dest = _vifRegs->c0; break; case 1: - *dest = _vifRegs->c1; + dest = _vifRegs->c1; break; case 2: - *dest = _vifRegs->c2; + dest = _vifRegs->c2; break; default: - *dest = _vifRegs->c3; + dest = _vifRegs->c3; break; } break; @@ -123,7 +123,7 @@ static __releaseinline void writeX(u32 *dest, u32 data) // VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data); } -static __releaseinline void writeY(u32 *dest, u32 data) +static __releaseinline void writeY(u32 &dest, u32 data) { int n; @@ -152,39 +152,39 @@ static __releaseinline void writeY(u32 *dest, u32 data) case 0: if ((_vif->cmd & 0x6F) == 0x6f) { - *dest = data; + dest = data; } else if (_vifRegs->mode == 1) { - *dest = data + _vifRegs->r1; + dest = data + _vifRegs->r1; } else if (_vifRegs->mode == 2) { _vifRegs->r1 += data; - *dest = _vifRegs->r1; + dest = _vifRegs->r1; } else { - *dest = data; + dest = data; } break; case 1: - *dest = _vifRegs->r1; + dest = _vifRegs->r1; break; case 2: switch (_vif->cl) { case 0: - *dest = _vifRegs->c0; + dest = _vifRegs->c0; break; case 1: - *dest = _vifRegs->c1; + dest = _vifRegs->c1; break; case 2: - *dest = _vifRegs->c2; + dest = _vifRegs->c2; break; default: - *dest = _vifRegs->c3; + dest = _vifRegs->c3; break; } break; @@ -192,7 +192,7 @@ static __releaseinline void writeY(u32 *dest, u32 data) // VIF_LOG("writeY %8.8x : Mode %d, r1 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r1,data); } -static __releaseinline void writeZ(u32 *dest, u32 data) +static __releaseinline void writeZ(u32 &dest, u32 data) { int n; @@ -221,39 +221,39 @@ static __releaseinline void writeZ(u32 *dest, u32 data) case 0: if ((_vif->cmd & 0x6F) == 0x6f) { - *dest = data; + dest = data; } else if (_vifRegs->mode == 1) { - *dest = data + _vifRegs->r2; + dest = data + _vifRegs->r2; } else if (_vifRegs->mode == 2) { _vifRegs->r2 += data; - *dest = _vifRegs->r2; + dest = _vifRegs->r2; } else { - *dest = data; + dest = data; } break; case 1: - *dest = _vifRegs->r2; + dest = _vifRegs->r2; break; case 2: switch (_vif->cl) { case 0: - *dest = _vifRegs->c0; + dest = _vifRegs->c0; break; case 1: - *dest = _vifRegs->c1; + dest = _vifRegs->c1; break; case 2: - *dest = _vifRegs->c2; + dest = _vifRegs->c2; break; default: - *dest = _vifRegs->c3; + dest = _vifRegs->c3; break; } break; @@ -261,7 +261,7 @@ static __releaseinline void writeZ(u32 *dest, u32 data) // VIF_LOG("writeZ %8.8x : Mode %d, r2 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r2,data); } -static __releaseinline void writeW(u32 *dest, u32 data) +static __releaseinline void writeW(u32 &dest, u32 data) { int n; @@ -290,39 +290,39 @@ static __releaseinline void writeW(u32 *dest, u32 data) case 0: if ((_vif->cmd & 0x6F) == 0x6f) { - *dest = data; + dest = data; } else if (_vifRegs->mode == 1) { - *dest = data + _vifRegs->r3; + dest = data + _vifRegs->r3; } else if (_vifRegs->mode == 2) { 
_vifRegs->r3 += data; - *dest = _vifRegs->r3; + dest = _vifRegs->r3; } else { - *dest = data; + dest = data; } break; case 1: - *dest = _vifRegs->r3; + dest = _vifRegs->r3; break; case 2: switch (_vif->cl) { case 0: - *dest = _vifRegs->c0; + dest = _vifRegs->c0; break; case 1: - *dest = _vifRegs->c1; + dest = _vifRegs->c1; break; case 2: - *dest = _vifRegs->c2; + dest = _vifRegs->c2; break; default: - *dest = _vifRegs->c3; + dest = _vifRegs->c3; break; } break; @@ -330,7 +330,8 @@ static __releaseinline void writeW(u32 *dest, u32 data) // VIF_LOG("writeW %8.8x : Mode %d, r3 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r3,data); } -static __forceinline bool __fastcall _UNPACKpart(u32 offnum, u32 *x, u32 y) +template +static void _UNPACKpart(u32 offnum, u32 &x, T y) { if (_vifRegs->offset == offnum) { @@ -352,219 +353,201 @@ static __forceinline bool __fastcall _UNPACKpart(u32 offnum, u32 *x, u32 y) break; } _vifRegs->offset++; - - return TRUE; } - return FALSE; } -void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size) +template +static void _UNPACKpart(u32 offnum, u32 &x, T y, int &size) { - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *data)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *data)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *data)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *data)) size--; + if (_vifRegs->offset == offnum) + { + switch (offnum) + { + case OFFSET_X: + writeX(x,y); + break; + case OFFSET_Y: + writeY(x,y); + break; + case OFFSET_Z: + writeZ(x,y); + break; + case OFFSET_W: + writeW(x,y); + break; + default: + break; + } + size--; + _vifRegs->offset++; + } +} + +template +void __fastcall UNPACK_S(u32 *dest, T *data, int size) +{ + _UNPACKpart(OFFSET_X, *dest++, *data, size); + _UNPACKpart(OFFSET_Y, *dest++, *data, size); + _UNPACKpart(OFFSET_Z, *dest++, *data, size); + _UNPACKpart(OFFSET_W, *dest , *data, size); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } -void __fastcall UNPACK_S_16s(u32 *dest, u32 *data, int size) +template +void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - s16 *sdata = (s16*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_W, dest, *sdata)) size--; + _UNPACKpart(OFFSET_X, *dest++, *data++, size); + _UNPACKpart(OFFSET_Y, *dest++, *data--, size); + _UNPACKpart(OFFSET_Z, *dest++, *data++); + _UNPACKpart(OFFSET_W, *dest , *data); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } -void __fastcall UNPACK_S_16u(u32 *dest, u32 *data, int size) +template +void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - const u16 *sdata = (u16*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *sdata)) size--; + _UNPACKpart(OFFSET_X, *dest++, *data++, size); + _UNPACKpart(OFFSET_Y, *dest++, *data++, size); + _UNPACKpart(OFFSET_Z, *dest++, *data++, size); + _UNPACKpart(OFFSET_W, *dest, *data); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } -void __fastcall UNPACK_S_8s(u32 *dest, u32 *data, int size) +template +void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { - s8 *cdata = (s8*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata)) size--; - if (size > 0) if 
(_UNPACKpart(OFFSET_Y, dest++, *cdata)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *cdata)) size--; - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_S_8u(u32 *dest, u32 *data, int size) -{ - u8 *cdata = (u8*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata))size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *cdata)) size--; - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V2_32(u32 *dest, u32 *data, int size) -{ - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *data++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *data--)) size--; - _UNPACKpart(OFFSET_Z, dest++, *data); - _UNPACKpart(OFFSET_W, dest, 0); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; - -} - -void __fastcall UNPACK_V2_16s(u32 *dest, u32 *data, int size) -{ - s16 *sdata = (s16*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata--)) size--; - _UNPACKpart(OFFSET_Z, dest++, *sdata++); - _UNPACKpart(OFFSET_W, dest , *sdata); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V2_16u(u32 *dest, u32 *data, int size) -{ - u16 *sdata = (u16*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata--)) size--; - _UNPACKpart(OFFSET_Z, dest++, *sdata++); - _UNPACKpart(OFFSET_W, dest , *sdata); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V2_8s(u32 *dest, u32 *data, int size) -{ - s8 *cdata = (s8*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata--)) size--; - _UNPACKpart(OFFSET_Z, dest++, *cdata++); - _UNPACKpart(OFFSET_W, dest , *cdata); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V2_8u(u32 *dest, u32 *data, int size) -{ - u8 *cdata = (u8*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata--)) size--; - _UNPACKpart(OFFSET_Z, dest++, *cdata++); - _UNPACKpart(OFFSET_W, dest , *cdata); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V3_32(u32 *dest, u32 *data, int size) -{ - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *data++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *data++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *data++)) size--; - _UNPACKpart(OFFSET_W, dest, *data); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V3_16s(u32 *dest, u32 *data, int size) -{ - s16 *sdata = (s16*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata++)) size--; - _UNPACKpart(OFFSET_W, dest, *sdata); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V3_16u(u32 *dest, u32 *data, int size) -{ - u16 *sdata = (u16*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata++)) size--; - _UNPACKpart(OFFSET_W, dest, *sdata); - 
if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V3_8s(u32 *dest, u32 *data, int size) -{ - s8 *cdata = (s8*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata++)) size--; - _UNPACKpart(OFFSET_W, dest, *cdata); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V3_8u(u32 *dest, u32 *data, int size) -{ - u8 *cdata = (u8*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata++)) size--; - _UNPACKpart(OFFSET_W, dest, *cdata); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V4_32(u32 *dest, u32 *data , int size) -{ - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *data++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *data++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *data++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *data)) size--; - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V4_16s(u32 *dest, u32 *data, int size) -{ - s16 *sdata = (s16*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_W, dest , *sdata)) size--; - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V4_16u(u32 *dest, u32 *data, int size) -{ - u16 *sdata = (u16*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *sdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_W, dest, *sdata)) size--; - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V4_8s(u32 *dest, u32 *data, int size) -{ - s8 *cdata = (s8*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_W, dest, *cdata)) size--; - if (_vifRegs->offset == 4) _vifRegs->offset = 0; -} - -void __fastcall UNPACK_V4_8u(u32 *dest, u32 *data, int size) -{ - u8 *cdata = (u8*)data; - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, *cdata++)) size--; - if (size > 0) if (_UNPACKpart(OFFSET_W, dest, *cdata)) size--; + _UNPACKpart(OFFSET_X, *dest++, *data++, size); + _UNPACKpart(OFFSET_Y, *dest++, *data++, size); + _UNPACKpart(OFFSET_Z, *dest++, *data++, size); + _UNPACKpart(OFFSET_W, *dest , *data, size); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) { - - if (size > 0) if (_UNPACKpart(OFFSET_X, dest++, ((*data & 0x001f) << 3))) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Y, dest++, ((*data & 0x03e0) >> 2))) size--; - if (size > 0) if (_UNPACKpart(OFFSET_Z, dest++, ((*data & 0x7c00) >> 7))) size--; - if (size > 0) if (_UNPACKpart(OFFSET_W, dest , ((*data & 0x8000) >> 8))) size--; + _UNPACKpart(OFFSET_X, *dest++, ((*data & 0x001f) << 3), 
size); + _UNPACKpart(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2), size); + _UNPACKpart(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7), size); + _UNPACKpart(OFFSET_W, *dest, ((*data & 0x8000) >> 8), size); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } +void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size) +{ + UNPACK_S(dest, data, size); +} + +void __fastcall UNPACK_S_16s(u32 *dest, u32 *data, int size) +{ + s16 *sdata = (s16*)data; + UNPACK_S(dest, sdata, size); +} + +void __fastcall UNPACK_S_16u(u32 *dest, u32 *data, int size) +{ + u16 *sdata = (u16*)data; + UNPACK_S(dest, sdata, size); +} + +void __fastcall UNPACK_S_8s(u32 *dest, u32 *data, int size) +{ + s8 *cdata = (s8*)data; + UNPACK_S(dest, cdata, size); +} + +void __fastcall UNPACK_S_8u(u32 *dest, u32 *data, int size) +{ + u8 *cdata = (u8*)data; + UNPACK_S(dest, cdata, size); +} + +void __fastcall UNPACK_V2_32(u32 *dest, u32 *data, int size) +{ + UNPACK_V2(dest, data, size); +} + +void __fastcall UNPACK_V2_16s(u32 *dest, u32 *data, int size) +{ + s16 *sdata = (s16*)data; + UNPACK_V2(dest, sdata, size); +} + +void __fastcall UNPACK_V2_16u(u32 *dest, u32 *data, int size) +{ + u16 *sdata = (u16*)data; + UNPACK_V2(dest, sdata, size); +} + +void __fastcall UNPACK_V2_8s(u32 *dest, u32 *data, int size) +{ + s8 *cdata = (s8*)data; + UNPACK_V2(dest, cdata, size); +} + +void __fastcall UNPACK_V2_8u(u32 *dest, u32 *data, int size) +{ + u8 *cdata = (u8*)data; + UNPACK_V2(dest, cdata, size); +} + +void __fastcall UNPACK_V3_32(u32 *dest, u32 *data, int size) +{ + UNPACK_V3(dest, data, size); +} + +void __fastcall UNPACK_V3_16s(u32 *dest, u32 *data, int size) +{ + s16 *sdata = (s16*)data; + UNPACK_V3(dest, sdata, size); +} + +void __fastcall UNPACK_V3_16u(u32 *dest, u32 *data, int size) +{ + u16 *sdata = (u16*)data; + UNPACK_V3(dest, sdata, size); +} + +void __fastcall UNPACK_V3_8s(u32 *dest, u32 *data, int size) +{ + s8 *cdata = (s8*)data; + UNPACK_V3(dest, cdata, size); +} + +void __fastcall UNPACK_V3_8u(u32 *dest, u32 *data, int size) +{ + u8 *cdata = (u8*)data; + UNPACK_V3(dest, cdata, size); +} + +void __fastcall UNPACK_V4_32(u32 *dest, u32 *data , int size) +{ + UNPACK_V4(dest, data, size); +} + +void __fastcall UNPACK_V4_16s(u32 *dest, u32 *data, int size) +{ + s16 *sdata = (s16*)data; + UNPACK_V4(dest, sdata, size); +} + +void __fastcall UNPACK_V4_16u(u32 *dest, u32 *data, int size) +{ + u16 *sdata = (u16*)data; + UNPACK_V4(dest, sdata, size); +} + +void __fastcall UNPACK_V4_8s(u32 *dest, u32 *data, int size) +{ + s8 *cdata = (s8*)data; + UNPACK_V4(dest, cdata, size); +} + +void __fastcall UNPACK_V4_8u(u32 *dest, u32 *data, int size) +{ + u8 *cdata = (u8*)data; + UNPACK_V4(dest, cdata, size); +} + static __forceinline int mfifoVIF1rbTransfer() { u32 maddr = psHu32(DMAC_RBOR); From 4d8769ecc530827790b404ec000809d2a6b9d366 Mon Sep 17 00:00:00 2001 From: sudonim1 Date: Sun, 5 Apr 2009 14:19:13 +0000 Subject: [PATCH 004/143] Implemented another block lookup method. Fixed an unused instruction in the emitter. 
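For reference, the new lookup in BaseBlocks::GetByX86 is an upper-bound binary search over the block table on its x86 start addresses (which the search assumes are in ascending order): it finds the last block starting at or before the given instruction pointer, then range-checks it. A minimal standalone sketch of the same idea — simplified types and hypothetical names, not the actual BaseBlocks interface:

    #include <vector>
    #include <stdint.h>
    #include <stddef.h>

    struct BlockEx { uintptr_t start; unsigned x86size; };

    // Assumes 'blocks' is sorted by 'start'. Returns NULL when ip is not
    // covered by any block (or the table is empty).
    const BlockEx* FindByX86(const std::vector<BlockEx>& blocks, uintptr_t ip)
    {
        if (blocks.empty()) return NULL;

        size_t lo = 0, hi = blocks.size() - 1;
        while (lo != hi) {
            size_t mid = (lo + hi + 1) / 2;   // round up so the range always shrinks
            if (blocks[mid].start > ip)
                hi = mid - 1;                 // mid starts past ip, discard it
            else
                lo = mid;                     // keep the latest block starting <= ip
        }
        if (ip < blocks[lo].start || ip >= blocks[lo].start + blocks[lo].x86size)
            return NULL;
        return &blocks[lo];
    }

The ix86.inl hunk is a separate one-byte fix: 0x23 (AND r32, r/m32) is a single-byte opcode, so it belongs in write8 rather than write16, which would have padded the instruction stream with a stray zero byte.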
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@913 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/BaseblockEx.cpp | 23 ++++++++++++++++++++--- pcsx2/x86/BaseblockEx.h | 2 +- pcsx2/x86/ix86/ix86.inl | 2 +- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/pcsx2/x86/BaseblockEx.cpp b/pcsx2/x86/BaseblockEx.cpp index 22a6765e60..bb933f301d 100644 --- a/pcsx2/x86/BaseblockEx.cpp +++ b/pcsx2/x86/BaseblockEx.cpp @@ -70,10 +70,27 @@ int BaseBlocks::LastIndex(u32 startpc) const return imin; } -BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip) const +BASEBLOCKEX* BaseBlocks::GetByX86(uptr ip) { - // TODO - return 0; + if (0 == blocks.size()) + return 0; + + int imin = 0, imax = blocks.size() - 1, imid; + + while(imin != imax) { + imid = (imin+imax+1)>>1; + + if (blocks[imid].fnptr > ip) + imax = imid - 1; + else + imin = imid; + } + + if (ip < blocks[imin].fnptr || + ip >= blocks[imin].fnptr + blocks[imin].x86size) + return 0; + + return &blocks[imin]; } void BaseBlocks::Link(u32 pc, uptr jumpptr) diff --git a/pcsx2/x86/BaseblockEx.h b/pcsx2/x86/BaseblockEx.h index 6ebe592d5e..ab52f311ae 100644 --- a/pcsx2/x86/BaseblockEx.h +++ b/pcsx2/x86/BaseblockEx.h @@ -73,7 +73,7 @@ public: BASEBLOCKEX* New(u32 startpc, uptr fnptr); int LastIndex (u32 startpc) const; - BASEBLOCKEX* GetByX86(uptr ip) const; + BASEBLOCKEX* GetByX86(uptr ip); inline int Index (u32 startpc) const { diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86.inl index ae4f5829af..6a6376de3c 100644 --- a/pcsx2/x86/ix86/ix86.inl +++ b/pcsx2/x86/ix86/ix86.inl @@ -2356,7 +2356,7 @@ emitterT void eAND32RmtoR( x86IntRegType to, x86IntRegType from ) emitterT void eAND32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) { RexRB(0,to,from); - write16( 0x23 ); + write8( 0x23 ); WriteRmOffsetFrom(to,from,offset); } From 014b5c667192e6d6ddd4b3999f43a396dd905d45 Mon Sep 17 00:00:00 2001 From: mattmenke Date: Sun, 5 Apr 2009 18:37:06 +0000 Subject: [PATCH 005/143] LilyPad: Fixed "Swap with Pad 1" git-svn-id: http://pcsx2.googlecode.com/svn/trunk@914 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/LilyPad/Config.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/LilyPad/Config.cpp b/plugins/LilyPad/Config.cpp index 99769d6c85..39e9e30ad1 100644 --- a/plugins/LilyPad/Config.cpp +++ b/plugins/LilyPad/Config.cpp @@ -1867,7 +1867,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L InsertMenuItemW(hMenu, index, 1, &info); } else { - info.wID = port2+2*slot2; + info.wID = port2+2*slot2+1; wsprintfW(text, L"Swap with %s", pad); InsertMenuItemW(hMenu, 0, 1, &info); } @@ -1879,6 +1879,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L DestroyMenu(hMenu); if (!res) break; if (res > 0) { + res--; slot2 = res / 2; port2 = res&1; PadConfig padCfgTemp = config.padConfigs[port1][slot1]; From 923802f9367a3ae60cdb1b2c087e9341b7b7b8b0 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Mon, 6 Apr 2009 07:32:10 +0000 Subject: [PATCH 006/143] microVU: more recompiler first-pass implementation stuff... 
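When PCSX2_MICROVU_ is defined, the VU0micro/VU1micro namespaces become thin shims over the new recompiler's entry points (initVUrec, runVUrec, ...), and microVU.cpp folds the runtime unit index back into a template argument (mVUinit<0>/<1> and friends). Roughly, the dispatch pattern looks like this — a simplified sketch with hypothetical names, not the real microVU structures:

    typedef unsigned int u32;

    // One statically-specialized recompiler state per VU unit, so per-unit
    // constants (memory size, cache base, ...) resolve at compile time.
    template<int vuIndex> struct MicroVU { /* program table, dynarec cache, ... */ };

    static MicroVU<0> microVU0;
    static MicroVU<1> microVU1;

    template<int vuIndex> void mVUrun(u32 startPC, u32 cycles)
    {
        // vuIndex is a compile-time constant here; no runtime branching on the unit.
    }

    // C-style wrapper called from the VU0micro/VU1micro namespaces.
    void runVUrec(u32 startPC, u32 cycles, const int vuIndex)
    {
        if (!vuIndex) mVUrun<0>(startPC, cycles);
        else          mVUrun<1>(startPC, cycles);
    }

The same pass also initialises the loop counters in mVUreset/mVUclose (for (int i = 0; ...)), which previously iterated over uninitialised indices.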
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@915 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVU0micro.cpp | 33 +++++++++++++-- pcsx2/x86/iVU1micro.cpp | 28 ++++++++++++- pcsx2/x86/microVU.cpp | 26 ++++++------ pcsx2/x86/microVU_Alloc.h | 1 + pcsx2/x86/microVU_Analyze.inl | 14 +++++++ pcsx2/x86/microVU_Compile.inl | 26 ++++-------- pcsx2/x86/microVU_Lower.inl | 78 ++++++++++++++++++++++++++++++----- pcsx2/x86/microVU_Misc.h | 24 ++++++++--- 8 files changed, 180 insertions(+), 50 deletions(-) diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index 4b70381294..2dfaac7669 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -23,11 +23,12 @@ #include "VUmicro.h" #include "iVUzerorec.h" +#ifndef PCSX2_MICROVU_ namespace VU0micro { - void recAlloc() - { - SuperVUAlloc(0); + void recAlloc() + { + SuperVUAlloc(0); } void __fastcall recClear(u32 Addr, u32 Size) @@ -62,6 +63,32 @@ namespace VU0micro FreezeXMMRegs(0); } } +#else + +extern void initVUrec(VURegs* vuRegs, const int vuIndex); +extern void closeVUrec(const int vuIndex); +extern void resetVUrec(const int vuIndex); +extern void clearVUrec(u32 addr, u32 size, const int vuIndex); +extern void runVUrec(u32 startPC, u32 cycles, const int vuIndex); + +namespace VU0micro +{ + void recAlloc() { initVUrec(&VU0, 0); } + void __fastcall recClear(u32 Addr, u32 Size) { clearVUrec(Addr, Size, 0); } + void recShutdown() { closeVUrec(0); } + static void recReset() { resetVUrec(0); x86FpuState = FPU_STATE; } + static void recStep() {} + static void recExecuteBlock() + { + if((VU0.VI[REG_VPU_STAT].UL & 1) == 0) return; + + FreezeXMMRegs(1); + runVUrec(VU0.VI[REG_TPC].UL & 0xfff, 0xffffffff, 0); + FreezeXMMRegs(0); + } + +} +#endif using namespace VU0micro; diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index 2c4cc00bb4..87f3099d7b 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -29,7 +29,7 @@ #ifdef _DEBUG extern u32 vudump; #endif - +#ifndef PCSX2_MICROVU_ namespace VU1micro { void recAlloc() @@ -121,6 +121,32 @@ namespace VU1micro FreezeXMMRegs(0); } } +#else + +extern void initVUrec(VURegs* vuRegs, const int vuIndex); +extern void closeVUrec(const int vuIndex); +extern void resetVUrec(const int vuIndex); +extern void clearVUrec(u32 addr, u32 size, const int vuIndex); +extern void runVUrec(u32 startPC, u32 cycles, const int vuIndex); + +namespace VU1micro +{ + void recAlloc() { initVUrec(&VU1, 1); } + void __fastcall recClear(u32 Addr, u32 Size) { clearVUrec(Addr, Size, 1); } + void recShutdown() { closeVUrec(1); } + static void recReset() { resetVUrec(1); x86FpuState = FPU_STATE; } + static void recStep() {} + static void recExecuteBlock() { + + if((VU0.VI[REG_VPU_STAT].UL & 0x100) == 0) return; + assert( (VU1.VI[REG_TPC].UL&7) == 0 ); + + FreezeXMMRegs(1); + runVUrec(VU1.VI[REG_TPC].UL & 0x3fff, 0xffffffff, 1); + FreezeXMMRegs(0); + } +} +#endif using namespace VU1micro; diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 4613a13acb..39e9697f38 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -42,7 +42,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { mVU->index = vuIndex; mVU->microSize = (vuIndex ? 0x4000 : 0x1000); mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 8; - mVU->cacheAddr = 0xC0000000 + (vuIndex ? mVU->cacheSize : 0); + mVU->cacheAddr = (vuIndex ? 
0x1e840000 : 0x0e840000); mVU->cache = NULL; mVUreset(); @@ -55,16 +55,16 @@ microVUt(void) mVUreset() { mVUclose(); // Close // Create Block Managers - for (int i; i <= mVU->prog.max; i++) { - for (u32 j; j < (mVU->progSize / 2); j++) { + for (int i = 0; i <= mVU->prog.max; i++) { + for (u32 j = 0; j < (mVU->progSize / 2); j++) { mVU->prog.prog[i].block[j] = new microBlockManager(); } } // Dynarec Cache - mVU->cache = SysMmapEx(mVU->cacheAddr, mVU->cacheSize, 0x10000000, (vuIndex ? "Micro VU1" : "Micro VU0")); - if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); - + mVU->cache = SysMmapEx(mVU->cacheAddr, mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); + if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); + // Other Variables memset(&mVU->prog, 0, sizeof(mVU->prog)); mVU->prog.finished = 1; @@ -81,8 +81,8 @@ microVUt(void) mVUclose() { if ( mVU->cache ) { HostSys::Munmap( mVU->cache, mVU->cacheSize ); mVU->cache = NULL; } // Delete Block Managers - for (int i; i <= mVU->prog.max; i++) { - for (u32 j; j < (mVU->progSize / 2); j++) { + for (int i = 0; i <= mVU->prog.max; i++) { + for (u32 j = 0; j < (mVU->progSize / 2); j++) { if (mVU->prog.prog[i].block[j]) delete mVU->prog.prog[i].block[j]; } } @@ -277,27 +277,27 @@ extern "C" { // Wrapper Functions - Called by other parts of the Emu //------------------------------------------------------------------ -__forceinline void initVUrec(VURegs* vuRegs, const int vuIndex) { +void initVUrec(VURegs* vuRegs, const int vuIndex) { if (!vuIndex) mVUinit<0>(vuRegs); else mVUinit<1>(vuRegs); } -__forceinline void closeVUrec(const int vuIndex) { +void closeVUrec(const int vuIndex) { if (!vuIndex) mVUclose<0>(); else mVUclose<1>(); } -__forceinline void resetVUrec(const int vuIndex) { +void resetVUrec(const int vuIndex) { if (!vuIndex) mVUreset<0>(); else mVUreset<1>(); } -__forceinline void clearVUrec(u32 addr, u32 size, const int vuIndex) { +void clearVUrec(u32 addr, u32 size, const int vuIndex) { if (!vuIndex) mVUclear<0>(addr, size); else mVUclear<1>(addr, size); } -__forceinline void runVUrec(u32 startPC, u32 cycles, const int vuIndex) { +void runVUrec(u32 startPC, u32 cycles, const int vuIndex) { if (!vuIndex) startVU0(startPC, cycles); else startVU1(startPC, cycles); } diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index c539eb1297..f039c0a980 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -54,6 +54,7 @@ struct microAllocInfo { u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u32 cycles; // Cycles for current block u32 curPC; // Current PC + u32 startPC; // Start PC for Cur Block u32 info[pSize]; // bit 00 = Lower Instruction is NOP // bit 01 // bit 02 diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index f35299ab28..a493da8968 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -143,4 +143,18 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { analyzePreg(xCycles); } +//------------------------------------------------------------------ +// Sflag - Status Flag Opcodes +//------------------------------------------------------------------ + +#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } +#define analyzeVIreg2(reg, 
aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; } } + +microVUt(void) mVUanalyzeSflag(int It) { + microVU* mVU = mVUx; + if (!It) { mVUinfo |= _isNOP; return; } + mVUinfo |= _isSflag; + analyzeVIreg2(It, 1); +} + #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 49c517de5e..686e4a6563 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -19,18 +19,6 @@ #pragma once #ifdef PCSX2_MICROVU -#ifdef mVUdebug -#define mVUdebugStuff1() { \ - if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ - if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \ - if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \ - if (curI & _Dbit_) { SysPrintf("microVU: D-bit set!\n"); } \ - if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \ -} -#else -#define mVUdebugStuff1() {} -#endif - #define createBlock(blockEndPtr) { \ block.pipelineState = pipelineState; \ block.x86ptrStart = x86ptrStart; \ @@ -59,19 +47,18 @@ microVUt(void) mVUsetCycles() { microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { microVU* mVU = mVUx; microBlock block; - int branch; iPC = startPC / 4; - + // Searches for Existing Compiled Block (if found, then returns; else, compile) microBlock* pblock = mVUblock[iPC]->search(pipelineState, pState); if (block) { x86SetPtr(pblock->x86ptrEnd); return; } // First Pass setCode(); - branch = 0; - mVUbranch = 0; - mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage - for (;;) { + mVUbranch = 0; + mVUstartPC = iPC; + mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage + for (int branch = 0;; ) { startLoop(); mVUopU(); if (curI & _Ebit_) { branch = 1; } @@ -89,6 +76,9 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, iPC = startPC; setCode(); for (bool x = 1; x; ) { + // + // ToDo: status/mac flag stuff + // if (isEOB) { x = 0; } else if (isBranch) { mVUopU(); incPC(2); } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 8e7859e033..a7e8438c24 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -23,6 +23,10 @@ // Micro VU Micromode Lower instructions //------------------------------------------------------------------ +//------------------------------------------------------------------ +// DIV/SQRT/RSQRT +//------------------------------------------------------------------ + #define testZero(xmmReg, xmmTemp, gprTemp) { \ SSE_XORPS_XMM_to_XMM(xmmTemp, xmmTemp); /* Clear xmmTemp (make it 0) */ \ SSE_CMPEQPS_XMM_to_XMM(xmmTemp, xmmReg); /* Set all F's if zero */ \ @@ -128,6 +132,10 @@ microVUf(void) mVU_RSQRT() { } } +//------------------------------------------------------------------ +// EATAN/EEXP/ELENG/ERCPR/ERLENG/ERSADD/ERSQRT/ESADD/ESIN/ESQRT/ESUM +//------------------------------------------------------------------ + #define EATANhelper(addr) { \ SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ SSE_MULSS_XMM_to_XMM(xmmT1, xmmFs); \ @@ -401,6 +409,10 @@ microVUf(void) mVU_ESUM() { } } +//------------------------------------------------------------------ +// FCAND/FCEQ/FCGET/FCOR/FCSET +//------------------------------------------------------------------ + microVUf(void) mVU_FCAND() { microVU* mVU = mVUx; if (!recPass) {} @@ -456,6 +468,10 @@ microVUf(void) mVU_FCSET() { } } +//------------------------------------------------------------------ +// FMAND/FMEQ/FMOR 
+//------------------------------------------------------------------ + microVUf(void) mVU_FMAND() { microVU* mVU = mVUx; if (!recPass) {} @@ -491,9 +507,13 @@ microVUf(void) mVU_FMOR() { } } +//------------------------------------------------------------------ +// FSAND/FSEQ/FSOR/FSSET +//------------------------------------------------------------------ + microVUf(void) mVU_FSAND() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSflag(_Ft_); } else { mVUallocSFLAGa(gprT1, fvsInstance); AND16ItoR(gprT1, _Imm12_); @@ -503,7 +523,7 @@ microVUf(void) mVU_FSAND() { microVUf(void) mVU_FSEQ() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSflag(_Ft_); } else { mVUallocSFLAGa(gprT1, fvsInstance); XOR16ItoR(gprT1, _Imm12_); @@ -515,7 +535,7 @@ microVUf(void) mVU_FSEQ() { microVUf(void) mVU_FSOR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSflag(_Ft_); } else { mVUallocSFLAGa(gprT1, fvsInstance); OR16ItoR(gprT1, _Imm12_); @@ -536,6 +556,10 @@ microVUf(void) mVU_FSSET() { } } +//------------------------------------------------------------------ +// IADD/IADDI/IADDIU/IAND/IOR/ISUB/ISUBIU +//------------------------------------------------------------------ + microVUf(void) mVU_IADD() { microVU* mVU = mVUx; if (!recPass) {} @@ -623,6 +647,10 @@ microVUf(void) mVU_ISUBIU() { } } +//------------------------------------------------------------------ +// MOVE/MFIR/MFP/MTIR/MR32 +//------------------------------------------------------------------ + microVUf(void) mVU_MOVE() { microVU* mVU = mVUx; if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ } @@ -672,6 +700,10 @@ microVUf(void) mVU_MR32() { } } +//------------------------------------------------------------------ +// ILW/ILWR +//------------------------------------------------------------------ + microVUf(void) mVU_ILW() { microVU* mVU = mVUx; if (!recPass) { /*If (!_Ft_) nop();*/ } @@ -709,6 +741,10 @@ microVUf(void) mVU_ILWR() { } } +//------------------------------------------------------------------ +// ISW/ISWR +//------------------------------------------------------------------ + microVUf(void) mVU_ISW() { microVU* mVU = mVUx; if (!recPass) {} @@ -757,6 +793,10 @@ microVUf(void) mVU_ISWR() { } } +//------------------------------------------------------------------ +// LQ/LQD/LQI +//------------------------------------------------------------------ + microVUf(void) mVU_LQ() { microVU* mVU = mVUx; if (!recPass) { /*If (!_Ft_) nop();*/ } @@ -818,6 +858,10 @@ microVUf(void) mVU_LQI() { } } +//------------------------------------------------------------------ +// SQ/SQD/SQI +//------------------------------------------------------------------ + microVUf(void) mVU_SQ() { microVU* mVU = mVUx; if (!recPass) {} @@ -875,6 +919,10 @@ microVUf(void) mVU_SQI() { } } +//------------------------------------------------------------------ +// RINIT/RGET/RNEXT/RXOR +//------------------------------------------------------------------ + microVUf(void) mVU_RINIT() { microVU* mVU = mVUx; if (!recPass) {} @@ -938,21 +986,27 @@ microVUf(void) mVU_RXOR() { } } +//------------------------------------------------------------------ +// WaitP/WaitQ +//------------------------------------------------------------------ + microVUf(void) mVU_WAITP() { microVU* mVU = mVUx; - if (!recPass) {} - else {} + if (!recPass) { mVUstall = aMax(mVUstall, ((mVUregs.p) ? 
(mVUregs.p - 1) : 0)); } } microVUf(void) mVU_WAITQ() { microVU* mVU = mVUx; - if (!recPass) {} - else {} + if (!recPass) { mVUstall = aMax(mVUstall, mVUregs.q); } } +//------------------------------------------------------------------ +// XTOP/XITOP +//------------------------------------------------------------------ + microVUf(void) mVU_XTOP() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; return; } analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->top); mVUallocVIb(gprT1, _Ft_); @@ -961,13 +1015,17 @@ microVUf(void) mVU_XTOP() { microVUf(void) mVU_XITOP() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; return; } analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->itop ); mVUallocVIb(gprT1, _Ft_); } } +//------------------------------------------------------------------ +// XGkick +//------------------------------------------------------------------ + microVUt(void) __fastcall mVU_XGKICK_(u32 addr) { microVU* mVU = mVUx; u32 *data = (u32*)(mVU->regs->Mem + (addr&0x3fff)); @@ -990,7 +1048,7 @@ microVUf(void) mVU_XGKICK() { } //------------------------------------------------------------------ -// Branches +// Branches/Jumps //------------------------------------------------------------------ microVUf(void) mVU_B() { diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index ce9272f201..90232a5088 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -66,9 +66,9 @@ declareAllVariables //------------------------------------------------------------------ // Helper Macros //------------------------------------------------------------------ -#define _Ft_ ((mVU->code >> 16) & 0x1F) // The rt part of the instruction register -#define _Fs_ ((mVU->code >> 11) & 0x1F) // The rd part of the instruction register -#define _Fd_ ((mVU->code >> 6) & 0x1F) // The sa part of the instruction register +#define _Ft_ ((mVU->code >> 16) & 0x1F) // The ft/it part of the instruction register +#define _Fs_ ((mVU->code >> 11) & 0x1F) // The fs/is part of the instruction register +#define _Fd_ ((mVU->code >> 6) & 0x1F) // The fd/id part of the instruction register #define _X ((mVU->code>>24) & 0x1) #define _Y ((mVU->code>>23) & 0x1) @@ -144,11 +144,12 @@ declareAllVariables #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles #define mVUstall mVUallocInfo.maxStall -#define mVUdivFlag mVUallocInfo.divFlag -#define mVUdivFlagT mVUallocInfo.divFlagTimer +//#define mVUdivFlag mVUallocInfo.divFlag +//#define mVUdivFlagT mVUallocInfo.divFlagTimer #define mVUregs mVUallocInfo.regs #define mVUregsTemp mVUallocInfo.regsTemp #define mVUinfo mVUallocInfo.info[mVUallocInfo.curPC / 2] +#define mVUstartPC mVUallocInfo.startPC #define iPC mVUallocInfo.curPC #define xPC ((iPC / 2) * 8) #define incCycles(x) { mVUcycles += x; } @@ -157,6 +158,7 @@ declareAllVariables #define _isBranch (1<<1) // Cur Instruction is a Branch #define _isEOB (1<<2) // End of Block #define _isBdelay (1<<3) // Cur Instruction in Branch Delay slot +#define _isSflag (1<<4) // Cur Instruction uses status flag #define _writeQ (1<<5) #define _readQ (1<<6) #define _writeP (1<<7) @@ -177,6 +179,7 @@ declareAllVariables #define isBranch (mVUinfo & (1<<1)) #define isEOB (mVUinfo & (1<<2)) #define isBdelay (mVUinfo & (1<<3)) +#define isSflag (mVUinfo & (1<<4)) #define writeQ ((mVUinfo >> 5) & 1) #define readQ ((mVUinfo >> 6) & 1) #define writeP ((mVUinfo >> 7) & 1) 
@@ -200,3 +203,14 @@ declareAllVariables #define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) #define mmVI(_VIreg_) (_VIreg_ - 1) +#ifdef mVUdebug +#define mVUdebugStuff1() { \ + if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ + if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \ + if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \ + if (curI & _Dbit_) { SysPrintf("microVU: D-bit set!\n"); } \ + if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \ +} +#else +#define mVUdebugStuff1() {} +#endif From 60e7428671a37168c5cc3fcb37e84908f3f41d69 Mon Sep 17 00:00:00 2001 From: refraction Date: Mon, 6 Apr 2009 23:48:59 +0000 Subject: [PATCH 007/143] Did some testing on the V3_# unpacks, they do some strange stuff for what goes in the W vector every 6qw of original data. Also fixed the use of the size variable so Xmen works again. Note: Nobody will notice this as SSE unpacks are forced on (for now) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@916 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 11 +++++++++-- pcsx2/VifDma.cpp | 9 ++++++--- pcsx2/VifDma.h | 2 +- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index a2ebe6f961..3e6626391f 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -357,8 +357,10 @@ static void _UNPACKpart(u32 offnum, u32 &x, T y) } template -static void _UNPACKpart(u32 offnum, u32 &x, T y, int &size) +static void _UNPACKpart(u32 offnum, u32 &x, T y, int size) { + if(size == 0) return; + if (_vifRegs->offset == offnum) { switch (offnum) @@ -409,7 +411,12 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) _UNPACKpart(OFFSET_X, *dest++, *data++, size); _UNPACKpart(OFFSET_Y, *dest++, *data++, size); _UNPACKpart(OFFSET_Z, *dest++, *data++, size); - _UNPACKpart(OFFSET_W, *dest, *data); + //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) 
+ if((_vif->qwcalign % 24) == 0) + _UNPACKpart(OFFSET_W, *dest, 0); + else + _UNPACKpart(OFFSET_W, *dest, *data); + if (_vifRegs->offset == 4) _vifRegs->offset = 0; } diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 02361285c2..32840541c0 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -440,7 +440,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma Console::WriteLn("Unpack align offset = 0"); } destinc = (4 - ft->qsize) + unpacksize; - + vif->qwcalign += unpacksize * ft->dsize; func(dest, (u32*)cdata, unpacksize); size -= unpacksize * ft->dsize; cdata += unpacksize * ft->dsize; @@ -482,6 +482,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma while ((size >= ft->gsize) && (vifRegs->num > 0)) { + vif->qwcalign += ft->gsize; func(dest, (u32*)cdata, ft->qsize); cdata += ft->gsize; size -= ft->gsize; @@ -595,6 +596,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma while ((size >= ft->gsize) && (vifRegs->num > 0)) { + vif->qwcalign += ft->gsize; //Must do this before the transfer, else the confusing packets dont go right :P func(dest, (u32*)cdata, ft->qsize); cdata += ft->gsize; size -= ft->gsize; @@ -649,6 +651,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma //VIF_LOG("warning, end with size = %d", size); /* unpack one qword */ + vif->qwcalign += (size / ft->dsize) * ft->dsize; func(dest, (u32*)cdata, size / ft->dsize); size = 0; @@ -786,7 +789,7 @@ static __forceinline void vif0UNPACK(u32 *data) len = ((((32 >> vl) * (vn + 1)) * n) + 31) >> 5; } - vif0.wl = 0; + vif0.qwcalign = 0; vif0.cl = 0; vif0.tag.cmd = vif0.cmd; vif0.tag.addr &= 0xfff; @@ -1516,9 +1519,9 @@ static __forceinline void vif1UNPACK(u32 *data) else vif1.tag.addr = vif1Regs->code & 0x3ff; + vif1.qwcalign = 0; vif1.cl = 0; vif1.tag.addr <<= 4; - vif1.tag.cmd = vif1.cmd; } diff --git a/pcsx2/VifDma.h b/pcsx2/VifDma.h index a7fed1c30b..6af78214e7 100644 --- a/pcsx2/VifDma.h +++ b/pcsx2/VifDma.h @@ -32,7 +32,7 @@ struct vifStruct { int cmd; int irq; int cl; - int wl; + int qwcalign; u8 usn; // The next three should be boolean, and will be next time I break savestate compatability. --arcum42 From 2588dc0309e0025da2181fffda9f7bad2ff431fe Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 7 Apr 2009 08:42:25 +0000 Subject: [PATCH 008/143] Emitter renovations of a large scale sort (only up to phase 1). Intel's 'group 1' instructions now use a completely new ModRM/SIB encoder, along with a nicely object-oriented interface. I created some macros to retain backward compat for now, and will continue implementing the rest of the instructions later as I have time. Also: Removed x86/64 instructions from the emitter. 
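For the curious, the heart of any such encoder is the ModRM byte (mod:2 | reg:3 | rm:3), plus an optional SIB byte and displacement. A rough standalone sketch of the addressing form used by calls like AND32RmtoROffset — not the new emitter's actual interface, and skipping the SIB/ESP and disp8 special cases:

    typedef unsigned char u8;
    typedef unsigned int  u32;

    static u8* x86Ptr;                      // assumed: current emit position

    static void write8(u8 v)   { *x86Ptr++ = v; }
    static void write32(u32 v) { *(u32*)x86Ptr = v; x86Ptr += 4; }

    // ModRM packs mod (addressing mode), reg (register operand or /digit)
    // and rm (the other operand) into one byte.
    static void ModRM(u32 mod, u32 reg, u32 rm)
    {
        write8((u8)((mod << 6) | (reg << 3) | rm));
    }

    // AND reg32, [base + disp32]  -- opcode 0x23 /r.
    // (base must not be ESP, which requires a SIB byte instead.)
    static void AND32RmtoROffset(u32 to, u32 base, u32 disp)
    {
        write8(0x23);
        ModRM(2, to, base);                 // mod = 10b: [base] + disp32
        write32(disp);
    }

Elsewhere in the patch the old *Offset variants collapse into single forms (MOV32ItoRm, SSE_MOVSS_XMM_to_Rm, ...) that take the displacement as an ordinary, defaultable argument, which is why so many call sites simply lose the "Offset" suffix.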
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@917 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 + pcsx2/x86/iMMI.cpp | 4 +- pcsx2/x86/iR3000A.cpp | 2 +- pcsx2/x86/iVUmicroLower.cpp | 94 +- pcsx2/x86/ix86-32/iCore-32.cpp | 16 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 4 +- pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 4 +- pcsx2/x86/ix86-32/recVTLB.cpp | 48 +- pcsx2/x86/ix86/ix86.cpp | 196 ++++ pcsx2/x86/ix86/ix86.h | 27 +- pcsx2/x86/ix86/ix86.inl | 1115 +------------------- pcsx2/x86/ix86/ix86_group1.inl | 258 +++++ pcsx2/x86/ix86/ix86_macros.h | 109 +- pcsx2/x86/ix86/ix86_mmx.inl | 8 +- pcsx2/x86/ix86/ix86_sse.inl | 75 +- pcsx2/x86/ix86/ix86_sse_helpers.h | 30 +- pcsx2/x86/ix86/ix86_types.h | 209 ++++ 17 files changed, 905 insertions(+), 1298 deletions(-) create mode 100644 pcsx2/x86/ix86/ix86_group1.inl diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index a11a52c051..2618f59630 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2982,6 +2982,10 @@ /> + + diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index 4fe4632b0a..a9edfba6c2 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -1982,8 +1982,8 @@ void recQFSRV() MOV32MtoR(EAX, (uptr)&cpuRegs.sa); SHL32ItoR(EAX, 4); // Multiply SA bytes by 16 bytes (the amount of bytes in QFSRVhelper() macros) - AND32I8toR(EAX, 0xf0); // This can possibly be removed but keeping it incase theres garbage in SA (cottonvibes) - ADD32ItoEAX((uptr)x86Ptr[0] + 7); // ADD32 = 5 bytes, JMPR = 2 bytes + AND32ItoR(EAX, 0xf0); // This can possibly be removed but keeping it incase theres garbage in SA (cottonvibes) + ADD32ItoR(EAX, (uptr)x86Ptr[0] + 7); // ADD32 = 5 bytes, JMPR = 2 bytes JMPR(EAX); // Jumps to a QFSRVhelper() case below (a total of 16 different cases) // Case 0: diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index a1b1ec756d..23d0d45bf0 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -316,7 +316,7 @@ void _psxMoveGPRtoM(u32 to, int fromgpr) void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr) { if( PSX_IS_CONST1(fromgpr) ) - MOV32ItoRmOffset( to, g_psxConstRegs[fromgpr], 0 ); + MOV32ItoRm( to, g_psxConstRegs[fromgpr] ); else { // check x86 MOV32MtoR(EAX, (uptr)&psxRegs.GPR.r[ fromgpr ] ); diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 58156d0926..3271d69681 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -609,31 +609,31 @@ void _loadEAX(VURegs *VU, int x86reg, uptr offset, int info) if( x86reg >= 0 ) { switch(_X_Y_Z_W) { case 3: // ZW - SSE_MOVHPS_RmOffset_to_XMM(EEREC_T, x86reg, offset+8); + SSE_MOVHPS_Rm_to_XMM(EEREC_T, x86reg, offset+8); break; case 6: // YZ - SSE_SHUFPS_RmOffset_to_XMM(EEREC_T, x86reg, offset, 0x9c); + SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0x9c); SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x78); break; case 8: // X - SSE_MOVSS_RmOffset_to_XMM(EEREC_TEMP, x86reg, offset); + SSE_MOVSS_Rm_to_XMM(EEREC_TEMP, x86reg, offset); SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP); break; case 9: // XW - SSE_SHUFPS_RmOffset_to_XMM(EEREC_T, x86reg, offset, 0xc9); + SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0xc9); SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xd2); break; case 12: // XY - SSE_MOVLPS_RmOffset_to_XMM(EEREC_T, x86reg, offset); + SSE_MOVLPS_Rm_to_XMM(EEREC_T, x86reg, offset); break; case 15: - if( VU == &VU1 ) SSE_MOVAPSRmtoROffset(EEREC_T, x86reg, offset); - else 
SSE_MOVUPSRmtoROffset(EEREC_T, x86reg, offset); + if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_T, x86reg, offset); + else SSE_MOVUPSRmtoR(EEREC_T, x86reg, offset); break; default: - if( VU == &VU1 ) SSE_MOVAPSRmtoROffset(EEREC_TEMP, x86reg, offset); - else SSE_MOVUPSRmtoROffset(EEREC_TEMP, x86reg, offset); + if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_TEMP, x86reg, offset); + else SSE_MOVUPSRmtoR(EEREC_TEMP, x86reg, offset); VU_MERGE_REGS(EEREC_T, EEREC_TEMP); break; @@ -795,15 +795,15 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) if ( _Fs_ == 0 ) { if ( _XYZW_SS ) { u32 c = _W ? 0x3f800000 : 0; - if ( x86reg >= 0 ) MOV32ItoRmOffset(x86reg, c, offset+(_W?12:(_Z?8:(_Y?4:0)))); + if ( x86reg >= 0 ) MOV32ItoRm(x86reg, c, offset+(_W?12:(_Z?8:(_Y?4:0)))); else MOV32ItoM(offset+(_W?12:(_Z?8:(_Y?4:0))), c); } else { if ( x86reg >= 0 ) { - if ( _X ) MOV32ItoRmOffset(x86reg, 0x00000000, offset); - if ( _Y ) MOV32ItoRmOffset(x86reg, 0x00000000, offset+4); - if ( _Z ) MOV32ItoRmOffset(x86reg, 0x00000000, offset+8); - if ( _W ) MOV32ItoRmOffset(x86reg, 0x3f800000, offset+12); + if ( _X ) MOV32ItoRm(x86reg, 0x00000000, offset); + if ( _Y ) MOV32ItoRm(x86reg, 0x00000000, offset+4); + if ( _Z ) MOV32ItoRm(x86reg, 0x00000000, offset+8); + if ( _W ) MOV32ItoRm(x86reg, 0x3f800000, offset+12); } else { if ( _X ) MOV32ItoM(offset, 0x00000000); @@ -818,29 +818,29 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) switch ( _X_Y_Z_W ) { case 1: // W SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x27); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); break; case 2: // Z SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); else SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP); break; case 3: // ZW - if ( x86reg >= 0 ) SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+8); + if ( x86reg >= 0 ) SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8); else SSE_MOVHPS_XMM_to_M64(offset+8, EEREC_S); break; case 4: // Y SSE2_PSHUFLW_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4e); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); else SSE_MOVSS_XMM_to_M32(offset+4, EEREC_TEMP); break; case 5: // YW SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xB1); SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset+4); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset+4); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); } else { SSE_MOVSS_XMM_to_M32(offset+4, EEREC_S); @@ -850,14 +850,14 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) break; case 6: // YZ SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0xc9); - if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4); + if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); else SSE_MOVLPS_XMM_to_M64(offset+4, EEREC_TEMP); break; case 7: // YZW SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x93); //ZYXW if ( x86reg >= 0 ) { - SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+4); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, 
offset+12); } else { SSE_MOVHPS_XMM_to_M64(offset+4, EEREC_TEMP); @@ -865,26 +865,26 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) } break; case 8: // X - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); break; case 9: // XW SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); else SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); break; case 10: //XZ SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); } else { SSE_MOVSS_XMM_to_M32(offset, EEREC_S); @@ -893,8 +893,8 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) break; case 11: //XZW if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset); - SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+8); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); + SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8); } else { SSE_MOVSS_XMM_to_M32(offset, EEREC_S); @@ -902,14 +902,14 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) } break; case 12: // XY - if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+0); + if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0); else SSE_MOVLPS_XMM_to_M64(offset, EEREC_S); break; case 13: // XYW SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4b); //YXZW if ( x86reg >= 0 ) { - SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+0); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12); + SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+0); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); } else { SSE_MOVHPS_XMM_to_M64(offset, EEREC_TEMP); @@ -919,8 +919,8 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) case 14: // XYZ SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); if ( x86reg >= 0 ) { - SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+0); - SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8); + SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); } else { SSE_MOVLPS_XMM_to_M64(offset, EEREC_S); @@ -929,11 +929,11 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) break; case 15: // XYZW if ( VU == &VU1 ) { - if( x86reg >= 0 ) SSE_MOVAPSRtoRmOffset(x86reg, EEREC_S, offset+0); + if( x86reg >= 0 ) SSE_MOVAPSRtoRm(x86reg, EEREC_S, offset+0); else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S); } else { - if( x86reg >= 0 ) SSE_MOVUPSRtoRmOffset(x86reg, EEREC_S, offset+0); + if( x86reg >= 0 ) SSE_MOVUPSRtoRm(x86reg, EEREC_S, offset+0); else { if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, EEREC_S); else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S); @@ -1018,7 +1018,7 @@ void recVUMI_ILW(VURegs *VU, int info) } else { int fsreg = ALLOCVI(_Fs_, MODE_READ); - MOV32RmtoROffset(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, imm), (uptr)VU->Mem + 
off); + MOV32RmtoR(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, imm), (uptr)VU->Mem + off); } } //------------------------------------------------------------------ @@ -1051,10 +1051,10 @@ void recVUMI_ISW( VURegs *VU, int info ) x86reg = recVUTransformAddr(fsreg, VU, _Fs_, imm); - if (_X) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem); - if (_Y) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+4); - if (_Z) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+8); - if (_W) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+12); + if (_X) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem); + if (_Y) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+4); + if (_Z) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+8); + if (_W) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+12); } } //------------------------------------------------------------------ @@ -1082,7 +1082,7 @@ void recVUMI_ILWR( VURegs *VU, int info ) } else { int fsreg = ALLOCVI(_Fs_, MODE_READ); - MOVZX32Rm16toROffset(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, 0), (uptr)VU->Mem + off); + MOVZX32Rm16toR(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, 0), (uptr)VU->Mem + off); } } //------------------------------------------------------------------ @@ -1109,10 +1109,10 @@ void recVUMI_ISWR( VURegs *VU, int info ) int fsreg = ALLOCVI(_Fs_, MODE_READ); x86reg = recVUTransformAddr(fsreg, VU, _Fs_, 0); - if (_X) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem); - if (_Y) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+4); - if (_Z) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+8); - if (_W) MOV32RtoRmOffset(x86reg, ftreg, (uptr)VU->Mem+12); + if (_X) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem); + if (_Y) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+4); + if (_Z) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+8); + if (_W) MOV32RtoRm(x86reg, ftreg, (uptr)VU->Mem+12); } } //------------------------------------------------------------------ diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 1021644be3..d9a9e75664 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -1050,12 +1050,12 @@ void _recMove128MtoM(u32 to, u32 from) // fixme - see above function! 
void _recMove128RmOffsettoM(u32 to, u32 offset) { - MOV32RmtoROffset(EAX, ECX, offset); - MOV32RmtoROffset(EDX, ECX, offset+4); + MOV32RmtoR(EAX, ECX, offset); + MOV32RmtoR(EDX, ECX, offset+4); MOV32RtoM(to, EAX); MOV32RtoM(to+4, EDX); - MOV32RmtoROffset(EAX, ECX, offset+8); - MOV32RmtoROffset(EDX, ECX, offset+12); + MOV32RmtoR(EAX, ECX, offset+8); + MOV32RmtoR(EDX, ECX, offset+12); MOV32RtoM(to+8, EAX); MOV32RtoM(to+12, EDX); } @@ -1065,12 +1065,12 @@ void _recMove128MtoRmOffset(u32 offset, u32 from) { MOV32MtoR(EAX, from); MOV32MtoR(EDX, from+4); - MOV32RtoRmOffset(ECX, EAX, offset); - MOV32RtoRmOffset(ECX, EDX, offset+4); + MOV32RtoRm(ECX, EAX, offset); + MOV32RtoRm(ECX, EDX, offset+4); MOV32MtoR(EAX, from+8); MOV32MtoR(EDX, from+12); - MOV32RtoRmOffset(ECX, EAX, offset+8); - MOV32RtoRmOffset(ECX, EDX, offset+12); + MOV32RtoRm(ECX, EAX, offset+8); + MOV32RtoRm(ECX, EDX, offset+12); } static PCSX2_ALIGNED16(u32 s_ones[2]) = {0xffffffff, 0xffffffff}; diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 6d568c95b8..34d9662b9b 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -367,7 +367,7 @@ void _eeMoveGPRtoM(u32 to, int fromgpr) void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr) { if( GPR_IS_CONST1(fromgpr) ) - MOV32ItoRmOffset( to, g_cpuConstRegs[fromgpr].UL[0], 0 ); + MOV32ItoRm( to, g_cpuConstRegs[fromgpr].UL[0] ); else { int mmreg; @@ -380,7 +380,7 @@ void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr) } else { MOV32MtoR(EAX, (int)&cpuRegs.GPR.r[ fromgpr ].UL[ 0 ] ); - MOV32RtoRm(to, EAX ); + MOV32RtoRm( to, EAX ); } } } diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 83925362c3..1c39766598 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -2101,7 +2101,7 @@ void recLoad64( u32 bits, bool sign ) if ( _Imm_ != 0 ) ADD32ItoR( ECX, _Imm_ ); if( bits == 128 ) // force 16 byte alignment on 128 bit reads - AND32I8toR(ECX,0xF0); + AND32ItoR(ECX,~0x0F); // emitter automatically encodes this as an 8-bit sign-extended imm8 _eeOnLoadWrite(_Rt_); EEINST_RESETSIGNEXT(_Rt_); // remove the sign extension @@ -2198,7 +2198,7 @@ void recStore(u32 sz, bool edxAlreadyAssigned=false) if ( _Imm_ != 0 ) ADD32ItoR(ECX, _Imm_); if (sz==128) - AND32I8toR(ECX,0xF0); + AND32ItoR(ECX,~0x0F); vtlb_DynGenWrite(sz); } diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index cbc567c68d..dd45f51d82 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -33,12 +33,12 @@ void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm ) { MOV32RmtoR(EAX,srcRm); MOV32RtoRm(destRm,EAX); - MOV32RmtoROffset(EAX,srcRm,4); - MOV32RtoRmOffset(destRm,EAX,4); - MOV32RmtoROffset(EAX,srcRm,8); - MOV32RtoRmOffset(destRm,EAX,8); - MOV32RmtoROffset(EAX,srcRm,12); - MOV32RtoRmOffset(destRm,EAX,12); + MOV32RmtoR(EAX,srcRm,4); + MOV32RtoRm(destRm,EAX,4); + MOV32RmtoR(EAX,srcRm,8); + MOV32RtoRm(destRm,EAX,8); + MOV32RmtoR(EAX,srcRm,12); + MOV32RtoRm(destRm,EAX,12); } /* @@ -121,8 +121,8 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign ) if( _hasFreeMMXreg() ) { const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); - MOVQRmtoROffset(freereg,ECX,0); - MOVQRtoRmOffset(EDX,freereg,0); + MOVQRmtoR(freereg,ECX); + MOVQRtoRm(EDX,freereg); _freeMMXreg(freereg); } else @@ -130,8 +130,8 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign ) MOV32RmtoR(EAX,ECX); MOV32RtoRm(EDX,EAX); - MOV32RmtoROffset(EAX,ECX,4); - 
MOV32RtoRmOffset(EDX,EAX,4); + MOV32RmtoR(EAX,ECX,4); + MOV32RtoRm(EDX,EAX,4); } break; @@ -139,8 +139,8 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign ) if( _hasFreeXMMreg() ) { const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); - SSE2_MOVDQARmtoROffset(freereg,ECX,0); - SSE2_MOVDQARtoRmOffset(EDX,freereg,0); + SSE2_MOVDQARmtoR(freereg,ECX); + SSE2_MOVDQARtoRm(EDX,freereg); _freeXMMreg(freereg); } else @@ -255,7 +255,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) { const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); MOVQMtoR(freereg,ppf); - MOVQRtoRmOffset(EDX,freereg,0); + MOVQRtoRm(EDX,freereg); _freeMMXreg(freereg); } else @@ -264,7 +264,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) MOV32RtoRm(EDX,EAX); MOV32MtoR(EAX,ppf+4); - MOV32RtoRmOffset(EDX,EAX,4); + MOV32RtoRm(EDX,EAX,4); } break; @@ -273,7 +273,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) { const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); SSE2_MOVDQA_M128_to_XMM( freereg, ppf ); - SSE2_MOVDQARtoRmOffset(EDX,freereg,0); + SSE2_MOVDQARtoRm(EDX,freereg); _freeXMMreg(freereg); } else @@ -406,8 +406,8 @@ static void _vtlb_DynGen_DirectWrite( u32 bits ) if( _hasFreeMMXreg() ) { const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); - MOVQRmtoROffset(freereg,EDX,0); - MOVQRtoRmOffset(ECX,freereg,0); + MOVQRmtoR(freereg,EDX); + MOVQRtoRm(ECX,freereg); _freeMMXreg( freereg ); } else @@ -415,8 +415,8 @@ static void _vtlb_DynGen_DirectWrite( u32 bits ) MOV32RmtoR(EAX,EDX); MOV32RtoRm(ECX,EAX); - MOV32RmtoROffset(EAX,EDX,4); - MOV32RtoRmOffset(ECX,EAX,4); + MOV32RmtoR(EAX,EDX,4); + MOV32RtoRm(ECX,EAX,4); } break; @@ -424,8 +424,8 @@ static void _vtlb_DynGen_DirectWrite( u32 bits ) if( _hasFreeXMMreg() ) { const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); - SSE2_MOVDQARmtoROffset(freereg,EDX,0); - SSE2_MOVDQARtoRmOffset(ECX,freereg,0); + SSE2_MOVDQARmtoR(freereg,EDX); + SSE2_MOVDQARtoRm(ECX,freereg); _freeXMMreg( freereg ); } else @@ -502,7 +502,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) if( _hasFreeMMXreg() ) { const int freereg = _allocMMXreg(-1, MMX_TEMP, 0); - MOVQRmtoROffset(freereg,EDX,0); + MOVQRmtoR(freereg,EDX); MOVQRtoM(ppf,freereg); _freeMMXreg( freereg ); } @@ -511,7 +511,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) MOV32RmtoR(EAX,EDX); MOV32RtoM(ppf,EAX); - MOV32RmtoROffset(EAX,EDX,4); + MOV32RmtoR(EAX,EDX,4); MOV32RtoM(ppf+4,EAX); } break; @@ -520,7 +520,7 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ) if( _hasFreeXMMreg() ) { const int freereg = _allocTempXMMreg( XMMT_INT, -1 ); - SSE2_MOVDQARmtoROffset(freereg,EDX,0); + SSE2_MOVDQARmtoR(freereg,EDX); SSE2_MOVDQA_XMM_to_M128(ppf,freereg); _freeXMMreg( freereg ); } diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 11acfce6b5..35d90c8079 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -39,3 +39,199 @@ PCSX2_ALIGNED16(u32 p2[4]); PCSX2_ALIGNED16(float f[4]); XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; + +namespace x86Emitter +{ + const x86IndexerType ptr; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + const x86Register x86Register::Empty( -1 ); + + const x86Register eax( 0 ); + const x86Register ebx( 3 ); + const x86Register ecx( 1 ); + const x86Register edx( 2 ); + const x86Register esi( 6 ); + const x86Register edi( 7 ); + const x86Register ebp( 5 ); + const x86Register esp( 4 ); + + const x86Register16 ax( 0 ); + const x86Register16 bx( 3 ); + const x86Register16 
cx( 1 ); + const x86Register16 dx( 2 ); + const x86Register16 si( 6 ); + const x86Register16 di( 7 ); + const x86Register16 bp( 5 ); + const x86Register16 sp( 4 ); + + const x86Register8 al( 0 ); + const x86Register8 cl( 1 ); + const x86Register8 dl( 2 ); + const x86Register8 bl( 3 ); + const x86Register8 ah( 4 ); + const x86Register8 ch( 5 ); + const x86Register8 dh( 6 ); + const x86Register8 bh( 7 ); + + ////////////////////////////////////////////////////////////////////////////////////////// + // x86Register Method Implementations + // + x86ModRm x86Register::operator+( const x86Register& right ) const + { + return x86ModRm( *this, right ); + } + + x86ModRm x86Register::operator+( const x86ModRm& right ) const + { + return right + *this; + } + + ////////////////////////////////////////////////////////////////////////////////////////// + // ModSib Method Implementations + // + x86ModRm x86ModRm::FromIndexReg( x86Register index, int scale, int displacement ) + { + return x86ModRm( x86Register::Empty, index, scale, displacement ); + } + + x86Register x86ModRm::GetEitherReg() const + { + return Base.IsEmpty() ? Base : Index; + } + + x86ModRm& x86ModRm::Add( const x86Register& src ) + { + if( src == Index ) + { + Factor++; + } + else if( src == Base ) + { + // Compound the existing register reference into the Index/Scale pair. + Base = x86Register::Empty; + + if( src == Index ) + Factor++; + else + { + jASSUME( Index.IsEmpty() ); // or die if we already have an index! + Index = src; + Factor = 2; + } + } + else if( Base.IsEmpty() ) + Base = src; + else if( Index.IsEmpty() ) + Index = src; + else + assert( false ); // oops, only 2 regs allowed per ModRm! + + return *this; + } + + x86ModRm& x86ModRm::Add( const x86ModRm& src ) + { + Add( src.Base ); + Add( src.Displacement ); + + // If the factor is 1, we can just treat index like a base register also. + if( src.Factor == 1 ) + { + Add( src.Index ); + } + else if( Index.IsEmpty() ) + { + Index = src.Index; + Factor = 1; + } + else if( Index == src.Index ) + Factor++; + else + assert( false ); // oops, only 2 regs allowed! + + return *this; + } + + + x86ModRm x86ptr( x86Register base ) { return x86ModRm( base ); } + + // ------------------------------------------------------------------------ + // Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values. + // Necessary because by default ModSib compounds registers into Index when possible. + // + void ModSib::Reduce() + { + // If no index reg, then nothing for us to do... + if( Index.IsEmpty() || Scale == 0 ) return; + + // The Scale has a series of valid forms, all shown here: + + switch( Scale ) + { + case 1: Scale = 0; break; + case 2: Scale = 1; break; + + case 3: // becomes [reg*2+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 1; + break; + + case 4: Scale = 2; break; + + case 5: // becomes [reg*4+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 2; + break; + + case 6: // invalid! + assert( false ); + break; + + case 7: // so invalid! 
+ assert( false ); + break; + + case 8: Scale = 3; break; + case 9: // becomes [reg*8+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 3; + break; + } + } + + ModSib::ModSib( const x86ModRm& src ) : + Base( src.Base ), + Index( src.Index ), + Scale( src.Factor ), + Displacement( src.Displacement ) + { + Reduce(); + } + + ModSib::ModSib( x86Register base, x86Register index, int scale, s32 displacement ) : + Base( base ), + Index( index ), + Scale( scale ), + Displacement( displacement ) + { + Reduce(); + } + + ModSib::ModSib( s32 displacement ) : + Base(), + Index(), + Scale(0), + Displacement( displacement ) + { + } + + x86Register ModSib::GetEitherReg() const + { + return Base.IsEmpty() ? Base : Index; + } +} diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 797dc2eaf7..34a10fbe59 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -42,6 +42,11 @@ #define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) #define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) +// We use int param for offsets and then test them for validity in the recompiler. +// This helps catch programmer errors better than using an auto-truncated s8 parameter. +#define assertOffset8(ofs) assert( ofs < 128 && ofs >= -128 ) + + //------------------------------------------------------------------ // write functions //------------------------------------------------------------------ @@ -49,23 +54,27 @@ extern u8 *x86Ptr[EmitterId_Count]; extern u8 *j8Ptr[32]; extern u32 *j32Ptr[32]; -emitterT void write8( u8 val ) { - *x86Ptr[I] = (u8)val; +emitterT void write8( u8 val ) +{ + *x86Ptr[I] = (u8)val; x86Ptr[I]++; -} +} -emitterT void write16( u16 val ) { - *(u16*)x86Ptr[I] = (u16)val; +emitterT void write16( u16 val ) +{ + *(u16*)x86Ptr[I] = val; x86Ptr[I] += 2; } -emitterT void write24( u32 val ) { +emitterT void write24( u32 val ) +{ *x86Ptr[I]++ = (u8)(val & 0xff); *x86Ptr[I]++ = (u8)((val >> 8) & 0xff); *x86Ptr[I]++ = (u8)((val >> 16) & 0xff); } -emitterT void write32( u32 val ) { +emitterT void write32( u32 val ) +{ *(u32*)x86Ptr[I] = val; x86Ptr[I] += 4; } @@ -93,7 +102,6 @@ emitterT void ex86AlignExecutable( int align ); //------------------------------------------------------------------ // General Emitter Helper functions //------------------------------------------------------------------ -emitterT void WriteRmOffset(x86IntRegType to, int offset); emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); emitterT void ModRM( int mod, int reg, int rm ); emitterT void SibSB( int ss, int index, int base ); @@ -112,15 +120,12 @@ emitterT void eCALL32( u32 to ); emitterT void eLEA32RtoR(x86IntRegType to, x86IntRegType from, u32 offset); emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); emitterT void eNOP( void ); -emitterT void eAND32I8toR( x86IntRegType to, u8 from ); emitterT void eAND32ItoM( uptr to, u32 from ); emitterT void eLEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1); -emitterT void eAND32I8toM( uptr to, u8 from ); emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); - #define MMXONLY(code) code #define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86.inl index 6a6376de3c..301401ae4b 100644 --- a/pcsx2/x86/ix86/ix86.inl +++ b/pcsx2/x86/ix86/ix86.inl @@ -34,39 +34,10 @@ #include "System.h" #include "ix86.h" -emitterT 
void WriteRmOffset(x86IntRegType to, s32 offset) -{ - if( (to&7) == ESP ) { - if( offset == 0 ) { - ModRM( 0, 0, 4 ); - SibSB( 0, ESP, 4 ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, 0, 4 ); - SibSB( 0, ESP, 4 ); - write8(offset); - } - else { - ModRM( 2, 0, 4 ); - SibSB( 0, ESP, 4 ); - write32(offset); - } - } - else { - if( offset == 0 ) { - ModRM( 0, 0, to ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, 0, to ); - write8(offset); - } - else { - ModRM( 2, 0, to ); - write32(offset); - } - } -} +#include "ix86_group1.inl" +// Note: the 'to' field can either be a register or a special opcode extension specifier +// depending on the opcode's encoding. emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset) { if ((from&7) == ESP) { @@ -294,101 +265,6 @@ emitterT void eNOP( void ) // mov instructions / //////////////////////////////////// -/* mov r64 to r64 */ -emitterT void eMOV64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x89 ); - ModRM( 3, from, to ); -} - -/* mov r64 to m64 */ -emitterT void eMOV64RtoM( uptr to, x86IntRegType from ) -{ - RexR(1, from); - write8( 0x89 ); - ModRM( 0, from, DISP32 ); - write32( (u32)MEMADDR(to, 4) ); -} - -/* mov m64 to r64 */ -emitterT void eMOV64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x8B ); - ModRM( 0, to, DISP32 ); - write32( (u32)MEMADDR(from, 4) ); -} - -/* mov imm32 to m64 */ -emitterT void eMOV64I32toM(uptr to, u32 from ) -{ - Rex(1, 0, 0, 0); - write8( 0xC7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -// mov imm64 to r64 -emitterT void eMOV64ItoR( x86IntRegType to, u64 from) -{ - RexB(1, to); - write8( 0xB8 | (to & 0x7) ); - write64( from ); -} - -/* mov imm32 to r64 */ -emitterT void eMOV64I32toR( x86IntRegType to, s32 from ) -{ - RexB(1, to); - write8( 0xC7 ); - ModRM( 0, 0, to ); - write32( from ); -} - -// mov imm64 to [r64+off] -emitterT void eMOV64ItoRmOffset( x86IntRegType to, u32 from, int offset) -{ - RexB(1,to); - write8( 0xC7 ); - WriteRmOffset(to, offset); - write32(from); -} - -// mov [r64+offset] to r64 -emitterT void eMOV64RmOffsettoR( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(1, to, from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, offset); -} - -/* mov [r64][r64*scale] to r64 */ -emitterT void eMOV64RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { - RexRXB(1, to, from2, from); - write8( 0x8B ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); -} - -/* mov r64 to [r64+offset] */ -emitterT void eMOV64RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(1,from,to); - write8( 0x89 ); - WriteRmOffsetFrom(from, to, offset); -} - -/* mov r64 to [r64][r64*scale] */ -emitterT void eMOV64RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { - RexRXB(1, to, from2, from); - write8( 0x89 ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); -} - - /* mov r32 to r32 */ emitterT void eMOV32RtoR( x86IntRegType to, x86IntRegType from ) { @@ -423,21 +299,16 @@ emitterT void eMOV32MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -/* mov [r32] to r32 */ -emitterT void eMOV32RmtoR( x86IntRegType to, x86IntRegType from ) { - RexRB(0, to, from); - write8(0x8B); - WriteRmOffsetFrom(to, from, 0); -} - -emitterT void eMOV32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) { +emitterT void eMOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) +{ 
RexRB(0, to, from); write8( 0x8B ); WriteRmOffsetFrom(to, from, offset); } /* mov [r32+r32*scale] to r32 */ -emitterT void eMOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { +emitterT void eMOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +{ RexRXB(0,to,from2,from); write8( 0x8B ); ModRM( 0, to, 0x4 ); @@ -445,7 +316,7 @@ emitterT void eMOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType } // mov r32 to [r32<( 0x8B ); @@ -454,22 +325,9 @@ emitterT void eMOV32RmSOffsettoR( x86IntRegType to, x86IntRegType from1, int fro write32(from2); } -/* mov r32 to [r32] */ -emitterT void eMOV32RtoRm( x86IntRegType to, x86IntRegType from ) { - RexRB(0, from, to); - if ((to&7) == ESP) { - write8( 0x89 ); - ModRM( 0, from, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - } - else { - write8( 0x89 ); - ModRM( 0, from, to ); - } -} - /* mov r32 to [r32][r32*scale] */ -emitterT void eMOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { +emitterT void eMOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +{ RexRXB(0, to, from2, from); write8( 0x89 ); ModRM( 0, to, 0x4 ); @@ -494,16 +352,16 @@ emitterT void eMOV32ItoM(uptr to, u32 from ) } // mov imm32 to [r32+off] -emitterT void eMOV32ItoRmOffset( x86IntRegType to, u32 from, int offset) +emitterT void eMOV32ItoRm( x86IntRegType to, u32 from, int offset=0) { RexB(0,to); write8( 0xC7 ); - WriteRmOffset(to, offset); + WriteRmOffsetFrom(0, to, offset); write32(from); } // mov r32 to [r32+off] -emitterT void eMOV32RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) +emitterT void eMOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) { RexRB(0,from,to); write8( 0x89 ); @@ -530,15 +388,7 @@ emitterT void eMOV16MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -emitterT void eMOV16RmtoR( x86IntRegType to, x86IntRegType from) -{ - write8( 0x66 ); - RexRB(0,to,from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, 0); -} - -emitterT void eMOV16RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) +emitterT void eMOV16RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { write8( 0x66 ); RexRB(0,to,from); @@ -546,7 +396,7 @@ emitterT void eMOV16RmtoROffset( x86IntRegType to, x86IntRegType from, int offse WriteRmOffsetFrom(to, from, offset); } -emitterT void eMOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale ) +emitterT void eMOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) { write8(0x66); RexRXB(0,to,from1,0); @@ -556,14 +406,6 @@ emitterT void eMOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 fro write32(from2); } -emitterT void eMOV16RtoRm(x86IntRegType to, x86IntRegType from) -{ - write8( 0x66 ); - RexRB(0,from,to); - write8( 0x89 ); - ModRM( 0, from, to ); -} - /* mov imm16 to m16 */ emitterT void eMOV16ItoM( uptr to, u16 from ) { @@ -575,7 +417,8 @@ emitterT void eMOV16ItoM( uptr to, u16 from ) } /* mov r16 to [r32][r32*scale] */ -emitterT void eMOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale) { +emitterT void eMOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +{ write8( 0x66 ); RexRXB(0,to,from2,from); write8( 0x89 ); @@ -591,7 +434,7 @@ emitterT void eMOV16ItoR( x86IntRegType to, u16 from ) } // mov imm16 to [r16+off] -emitterT void eMOV16ItoRmOffset( x86IntRegType to, u16 from, u32 offset) +emitterT void eMOV16ItoRm( x86IntRegType 
to, u16 from, u32 offset=0 ) { write8(0x66); RexB(0,to); @@ -601,7 +444,7 @@ emitterT void eMOV16ItoRmOffset( x86IntRegType to, u16 from, u32 offset) } // mov r16 to [r16+off] -emitterT void eMOV16RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) +emitterT void eMOV16RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { write8(0x66); RexRB(0,from,to); @@ -627,22 +470,14 @@ emitterT void eMOV8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -/* mov [r32] to r8 */ -emitterT void eMOV8RmtoR(x86IntRegType to, x86IntRegType from) -{ - RexRB(0,to,from); - write8( 0x8A ); - WriteRmOffsetFrom(to, from, 0); -} - -emitterT void eMOV8RmtoROffset(x86IntRegType to, x86IntRegType from, int offset) +emitterT void eMOV8RmtoR(x86IntRegType to, x86IntRegType from, int offset=0) { RexRB(0,to,from); write8( 0x8A ); WriteRmOffsetFrom(to, from, offset); } -emitterT void eMOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale ) +emitterT void eMOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) { RexRXB(0,to,from1,0); write8( 0x8A ); @@ -651,13 +486,6 @@ emitterT void eMOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from write32(from2); } -emitterT void eMOV8RtoRm(x86IntRegType to, x86IntRegType from) -{ - RexRB(0,from,to); - write8( 0x88 ); - WriteRmOffsetFrom(from, to, 0); -} - /* mov imm8 to m8 */ emitterT void eMOV8ItoM( uptr to, u8 from ) { @@ -676,7 +504,7 @@ emitterT void eMOV8ItoR( x86IntRegType to, u8 from ) } // mov imm8 to [r8+off] -emitterT void eMOV8ItoRmOffset( x86IntRegType to, u8 from, int offset) +emitterT void eMOV8ItoRm( x86IntRegType to, u8 from, int offset=0) { assert( to != ESP ); RexB(0,to); @@ -686,7 +514,7 @@ emitterT void eMOV8ItoRmOffset( x86IntRegType to, u8 from, int offset) } // mov r8 to [r8+off] -emitterT void eMOV8RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset) +emitterT void eMOV8RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) { assert( to != ESP ); RexRB(0,from,to); @@ -733,14 +561,7 @@ emitterT void eMOVSX32R16toR( x86IntRegType to, x86IntRegType from ) ModRM( 3, to, from ); } -emitterT void eMOVSX32Rm16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xBF0F ); - ModRM( 0, to, from ); -} - -emitterT void eMOVSX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ) +emitterT void eMOVSX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); write16( 0xBF0F ); @@ -764,14 +585,7 @@ emitterT void eMOVZX32R8toR( x86IntRegType to, x86IntRegType from ) ModRM( 3, to, from ); } -emitterT void eMOVZX32Rm8toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB60F ); - ModRM( 0, to, from ); -} - -emitterT void eMOVZX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ) +emitterT void eMOVZX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); write16( 0xB60F ); @@ -795,14 +609,7 @@ emitterT void eMOVZX32R16toR( x86IntRegType to, x86IntRegType from ) ModRM( 3, to, from ); } -emitterT void eMOVZX32Rm16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB70F ); - ModRM( 0, to, from ); -} - -emitterT void eMOVZX32Rm16toROffset( x86IntRegType to, x86IntRegType from, int offset ) +emitterT void eMOVZX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); write16( 0xB70F ); @@ -1014,130 +821,6 @@ emitterT void eCMOVLE32MtoR( x86IntRegType to, uptr from ) // arithmetic 
instructions / //////////////////////////////////// -/* add imm32 to r64 */ -emitterT void eADD64ItoR( x86IntRegType to, u32 from ) -{ - Rex(1, 0, 0, to >> 3); - if ( to == EAX) { - write8( 0x05 ); - } - else { - write8( 0x81 ); - ModRM( 3, 0, to ); - } - write32( from ); -} - -/* add m64 to r64 */ -emitterT void eADD64MtoR( x86IntRegType to, uptr from ) -{ - Rex(1, to >> 3, 0, 0); - write8( 0x03 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* add r64 to r64 */ -emitterT void eADD64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x01 ); - ModRM( 3, from, to ); -} - -/* add imm32 to EAX */ -emitterT void eADD32ItoEAX( s32 imm ) -{ - write8( 0x05 ); - write32( imm ); -} - -/* add imm32 to r32 */ -emitterT void eADD32ItoR( x86IntRegType to, s32 imm ) -{ - RexB(0, to); - if (imm <= 127 && imm >= -128) - { - write8( 0x83 ); - ModRM( 3, 0, to ); - write8( (s8)imm ); - } - else - { - if ( to == EAX ) { - eADD32ItoEAX(imm); - } - else { - write8( 0x81 ); - ModRM( 3, 0, to ); - write32( imm ); - } - } -} - -/* add imm32 to m32 */ -emitterT void eADD32ItoM( uptr to, s32 imm ) -{ - if(imm <= 127 && imm >= -128) - { - write8( 0x83 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write8( imm ); - } - else - { - write8( 0x81 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( imm ); - } -} - -// add imm32 to [r32+off] -emitterT void eADD32ItoRmOffset( x86IntRegType to, s32 imm, s32 offset) -{ - RexB(0,to); - if(imm <= 127 && imm >= -128) - { - write8( 0x83 ); - WriteRmOffset(to,offset); - write8(imm); - } - else - { - write8( 0x81 ); - WriteRmOffset(to,offset); - write32(imm); - } -} - -/* add r32 to r32 */ -emitterT void eADD32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write8( 0x01 ); - ModRM( 3, from, to ); -} - -/* add r32 to m32 */ -emitterT void eADD32RtoM(uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x01 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* add m32 to r32 */ -emitterT void eADD32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x03 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - // add r16 to r16 emitterT void eADD16RtoR( x86IntRegType to , x86IntRegType from ) { @@ -1221,55 +904,6 @@ emitterT void eADD8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -/* adc imm32 to r32 */ -emitterT void eADC32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x15 ); - } - else { - write8( 0x81 ); - ModRM( 3, 2, to ); - } - write32( from ); -} - -/* adc imm32 to m32 */ -emitterT void eADC32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 2, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* adc r32 to r32 */ -emitterT void eADC32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x11 ); - ModRM( 3, from, to ); -} - -/* adc m32 to r32 */ -emitterT void eADC32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x13 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// adc r32 to m32 -emitterT void eADC32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x11 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - /* inc r32 */ emitterT void eINC32R( x86IntRegType to ) { @@ -1301,85 +935,6 @@ emitterT void eINC16M( u32 to ) } -/* sub imm32 to r64 */ -emitterT void eSUB64ItoR( x86IntRegType to, u32 from ) -{ - RexB(1, to); - if ( to == EAX ) { - write8( 0x2D ); - } - else { - write8( 
0x81 ); - ModRM( 3, 5, to ); - } - write32( from ); -} - -/* sub r64 to r64 */ -emitterT void eSUB64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x29 ); - ModRM( 3, from, to ); -} - -/* sub m64 to r64 */ -emitterT void eSUB64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x2B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* sub imm32 to r32 */ -emitterT void eSUB32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x2D ); - } - else { - write8( 0x81 ); - ModRM( 3, 5, to ); - } - write32( from ); -} - -/* sub imm32 to m32 */ -emitterT void eSUB32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* sub r32 to r32 */ -emitterT void eSUB32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write8( 0x29 ); - ModRM( 3, from, to ); -} - -/* sub m32 to r32 */ -emitterT void eSUB32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x2B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// sub r32 to m32 -emitterT void eSUB32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x29 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} // sub r16 to r16 emitterT void eSUB16RtoR( x86IntRegType to, u16 from ) @@ -1422,60 +977,6 @@ emitterT void eSUB16MtoR( x86IntRegType to, uptr from ) { write32( MEMADDR(from, 4) ); } -/* sbb r64 to r64 */ -emitterT void eSBB64RtoR( x86IntRegType to, x86IntRegType from ) { - RexRB(1, from,to); - write8( 0x19 ); - ModRM( 3, from, to ); -} - -/* sbb imm32 to r32 */ -emitterT void eSBB32ItoR( x86IntRegType to, u32 from ) { - RexB(0,to); - if ( to == EAX ) { - write8( 0x1D ); - } - else { - write8( 0x81 ); - ModRM( 3, 3, to ); - } - write32( from ); -} - -/* sbb imm32 to m32 */ -emitterT void eSBB32ItoM( uptr to, u32 from ) { - write8( 0x81 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* sbb r32 to r32 */ -emitterT void eSBB32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x19 ); - ModRM( 3, from, to ); -} - -/* sbb m32 to r32 */ -emitterT void eSBB32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x1B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* sbb r32 to m32 */ -emitterT void eSBB32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x19 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - /* dec r32 */ emitterT void eDEC32R( x86IntRegType to ) { @@ -1582,51 +1083,6 @@ emitterT void eIDIV32M( u32 from ) // shifting instructions / //////////////////////////////////// -/* shl imm8 to r64 */ -emitterT void eSHL64ItoR( x86IntRegType to, u8 from ) -{ - RexB(1, to); - if ( from == 1 ) - { - write8( 0xD1 ); - ModRM( 3, 4, to ); - return; - } - write8( 0xC1 ); - ModRM( 3, 4, to ); - write8( from ); -} - -/* shl cl to r64 */ -emitterT void eSHL64CLtoR( x86IntRegType to ) -{ - RexB(1, to); - write8( 0xD3 ); - ModRM( 3, 4, to ); -} - -/* shr imm8 to r64 */ -emitterT void eSHR64ItoR( x86IntRegType to, u8 from ) -{ - RexB(1,to); - if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 3, 5, to ); - return; - } - write8( 0xC1 ); - ModRM( 3, 5, to ); - write8( from ); -} - -/* shr cl to r64 */ -emitterT void eSHR64CLtoR( x86IntRegType to ) -{ - RexB(1, to); - write8( 0xD3 ); - ModRM( 3, 5, to ); -} - /* shl imm8 to r32 */ emitterT void eSHL32ItoR( x86IntRegType to, u8 from ) { @@ -1775,29 +1231,6 @@ emitterT void 
eSHR8ItoR( x86IntRegType to, u8 from ) } } -/* sar imm8 to r64 */ -emitterT void eSAR64ItoR( x86IntRegType to, u8 from ) -{ - RexB(1,to); - if ( from == 1 ) - { - write8( 0xD1 ); - ModRM( 3, 7, to ); - return; - } - write8( 0xC1 ); - ModRM( 3, 7, to ); - write8( from ); -} - -/* sar cl to r64 */ -emitterT void eSAR64CLtoR( x86IntRegType to ) -{ - RexB(1, to); - write8( 0xD3 ); - ModRM( 3, 7, to ); -} - /* sar imm8 to r32 */ emitterT void eSAR32ItoR( x86IntRegType to, u8 from ) { @@ -1846,7 +1279,7 @@ emitterT void eSAR16ItoR( x86IntRegType to, u8 from ) write8( from ); } -emitterT void eROR32ItoR( x86IntRegType to,u8 from ) +/*emitterT void eROR32ItoR( x86IntRegType to,u8 from ) { RexB(0,to); if ( from == 1 ) { @@ -1859,7 +1292,7 @@ emitterT void eROR32ItoR( x86IntRegType to,u8 from ) write8( 0xc8 | to ); write8( from ); } -} +}*/ emitterT void eRCR32ItoR( x86IntRegType to, u8 from ) { @@ -1917,95 +1350,6 @@ emitterT void eSHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) // logical instructions / //////////////////////////////////// -/* or imm32 to r32 */ -emitterT void eOR64ItoR( x86IntRegType to, u32 from ) -{ - RexB(1, to); - if ( to == EAX ) { - write8( 0x0D ); - } - else { - write8( 0x81 ); - ModRM( 3, 1, to ); - } - write32( from ); -} - -/* or m64 to r64 */ -emitterT void eOR64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x0B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* or r64 to r64 */ -emitterT void eOR64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x09 ); - ModRM( 3, from, to ); -} - -// or r32 to m64 -emitterT void eOR64RtoM(uptr to, x86IntRegType from ) -{ - RexR(1,from); - write8( 0x09 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* or imm32 to r32 */ -emitterT void eOR32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x0D ); - } - else { - write8( 0x81 ); - ModRM( 3, 1, to ); - } - write32( from ); -} - -/* or imm32 to m32 */ -emitterT void eOR32ItoM(uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* or r32 to r32 */ -emitterT void eOR32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x09 ); - ModRM( 3, from, to ); -} - -/* or r32 to m32 */ -emitterT void eOR32RtoM(uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x09 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* or m32 to r32 */ -emitterT void eOR32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x0B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - // or r16 to r16 emitterT void eOR16RtoR( x86IntRegType to, x86IntRegType from ) { @@ -2095,271 +1439,6 @@ emitterT void eOR8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -/* xor imm32 to r64 */ -emitterT void eXOR64ItoR( x86IntRegType to, u32 from ) -{ - RexB(1,to); - if ( to == EAX ) { - write8( 0x35 ); - } else { - write8( 0x81 ); - ModRM( 3, 6, to ); - } - write32( from ); -} - -/* xor r64 to r64 */ -emitterT void eXOR64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x31 ); - ModRM( 3, from, to ); -} - -/* xor m64 to r64 */ -emitterT void eXOR64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x33 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* xor r64 to m64 */ -emitterT void eXOR64RtoM( uptr to, x86IntRegType from ) -{ - RexR(1,from); - write8( 0x31 ); - ModRM( 0, from, DISP32 ); 
- write32( MEMADDR(to, 4) ); -} - -/* xor imm32 to r32 */ -emitterT void eXOR32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x35 ); - } - else { - write8( 0x81 ); - ModRM( 3, 6, to ); - } - write32( from ); -} - -/* xor imm32 to m32 */ -emitterT void eXOR32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 6, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* xor r32 to r32 */ -emitterT void eXOR32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x31 ); - ModRM( 3, from, to ); -} - -/* xor r16 to r16 */ -emitterT void eXOR16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8( 0x66 ); - RexRB(0,from,to); - write8( 0x31 ); - ModRM( 3, from, to ); -} - -/* xor r32 to m32 */ -emitterT void eXOR32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x31 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* xor m32 to r32 */ -emitterT void eXOR32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x33 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// xor imm16 to r16 -emitterT void eXOR16ItoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexB(0,to); - if ( to == EAX ) { - write8( 0x35 ); - } - else { - write8( 0x81 ); - ModRM( 3, 6, to ); - } - write16( from ); -} - -// xor r16 to m16 -emitterT void eXOR16RtoM( uptr to, x86IntRegType from ) -{ - write8(0x66); - RexR(0,from); - write8( 0x31 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and imm32 to r64 */ -emitterT void eAND64I32toR( x86IntRegType to, u32 from ) -{ - RexB(1, to); - if ( to == EAX ) { - write8( 0x25 ); - } else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - } - write32( from ); -} - -/* and m64 to r64 */ -emitterT void eAND64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x23 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* and r64 to m64 */ -emitterT void eAND64RtoM( uptr to, x86IntRegType from ) -{ - RexR(1, from); - write8( 0x21 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and r64 to r64 */ -emitterT void eAND64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1, from, to); - write8( 0x21 ); - ModRM( 3, from, to ); -} - -/* and imm32 to m64 */ -emitterT void eAND64I32toM( uptr to, u32 from ) -{ - Rex(1,0,0,0); - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* and imm32 to r32 */ -emitterT void eAND32ItoR( x86IntRegType to, u32 from ) -{ - RexB(0,to); - if(from < 0x80) { - eAND32I8toR(to, (u8)from); - } - else { - if ( to == EAX ) { - write8( 0x25 ); - } - else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - } - write32( from ); - } -} - -/* and sign ext imm8 to r32 */ -emitterT void eAND32I8toR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - write8( 0x83 ); - ModRM( 3, 0x4, to ); - write8( from ); -} - -/* and imm32 to m32 */ -emitterT void eAND32ItoM( uptr to, u32 from ) -{ - if(from < 0x80) { - eAND32I8toM(to, (u8)from); - } - else { - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); - } -} - - -/* and sign ext imm8 to m32 */ -emitterT void eAND32I8toM( uptr to, u8 from ) -{ - write8( 0x83 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -/* and r32 to r32 */ -emitterT void eAND32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x21 ); - ModRM( 3, from, to ); -} - -/* and r32 to m32 */ -emitterT void eAND32RtoM( uptr to, x86IntRegType 
from ) -{ - RexR(0,from); - write8( 0x21 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and m32 to r32 */ -emitterT void eAND32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x23 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// Warning: Untested form of AND. -emitterT void eAND32RmtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write8( 0x23 ); - ModRM( 0, to, from ); -} - -// Warning: Untested form of AND. -emitterT void eAND32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write8( 0x23 ); - WriteRmOffsetFrom(to,from,offset); -} - // and r16 to r16 emitterT void eAND16RtoR( x86IntRegType to, x86IntRegType from ) { @@ -2480,14 +1559,6 @@ emitterT void eAND8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4)); } -/* not r64 */ -emitterT void eNOT64R( x86IntRegType from ) -{ - RexB(1, from); - write8( 0xF7 ); - ModRM( 3, 2, from ); -} - /* not r32 */ emitterT void eNOT32R( x86IntRegType from ) { @@ -2504,14 +1575,6 @@ emitterT void eNOT32M( u32 from ) write32( MEMADDR(from, 4)); } -/* neg r64 */ -emitterT void eNEG64R( x86IntRegType from ) -{ - RexB(1, from); - write8( 0xF7 ); - ModRM( 3, 3, from ); -} - /* neg r32 */ emitterT void eNEG32R( x86IntRegType from ) { @@ -2875,14 +1938,6 @@ emitterT void eCALL32R( x86IntRegType to ) ModRM( 3, 2, to ); } -/* call r64 */ -emitterT void eCALL64R( x86IntRegType to ) -{ - RexB(0, to); - write8( 0xFF ); - ModRM( 3, 2, to ); -} - /* call m32 */ emitterT void eCALL32M( u32 to ) { @@ -2895,98 +1950,40 @@ emitterT void eCALL32M( u32 to ) // misc instructions / //////////////////////////////////// -/* cmp imm32 to r64 */ -emitterT void eCMP64I32toR( x86IntRegType to, u32 from ) -{ - RexB(1, to); - if ( to == EAX ) { - write8( 0x3D ); - } - else { - write8( 0x81 ); - ModRM( 3, 7, to ); - } - write32( from ); -} - -/* cmp m64 to r64 */ -emitterT void eCMP64MtoR( x86IntRegType to, uptr from ) -{ - RexR(1, to); - write8( 0x3B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// cmp r64 to r64 -emitterT void eCMP64RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(1,from,to); - write8( 0x39 ); - ModRM( 3, from, to ); -} - -/* cmp imm32 to r32 */ -emitterT void eCMP32ItoR( x86IntRegType to, u32 from ) +// cmp imm8 to [r32] (byte ptr) +emitterT void eCMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) { RexB(0,to); - if ( to == EAX ) { - write8( 0x3D ); - } - else { - write8( 0x81 ); - ModRM( 3, 7, to ); - } - write32( from ); -} - -/* cmp imm32 to m32 */ -emitterT void eCMP32ItoM( uptr to, u32 from ) -{ - write8( 0x81 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); -} - -/* cmp r32 to r32 */ -emitterT void eCMP32RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x39 ); - ModRM( 3, from, to ); -} - -/* cmp m32 to r32 */ -emitterT void eCMP32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x3B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// cmp imm8 to [r32] -emitterT void eCMP32I8toRm( x86IntRegType to, u8 from) -{ - RexB(0,to); - write8( 0x83 ); - ModRM( 0, 7, to ); - write8(from); -} - -// cmp imm32 to [r32+off] -emitterT void eCMP32I8toRmOffset8( x86IntRegType to, u8 from, u8 off) -{ - RexB(0,to); - write8( 0x83 ); - ModRM( 1, 7, to ); - write8(off); + write8( 0x80 ); + ModRM( (off != 0), 7, to ); + if( off != 0 ) write8(off); write8(from); } // cmp imm8 to [r32] -emitterT void eCMP32I8toM( uptr to, u8 from) +emitterT void 
eCMP32I8toRm( x86IntRegType to, u8 from, s8 off=0 ) +{ + RexB(0,to); + write8( 0x83 ); + ModRM( (off!=0), 7, to ); + if( off != 0 ) write8(off); + write8(from); +} + +// cmp imm32 to [r32] +emitterT void eCMP32ItoRm( x86IntRegType to, u32 from, s8 off=0 ) +{ + // fixme : This should use the imm8 form if 'from' is between 127 and -128. + + RexB(0,to); + write8( 0x81 ); + ModRM( (off != 0), 7, to ); + if( off != 0 ) write8(off); + write32(from); +} + +// cmp imm8 to [mem] (dword ptr) +emitterT void eCMP32I8toM( uptr to, u8 from ) { write8( 0x83 ); ModRM( 0, 7, DISP32 ); diff --git a/pcsx2/x86/ix86/ix86_group1.inl b/pcsx2/x86/ix86/ix86_group1.inl new file mode 100644 index 0000000000..ccc98726d8 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_group1.inl @@ -0,0 +1,258 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +//------------------------------------------------------------------ +// x86 Group 1 Instructions +//------------------------------------------------------------------ +// Group 1 instructions all adhere to the same encoding scheme, and so they all +// share the same emitter which has been coded here. +// +// Group 1 Table: [column value is the Reg field of the ModRM byte] +// +// 0 1 2 3 4 5 6 7 +// ADD OR ADC SBB AND SUB XOR CMP +// + +namespace x86Emitter { + +static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) +static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) + +// ------------------------------------------------------------------------ +// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the +// instruction ca be encoded as ModRm alone. +emitterT bool NeedsSibMagic( const ModSib& info ) +{ + // no registers? no sibs! + if( info.Base.IsEmpty() && info.Index.IsEmpty() ) return false; + + // A scaled register needs a SIB + if( info.Scale != 0 && !info.Index.IsEmpty() ) return true; + + // two registers needs a SIB + if( !info.Base.IsEmpty() && !info.Index.IsEmpty() ) return true; + + // If register is ESP, then we need a SIB: + if( info.Base == esp || info.Index == esp ) return true; + + return false; +} + +// ------------------------------------------------------------------------ +// Conditionally generates Sib encoding information! +// +// regfield - register field to be written to the ModRm. This is either a register specifier +// or an opcode extension. In either case, the instruction determines the value for us. +// +emitterT void EmitSibMagic( int regfield, const ModSib& info ) +{ + int displacement_size = (info.Displacement == 0) ? 0 : + ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); + + if( !NeedsSibMagic( info ) ) + { + // Use ModRm-only encoding, with the rm field holding an index/base register, if + // one has been specified. 
If neither register is specified then use Disp32 form, + // which is encoded as "EBP w/o displacement" (which is why EBP must always be + // encoded *with* a displacement of 0, if it would otherwise not have one). + + x86Register basereg = info.GetEitherReg(); + + if( basereg.IsEmpty() ) + ModRM( 0, regfield, ModRm_UseDisp32 ); + else + { + if( basereg == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, basereg.Id ); + } + } + else + { + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Index.Id, info.Scale, info.Base.Id ); + } + + switch( displacement_size ) + { + case 0: break; + case 1: write8( info.Displacement ); break; + case 2: write32( info.Displacement ); break; + jNO_DEFAULT + } +} + +// ------------------------------------------------------------------------ +// Conditionally generates Sib encoding information! +// +// regfield - register field to be written to the ModRm. This is either a register specifier +// or an opcode extension. In either case, the instruction determines the value for us. +// +emitterT void EmitSibMagic( x86Register regfield, const ModSib& info ) +{ + EmitSibMagic( regfield.Id, info ); +} + +enum Group1InstructionType +{ + G1Type_ADD=0, + G1Type_OR, + G1Type_ADC, + G1Type_SBB, + G1Type_AND, + G1Type_SUB, + G1Type_XOR, + G1Type_CMP +}; + + +emitterT void Group1_32( Group1InstructionType inst, x86Register to, x86Register from ) +{ + write8( 0x01 | (inst<<3) ); + ModRM( 3, from.Id, to.Id ); +} + +emitterT void Group1_32( Group1InstructionType inst, x86Register to, u32 imm ) +{ + if( is_s8( imm ) ) + { + write8( 0x83 ); + ModRM( 3, inst, to.Id ); + write8( (s8)imm ); + } + else + { + if( to == eax ) + write8( 0x05 | (inst<<3) ); + else + { + write8( 0x81 ); + ModRM( 3, inst, to.Id ); + } + write32( imm ); + } +} + +emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, u32 imm ) +{ + write8( is_s8( imm ) ? 
0x83 : 0x81 ); + + EmitSibMagic( inst, sibdest ); + + if( is_s8( imm ) ) + write8( (s8)imm ); + else + write32( imm ); +} + +emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, x86Register from ) +{ + write8( 0x01 | (inst<<3) ); + EmitSibMagic( from, sibdest ); +} + +/* add m32 to r32 */ +emitterT void Group1_32( Group1InstructionType inst, x86Register to, const ModSib& sibsrc ) +{ + write8( 0x03 | (inst<<3) ); + EmitSibMagic( to, sibsrc ); +} + +emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) +{ + if( to == eax ) + { + write8( 0x04 | (inst<<3) ); + write8( imm ); + } + else + { + write8( 0x80 ); + ModRM( 3, inst, to.Id ); + write8( imm ); + } +} + +////////////////////////////////////////////////////////////////////////////////////////// +// +#define DEFINE_GROUP1_OPCODE( lwr, cod ) \ + emitterT void lwr##32( x86Register to, x86Register from ) { Group1_32( G1Type_##cod, to, from ); } \ + emitterT void lwr##32( x86Register to, u32 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ + emitterT void lwr##32( x86Register to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( void* to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( void* to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ + emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } + +DEFINE_GROUP1_OPCODE( add, ADD ); +DEFINE_GROUP1_OPCODE( cmp, CMP ); +DEFINE_GROUP1_OPCODE( or, OR ); +DEFINE_GROUP1_OPCODE( adc, ADC ); +DEFINE_GROUP1_OPCODE( sbb, SBB ); +DEFINE_GROUP1_OPCODE( and, AND ); +DEFINE_GROUP1_OPCODE( sub, SUB ); +DEFINE_GROUP1_OPCODE( xor, XOR ); + +} // end namespace x86Emitter + + +static __forceinline x86Emitter::x86Register _reghlp( x86IntRegType src ) +{ + return x86Emitter::x86Register( src ); +} + + +static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) +{ + return x86Emitter::x86ModRm( _reghlp(src) ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// +#define DEFINE_GROUP1_OPCODE_LEGACY( lwr, cod ) \ + emitterT void e##cod##32RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##32( _reghlp(to), _reghlp(from) ); } \ + emitterT void e##cod##32ItoR( x86IntRegType to, u32 imm ) { x86Emitter::lwr##32( _reghlp(to), imm ); } \ + emitterT void e##cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ + emitterT void e##cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ + emitterT void e##cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ + emitterT void e##cod##32ItoRm( x86IntRegType to, u32 imm, int offset=0 ){ x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ + emitterT void e##cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ + emitterT void e##cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } + +DEFINE_GROUP1_OPCODE_LEGACY( add, ADD ); +DEFINE_GROUP1_OPCODE_LEGACY( cmp, CMP ); +DEFINE_GROUP1_OPCODE_LEGACY( or, OR ); +DEFINE_GROUP1_OPCODE_LEGACY( adc, ADC ); 
+DEFINE_GROUP1_OPCODE_LEGACY( sbb, SBB ); +DEFINE_GROUP1_OPCODE_LEGACY( and, AND ); +DEFINE_GROUP1_OPCODE_LEGACY( sub, SUB ); +DEFINE_GROUP1_OPCODE_LEGACY( xor, XOR ); + +emitterT void eAND32I8toR( x86IntRegType to, s8 from ) +{ + x86Emitter::and32( _reghlp(to), from ); +} + +emitterT void eAND32I8toM( uptr to, s8 from ) +{ + x86Emitter::and32( (void*)to, from ); +} diff --git a/pcsx2/x86/ix86/ix86_macros.h b/pcsx2/x86/ix86/ix86_macros.h index 2ad9cdfe47..6179fc7e1d 100644 --- a/pcsx2/x86/ix86/ix86_macros.h +++ b/pcsx2/x86/ix86/ix86_macros.h @@ -51,66 +51,51 @@ //------------------------------------------------------------------ // mov instructions //------------------------------------------------------------------ -#define MOV64RtoR eMOV64RtoR<_EmitterId_> -#define MOV64RtoM eMOV64RtoM<_EmitterId_> -#define MOV64MtoR eMOV64MtoR<_EmitterId_> -#define MOV64I32toM eMOV64I32toM<_EmitterId_> -#define MOV64I32toR eMOV64I32toR<_EmitterId_> -#define MOV64ItoR eMOV64ItoR<_EmitterId_> -#define MOV64ItoRmOffset eMOV64ItoRmOffset<_EmitterId_> -#define MOV64RmOffsettoR eMOV64RmOffsettoR<_EmitterId_> -#define MOV64RmStoR eMOV64RmStoR<_EmitterId_> -#define MOV64RtoRmOffset eMOV64RtoRmOffset<_EmitterId_> -#define MOV64RtoRmS eMOV64RtoRmS<_EmitterId_> #define MOV32RtoR eMOV32RtoR<_EmitterId_> #define MOV32RtoM eMOV32RtoM<_EmitterId_> #define MOV32MtoR eMOV32MtoR<_EmitterId_> #define MOV32RmtoR eMOV32RmtoR<_EmitterId_> -#define MOV32RmtoROffset eMOV32RmtoROffset<_EmitterId_> +#define MOV32RmtoR eMOV32RmtoR<_EmitterId_> #define MOV32RmStoR eMOV32RmStoR<_EmitterId_> #define MOV32RmSOffsettoR eMOV32RmSOffsettoR<_EmitterId_> #define MOV32RtoRm eMOV32RtoRm<_EmitterId_> #define MOV32RtoRmS eMOV32RtoRmS<_EmitterId_> #define MOV32ItoR eMOV32ItoR<_EmitterId_> #define MOV32ItoM eMOV32ItoM<_EmitterId_> -#define MOV32ItoRmOffset eMOV32ItoRmOffset<_EmitterId_> -#define MOV32RtoRmOffset eMOV32RtoRmOffset<_EmitterId_> +#define MOV32ItoRm eMOV32ItoRm<_EmitterId_> +#define MOV32RtoRm eMOV32RtoRm<_EmitterId_> #define MOV16RtoM eMOV16RtoM<_EmitterId_> #define MOV16MtoR eMOV16MtoR<_EmitterId_> #define MOV16RmtoR eMOV16RmtoR<_EmitterId_> -#define MOV16RmtoROffset eMOV16RmtoROffset<_EmitterId_> +#define MOV16RmtoR eMOV16RmtoR<_EmitterId_> #define MOV16RmSOffsettoR eMOV16RmSOffsettoR<_EmitterId_> #define MOV16RtoRm eMOV16RtoRm<_EmitterId_> #define MOV16ItoM eMOV16ItoM<_EmitterId_> #define MOV16RtoRmS eMOV16RtoRmS<_EmitterId_> #define MOV16ItoR eMOV16ItoR<_EmitterId_> -#define MOV16ItoRmOffset eMOV16ItoRmOffset<_EmitterId_> -#define MOV16RtoRmOffset eMOV16RtoRmOffset<_EmitterId_> +#define MOV16ItoRm eMOV16ItoRm<_EmitterId_> +#define MOV16RtoRm eMOV16RtoRm<_EmitterId_> #define MOV8RtoM eMOV8RtoM<_EmitterId_> #define MOV8MtoR eMOV8MtoR<_EmitterId_> #define MOV8RmtoR eMOV8RmtoR<_EmitterId_> -#define MOV8RmtoROffset eMOV8RmtoROffset<_EmitterId_> +#define MOV8RmtoR eMOV8RmtoR<_EmitterId_> #define MOV8RmSOffsettoR eMOV8RmSOffsettoR<_EmitterId_> #define MOV8RtoRm eMOV8RtoRm<_EmitterId_> #define MOV8ItoM eMOV8ItoM<_EmitterId_> #define MOV8ItoR eMOV8ItoR<_EmitterId_> -#define MOV8ItoRmOffset eMOV8ItoRmOffset<_EmitterId_> -#define MOV8RtoRmOffset eMOV8RtoRmOffset<_EmitterId_> +#define MOV8ItoRm eMOV8ItoRm<_EmitterId_> +#define MOV8RtoRm eMOV8RtoRm<_EmitterId_> #define MOVSX32R8toR eMOVSX32R8toR<_EmitterId_> #define MOVSX32Rm8toR eMOVSX32Rm8toR<_EmitterId_> -#define MOVSX32Rm8toROffset eMOVSX32Rm8toROffset<_EmitterId_> #define MOVSX32M8toR eMOVSX32M8toR<_EmitterId_> #define MOVSX32R16toR eMOVSX32R16toR<_EmitterId_> #define MOVSX32Rm16toR 
eMOVSX32Rm16toR<_EmitterId_> -#define MOVSX32Rm16toROffset eMOVSX32Rm16toROffset<_EmitterId_> #define MOVSX32M16toR eMOVSX32M16toR<_EmitterId_> #define MOVZX32R8toR eMOVZX32R8toR<_EmitterId_> #define MOVZX32Rm8toR eMOVZX32Rm8toR<_EmitterId_> -#define MOVZX32Rm8toROffset eMOVZX32Rm8toROffset<_EmitterId_> #define MOVZX32M8toR eMOVZX32M8toR<_EmitterId_> #define MOVZX32R16toR eMOVZX32R16toR<_EmitterId_> #define MOVZX32Rm16toR eMOVZX32Rm16toR<_EmitterId_> -#define MOVZX32Rm16toROffset eMOVZX32Rm16toROffset<_EmitterId_> #define MOVZX32M16toR eMOVZX32M16toR<_EmitterId_> #define CMOVBE32RtoR eCMOVBE32RtoR<_EmitterId_> #define CMOVBE32MtoR eCMOVBE32MtoR<_EmitterId_> @@ -147,12 +132,10 @@ //------------------------------------------------------------------ // arithmetic instructions //------------------------------------------------------------------ -#define ADD64ItoR eADD64ItoR<_EmitterId_> -#define ADD64MtoR eADD64MtoR<_EmitterId_> #define ADD32ItoEAX eADD32ItoEAX<_EmitterId_> #define ADD32ItoR eADD32ItoR<_EmitterId_> #define ADD32ItoM eADD32ItoM<_EmitterId_> -#define ADD32ItoRmOffset eADD32ItoRmOffset<_EmitterId_> +#define ADD32ItoRm eADD32ItoRm<_EmitterId_> #define ADD32RtoR eADD32RtoR<_EmitterId_> #define ADD32RtoM eADD32RtoM<_EmitterId_> #define ADD32MtoR eADD32MtoR<_EmitterId_> @@ -171,7 +154,6 @@ #define INC32M eINC32M<_EmitterId_> #define INC16R eINC16R<_EmitterId_> #define INC16M eINC16M<_EmitterId_> -#define SUB64MtoR eSUB64MtoR<_EmitterId_> #define SUB32ItoR eSUB32ItoR<_EmitterId_> #define SUB32ItoM eSUB32ItoM<_EmitterId_> #define SUB32RtoR eSUB32RtoR<_EmitterId_> @@ -181,7 +163,6 @@ #define SUB16ItoR eSUB16ItoR<_EmitterId_> #define SUB16ItoM eSUB16ItoM<_EmitterId_> #define SUB16MtoR eSUB16MtoR<_EmitterId_> -#define SBB64RtoR eSBB64RtoR<_EmitterId_> #define SBB32ItoR eSBB32ItoR<_EmitterId_> #define SBB32ItoM eSBB32ItoM<_EmitterId_> #define SBB32RtoR eSBB32RtoR<_EmitterId_> @@ -203,12 +184,6 @@ //------------------------------------------------------------------ // shifting instructions //------------------------------------------------------------------ -#define SHL64ItoR eSHL64ItoR<_EmitterId_> -#define SHL64CLtoR eSHL64CLtoR<_EmitterId_> -#define SHR64ItoR eSHR64ItoR<_EmitterId_> -#define SHR64CLtoR eSHR64CLtoR<_EmitterId_> -#define SAR64ItoR eSAR64ItoR<_EmitterId_> -#define SAR64CLtoR eSAR64CLtoR<_EmitterId_> #define SHL32ItoR eSHL32ItoR<_EmitterId_> #define SHL32ItoM eSHL32ItoM<_EmitterId_> #define SHL32CLtoR eSHL32CLtoR<_EmitterId_> @@ -231,10 +206,6 @@ //------------------------------------------------------------------ // logical instructions //------------------------------------------------------------------ -#define OR64ItoR eOR64ItoR<_EmitterId_> -#define OR64MtoR eOR64MtoR<_EmitterId_> -#define OR64RtoR eOR64RtoR<_EmitterId_> -#define OR64RtoM eOR64RtoM<_EmitterId_> #define OR32ItoR eOR32ItoR<_EmitterId_> #define OR32ItoM eOR32ItoM<_EmitterId_> #define OR32RtoR eOR32RtoR<_EmitterId_> @@ -249,11 +220,6 @@ #define OR8RtoM eOR8RtoM<_EmitterId_> #define OR8ItoM eOR8ItoM<_EmitterId_> #define OR8MtoR eOR8MtoR<_EmitterId_> -#define XOR64ItoR eXOR64ItoR<_EmitterId_> -#define XOR64RtoR eXOR64RtoR<_EmitterId_> -#define XOR64MtoR eXOR64MtoR<_EmitterId_> -#define XOR64RtoR eXOR64RtoR<_EmitterId_> -#define XOR64RtoM eXOR64RtoM<_EmitterId_> #define XOR32ItoR eXOR32ItoR<_EmitterId_> #define XOR32ItoM eXOR32ItoM<_EmitterId_> #define XOR32RtoR eXOR32RtoR<_EmitterId_> @@ -262,11 +228,6 @@ #define XOR32MtoR eXOR32MtoR<_EmitterId_> #define XOR16RtoM eXOR16RtoM<_EmitterId_> #define XOR16ItoR 
eXOR16ItoR<_EmitterId_> -#define AND64I32toR eAND64I32toR<_EmitterId_> -#define AND64MtoR eAND64MtoR<_EmitterId_> -#define AND64RtoM eAND64RtoM<_EmitterId_> -#define AND64RtoR eAND64RtoR<_EmitterId_> -#define AND64I32toM eAND64I32toM<_EmitterId_> #define AND32ItoR eAND32ItoR<_EmitterId_> #define AND32I8toR eAND32I8toR<_EmitterId_> #define AND32ItoM eAND32ItoM<_EmitterId_> @@ -275,7 +236,7 @@ #define AND32RtoM eAND32RtoM<_EmitterId_> #define AND32MtoR eAND32MtoR<_EmitterId_> #define AND32RmtoR eAND32RmtoR<_EmitterId_> -#define AND32RmtoROffset eAND32RmtoROffset<_EmitterId_> +#define AND32RmtoR eAND32RmtoR<_EmitterId_> #define AND16RtoR eAND16RtoR<_EmitterId_> #define AND16ItoR eAND16ItoR<_EmitterId_> #define AND16ItoM eAND16ItoM<_EmitterId_> @@ -286,10 +247,8 @@ #define AND8RtoM eAND8RtoM<_EmitterId_> #define AND8MtoR eAND8MtoR<_EmitterId_> #define AND8RtoR eAND8RtoR<_EmitterId_> -#define NOT64R eNOT64R<_EmitterId_> #define NOT32R eNOT32R<_EmitterId_> #define NOT32M eNOT32M<_EmitterId_> -#define NEG64R eNEG64R<_EmitterId_> #define NEG32R eNEG32R<_EmitterId_> #define NEG32M eNEG32M<_EmitterId_> #define NEG16R eNEG16R<_EmitterId_> @@ -349,15 +308,13 @@ //------------------------------------------------------------------ // misc instructions //------------------------------------------------------------------ -#define CMP64I32toR eCMP64I32toR<_EmitterId_> -#define CMP64MtoR eCMP64MtoR<_EmitterId_> -#define CMP64RtoR eCMP64RtoR<_EmitterId_> #define CMP32ItoR eCMP32ItoR<_EmitterId_> #define CMP32ItoM eCMP32ItoM<_EmitterId_> #define CMP32RtoR eCMP32RtoR<_EmitterId_> #define CMP32MtoR eCMP32MtoR<_EmitterId_> +#define CMP32ItoRm eCMP32ItoRm<_EmitterId_> +#define CMP8I8toRm eCMP8I8toRm<_EmitterId_> #define CMP32I8toRm eCMP32I8toRm<_EmitterId_> -#define CMP32I8toRmOffset8 eCMP32I8toRmOffset8<_EmitterId_> #define CMP32I8toM eCMP32I8toM<_EmitterId_> #define CMP16ItoR eCMP16ItoR<_EmitterId_> #define CMP16ItoM eCMP16ItoM<_EmitterId_> @@ -539,16 +496,16 @@ #define PUNPCKHDQMtoR ePUNPCKHDQMtoR<_EmitterId_> #define MOVQ64ItoR eMOVQ64ItoR<_EmitterId_> #define MOVQRtoR eMOVQRtoR<_EmitterId_> -#define MOVQRmtoROffset eMOVQRmtoROffset<_EmitterId_> -#define MOVQRtoRmOffset eMOVQRtoRmOffset<_EmitterId_> +#define MOVQRmtoR eMOVQRmtoR<_EmitterId_> +#define MOVQRtoRm eMOVQRtoRm<_EmitterId_> #define MOVDMtoMMX eMOVDMtoMMX<_EmitterId_> #define MOVDMMXtoM eMOVDMMXtoM<_EmitterId_> #define MOVD32RtoMMX eMOVD32RtoMMX<_EmitterId_> #define MOVD32RmtoMMX eMOVD32RmtoMMX<_EmitterId_> -#define MOVD32RmOffsettoMMX eMOVD32RmOffsettoMMX<_EmitterId_> +#define MOVD32RmtoMMX eMOVD32RmtoMMX<_EmitterId_> #define MOVD32MMXtoR eMOVD32MMXtoR<_EmitterId_> #define MOVD32MMXtoRm eMOVD32MMXtoRm<_EmitterId_> -#define MOVD32MMXtoRmOffset eMOVD32MMXtoRmOffset<_EmitterId_> +#define MOVD32MMXtoRm eMOVD32MMXtoRm<_EmitterId_> #define PINSRWRtoMMX ePINSRWRtoMMX<_EmitterId_> #define PSHUFWRtoR ePSHUFWRtoR<_EmitterId_> #define PSHUFWMtoR ePSHUFWMtoR<_EmitterId_> @@ -575,33 +532,31 @@ #define SSE_MOVSS_XMM_to_M32 eSSE_MOVSS_XMM_to_M32<_EmitterId_> #define SSE_MOVSS_XMM_to_Rm eSSE_MOVSS_XMM_to_Rm<_EmitterId_> #define SSE_MOVSS_XMM_to_XMM eSSE_MOVSS_XMM_to_XMM<_EmitterId_> -#define SSE_MOVSS_RmOffset_to_XMM eSSE_MOVSS_RmOffset_to_XMM<_EmitterId_> -#define SSE_MOVSS_XMM_to_RmOffset eSSE_MOVSS_XMM_to_RmOffset<_EmitterId_> +#define SSE_MOVSS_Rm_to_XMM eSSE_MOVSS_Rm_to_XMM<_EmitterId_> +#define SSE_MOVSS_XMM_to_Rm eSSE_MOVSS_XMM_to_Rm<_EmitterId_> #define SSE_MASKMOVDQU_XMM_to_XMM eSSE_MASKMOVDQU_XMM_to_XMM<_EmitterId_> #define SSE_MOVLPS_M64_to_XMM 
eSSE_MOVLPS_M64_to_XMM<_EmitterId_> #define SSE_MOVLPS_XMM_to_M64 eSSE_MOVLPS_XMM_to_M64<_EmitterId_> -#define SSE_MOVLPS_RmOffset_to_XMM eSSE_MOVLPS_RmOffset_to_XMM<_EmitterId_> -#define SSE_MOVLPS_XMM_to_RmOffset eSSE_MOVLPS_XMM_to_RmOffset<_EmitterId_> +#define SSE_MOVLPS_Rm_to_XMM eSSE_MOVLPS_Rm_to_XMM<_EmitterId_> +#define SSE_MOVLPS_XMM_to_Rm eSSE_MOVLPS_XMM_to_Rm<_EmitterId_> #define SSE_MOVHPS_M64_to_XMM eSSE_MOVHPS_M64_to_XMM<_EmitterId_> #define SSE_MOVHPS_XMM_to_M64 eSSE_MOVHPS_XMM_to_M64<_EmitterId_> -#define SSE_MOVHPS_RmOffset_to_XMM eSSE_MOVHPS_RmOffset_to_XMM<_EmitterId_> -#define SSE_MOVHPS_XMM_to_RmOffset eSSE_MOVHPS_XMM_to_RmOffset<_EmitterId_> +#define SSE_MOVHPS_Rm_to_XMM eSSE_MOVHPS_Rm_to_XMM<_EmitterId_> +#define SSE_MOVHPS_XMM_to_Rm eSSE_MOVHPS_XMM_to_Rm<_EmitterId_> #define SSE_MOVLHPS_XMM_to_XMM eSSE_MOVLHPS_XMM_to_XMM<_EmitterId_> #define SSE_MOVHLPS_XMM_to_XMM eSSE_MOVHLPS_XMM_to_XMM<_EmitterId_> #define SSE_MOVLPSRmtoR eSSE_MOVLPSRmtoR<_EmitterId_> -#define SSE_MOVLPSRmtoROffset eSSE_MOVLPSRmtoROffset<_EmitterId_> #define SSE_MOVLPSRtoRm eSSE_MOVLPSRtoRm<_EmitterId_> -#define SSE_MOVLPSRtoRmOffset eSSE_MOVLPSRtoRmOffset<_EmitterId_> #define SSE_MOVAPSRmStoR eSSE_MOVAPSRmStoR<_EmitterId_> #define SSE_MOVAPSRtoRmS eSSE_MOVAPSRtoRmS<_EmitterId_> -#define SSE_MOVAPSRtoRmOffset eSSE_MOVAPSRtoRmOffset<_EmitterId_> -#define SSE_MOVAPSRmtoROffset eSSE_MOVAPSRmtoROffset<_EmitterId_> +#define SSE_MOVAPSRtoRm eSSE_MOVAPSRtoRm<_EmitterId_> +#define SSE_MOVAPSRmtoR eSSE_MOVAPSRmtoR<_EmitterId_> #define SSE_MOVUPSRmStoR eSSE_MOVUPSRmStoR<_EmitterId_> #define SSE_MOVUPSRtoRmS eSSE_MOVUPSRtoRmS<_EmitterId_> #define SSE_MOVUPSRtoRm eSSE_MOVUPSRtoRm<_EmitterId_> #define SSE_MOVUPSRmtoR eSSE_MOVUPSRmtoR<_EmitterId_> -#define SSE_MOVUPSRmtoROffset eSSE_MOVUPSRmtoROffset<_EmitterId_> -#define SSE_MOVUPSRtoRmOffset eSSE_MOVUPSRtoRmOffset<_EmitterId_> +#define SSE_MOVUPSRmtoR eSSE_MOVUPSRmtoR<_EmitterId_> +#define SSE_MOVUPSRtoRm eSSE_MOVUPSRtoRm<_EmitterId_> #define SSE_RCPPS_XMM_to_XMM eSSE_RCPPS_XMM_to_XMM<_EmitterId_> #define SSE_RCPPS_M128_to_XMM eSSE_RCPPS_M128_to_XMM<_EmitterId_> #define SSE_RCPSS_XMM_to_XMM eSSE_RCPSS_XMM_to_XMM<_EmitterId_> @@ -676,7 +631,7 @@ #define SSE_UNPCKHPS_XMM_to_XMM eSSE_UNPCKHPS_XMM_to_XMM<_EmitterId_> #define SSE_SHUFPS_XMM_to_XMM eSSE_SHUFPS_XMM_to_XMM<_EmitterId_> #define SSE_SHUFPS_M128_to_XMM eSSE_SHUFPS_M128_to_XMM<_EmitterId_> -#define SSE_SHUFPS_RmOffset_to_XMM eSSE_SHUFPS_RmOffset_to_XMM<_EmitterId_> +#define SSE_SHUFPS_Rm_to_XMM eSSE_SHUFPS_Rm_to_XMM<_EmitterId_> #define SSE_CMPEQPS_M128_to_XMM eSSE_CMPEQPS_M128_to_XMM<_EmitterId_> #define SSE_CMPEQPS_XMM_to_XMM eSSE_CMPEQPS_XMM_to_XMM<_EmitterId_> #define SSE_CMPLTPS_M128_to_XMM eSSE_CMPLTPS_M128_to_XMM<_EmitterId_> @@ -780,8 +735,8 @@ #define SSE2_MOVQ_XMM_to_M64 eSSE2_MOVQ_XMM_to_M64<_EmitterId_> #define SSE2_MOVDQ2Q_XMM_to_MM eSSE2_MOVDQ2Q_XMM_to_MM<_EmitterId_> #define SSE2_MOVQ2DQ_MM_to_XMM eSSE2_MOVQ2DQ_MM_to_XMM<_EmitterId_> -#define SSE2_MOVDQARtoRmOffset eSSE2_MOVDQARtoRmOffset<_EmitterId_> -#define SSE2_MOVDQARmtoROffset eSSE2_MOVDQARmtoROffset<_EmitterId_> +#define SSE2_MOVDQARtoRm eSSE2_MOVDQARtoRm<_EmitterId_> +#define SSE2_MOVDQARmtoR eSSE2_MOVDQARmtoR<_EmitterId_> #define SSE2_CVTDQ2PS_M128_to_XMM eSSE2_CVTDQ2PS_M128_to_XMM<_EmitterId_> #define SSE2_CVTDQ2PS_XMM_to_XMM eSSE2_CVTDQ2PS_XMM_to_XMM<_EmitterId_> #define SSE2_CVTPS2DQ_M128_to_XMM eSSE2_CVTPS2DQ_M128_to_XMM<_EmitterId_> @@ -920,11 +875,11 @@ #define SSE2_MOVD_M32_to_XMM eSSE2_MOVD_M32_to_XMM<_EmitterId_> #define 
SSE2_MOVD_R_to_XMM eSSE2_MOVD_R_to_XMM<_EmitterId_> #define SSE2_MOVD_Rm_to_XMM eSSE2_MOVD_Rm_to_XMM<_EmitterId_> -#define SSE2_MOVD_RmOffset_to_XMM eSSE2_MOVD_RmOffset_to_XMM<_EmitterId_> +#define SSE2_MOVD_Rm_to_XMM eSSE2_MOVD_Rm_to_XMM<_EmitterId_> #define SSE2_MOVD_XMM_to_M32 eSSE2_MOVD_XMM_to_M32<_EmitterId_> #define SSE2_MOVD_XMM_to_R eSSE2_MOVD_XMM_to_R<_EmitterId_> #define SSE2_MOVD_XMM_to_Rm eSSE2_MOVD_XMM_to_Rm<_EmitterId_> -#define SSE2_MOVD_XMM_to_RmOffset eSSE2_MOVD_XMM_to_RmOffset<_EmitterId_> +#define SSE2_MOVD_XMM_to_Rm eSSE2_MOVD_XMM_to_Rm<_EmitterId_> #define SSE2_MOVQ_XMM_to_R eSSE2_MOVQ_XMM_to_R<_EmitterId_> #define SSE2_MOVQ_R_to_XMM eSSE2_MOVQ_R_to_XMM<_EmitterId_> //------------------------------------------------------------------ diff --git a/pcsx2/x86/ix86/ix86_mmx.inl b/pcsx2/x86/ix86/ix86_mmx.inl index 18126cd6e1..bc7d183319 100644 --- a/pcsx2/x86/ix86/ix86_mmx.inl +++ b/pcsx2/x86/ix86/ix86_mmx.inl @@ -482,11 +482,11 @@ emitterT void eMOVQRtoR( x86MMXRegType to, x86MMXRegType from ) ModRM( 3, to, from ); } -emitterT void eMOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset ) +emitterT void eMOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ) { write16( 0x6F0F ); - if( offset < 128 ) { + if( offset < 128 && offset >= -128) { ModRM( 1, to, from ); write8(offset); } @@ -496,11 +496,11 @@ emitterT void eMOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset } } -emitterT void eMOVQRtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ) +emitterT void eMOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ) { write16( 0x7F0F ); - if( offset < 128 ) { + if( offset < 128 && offset >= -128) { ModRM( 1, from , to ); write8(offset); } diff --git a/pcsx2/x86/ix86/ix86_sse.inl b/pcsx2/x86/ix86/ix86_sse.inl index 971a33af17..a52ba6ccd7 100644 --- a/pcsx2/x86/ix86/ix86_sse.inl +++ b/pcsx2/x86/ix86/ix86_sse.inl @@ -18,18 +18,22 @@ #pragma once -//------------------------------------------------------------------ -// SSE instructions -//------------------------------------------------------------------ - +////////////////////////////////////////////////////////////////////////////////////////// +// AlwaysUseMovaps [const] +// // This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions // do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache // and some marginal speed gains as a result. (it's possible someday in the future the per- // formance of the two instructions could change, so this constant is provided to restore MOVDQA // use easily at a later time, if needed). 
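+// (Concretely: the register/load form of movaps is encoded as 0F 28 /r, while
+// movdqa needs a mandatory 66 prefix, 66 0F 6F /r, which is the extra byte.)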
- +// static const bool AlwaysUseMovaps = true; + +//------------------------------------------------------------------ +// SSE instructions +//------------------------------------------------------------------ + #define SSEMtoR( code, overb ) \ assert( to < XMMREGS ), \ RexR(0, to), \ @@ -140,7 +144,7 @@ static const bool AlwaysUseMovaps = true; write8( op ) /* movups [r32][r32*scale] to xmm1 */ -emitterT void eSSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +emitterT void eSSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(0, to, from2, from); write16( 0x100f ); @@ -149,7 +153,7 @@ emitterT void eSSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntReg } /* movups xmm1 to [r32][r32*scale] */ -emitterT void eSSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +emitterT void eSSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(1, to, from2, from); write16( 0x110f ); @@ -181,7 +185,7 @@ emitterT void eSSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) ModRM( 0, to, from ); } -emitterT void eSSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); write16( 0x120f ); @@ -196,7 +200,7 @@ emitterT void eSSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) ModRM( 0, from, to ); } -emitterT void eSSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, from, to); write16( 0x130f ); @@ -204,7 +208,7 @@ emitterT void eSSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int } /* movaps [r32][r32*scale] to xmm1 */ -emitterT void eSSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +emitterT void eSSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { assert( from != EBP ); RexRXB(0, to, from2, from); @@ -214,7 +218,7 @@ emitterT void eSSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntReg } /* movaps xmm1 to [r32][r32*scale] */ -emitterT void eSSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) +emitterT void eSSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { assert( from != EBP ); RexRXB(0, to, from2, from); @@ -224,7 +228,7 @@ emitterT void eSSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntReg } // movaps [r32+offset] to r32 -emitterT void eSSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); write16( 0x280f ); @@ -232,7 +236,7 @@ emitterT void eSSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int } // movaps r32 to [r32+offset] -emitterT void eSSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); write16( 0x290f ); @@ -240,10 +244,10 @@ emitterT void eSSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int } // movdqa [r32+offset] to r32 -emitterT void eSSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { if( 
AlwaysUseMovaps ) - eSSE_MOVAPSRmtoROffset( to, from, offset ); + eSSE_MOVAPSRmtoR( to, from, offset ); else { write8(0x66); @@ -254,10 +258,10 @@ emitterT void eSSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int } // movdqa r32 to [r32+offset] -emitterT void eSSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { if( AlwaysUseMovaps ) - eSSE_MOVAPSRtoRmOffset( to, from, offset ); + eSSE_MOVAPSRtoRm( to, from, offset ); else { write8(0x66); @@ -268,7 +272,7 @@ emitterT void eSSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int } // movups [r32+offset] to r32 -emitterT void eSSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); write16( 0x100f ); @@ -276,7 +280,7 @@ emitterT void eSSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int } // movups r32 to [r32+offset] -emitterT void eSSE_MOVUPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); write16( 0x110f ); @@ -328,17 +332,10 @@ emitterT void eSSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) //********************************************************************************** emitterT void eSSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); } emitterT void eSSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); } -emitterT void eSSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) -{ - write8(0xf3); - RexRB(0, from, to); - write16(0x110f); - ModRM(0, from, to); -} emitterT void eSSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } } -emitterT void eSSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { write8(0xf3); RexRB(0, to, from); @@ -346,7 +343,7 @@ emitterT void eSSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { write8(0xf3); RexRB(0, from, to); @@ -361,14 +358,14 @@ emitterT void eSSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) emitterT void eSSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } emitterT void eSSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } -emitterT void eSSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); write16( 0x120f ); WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); write16(0x130f); @@ -382,14 +379,14 @@ emitterT void eSSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, emitterT void eSSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } emitterT void eSSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { 
SSERtoM( 0x170f, 0 ); } -emitterT void eSSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); write16( 0x160f ); WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); write16(0x170f); @@ -756,7 +753,7 @@ emitterT void eSSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSER emitterT void eSSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); } emitterT void eSSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); } -emitterT void eSSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) +emitterT void eSSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) { RexRB(0, to, from); write16(0xc60f); @@ -903,7 +900,7 @@ emitterT void eSSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) ModRM( 0, to, from); } -emitterT void eSSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +emitterT void eSSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { write8(0x66); RexRB(0, to, from); @@ -914,15 +911,7 @@ emitterT void eSSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, emitterT void eSSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } emitterT void eSSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); } -emitterT void eSSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) -{ - write8(0x66); - RexRB(0, from, to); - write16( 0x7e0f ); - ModRM( 0, from, to ); -} - -emitterT void eSSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) +emitterT void eSSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { write8(0x66); RexRB(0, from, to); diff --git a/pcsx2/x86/ix86/ix86_sse_helpers.h b/pcsx2/x86/ix86/ix86_sse_helpers.h index 9caa04c6a2..ccd7ef5373 100644 --- a/pcsx2/x86/ix86/ix86_sse_helpers.h +++ b/pcsx2/x86/ix86/ix86_sse_helpers.h @@ -47,16 +47,16 @@ static __forceinline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegTyp else SSE_MOVAPS_XMM_to_XMM(to, from); } -static __forceinline void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) +static __forceinline void SSEX_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { - if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoROffset(to, from, offset); - else SSE_MOVAPSRmtoROffset(to, from, offset); + if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoR(to, from, offset); + else SSE_MOVAPSRmtoR(to, from, offset); } -static __forceinline void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) +static __forceinline void SSEX_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRmOffset(to, from, offset); - else SSE_MOVAPSRtoRmOffset(to, from, offset); + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRm(to, from, offset); + else SSE_MOVAPSRtoRm(to, from, offset); } static __forceinline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ) @@ -83,22 +83,16 @@ static __forceinline void 
SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) else SSE_MOVSS_XMM_to_M32(to, from); } -static __forceinline void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ) +static __forceinline void SSEX_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { - if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from); - else SSE_MOVSS_XMM_to_Rm(to, from); + if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_Rm_to_XMM(to, from, offset); + else SSE_MOVSS_Rm_to_XMM(to, from, offset); } -static __forceinline void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +static __forceinline void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { - if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_RmOffset_to_XMM(to, from, offset); - else SSE_MOVSS_RmOffset_to_XMM(to, from, offset); -} - -static __forceinline void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_RmOffset(to, from, offset); - else SSE_MOVSS_XMM_to_RmOffset(to, from, offset); + if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from, offset); + else SSE_MOVSS_XMM_to_Rm(to, from, offset); } static __forceinline void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ) diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index e4fb71d84d..70c20eb803 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -29,6 +29,7 @@ // general types typedef int x86IntRegType; + #define EAX 0 #define EBX 3 #define ECX 1 @@ -149,3 +150,211 @@ struct CPUINFO{ extern CPUINFO cpuinfo; //------------------------------------------------------------------ + +static __forceinline bool is_s8( u32 imm ) { return (s8)imm == (s32)imm; } + +namespace x86Emitter +{ + class x86ModRm; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + struct x86Register + { + static const x86Register Empty; // defined as an empty/unused value (-1) + + int Id; + + x86Register( const x86Register& src ) : Id( src.Id ) {} + x86Register() : Id( -1 ) {} + explicit x86Register( int regId ) : Id( regId ) { } + + bool IsEmpty() const { return Id == -1; } + + bool operator==( const x86Register& src ) const { return Id == src.Id; } + bool operator!=( const x86Register& src ) const { return Id != src.Id; } + + x86ModRm operator+( const x86Register& right ) const; + x86ModRm operator+( const x86ModRm& right ) const; + + x86Register& operator=( const x86Register& src ) + { + Id = src.Id; + return *this; + } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // Similar to x86Register, but without the ability to add/combine them with ModSib. + // + class x86Register16 + { + public: + static const x86Register16 Empty; + + int Id; + + x86Register16( const x86Register16& src ) : Id( src.Id ) {} + x86Register16() : Id( -1 ) {} + explicit x86Register16( int regId ) : Id( regId ) { } + + bool IsEmpty() const { return Id == -1; } + + bool operator==( const x86Register16& src ) const { return Id == src.Id; } + bool operator!=( const x86Register16& src ) const { return Id != src.Id; } + + x86Register16& operator=( const x86Register16& src ) + { + Id = src.Id; + return *this; + } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // Similar to x86Register, but without the ability to add/combine them with ModSib. 
+ // + class x86Register8 + { + public: + static const x86Register8 Empty; + + int Id; + + x86Register8( const x86Register16& src ) : Id( src.Id ) {} + x86Register8() : Id( -1 ) {} + explicit x86Register8( int regId ) : Id( regId ) { } + + bool IsEmpty() const { return Id == -1; } + + bool operator==( const x86Register8& src ) const { return Id == src.Id; } + bool operator!=( const x86Register8& src ) const { return Id != src.Id; } + + x86Register8& operator=( const x86Register8& src ) + { + Id = src.Id; + return *this; + } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + class x86ModRm + { + public: + x86Register Base; // base register (no scale) + x86Register Index; // index reg gets multiplied by the scale + int Factor; // scale applied to the index register, in factor form (not a shift!) + s32 Displacement; // address displacement + + public: + x86ModRm( x86Register base, x86Register index, int factor=1, s32 displacement=0 ) : + Base( base ), + Index( index ), + Factor( factor ), + Displacement( displacement ) + { + } + + explicit x86ModRm( x86Register base, int displacement=0 ) : + Base( base ), + Index(), + Factor(0), + Displacement( displacement ) + { + } + + explicit x86ModRm( s32 displacement ) : + Base(), + Index(), + Factor(0), + Displacement( displacement ) + { + } + + static x86ModRm FromIndexReg( x86Register index, int scale=0, s32 displacement=0 ); + + public: + bool IsByteSizeDisp() const { return is_s8( Displacement ); } + x86Register GetEitherReg() const; + + x86ModRm& Add( s32 imm ) + { + Displacement += imm; + return *this; + } + + x86ModRm& Add( const x86Register& src ); + x86ModRm& Add( const x86ModRm& src ); + + x86ModRm operator+( const x86Register& right ) const { return x86ModRm( *this ).Add( right ); } + x86ModRm operator+( const x86ModRm& right ) const { return x86ModRm( *this ).Add( right ); } + x86ModRm operator+( const s32 imm ) const { return x86ModRm( *this ).Add( imm ); } + x86ModRm operator-( const s32 imm ) const { return x86ModRm( *this ).Add( -imm ); } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // ModSib - Internal low-level representation of the ModRM/SIB information. + // + // This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means that + // the Base, Index, Scale, and Displacement values are all valid, and it serves as a type- + // safe layer between the x86Register's operators (which generate x86ModRm types) and the + // emitter's ModSib instruction forms. Without this, the x86Register would pass as a + // ModSib type implicitly, and that would cause ambiguity on a number of instructions. + // + class ModSib + { + public: + x86Register Base; // base register (no scale) + x86Register Index; // index reg gets multiplied by the scale + int Scale; // scale applied to the index register, in scale/shift form + s32 Displacement; // offset applied to the Base/Index registers. + + ModSib( const x86ModRm& src ); + ModSib( x86Register base, x86Register index, int scale=0, s32 displacement=0 ); + ModSib( s32 disp ); + + x86Register GetEitherReg() const; + bool IsByteSizeDisp() const { return is_s8( Displacement ); } + + protected: + void Reduce(); + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // x86IndexerType - This is a static class which provisions our ptr[] syntax. 
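+	//
+	// A minimal usage sketch (assuming the Group 1 emitters from ix86_group1.inl
+	// are visible; the operands shown are purely illustrative):
+	//
+	//    using namespace x86Emitter;
+	//    add32( eax, ebx );               // add ebx to eax (register form)
+	//    add32( eax, 16 );                // imm8 form (0x83) is chosen automatically
+	//    add32( eax, ebx + ecx + 8 );     // add dword [ebx+ecx+8] to eax (SIB form)
+	//    cmp32( x86ModRm( ebx ) + 4, 0 ); // cmp dword [ebx+4], 0
+	//
+	// Internally each memory operand is routed through ptr[] to build a ModSib,
+	// which EmitSibMagic() then turns into the ModRM/SIB/displacement bytes.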
+ // + struct x86IndexerType + { + ModSib operator[]( x86Register src ) const + { + return ModSib( src, x86Register::Empty ); + } + + ModSib operator[]( const x86ModRm& src ) const + { + return ModSib( src ); + } + + ModSib operator[]( uptr src ) const + { + return ModSib( src ); + } + + ModSib operator[]( void* src ) const + { + return ModSib( (uptr)src ); + } + }; + + // ------------------------------------------------------------------------ + extern const x86Register eax; + extern const x86Register ebx; + extern const x86Register ecx; + extern const x86Register edx; + extern const x86Register esi; + extern const x86Register edi; + extern const x86Register ebp; + extern const x86Register esp; + + extern const x86IndexerType ptr; +} \ No newline at end of file From 5f35577543b6da993b37ace29b42822485050678 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Tue, 7 Apr 2009 12:25:56 +0000 Subject: [PATCH 009/143] Linux compiles again. Added back in potentially obsolete code, since it's still called. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@918 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.h | 1 + pcsx2/x86/ix86/Makefile.am | 2 +- pcsx2/x86/ix86/ix86.cpp | 2 +- pcsx2/x86/ix86/ix86.inl | 35 +++++++++++++++++++++++++++++++++++ pcsx2/x86/ix86/ix86_types.h | 2 +- 5 files changed, 39 insertions(+), 3 deletions(-) diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index 4ad1cb233e..90c3a6f105 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -24,6 +24,7 @@ struct vifCycle { u8 pad[2]; }; +// r0-r3 and c0-c3 would be more managable as arrays. struct VIFregisters { u32 stat; u32 pad0[3]; diff --git a/pcsx2/x86/ix86/Makefile.am b/pcsx2/x86/ix86/Makefile.am index 7f76f134be..880d6f18e3 100644 --- a/pcsx2/x86/ix86/Makefile.am +++ b/pcsx2/x86/ix86/Makefile.am @@ -1,4 +1,4 @@ INCLUDES = -I@srcdir@/.. -I@srcdir@/../../ -I@srcdir@/../../../common/include -I@srcdir@/../../../3rdparty noinst_LIBRARIES = libix86.a -libix86_a_SOURCES = ix86.cpp ix86.inl ix86_3dnow.inl ix86.h ix86_fpu.inl ix86_mmx.inl ix86_sse.inl ix86_tools.cpp ix86_cpudetect.cpp ix86_macros.h \ No newline at end of file +libix86_a_SOURCES = ix86.cpp ix86.inl ix86_3dnow.inl ix86.h ix86_fpu.inl ix86_mmx.inl ix86_sse.inl ix86_tools.cpp ix86_cpudetect.cpp ix86_macros.h ix86_group1.inl \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 35d90c8079..aec35d65bc 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -42,7 +42,7 @@ XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; namespace x86Emitter { - const x86IndexerType ptr; + x86IndexerType ptr; ////////////////////////////////////////////////////////////////////////////////////////// // diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86.inl index 301401ae4b..b94bab0e8d 100644 --- a/pcsx2/x86/ix86/ix86.inl +++ b/pcsx2/x86/ix86/ix86.inl @@ -38,6 +38,41 @@ // Note: the 'to' field can either be a register or a special opcode extension specifier // depending on the opcode's encoding. + +// I added this back in because it's called once from eMOV8ItoRm and eMOV16ItoRm. 
+emitterT void WriteRmOffset(x86IntRegType to, s32 offset) +{ + if ((to&7) == ESP) { + if( offset == 0 ) { + ModRM( 0, 0, 4 ); + SibSB( 0, ESP, 4 ); + } + else if( offset <= 127 && offset >= -128 ) { + ModRM( 1, 0, 4 ); + SibSB( 0, ESP, 4 ); + write8(offset); + } + else { + ModRM( 2, 0, 4 ); + SibSB( 0, ESP, 4 ); + write32(offset); + } + } + else { + if( offset == 0 ) { + ModRM( 0, 0, to ); + } + else if( offset <= 127 && offset >= -128 ) { + ModRM( 1, 0, to ); + write8(offset); + } + else { + ModRM( 2, 0, to ); + write32(offset); + } + } +} + emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset) { if ((from&7) == ESP) { diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 70c20eb803..be0e0d3ec1 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -356,5 +356,5 @@ namespace x86Emitter extern const x86Register ebp; extern const x86Register esp; - extern const x86IndexerType ptr; + extern x86IndexerType ptr; } \ No newline at end of file From bf3d124e23bf9d81ded39fd108cc34be3279a14b Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 7 Apr 2009 16:54:02 +0000 Subject: [PATCH 010/143] Switched the emitter over to using Thread-Local storage (TLS), which removes all the templates and brings us back to a more traditional-looking, macro-free, and intellisense-friendly implementation. Plus it's a lot less prone to errors and will make debugging easier down the road. (next commit will rename the files back to .cpp and get them out of the header includes) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@919 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/PrecompiledHeader.h | 21 - pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 - pcsx2/x86/iMMI.cpp | 6 +- pcsx2/x86/iR3000A.cpp | 24 +- pcsx2/x86/iR3000Atables.cpp | 34 +- pcsx2/x86/iVUzerorec.cpp | 26 +- pcsx2/x86/ix86-32/iCore-32.cpp | 4 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 22 +- pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 8 +- pcsx2/x86/ix86/ix86.cpp | 7 +- pcsx2/x86/ix86/ix86.h | 73 +- pcsx2/x86/ix86/ix86.inl | 1924 ++++++++++---------- pcsx2/x86/ix86/ix86_3dnow.inl | 210 +-- pcsx2/x86/ix86/ix86_fpu.inl | 246 +-- pcsx2/x86/ix86/ix86_group1.inl | 111 +- pcsx2/x86/ix86/ix86_mmx.inl | 608 +++---- pcsx2/x86/ix86/ix86_sse.inl | 1284 ++++++------- 17 files changed, 2281 insertions(+), 2331 deletions(-) diff --git a/pcsx2/PrecompiledHeader.h b/pcsx2/PrecompiledHeader.h index 84c3e977d2..36195bb08d 100644 --- a/pcsx2/PrecompiledHeader.h +++ b/pcsx2/PrecompiledHeader.h @@ -155,24 +155,3 @@ static __forceinline u32 timeGetTime() # define __releaseinline __forceinline #endif -////////////////////////////////////////////////////////////////////////////////////////// -// Emitter Instance Identifiers. If you add a new emitter, do it here also. -// Note: Currently most of the instances map back to 0, since existing dynarec code all -// shares iCore and must therefore all share the same emitter instance. -// (note: these don't really belong here per-se, but it's an easy spot to use for now) -enum -{ - EmitterId_R5900 = 0, - EmitterId_R3000a = EmitterId_R5900, - EmitterId_VU0micro = EmitterId_R5900, - EmitterId_VU1micro = EmitterId_R5900, - - // Cotton's new microVU, which is iCore-free - EmitterId_microVU0, - EmitterId_microVU1, - - // Air's eventual IopRec, which will also be iCore-free - EmitterId_R3000air, - - EmitterId_Count // must always be last! 
-}; diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 2618f59630..a77fc861b6 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2986,10 +2986,6 @@ RelativePath="..\..\x86\ix86\ix86_group1.inl" > - - diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index a9edfba6c2..8dabe0b5c8 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -1956,14 +1956,14 @@ CPU_SSE_XMMCACHE_END // Both Macros are 16 bytes so we can use a shift instead of a Mul instruction #define QFSRVhelper0() { \ ajmp[0] = JMP32(0); \ - x86Ptr[0] += 11; \ + x86Ptr += 11; \ } #define QFSRVhelper(shift1, shift2) { \ SSE2_PSRLDQ_I8_to_XMM(EEREC_D, shift1); \ SSE2_PSLLDQ_I8_to_XMM(t0reg, shift2); \ ajmp[shift1] = JMP32(0); \ - x86Ptr[0] += 1; \ + x86Ptr += 1; \ } void recQFSRV() @@ -1983,7 +1983,7 @@ void recQFSRV() MOV32MtoR(EAX, (uptr)&cpuRegs.sa); SHL32ItoR(EAX, 4); // Multiply SA bytes by 16 bytes (the amount of bytes in QFSRVhelper() macros) AND32ItoR(EAX, 0xf0); // This can possibly be removed but keeping it incase theres garbage in SA (cottonvibes) - ADD32ItoR(EAX, (uptr)x86Ptr[0] + 7); // ADD32 = 5 bytes, JMPR = 2 bytes + ADD32ItoR(EAX, (uptr)x86Ptr + 7); // ADD32 = 5 bytes, JMPR = 2 bytes JMPR(EAX); // Jumps to a QFSRVhelper() case below (a total of 16 different cases) // Case 0: diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 23d0d45bf0..0309d6eced 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -171,7 +171,7 @@ static void iIopDumpBlock( int startpc, u8 * ptr ) #ifdef __LINUX__ // dump the asm f = fopen( "mydump1", "wb" ); - fwrite( ptr, 1, (uptr)x86Ptr[0] - (uptr)ptr, f ); + fwrite( ptr, 1, (uptr)x86Ptr - (uptr)ptr, f ); fclose( f ); sprintf( command, "objdump -D --target=binary --architecture=i386 -M intel mydump1 | cat %s - > tempdump", filename ); system( command ); @@ -772,7 +772,7 @@ void psxSetBranchReg(u32 reg) _psxFlushCall(FLUSH_EVERYTHING); iPsxBranchTest(0xffffffff, 1); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); } void psxSetBranchImm( u32 imm ) @@ -828,7 +828,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch) if( newpc != 0xffffffff ) { CMP32ItoM((uptr)&psxRegs.pc, newpc); - JNE32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 6 )); + JNE32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 6 )); } // Skip branch jump target here: @@ -864,7 +864,7 @@ void rpsxSYSCALL() ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() ); SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 ); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); // jump target for skipping blockCycle updates x86SetJ8(j8Ptr[0]); @@ -884,7 +884,7 @@ void rpsxBREAK() j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&psxRegs.cycle, psxScaleBlockCycles() ); SUB32ItoM((uptr)&psxCycleEE, psxScaleBlockCycles()*8 ); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); x86SetJ8(j8Ptr[0]); //if (!psxbranch) psxbranch = 2; @@ -1004,7 +1004,7 @@ void iopRecRecompile(u32 startpc) x86SetPtr( recPtr ); x86Align(16); - recPtr = x86Ptr[_EmitterId_]; + recPtr = x86Ptr; s_pCurBlock = PSX_GETBLOCK(startpc); @@ -1025,7 +1025,7 @@ void iopRecRecompile(u32 startpc) psxbranch = 0; - s_pCurBlock->SetFnptr( (uptr)x86Ptr[0] ); + s_pCurBlock->SetFnptr( (uptr)x86Ptr ); s_psxBlockCycles = 0; // reset recomp state variables @@ -1160,7 
+1160,7 @@ StartRecomp: iPsxBranchTest(0xffffffff, 1); - JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)iopDispatcherReg - ( (uptr)x86Ptr + 5 )); } else { if( psxbranch ) assert( !willbranch3 ); @@ -1180,12 +1180,12 @@ StartRecomp: } } - assert( x86Ptr[0] < recMem+RECMEM_SIZE ); + assert( x86Ptr < recMem+RECMEM_SIZE ); - assert(x86Ptr[_EmitterId_] - recPtr < 0x10000); - s_pCurBlockEx->x86size = x86Ptr[_EmitterId_] - recPtr; + assert(x86Ptr - recPtr < 0x10000); + s_pCurBlockEx->x86size = x86Ptr - recPtr; - recPtr = x86Ptr[0]; + recPtr = x86Ptr; assert( (g_psxHasConstReg&g_psxFlushedConstReg) == g_psxHasConstReg ); diff --git a/pcsx2/x86/iR3000Atables.cpp b/pcsx2/x86/iR3000Atables.cpp index 1b1f4486e6..fddd6f9690 100644 --- a/pcsx2/x86/iR3000Atables.cpp +++ b/pcsx2/x86/iR3000Atables.cpp @@ -1258,7 +1258,7 @@ void rpsxJALR() static void* s_pbranchjmp; static u32 s_do32 = 0; -#define JUMPVALID(pjmp) (( x86Ptr[0] - (u8*)pjmp ) <= 0x80) +#define JUMPVALID(pjmp) (( x86Ptr - (u8*)pjmp ) <= 0x80) void rpsxSetBranchEQ(int info, int process) { @@ -1305,7 +1305,7 @@ void rpsxBEQ_process(int info, int process) else { _psxFlushAllUnused(); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; s_do32 = 0; psxSaveBranchState(); @@ -1318,7 +1318,7 @@ void rpsxBEQ_process(int info, int process) x86SetJ8A( (u8*)s_pbranchjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); s_do32 = 1; psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); @@ -1369,7 +1369,7 @@ void rpsxBNE_process(int info, int process) } _psxFlushAllUnused(); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; s_do32 = 0; rpsxSetBranchEQ(info, process); @@ -1381,7 +1381,7 @@ void rpsxBNE_process(int info, int process) x86SetJ8A( (u8*)s_pbranchjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); s_do32 = 1; psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); @@ -1423,7 +1423,7 @@ void rpsxBLTZ() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JL8(0); psxSaveBranchState(); @@ -1435,7 +1435,7 @@ void rpsxBLTZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1470,7 +1470,7 @@ void rpsxBGEZ() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JGE8(0); psxSaveBranchState(); @@ -1482,7 +1482,7 @@ void rpsxBGEZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1524,7 +1524,7 @@ void rpsxBLTZAL() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JL8(0); psxSaveBranchState(); @@ -1538,7 +1538,7 @@ void rpsxBLTZAL() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1577,7 +1577,7 @@ void rpsxBGEZAL() } CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JGE8(0); MOV32ItoM((uptr)&psxRegs.GPR.r[31], psxpc+4); @@ -1591,7 +1591,7 @@ void rpsxBGEZAL() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1631,7 +1631,7 @@ void rpsxBLEZ() _clearNeededX86regs(); CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JLE8(0); 
psxSaveBranchState(); @@ -1642,7 +1642,7 @@ void rpsxBLEZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); @@ -1679,7 +1679,7 @@ void rpsxBGTZ() _clearNeededX86regs(); CMP32ItoM((uptr)&psxRegs.GPR.r[_Rs_], 0); - u8* prevx86 = x86Ptr[0]; + u8* prevx86 = x86Ptr; u8* pjmp = JG8(0); psxSaveBranchState(); @@ -1690,7 +1690,7 @@ void rpsxBGTZ() x86SetJ8A( pjmp ); } else { - x86Ptr[0] = prevx86; + x86SetPtr( prevx86 ); psxpc -= 4; psxRegs.code = iopMemRead32( psxpc - 4 ); psxLoadBranchState(); diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 4dea960b87..80fbdc21ec 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -833,7 +833,7 @@ static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex) SuperVURecompile(); - s_recVUPtr = x86Ptr[0]; + s_recVUPtr = x86Ptr; // set the function's range VuFunctionHeader::RANGE r; @@ -1889,7 +1889,7 @@ void VuBaseBlock::AssignVFRegs() if( i == XMMREGS ) return; // nothing changed } - u8* oldX86 = x86Ptr[0]; + u8* oldX86 = x86Ptr; FORIT(itinst, insts) { @@ -2078,7 +2078,7 @@ void VuBaseBlock::AssignVFRegs() } } - assert( x86Ptr[0] == oldX86 ); + assert( x86Ptr == oldX86 ); u32 analyzechildren = !(type&BLOCKTYPE_ANALYZED); type |= BLOCKTYPE_ANALYZED; @@ -2466,7 +2466,7 @@ static void SuperVURecompile() AND32ItoM( (uptr)&VU->vifRegs->stat, ~0x4 ); MOV32ItoM((uptr)&VU->VI[REG_TPC], pchild->endpc); - JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 )); + JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 )); } // only other case is when there are two branches else assert( (*itblock)->insts.back().regs[0].pipe == VUPIPE_BRANCH ); @@ -2606,11 +2606,11 @@ void SuperVUTestVU0Condition(u32 incstack) ADD32ItoR(ESP, incstack); //CALLFunc((u32)timeout); - JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 )); + JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 )); x86SetJ8(ptr); } - else JAE32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 6 ) ); + else JAE32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 6 ) ); } void VuBaseBlock::Recompile() @@ -2618,7 +2618,7 @@ void VuBaseBlock::Recompile() if( type & BLOCKTYPE_ANALYZED ) return; x86Align(16); - pcode = x86Ptr[0]; + pcode = x86Ptr; #ifdef _DEBUG MOV32ItoM((uptr)&s_vufnheader, s_pFnHeader->startpc); @@ -2726,7 +2726,7 @@ void VuBaseBlock::Recompile() AND32ItoM( (uptr)&VU0.VI[ REG_VPU_STAT ].UL, s_vu?~0x100:~0x001 ); // E flag AND32ItoM( (uptr)&VU->vifRegs->stat, ~0x4 ); if( !branch ) MOV32ItoM((uptr)&VU->VI[REG_TPC], endpc); - JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr[0] + 5 )); + JMP32( (uptr)SuperVUEndProgram - ( (uptr)x86Ptr + 5 )); } else { @@ -2868,7 +2868,7 @@ void VuBaseBlock::Recompile() } } - pendcode = x86Ptr[0]; + pendcode = x86Ptr; type |= BLOCKTYPE_ANALYZED; LISTBLOCKS::iterator itchild; @@ -3569,7 +3569,7 @@ void recVUMI_BranchHandle() if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr[0]-1; + s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr-1; if( !(s_pCurInst->type & INST_BRANCH_DELAY) ) { j8Ptr[1] = JMP8(0); @@ -3578,7 +3578,7 @@ void recVUMI_BranchHandle() if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION ) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), pc+8); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr[0]-1; + 
s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr-1; x86SetJ8( j8Ptr[ 1 ] ); } @@ -3815,7 +3815,7 @@ void recVUMI_B( VURegs* vuu, s32 info ) if( s_pCurBlock->blocks.size() > 1 ) { s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr[0]-1; + s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1; s_UnconditionalDelay = 1; } @@ -3841,7 +3841,7 @@ void recVUMI_BAL( VURegs* vuu, s32 info ) if( s_pCurBlock->blocks.size() > 1 ) { s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); MOV32ItoR(s_JumpX86, 0); - s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr[0]-1; + s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1; s_UnconditionalDelay = 1; } diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index d9a9e75664..05655f66d9 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -161,7 +161,7 @@ void _flushConstRegs() zero_cnt++; } - rewindPtr = x86Ptr[_EmitterId_]; + rewindPtr = x86Ptr; for (i = 1, j = 0; i < 32; j++ && ++i, j %= 2) { if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1< %s", filename ); @@ -579,8 +579,8 @@ void recResetEE( void ) // so a fix will have to wait until later. -_- (air) //x86SetPtr(recMem+REC_CACHEMEM); - //dyna_block_discard_recmem=(u8*)x86Ptr[0]; - //JMP32( (uptr)&dyna_block_discard - ( (u32)x86Ptr[0] + 5 )); + //dyna_block_discard_recmem=(u8*)x86Ptr; + //JMP32( (uptr)&dyna_block_discard - ( (u32)x86Ptr + 5 )); x86SetPtr(recMem); @@ -791,7 +791,7 @@ void recSYSCALL( void ) { CMP32ItoM((uptr)&cpuRegs.pc, pc); j8Ptr[0] = JE8(0); ADD32ItoM((uptr)&cpuRegs.cycle, eeScaleBlockCycles()); - JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 5 )); + JMP32((uptr)DispatcherReg - ( (uptr)x86Ptr + 5 )); x86SetJ8(j8Ptr[0]); //branch = 2; } @@ -1148,7 +1148,7 @@ static void iBranchTest(u32 newpc, bool noDispatch) if (!noDispatch) { if (newpc == 0xffffffff) - JS32((uptr)DispatcherReg - ( (uptr)x86Ptr[0] + 6 )); + JS32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 )); else iBranch(newpc, 1); } @@ -1375,7 +1375,7 @@ void recRecompile( const u32 startpc ) x86SetPtr( recPtr ); x86Align(16); - recPtr = x86Ptr[_EmitterId_]; + recPtr = x86Ptr; s_pCurBlock = PC_GETBLOCK(startpc); @@ -1714,7 +1714,7 @@ StartRecomp: { // was dyna_block_discard_recmem. See note in recResetEE for details. 
CMP32ItoM((uptr)PSM(lpc),*(u32*)PSM(lpc)); - JNE32(((u32)&dyna_block_discard)- ( (u32)x86Ptr[0] + 6 )); + JNE32(((u32)&dyna_block_discard)- ( (u32)x86Ptr + 6 )); stg-=4; lpc+=4; @@ -1800,14 +1800,14 @@ StartRecomp: } } - assert( x86Ptr[0] < recMem+REC_CACHEMEM ); + assert( x86Ptr < recMem+REC_CACHEMEM ); assert( recStackPtr < recStack+RECSTACK_SIZE ); assert( x86FpuState == 0 ); - assert(x86Ptr[_EmitterId_] - recPtr < 0x10000); - s_pCurBlockEx->x86size = x86Ptr[_EmitterId_] - recPtr; + assert(x86Ptr - recPtr < 0x10000); + s_pCurBlockEx->x86size = x86Ptr - recPtr; - recPtr = x86Ptr[0]; + recPtr = x86Ptr; assert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg ); diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 1c39766598..aaf93d1d47 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -1930,7 +1930,7 @@ void recLQC2( void ) dohw = recSetMemLocation(_Rs_, _Imm_, mmregs, 2, 0); if( _Ft_ ) { - u8* rawreadptr = x86Ptr[0]; + u8* rawreadptr = x86Ptr; if( mmreg >= 0 ) { SSEX_MOVDQARmtoROffset(mmreg, ECX, PS2MEM_BASE_+s_nAddMemOffset); @@ -1945,7 +1945,7 @@ void recLQC2( void ) // check if writing to VUs CMP32ItoR(ECX, 0x11000000); - JAE8(rawreadptr - (x86Ptr[0]+2)); + JAE8(rawreadptr - (x86Ptr+2)); PUSH32I( (int)&VU0.VF[_Ft_].UD[0] ); CALLFunc( (int)recMemRead128 ); @@ -1999,7 +1999,7 @@ void recSQC2( void ) mmregs = _eePrepareReg(_Rs_); dohw = recSetMemLocation(_Rs_, _Imm_, mmregs, 2, 0); - rawreadptr = x86Ptr[0]; + rawreadptr = x86Ptr; if( (mmreg = _checkXMMreg(XMMTYPE_VFREG, _Ft_, MODE_READ)) >= 0) { SSEX_MOVDQARtoRmOffset(ECX, mmreg, PS2MEM_BASE_+s_nAddMemOffset); @@ -2039,7 +2039,7 @@ void recSQC2( void ) // check if writing to VUs CMP32ItoR(ECX, 0x11000000); - JAE8(rawreadptr - (x86Ptr[0]+2)); + JAE8(rawreadptr - (x86Ptr+2)); // some type of hardware write if( (mmreg = _checkXMMreg(XMMTYPE_VFREG, _Ft_, MODE_READ)) >= 0) { diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index aec35d65bc..3a8e1d2830 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -29,10 +29,9 @@ #include "System.h" #include "ix86.h" -u8 *x86Ptr[EmitterId_Count]; - -u8 *j8Ptr[32]; -u32 *j32Ptr[32]; +__threadlocal u8 *x86Ptr; +__threadlocal u8 *j8Ptr[32]; +__threadlocal u32 *j32Ptr[32]; PCSX2_ALIGNED16(u32 p[4]); PCSX2_ALIGNED16(u32 p2[4]); diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 34a10fbe59..556405dc09 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -32,7 +32,7 @@ //------------------------------------------------------------------ // Helper Macros //------------------------------------------------------------------ -#define emitterT template +#define emitterT static __forceinline #define MEMADDR(addr, oplen) (addr) @@ -46,57 +46,62 @@ // This helps catch programmer errors better than using an auto-truncated s8 parameter. 
#define assertOffset8(ofs) assert( ofs < 128 && ofs >= -128 ) +#ifdef _MSC_VER +#define __threadlocal __declspec(thread) +#else +#define __threadlocal __thread +#endif //------------------------------------------------------------------ // write functions //------------------------------------------------------------------ -extern u8 *x86Ptr[EmitterId_Count]; -extern u8 *j8Ptr[32]; -extern u32 *j32Ptr[32]; +extern __threadlocal u8 *x86Ptr; +extern __threadlocal u8 *j8Ptr[32]; +extern __threadlocal u32 *j32Ptr[32]; emitterT void write8( u8 val ) { - *x86Ptr[I] = (u8)val; - x86Ptr[I]++; + *x86Ptr = (u8)val; + x86Ptr++; } emitterT void write16( u16 val ) { - *(u16*)x86Ptr[I] = val; - x86Ptr[I] += 2; + *(u16*)x86Ptr = val; + x86Ptr += 2; } emitterT void write24( u32 val ) { - *x86Ptr[I]++ = (u8)(val & 0xff); - *x86Ptr[I]++ = (u8)((val >> 8) & 0xff); - *x86Ptr[I]++ = (u8)((val >> 16) & 0xff); + *x86Ptr++ = (u8)(val & 0xff); + *x86Ptr++ = (u8)((val >> 8) & 0xff); + *x86Ptr++ = (u8)((val >> 16) & 0xff); } emitterT void write32( u32 val ) { - *(u32*)x86Ptr[I] = val; - x86Ptr[I] += 4; + *(u32*)x86Ptr = val; + x86Ptr += 4; } emitterT void write64( u64 val ){ - *(u64*)x86Ptr[I] = val; - x86Ptr[I] += 8; + *(u64*)x86Ptr = val; + x86Ptr += 8; } //------------------------------------------------------------------ //------------------------------------------------------------------ // jump/align functions //------------------------------------------------------------------ -emitterT void ex86SetPtr( u8 *ptr ); -emitterT void ex86SetJ8( u8 *j8 ); -emitterT void ex86SetJ8A( u8 *j8 ); -emitterT void ex86SetJ16( u16 *j16 ); -emitterT void ex86SetJ16A( u16 *j16 ); -emitterT void ex86SetJ32( u32 *j32 ); -emitterT void ex86SetJ32A( u32 *j32 ); -emitterT void ex86Align( int bytes ); -emitterT void ex86AlignExecutable( int align ); +emitterT void x86SetPtr( u8 *ptr ); +emitterT void x86SetJ8( u8 *j8 ); +emitterT void x86SetJ8A( u8 *j8 ); +emitterT void x86SetJ16( u16 *j16 ); +emitterT void x86SetJ16A( u16 *j16 ); +emitterT void x86SetJ32( u32 *j32 ); +emitterT void x86SetJ32A( u32 *j32 ); +emitterT void x86Align( int bytes ); +emitterT void x86AlignExecutable( int align ); //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -113,23 +118,21 @@ emitterT u32* J32Rel( int cc, u32 to ); emitterT u64 GetCPUTick( void ); //------------------------------------------------------------------ -emitterT void eMOV32RtoR( x86IntRegType to, x86IntRegType from ); -emitterT u32* eJMP32( uptr to ); -emitterT u8* eJMP8( u8 to ); -emitterT void eCALL32( u32 to ); -emitterT void eLEA32RtoR(x86IntRegType to, x86IntRegType from, u32 offset); -emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); -emitterT void eNOP( void ); -emitterT void eAND32ItoM( uptr to, u32 from ); -emitterT void eLEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1); -emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); +emitterT void MOV32RtoR( x86IntRegType to, x86IntRegType from ); +emitterT u32* JMP32( uptr to ); +emitterT u8* JMP8( u8 to ); +emitterT void CALL32( u32 to ); +emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); +emitterT void NOP( void ); +emitterT void AND32ItoM( uptr to, u32 from ); +emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1); +emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); #define MMXONLY(code) code 
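For readers skimming this hunk, the substance of the change is that the per-emitter array x86Ptr[EmitterId_Count] becomes a single __threadlocal pointer, so every write helper simply bumps one thread-local cursor. A minimal, self-contained sketch of that pattern follows; the buffer and function names are illustrative, not taken from the patch.

#ifdef _MSC_VER
#define __threadlocal __declspec(thread)
#else
#define __threadlocal __thread
#endif

typedef unsigned char u8;
typedef unsigned int  u32;

// One emit cursor per thread, instead of one slot per emitter id.
static __threadlocal u8* emitPtr = 0;

// The write helpers advance the thread-local cursor, mirroring write8/write32 above.
static void emit8(u8 val)   { *emitPtr = val; emitPtr += 1; }
static void emit32(u32 val) { *(u32*)emitPtr = val; emitPtr += 4; }

// Equivalent of x86SetPtr: point the cursor at a code buffer before emitting.
static void setEmitPtr(u8* ptr) { emitPtr = ptr; }

int main()
{
    static u8 buffer[16];
    setEmitPtr(buffer);
    emit8(0xC3);          // ret
    return 0;
}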
#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) -#include "ix86_macros.h" #include "ix86.inl" #include "ix86_3dnow.inl" #include "ix86_fpu.inl" diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86.inl index b94bab0e8d..b29427da43 100644 --- a/pcsx2/x86/ix86/ix86.inl +++ b/pcsx2/x86/ix86/ix86.inl @@ -39,141 +39,107 @@ // Note: the 'to' field can either be a register or a special opcode extension specifier // depending on the opcode's encoding. -// I added this back in because it's called once from eMOV8ItoRm and eMOV16ItoRm. -emitterT void WriteRmOffset(x86IntRegType to, s32 offset) -{ - if ((to&7) == ESP) { - if( offset == 0 ) { - ModRM( 0, 0, 4 ); - SibSB( 0, ESP, 4 ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, 0, 4 ); - SibSB( 0, ESP, 4 ); - write8(offset); - } - else { - ModRM( 2, 0, 4 ); - SibSB( 0, ESP, 4 ); - write32(offset); - } - } - else { - if( offset == 0 ) { - ModRM( 0, 0, to ); - } - else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, 0, to ); - write8(offset); - } - else { - ModRM( 2, 0, to ); - write32(offset); - } - } -} - emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset) { if ((from&7) == ESP) { if( offset == 0 ) { - ModRM( 0, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); + ModRM( 0, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); } else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - write8(offset); + ModRM( 1, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); + write8(offset); } else { - ModRM( 2, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - write32(offset); + ModRM( 2, to, 0x4 ); + SibSB( 0, 0x4, 0x4 ); + write32(offset); } } else { if( offset == 0 ) { - ModRM( 0, to, from ); + ModRM( 0, to, from ); } else if( offset <= 127 && offset >= -128 ) { - ModRM( 1, to, from ); - write8(offset); + ModRM( 1, to, from ); + write8(offset); } else { - ModRM( 2, to, from ); - write32(offset); + ModRM( 2, to, from ); + write32(offset); } } } emitterT void ModRM( s32 mod, s32 reg, s32 rm ) { - write8( ( mod << 6 ) | ( (reg & 7) << 3 ) | ( rm & 7 ) ); + write8( ( mod << 6 ) | ( (reg & 7) << 3 ) | ( rm & 7 ) ); } emitterT void SibSB( s32 ss, s32 index, s32 base ) { - write8( ( ss << 6 ) | ( (index & 7) << 3 ) | ( base & 7 ) ); + write8( ( ss << 6 ) | ( (index & 7) << 3 ) | ( base & 7 ) ); } emitterT void SET8R( int cc, int to ) { RexB(0, to); - write8( 0x0F ); - write8( cc ); - write8( 0xC0 | ( to ) ); + write8( 0x0F ); + write8( cc ); + write8( 0xC0 | ( to ) ); } emitterT u8* J8Rel( int cc, int to ) { - write8( cc ); - write8( to ); - return (u8*)(x86Ptr[I] - 1); + write8( cc ); + write8( to ); + return (u8*)(x86Ptr - 1); } emitterT u16* J16Rel( int cc, u32 to ) { - write16( 0x0F66 ); - write8( cc ); - write16( to ); - return (u16*)( x86Ptr[I] - 2 ); + write16( 0x0F66 ); + write8( cc ); + write16( to ); + return (u16*)( x86Ptr - 2 ); } emitterT u32* J32Rel( int cc, u32 to ) { - write8( 0x0F ); - write8( cc ); - write32( to ); - return (u32*)( x86Ptr[I] - 4 ); + write8( 0x0F ); + write8( cc ); + write32( to ); + return (u32*)( x86Ptr - 4 ); } emitterT void CMOV32RtoR( int cc, int to, int from ) { RexRB(0, to, from); - write8( 0x0F ); - write8( cc ); - ModRM( 3, to, from ); + write8( 0x0F ); + write8( cc ); + ModRM( 3, to, from ); } emitterT void CMOV32MtoR( int cc, int to, uptr from ) { RexR(0, to); - write8( 0x0F ); - write8( cc ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x0F ); + write8( cc ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 
4) ); } //////////////////////////////////////////////////// -emitterT void ex86SetPtr( u8* ptr ) +emitterT void x86SetPtr( u8* ptr ) { - x86Ptr[I] = ptr; + x86Ptr = ptr; } //////////////////////////////////////////////////// -emitterT void ex86SetJ8( u8* j8 ) +emitterT void x86SetJ8( u8* j8 ) { - u32 jump = ( x86Ptr[I] - j8 ) - 1; + u32 jump = ( x86Ptr - j8 ) - 1; if ( jump > 0x7f ) { Console::Error( "j8 greater than 0x7f!!" ); @@ -182,31 +148,31 @@ emitterT void ex86SetJ8( u8* j8 ) *j8 = (u8)jump; } -emitterT void ex86SetJ8A( u8* j8 ) +emitterT void x86SetJ8A( u8* j8 ) { - u32 jump = ( x86Ptr[I] - j8 ) - 1; + u32 jump = ( x86Ptr - j8 ) - 1; if ( jump > 0x7f ) { Console::Error( "j8 greater than 0x7f!!" ); assert(0); } - if( ((uptr)x86Ptr[I]&0xf) > 4 ) { + if( ((uptr)x86Ptr&0xf) > 4 ) { - uptr newjump = jump + 16-((uptr)x86Ptr[I]&0xf); + uptr newjump = jump + 16-((uptr)x86Ptr&0xf); if( newjump <= 0x7f ) { jump = newjump; - while((uptr)x86Ptr[I]&0xf) *x86Ptr[I]++ = 0x90; + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; } } *j8 = (u8)jump; } -emitterT void ex86SetJ16( u16 *j16 ) +emitterT void x86SetJ16( u16 *j16 ) { // doesn't work - u32 jump = ( x86Ptr[I] - (u8*)j16 ) - 2; + u32 jump = ( x86Ptr - (u8*)j16 ) - 2; if ( jump > 0x7fff ) { Console::Error( "j16 greater than 0x7fff!!" ); @@ -215,84 +181,84 @@ emitterT void ex86SetJ16( u16 *j16 ) *j16 = (u16)jump; } -emitterT void ex86SetJ16A( u16 *j16 ) +emitterT void x86SetJ16A( u16 *j16 ) { - if( ((uptr)x86Ptr[I]&0xf) > 4 ) { - while((uptr)x86Ptr[I]&0xf) *x86Ptr[I]++ = 0x90; + if( ((uptr)x86Ptr&0xf) > 4 ) { + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; } - ex86SetJ16(j16); + x86SetJ16(j16); } //////////////////////////////////////////////////// -emitterT void ex86SetJ32( u32* j32 ) +emitterT void x86SetJ32( u32* j32 ) { - *j32 = ( x86Ptr[I] - (u8*)j32 ) - 4; + *j32 = ( x86Ptr - (u8*)j32 ) - 4; } -emitterT void ex86SetJ32A( u32* j32 ) +emitterT void x86SetJ32A( u32* j32 ) { - while((uptr)x86Ptr[I]&0xf) *x86Ptr[I]++ = 0x90; - ex86SetJ32(j32); + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + x86SetJ32(j32); } //////////////////////////////////////////////////// -emitterT void ex86Align( int bytes ) +emitterT void x86Align( int bytes ) { // forward align - x86Ptr[I] = (u8*)( ( (uptr)x86Ptr[I] + bytes - 1) & ~( bytes - 1 ) ); + x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); } //////////////////////////////////////////////////// // Generates executable code to align to the given alignment (could be useful for the second leg // of if/else conditionals, which usually fall through a jump target label). -emitterT void ex86AlignExecutable( int align ) +emitterT void x86AlignExecutable( int align ) { - uptr newx86 = ( (uptr)x86Ptr[I] + align - 1) & ~( align - 1 ); - uptr bytes = ( newx86 - (uptr)x86Ptr[I] ); + uptr newx86 = ( (uptr)x86Ptr + align - 1) & ~( align - 1 ); + uptr bytes = ( newx86 - (uptr)x86Ptr ); switch( bytes ) { case 0: break; - case 1: eNOP(); break; - case 2: eMOV32RtoR( ESI, ESI ); break; - case 3: write8(0x08D); write8(0x024); write8(0x024); break; - case 5: eNOP(); // falls through to 4... - case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; - case 6: write8(0x08D); write8(0x0B6); write32(0); break; - case 8: eNOP(); // falls through to 7... - case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; + case 1: NOP(); break; + case 2: MOV32RtoR( ESI, ESI ); break; + case 3: write8(0x08D); write8(0x024); write8(0x024); break; + case 5: NOP(); // falls through to 4... 
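The J8Rel helpers return a pointer to the displacement byte they just emitted, and x86SetJ8 later patches that byte once the forward target is known; the rel8 value is measured from the end of the two-byte jcc, hence the (x86Ptr - j8) - 1. A small standalone sketch of that emit-then-patch flow, using local names rather than the real emitter:

#include <cassert>

typedef unsigned char u8;

static u8  codeBuf[32];
static u8* cursor = codeBuf;

static void put8(u8 v) { *cursor++ = v; }

// Emit "jne rel8" with a zero placeholder and return the displacement byte's address.
static u8* emitJNE8()
{
    put8(0x75);          // jne opcode
    put8(0x00);          // displacement, patched later
    return cursor - 1;
}

// Patch the displacement so the branch skips ahead to the current cursor position.
static void patchJ8(u8* j8)
{
    unsigned jump = (unsigned)(cursor - j8) - 1;   // relative to the end of the jcc
    assert(jump <= 0x7f);                          // rel8 only reaches +127 bytes
    *j8 = (u8)jump;
}

int main()
{
    u8* fixup = emitJNE8();
    put8(0x90);          // body of the not-taken path (a nop here)
    patchJ8(fixup);      // branch now jumps over that nop
    return 0;
}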
+ case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; + case 6: write8(0x08D); write8(0x0B6); write32(0); break; + case 8: NOP(); // falls through to 7... + case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; default: { // for larger alignments, just use a JMP... - u8* aligned_target = eJMP8(0); - x86Ptr[I] = (u8*)newx86; - ex86SetJ8( aligned_target ); + u8* aligned_target = JMP8(0); + x86Ptr = (u8*)newx86; + x86SetJ8( aligned_target ); } } - jASSUME( x86Ptr[0] == (u8*)newx86 ); + jASSUME( x86Ptr == (u8*)newx86 ); } /********************/ /* IX86 intructions */ /********************/ -emitterT void eSTC( void ) +emitterT void STC( void ) { - write8( 0xF9 ); + write8( 0xF9 ); } -emitterT void eCLC( void ) +emitterT void CLC( void ) { - write8( 0xF8 ); + write8( 0xF8 ); } // NOP 1-byte -emitterT void eNOP( void ) +emitterT void NOP( void ) { - write8(0x90); + write8(0x90); } @@ -301,555 +267,555 @@ emitterT void eNOP( void ) //////////////////////////////////// /* mov r32 to r32 */ -emitterT void eMOV32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void MOV32RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0, from, to); - write8( 0x89 ); - ModRM( 3, from, to ); + write8( 0x89 ); + ModRM( 3, from, to ); } /* mov r32 to m32 */ -emitterT void eMOV32RtoM( uptr to, x86IntRegType from ) +emitterT void MOV32RtoM( uptr to, x86IntRegType from ) { RexR(0, from); if (from == EAX) { - write8(0xA3); + write8(0xA3); } else { - write8( 0x89 ); - ModRM( 0, from, DISP32 ); + write8( 0x89 ); + ModRM( 0, from, DISP32 ); } - write32( MEMADDR(to, 4) ); + write32( MEMADDR(to, 4) ); } /* mov m32 to r32 */ -emitterT void eMOV32MtoR( x86IntRegType to, uptr from ) +emitterT void MOV32MtoR( x86IntRegType to, uptr from ) { RexR(0, to); if (to == EAX) { - write8(0xA1); + write8(0xA1); } else { - write8( 0x8B ); - ModRM( 0, to, DISP32 ); + write8( 0x8B ); + ModRM( 0, to, DISP32 ); } - write32( MEMADDR(from, 4) ); + write32( MEMADDR(from, 4) ); } -emitterT void eMOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, offset); + write8( 0x8B ); + WriteRmOffsetFrom(to, from, offset); } /* mov [r32+r32*scale] to r32 */ -emitterT void eMOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(0,to,from2,from); - write8( 0x8B ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); + write8( 0x8B ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); } // mov r32 to [r32<( 0x8B ); - ModRM( 0, to, 0x4 ); - ModRM( scale, from1, 5); - write32(from2); + write8( 0x8B ); + ModRM( 0, to, 0x4 ); + ModRM( scale, from1, 5); + write32(from2); } /* mov r32 to [r32][r32*scale] */ -emitterT void eMOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void MOV32RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(0, to, from2, from); - write8( 0x89 ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); + write8( 0x89 ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); } /* mov imm32 to r32 */ -emitterT void eMOV32ItoR( x86IntRegType to, u32 from ) +emitterT void MOV32ItoR( x86IntRegType to, u32 from ) { RexB(0, to); - write8( 0xB8 | (to & 0x7) ); - write32( from ); + write8( 0xB8 | (to & 0x7) ); + write32( from ); } /* 
mov imm32 to m32 */ -emitterT void eMOV32ItoM(uptr to, u32 from ) +emitterT void MOV32ItoM(uptr to, u32 from ) { - write8( 0xC7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); + write8( 0xC7 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 8) ); + write32( from ); } // mov imm32 to [r32+off] -emitterT void eMOV32ItoRm( x86IntRegType to, u32 from, int offset=0) +emitterT void MOV32ItoRm( x86IntRegType to, u32 from, int offset=0) { RexB(0,to); - write8( 0xC7 ); - WriteRmOffsetFrom(0, to, offset); - write32(from); + write8( 0xC7 ); + WriteRmOffsetFrom(0, to, offset); + write32(from); } // mov r32 to [r32+off] -emitterT void eMOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) +emitterT void MOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) { RexRB(0,from,to); - write8( 0x89 ); - WriteRmOffsetFrom(from, to, offset); + write8( 0x89 ); + WriteRmOffsetFrom(from, to, offset); } /* mov r16 to m16 */ -emitterT void eMOV16RtoM(uptr to, x86IntRegType from ) +emitterT void MOV16RtoM(uptr to, x86IntRegType from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,from); - write8( 0x89 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x89 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } /* mov m16 to r16 */ -emitterT void eMOV16MtoR( x86IntRegType to, uptr from ) +emitterT void MOV16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,to); - write8( 0x8B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x8B ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void eMOV16RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOV16RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { - write8( 0x66 ); + write8( 0x66 ); RexRB(0,to,from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, offset); + write8( 0x8B ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eMOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) +emitterT void MOV16RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) { - write8(0x66); + write8(0x66); RexRXB(0,to,from1,0); - write8( 0x8B ); - ModRM( 0, to, SIB ); - SibSB( scale, from1, SIBDISP); - write32(from2); + write8( 0x8B ); + ModRM( 0, to, SIB ); + SibSB( scale, from1, SIBDISP); + write32(from2); } /* mov imm16 to m16 */ -emitterT void eMOV16ItoM( uptr to, u16 from ) +emitterT void MOV16ItoM( uptr to, u16 from ) { - write8( 0x66 ); - write8( 0xC7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); + write8( 0x66 ); + write8( 0xC7 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } /* mov r16 to [r32][r32*scale] */ -emitterT void eMOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void MOV16RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { - write8( 0x66 ); + write8( 0x66 ); RexRXB(0,to,from2,from); - write8( 0x89 ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); + write8( 0x89 ); + ModRM( 0, to, 0x4 ); + SibSB(scale, from2, from ); } -emitterT void eMOV16ItoR( x86IntRegType to, u16 from ) +emitterT void MOV16ItoR( x86IntRegType to, u16 from ) { RexB(0, to); - write16( 0xB866 | ((to & 0x7)<<8) ); - write16( from ); + write16( 0xB866 | ((to & 0x7)<<8) ); + write16( from ); } // mov imm16 to [r16+off] -emitterT void eMOV16ItoRm( x86IntRegType to, u16 from, u32 offset=0 ) +emitterT void MOV16ItoRm( x86IntRegType to, u16 
from, u32 offset=0 ) { - write8(0x66); + write8(0x66); RexB(0,to); - write8( 0xC7 ); - WriteRmOffset(to, offset); - write16(from); + write8( 0xC7 ); + WriteRmOffsetFrom(0, to, offset); + write16(from); } // mov r16 to [r16+off] -emitterT void eMOV16RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOV16RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { - write8(0x66); + write8(0x66); RexRB(0,from,to); - write8( 0x89 ); - WriteRmOffsetFrom(from, to, offset); + write8( 0x89 ); + WriteRmOffsetFrom(from, to, offset); } /* mov r8 to m8 */ -emitterT void eMOV8RtoM( uptr to, x86IntRegType from ) +emitterT void MOV8RtoM( uptr to, x86IntRegType from ) { RexR(0,from); - write8( 0x88 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x88 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } /* mov m8 to r8 */ -emitterT void eMOV8MtoR( x86IntRegType to, uptr from ) +emitterT void MOV8MtoR( x86IntRegType to, uptr from ) { RexR(0,to); - write8( 0x8A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x8A ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void eMOV8RmtoR(x86IntRegType to, x86IntRegType from, int offset=0) +emitterT void MOV8RmtoR(x86IntRegType to, x86IntRegType from, int offset=0) { RexRB(0,to,from); - write8( 0x8A ); - WriteRmOffsetFrom(to, from, offset); + write8( 0x8A ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eMOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) +emitterT void MOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, u32 from2, int scale=0 ) { RexRXB(0,to,from1,0); - write8( 0x8A ); - ModRM( 0, to, SIB ); - SibSB( scale, from1, SIBDISP); - write32(from2); + write8( 0x8A ); + ModRM( 0, to, SIB ); + SibSB( scale, from1, SIBDISP); + write32(from2); } /* mov imm8 to m8 */ -emitterT void eMOV8ItoM( uptr to, u8 from ) +emitterT void MOV8ItoM( uptr to, u8 from ) { - write8( 0xC6 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0xC6 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } // mov imm8 to r8 -emitterT void eMOV8ItoR( x86IntRegType to, u8 from ) +emitterT void MOV8ItoR( x86IntRegType to, u8 from ) { RexB(0, to); - write8( 0xB0 | (to & 0x7) ); - write8( from ); + write8( 0xB0 | (to & 0x7) ); + write8( from ); } // mov imm8 to [r8+off] -emitterT void eMOV8ItoRm( x86IntRegType to, u8 from, int offset=0) +emitterT void MOV8ItoRm( x86IntRegType to, u8 from, int offset=0) { assert( to != ESP ); RexB(0,to); - write8( 0xC6 ); - WriteRmOffset(to,offset); - write8(from); + write8( 0xC6 ); + WriteRmOffsetFrom(0, to,offset); + write8(from); } // mov r8 to [r8+off] -emitterT void eMOV8RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) +emitterT void MOV8RtoRm( x86IntRegType to, x86IntRegType from, int offset=0) { assert( to != ESP ); RexRB(0,from,to); - write8( 0x88 ); - WriteRmOffsetFrom(from,to,offset); + write8( 0x88 ); + WriteRmOffsetFrom(from,to,offset); } /* movsx r8 to r32 */ -emitterT void eMOVSX32R8toR( x86IntRegType to, x86IntRegType from ) +emitterT void MOVSX32R8toR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xBE0F ); - ModRM( 3, to, from ); + write16( 0xBE0F ); + ModRM( 3, to, from ); } -emitterT void eMOVSX32Rm8toR( x86IntRegType to, x86IntRegType from ) +emitterT void MOVSX32Rm8toR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xBE0F ); - ModRM( 0, to, from ); + write16( 0xBE0F ); + ModRM( 0, to, 
from ); } -emitterT void eMOVSX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ) +emitterT void MOVSX32Rm8toROffset( x86IntRegType to, x86IntRegType from, int offset ) { RexRB(0,to,from); - write16( 0xBE0F ); - WriteRmOffsetFrom(to,from,offset); + write16( 0xBE0F ); + WriteRmOffsetFrom(to,from,offset); } /* movsx m8 to r32 */ -emitterT void eMOVSX32M8toR( x86IntRegType to, u32 from ) +emitterT void MOVSX32M8toR( x86IntRegType to, u32 from ) { RexR(0,to); - write16( 0xBE0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xBE0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* movsx r16 to r32 */ -emitterT void eMOVSX32R16toR( x86IntRegType to, x86IntRegType from ) +emitterT void MOVSX32R16toR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xBF0F ); - ModRM( 3, to, from ); + write16( 0xBF0F ); + ModRM( 3, to, from ); } -emitterT void eMOVSX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOVSX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); - write16( 0xBF0F ); - WriteRmOffsetFrom(to,from,offset); + write16( 0xBF0F ); + WriteRmOffsetFrom(to,from,offset); } /* movsx m16 to r32 */ -emitterT void eMOVSX32M16toR( x86IntRegType to, u32 from ) +emitterT void MOVSX32M16toR( x86IntRegType to, u32 from ) { RexR(0,to); - write16( 0xBF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xBF0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* movzx r8 to r32 */ -emitterT void eMOVZX32R8toR( x86IntRegType to, x86IntRegType from ) +emitterT void MOVZX32R8toR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xB60F ); - ModRM( 3, to, from ); + write16( 0xB60F ); + ModRM( 3, to, from ); } -emitterT void eMOVZX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOVZX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); - write16( 0xB60F ); - WriteRmOffsetFrom(to,from,offset); + write16( 0xB60F ); + WriteRmOffsetFrom(to,from,offset); } /* movzx m8 to r32 */ -emitterT void eMOVZX32M8toR( x86IntRegType to, u32 from ) +emitterT void MOVZX32M8toR( x86IntRegType to, u32 from ) { RexR(0,to); - write16( 0xB60F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xB60F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* movzx r16 to r32 */ -emitterT void eMOVZX32R16toR( x86IntRegType to, x86IntRegType from ) +emitterT void MOVZX32R16toR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xB70F ); - ModRM( 3, to, from ); + write16( 0xB70F ); + ModRM( 3, to, from ); } -emitterT void eMOVZX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOVZX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset=0 ) { RexRB(0,to,from); - write16( 0xB70F ); - WriteRmOffsetFrom(to,from,offset); + write16( 0xB70F ); + WriteRmOffsetFrom(to,from,offset); } /* movzx m16 to r32 */ -emitterT void eMOVZX32M16toR( x86IntRegType to, u32 from ) +emitterT void MOVZX32M16toR( x86IntRegType to, u32 from ) { RexR(0,to); - write16( 0xB70F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xB70F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* cmovbe r32 to r32 */ -emitterT void eCMOVBE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x46, to, from ); + CMOV32RtoR( 0x46, to, from ); } /* cmovbe m32 
to r32*/ -emitterT void eCMOVBE32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVBE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x46, to, from ); + CMOV32MtoR( 0x46, to, from ); } /* cmovb r32 to r32 */ -emitterT void eCMOVB32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVB32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x42, to, from ); + CMOV32RtoR( 0x42, to, from ); } /* cmovb m32 to r32*/ -emitterT void eCMOVB32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVB32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x42, to, from ); + CMOV32MtoR( 0x42, to, from ); } /* cmovae r32 to r32 */ -emitterT void eCMOVAE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVAE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x43, to, from ); + CMOV32RtoR( 0x43, to, from ); } /* cmovae m32 to r32*/ -emitterT void eCMOVAE32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVAE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x43, to, from ); + CMOV32MtoR( 0x43, to, from ); } /* cmova r32 to r32 */ -emitterT void eCMOVA32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVA32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x47, to, from ); + CMOV32RtoR( 0x47, to, from ); } /* cmova m32 to r32*/ -emitterT void eCMOVA32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVA32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x47, to, from ); + CMOV32MtoR( 0x47, to, from ); } /* cmovo r32 to r32 */ -emitterT void eCMOVO32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVO32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x40, to, from ); + CMOV32RtoR( 0x40, to, from ); } /* cmovo m32 to r32 */ -emitterT void eCMOVO32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVO32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x40, to, from ); + CMOV32MtoR( 0x40, to, from ); } /* cmovp r32 to r32 */ -emitterT void eCMOVP32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVP32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x4A, to, from ); + CMOV32RtoR( 0x4A, to, from ); } /* cmovp m32 to r32 */ -emitterT void eCMOVP32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVP32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4A, to, from ); + CMOV32MtoR( 0x4A, to, from ); } /* cmovs r32 to r32 */ -emitterT void eCMOVS32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVS32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x48, to, from ); + CMOV32RtoR( 0x48, to, from ); } /* cmovs m32 to r32 */ -emitterT void eCMOVS32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVS32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x48, to, from ); + CMOV32MtoR( 0x48, to, from ); } /* cmovno r32 to r32 */ -emitterT void eCMOVNO32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVNO32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x41, to, from ); + CMOV32RtoR( 0x41, to, from ); } /* cmovno m32 to r32 */ -emitterT void eCMOVNO32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVNO32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x41, to, from ); + CMOV32MtoR( 0x41, to, from ); } /* cmovnp r32 to r32 */ -emitterT void eCMOVNP32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVNP32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x4B, to, from ); + CMOV32RtoR( 0x4B, to, from ); } /* cmovnp m32 to r32 */ -emitterT void eCMOVNP32MtoR( x86IntRegType to, uptr from 
) +emitterT void CMOVNP32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4B, to, from ); + CMOV32MtoR( 0x4B, to, from ); } /* cmovns r32 to r32 */ -emitterT void eCMOVNS32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVNS32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x49, to, from ); + CMOV32RtoR( 0x49, to, from ); } /* cmovns m32 to r32 */ -emitterT void eCMOVNS32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVNS32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x49, to, from ); + CMOV32MtoR( 0x49, to, from ); } /* cmovne r32 to r32 */ -emitterT void eCMOVNE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVNE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x45, to, from ); + CMOV32RtoR( 0x45, to, from ); } /* cmovne m32 to r32*/ -emitterT void eCMOVNE32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVNE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x45, to, from ); + CMOV32MtoR( 0x45, to, from ); } /* cmove r32 to r32*/ -emitterT void eCMOVE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x44, to, from ); + CMOV32RtoR( 0x44, to, from ); } /* cmove m32 to r32*/ -emitterT void eCMOVE32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x44, to, from ); + CMOV32MtoR( 0x44, to, from ); } /* cmovg r32 to r32*/ -emitterT void eCMOVG32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVG32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x4F, to, from ); + CMOV32RtoR( 0x4F, to, from ); } /* cmovg m32 to r32*/ -emitterT void eCMOVG32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVG32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4F, to, from ); + CMOV32MtoR( 0x4F, to, from ); } /* cmovge r32 to r32*/ -emitterT void eCMOVGE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVGE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x4D, to, from ); + CMOV32RtoR( 0x4D, to, from ); } /* cmovge m32 to r32*/ -emitterT void eCMOVGE32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVGE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4D, to, from ); + CMOV32MtoR( 0x4D, to, from ); } /* cmovl r32 to r32*/ -emitterT void eCMOVL32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVL32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x4C, to, from ); + CMOV32RtoR( 0x4C, to, from ); } /* cmovl m32 to r32*/ -emitterT void eCMOVL32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVL32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4C, to, from ); + CMOV32MtoR( 0x4C, to, from ); } /* cmovle r32 to r32*/ -emitterT void eCMOVLE32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMOVLE32RtoR( x86IntRegType to, x86IntRegType from ) { - CMOV32RtoR( 0x4E, to, from ); + CMOV32RtoR( 0x4E, to, from ); } /* cmovle m32 to r32*/ -emitterT void eCMOVLE32MtoR( x86IntRegType to, uptr from ) +emitterT void CMOVLE32MtoR( x86IntRegType to, uptr from ) { - CMOV32MtoR( 0x4E, to, from ); + CMOV32MtoR( 0x4E, to, from ); } //////////////////////////////////// @@ -857,261 +823,261 @@ emitterT void eCMOVLE32MtoR( x86IntRegType to, uptr from ) //////////////////////////////////// // add r16 to r16 -emitterT void eADD16RtoR( x86IntRegType to , x86IntRegType from ) +emitterT void ADD16RtoR( x86IntRegType to , x86IntRegType from ) { - write8(0x66); + write8(0x66); RexRB(0,to,from); - write8( 0x03 
); - ModRM( 3, to, from ); + write8( 0x03 ); + ModRM( 3, to, from ); } /* add imm16 to r16 */ -emitterT void eADD16ItoR( x86IntRegType to, s16 imm ) +emitterT void ADD16ItoR( x86IntRegType to, s16 imm ) { - write8( 0x66 ); + write8( 0x66 ); RexB(0,to); if ( to == EAX) { - write8( 0x05 ); - write16( imm ); + write8( 0x05 ); + write16( imm ); } else if(imm <= 127 && imm >= -128) { - write8( 0x83 ); - ModRM( 3, 0, to ); - write8((u8)imm ); + write8( 0x83 ); + ModRM( 3, 0, to ); + write8((u8)imm ); } else { - write8( 0x81 ); - ModRM( 3, 0, to ); - write16( imm ); + write8( 0x81 ); + ModRM( 3, 0, to ); + write16( imm ); } } /* add imm16 to m16 */ -emitterT void eADD16ItoM( uptr to, s16 imm ) +emitterT void ADD16ItoM( uptr to, s16 imm ) { - write8( 0x66 ); + write8( 0x66 ); if(imm <= 127 && imm >= -128) { - write8( 0x83 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write8((u8)imm ); + write8( 0x83 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 6) ); + write8((u8)imm ); } else { - write8( 0x81 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( imm ); + write8( 0x81 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( imm ); } } /* add r16 to m16 */ -emitterT void eADD16RtoM(uptr to, x86IntRegType from ) +emitterT void ADD16RtoM(uptr to, x86IntRegType from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,from); - write8( 0x01 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x01 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } /* add m16 to r16 */ -emitterT void eADD16MtoR( x86IntRegType to, uptr from ) +emitterT void ADD16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,to); - write8( 0x03 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x03 ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // add m8 to r8 -emitterT void eADD8MtoR( x86IntRegType to, uptr from ) +emitterT void ADD8MtoR( x86IntRegType to, uptr from ) { RexR(0,to); - write8( 0x02 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x02 ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* inc r32 */ -emitterT void eINC32R( x86IntRegType to ) +emitterT void INC32R( x86IntRegType to ) { - write8( 0x40 + to ); + write8( 0x40 + to ); } /* inc m32 */ -emitterT void eINC32M( u32 to ) +emitterT void INC32M( u32 to ) { - write8( 0xFF ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xFF ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 4) ); } /* inc r16 */ -emitterT void eINC16R( x86IntRegType to ) +emitterT void INC16R( x86IntRegType to ) { - write8( 0x66 ); - write8( 0x40 + to ); + write8( 0x66 ); + write8( 0x40 + to ); } /* inc m16 */ -emitterT void eINC16M( u32 to ) +emitterT void INC16M( u32 to ) { - write8( 0x66 ); - write8( 0xFF ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x66 ); + write8( 0xFF ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 4) ); } // sub r16 to r16 -emitterT void eSUB16RtoR( x86IntRegType to, u16 from ) +emitterT void SUB16RtoR( x86IntRegType to, u16 from ) { - write8(0x66); + write8(0x66); RexRB(0,to,from); - write8( 0x2b ); - ModRM( 3, to, from ); + write8( 0x2b ); + ModRM( 3, to, from ); } /* sub imm16 to r16 */ -emitterT void eSUB16ItoR( x86IntRegType to, u16 from ) { - write8( 0x66 ); +emitterT void SUB16ItoR( x86IntRegType to, u16 from ) { + write8( 0x66 ); RexB(0,to); if ( to == EAX ) { - write8( 0x2D ); + write8( 0x2D ); } else { - write8( 0x81 ); - ModRM( 3, 5, to ); + write8( 0x81 ); + 
ModRM( 3, 5, to ); } - write16( from ); + write16( from ); } /* sub imm16 to m16 */ -emitterT void eSUB16ItoM( uptr to, u16 from ) { - write8( 0x66 ); - write8( 0x81 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); +emitterT void SUB16ItoM( uptr to, u16 from ) { + write8( 0x66 ); + write8( 0x81 ); + ModRM( 0, 5, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } /* sub m16 to r16 */ -emitterT void eSUB16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); +emitterT void SUB16MtoR( x86IntRegType to, uptr from ) { + write8( 0x66 ); RexR(0,to); - write8( 0x2B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x2B ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* dec r32 */ -emitterT void eDEC32R( x86IntRegType to ) +emitterT void DEC32R( x86IntRegType to ) { - write8( 0x48 + to ); + write8( 0x48 + to ); } /* dec m32 */ -emitterT void eDEC32M( u32 to ) +emitterT void DEC32M( u32 to ) { - write8( 0xFF ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xFF ); + ModRM( 0, 1, DISP32 ); + write32( MEMADDR(to, 4) ); } /* dec r16 */ -emitterT void eDEC16R( x86IntRegType to ) +emitterT void DEC16R( x86IntRegType to ) { - write8( 0x66 ); - write8( 0x48 + to ); + write8( 0x66 ); + write8( 0x48 + to ); } /* dec m16 */ -emitterT void eDEC16M( u32 to ) +emitterT void DEC16M( u32 to ) { - write8( 0x66 ); - write8( 0xFF ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x66 ); + write8( 0xFF ); + ModRM( 0, 1, DISP32 ); + write32( MEMADDR(to, 4) ); } /* mul eax by r32 to edx:eax */ -emitterT void eMUL32R( x86IntRegType from ) +emitterT void MUL32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 4, from ); + write8( 0xF7 ); + ModRM( 3, 4, from ); } /* imul eax by r32 to edx:eax */ -emitterT void eIMUL32R( x86IntRegType from ) +emitterT void IMUL32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 5, from ); + write8( 0xF7 ); + ModRM( 3, 5, from ); } /* mul eax by m32 to edx:eax */ -emitterT void eMUL32M( u32 from ) +emitterT void MUL32M( u32 from ) { - write8( 0xF7 ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xF7 ); + ModRM( 0, 4, DISP32 ); + write32( MEMADDR(from, 4) ); } /* imul eax by m32 to edx:eax */ -emitterT void eIMUL32M( u32 from ) +emitterT void IMUL32M( u32 from ) { - write8( 0xF7 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xF7 ); + ModRM( 0, 5, DISP32 ); + write32( MEMADDR(from, 4) ); } /* imul r32 by r32 to r32 */ -emitterT void eIMUL32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void IMUL32RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write16( 0xAF0F ); - ModRM( 3, to, from ); + write16( 0xAF0F ); + ModRM( 3, to, from ); } /* div eax by r32 to edx:eax */ -emitterT void eDIV32R( x86IntRegType from ) +emitterT void DIV32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 6, from ); + write8( 0xF7 ); + ModRM( 3, 6, from ); } /* idiv eax by r32 to edx:eax */ -emitterT void eIDIV32R( x86IntRegType from ) +emitterT void IDIV32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 7, from ); + write8( 0xF7 ); + ModRM( 3, 7, from ); } /* div eax by m32 to edx:eax */ -emitterT void eDIV32M( u32 from ) +emitterT void DIV32M( u32 from ) { - write8( 0xF7 ); - ModRM( 0, 6, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xF7 ); + ModRM( 0, 6, DISP32 ); + write32( MEMADDR(from, 4) ); } /* idiv eax by m32 to edx:eax */ -emitterT void eIDIV32M( u32 
from ) +emitterT void IDIV32M( u32 from ) { - write8( 0xF7 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xF7 ); + ModRM( 0, 7, DISP32 ); + write32( MEMADDR(from, 4) ); } //////////////////////////////////// @@ -1119,266 +1085,266 @@ emitterT void eIDIV32M( u32 from ) //////////////////////////////////// /* shl imm8 to r32 */ -emitterT void eSHL32ItoR( x86IntRegType to, u8 from ) +emitterT void SHL32ItoR( x86IntRegType to, u8 from ) { RexB(0, to); if ( from == 1 ) { - write8( 0xD1 ); - write8( 0xE0 | (to & 0x7) ); + write8( 0xD1 ); + write8( 0xE0 | (to & 0x7) ); return; } - write8( 0xC1 ); - ModRM( 3, 4, to ); - write8( from ); + write8( 0xC1 ); + ModRM( 3, 4, to ); + write8( from ); } /* shl imm8 to m32 */ -emitterT void eSHL32ItoM( uptr to, u8 from ) +emitterT void SHL32ItoM( uptr to, u8 from ) { if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xD1 ); + ModRM( 0, 4, DISP32 ); + write32( MEMADDR(to, 4) ); } else { - write8( 0xC1 ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0xC1 ); + ModRM( 0, 4, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } } /* shl cl to r32 */ -emitterT void eSHL32CLtoR( x86IntRegType to ) +emitterT void SHL32CLtoR( x86IntRegType to ) { RexB(0,to); - write8( 0xD3 ); - ModRM( 3, 4, to ); + write8( 0xD3 ); + ModRM( 3, 4, to ); } // shl imm8 to r16 -emitterT void eSHL16ItoR( x86IntRegType to, u8 from ) +emitterT void SHL16ItoR( x86IntRegType to, u8 from ) { - write8(0x66); + write8(0x66); RexB(0,to); if ( from == 1 ) { - write8( 0xD1 ); - write8( 0xE0 | (to & 0x7) ); + write8( 0xD1 ); + write8( 0xE0 | (to & 0x7) ); return; } - write8( 0xC1 ); - ModRM( 3, 4, to ); - write8( from ); + write8( 0xC1 ); + ModRM( 3, 4, to ); + write8( from ); } // shl imm8 to r8 -emitterT void eSHL8ItoR( x86IntRegType to, u8 from ) +emitterT void SHL8ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xD0 ); - write8( 0xE0 | (to & 0x7) ); + write8( 0xD0 ); + write8( 0xE0 | (to & 0x7) ); return; } - write8( 0xC0 ); - ModRM( 3, 4, to ); - write8( from ); + write8( 0xC0 ); + ModRM( 3, 4, to ); + write8( from ); } /* shr imm8 to r32 */ -emitterT void eSHR32ItoR( x86IntRegType to, u8 from ) { +emitterT void SHR32ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xD1 ); - write8( 0xE8 | (to & 0x7) ); + write8( 0xD1 ); + write8( 0xE8 | (to & 0x7) ); } else { - write8( 0xC1 ); - ModRM( 3, 5, to ); - write8( from ); + write8( 0xC1 ); + ModRM( 3, 5, to ); + write8( from ); } } /* shr imm8 to m32 */ -emitterT void eSHR32ItoM( uptr to, u8 from ) +emitterT void SHR32ItoM( uptr to, u8 from ) { if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xD1 ); + ModRM( 0, 5, DISP32 ); + write32( MEMADDR(to, 4) ); } else { - write8( 0xC1 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0xC1 ); + ModRM( 0, 5, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } } /* shr cl to r32 */ -emitterT void eSHR32CLtoR( x86IntRegType to ) +emitterT void SHR32CLtoR( x86IntRegType to ) { RexB(0,to); - write8( 0xD3 ); - ModRM( 3, 5, to ); + write8( 0xD3 ); + ModRM( 3, 5, to ); } // shr imm8 to r16 -emitterT void eSHR16ItoR( x86IntRegType to, u8 from ) +emitterT void SHR16ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 3, 5, to ); + write8( 0xD1 ); + ModRM( 3, 5, to ); } else { - write8( 0xC1 ); - ModRM( 3, 5, to ); - 
write8( from ); + write8( 0xC1 ); + ModRM( 3, 5, to ); + write8( from ); } } // shr imm8 to r8 -emitterT void eSHR8ItoR( x86IntRegType to, u8 from ) +emitterT void SHR8ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xD0 ); - write8( 0xE8 | (to & 0x7) ); + write8( 0xD0 ); + write8( 0xE8 | (to & 0x7) ); } else { - write8( 0xC0 ); - ModRM( 3, 5, to ); - write8( from ); + write8( 0xC0 ); + ModRM( 3, 5, to ); + write8( from ); } } /* sar imm8 to r32 */ -emitterT void eSAR32ItoR( x86IntRegType to, u8 from ) +emitterT void SAR32ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 3, 7, to ); + write8( 0xD1 ); + ModRM( 3, 7, to ); return; } - write8( 0xC1 ); - ModRM( 3, 7, to ); - write8( from ); + write8( 0xC1 ); + ModRM( 3, 7, to ); + write8( from ); } /* sar imm8 to m32 */ -emitterT void eSAR32ItoM( uptr to, u8 from ) +emitterT void SAR32ItoM( uptr to, u8 from ) { - write8( 0xC1 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0xC1 ); + ModRM( 0, 7, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } /* sar cl to r32 */ -emitterT void eSAR32CLtoR( x86IntRegType to ) +emitterT void SAR32CLtoR( x86IntRegType to ) { RexB(0,to); - write8( 0xD3 ); - ModRM( 3, 7, to ); + write8( 0xD3 ); + ModRM( 3, 7, to ); } // sar imm8 to r16 -emitterT void eSAR16ItoR( x86IntRegType to, u8 from ) +emitterT void SAR16ItoR( x86IntRegType to, u8 from ) { - write8(0x66); + write8(0x66); RexB(0,to); if ( from == 1 ) { - write8( 0xD1 ); - ModRM( 3, 7, to ); + write8( 0xD1 ); + ModRM( 3, 7, to ); return; } - write8( 0xC1 ); - ModRM( 3, 7, to ); - write8( from ); + write8( 0xC1 ); + ModRM( 3, 7, to ); + write8( from ); } -/*emitterT void eROR32ItoR( x86IntRegType to,u8 from ) +/*emitterT void ROR32ItoR( x86IntRegType to,u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xd1 ); - write8( 0xc8 | to ); + write8( 0xd1 ); + write8( 0xc8 | to ); } else { - write8( 0xc1 ); - write8( 0xc8 | to ); - write8( from ); + write8( 0xc1 ); + write8( 0xc8 | to ); + write8( from ); } }*/ -emitterT void eRCR32ItoR( x86IntRegType to, u8 from ) +emitterT void RCR32ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xd1 ); - ModRM(3, 3, to); + write8( 0xd1 ); + ModRM(3, 3, to); } else { - write8( 0xc1 ); - ModRM(3, 3, to); - write8( from ); + write8( 0xc1 ); + ModRM(3, 3, to); + write8( from ); } } -emitterT void eRCR32ItoM( uptr to, u8 from ) +emitterT void RCR32ItoM( uptr to, u8 from ) { RexB(0,to); if ( from == 1 ) { - write8( 0xd1 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(to, 8) ); + write8( 0xd1 ); + ModRM( 0, 3, DISP32 ); + write32( MEMADDR(to, 8) ); } else { - write8( 0xc1 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(to, 8) ); - write8( from ); + write8( 0xc1 ); + ModRM( 0, 3, DISP32 ); + write32( MEMADDR(to, 8) ); + write8( from ); } } // shld imm8 to r32 -emitterT void eSHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) +emitterT void SHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) { RexRB(0,from,to); - write8( 0x0F ); - write8( 0xA4 ); - ModRM( 3, from, to ); - write8( shift ); + write8( 0x0F ); + write8( 0xA4 ); + ModRM( 3, from, to ); + write8( shift ); } // shrd imm8 to r32 -emitterT void eSHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) +emitterT void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) { RexRB(0,from,to); - write8( 0x0F ); - write8( 0xAC ); - ModRM( 3, from, to ); - write8( shift ); + write8( 0x0F ); + write8( 0xAC ); + 
ModRM( 3, from, to ); + write8( shift ); } //////////////////////////////////// @@ -1386,252 +1352,252 @@ emitterT void eSHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) //////////////////////////////////// // or r16 to r16 -emitterT void eOR16RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void OR16RtoR( x86IntRegType to, x86IntRegType from ) { - write8(0x66); + write8(0x66); RexRB(0,from,to); - write8( 0x09 ); - ModRM( 3, from, to ); + write8( 0x09 ); + ModRM( 3, from, to ); } // or imm16 to r16 -emitterT void eOR16ItoR( x86IntRegType to, u16 from ) +emitterT void OR16ItoR( x86IntRegType to, u16 from ) { - write8(0x66); + write8(0x66); RexB(0,to); if ( to == EAX ) { - write8( 0x0D ); + write8( 0x0D ); } else { - write8( 0x81 ); - ModRM( 3, 1, to ); + write8( 0x81 ); + ModRM( 3, 1, to ); } - write16( from ); + write16( from ); } // or imm16 to m316 -emitterT void eOR16ItoM( uptr to, u16 from ) +emitterT void OR16ItoM( uptr to, u16 from ) { - write8(0x66); - write8( 0x81 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); + write8(0x66); + write8( 0x81 ); + ModRM( 0, 1, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } /* or m16 to r16 */ -emitterT void eOR16MtoR( x86IntRegType to, uptr from ) +emitterT void OR16MtoR( x86IntRegType to, uptr from ) { - write8(0x66); + write8(0x66); RexR(0,to); - write8( 0x0B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x0B ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // or r16 to m16 -emitterT void eOR16RtoM( uptr to, x86IntRegType from ) +emitterT void OR16RtoM( uptr to, x86IntRegType from ) { - write8(0x66); + write8(0x66); RexR(0,from); - write8( 0x09 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x09 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } // or r8 to r8 -emitterT void eOR8RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void OR8RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0,from,to); - write8( 0x08 ); - ModRM( 3, from, to ); + write8( 0x08 ); + ModRM( 3, from, to ); } // or r8 to m8 -emitterT void eOR8RtoM( uptr to, x86IntRegType from ) +emitterT void OR8RtoM( uptr to, x86IntRegType from ) { RexR(0,from); - write8( 0x08 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x08 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } // or imm8 to m8 -emitterT void eOR8ItoM( uptr to, u8 from ) +emitterT void OR8ItoM( uptr to, u8 from ) { - write8( 0x80 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0x80 ); + ModRM( 0, 1, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } // or m8 to r8 -emitterT void eOR8MtoR( x86IntRegType to, uptr from ) +emitterT void OR8MtoR( x86IntRegType to, uptr from ) { RexR(0,to); - write8( 0x0A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x0A ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // and r16 to r16 -emitterT void eAND16RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void AND16RtoR( x86IntRegType to, x86IntRegType from ) { - write8(0x66); + write8(0x66); RexRB(0,to,from); - write8( 0x23 ); - ModRM( 3, to, from ); + write8( 0x23 ); + ModRM( 3, to, from ); } /* and imm16 to r16 */ -emitterT void eAND16ItoR( x86IntRegType to, u16 from ) +emitterT void AND16ItoR( x86IntRegType to, u16 from ) { - write8(0x66); + write8(0x66); RexB(0,to); if ( to == EAX ) { - write8( 0x25 ); - write16( from ); + write8( 0x25 ); + write16( from ); } else if ( from < 0x80 ) { - 
write8( 0x83 ); - ModRM( 3, 0x4, to ); - write8((u8)from ); + write8( 0x83 ); + ModRM( 3, 0x4, to ); + write8((u8)from ); } else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - write16( from ); + write8( 0x81 ); + ModRM( 3, 0x4, to ); + write16( from ); } } /* and imm16 to m16 */ -emitterT void eAND16ItoM( uptr to, u16 from ) +emitterT void AND16ItoM( uptr to, u16 from ) { - write8(0x66); + write8(0x66); if ( from < 0x80 ) { - write8( 0x83 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write8((u8)from ); + write8( 0x83 ); + ModRM( 0, 0x4, DISP32 ); + write32( MEMADDR(to, 6) ); + write8((u8)from ); } else { - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); + write8( 0x81 ); + ModRM( 0, 0x4, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } } /* and r16 to m16 */ -emitterT void eAND16RtoM( uptr to, x86IntRegType from ) +emitterT void AND16RtoM( uptr to, x86IntRegType from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,from); - write8( 0x21 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x21 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } /* and m16 to r16 */ -emitterT void eAND16MtoR( x86IntRegType to, uptr from ) +emitterT void AND16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,to); - write8( 0x23 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4)); + write8( 0x23 ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4)); } /* and imm8 to r8 */ -emitterT void eAND8ItoR( x86IntRegType to, u8 from ) +emitterT void AND8ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( to == EAX ) { - write8( 0x24 ); + write8( 0x24 ); } else { - write8( 0x80 ); - ModRM( 3, 0x4, to ); + write8( 0x80 ); + ModRM( 3, 0x4, to ); } - write8( from ); + write8( from ); } /* and imm8 to m8 */ -emitterT void eAND8ItoM( uptr to, u8 from ) +emitterT void AND8ItoM( uptr to, u8 from ) { - write8( 0x80 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0x80 ); + ModRM( 0, 0x4, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } // and r8 to r8 -emitterT void eAND8RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void AND8RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0,to,from); - write8( 0x22 ); - ModRM( 3, to, from ); + write8( 0x22 ); + ModRM( 3, to, from ); } /* and r8 to m8 */ -emitterT void eAND8RtoM( uptr to, x86IntRegType from ) +emitterT void AND8RtoM( uptr to, x86IntRegType from ) { RexR(0,from); - write8( 0x20 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0x20 ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } /* and m8 to r8 */ -emitterT void eAND8MtoR( x86IntRegType to, uptr from ) +emitterT void AND8MtoR( x86IntRegType to, uptr from ) { RexR(0,to); - write8( 0x22 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4)); + write8( 0x22 ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4)); } /* not r32 */ -emitterT void eNOT32R( x86IntRegType from ) +emitterT void NOT32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 2, from ); + write8( 0xF7 ); + ModRM( 3, 2, from ); } // not m32 -emitterT void eNOT32M( u32 from ) +emitterT void NOT32M( u32 from ) { - write8( 0xF7 ); - ModRM( 0, 2, DISP32 ); - write32( MEMADDR(from, 4)); + write8( 0xF7 ); + ModRM( 0, 2, DISP32 ); + write32( MEMADDR(from, 4)); } /* neg r32 */ -emitterT void eNEG32R( x86IntRegType from ) +emitterT void NEG32R( x86IntRegType from ) { RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 3, from ); + 
write8( 0xF7 ); + ModRM( 3, 3, from ); } -emitterT void eNEG32M( u32 from ) +emitterT void NEG32M( u32 from ) { - write8( 0xF7 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(from, 4)); + write8( 0xF7 ); + ModRM( 0, 3, DISP32 ); + write32( MEMADDR(from, 4)); } /* neg r16 */ -emitterT void eNEG16R( x86IntRegType from ) +emitterT void NEG16R( x86IntRegType from ) { - write8( 0x66 ); + write8( 0x66 ); RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 3, from ); + write8( 0xF7 ); + ModRM( 3, 3, from ); } //////////////////////////////////// @@ -1639,346 +1605,346 @@ emitterT void eNEG16R( x86IntRegType from ) //////////////////////////////////// emitterT u8* JMP( uptr to ) { - uptr jump = ( x86Ptr[0] - (u8*)to ) - 1; + uptr jump = ( x86Ptr - (u8*)to ) - 1; if ( jump > 0x7f ) { assert( to <= 0xffffffff ); - return (u8*)eJMP32( to ); + return (u8*)JMP32( to ); } else { - return (u8*)eJMP8( to ); + return (u8*)JMP8( to ); } } /* jmp rel8 */ -emitterT u8* eJMP8( u8 to ) +emitterT u8* JMP8( u8 to ) { - write8( 0xEB ); - write8( to ); - return x86Ptr[I] - 1; + write8( 0xEB ); + write8( to ); + return x86Ptr - 1; } /* jmp rel32 */ -emitterT u32* eJMP32( uptr to ) +emitterT u32* JMP32( uptr to ) { assert( (sptr)to <= 0x7fffffff && (sptr)to >= -0x7fffffff ); - write8( 0xE9 ); - write32( to ); - return (u32*)(x86Ptr[I] - 4 ); + write8( 0xE9 ); + write32( to ); + return (u32*)(x86Ptr - 4 ); } /* jmp r32/r64 */ -emitterT void eJMPR( x86IntRegType to ) +emitterT void JMPR( x86IntRegType to ) { RexB(0, to); - write8( 0xFF ); - ModRM( 3, 4, to ); + write8( 0xFF ); + ModRM( 3, 4, to ); } // jmp m32 -emitterT void eJMP32M( uptr to ) +emitterT void JMP32M( uptr to ) { - write8( 0xFF ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(to, 4)); + write8( 0xFF ); + ModRM( 0, 4, DISP32 ); + write32( MEMADDR(to, 4)); } /* jp rel8 */ -emitterT u8* eJP8( u8 to ) { - return J8Rel( 0x7A, to ); +emitterT u8* JP8( u8 to ) { + return J8Rel( 0x7A, to ); } /* jnp rel8 */ -emitterT u8* eJNP8( u8 to ) { - return J8Rel( 0x7B, to ); +emitterT u8* JNP8( u8 to ) { + return J8Rel( 0x7B, to ); } /* je rel8 */ -emitterT u8* eJE8( u8 to ) { - return J8Rel( 0x74, to ); +emitterT u8* JE8( u8 to ) { + return J8Rel( 0x74, to ); } /* jz rel8 */ -emitterT u8* eJZ8( u8 to ) +emitterT u8* JZ8( u8 to ) { - return J8Rel( 0x74, to ); + return J8Rel( 0x74, to ); } /* js rel8 */ -emitterT u8* eJS8( u8 to ) +emitterT u8* JS8( u8 to ) { - return J8Rel( 0x78, to ); + return J8Rel( 0x78, to ); } /* jns rel8 */ -emitterT u8* eJNS8( u8 to ) +emitterT u8* JNS8( u8 to ) { - return J8Rel( 0x79, to ); + return J8Rel( 0x79, to ); } /* jg rel8 */ -emitterT u8* eJG8( u8 to ) +emitterT u8* JG8( u8 to ) { - return J8Rel( 0x7F, to ); + return J8Rel( 0x7F, to ); } /* jge rel8 */ -emitterT u8* eJGE8( u8 to ) +emitterT u8* JGE8( u8 to ) { - return J8Rel( 0x7D, to ); + return J8Rel( 0x7D, to ); } /* jl rel8 */ -emitterT u8* eJL8( u8 to ) +emitterT u8* JL8( u8 to ) { - return J8Rel( 0x7C, to ); + return J8Rel( 0x7C, to ); } /* ja rel8 */ -emitterT u8* eJA8( u8 to ) +emitterT u8* JA8( u8 to ) { - return J8Rel( 0x77, to ); + return J8Rel( 0x77, to ); } -emitterT u8* eJAE8( u8 to ) +emitterT u8* JAE8( u8 to ) { - return J8Rel( 0x73, to ); + return J8Rel( 0x73, to ); } /* jb rel8 */ -emitterT u8* eJB8( u8 to ) +emitterT u8* JB8( u8 to ) { - return J8Rel( 0x72, to ); + return J8Rel( 0x72, to ); } /* jbe rel8 */ -emitterT u8* eJBE8( u8 to ) +emitterT u8* JBE8( u8 to ) { - return J8Rel( 0x76, to ); + return J8Rel( 0x76, to ); } /* jle rel8 */ -emitterT u8* eJLE8( u8 to ) +emitterT u8* 
JLE8( u8 to ) { - return J8Rel( 0x7E, to ); + return J8Rel( 0x7E, to ); } /* jne rel8 */ -emitterT u8* eJNE8( u8 to ) +emitterT u8* JNE8( u8 to ) { - return J8Rel( 0x75, to ); + return J8Rel( 0x75, to ); } /* jnz rel8 */ -emitterT u8* eJNZ8( u8 to ) +emitterT u8* JNZ8( u8 to ) { - return J8Rel( 0x75, to ); + return J8Rel( 0x75, to ); } /* jng rel8 */ -emitterT u8* eJNG8( u8 to ) +emitterT u8* JNG8( u8 to ) { - return J8Rel( 0x7E, to ); + return J8Rel( 0x7E, to ); } /* jnge rel8 */ -emitterT u8* eJNGE8( u8 to ) +emitterT u8* JNGE8( u8 to ) { - return J8Rel( 0x7C, to ); + return J8Rel( 0x7C, to ); } /* jnl rel8 */ -emitterT u8* eJNL8( u8 to ) +emitterT u8* JNL8( u8 to ) { - return J8Rel( 0x7D, to ); + return J8Rel( 0x7D, to ); } /* jnle rel8 */ -emitterT u8* eJNLE8( u8 to ) +emitterT u8* JNLE8( u8 to ) { - return J8Rel( 0x7F, to ); + return J8Rel( 0x7F, to ); } /* jo rel8 */ -emitterT u8* eJO8( u8 to ) +emitterT u8* JO8( u8 to ) { - return J8Rel( 0x70, to ); + return J8Rel( 0x70, to ); } /* jno rel8 */ -emitterT u8* eJNO8( u8 to ) +emitterT u8* JNO8( u8 to ) { - return J8Rel( 0x71, to ); + return J8Rel( 0x71, to ); } /* Untested and slower, use 32bit versions instead // ja rel16 emitterT u16* eJA16( u16 to ) { -return J16Rel( 0x87, to ); +return J16Rel( 0x87, to ); } // jb rel16 emitterT u16* eJB16( u16 to ) { -return J16Rel( 0x82, to ); +return J16Rel( 0x82, to ); } // je rel16 emitterT u16* eJE16( u16 to ) { -return J16Rel( 0x84, to ); +return J16Rel( 0x84, to ); } // jz rel16 emitterT u16* eJZ16( u16 to ) { -return J16Rel( 0x84, to ); +return J16Rel( 0x84, to ); } */ // jb rel32 -emitterT u32* eJB32( u32 to ) +emitterT u32* JB32( u32 to ) { - return J32Rel( 0x82, to ); + return J32Rel( 0x82, to ); } /* je rel32 */ -emitterT u32* eJE32( u32 to ) +emitterT u32* JE32( u32 to ) { - return J32Rel( 0x84, to ); + return J32Rel( 0x84, to ); } /* jz rel32 */ -emitterT u32* eJZ32( u32 to ) +emitterT u32* JZ32( u32 to ) { - return J32Rel( 0x84, to ); + return J32Rel( 0x84, to ); } /* js rel32 */ -emitterT u32* eJS32( u32 to ) +emitterT u32* JS32( u32 to ) { - return J32Rel( 0x88, to ); + return J32Rel( 0x88, to ); } /* jns rel32 */ -emitterT u32* eJNS32( u32 to ) +emitterT u32* JNS32( u32 to ) { - return J32Rel( 0x89, to ); + return J32Rel( 0x89, to ); } /* jg rel32 */ -emitterT u32* eJG32( u32 to ) +emitterT u32* JG32( u32 to ) { - return J32Rel( 0x8F, to ); + return J32Rel( 0x8F, to ); } /* jge rel32 */ -emitterT u32* eJGE32( u32 to ) +emitterT u32* JGE32( u32 to ) { - return J32Rel( 0x8D, to ); + return J32Rel( 0x8D, to ); } /* jl rel32 */ -emitterT u32* eJL32( u32 to ) +emitterT u32* JL32( u32 to ) { - return J32Rel( 0x8C, to ); + return J32Rel( 0x8C, to ); } /* jle rel32 */ -emitterT u32* eJLE32( u32 to ) +emitterT u32* JLE32( u32 to ) { - return J32Rel( 0x8E, to ); + return J32Rel( 0x8E, to ); } /* ja rel32 */ -emitterT u32* eJA32( u32 to ) +emitterT u32* JA32( u32 to ) { - return J32Rel( 0x87, to ); + return J32Rel( 0x87, to ); } /* jae rel32 */ -emitterT u32* eJAE32( u32 to ) +emitterT u32* JAE32( u32 to ) { - return J32Rel( 0x83, to ); + return J32Rel( 0x83, to ); } /* jne rel32 */ -emitterT u32* eJNE32( u32 to ) +emitterT u32* JNE32( u32 to ) { - return J32Rel( 0x85, to ); + return J32Rel( 0x85, to ); } /* jnz rel32 */ -emitterT u32* eJNZ32( u32 to ) +emitterT u32* JNZ32( u32 to ) { - return J32Rel( 0x85, to ); + return J32Rel( 0x85, to ); } /* jng rel32 */ -emitterT u32* eJNG32( u32 to ) +emitterT u32* JNG32( u32 to ) { - return J32Rel( 0x8E, to ); + return J32Rel( 0x8E, to ); } /* jnge 
rel32 */ -emitterT u32* eJNGE32( u32 to ) +emitterT u32* JNGE32( u32 to ) { - return J32Rel( 0x8C, to ); + return J32Rel( 0x8C, to ); } /* jnl rel32 */ -emitterT u32* eJNL32( u32 to ) +emitterT u32* JNL32( u32 to ) { - return J32Rel( 0x8D, to ); + return J32Rel( 0x8D, to ); } /* jnle rel32 */ -emitterT u32* eJNLE32( u32 to ) +emitterT u32* JNLE32( u32 to ) { - return J32Rel( 0x8F, to ); + return J32Rel( 0x8F, to ); } /* jo rel32 */ -emitterT u32* eJO32( u32 to ) +emitterT u32* JO32( u32 to ) { - return J32Rel( 0x80, to ); + return J32Rel( 0x80, to ); } /* jno rel32 */ -emitterT u32* eJNO32( u32 to ) +emitterT u32* JNO32( u32 to ) { - return J32Rel( 0x81, to ); + return J32Rel( 0x81, to ); } /* call func */ -emitterT void eCALLFunc( uptr func ) +emitterT void CALLFunc( uptr func ) { - func -= ( (uptr)x86Ptr[0] + 5 ); + func -= ( (uptr)x86Ptr + 5 ); assert( (sptr)func <= 0x7fffffff && (sptr)func >= -0x7fffffff ); - eCALL32(func); + CALL32(func); } /* call rel32 */ -emitterT void eCALL32( u32 to ) +emitterT void CALL32( u32 to ) { - write8( 0xE8 ); - write32( to ); + write8( 0xE8 ); + write32( to ); } /* call r32 */ -emitterT void eCALL32R( x86IntRegType to ) +emitterT void CALL32R( x86IntRegType to ) { - write8( 0xFF ); - ModRM( 3, 2, to ); + write8( 0xFF ); + ModRM( 3, 2, to ); } /* call m32 */ -emitterT void eCALL32M( u32 to ) +emitterT void CALL32M( u32 to ) { - write8( 0xFF ); - ModRM( 0, 2, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xFF ); + ModRM( 0, 2, DISP32 ); + write32( MEMADDR(to, 4) ); } //////////////////////////////////// @@ -1986,410 +1952,406 @@ emitterT void eCALL32M( u32 to ) //////////////////////////////////// // cmp imm8 to [r32] (byte ptr) -emitterT void eCMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) +emitterT void CMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) { RexB(0,to); - write8( 0x80 ); - ModRM( (off != 0), 7, to ); - if( off != 0 ) write8(off); - write8(from); + write8( 0x80 ); + ModRM( (off != 0), 7, to ); + if( off != 0 ) write8(off); + write8(from); } // cmp imm8 to [r32] -emitterT void eCMP32I8toRm( x86IntRegType to, u8 from, s8 off=0 ) +emitterT void CMP32I8toRm( x86IntRegType to, u8 from, s8 off=0 ) { RexB(0,to); - write8( 0x83 ); - ModRM( (off!=0), 7, to ); - if( off != 0 ) write8(off); - write8(from); + write8( 0x83 ); + ModRM( (off!=0), 7, to ); + if( off != 0 ) write8(off); + write8(from); } // cmp imm32 to [r32] -emitterT void eCMP32ItoRm( x86IntRegType to, u32 from, s8 off=0 ) +emitterT void CMP32ItoRm( x86IntRegType to, u32 from, s8 off=0 ) { // fixme : This should use the imm8 form if 'from' is between 127 and -128. 
RexB(0,to); - write8( 0x81 ); - ModRM( (off != 0), 7, to ); - if( off != 0 ) write8(off); - write32(from); + write8( 0x81 ); + ModRM( (off != 0), 7, to ); + if( off != 0 ) write8(off); + write32(from); } // cmp imm8 to [mem] (dword ptr) -emitterT void eCMP32I8toM( uptr to, u8 from ) +emitterT void CMP32I8toM( uptr to, u8 from ) { - write8( 0x83 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0x83 ); + ModRM( 0, 7, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } /* cmp imm16 to r16 */ -emitterT void eCMP16ItoR( x86IntRegType to, u16 from ) +emitterT void CMP16ItoR( x86IntRegType to, u16 from ) { - write8( 0x66 ); + write8( 0x66 ); RexB(0,to); if ( to == EAX ) { - write8( 0x3D ); + write8( 0x3D ); } else { - write8( 0x81 ); - ModRM( 3, 7, to ); + write8( 0x81 ); + ModRM( 3, 7, to ); } - write16( from ); + write16( from ); } /* cmp imm16 to m16 */ -emitterT void eCMP16ItoM( uptr to, u16 from ) +emitterT void CMP16ItoM( uptr to, u16 from ) { - write8( 0x66 ); - write8( 0x81 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); + write8( 0x66 ); + write8( 0x81 ); + ModRM( 0, 7, DISP32 ); + write32( MEMADDR(to, 6) ); + write16( from ); } /* cmp r16 to r16 */ -emitterT void eCMP16RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void CMP16RtoR( x86IntRegType to, x86IntRegType from ) { - write8( 0x66 ); + write8( 0x66 ); RexRB(0,from,to); - write8( 0x39 ); - ModRM( 3, from, to ); + write8( 0x39 ); + ModRM( 3, from, to ); } /* cmp m16 to r16 */ -emitterT void eCMP16MtoR( x86IntRegType to, uptr from ) +emitterT void CMP16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); + write8( 0x66 ); RexR(0,to); - write8( 0x3B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x3B ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // cmp imm8 to r8 -emitterT void eCMP8ItoR( x86IntRegType to, u8 from ) +emitterT void CMP8ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( to == EAX ) { - write8( 0x3C ); + write8( 0x3C ); } else { - write8( 0x80 ); - ModRM( 3, 7, to ); + write8( 0x80 ); + ModRM( 3, 7, to ); } - write8( from ); + write8( from ); } // cmp m8 to r8 -emitterT void eCMP8MtoR( x86IntRegType to, uptr from ) +emitterT void CMP8MtoR( x86IntRegType to, uptr from ) { RexR(0,to); - write8( 0x3A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0x3A ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* test imm32 to r32 */ -emitterT void eTEST32ItoR( x86IntRegType to, u32 from ) +emitterT void TEST32ItoR( x86IntRegType to, u32 from ) { RexB(0,to); if ( to == EAX ) { - write8( 0xA9 ); + write8( 0xA9 ); } else { - write8( 0xF7 ); - ModRM( 3, 0, to ); + write8( 0xF7 ); + ModRM( 3, 0, to ); } - write32( from ); + write32( from ); } -emitterT void eTEST32ItoM( uptr to, u32 from ) +emitterT void TEST32ItoM( uptr to, u32 from ) { - write8( 0xF7 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 8) ); - write32( from ); + write8( 0xF7 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 8) ); + write32( from ); } /* test r32 to r32 */ -emitterT void eTEST32RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void TEST32RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0,from,to); - write8( 0x85 ); - ModRM( 3, from, to ); + write8( 0x85 ); + ModRM( 3, from, to ); } // test imm32 to [r32] -emitterT void eTEST32ItoRm( x86IntRegType to, u32 from ) +emitterT void TEST32ItoRm( x86IntRegType to, u32 from ) { RexB(0,to); - write8( 0xF7 ); - ModRM( 0, 0, to ); - write32(from); + 
write8( 0xF7 ); + ModRM( 0, 0, to ); + write32(from); } // test imm16 to r16 -emitterT void eTEST16ItoR( x86IntRegType to, u16 from ) +emitterT void TEST16ItoR( x86IntRegType to, u16 from ) { - write8(0x66); + write8(0x66); RexB(0,to); if ( to == EAX ) { - write8( 0xA9 ); + write8( 0xA9 ); } else { - write8( 0xF7 ); - ModRM( 3, 0, to ); + write8( 0xF7 ); + ModRM( 3, 0, to ); } - write16( from ); + write16( from ); } // test r16 to r16 -emitterT void eTEST16RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void TEST16RtoR( x86IntRegType to, x86IntRegType from ) { - write8(0x66); + write8(0x66); RexRB(0,from,to); - write8( 0x85 ); - ModRM( 3, from, to ); + write8( 0x85 ); + ModRM( 3, from, to ); } // test r8 to r8 -emitterT void eTEST8RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void TEST8RtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0, from, to); - write8( 0x84 ); - ModRM( 3, from, to ); + write8( 0x84 ); + ModRM( 3, from, to ); } // test imm8 to r8 -emitterT void eTEST8ItoR( x86IntRegType to, u8 from ) +emitterT void TEST8ItoR( x86IntRegType to, u8 from ) { RexB(0,to); if ( to == EAX ) { - write8( 0xA8 ); + write8( 0xA8 ); } else { - write8( 0xF6 ); - ModRM( 3, 0, to ); + write8( 0xF6 ); + ModRM( 3, 0, to ); } - write8( from ); + write8( from ); } // test imm8 to r8 -emitterT void eTEST8ItoM( uptr to, u8 from ) +emitterT void TEST8ItoM( uptr to, u8 from ) { - write8( 0xF6 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); + write8( 0xF6 ); + ModRM( 0, 0, DISP32 ); + write32( MEMADDR(to, 5) ); + write8( from ); } /* sets r8 */ -emitterT void eSETS8R( x86IntRegType to ) +emitterT void SETS8R( x86IntRegType to ) { - SET8R( 0x98, to ); + SET8R( 0x98, to ); } /* setl r8 */ -emitterT void eSETL8R( x86IntRegType to ) +emitterT void SETL8R( x86IntRegType to ) { - SET8R( 0x9C, to ); + SET8R( 0x9C, to ); } // setge r8 -emitterT void eSETGE8R( x86IntRegType to ) { SET8R(0x9d, to); } +emitterT void SETGE8R( x86IntRegType to ) { SET8R(0x9d, to); } // setg r8 -emitterT void eSETG8R( x86IntRegType to ) { SET8R(0x9f, to); } +emitterT void SETG8R( x86IntRegType to ) { SET8R(0x9f, to); } // seta r8 -emitterT void eSETA8R( x86IntRegType to ) { SET8R(0x97, to); } +emitterT void SETA8R( x86IntRegType to ) { SET8R(0x97, to); } // setae r8 -emitterT void eSETAE8R( x86IntRegType to ) { SET8R(0x99, to); } +emitterT void SETAE8R( x86IntRegType to ) { SET8R(0x99, to); } /* setb r8 */ -emitterT void eSETB8R( x86IntRegType to ) { SET8R( 0x92, to ); } +emitterT void SETB8R( x86IntRegType to ) { SET8R( 0x92, to ); } /* setb r8 */ -emitterT void eSETNZ8R( x86IntRegType to ) { SET8R( 0x95, to ); } +emitterT void SETNZ8R( x86IntRegType to ) { SET8R( 0x95, to ); } // setz r8 -emitterT void eSETZ8R( x86IntRegType to ) { SET8R(0x94, to); } +emitterT void SETZ8R( x86IntRegType to ) { SET8R(0x94, to); } // sete r8 -emitterT void eSETE8R( x86IntRegType to ) { SET8R(0x94, to); } +emitterT void SETE8R( x86IntRegType to ) { SET8R(0x94, to); } /* push imm32 */ -emitterT void ePUSH32I( u32 from ) +emitterT void PUSH32I( u32 from ) {; -write8( 0x68 ); -write32( from ); +write8( 0x68 ); +write32( from ); } /* push r32 */ -emitterT void ePUSH32R( x86IntRegType from ) { write8( 0x50 | from ); } +emitterT void PUSH32R( x86IntRegType from ) { write8( 0x50 | from ); } /* push m32 */ -emitterT void ePUSH32M( u32 from ) +emitterT void PUSH32M( u32 from ) { - write8( 0xFF ); - ModRM( 0, 6, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xFF ); + ModRM( 0, 6, DISP32 ); + write32( MEMADDR(from, 
4) ); } /* pop r32 */ -emitterT void ePOP32R( x86IntRegType from ) { write8( 0x58 | from ); } +emitterT void POP32R( x86IntRegType from ) { write8( 0x58 | from ); } /* pushad */ -emitterT void ePUSHA32( void ) { write8( 0x60 ); } +emitterT void PUSHA32( void ) { write8( 0x60 ); } /* popad */ -emitterT void ePOPA32( void ) { write8( 0x61 ); } - -emitterT void ePUSHR(x86IntRegType from) { ePUSH32R(from); } -emitterT void ePOPR(x86IntRegType from) { ePOP32R(from); } - +emitterT void POPA32( void ) { write8( 0x61 ); } /* pushfd */ -emitterT void ePUSHFD( void ) { write8( 0x9C ); } +emitterT void PUSHFD( void ) { write8( 0x9C ); } /* popfd */ -emitterT void ePOPFD( void ) { write8( 0x9D ); } +emitterT void POPFD( void ) { write8( 0x9D ); } -emitterT void eRET( void ) { /*write8( 0xf3 ); <-- K8 opt?*/ write8( 0xC3 ); } +emitterT void RET( void ) { /*write8( 0xf3 ); <-- K8 opt?*/ write8( 0xC3 ); } -emitterT void eCBW( void ) { write16( 0x9866 ); } -emitterT void eCWD( void ) { write8( 0x98 ); } -emitterT void eCDQ( void ) { write8( 0x99 ); } -emitterT void eCWDE() { write8(0x98); } +emitterT void CBW( void ) { write16( 0x9866 ); } +emitterT void CWD( void ) { write8( 0x98 ); } +emitterT void CDQ( void ) { write8( 0x99 ); } +emitterT void CWDE() { write8(0x98); } -emitterT void eLAHF() { write8(0x9f); } -emitterT void eSAHF() { write8(0x9e); } +emitterT void LAHF() { write8(0x9f); } +emitterT void SAHF() { write8(0x9e); } -emitterT void eBT32ItoR( x86IntRegType to, u8 from ) +emitterT void BT32ItoR( x86IntRegType to, u8 from ) { - write16( 0xBA0F ); - ModRM(3, 4, to); - write8( from ); + write16( 0xBA0F ); + ModRM(3, 4, to); + write8( from ); } -emitterT void eBTR32ItoR( x86IntRegType to, u8 from ) +emitterT void BTR32ItoR( x86IntRegType to, u8 from ) { - write16( 0xBA0F ); - ModRM(3, 6, to); - write8( from ); + write16( 0xBA0F ); + ModRM(3, 6, to); + write8( from ); } -emitterT void eBSRRtoR(x86IntRegType to, x86IntRegType from) +emitterT void BSRRtoR(x86IntRegType to, x86IntRegType from) { - write16( 0xBD0F ); - ModRM( 3, from, to ); + write16( 0xBD0F ); + ModRM( 3, from, to ); } -emitterT void eBSWAP32R( x86IntRegType to ) +emitterT void BSWAP32R( x86IntRegType to ) { - write8( 0x0F ); - write8( 0xC8 + to ); + write8( 0x0F ); + write8( 0xC8 + to ); } -// to = from + offset -emitterT void eLEA16RtoR(x86IntRegType to, x86IntRegType from, u16 offset) -{ - write8(0x66); - eLEA32RtoR(to, from, offset); -} - -emitterT void eLEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) +emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) { RexRB(0,to,from); - write8(0x8d); + write8(0x8d); if( (from&7) == ESP ) { if( offset == 0 ) { - ModRM(1, to, from); - write8(0x24); + ModRM(1, to, from); + write8(0x24); } else if( offset <= 127 && offset >= -128 ) { - ModRM(1, to, from); - write8(0x24); - write8(offset); + ModRM(1, to, from); + write8(0x24); + write8(offset); } else { - ModRM(2, to, from); - write8(0x24); - write32(offset); + ModRM(2, to, from); + write8(0x24); + write32(offset); } } else { if( offset == 0 && from != EBP && from!=ESP ) { - ModRM(0, to, from); + ModRM(0, to, from); } else if( offset <= 127 && offset >= -128 ) { - ModRM(1, to, from); - write8(offset); + ModRM(1, to, from); + write8(offset); } else { - ModRM(2, to, from); - write32(offset); + ModRM(2, to, from); + write32(offset); } } } +// to = from + offset +emitterT void LEA16RtoR(x86IntRegType to, x86IntRegType from, u16 offset) +{ + write8(0x66); + LEA32RtoR(to, from, offset); +} + // to = from0 + from1 
-emitterT void eLEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) +emitterT void LEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) { - write8(0x66); - eLEA32RRtoR(to, from0, from1); + write8(0x66); + LEA32RRtoR(to, from0, from1); } -emitterT void eLEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) +emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) { RexRXB(0, to, from0, from1); - write8(0x8d); + write8(0x8d); if( (from1&7) == EBP ) { - ModRM(1, to, 4); - ModRM(0, from0, from1); - write8(0); + ModRM(1, to, 4); + ModRM(0, from0, from1); + write8(0); } else { - ModRM(0, to, 4); - ModRM(0, from0, from1); + ModRM(0, to, 4); + ModRM(0, from0, from1); } } // to = from << scale (max is 3) -emitterT void eLEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale) +emitterT void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale) { - write8(0x66); - eLEA32RStoR(to, from, scale); + write8(0x66); + LEA32RStoR(to, from, scale); } // Don't inline recursive functions -emitterT void eLEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale) +emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale) { if( to == from ) { - eSHL32ItoR(to, scale); + SHL32ItoR(to, scale); return; } if( from != ESP ) { RexRXB(0,to,from,0); - write8(0x8d); - ModRM(0, to, 4); - ModRM(scale, from, 5); - write32(0); + write8(0x8d); + ModRM(0, to, 4); + ModRM(scale, from, 5); + write32(0); } else { assert( to != ESP ); - eMOV32RtoR(to, from); - eLEA32RStoR(to, to, scale); + MOV32RtoR(to, from); + LEA32RStoR(to, to, scale); } } diff --git a/pcsx2/x86/ix86/ix86_3dnow.inl b/pcsx2/x86/ix86/ix86_3dnow.inl index 5fdcce2347..e7a667f409 100644 --- a/pcsx2/x86/ix86/ix86_3dnow.inl +++ b/pcsx2/x86/ix86/ix86_3dnow.inl @@ -23,179 +23,179 @@ //------------------------------------------------------------------ /* femms */ -emitterT void eFEMMS( void ) +emitterT void FEMMS( void ) { - write16( 0x0E0F ); + write16( 0x0E0F ); } -emitterT void ePFCMPEQMtoR( x86IntRegType to, uptr from ) +emitterT void PFCMPEQMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xB0 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xB0 ); } -emitterT void ePFCMPGTMtoR( x86IntRegType to, uptr from ) +emitterT void PFCMPGTMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xA0 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xA0 ); } -emitterT void ePFCMPGEMtoR( x86IntRegType to, uptr from ) +emitterT void PFCMPGEMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x90 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x90 ); } -emitterT void ePFADDMtoR( x86IntRegType to, uptr from ) +emitterT void PFADDMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x9E ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x9E ); } -emitterT void ePFADDRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFADDRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x9E ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x9E ); } -emitterT void ePFSUBMtoR( x86IntRegType to, uptr from ) +emitterT void PFSUBMtoR( x86IntRegType to, uptr from ) { - 
write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x9A ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x9A ); } -emitterT void ePFSUBRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFSUBRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x9A ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x9A ); } -emitterT void ePFMULMtoR( x86IntRegType to, uptr from ) +emitterT void PFMULMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xB4 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xB4 ); } -emitterT void ePFMULRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFMULRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xB4 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xB4 ); } -emitterT void ePFRCPMtoR( x86IntRegType to, uptr from ) +emitterT void PFRCPMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x96 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x96 ); } -emitterT void ePFRCPRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFRCPRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x96 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x96 ); } -emitterT void ePFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xA6 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA6 ); } -emitterT void ePFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xB6 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xB6 ); } -emitterT void ePFRSQRTRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x97 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x97 ); } -emitterT void ePFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xA7 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA7 ); } -emitterT void ePF2IDMtoR( x86IntRegType to, uptr from ) +emitterT void PF2IDMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x1D ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x1D ); } -emitterT void ePF2IDRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PF2IDRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x1D ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x1D ); } -emitterT void ePI2FDMtoR( x86IntRegType to, uptr from ) +emitterT void PI2FDMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x0D ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x0D ); } -emitterT void ePI2FDRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PI2FDRtoR( x86IntRegType to, x86IntRegType from ) { - 
write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x0D ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x0D ); } -emitterT void ePFMAXMtoR( x86IntRegType to, uptr from ) +emitterT void PFMAXMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xA4 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0xA4 ); } -emitterT void ePFMAXRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFMAXRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0xA4 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0xA4 ); } -emitterT void ePFMINMtoR( x86IntRegType to, uptr from ) +emitterT void PFMINMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); - ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x94 ); + write16( 0x0F0F ); + ModRM( 0, to, DISP32 ); + write32( from ); + write8( 0x94 ); } -emitterT void ePFMINRtoR( x86IntRegType to, x86IntRegType from ) +emitterT void PFMINRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); - ModRM( 3, to, from ); - write8( 0x94 ); + write16( 0x0F0F ); + ModRM( 3, to, from ); + write8( 0x94 ); } diff --git a/pcsx2/x86/ix86/ix86_fpu.inl b/pcsx2/x86/ix86/ix86_fpu.inl index ae20b6c4ec..7ed607f30c 100644 --- a/pcsx2/x86/ix86/ix86_fpu.inl +++ b/pcsx2/x86/ix86/ix86_fpu.inl @@ -24,253 +24,253 @@ //------------------------------------------------------------------ /* fild m32 to fpu reg stack */ -emitterT void eFILD32( u32 from ) +emitterT void FILD32( u32 from ) { - write8( 0xDB ); - ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xDB ); + ModRM( 0, 0x0, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fistp m32 from fpu reg stack */ -emitterT void eFISTP32( u32 from ) +emitterT void FISTP32( u32 from ) { - write8( 0xDB ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xDB ); + ModRM( 0, 0x3, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fld m32 to fpu reg stack */ -emitterT void eFLD32( u32 from ) +emitterT void FLD32( u32 from ) { - write8( 0xD9 ); - ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD9 ); + ModRM( 0, 0x0, DISP32 ); + write32( MEMADDR(from, 4) ); } // fld st(i) -emitterT void eFLD(int st) { write16(0xc0d9+(st<<8)); } -emitterT void eFLD1() { write16(0xe8d9); } -emitterT void eFLDL2E() { write16(0xead9); } +emitterT void FLD(int st) { write16(0xc0d9+(st<<8)); } +emitterT void FLD1() { write16(0xe8d9); } +emitterT void FLDL2E() { write16(0xead9); } /* fst m32 from fpu reg stack */ -emitterT void eFST32( u32 to ) +emitterT void FST32( u32 to ) { - write8( 0xD9 ); - ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xD9 ); + ModRM( 0, 0x2, DISP32 ); + write32( MEMADDR(to, 4) ); } /* fstp m32 from fpu reg stack */ -emitterT void eFSTP32( u32 to ) +emitterT void FSTP32( u32 to ) { - write8( 0xD9 ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xD9 ); + ModRM( 0, 0x3, DISP32 ); + write32( MEMADDR(to, 4) ); } // fstp st(i) -emitterT void eFSTP(int st) { write16(0xd8dd+(st<<8)); } +emitterT void FSTP(int st) { write16(0xd8dd+(st<<8)); } /* fldcw fpu control word from m16 */ -emitterT void eFLDCW( u32 from ) +emitterT void FLDCW( u32 from ) { - write8( 0xD9 ); - ModRM( 0, 0x5, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD9 ); + ModRM( 0, 0x5, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fnstcw fpu control word to m16 */ -emitterT void eFNSTCW( u32 to ) +emitterT void FNSTCW( u32 to ) { 
- write8( 0xD9 ); - ModRM( 0, 0x7, DISP32 ); - write32( MEMADDR(to, 4) ); + write8( 0xD9 ); + ModRM( 0, 0x7, DISP32 ); + write32( MEMADDR(to, 4) ); } -emitterT void eFNSTSWtoAX() { write16(0xE0DF); } -emitterT void eFXAM() { write16(0xe5d9); } -emitterT void eFDECSTP() { write16(0xf6d9); } -emitterT void eFRNDINT() { write16(0xfcd9); } -emitterT void eFXCH(int st) { write16(0xc8d9+(st<<8)); } -emitterT void eF2XM1() { write16(0xf0d9); } -emitterT void eFSCALE() { write16(0xfdd9); } -emitterT void eFPATAN(void) { write16(0xf3d9); } -emitterT void eFSIN(void) { write16(0xfed9); } +emitterT void FNSTSWtoAX() { write16(0xE0DF); } +emitterT void FXAM() { write16(0xe5d9); } +emitterT void FDECSTP() { write16(0xf6d9); } +emitterT void FRNDINT() { write16(0xfcd9); } +emitterT void FXCH(int st) { write16(0xc8d9+(st<<8)); } +emitterT void F2XM1() { write16(0xf0d9); } +emitterT void FSCALE() { write16(0xfdd9); } +emitterT void FPATAN(void) { write16(0xf3d9); } +emitterT void FSIN(void) { write16(0xfed9); } /* fadd ST(src) to fpu reg stack ST(0) */ -emitterT void eFADD32Rto0( x86IntRegType src ) +emitterT void FADD32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xC0 + src ); + write8( 0xD8 ); + write8( 0xC0 + src ); } /* fadd ST(0) to fpu reg stack ST(src) */ -emitterT void eFADD320toR( x86IntRegType src ) +emitterT void FADD320toR( x86IntRegType src ) { - write8( 0xDC ); - write8( 0xC0 + src ); + write8( 0xDC ); + write8( 0xC0 + src ); } /* fsub ST(src) to fpu reg stack ST(0) */ -emitterT void eFSUB32Rto0( x86IntRegType src ) +emitterT void FSUB32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xE0 + src ); + write8( 0xD8 ); + write8( 0xE0 + src ); } /* fsub ST(0) to fpu reg stack ST(src) */ -emitterT void eFSUB320toR( x86IntRegType src ) +emitterT void FSUB320toR( x86IntRegType src ) { - write8( 0xDC ); - write8( 0xE8 + src ); + write8( 0xDC ); + write8( 0xE8 + src ); } /* fsubp -> substract ST(0) from ST(1), store in ST(1) and POP stack */ -emitterT void eFSUBP( void ) +emitterT void FSUBP( void ) { - write8( 0xDE ); - write8( 0xE9 ); + write8( 0xDE ); + write8( 0xE9 ); } /* fmul ST(src) to fpu reg stack ST(0) */ -emitterT void eFMUL32Rto0( x86IntRegType src ) +emitterT void FMUL32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xC8 + src ); + write8( 0xD8 ); + write8( 0xC8 + src ); } /* fmul ST(0) to fpu reg stack ST(src) */ -emitterT void eFMUL320toR( x86IntRegType src ) +emitterT void FMUL320toR( x86IntRegType src ) { - write8( 0xDC ); - write8( 0xC8 + src ); + write8( 0xDC ); + write8( 0xC8 + src ); } /* fdiv ST(src) to fpu reg stack ST(0) */ -emitterT void eFDIV32Rto0( x86IntRegType src ) +emitterT void FDIV32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xF0 + src ); + write8( 0xD8 ); + write8( 0xF0 + src ); } /* fdiv ST(0) to fpu reg stack ST(src) */ -emitterT void eFDIV320toR( x86IntRegType src ) +emitterT void FDIV320toR( x86IntRegType src ) { - write8( 0xDC ); - write8( 0xF8 + src ); + write8( 0xDC ); + write8( 0xF8 + src ); } -emitterT void eFDIV320toRP( x86IntRegType src ) +emitterT void FDIV320toRP( x86IntRegType src ) { - write8( 0xDE ); - write8( 0xF8 + src ); + write8( 0xDE ); + write8( 0xF8 + src ); } /* fadd m32 to fpu reg stack */ -emitterT void eFADD32( u32 from ) +emitterT void FADD32( u32 from ) { - write8( 0xD8 ); - ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x0, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fsub m32 to fpu reg stack */ -emitterT void eFSUB32( u32 from ) +emitterT void FSUB32( u32 
from ) { - write8( 0xD8 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x4, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fmul m32 to fpu reg stack */ -emitterT void eFMUL32( u32 from ) +emitterT void FMUL32( u32 from ) { - write8( 0xD8 ); - ModRM( 0, 0x1, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x1, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fdiv m32 to fpu reg stack */ -emitterT void eFDIV32( u32 from ) +emitterT void FDIV32( u32 from ) { - write8( 0xD8 ); - ModRM( 0, 0x6, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x6, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fabs fpu reg stack */ -emitterT void eFABS( void ) +emitterT void FABS( void ) { - write16( 0xE1D9 ); + write16( 0xE1D9 ); } /* fsqrt fpu reg stack */ -emitterT void eFSQRT( void ) +emitterT void FSQRT( void ) { - write16( 0xFAD9 ); + write16( 0xFAD9 ); } /* fchs fpu reg stack */ -emitterT void eFCHS( void ) +emitterT void FCHS( void ) { - write16( 0xE0D9 ); + write16( 0xE0D9 ); } /* fcomi st, st(i) */ -emitterT void eFCOMI( x86IntRegType src ) +emitterT void FCOMI( x86IntRegType src ) { - write8( 0xDB ); - write8( 0xF0 + src ); + write8( 0xDB ); + write8( 0xF0 + src ); } /* fcomip st, st(i) */ -emitterT void eFCOMIP( x86IntRegType src ) +emitterT void FCOMIP( x86IntRegType src ) { - write8( 0xDF ); - write8( 0xF0 + src ); + write8( 0xDF ); + write8( 0xF0 + src ); } /* fucomi st, st(i) */ -emitterT void eFUCOMI( x86IntRegType src ) +emitterT void FUCOMI( x86IntRegType src ) { - write8( 0xDB ); - write8( 0xE8 + src ); + write8( 0xDB ); + write8( 0xE8 + src ); } /* fucomip st, st(i) */ -emitterT void eFUCOMIP( x86IntRegType src ) +emitterT void FUCOMIP( x86IntRegType src ) { - write8( 0xDF ); - write8( 0xE8 + src ); + write8( 0xDF ); + write8( 0xE8 + src ); } /* fcom m32 to fpu reg stack */ -emitterT void eFCOM32( u32 from ) +emitterT void FCOM32( u32 from ) { - write8( 0xD8 ); - ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x2, DISP32 ); + write32( MEMADDR(from, 4) ); } /* fcomp m32 to fpu reg stack */ -emitterT void eFCOMP32( u32 from ) +emitterT void FCOMP32( u32 from ) { - write8( 0xD8 ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); + write8( 0xD8 ); + ModRM( 0, 0x3, DISP32 ); + write32( MEMADDR(from, 4) ); } #define FCMOV32( low, high ) \ { \ - write8( low ); \ - write8( high + from ); \ + write8( low ); \ + write8( high + from ); \ } -emitterT void eFCMOVB32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC0 ); } -emitterT void eFCMOVE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC8 ); } -emitterT void eFCMOVBE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD0 ); } -emitterT void eFCMOVU32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD8 ); } -emitterT void eFCMOVNB32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC0 ); } -emitterT void eFCMOVNE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC8 ); } -emitterT void eFCMOVNBE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD0 ); } -emitterT void eFCMOVNU32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD8 ); } +emitterT void FCMOVB32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC0 ); } +emitterT void FCMOVE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC8 ); } +emitterT void FCMOVBE32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD0 ); } +emitterT void FCMOVU32( x86IntRegType from ) { FCMOV32( 0xDA, 0xD8 ); } +emitterT void FCMOVNB32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC0 ); } +emitterT void FCMOVNE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xC8 ); } +emitterT 
void FCMOVNBE32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD0 ); } +emitterT void FCMOVNU32( x86IntRegType from ) { FCMOV32( 0xDB, 0xD8 ); } diff --git a/pcsx2/x86/ix86/ix86_group1.inl b/pcsx2/x86/ix86/ix86_group1.inl index ccc98726d8..99e1d6d4ff 100644 --- a/pcsx2/x86/ix86/ix86_group1.inl +++ b/pcsx2/x86/ix86/ix86_group1.inl @@ -66,7 +66,7 @@ emitterT void EmitSibMagic( int regfield, const ModSib& info ) int displacement_size = (info.Displacement == 0) ? 0 : ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); - if( !NeedsSibMagic( info ) ) + if( !NeedsSibMagic( info ) ) { // Use ModRm-only encoding, with the rm field holding an index/base register, if // one has been specified. If neither register is specified then use Disp32 form, @@ -76,26 +76,26 @@ emitterT void EmitSibMagic( int regfield, const ModSib& info ) x86Register basereg = info.GetEitherReg(); if( basereg.IsEmpty() ) - ModRM( 0, regfield, ModRm_UseDisp32 ); + ModRM( 0, regfield, ModRm_UseDisp32 ); else { if( basereg == ebp && displacement_size == 0 ) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - ModRM( displacement_size, regfield, basereg.Id ); + ModRM( displacement_size, regfield, basereg.Id ); } } else { - ModRM( displacement_size, regfield, ModRm_UseSib ); - SibSB( info.Index.Id, info.Scale, info.Base.Id ); + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Index.Id, info.Scale, info.Base.Id ); } switch( displacement_size ) { case 0: break; - case 1: write8( info.Displacement ); break; - case 2: write32( info.Displacement ); break; + case 1: write8( info.Displacement ); break; + case 2: write32( info.Displacement ); break; jNO_DEFAULT } } @@ -108,7 +108,7 @@ emitterT void EmitSibMagic( int regfield, const ModSib& info ) // emitterT void EmitSibMagic( x86Register regfield, const ModSib& info ) { - EmitSibMagic( regfield.Id, info ); + EmitSibMagic( regfield.Id, info ); } enum Group1InstructionType @@ -126,82 +126,93 @@ enum Group1InstructionType emitterT void Group1_32( Group1InstructionType inst, x86Register to, x86Register from ) { - write8( 0x01 | (inst<<3) ); - ModRM( 3, from.Id, to.Id ); + write8( 0x01 | (inst<<3) ); + ModRM( 3, from.Id, to.Id ); } emitterT void Group1_32( Group1InstructionType inst, x86Register to, u32 imm ) { if( is_s8( imm ) ) { - write8( 0x83 ); - ModRM( 3, inst, to.Id ); - write8( (s8)imm ); + write8( 0x83 ); + ModRM( 3, inst, to.Id ); + write8( (s8)imm ); } else { if( to == eax ) - write8( 0x05 | (inst<<3) ); + write8( 0x05 | (inst<<3) ); else { - write8( 0x81 ); - ModRM( 3, inst, to.Id ); + write8( 0x81 ); + ModRM( 3, inst, to.Id ); } - write32( imm ); + write32( imm ); } } emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, u32 imm ) { - write8( is_s8( imm ) ? 0x83 : 0x81 ); + write8( is_s8( imm ) ? 
0x83 : 0x81 ); - EmitSibMagic( inst, sibdest ); + EmitSibMagic( inst, sibdest ); if( is_s8( imm ) ) - write8( (s8)imm ); + write8( (s8)imm ); else - write32( imm ); + write32( imm ); } emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, x86Register from ) { - write8( 0x01 | (inst<<3) ); - EmitSibMagic( from, sibdest ); + write8( 0x01 | (inst<<3) ); + EmitSibMagic( from, sibdest ); } /* add m32 to r32 */ emitterT void Group1_32( Group1InstructionType inst, x86Register to, const ModSib& sibsrc ) { - write8( 0x03 | (inst<<3) ); - EmitSibMagic( to, sibsrc ); + write8( 0x03 | (inst<<3) ); + EmitSibMagic( to, sibsrc ); } emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) { if( to == eax ) { - write8( 0x04 | (inst<<3) ); - write8( imm ); + write8( 0x04 | (inst<<3) ); + write8( imm ); } else { - write8( 0x80 ); - ModRM( 3, inst, to.Id ); - write8( imm ); + write8( 0x80 ); + ModRM( 3, inst, to.Id ); + write8( imm ); } } ////////////////////////////////////////////////////////////////////////////////////////// // #define DEFINE_GROUP1_OPCODE( lwr, cod ) \ - emitterT void lwr##32( x86Register to, x86Register from ) { Group1_32( G1Type_##cod, to, from ); } \ - emitterT void lwr##32( x86Register to, u32 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ - emitterT void lwr##32( x86Register to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( void* to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( void* to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ - emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } + emitterT void lwr##32( x86Register to, x86Register from ) { Group1_32( G1Type_##cod, to, from ); } \ + emitterT void lwr##32( x86Register to, u32 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ + emitterT void lwr##32( x86Register to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( void* to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( void* to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ + emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } + +/* + emitterT void lwr##16( x86Register16 to, x86Register16 from ) { Group1_32( G1Type_##cod, to, from ); } \ + emitterT void lwr##16( x86Register16 to, u16 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ + emitterT void lwr##16( x86Register16 to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##16( void* to, x86Register16 from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##16( void* to, u16 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ + emitterT void lwr##16( x86Register16 to, const x86ModRm& from ){ Group1_32( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##16( const x86ModRm& to, x86Register16 from ){ Group1_32( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##16( const x86ModRm& to, u32 imm ) { Group1_32( 
G1Type_##cod, ptr[to], imm ); } +*/ DEFINE_GROUP1_OPCODE( add, ADD ); DEFINE_GROUP1_OPCODE( cmp, CMP ); @@ -229,14 +240,14 @@ static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) ////////////////////////////////////////////////////////////////////////////////////////// // #define DEFINE_GROUP1_OPCODE_LEGACY( lwr, cod ) \ - emitterT void e##cod##32RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##32( _reghlp(to), _reghlp(from) ); } \ - emitterT void e##cod##32ItoR( x86IntRegType to, u32 imm ) { x86Emitter::lwr##32( _reghlp(to), imm ); } \ - emitterT void e##cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ - emitterT void e##cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ - emitterT void e##cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ - emitterT void e##cod##32ItoRm( x86IntRegType to, u32 imm, int offset=0 ){ x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ - emitterT void e##cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ - emitterT void e##cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } + emitterT void cod##32RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##32( _reghlp(to), _reghlp(from) ); } \ + emitterT void cod##32ItoR( x86IntRegType to, u32 imm ) { x86Emitter::lwr##32( _reghlp(to), imm ); } \ + emitterT void cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ + emitterT void cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ + emitterT void cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ + emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset=0 ){ x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ + emitterT void cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ + emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } DEFINE_GROUP1_OPCODE_LEGACY( add, ADD ); DEFINE_GROUP1_OPCODE_LEGACY( cmp, CMP ); @@ -247,12 +258,12 @@ DEFINE_GROUP1_OPCODE_LEGACY( and, AND ); DEFINE_GROUP1_OPCODE_LEGACY( sub, SUB ); DEFINE_GROUP1_OPCODE_LEGACY( xor, XOR ); -emitterT void eAND32I8toR( x86IntRegType to, s8 from ) +emitterT void AND32I8toR( x86IntRegType to, s8 from ) { - x86Emitter::and32( _reghlp(to), from ); + x86Emitter::and32( _reghlp(to), from ); } -emitterT void eAND32I8toM( uptr to, s8 from ) +emitterT void AND32I8toM( uptr to, s8 from ) { - x86Emitter::and32( (void*)to, from ); + x86Emitter::and32( (void*)to, from ); } diff --git a/pcsx2/x86/ix86/ix86_mmx.inl b/pcsx2/x86/ix86/ix86_mmx.inl index bc7d183319..f63686e700 100644 --- a/pcsx2/x86/ix86/ix86_mmx.inl +++ b/pcsx2/x86/ix86/ix86_mmx.inl @@ -25,623 +25,623 @@ //------------------------------------------------------------------ /* movq m64 to r64 */ -emitterT void eMOVQMtoR( x86MMXRegType to, uptr from ) +emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { - write16( 0x6F0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x6F0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* movq r64 to m64 */ -emitterT void eMOVQRtoM( uptr to, x86MMXRegType from ) 
+emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { - write16( 0x7F0F ); - ModRM( 0, from, DISP32 ); - write32(MEMADDR(to, 4)); + write16( 0x7F0F ); + ModRM( 0, from, DISP32 ); + write32(MEMADDR(to, 4)); } /* pand r64 to r64 */ -emitterT void ePANDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PANDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xDB0F ); - ModRM( 3, to, from ); + write16( 0xDB0F ); + ModRM( 3, to, from ); } -emitterT void ePANDNRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xDF0F ); - ModRM( 3, to, from ); + write16( 0xDF0F ); + ModRM( 3, to, from ); } /* por r64 to r64 */ -emitterT void ePORRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PORRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xEB0F ); - ModRM( 3, to, from ); + write16( 0xEB0F ); + ModRM( 3, to, from ); } /* pxor r64 to r64 */ -emitterT void ePXORRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PXORRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xEF0F ); - ModRM( 3, to, from ); + write16( 0xEF0F ); + ModRM( 3, to, from ); } /* psllq r64 to r64 */ -emitterT void ePSLLQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xF30F ); - ModRM( 3, to, from ); + write16( 0xF30F ); + ModRM( 3, to, from ); } /* psllq m64 to r64 */ -emitterT void ePSLLQMtoR( x86MMXRegType to, uptr from ) +emitterT void PSLLQMtoR( x86MMXRegType to, uptr from ) { - write16( 0xF30F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xF30F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* psllq imm8 to r64 */ -emitterT void ePSLLQItoR( x86MMXRegType to, u8 from ) +emitterT void PSLLQItoR( x86MMXRegType to, u8 from ) { - write16( 0x730F ); - ModRM( 3, 6, to); - write8( from ); + write16( 0x730F ); + ModRM( 3, 6, to); + write8( from ); } /* psrlq r64 to r64 */ -emitterT void ePSRLQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xD30F ); - ModRM( 3, to, from ); + write16( 0xD30F ); + ModRM( 3, to, from ); } /* psrlq m64 to r64 */ -emitterT void ePSRLQMtoR( x86MMXRegType to, uptr from ) +emitterT void PSRLQMtoR( x86MMXRegType to, uptr from ) { - write16( 0xD30F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xD30F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* psrlq imm8 to r64 */ -emitterT void ePSRLQItoR( x86MMXRegType to, u8 from ) +emitterT void PSRLQItoR( x86MMXRegType to, u8 from ) { - write16( 0x730F ); - ModRM( 3, 2, to); - write8( from ); + write16( 0x730F ); + ModRM( 3, 2, to); + write8( from ); } /* paddusb r64 to r64 */ -emitterT void ePADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xDC0F ); - ModRM( 3, to, from ); + write16( 0xDC0F ); + ModRM( 3, to, from ); } /* paddusb m64 to r64 */ -emitterT void ePADDUSBMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDUSBMtoR( x86MMXRegType to, uptr from ) { - write16( 0xDC0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xDC0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* paddusw r64 to r64 */ -emitterT void ePADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xDD0F ); - ModRM( 3, to, from ); + write16( 0xDD0F ); + 
ModRM( 3, to, from ); } /* paddusw m64 to r64 */ -emitterT void ePADDUSWMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDUSWMtoR( x86MMXRegType to, uptr from ) { - write16( 0xDD0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xDD0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* paddb r64 to r64 */ -emitterT void ePADDBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xFC0F ); - ModRM( 3, to, from ); + write16( 0xFC0F ); + ModRM( 3, to, from ); } /* paddb m64 to r64 */ -emitterT void ePADDBMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDBMtoR( x86MMXRegType to, uptr from ) { - write16( 0xFC0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xFC0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* paddw r64 to r64 */ -emitterT void ePADDWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xFD0F ); - ModRM( 3, to, from ); + write16( 0xFD0F ); + ModRM( 3, to, from ); } /* paddw m64 to r64 */ -emitterT void ePADDWMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDWMtoR( x86MMXRegType to, uptr from ) { - write16( 0xFD0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xFD0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* paddd r64 to r64 */ -emitterT void ePADDDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xFE0F ); - ModRM( 3, to, from ); + write16( 0xFE0F ); + ModRM( 3, to, from ); } /* paddd m64 to r64 */ -emitterT void ePADDDMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDDMtoR( x86MMXRegType to, uptr from ) { - write16( 0xFE0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xFE0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* emms */ -emitterT void eEMMS() +emitterT void EMMS() { - write16( 0x770F ); + write16( 0x770F ); } -emitterT void ePADDSBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xEC0F ); - ModRM( 3, to, from ); + write16( 0xEC0F ); + ModRM( 3, to, from ); } -emitterT void ePADDSWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xED0F ); - ModRM( 3, to, from ); + write16( 0xED0F ); + ModRM( 3, to, from ); } // paddq m64 to r64 (sse2 only?) -emitterT void ePADDQMtoR( x86MMXRegType to, uptr from ) +emitterT void PADDQMtoR( x86MMXRegType to, uptr from ) { - write16( 0xD40F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xD40F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // paddq r64 to r64 (sse2 only?) 
-emitterT void ePADDQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xD40F ); - ModRM( 3, to, from ); + write16( 0xD40F ); + ModRM( 3, to, from ); } -emitterT void ePSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xE80F ); - ModRM( 3, to, from ); + write16( 0xE80F ); + ModRM( 3, to, from ); } -emitterT void ePSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xE90F ); - ModRM( 3, to, from ); + write16( 0xE90F ); + ModRM( 3, to, from ); } -emitterT void ePSUBBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xF80F ); - ModRM( 3, to, from ); + write16( 0xF80F ); + ModRM( 3, to, from ); } -emitterT void ePSUBWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xF90F ); - ModRM( 3, to, from ); + write16( 0xF90F ); + ModRM( 3, to, from ); } -emitterT void ePSUBDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xFA0F ); - ModRM( 3, to, from ); + write16( 0xFA0F ); + ModRM( 3, to, from ); } -emitterT void ePSUBDMtoR( x86MMXRegType to, uptr from ) +emitterT void PSUBDMtoR( x86MMXRegType to, uptr from ) { - write16( 0xFA0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xFA0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePSUBUSBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBUSBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xD80F ); - ModRM( 3, to, from ); + write16( 0xD80F ); + ModRM( 3, to, from ); } -emitterT void ePSUBUSWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBUSWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xD90F ); - ModRM( 3, to, from ); + write16( 0xD90F ); + ModRM( 3, to, from ); } // psubq m64 to r64 (sse2 only?) -emitterT void ePSUBQMtoR( x86MMXRegType to, uptr from ) +emitterT void PSUBQMtoR( x86MMXRegType to, uptr from ) { - write16( 0xFB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xFB0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // psubq r64 to r64 (sse2 only?) -emitterT void ePSUBQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xFB0F ); - ModRM( 3, to, from ); + write16( 0xFB0F ); + ModRM( 3, to, from ); } // pmuludq m64 to r64 (sse2 only?) -emitterT void ePMULUDQMtoR( x86MMXRegType to, uptr from ) +emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) { - write16( 0xF40F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xF40F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } // pmuludq r64 to r64 (sse2 only?) 
-emitterT void ePMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xF40F ); - ModRM( 3, to, from ); + write16( 0xF40F ); + ModRM( 3, to, from ); } -emitterT void ePCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x740F ); - ModRM( 3, to, from ); + write16( 0x740F ); + ModRM( 3, to, from ); } -emitterT void ePCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x750F ); - ModRM( 3, to, from ); + write16( 0x750F ); + ModRM( 3, to, from ); } -emitterT void ePCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x760F ); - ModRM( 3, to, from ); + write16( 0x760F ); + ModRM( 3, to, from ); } -emitterT void ePCMPEQDMtoR( x86MMXRegType to, uptr from ) +emitterT void PCMPEQDMtoR( x86MMXRegType to, uptr from ) { - write16( 0x760F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x760F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x640F ); - ModRM( 3, to, from ); + write16( 0x640F ); + ModRM( 3, to, from ); } -emitterT void ePCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x650F ); - ModRM( 3, to, from ); + write16( 0x650F ); + ModRM( 3, to, from ); } -emitterT void ePCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x660F ); - ModRM( 3, to, from ); + write16( 0x660F ); + ModRM( 3, to, from ); } -emitterT void ePCMPGTDMtoR( x86MMXRegType to, uptr from ) +emitterT void PCMPGTDMtoR( x86MMXRegType to, uptr from ) { - write16( 0x660F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x660F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePSRLWItoR( x86MMXRegType to, u8 from ) +emitterT void PSRLWItoR( x86MMXRegType to, u8 from ) { - write16( 0x710F ); - ModRM( 3, 2 , to ); - write8( from ); + write16( 0x710F ); + ModRM( 3, 2 , to ); + write8( from ); } -emitterT void ePSRLDItoR( x86MMXRegType to, u8 from ) +emitterT void PSRLDItoR( x86MMXRegType to, u8 from ) { - write16( 0x720F ); - ModRM( 3, 2 , to ); - write8( from ); + write16( 0x720F ); + ModRM( 3, 2 , to ); + write8( from ); } -emitterT void ePSRLDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xD20F ); - ModRM( 3, to, from ); + write16( 0xD20F ); + ModRM( 3, to, from ); } -emitterT void ePSLLWItoR( x86MMXRegType to, u8 from ) +emitterT void PSLLWItoR( x86MMXRegType to, u8 from ) { - write16( 0x710F ); - ModRM( 3, 6 , to ); - write8( from ); + write16( 0x710F ); + ModRM( 3, 6 , to ); + write8( from ); } -emitterT void ePSLLDItoR( x86MMXRegType to, u8 from ) +emitterT void PSLLDItoR( x86MMXRegType to, u8 from ) { - write16( 0x720F ); - ModRM( 3, 6 , to ); - write8( from ); + write16( 0x720F ); + ModRM( 3, 6 , to ); + write8( from ); } -emitterT void ePSLLDRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xF20F ); - ModRM( 3, to, from ); + write16( 0xF20F ); + ModRM( 3, to, from ); } 
-emitterT void ePSRAWItoR( x86MMXRegType to, u8 from ) +emitterT void PSRAWItoR( x86MMXRegType to, u8 from ) { - write16( 0x710F ); - ModRM( 3, 4 , to ); - write8( from ); + write16( 0x710F ); + ModRM( 3, 4 , to ); + write8( from ); } -emitterT void ePSRADItoR( x86MMXRegType to, u8 from ) +emitterT void PSRADItoR( x86MMXRegType to, u8 from ) { - write16( 0x720F ); - ModRM( 3, 4 , to ); - write8( from ); + write16( 0x720F ); + ModRM( 3, 4 , to ); + write8( from ); } -emitterT void ePSRADRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0xE20F ); - ModRM( 3, to, from ); + write16( 0xE20F ); + ModRM( 3, to, from ); } /* por m64 to r64 */ -emitterT void ePORMtoR( x86MMXRegType to, uptr from ) +emitterT void PORMtoR( x86MMXRegType to, uptr from ) { - write16( 0xEB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xEB0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* pxor m64 to r64 */ -emitterT void ePXORMtoR( x86MMXRegType to, uptr from ) +emitterT void PXORMtoR( x86MMXRegType to, uptr from ) { - write16( 0xEF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xEF0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* pand m64 to r64 */ -emitterT void ePANDMtoR( x86MMXRegType to, uptr from ) +emitterT void PANDMtoR( x86MMXRegType to, uptr from ) { - //u64 rip = (u64)x86Ptr[0] + 7; - write16( 0xDB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + //u64 rip = (u64)x86Ptr + 7; + write16( 0xDB0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePANDNMtoR( x86MMXRegType to, uptr from ) +emitterT void PANDNMtoR( x86MMXRegType to, uptr from ) { - write16( 0xDF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0xDF0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x6A0F ); - ModRM( 3, to, from ); + write16( 0x6A0F ); + ModRM( 3, to, from ); } -emitterT void ePUNPCKHDQMtoR( x86MMXRegType to, uptr from ) +emitterT void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ) { - write16( 0x6A0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x6A0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void ePUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x620F ); - ModRM( 3, to, from ); + write16( 0x620F ); + ModRM( 3, to, from ); } -emitterT void ePUNPCKLDQMtoR( x86MMXRegType to, uptr from ) +emitterT void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ) { - write16( 0x620F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x620F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void eMOVQ64ItoR( x86MMXRegType reg, u64 i ) +emitterT void MOVQ64ItoR( x86MMXRegType reg, u64 i ) { - eMOVQMtoR( reg, ( uptr )(x86Ptr[0]) + 2 + 7 ); - eJMP8( 8 ); - write64( i ); + MOVQMtoR( reg, ( uptr )(x86Ptr) + 2 + 7 ); + JMP8( 8 ); + write64( i ); } -emitterT void eMOVQRtoR( x86MMXRegType to, x86MMXRegType from ) +emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x6F0F ); - ModRM( 3, to, from ); + write16( 0x6F0F ); + ModRM( 3, to, from ); } -emitterT void eMOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOVQRmtoR( x86MMXRegType to, 
x86IntRegType from, int offset=0 ) { - write16( 0x6F0F ); + write16( 0x6F0F ); if( offset < 128 && offset >= -128) { - ModRM( 1, to, from ); - write8(offset); + ModRM( 1, to, from ); + write8(offset); } else { - ModRM( 2, to, from ); - write32(offset); + ModRM( 2, to, from ); + write32(offset); } } -emitterT void eMOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ) +emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ) { - write16( 0x7F0F ); + write16( 0x7F0F ); if( offset < 128 && offset >= -128) { - ModRM( 1, from , to ); - write8(offset); + ModRM( 1, from , to ); + write8(offset); } else { - ModRM( 2, from, to ); - write32(offset); + ModRM( 2, from, to ); + write32(offset); } } /* movd m32 to r64 */ -emitterT void eMOVDMtoMMX( x86MMXRegType to, uptr from ) +emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { - write16( 0x6E0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x6E0F ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } /* movd r64 to m32 */ -emitterT void eMOVDMMXtoM( uptr to, x86MMXRegType from ) +emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) { - write16( 0x7E0F ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + write16( 0x7E0F ); + ModRM( 0, from, DISP32 ); + write32( MEMADDR(to, 4) ); } -emitterT void eMOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) +emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) { - write16( 0x6E0F ); - ModRM( 3, to, from ); + write16( 0x6E0F ); + ModRM( 3, to, from ); } -emitterT void eMOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from ) +emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from ) { - write16( 0x6E0F ); - ModRM( 0, to, from ); + write16( 0x6E0F ); + ModRM( 0, to, from ); } -emitterT void eMOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset ) +emitterT void MOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset ) { - write16( 0x6E0F ); + write16( 0x6E0F ); if( offset < 128 ) { - ModRM( 1, to, from ); - write8(offset); + ModRM( 1, to, from ); + write8(offset); } else { - ModRM( 2, to, from ); - write32(offset); + ModRM( 2, to, from ); + write32(offset); } } -emitterT void eMOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) +emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { - write16( 0x7E0F ); - ModRM( 3, from, to ); + write16( 0x7E0F ); + ModRM( 3, from, to ); } -emitterT void eMOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from ) +emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from ) { - write16( 0x7E0F ); - ModRM( 0, from, to ); + write16( 0x7E0F ); + ModRM( 0, from, to ); if( to >= 4 ) { // no idea why assert( to == ESP ); - write8(0x24); + write8(0x24); } } -emitterT void eMOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ) +emitterT void MOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ) { - write16( 0x7E0F ); + write16( 0x7E0F ); if( offset < 128 ) { - ModRM( 1, from, to ); - write8(offset); + ModRM( 1, from, to ); + write8(offset); } else { - ModRM( 2, from, to ); - write32(offset); + ModRM( 2, from, to ); + write32(offset); } } ///* movd r32 to r64 */ -//emitterT void eMOVD32MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) +//emitterT void MOVD32MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) //{ -// write16( 0x6E0F ); -// ModRM( 3, to, from ); +// write16( 0x6E0F ); +// ModRM( 3, to, from ); //} // ///* movq r64 to r32 */ -//emitterT void eMOVD64MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) 
+//emitterT void MOVD64MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) //{ -// write16( 0x7E0F ); -// ModRM( 3, from, to ); +// write16( 0x7E0F ); +// ModRM( 3, from, to ); //} // untested -emitterT void ePACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from) +emitterT void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from) { - write16( 0x630F ); - ModRM( 3, to, from ); + write16( 0x630F ); + ModRM( 3, to, from ); } -emitterT void ePACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from) +emitterT void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from) { - write16( 0x6B0F ); - ModRM( 3, to, from ); + write16( 0x6B0F ); + ModRM( 3, to, from ); } -emitterT void ePMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) +emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { - write16( 0xD70F ); - ModRM( 3, to, from ); + write16( 0xD70F ); + ModRM( 3, to, from ); } -emitterT void ePINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) +emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3); - write16( 0xc40f ); - ModRM( 3, to, from ); - write8( imm8 ); + write16( 0xc40f ); + ModRM( 3, to, from ); + write8( imm8 ); } -emitterT void ePSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) +emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) { - write16(0x700f); - ModRM( 3, to, from ); - write8(imm8); + write16(0x700f); + ModRM( 3, to, from ); + write8(imm8); } -emitterT void ePSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) +emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) { - write16( 0x700f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); - write8(imm8); + write16( 0x700f ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); + write8(imm8); } -emitterT void eMASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) +emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { - write16(0xf70f); - ModRM( 3, to, from ); + write16(0xf70f); + ModRM( 3, to, from ); } diff --git a/pcsx2/x86/ix86/ix86_sse.inl b/pcsx2/x86/ix86/ix86_sse.inl index a52ba6ccd7..af25c1cbaa 100644 --- a/pcsx2/x86/ix86/ix86_sse.inl +++ b/pcsx2/x86/ix86/ix86_sse.inl @@ -37,474 +37,474 @@ static const bool AlwaysUseMovaps = true; #define SSEMtoR( code, overb ) \ assert( to < XMMREGS ), \ RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) + write16( code ), \ + ModRM( 0, to, DISP32 ), \ + write32( MEMADDR(from, 4 + overb) ) #define SSERtoM( code, overb ) \ assert( from < XMMREGS), \ RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) + write16( code ), \ + ModRM( 0, from, DISP32 ), \ + write32( MEMADDR(to, 4 + overb) ) #define SSE_SS_MtoR( code, overb ) \ assert( to < XMMREGS ), \ - write8( 0xf3 ), \ + write8( 0xf3 ), \ RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) + write16( code ), \ + ModRM( 0, to, DISP32 ), \ + write32( MEMADDR(from, 4 + overb) ) #define SSE_SS_RtoM( code, overb ) \ assert( from < XMMREGS), \ - write8( 0xf3 ), \ + write8( 0xf3 ), \ RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) + write16( code ), \ + ModRM( 0, from, DISP32 ), \ + write32( MEMADDR(to, 4 + overb) ) #define SSERtoR( code ) \ assert( to < XMMREGS && from < XMMREGS), \ RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) + write16( code ), \ + ModRM( 3, to, from ) 
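These SSEMtoR / SSERtoM / SSERtoR helpers (and the prefixed SS/SD/66 variants that follow) are what all of the one-line wrappers later in this file expand to: an optional prefix byte, the two opcode bytes, a ModRM byte and, for the memory forms, a 32-bit displacement. As a rough, hedged sketch of the end result for SSE_MOVAPS_M128_to_XMM (defined further down as SSEMtoR( 0x280f, 0 )), assuming a 32-bit build in which RexR() emits nothing, DISP32 is the rm value 5, and MEMADDR(addr, n) is simply addr; none of those details are shown in this patch:

#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<uint8_t> buf;
static void write8 (uint8_t  v) { buf.push_back(v); }
static void write16(uint16_t v) { write8(v & 0xff); write8(v >> 8); }
static void write32(uint32_t v) { for (int i = 0; i < 4; ++i) write8((v >> (8 * i)) & 0xff); }
static void ModRM(int mod, int reg, int rm) { write8((uint8_t)((mod << 6) | (reg << 3) | rm)); }
static const int DISP32 = 5;                             // assumption: rm=101 selects [disp32]

// Approximation of SSEMtoR( 0x280f, 0 ), i.e. movaps xmm(to), [from]
static void MOVAPS_M128_to_XMM_sketch(int to, uint32_t from)
{
    write16(0x280F);                                     // 0F 28
    ModRM(0, to, DISP32);                                // mod=00, rm=101 -> absolute disp32
    write32(from);                                       // the address itself
}

int main()
{
    MOVAPS_M128_to_XMM_sketch(3, 0x00ABCDEFu);           // movaps xmm3, [0x00ABCDEF]
    for (uint8_t b : buf) printf("%02X ", b);            // 0F 28 1D EF CD AB 00
    printf("\n");
    return 0;
}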
#define SSEMtoR66( code ) \ - write8( 0x66 ), \ + write8( 0x66 ), \ SSEMtoR( code, 0 ) #define SSERtoM66( code ) \ - write8( 0x66 ), \ + write8( 0x66 ), \ SSERtoM( code, 0 ) #define SSERtoR66( code ) \ - write8( 0x66 ), \ + write8( 0x66 ), \ SSERtoR( code ) #define _SSERtoR66( code ) \ assert( to < XMMREGS && from < XMMREGS), \ - write8( 0x66 ), \ + write8( 0x66 ), \ RexRB(0, from, to), \ - write16( code ), \ - ModRM( 3, from, to ) + write16( code ), \ + ModRM( 3, from, to ) #define SSE_SS_RtoR( code ) \ assert( to < XMMREGS && from < XMMREGS), \ - write8( 0xf3 ), \ + write8( 0xf3 ), \ RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) + write16( code ), \ + ModRM( 3, to, from ) #define SSE_SD_MtoR( code, overb ) \ assert( to < XMMREGS ) , \ - write8( 0xf2 ), \ + write8( 0xf2 ), \ RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) \ + write16( code ), \ + ModRM( 0, to, DISP32 ), \ + write32( MEMADDR(from, 4 + overb) ) \ #define SSE_SD_RtoM( code, overb ) \ assert( from < XMMREGS) , \ - write8( 0xf2 ), \ + write8( 0xf2 ), \ RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) \ + write16( code ), \ + ModRM( 0, from, DISP32 ), \ + write32( MEMADDR(to, 4 + overb) ) \ #define SSE_SD_RtoR( code ) \ assert( to < XMMREGS && from < XMMREGS) , \ - write8( 0xf2 ), \ + write8( 0xf2 ), \ RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) + write16( code ), \ + ModRM( 3, to, from ) #define CMPPSMtoR( op ) \ SSEMtoR( 0xc20f, 1 ), \ - write8( op ) + write8( op ) #define CMPPSRtoR( op ) \ SSERtoR( 0xc20f ), \ - write8( op ) + write8( op ) #define CMPSSMtoR( op ) \ SSE_SS_MtoR( 0xc20f, 1 ), \ - write8( op ) + write8( op ) #define CMPSSRtoR( op ) \ SSE_SS_RtoR( 0xc20f ), \ - write8( op ) + write8( op ) #define CMPSDMtoR( op ) \ SSE_SD_MtoR( 0xc20f, 1 ), \ - write8( op ) + write8( op ) #define CMPSDRtoR( op ) \ SSE_SD_RtoR( 0xc20f ), \ - write8( op ) + write8( op ) /* movups [r32][r32*scale] to xmm1 */ -emitterT void eSSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(0, to, from2, from); - write16( 0x100f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); + write16( 0x100f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); } /* movups xmm1 to [r32][r32*scale] */ -emitterT void eSSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { RexRXB(1, to, from2, from); - write16( 0x110f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); + write16( 0x110f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); } /* movups [r32] to r32 */ -emitterT void eSSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ) +emitterT void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ) { RexRB(0, to, from); - write16( 0x100f ); - ModRM( 0, to, from ); + write16( 0x100f ); + ModRM( 0, to, from ); } /* movups r32 to [r32] */ -emitterT void eSSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ) +emitterT void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ) { RexRB(0, from, to); - write16( 0x110f ); - ModRM( 0, from, to ); + write16( 0x110f ); + ModRM( 0, from, to ); } /* movlps [r32] to r32 */ -emitterT void eSSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) +emitterT 
void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) { RexRB(1, to, from); - write16( 0x120f ); - ModRM( 0, to, from ); + write16( 0x120f ); + ModRM( 0, to, from ); } -emitterT void eSSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - write16( 0x120f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x120f ); + WriteRmOffsetFrom(to, from, offset); } /* movaps r32 to [r32] */ -emitterT void eSSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) +emitterT void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) { RexRB(0, from, to); - write16( 0x130f ); - ModRM( 0, from, to ); + write16( 0x130f ); + ModRM( 0, from, to ); } -emitterT void eSSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, from, to); - write16( 0x130f ); - WriteRmOffsetFrom(from, to, offset); + write16( 0x130f ); + WriteRmOffsetFrom(from, to, offset); } /* movaps [r32][r32*scale] to xmm1 */ -emitterT void eSSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { assert( from != EBP ); RexRXB(0, to, from2, from); - write16( 0x280f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); + write16( 0x280f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); } /* movaps xmm1 to [r32][r32*scale] */ -emitterT void eSSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) { assert( from != EBP ); RexRXB(0, to, from2, from); - write16( 0x290f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); + write16( 0x290f ); + ModRM( 0, to, 0x4 ); + SibSB( scale, from2, from ); } // movaps [r32+offset] to r32 -emitterT void eSSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - write16( 0x280f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x280f ); + WriteRmOffsetFrom(to, from, offset); } // movaps r32 to [r32+offset] -emitterT void eSSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); - write16( 0x290f ); - WriteRmOffsetFrom(from, to, offset); + write16( 0x290f ); + WriteRmOffsetFrom(from, to, offset); } // movdqa [r32+offset] to r32 -emitterT void eSSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { if( AlwaysUseMovaps ) - eSSE_MOVAPSRmtoR( to, from, offset ); + SSE_MOVAPSRmtoR( to, from, offset ); else { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write16( 0x6f0f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x6f0f ); + WriteRmOffsetFrom(to, from, offset); } } // movdqa r32 to [r32+offset] -emitterT void eSSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { if( AlwaysUseMovaps ) - eSSE_MOVAPSRtoRm( to, from, offset ); + SSE_MOVAPSRtoRm( to, from, offset ); else { - write8(0x66); + write8(0x66); RexRB(0, from, to); - write16( 
0x7f0f ); - WriteRmOffsetFrom(from, to, offset); + write16( 0x7f0f ); + WriteRmOffsetFrom(from, to, offset); } } // movups [r32+offset] to r32 -emitterT void eSSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - write16( 0x100f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x100f ); + WriteRmOffsetFrom(to, from, offset); } // movups r32 to [r32+offset] -emitterT void eSSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); - write16( 0x110f ); - WriteRmOffsetFrom(from, to, offset); + write16( 0x110f ); + WriteRmOffsetFrom(from, to, offset); } //**********************************************************************************/ //MOVAPS: Move aligned Packed Single Precision FP values * //********************************************************************************** -emitterT void eSSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x280f, 0 ); } -emitterT void eSSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x290f, 0 ); } -emitterT void eSSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSERtoR( 0x280f ); } } +emitterT void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x280f, 0 ); } +emitterT void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x290f, 0 ); } +emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSERtoR( 0x280f ); } } -emitterT void eSSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); } -emitterT void eSSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x110f, 0 ); } +emitterT void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); } +emitterT void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x110f, 0 ); } -emitterT void eSSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x100f); } -emitterT void eSSE2_MOVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x100f, 0); } -emitterT void eSSE2_MOVSD_XMM_to_M64( uptr to, x86SSERegType from ) { SSE_SD_RtoM( 0x110f, 0); } +emitterT void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x100f); } +emitterT void SSE2_MOVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x100f, 0); } +emitterT void SSE2_MOVSD_XMM_to_M64( uptr to, x86SSERegType from ) { SSE_SD_RtoM( 0x110f, 0); } -emitterT void eSSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) +emitterT void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) { - write8(0xf3); SSEMtoR( 0x7e0f, 0); + write8(0xf3); SSEMtoR( 0x7e0f, 0); } -emitterT void eSSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +emitterT void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { - write8(0xf3); SSERtoR( 0x7e0f); + write8(0xf3); SSERtoR( 0x7e0f); } -emitterT void eSSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ) +emitterT void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM66(0xd60f); } -emitterT void eSSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) +emitterT void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) { - write8(0xf2); + write8(0xf2); SSERtoR( 0xd60f); } -emitterT void eSSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) +emitterT void SSE2_MOVQ2DQ_MM_to_XMM( 
x86SSERegType to, x86MMXRegType from) { - write8(0xf3); + write8(0xf3); SSERtoR( 0xd60f); } //**********************************************************************************/ //MOVSS: Move Scalar Single-Precision FP value * //********************************************************************************** -emitterT void eSSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); } -emitterT void eSSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); } +emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); } +emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); } -emitterT void eSSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } } +emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } } -emitterT void eSSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { - write8(0xf3); + write8(0xf3); RexRB(0, to, from); - write16( 0x100f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x100f ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { - write8(0xf3); + write8(0xf3); RexRB(0, from, to); - write16(0x110f); - WriteRmOffsetFrom(from, to, offset); + write16(0x110f); + WriteRmOffsetFrom(from, to, offset); } -emitterT void eSSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); } +emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); } //**********************************************************************************/ //MOVLPS: Move low Packed Single-Precision FP * //********************************************************************************** -emitterT void eSSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } -emitterT void eSSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } +emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } +emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } -emitterT void eSSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - write16( 0x120f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x120f ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); - write16(0x130f); - WriteRmOffsetFrom(from, to, offset); + write16(0x130f); + WriteRmOffsetFrom(from, to, offset); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MOVHPS: Move High Packed Single-Precision FP * //********************************************************************************** -emitterT void eSSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } -emitterT void eSSE_MOVHPS_XMM_to_M64( u32 to, 
x86SSERegType from ) { SSERtoM( 0x170f, 0 ); } +emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } +emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); } -emitterT void eSSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { RexRB(0, to, from); - write16( 0x160f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x160f ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { RexRB(0, from, to); - write16(0x170f); - WriteRmOffsetFrom(from, to, offset); + write16(0x170f); + WriteRmOffsetFrom(from, to, offset); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MOVLHPS: Moved packed Single-Precision FP low to high * //********************************************************************************** -emitterT void eSSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); } +emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); } ////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MOVHLPS: Moved packed Single-Precision FP High to Low * //********************************************************************************** -emitterT void eSSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); } +emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); } /////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ANDPS: Logical Bit-wise AND for Single FP * //********************************************************************************** -emitterT void eSSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); } -emitterT void eSSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); } +emitterT void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); } +emitterT void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); } -emitterT void eSSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x540f ); } -emitterT void eSSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x540f ); } +emitterT void SSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x540f ); } +emitterT void SSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x540f ); } /////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ANDNPS : Logical Bit-wise AND NOT of Single-precision FP values * //********************************************************************************** -emitterT void eSSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); } -emitterT void eSSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); } +emitterT void 
SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); } +emitterT void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); } -emitterT void eSSE2_ANDNPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x550f ); } -emitterT void eSSE2_ANDNPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x550f ); } +emitterT void SSE2_ANDNPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x550f ); } +emitterT void SSE2_ANDNPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x550f ); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //RCPPS : Packed Single-Precision FP Reciprocal * //********************************************************************************** -emitterT void eSSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x530f ); } -emitterT void eSSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x530f, 0 ); } +emitterT void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x530f ); } +emitterT void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x530f, 0 ); } -emitterT void eSSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x530f); } -emitterT void eSSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR(0x530f, 0); } +emitterT void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR(0x530f); } +emitterT void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR(0x530f, 0); } ////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ORPS : Bit-wise Logical OR of Single-Precision FP Data * //********************************************************************************** -emitterT void eSSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); } -emitterT void eSSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); } +emitterT void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); } +emitterT void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); } -emitterT void eSSE2_ORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x560f ); } -emitterT void eSSE2_ORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x560f ); } +emitterT void SSE2_ORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x560f ); } +emitterT void SSE2_ORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x560f ); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //XORPS : Bitwise Logical XOR of Single-Precision FP Values * //********************************************************************************** -emitterT void eSSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); } -emitterT void eSSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); } +emitterT void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); } +emitterT void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); } -emitterT void eSSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x570f ); 
} -emitterT void eSSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x570f ); } +emitterT void SSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x570f ); } +emitterT void SSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x570f ); } /////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ADDPS : ADD Packed Single-Precision FP Values * //********************************************************************************** -emitterT void eSSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); } -emitterT void eSSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); } +emitterT void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); } +emitterT void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); } //////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ADDSS : ADD Scalar Single-Precision FP Values * //********************************************************************************** -emitterT void eSSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); } -emitterT void eSSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); } +emitterT void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); } +emitterT void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); } -emitterT void eSSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x580f, 0 ); } -emitterT void eSSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x580f ); } +emitterT void SSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x580f, 0 ); } +emitterT void SSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x580f ); } ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //SUBPS: Packed Single-Precision FP Subtract * //********************************************************************************** -emitterT void eSSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); } -emitterT void eSSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); } +emitterT void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); } +emitterT void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); } /////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //SUBSS : Scalar Single-Precision FP Subtract * //********************************************************************************** -emitterT void eSSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); } -emitterT void eSSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); } +emitterT void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); } +emitterT void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); } -emitterT void 
eSSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5c0f, 0 ); } -emitterT void eSSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5c0f ); } +emitterT void SSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5c0f, 0 ); } +emitterT void SSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5c0f ); } ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MULPS : Packed Single-Precision FP Multiply * //********************************************************************************** -emitterT void eSSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); } -emitterT void eSSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); } +emitterT void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); } +emitterT void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); } //////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MULSS : Scalar Single-Precision FP Multiply * //********************************************************************************** -emitterT void eSSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); } -emitterT void eSSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); } +emitterT void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); } +emitterT void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); } -emitterT void eSSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x590f, 0 ); } -emitterT void eSSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x590f ); } +emitterT void SSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x590f, 0 ); } +emitterT void SSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x590f ); } //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ @@ -513,22 +513,22 @@ emitterT void eSSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S //missing SSE_CMPPS_I8_to_XMM // SSE_CMPPS_M32_to_XMM // SSE_CMPPS_XMM_to_XMM -emitterT void eSSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 0 ); } -emitterT void eSSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 0 ); } -emitterT void eSSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 1 ); } -emitterT void eSSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 1 ); } -emitterT void eSSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 2 ); } -emitterT void eSSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 2 ); } -emitterT void eSSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 3 ); } -emitterT void eSSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 3 ); } -emitterT void eSSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 4 ); } -emitterT void eSSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 4 ); } -emitterT void 
eSSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 5 ); } -emitterT void eSSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 5 ); } -emitterT void eSSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 6 ); } -emitterT void eSSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 6 ); } -emitterT void eSSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 7 ); } -emitterT void eSSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 7 ); } +emitterT void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 0 ); } +emitterT void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 0 ); } +emitterT void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 1 ); } +emitterT void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 1 ); } +emitterT void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 2 ); } +emitterT void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 2 ); } +emitterT void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 3 ); } +emitterT void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 3 ); } +emitterT void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 4 ); } +emitterT void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 4 ); } +emitterT void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 5 ); } +emitterT void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 5 ); } +emitterT void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 6 ); } +emitterT void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 6 ); } +emitterT void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ) { CMPPSMtoR( 7 ); } +emitterT void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPPSRtoR( 7 ); } /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ @@ -537,194 +537,194 @@ emitterT void eSSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) //missing SSE_CMPSS_I8_to_XMM // SSE_CMPSS_M32_to_XMM // SSE_CMPSS_XMM_to_XMM -emitterT void eSSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 0 ); } -emitterT void eSSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 0 ); } -emitterT void eSSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 1 ); } -emitterT void eSSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 1 ); } -emitterT void eSSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 2 ); } -emitterT void eSSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 2 ); } -emitterT void eSSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 3 ); } -emitterT void eSSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 3 ); } -emitterT void eSSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 4 ); } -emitterT void eSSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 4 ); } -emitterT void eSSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 5 ); } -emitterT void eSSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType 
from ) { CMPSSRtoR( 5 ); } -emitterT void eSSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 6 ); } -emitterT void eSSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 6 ); } -emitterT void eSSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 7 ); } -emitterT void eSSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 7 ); } +emitterT void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 0 ); } +emitterT void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 0 ); } +emitterT void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 1 ); } +emitterT void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 1 ); } +emitterT void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 2 ); } +emitterT void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 2 ); } +emitterT void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 3 ); } +emitterT void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 3 ); } +emitterT void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 4 ); } +emitterT void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 4 ); } +emitterT void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 5 ); } +emitterT void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 5 ); } +emitterT void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 6 ); } +emitterT void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 6 ); } +emitterT void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 7 ); } +emitterT void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 7 ); } -emitterT void eSSE2_CMPEQSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 0 ); } -emitterT void eSSE2_CMPEQSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 0 ); } -emitterT void eSSE2_CMPLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 1 ); } -emitterT void eSSE2_CMPLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 1 ); } -emitterT void eSSE2_CMPLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 2 ); } -emitterT void eSSE2_CMPLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 2 ); } -emitterT void eSSE2_CMPUNORDSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 3 ); } -emitterT void eSSE2_CMPUNORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 3 ); } -emitterT void eSSE2_CMPNESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 4 ); } -emitterT void eSSE2_CMPNESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 4 ); } -emitterT void eSSE2_CMPNLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 5 ); } -emitterT void eSSE2_CMPNLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 5 ); } -emitterT void eSSE2_CMPNLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 6 ); } -emitterT void eSSE2_CMPNLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 6 ); } -emitterT void eSSE2_CMPORDSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 7 ); } -emitterT void eSSE2_CMPORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 7 ); } +emitterT void SSE2_CMPEQSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 0 ); } 
+emitterT void SSE2_CMPEQSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 0 ); } +emitterT void SSE2_CMPLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 1 ); } +emitterT void SSE2_CMPLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 1 ); } +emitterT void SSE2_CMPLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 2 ); } +emitterT void SSE2_CMPLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 2 ); } +emitterT void SSE2_CMPUNORDSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 3 ); } +emitterT void SSE2_CMPUNORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 3 ); } +emitterT void SSE2_CMPNESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 4 ); } +emitterT void SSE2_CMPNESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 4 ); } +emitterT void SSE2_CMPNLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 5 ); } +emitterT void SSE2_CMPNLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 5 ); } +emitterT void SSE2_CMPNLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 6 ); } +emitterT void SSE2_CMPNLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 6 ); } +emitterT void SSE2_CMPORDSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 7 ); } +emitterT void SSE2_CMPORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 7 ); } -emitterT void eSSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ) +emitterT void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ) { RexR(0, to); - write16( 0x2e0f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x2e0f ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void eSSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +emitterT void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { RexRB(0, to, from); - write16( 0x2e0f ); - ModRM( 3, to, from ); + write16( 0x2e0f ); + ModRM( 3, to, from ); } -emitterT void eSSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ) +emitterT void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ) { - write8(0x66); + write8(0x66); RexR(0, to); - write16( 0x2e0f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + write16( 0x2e0f ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); } -emitterT void eSSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +emitterT void SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write16( 0x2e0f ); - ModRM( 3, to, from ); + write16( 0x2e0f ); + ModRM( 3, to, from ); } ////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //RSQRTPS : Packed Single-Precision FP Square Root Reciprocal * //********************************************************************************** -emitterT void eSSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); } -emitterT void eSSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x520f ); } +emitterT void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); } +emitterT void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x520f ); } ///////////////////////////////////////////////////////////////////////////////////// 
//**********************************************************************************/ //RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal * //********************************************************************************** -emitterT void eSSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); } -emitterT void eSSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x520f ); } +emitterT void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); } +emitterT void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x520f ); } //////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //SQRTPS : Packed Single-Precision FP Square Root * //********************************************************************************** -emitterT void eSSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); } -emitterT void eSSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x510f ); } +emitterT void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); } +emitterT void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x510f ); } ////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //SQRTSS : Scalar Single-Precision FP Square Root * //********************************************************************************** -emitterT void eSSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); } -emitterT void eSSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x510f ); } +emitterT void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); } +emitterT void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x510f ); } -emitterT void eSSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x510f, 0 ); } -emitterT void eSSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SD_RtoR( 0x510f ); } +emitterT void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x510f, 0 ); } +emitterT void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SD_RtoR( 0x510f ); } //////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MAXPS: Return Packed Single-Precision FP Maximum * //********************************************************************************** -emitterT void eSSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); } -emitterT void eSSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); } +emitterT void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); } +emitterT void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); } -emitterT void eSSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5f0f ); } -emitterT void eSSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5f0f ); } +emitterT void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5f0f ); } +emitterT void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 
0x5f0f ); } ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MAXSS: Return Scalar Single-Precision FP Maximum * //********************************************************************************** -emitterT void eSSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); } -emitterT void eSSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); } +emitterT void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); } +emitterT void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); } -emitterT void eSSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5f0f, 0 ); } -emitterT void eSSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5f0f ); } +emitterT void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5f0f, 0 ); } +emitterT void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5f0f ); } ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion * //********************************************************************************** -emitterT void eSSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); } -emitterT void eSSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); } +emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); } +emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); } /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion * //********************************************************************************** -emitterT void eSSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); } -emitterT void eSSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); } +emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); } +emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); } -emitterT void eSSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { write8(0xf3); SSEMtoR(0x2c0f, 0); } -emitterT void eSSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) +emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { write8(0xf3); SSEMtoR(0x2c0f, 0); } +emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) { - write8(0xf3); + write8(0xf3); RexRB(0, to, from); - write16(0x2c0f); - ModRM(3, to, from); + write16(0x2c0f); + ModRM(3, to, from); } -emitterT void eSSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); } -emitterT void eSSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) +emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); } +emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) { - write8(0xf3); + write8(0xf3); RexRB(0, to, from); - write16(0x2a0f); - ModRM(3, to, from); + 
write16(0x2a0f); + ModRM(3, to, from); } -emitterT void eSSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { SSE_SS_MtoR(0x5a0f, 0); } -emitterT void eSSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x5a0f); } +emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { SSE_SS_MtoR(0x5a0f, 0); } +emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x5a0f); } -emitterT void eSSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); } -emitterT void eSSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SD_RtoR(0x5a0f); } +emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); } +emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SD_RtoR(0x5a0f); } /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion * //********************************************************************************** -emitterT void eSSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); } -emitterT void eSSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); } +emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); } +emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); } //**********************************************************************************/ //CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion * //********************************************************************************** -emitterT void eSSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); } -emitterT void eSSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); } +emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); } +emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); } -emitterT void eSSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); } +emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MINPS: Return Packed Single-Precision FP Minimum * //********************************************************************************** -emitterT void eSSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); } -emitterT void eSSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); } +emitterT void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); } +emitterT void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); } -emitterT void eSSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5d0f ); } -emitterT void eSSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5d0f ); } +emitterT void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5d0f ); } +emitterT void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { 
SSERtoR66( 0x5d0f ); } ////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MINSS: Return Scalar Single-Precision FP Minimum * //********************************************************************************** -emitterT void eSSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); } -emitterT void eSSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); } +emitterT void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); } +emitterT void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); } -emitterT void eSSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5d0f, 0 ); } -emitterT void eSSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5d0f ); } +emitterT void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5d0f, 0 ); } +emitterT void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5d0f ); } /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ @@ -734,7 +734,7 @@ emitterT void eSSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S // SSE_PMAXSW_M64_to_MM // SSE2_PMAXSW_M128_to_XMM // SSE2_PMAXSW_XMM_to_XMM -emitterT void eSSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); } +emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); } /////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ @@ -744,659 +744,659 @@ emitterT void eSSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSER // SSE_PMINSW_M64_to_MM // SSE2_PMINSW_M128_to_XMM // SSE2_PMINSW_XMM_to_XMM -emitterT void eSSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); } +emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); } ////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //SHUFPS: Shuffle Packed Single-Precision FP Values * //********************************************************************************** -emitterT void eSSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); } -emitterT void eSSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); } +emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); } +emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); } -emitterT void eSSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) +emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) { RexRB(0, to, from); - write16(0xc60f); - WriteRmOffsetFrom(to, from, offset); - write8(imm8); + write16(0xc60f); + WriteRmOffsetFrom(to, from, offset); + write8(imm8); } ////////////////////////////////////////////////////////////////////////////////////// 
//**********************************************************************************/ //SHUFPD: Shuffle Packed Double-Precision FP Values * //********************************************************************************** -emitterT void eSSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0xC60F ); write8( imm8 ); } -emitterT void eSSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0xC60F ); write8( imm8 ); } +emitterT void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0xC60F ); write8( imm8 ); } +emitterT void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0xC60F ); write8( imm8 ); } //////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PSHUFD: Shuffle Packed DoubleWords * //********************************************************************************** -emitterT void eSSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) +emitterT void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0x700F ); - write8( imm8 ); + write8( imm8 ); } -emitterT void eSSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); } +emitterT void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); } -emitterT void eSSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); } -emitterT void eSSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF2); SSEMtoR(0x700F, 1); write8(imm8); } -emitterT void eSSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF3); SSERtoR(0x700F); write8(imm8); } -emitterT void eSSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF3); SSEMtoR(0x700F, 1); write8(imm8); } +emitterT void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); } +emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF2); SSEMtoR(0x700F, 1); write8(imm8); } +emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF3); SSERtoR(0x700F); write8(imm8); } +emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF3); SSEMtoR(0x700F, 1); write8(imm8); } /////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data * //********************************************************************************** -emitterT void eSSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); } -emitterT void eSSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); } +emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); } +emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); } //////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data * 
//********************************************************************************** -emitterT void eSSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); } -emitterT void eSSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); } +emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); } +emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); } //////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //DIVPS : Packed Single-Precision FP Divide * //********************************************************************************** -emitterT void eSSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5e0F, 0 ); } -emitterT void eSSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5e0F ); } +emitterT void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5e0F, 0 ); } +emitterT void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5e0F ); } ////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //DIVSS : Scalar Single-Precision FP Divide * //********************************************************************************** -emitterT void eSSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5e0F, 0 ); } -emitterT void eSSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5e0F ); } +emitterT void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5e0F, 0 ); } +emitterT void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5e0F ); } -emitterT void eSSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5e0F, 0 ); } -emitterT void eSSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5e0F ); } +emitterT void SSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5e0F, 0 ); } +emitterT void SSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5e0F ); } ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //STMXCSR : Store Streaming SIMD Extension Control/Status * //********************************************************************************** -emitterT void eSSE_STMXCSR( uptr from ) { - write16( 0xAE0F ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); +emitterT void SSE_STMXCSR( uptr from ) { + write16( 0xAE0F ); + ModRM( 0, 0x3, DISP32 ); + write32( MEMADDR(from, 4) ); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //LDMXCSR : Load Streaming SIMD Extension Control/Status * //********************************************************************************** -emitterT void eSSE_LDMXCSR( uptr from ) { - write16( 0xAE0F ); - ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(from, 4) ); +emitterT void SSE_LDMXCSR( uptr from ) { + write16( 0xAE0F ); + ModRM( 0, 0x2, DISP32 ); + write32( MEMADDR(from, 4) ); } ///////////////////////////////////////////////////////////////////////////////////// 
//**********************************************************************************/ //PADDB,PADDW,PADDD : Add Packed Integers * //********************************************************************************** -emitterT void eSSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFC0F ); } -emitterT void eSSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFC0F ); } -emitterT void eSSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFD0F ); } -emitterT void eSSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFD0F ); } -emitterT void eSSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFE0F ); } -emitterT void eSSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFE0F ); } -emitterT void eSSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD40F ); } -emitterT void eSSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ) { SSEMtoR66( 0xD40F ); } +emitterT void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFC0F ); } +emitterT void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFC0F ); } +emitterT void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFD0F ); } +emitterT void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFD0F ); } +emitterT void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFE0F ); } +emitterT void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFE0F ); } +emitterT void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD40F ); } +emitterT void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ) { SSEMtoR66( 0xD40F ); } /////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PCMPxx: Compare Packed Integers * //********************************************************************************** -emitterT void eSSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); } -emitterT void eSSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); } -emitterT void eSSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); } -emitterT void eSSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); } -emitterT void eSSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); } -emitterT void eSSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); } -emitterT void eSSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); } -emitterT void eSSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); } -emitterT void eSSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); } -emitterT void eSSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); } -emitterT void eSSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x760F ); } -emitterT void eSSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x760F ); } +emitterT void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); } +emitterT void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); } +emitterT void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); } +emitterT void 
SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); } +emitterT void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); } +emitterT void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); } +emitterT void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); } +emitterT void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); } +emitterT void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); } +emitterT void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); } +emitterT void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x760F ); } +emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x760F ); } //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PEXTRW,PINSRW: Packed Extract/Insert Word * //********************************************************************************** -emitterT void eSSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } -emitterT void eSSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); } +emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } +emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); } //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PSUBx: Subtract Packed Integers * //********************************************************************************** -emitterT void eSSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF80F ); } -emitterT void eSSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF80F ); } -emitterT void eSSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF90F ); } -emitterT void eSSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF90F ); } -emitterT void eSSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFA0F ); } -emitterT void eSSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFA0F ); } -emitterT void eSSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFB0F ); } -emitterT void eSSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); } +emitterT void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF80F ); } +emitterT void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF80F ); } +emitterT void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF90F ); } +emitterT void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF90F ); } +emitterT void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFA0F ); } +emitterT void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFA0F ); } +emitterT void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFB0F ); } +emitterT void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); } 
/////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MOVD: Move Dword(32bit) to /from XMM reg * //********************************************************************************** -emitterT void eSSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); } -emitterT void eSSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) { SSERtoR66(0x6E0F); } +emitterT void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); } +emitterT void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) { SSERtoR66(0x6E0F); } -emitterT void eSSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) +emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write16( 0x6e0f ); - ModRM( 0, to, from); + write16( 0x6e0f ); + ModRM( 0, to, from); } -emitterT void eSSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write16( 0x6e0f ); - WriteRmOffsetFrom(to, from, offset); + write16( 0x6e0f ); + WriteRmOffsetFrom(to, from, offset); } -emitterT void eSSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } -emitterT void eSSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); } +emitterT void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } +emitterT void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); } -emitterT void eSSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) { - write8(0x66); + write8(0x66); RexRB(0, from, to); - write16( 0x7e0f ); - WriteRmOffsetFrom(from, to, offset); + write16( 0x7e0f ); + WriteRmOffsetFrom(from, to, offset); } //////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //POR : SSE Bitwise OR * //********************************************************************************** -emitterT void eSSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEB0F ); } -emitterT void eSSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEB0F ); } +emitterT void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEB0F ); } +emitterT void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEB0F ); } // logical and to &= from -emitterT void eSSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDB0F ); } -emitterT void eSSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDB0F ); } +emitterT void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDB0F ); } +emitterT void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDB0F ); } // to = (~to) & from -emitterT void eSSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDF0F ); } -emitterT void eSSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDF0F ); } +emitterT void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDF0F ); } +emitterT void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDF0F 
); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PXOR : SSE Bitwise XOR * //********************************************************************************** -emitterT void eSSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEF0F ); } -emitterT void eSSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEF0F ); } +emitterT void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEF0F ); } +emitterT void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEF0F ); } /////////////////////////////////////////////////////////////////////////////////////// -emitterT void eSSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_M128_to_XMM( to, from ); else SSEMtoR66(0x6F0F); } -emitterT void eSSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_M128( to, from ); else SSERtoM66(0x7F0F); } -emitterT void eSSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if( AlwaysUseMovaps ) eSSE_MOVAPS_XMM_to_XMM( to, from ); else if( to != from ) SSERtoR66(0x6F0F); } +emitterT void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { if( AlwaysUseMovaps ) SSE_MOVAPS_M128_to_XMM( to, from ); else SSEMtoR66(0x6F0F); } +emitterT void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { if( AlwaysUseMovaps ) SSE_MOVAPS_XMM_to_M128( to, from ); else SSERtoM66(0x7F0F); } +emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if( AlwaysUseMovaps ) SSE_MOVAPS_XMM_to_XMM( to, from ); else if( to != from ) SSERtoR66(0x6F0F); } -emitterT void eSSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) { if( AlwaysUseMovaps ) - eSSE_MOVUPS_M128_to_XMM( to, from ); + SSE_MOVUPS_M128_to_XMM( to, from ); else { - write8(0xF3); + write8(0xF3); SSEMtoR(0x6F0F, 0); } } -emitterT void eSSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) +emitterT void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) { if( AlwaysUseMovaps ) - eSSE_MOVUPS_XMM_to_M128( to, from ); + SSE_MOVUPS_XMM_to_M128( to, from ); else { - write8(0xF3); + write8(0xF3); SSERtoM(0x7F0F, 0); } } // shift right logical -emitterT void eSSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD10F); } -emitterT void eSSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD10F); } -emitterT void eSSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD10F); } +emitterT void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD10F); } +emitterT void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x710F ); - ModRM( 3, 2 , to ); - write8( imm8 ); + write16( 0x710F ); + ModRM( 3, 2 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD20F); } -emitterT void eSSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD20F); } -emitterT void eSSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD20F); } +emitterT void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD20F); } +emitterT void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, 
u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x720F ); - ModRM( 3, 2 , to ); - write8( imm8 ); + write16( 0x720F ); + ModRM( 3, 2 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD30F); } -emitterT void eSSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD30F); } -emitterT void eSSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD30F); } +emitterT void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD30F); } +emitterT void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x730F ); - ModRM( 3, 2 , to ); - write8( imm8 ); + write16( 0x730F ); + ModRM( 3, 2 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x730F ); - ModRM( 3, 3 , to ); - write8( imm8 ); + write16( 0x730F ); + ModRM( 3, 3 , to ); + write8( imm8 ); } // shift right arithmetic -emitterT void eSSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); } -emitterT void eSSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); } -emitterT void eSSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); } +emitterT void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); } +emitterT void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x710F ); - ModRM( 3, 4 , to ); - write8( imm8 ); + write16( 0x710F ); + ModRM( 3, 4 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); } -emitterT void eSSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); } -emitterT void eSSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); } +emitterT void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); } +emitterT void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x720F ); - ModRM( 3, 4 , to ); - write8( imm8 ); + write16( 0x720F ); + ModRM( 3, 4 , to ); + write8( imm8 ); } // shift left logical -emitterT void eSSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF10F); } -emitterT void eSSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF10F); } -emitterT void eSSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF10F); } +emitterT void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF10F); } +emitterT void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x710F ); - ModRM( 3, 6 , to ); - write8( imm8 ); + write16( 0x710F ); + ModRM( 3, 6 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF20F); } -emitterT void eSSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF20F); } -emitterT void eSSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void 
SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF20F); } +emitterT void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF20F); } +emitterT void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x720F ); - ModRM( 3, 6 , to ); - write8( imm8 ); + write16( 0x720F ); + ModRM( 3, 6 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF30F); } -emitterT void eSSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF30F); } -emitterT void eSSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF30F); } +emitterT void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF30F); } +emitterT void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x730F ); - ModRM( 3, 6 , to ); - write8( imm8 ); + write16( 0x730F ); + ModRM( 3, 6 , to ); + write8( imm8 ); } -emitterT void eSSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) +emitterT void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) { - write8( 0x66 ); + write8( 0x66 ); RexB(0, to); - write16( 0x730F ); - ModRM( 3, 7 , to ); - write8( imm8 ); + write16( 0x730F ); + ModRM( 3, 7 , to ); + write8( imm8 ); } -emitterT void eSSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEE0F ); } -emitterT void eSSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEE0F ); } +emitterT void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEE0F ); } +emitterT void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEE0F ); } -emitterT void eSSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDE0F ); } -emitterT void eSSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDE0F ); } +emitterT void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDE0F ); } +emitterT void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDE0F ); } -emitterT void eSSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEA0F ); } -emitterT void eSSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEA0F ); } +emitterT void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEA0F ); } +emitterT void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEA0F ); } -emitterT void eSSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDA0F ); } -emitterT void eSSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDA0F ); } +emitterT void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDA0F ); } +emitterT void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDA0F ); } -emitterT void eSSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEC0F ); } -emitterT void eSSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEC0F ); } +emitterT void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEC0F ); } +emitterT void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEC0F ); } -emitterT void eSSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xED0F ); } -emitterT void eSSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr 
from ) { SSEMtoR66( 0xED0F ); } +emitterT void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xED0F ); } +emitterT void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xED0F ); } -emitterT void eSSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE80F ); } -emitterT void eSSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xE80F ); } +emitterT void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE80F ); } +emitterT void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xE80F ); } -emitterT void eSSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE90F ); } -emitterT void eSSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xE90F ); } +emitterT void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE90F ); } +emitterT void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xE90F ); } -emitterT void eSSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD80F ); } -emitterT void eSSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD80F ); } -emitterT void eSSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD90F ); } -emitterT void eSSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD90F ); } +emitterT void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD80F ); } +emitterT void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD80F ); } +emitterT void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD90F ); } +emitterT void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD90F ); } -emitterT void eSSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDC0F ); } -emitterT void eSSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDC0F ); } -emitterT void eSSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDD0F ); } -emitterT void eSSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDD0F ); } +emitterT void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDC0F ); } +emitterT void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDC0F ); } +emitterT void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDD0F ); } +emitterT void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDD0F ); } //**********************************************************************************/ //PACKSSWB,PACKSSDW: Pack Saturate Signed Word //********************************************************************************** -emitterT void eSSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); } -emitterT void eSSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); } -emitterT void eSSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); } -emitterT void eSSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); } +emitterT void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); } +emitterT void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); } +emitterT void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); } 
+emitterT void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); } -emitterT void eSSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); } -emitterT void eSSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); } +emitterT void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); } +emitterT void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); } //**********************************************************************************/ //PUNPCKHWD: Unpack 16bit high //********************************************************************************** -emitterT void eSSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); } -emitterT void eSSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); } +emitterT void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); } +emitterT void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); } -emitterT void eSSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); } -emitterT void eSSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); } +emitterT void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); } +emitterT void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); } -emitterT void eSSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); } -emitterT void eSSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); } -emitterT void eSSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); } -emitterT void eSSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); } +emitterT void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); } +emitterT void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); } +emitterT void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); } +emitterT void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); } -emitterT void eSSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); } -emitterT void eSSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); } -emitterT void eSSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); } -emitterT void eSSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); } +emitterT void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); } +emitterT void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); } +emitterT void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); } +emitterT void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); } -emitterT void eSSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); } -emitterT void eSSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); } +emitterT void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); } +emitterT void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); } -emitterT void 
eSSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); } -emitterT void eSSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); } +emitterT void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); } +emitterT void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); } -emitterT void eSSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); } -emitterT void eSSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); } -emitterT void eSSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); } -emitterT void eSSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); } +emitterT void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); } +emitterT void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); } +emitterT void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); } +emitterT void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); } -emitterT void eSSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); } -emitterT void eSSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); } +emitterT void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); } +emitterT void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); } -emitterT void eSSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0xD70F); } +emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0xD70F); } -emitterT void eSSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); } -emitterT void eSSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); } +emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); } +emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); } -emitterT void eSSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF50F); } +emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF50F); } -emitterT void eSSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); } -emitterT void eSSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf2); SSEMtoR( 0x7c0f, 0 ); } +emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); } +emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf2); SSEMtoR( 0x7c0f, 0 ); } -emitterT void eSSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0xf3); +emitterT void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { + write8(0xf3); RexRB(0, to, from); - write16( 0x120f); - ModRM( 3, to, from ); + write16( 0x120f); + ModRM( 3, to, from ); } -emitterT void eSSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x120f, 0); } -emitterT void eSSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); } -emitterT void eSSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); } +emitterT void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x120f, 0); 
} +emitterT void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); } +emitterT void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); } // SSSE3 -emitterT void eSSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x1C380F); - ModRM(3, to, from); + write24(0x1C380F); + ModRM(3, to, from); } -emitterT void eSSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x1D380F); - ModRM(3, to, from); + write24(0x1D380F); + ModRM(3, to, from); } -emitterT void eSSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x1E380F); - ModRM(3, to, from); + write24(0x1E380F); + ModRM(3, to, from); } -emitterT void eSSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) +emitterT void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x0F3A0F); - ModRM(3, to, from); - write8(imm8); + write24(0x0F3A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void eSSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x08380F); - ModRM(3, to, from); + write24(0x08380F); + ModRM(3, to, from); } -emitterT void eSSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x09380F); - ModRM(3, to, from); + write24(0x09380F); + ModRM(3, to, from); } -emitterT void eSSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x0A380F); - ModRM(3, to, from); + write24(0x0A380F); + ModRM(3, to, from); } // SSE4.1 -emitterT void eSSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) +emitterT void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { - write8(0x66); - write24(0x403A0F); - ModRM(3, to, from); - write8(imm8); + write8(0x66); + write24(0x403A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void eSSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) +emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) { - write8(0x66); - write24(0x403A0F); - ModRM(0, to, DISP32); - write32(MEMADDR(from, 4)); - write8(imm8); + write8(0x66); + write24(0x403A0F); + ModRM(0, to, DISP32); + write32(MEMADDR(from, 4)); + write8(imm8); } -emitterT void eSSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) +emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x213A0F); - ModRM(3, to, from); - write8(imm8); + write24(0x213A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void eSSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) +emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, 
x86SSERegType from, u8 imm8) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x173A0F); - ModRM(3, to, from); - write8(imm8); + write24(0x173A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void eSSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) +emitterT void SSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x0C3A0F); - ModRM(3, to, from); - write8(imm8); + write24(0x0C3A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void eSSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x14380F); - ModRM(3, to, from); + write24(0x14380F); + ModRM(3, to, from); } -emitterT void eSSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) { - write8(0x66); + write8(0x66); RexR(0, to); - write24(0x14380F); - ModRM(0, to, DISP32); - write32(MEMADDR(from, 4)); + write24(0x14380F); + ModRM(0, to, DISP32); + write32(MEMADDR(from, 4)); } -emitterT void eSSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x25380F); - ModRM(3, to, from); + write24(0x25380F); + ModRM(3, to, from); } -emitterT void eSSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x35380F); - ModRM(3, to, from); + write24(0x35380F); + ModRM(3, to, from); } -emitterT void eSSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8) +emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x223A0F); - ModRM(3, to, from); - write8(imm8); + write24(0x223A0F); + ModRM(3, to, from); + write8(imm8); } -emitterT void eSSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x3D380F); - ModRM(3, to, from); + write24(0x3D380F); + ModRM(3, to, from); } -emitterT void eSSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x39380F); - ModRM(3, to, from); + write24(0x39380F); + ModRM(3, to, from); } -emitterT void eSSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x3F380F); - ModRM(3, to, from); + write24(0x3F380F); + ModRM(3, to, from); } -emitterT void eSSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x3B380F); - ModRM(3, to, from); + write24(0x3B380F); + ModRM(3, to, from); } -emitterT void eSSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from) { - write8(0x66); + write8(0x66); RexR(0, to); - write24(0x3D380F); - ModRM( 0, to, DISP32 ); - 
write32(MEMADDR(from, 4)); + write24(0x3D380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); } -emitterT void eSSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from) { - write8(0x66); + write8(0x66); RexR(0, to); - write24(0x39380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); + write24(0x39380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); } -emitterT void eSSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from) { - write8(0x66); + write8(0x66); RexR(0, to); - write24(0x3F380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); + write24(0x3F380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); } -emitterT void eSSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from) +emitterT void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from) { - write8(0x66); + write8(0x66); RexR(0, to); - write24(0x3B380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); + write24(0x3B380F); + ModRM( 0, to, DISP32 ); + write32(MEMADDR(from, 4)); } -emitterT void eSSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0x66); + write8(0x66); RexRB(0, to, from); - write24(0x28380F); - ModRM(3, to, from); + write24(0x28380F); + ModRM(3, to, from); } From 1d9adee468b4eb65dab5e2ab7624ff4bc9b25ba2 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Tue, 7 Apr 2009 17:35:09 +0000 Subject: [PATCH 011/143] couple of changes, very minor speedup git-svn-id: http://pcsx2.googlecode.com/svn/trunk@920 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Counters.cpp | 30 ++++++++++++++---------------- pcsx2/Counters.h | 16 ++++++++-------- pcsx2/x86/iR3000A.cpp | 6 +++--- pcsx2/x86/iVUzerorec.cpp | 9 +++++---- 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/pcsx2/Counters.cpp b/pcsx2/Counters.cpp index a58ec1d0a3..b9b9a7c9ed 100644 --- a/pcsx2/Counters.cpp +++ b/pcsx2/Counters.cpp @@ -164,7 +164,7 @@ struct vSyncTimingInfo static vSyncTimingInfo vSyncInfo; -static __forceinline void vSyncInfoCalc( vSyncTimingInfo* info, u32 framesPerSecond, u32 scansPerFrame ) +static void vSyncInfoCalc( vSyncTimingInfo* info, u32 framesPerSecond, u32 scansPerFrame ) { // Important: Cannot use floats or doubles here. The emulator changes rounding modes // depending on user-set speedhack options, and it can break float/double code @@ -270,8 +270,6 @@ u32 UpdateVSyncRate() return (u32)m_iTicks; } -extern u32 vu0time; - void frameLimitReset() { m_iStart = GetCPUTicks(); @@ -282,13 +280,13 @@ void frameLimitReset() // See the GS FrameSkip function for details on why this is here and not in the GS. static __forceinline void frameLimit() { + if( CHECK_FRAMELIMIT == PCSX2_FRAMELIMIT_NORMAL ) return; + if( Config.CustomFps >= 999 ) return; // means the user would rather just have framelimiting turned off... + s64 sDeltaTime; u64 uExpectedEnd; u64 iEnd; - if( CHECK_FRAMELIMIT == PCSX2_FRAMELIMIT_NORMAL ) return; - if( Config.CustomFps >= 999 ) return; // means the user would rather just have framelimiting turned off... 
- uExpectedEnd = m_iStart + m_iTicks; iEnd = GetCPUTicks(); @@ -465,7 +463,7 @@ __forceinline bool rcntUpdate_vSync() return false; } -static __forceinline void __fastcall _cpuTestTarget( int i ) +static __forceinline void _cpuTestTarget( int i ) { if (counters[i].count < counters[i].target) return; @@ -538,7 +536,7 @@ __forceinline bool rcntUpdate() return retval; } -static void _rcntSetGate( int index ) +static __forceinline void _rcntSetGate( int index ) { if (counters[index].mode.EnableGate) { @@ -563,7 +561,7 @@ static void _rcntSetGate( int index ) } // mode - 0 means hblank source, 8 means vblank source. -void __fastcall rcntStartGate(bool isVblank, u32 sCycle) +__forceinline void rcntStartGate(bool isVblank, u32 sCycle) { int i; @@ -624,7 +622,7 @@ void __fastcall rcntStartGate(bool isVblank, u32 sCycle) } // mode - 0 means hblank signal, 8 means vblank signal. -void __fastcall rcntEndGate(bool isVblank , u32 sCycle) +__forceinline void rcntEndGate(bool isVblank , u32 sCycle) { int i; @@ -665,7 +663,7 @@ void __fastcall rcntEndGate(bool isVblank , u32 sCycle) // rcntUpdate, since we're being called from there anyway. } -void __fastcall rcntWmode(int index, u32 value) +__forceinline void rcntWmode(int index, u32 value) { if(counters[index].mode.IsCounting) { if(counters[index].mode.ClockSource != 0x3) { @@ -696,7 +694,7 @@ void __fastcall rcntWmode(int index, u32 value) _rcntSet( index ); } -void __fastcall rcntWcount(int index, u32 value) +__forceinline void rcntWcount(int index, u32 value) { EECNT_LOG("EE Counter[%d] writeCount = %x, oldcount=%x, target=%x", index, value, counters[index].count, counters[index].target ); @@ -722,7 +720,7 @@ void __fastcall rcntWcount(int index, u32 value) _rcntSet( index ); } -void __fastcall rcntWtarget(int index, u32 value) +__forceinline void rcntWtarget(int index, u32 value) { EECNT_LOG("EE Counter[%d] writeTarget = %x", index, value); @@ -738,13 +736,13 @@ void __fastcall rcntWtarget(int index, u32 value) _rcntSet( index ); } -void __fastcall rcntWhold(int index, u32 value) +__forceinline void rcntWhold(int index, u32 value) { EECNT_LOG("EE Counter[%d] Hold Write = %x", index, value); counters[index].hold = value; } -u32 __fastcall rcntRcount(int index) +__forceinline u32 rcntRcount(int index) { u32 ret; @@ -759,7 +757,7 @@ u32 __fastcall rcntRcount(int index) return ret; } -u32 __fastcall rcntCycle(int index) +__forceinline u32 rcntCycle(int index) { if (counters[index].mode.IsCounting && (counters[index].mode.ClockSource != 0x3)) return counters[index].count + ((cpuRegs.cycle - counters[index].sCycleT) / counters[index].rate); diff --git a/pcsx2/Counters.h b/pcsx2/Counters.h index 6e20dda0c2..5333bcb111 100644 --- a/pcsx2/Counters.h +++ b/pcsx2/Counters.h @@ -139,14 +139,14 @@ extern bool rcntUpdate_vSync(); extern bool rcntUpdate(); extern void rcntInit(); -extern void __fastcall rcntStartGate(bool mode, u32 sCycle); -extern void __fastcall rcntEndGate(bool mode, u32 sCycle); -extern void __fastcall rcntWcount(int index, u32 value); -extern void __fastcall rcntWmode(int index, u32 value); -extern void __fastcall rcntWtarget(int index, u32 value); -extern void __fastcall rcntWhold(int index, u32 value); -extern u32 __fastcall rcntRcount(int index); -extern u32 __fastcall rcntCycle(int index); +extern void rcntStartGate(bool mode, u32 sCycle); +extern void rcntEndGate(bool mode, u32 sCycle); +extern void rcntWcount(int index, u32 value); +extern void rcntWmode(int index, u32 value); +extern void rcntWtarget(int index, u32 value); +extern void 
rcntWhold(int index, u32 value); +extern u32 rcntRcount(int index); +extern u32 rcntCycle(int index); u32 UpdateVSyncRate(); void frameLimitReset(); diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 0309d6eced..1911acd813 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -647,7 +647,7 @@ static void recExecute() //for (;;) R3000AExecute(); } -static s32 recExecuteBlock( s32 eeCycles ) +static __forceinline s32 recExecuteBlock( s32 eeCycles ) { psxBreak = 0; psxCycleEE = eeCycles; @@ -741,7 +741,7 @@ static __forceinline u32 psxRecClearMem(u32 pc) return upperextent - pc; } -static void recClear(u32 Addr, u32 Size) +static __forceinline void recClearIOP(u32 Addr, u32 Size) { u32 pc = Addr; while (pc < Addr + Size*4) @@ -1198,7 +1198,7 @@ R3000Acpu psxRec = { recResetIOP, recExecute, recExecuteBlock, - recClear, + recClearIOP, recShutdown }; diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 80fbdc21ec..78ab51b4f6 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -2302,10 +2302,11 @@ void SuperVUCleanupProgram(u32 startpc, int vuindex) //memset(recVUStack, 0, SUPERVU_STACKSIZE * 4); - // Clear allocation info to prevent bad data being used in other parts of pcsx2; doing this just incase (cottonvibes) - _initXMMregs(); - _initMMXregs(); - _initX86regs(); + // Could clear allocation info to prevent possibly bad data being used in other parts of pcsx2; + // not doing this because it's slow and not needed (rama) + // _initXMMregs(); + // _initMMXregs(); + // _initX86regs(); } #if defined(_MSC_VER) From 5f354c3cee536e1ce258d6a007b51f95cb616b94 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 7 Apr 2009 21:54:50 +0000 Subject: [PATCH 012/143] Reverted the emitter back to a c/cpp form from inl files (probably wasn't necessary, but I don't like having code in header/inl files when I can help it). Also: * Fixed a couple potential bugs in some Rm forms of MMX instructions. * Improved compilation times by isolating BaseBlockEx.h to the files the needed it (it uses STL junks). * Removed some dead code form emitters and BaseBlockEx. 
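For reference, the .inl -> .cpp move is the ordinary declare-in-header / define-in-one-translation-unit split: ix86.h keeps only the declarations, and the emitter bodies that every includer used to recompile now sit in .cpp files that are built once. A minimal sketch of that layout follows (the file names, ExampleEmit_MOVD_R_to_XMM, and the codeBuf output buffer are made-up illustration, not the real ix86 symbols):

    // emitter_decl.h - declaration only, cheap for other files to include
    #pragma once
    void ExampleEmit_MOVD_R_to_XMM(int to, int from);

    // emitter_impl.cpp - body compiled in exactly one translation unit
    #include "emitter_decl.h"
    #include <cstdint>

    static uint8_t  codeBuf[64];                // illustrative output stream
    static uint8_t* x86Ptr = codeBuf;           // next byte to emit
    static void write8(uint8_t b) { *x86Ptr++ = b; }

    void ExampleEmit_MOVD_R_to_XMM(int to, int from)
    {
        write8(0x66);                               // operand-size prefix
        write8(0x0F); write8(0x6E);                 // MOVD xmm, r32 opcode bytes
        write8(uint8_t(0xC0 | (to << 3) | from));   // ModRM, register-direct form
    }

Files that include only the header now compile a one-line declaration instead of the full emitter bodies, which is the usual payoff of this layout.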
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@921 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/Pcsx2Config.h | 8 +- pcsx2/Common.h | 7 +- pcsx2/Exceptions.h | 8 +- pcsx2/PrecompiledHeader.h | 3 + pcsx2/R5900OpcodeTables.h | 2 - pcsx2/System.h | 2 +- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 146 +- pcsx2/x86/BaseblockEx.h | 8 +- pcsx2/x86/iCore.cpp | 2 +- pcsx2/x86/iR3000A.cpp | 2 + pcsx2/x86/iR3000A.h | 2 - pcsx2/x86/iR5900.h | 2 - pcsx2/x86/ix86-32/iCore-32.cpp | 2 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 3 + pcsx2/x86/ix86/ix86.cpp | 418 +++-- pcsx2/x86/ix86/ix86.h | 1455 ++++++++++++++++- .../ix86/{ix86_3dnow.inl => ix86_3dnow.cpp} | 3 +- pcsx2/x86/ix86/ix86_cpudetect.cpp | 6 +- pcsx2/x86/ix86/{ix86_fpu.inl => ix86_fpu.cpp} | 4 +- .../ix86/{ix86_group1.inl => ix86_group1.cpp} | 88 +- pcsx2/x86/ix86/ix86_internal.h | 43 + pcsx2/x86/ix86/{ix86.inl => ix86_legacy.cpp} | 149 +- pcsx2/x86/ix86/{ix86_mmx.inl => ix86_mmx.cpp} | 83 +- pcsx2/x86/ix86/{ix86_sse.inl => ix86_sse.cpp} | 201 ++- pcsx2/x86/ix86/ix86_sse_helpers.h | 176 +- pcsx2/x86/ix86/ix86_tools.cpp | 2 +- pcsx2/x86/microVU.h | 1 - 27 files changed, 1995 insertions(+), 831 deletions(-) rename pcsx2/x86/ix86/{ix86_3dnow.inl => ix86_3dnow.cpp} (94%) rename pcsx2/x86/ix86/{ix86_fpu.inl => ix86_fpu.cpp} (94%) rename pcsx2/x86/ix86/{ix86_group1.inl => ix86_group1.cpp} (67%) create mode 100644 pcsx2/x86/ix86/ix86_internal.h rename pcsx2/x86/ix86/{ix86.inl => ix86_legacy.cpp} (89%) rename pcsx2/x86/ix86/{ix86_mmx.inl => ix86_mmx.cpp} (86%) rename pcsx2/x86/ix86/{ix86_sse.inl => ix86_sse.cpp} (89%) diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index 8a71d245c4..d99f47debe 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -20,11 +20,11 @@ #define __PCSX2CONFIG_H__ // Hack so that you can still use this file from C (not C++), or from a plugin without access to Paths.h. -#ifdef PLUGIN_ONLY +// .. and removed in favor of a less hackish approach (air) + +#ifndef g_MaxPath #define g_MaxPath 255 -#else -#include "Paths.h" - #endif +#endif ///////////////////////////////////////////////////////////////////////// // Session Configuration Override Flags diff --git a/pcsx2/Common.h b/pcsx2/Common.h index 1461bb83bd..46135a75ac 100644 --- a/pcsx2/Common.h +++ b/pcsx2/Common.h @@ -29,8 +29,10 @@ #define PCSX2_VERSION "(beta)" -#include "Plugins.h" +#include "System.h" + #include "SaveState.h" +#include "Plugins.h" #include "DebugTools/Debug.h" #include "Memory.h" @@ -40,7 +42,4 @@ #include "Elfheader.h" #include "Patch.h" -#include "System.h" -#include "Pcsx2Config.h" - #endif /* __COMMON_H__ */ diff --git a/pcsx2/Exceptions.h b/pcsx2/Exceptions.h index c15ffb5acb..640e61c0e1 100644 --- a/pcsx2/Exceptions.h +++ b/pcsx2/Exceptions.h @@ -16,11 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#ifndef _PCSX2_EXCEPTIONS_H_ -#define _PCSX2_EXCEPTIONS_H_ - -#include -#include "StringUtils.h" +#pragma once // This class provides an easy and clean method for ensuring objects are not copyable. class NoncopyableObject @@ -380,5 +376,3 @@ namespace Exception {} }; } - -#endif diff --git a/pcsx2/PrecompiledHeader.h b/pcsx2/PrecompiledHeader.h index 36195bb08d..e2b24c72e3 100644 --- a/pcsx2/PrecompiledHeader.h +++ b/pcsx2/PrecompiledHeader.h @@ -33,6 +33,7 @@ ////////////////////////////////////////////////////////////////////////////////////////// // Include the STL junk that's actually handy. 
+#include #include #include #include @@ -69,7 +70,9 @@ typedef int BOOL; #include "zlib/zlib.h" #include "PS2Etypes.h" +#include "MemcpyFast.h" #include "StringUtils.h" +#include "Exceptions.h" //////////////////////////////////////////////////////////////////// // Compiler/OS specific macros and defines -- Begin Section diff --git a/pcsx2/R5900OpcodeTables.h b/pcsx2/R5900OpcodeTables.h index 8f4d956848..cd2a5e499c 100644 --- a/pcsx2/R5900OpcodeTables.h +++ b/pcsx2/R5900OpcodeTables.h @@ -18,8 +18,6 @@ #ifndef _R5900_OPCODETABLES_H #define _R5900_OPCODETABLES_H -#include - #include "PS2Etypes.h" // TODO : Move these into the OpcodeTables namespace diff --git a/pcsx2/System.h b/pcsx2/System.h index 80c7516749..09dff0196c 100644 --- a/pcsx2/System.h +++ b/pcsx2/System.h @@ -20,9 +20,9 @@ #define __SYSTEM_H__ #include "PS2Etypes.h" +#include "Paths.h" #include "Pcsx2Config.h" #include "Exceptions.h" -#include "Paths.h" #include "MemcpyFast.h" #include "SafeArray.h" #include "Misc.h" diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index a77fc861b6..b013011d39 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -947,7 +947,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + - - - - - - - - - - - - -#include +#include // used by BaseBlockEx #include -// used to keep block information -#define BLOCKTYPE_DELAYSLOT 1 // if bit set, delay slot - // Every potential jump point in the PS2's addressable memory has a BASEBLOCK // associated with it. So that means a BASEBLOCK for every 4 bytes of PS2 // addressable memory. Yay! @@ -119,7 +114,6 @@ public: } }; -#define GET_BLOCKTYPE(b) ((b)->Type) #define PC_GETBLOCK_(x, reclut) ((BASEBLOCK*)(reclut[((u32)(x)) >> 16] + (x)*(sizeof(BASEBLOCK)/4))) static void recLUT_SetPage(uptr reclut[0x10000], uptr hwlut[0x10000], diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index 6ab0be3488..0ce3a2c5b5 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "Misc.h" +#include "System.h" #include "iR5900.h" #include "Vif.h" #include "VU.h" diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 1911acd813..a506bdc4a8 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -24,6 +24,8 @@ #include "PrecompiledHeader.h" #include "iR3000A.h" +#include "BaseblockEx.h" + #include #ifndef _WIN32 diff --git a/pcsx2/x86/iR3000A.h b/pcsx2/x86/iR3000A.h index ebf78ec3cb..220eb33ff0 100644 --- a/pcsx2/x86/iR3000A.h +++ b/pcsx2/x86/iR3000A.h @@ -18,12 +18,10 @@ #ifndef _R3000A_SUPERREC_ #define _R3000A_SUPERREC_ -#define _EmitterId_ EmitterId_R3000a #include "ix86/ix86.h" #include "R3000A.h" #include "iCore.h" -#include "BaseblockEx.h" // Cycle penalties for particularly slow instructions. static const int psxInstCycles_Mult = 7; diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 15fc5ef33b..5f0644c073 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -19,13 +19,11 @@ #ifndef __IR5900_H__ #define __IR5900_H__ -#define _EmitterId_ EmitterId_R5900 #include "ix86/ix86.h" #include "ix86/ix86_sse_helpers.h" #include "R5900.h" #include "VU.h" #include "iCore.h" -#include "BaseblockEx.h" // needed for recClear and stuff // Yay! These work now! (air) ... 
almost (air) #define ARITHMETICIMM_RECOMPILE diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 05655f66d9..9904f342a1 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -17,7 +17,7 @@ */ #include "PrecompiledHeader.h" -#include "Misc.h" +#include "System.h" #include "iR5900.h" #include "Vif.h" #include "VU.h" diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index e231ef9ca3..cbe8ca8130 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -30,6 +30,9 @@ #include "iR5900Jump.h" #include "iR5900LoadStore.h" #include "iR5900Move.h" + +#include "BaseblockEx.h" + #include "iMMI.h" #include "iFPU.h" #include "iCOP0.h" diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 3a8e1d2830..bbdf9da5ff 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -27,7 +27,7 @@ #include "PrecompiledHeader.h" #include "System.h" -#include "ix86.h" +#include "ix86_internal.h" __threadlocal u8 *x86Ptr; __threadlocal u8 *j8Ptr[32]; @@ -39,198 +39,266 @@ PCSX2_ALIGNED16(float f[4]); XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; -namespace x86Emitter +namespace x86Emitter { + +x86IndexerType ptr; + +////////////////////////////////////////////////////////////////////////////////////////// +// +const x86Register x86Register::Empty( -1 ); + +const x86Register eax( 0 ); +const x86Register ebx( 3 ); +const x86Register ecx( 1 ); +const x86Register edx( 2 ); +const x86Register esi( 6 ); +const x86Register edi( 7 ); +const x86Register ebp( 5 ); +const x86Register esp( 4 ); + +const x86Register16 ax( 0 ); +const x86Register16 bx( 3 ); +const x86Register16 cx( 1 ); +const x86Register16 dx( 2 ); +const x86Register16 si( 6 ); +const x86Register16 di( 7 ); +const x86Register16 bp( 5 ); +const x86Register16 sp( 4 ); + +const x86Register8 al( 0 ); +const x86Register8 cl( 1 ); +const x86Register8 dl( 2 ); +const x86Register8 bl( 3 ); +const x86Register8 ah( 4 ); +const x86Register8 ch( 5 ); +const x86Register8 dh( 6 ); +const x86Register8 bh( 7 ); + +////////////////////////////////////////////////////////////////////////////////////////// +// x86Register Method Implementations +// +x86ModRm x86Register::operator+( const x86Register& right ) const { - x86IndexerType ptr; + return x86ModRm( *this, right ); +} - ////////////////////////////////////////////////////////////////////////////////////////// - // - const x86Register x86Register::Empty( -1 ); +x86ModRm x86Register::operator+( const x86ModRm& right ) const +{ + return right + *this; +} - const x86Register eax( 0 ); - const x86Register ebx( 3 ); - const x86Register ecx( 1 ); - const x86Register edx( 2 ); - const x86Register esi( 6 ); - const x86Register edi( 7 ); - const x86Register ebp( 5 ); - const x86Register esp( 4 ); - - const x86Register16 ax( 0 ); - const x86Register16 bx( 3 ); - const x86Register16 cx( 1 ); - const x86Register16 dx( 2 ); - const x86Register16 si( 6 ); - const x86Register16 di( 7 ); - const x86Register16 bp( 5 ); - const x86Register16 sp( 4 ); - - const x86Register8 al( 0 ); - const x86Register8 cl( 1 ); - const x86Register8 dl( 2 ); - const x86Register8 bl( 3 ); - const x86Register8 ah( 4 ); - const x86Register8 ch( 5 ); - const x86Register8 dh( 6 ); - const x86Register8 bh( 7 ); - - ////////////////////////////////////////////////////////////////////////////////////////// - // x86Register Method Implementations - // - x86ModRm x86Register::operator+( const x86Register& right ) const 
+////////////////////////////////////////////////////////////////////////////////////////// +// x86ModRm Method Implementations +// +x86ModRm& x86ModRm::Add( const x86Register& src ) +{ + if( src == Index ) { - return x86ModRm( *this, right ); + Factor++; } + else if( src == Base ) + { + // Compound the existing register reference into the Index/Scale pair. + Base = x86Register::Empty; - x86ModRm x86Register::operator+( const x86ModRm& right ) const - { - return right + *this; - } - - ////////////////////////////////////////////////////////////////////////////////////////// - // ModSib Method Implementations - // - x86ModRm x86ModRm::FromIndexReg( x86Register index, int scale, int displacement ) - { - return x86ModRm( x86Register::Empty, index, scale, displacement ); - } - - x86Register x86ModRm::GetEitherReg() const - { - return Base.IsEmpty() ? Base : Index; - } - - x86ModRm& x86ModRm::Add( const x86Register& src ) - { if( src == Index ) - { Factor++; - } - else if( src == Base ) + else { - // Compound the existing register reference into the Index/Scale pair. - Base = x86Register::Empty; - - if( src == Index ) - Factor++; - else - { - jASSUME( Index.IsEmpty() ); // or die if we already have an index! - Index = src; - Factor = 2; - } - } - else if( Base.IsEmpty() ) - Base = src; - else if( Index.IsEmpty() ) + jASSUME( Index.IsEmpty() ); // or die if we already have an index! Index = src; - else - assert( false ); // oops, only 2 regs allowed per ModRm! - - return *this; - } - - x86ModRm& x86ModRm::Add( const x86ModRm& src ) - { - Add( src.Base ); - Add( src.Displacement ); - - // If the factor is 1, we can just treat index like a base register also. - if( src.Factor == 1 ) - { - Add( src.Index ); - } - else if( Index.IsEmpty() ) - { - Index = src.Index; - Factor = 1; - } - else if( Index == src.Index ) - Factor++; - else - assert( false ); // oops, only 2 regs allowed! - - return *this; - } - - - x86ModRm x86ptr( x86Register base ) { return x86ModRm( base ); } - - // ------------------------------------------------------------------------ - // Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values. - // Necessary because by default ModSib compounds registers into Index when possible. - // - void ModSib::Reduce() - { - // If no index reg, then nothing for us to do... - if( Index.IsEmpty() || Scale == 0 ) return; - - // The Scale has a series of valid forms, all shown here: - - switch( Scale ) - { - case 1: Scale = 0; break; - case 2: Scale = 1; break; - - case 3: // becomes [reg*2+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 1; - break; - - case 4: Scale = 2; break; - - case 5: // becomes [reg*4+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 2; - break; - - case 6: // invalid! - assert( false ); - break; - - case 7: // so invalid! - assert( false ); - break; - - case 8: Scale = 3; break; - case 9: // becomes [reg*8+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 3; - break; + Factor = 2; } } + else if( Base.IsEmpty() ) + Base = src; + else if( Index.IsEmpty() ) + Index = src; + else + assert( false ); // oops, only 2 regs allowed per ModRm! - ModSib::ModSib( const x86ModRm& src ) : - Base( src.Base ), - Index( src.Index ), - Scale( src.Factor ), - Displacement( src.Displacement ) + return *this; +} + +x86ModRm& x86ModRm::Add( const x86ModRm& src ) +{ + Add( src.Base ); + Add( src.Displacement ); + + // If the factor is 1, we can just treat index like a base register also. 
+ if( src.Factor == 1 ) { - Reduce(); + Add( src.Index ); } + else if( Index.IsEmpty() ) + { + Index = src.Index; + Factor = 1; + } + else if( Index == src.Index ) + Factor++; + else + assert( false ); // oops, only 2 regs allowed! + + return *this; +} + +////////////////////////////////////////////////////////////////////////////////////////// +// ModSib Method Implementations +// + +// ------------------------------------------------------------------------ +// Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values. +// Necessary because by default ModSib compounds registers into Index when possible. +// +void ModSib::Reduce() +{ + // If no index reg, then nothing for us to do... + if( Index.IsEmpty() || Scale == 0 ) return; - ModSib::ModSib( x86Register base, x86Register index, int scale, s32 displacement ) : - Base( base ), - Index( index ), - Scale( scale ), - Displacement( displacement ) + // The Scale has a series of valid forms, all shown here: + + switch( Scale ) { - Reduce(); - } + case 1: Scale = 0; break; + case 2: Scale = 1; break; - ModSib::ModSib( s32 displacement ) : - Base(), - Index(), - Scale(0), - Displacement( displacement ) - { - } + case 3: // becomes [reg*2+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 1; + break; + + case 4: Scale = 2; break; - x86Register ModSib::GetEitherReg() const - { - return Base.IsEmpty() ? Base : Index; + case 5: // becomes [reg*4+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 2; + break; + + case 6: // invalid! + assert( false ); + break; + + case 7: // so invalid! + assert( false ); + break; + + case 8: Scale = 3; break; + case 9: // becomes [reg*8+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 3; + break; } } + +ModSib::ModSib( const x86ModRm& src ) : + Base( src.Base ), + Index( src.Index ), + Scale( src.Factor ), + Displacement( src.Displacement ) +{ + Reduce(); +} + +ModSib::ModSib( x86Register base, x86Register index, int scale, s32 displacement ) : + Base( base ), + Index( index ), + Scale( scale ), + Displacement( displacement ) +{ + Reduce(); +} + +ModSib::ModSib( s32 displacement ) : + Base(), + Index(), + Scale(0), + Displacement( displacement ) +{ +} + +x86Register ModSib::GetEitherReg() const +{ + return Base.IsEmpty() ? Base : Index; +} + +// ------------------------------------------------------------------------ +// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the +// instruction ca be encoded as ModRm alone. +emitterT bool NeedsSibMagic( const ModSib& info ) +{ + // no registers? no sibs! + if( info.Base.IsEmpty() && info.Index.IsEmpty() ) return false; + + // A scaled register needs a SIB + if( info.Scale != 0 && !info.Index.IsEmpty() ) return true; + + // two registers needs a SIB + if( !info.Base.IsEmpty() && !info.Index.IsEmpty() ) return true; + + // If register is ESP, then we need a SIB: + if( info.Base == esp || info.Index == esp ) return true; + + return false; +} + +// ------------------------------------------------------------------------ +// Conditionally generates Sib encoding information! +// +// regfield - register field to be written to the ModRm. This is either a register specifier +// or an opcode extension. In either case, the instruction determines the value for us. +// +emitterT void EmitSibMagic( int regfield, const ModSib& info ) +{ + int displacement_size = (info.Displacement == 0) ? 0 : + ( ( info.IsByteSizeDisp() ) ? 
1 : 2 ); + + if( !NeedsSibMagic( info ) ) + { + // Use ModRm-only encoding, with the rm field holding an index/base register, if + // one has been specified. If neither register is specified then use Disp32 form, + // which is encoded as "EBP w/o displacement" (which is why EBP must always be + // encoded *with* a displacement of 0, if it would otherwise not have one). + + x86Register basereg = info.GetEitherReg(); + + if( basereg.IsEmpty() ) + ModRM( 0, regfield, ModRm_UseDisp32 ); + else + { + if( basereg == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, basereg.Id ); + } + } + else + { + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Index.Id, info.Scale, info.Base.Id ); + } + + switch( displacement_size ) + { + case 0: break; + case 1: write8( info.Displacement ); break; + case 2: write32( info.Displacement ); break; + jNO_DEFAULT + } +} + +// ------------------------------------------------------------------------ +// Conditionally generates Sib encoding information! +// +// regfield - register field to be written to the ModRm. This is either a register specifier +// or an opcode extension. In either case, the instruction determines the value for us. +// +emitterT void EmitSibMagic( x86Register regfield, const ModSib& info ) +{ + EmitSibMagic( regfield.Id, info ); +} + +} diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 556405dc09..3151de883f 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -25,116 +25,1451 @@ */ #pragma once -#define _ix86_included_ // used for sanity checks by headers dependent on this one. #include "ix86_types.h" -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ -#define emitterT static __forceinline - -#define MEMADDR(addr, oplen) (addr) - -#define Rex(w,r,x,b) assert(0) -#define RexR(w, reg) assert( !(w || (reg)>=8) ) -#define RexB(w, base) assert( !(w || (base)>=8) ) -#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) -#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) - -// We use int param for offsets and then test them for validity in the recompiler. -// This helps catch programmer errors better than using an auto-truncated s8 parameter. 
-#define assertOffset8(ofs) assert( ofs < 128 && ofs >= -128 ) - #ifdef _MSC_VER #define __threadlocal __declspec(thread) #else #define __threadlocal __thread #endif +#define MMXONLY(code) code + + //------------------------------------------------------------------ // write functions //------------------------------------------------------------------ +#define emitterT __forceinline + extern __threadlocal u8 *x86Ptr; extern __threadlocal u8 *j8Ptr[32]; extern __threadlocal u32 *j32Ptr[32]; -emitterT void write8( u8 val ) +static emitterT void write8( u8 val ) { *x86Ptr = (u8)val; x86Ptr++; } -emitterT void write16( u16 val ) +static emitterT void write16( u16 val ) { *(u16*)x86Ptr = val; x86Ptr += 2; } -emitterT void write24( u32 val ) +static emitterT void write24( u32 val ) { *x86Ptr++ = (u8)(val & 0xff); *x86Ptr++ = (u8)((val >> 8) & 0xff); *x86Ptr++ = (u8)((val >> 16) & 0xff); } -emitterT void write32( u32 val ) +static emitterT void write32( u32 val ) { *(u32*)x86Ptr = val; x86Ptr += 4; } -emitterT void write64( u64 val ){ +static emitterT void write64( u64 val ) +{ *(u64*)x86Ptr = val; x86Ptr += 8; } + //------------------------------------------------------------------ //------------------------------------------------------------------ // jump/align functions //------------------------------------------------------------------ -emitterT void x86SetPtr( u8 *ptr ); -emitterT void x86SetJ8( u8 *j8 ); -emitterT void x86SetJ8A( u8 *j8 ); -emitterT void x86SetJ16( u16 *j16 ); -emitterT void x86SetJ16A( u16 *j16 ); -emitterT void x86SetJ32( u32 *j32 ); -emitterT void x86SetJ32A( u32 *j32 ); -emitterT void x86Align( int bytes ); -emitterT void x86AlignExecutable( int align ); +extern void x86SetPtr( u8 *ptr ); +extern void x86SetJ8( u8 *j8 ); +extern void x86SetJ8A( u8 *j8 ); +extern void x86SetJ16( u16 *j16 ); +extern void x86SetJ16A( u16 *j16 ); +extern void x86SetJ32( u32 *j32 ); +extern void x86SetJ32A( u32 *j32 ); +extern void x86Align( int bytes ); +extern void x86AlignExecutable( int align ); //------------------------------------------------------------------ -//------------------------------------------------------------------ -// General Emitter Helper functions -//------------------------------------------------------------------ -emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); -emitterT void ModRM( int mod, int reg, int rm ); -emitterT void SibSB( int ss, int index, int base ); -emitterT void SET8R( int cc, int to ); -emitterT void CMOV32RtoR( int cc, int to, int from ); -emitterT void CMOV32MtoR( int cc, int to, uptr from ); -emitterT u8* J8Rel( int cc, int to ); -emitterT u32* J32Rel( int cc, u32 to ); -emitterT u64 GetCPUTick( void ); -//------------------------------------------------------------------ +extern void CLC( void ); +extern void NOP( void ); -emitterT void MOV32RtoR( x86IntRegType to, x86IntRegType from ); -emitterT u32* JMP32( uptr to ); -emitterT u8* JMP8( u8 to ); -emitterT void CALL32( u32 to ); -emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); -emitterT void NOP( void ); -emitterT void AND32ItoM( uptr to, u32 from ); -emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1); -emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale); +//////////////////////////////////// +// mov instructions // +//////////////////////////////////// +// mov r32 to r32 +extern void MOV32RtoR( x86IntRegType to, x86IntRegType from ); +// mov r32 to m32 +extern void 
MOV32RtoM( uptr to, x86IntRegType from ); +// mov m32 to r32 +extern void MOV32MtoR( x86IntRegType to, uptr from ); +// mov [r32] to r32 +extern void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ); +// mov [r32][r32< subtract ST(0) from ST(1), store in ST(1) and POP stack +extern void FSUBP( void ); +// fmul ST(src) to fpu reg stack ST(0) +extern void FMUL32Rto0( x86IntRegType src ); +// fmul ST(0) to fpu reg stack ST(src) +extern void FMUL320toR( x86IntRegType src ); +// fdiv ST(src) to fpu reg stack ST(0) +extern void FDIV32Rto0( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src) +extern void FDIV320toR( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src), pop stack, store in ST(src) +extern void FDIV320toRP( x86IntRegType src ); + +// fadd m32 to fpu reg stack +extern void FADD32( u32 from ); +// fsub m32 to fpu reg stack +extern void FSUB32( u32 from ); +// fmul m32 to fpu reg stack +extern void FMUL32( u32 from ); +// fdiv m32 to fpu reg stack +extern void FDIV32( u32 from ); +// fcomi st, st( i) +extern void FCOMI( x86IntRegType src ); +// fcomip st, st( i) +extern void FCOMIP( x86IntRegType src ); +// fucomi st, st( i) +extern void FUCOMI( x86IntRegType src ); +// fucomip st, st( i) +extern void FUCOMIP( x86IntRegType src ); +// fcom m32 to fpu reg stack +extern void FCOM32( u32 from ); +// fabs fpu reg stack +extern void FABS( void ); +// fsqrt fpu reg stack +extern void FSQRT( void ); +// ftan fpu reg stack +extern void FPATAN( void ); +// fsin fpu reg stack +extern void FSIN( void ); +// fchs fpu reg stack +extern void FCHS( void ); + +// fcmovb fpu reg to fpu reg stack +extern void FCMOVB32( x86IntRegType from ); +// fcmove fpu reg to fpu reg stack +extern void FCMOVE32( x86IntRegType from ); +// fcmovbe fpu reg to fpu reg stack +extern void FCMOVBE32( x86IntRegType from ); +// fcmovu fpu reg to fpu reg stack +extern void FCMOVU32( x86IntRegType from ); +// fcmovnb fpu reg to fpu reg stack +extern void FCMOVNB32( x86IntRegType from ); +// fcmovne fpu reg to fpu reg stack +extern void FCMOVNE32( x86IntRegType from ); +// fcmovnbe fpu reg to fpu reg stack +extern void FCMOVNBE32( x86IntRegType from ); +// fcmovnu fpu reg to fpu reg stack +extern void FCMOVNU32( x86IntRegType from ); +extern void FCOMP32( u32 from ); +extern void FNSTSWtoAX( void ); #define MMXONLY(code) code -#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) -#include "ix86.inl" -#include "ix86_3dnow.inl" -#include "ix86_fpu.inl" -#include "ix86_mmx.inl" -#include "ix86_sse.inl" +//****************** +// MMX instructions +//****************** + +// r64 = mm + +// movq m64 to r64 +extern void MOVQMtoR( x86MMXRegType to, uptr from ); +// movq r64 to m64 +extern void MOVQRtoM( uptr to, x86MMXRegType from ); + +// pand r64 to r64 +extern void PANDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pand m64 to r64 ; +extern void PANDMtoR( x86MMXRegType to, uptr from ); +// pandn r64 to r64 +extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pandn r64 to r64 +extern void PANDNMtoR( x86MMXRegType to, uptr from ); +// por r64 to r64 +extern void PORRtoR( x86MMXRegType to, x86MMXRegType from ); +// por m64 to r64 +extern void PORMtoR( x86MMXRegType to, uptr from ); +// pxor r64 to r64 +extern void PXORRtoR( x86MMXRegType to, x86MMXRegType from ); +// pxor m64 to r64 +extern void PXORMtoR( x86MMXRegType to, uptr from ); + +// psllq r64 to r64 +extern void 
PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psllq m64 to r64 +extern void PSLLQMtoR( x86MMXRegType to, uptr from ); +// psllq imm8 to r64 +extern void PSLLQItoR( x86MMXRegType to, u8 from ); +// psrlq r64 to r64 +extern void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psrlq m64 to r64 +extern void PSRLQMtoR( x86MMXRegType to, uptr from ); +// psrlq imm8 to r64 +extern void PSRLQItoR( x86MMXRegType to, u8 from ); + +// paddusb r64 to r64 +extern void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusb m64 to r64 +extern void PADDUSBMtoR( x86MMXRegType to, uptr from ); +// paddusw r64 to r64 +extern void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusw m64 to r64 +extern void PADDUSWMtoR( x86MMXRegType to, uptr from ); + +// paddb r64 to r64 +extern void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddb m64 to r64 +extern void PADDBMtoR( x86MMXRegType to, uptr from ); +// paddw r64 to r64 +extern void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddw m64 to r64 +extern void PADDWMtoR( x86MMXRegType to, uptr from ); +// paddd r64 to r64 +extern void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddd m64 to r64 +extern void PADDDMtoR( x86MMXRegType to, uptr from ); +extern void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +// paddq m64 to r64 (sse2 only?) +extern void PADDQMtoR( x86MMXRegType to, uptr from ); +// paddq r64 to r64 (sse2 only?) +extern void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBDMtoR( x86MMXRegType to, uptr from ); + +// psubq m64 to r64 (sse2 only?) +extern void PSUBQMtoR( x86MMXRegType to, uptr from ); +// psubq r64 to r64 (sse2 only?) +extern void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ); + +// pmuludq m64 to r64 (sse2 only?) +extern void PMULUDQMtoR( x86MMXRegType to, uptr from ); +// pmuludq r64 to r64 (sse2 only?) 
+extern void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQDMtoR( x86MMXRegType to, uptr from ); +extern void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTDMtoR( x86MMXRegType to, uptr from ); +extern void PSRLWItoR( x86MMXRegType to, u8 from ); +extern void PSRLDItoR( x86MMXRegType to, u8 from ); +extern void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSLLWItoR( x86MMXRegType to, u8 from ); +extern void PSLLDItoR( x86MMXRegType to, u8 from ); +extern void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSRAWItoR( x86MMXRegType to, u8 from ); +extern void PSRADItoR( x86MMXRegType to, u8 from ); +extern void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ); +extern void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ); +extern void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8 +extern void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ); +extern void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); +extern void MOVDMtoMMX( x86MMXRegType to, uptr from ); +extern void MOVDMMXtoM( uptr to, x86MMXRegType from ); +extern void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ); +extern void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset=0 ); +extern void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ); +extern void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); +extern void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ); +extern void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8); +extern void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8); +extern void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); + +// emms +extern void EMMS( void ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits +//********************************************************************************** +extern void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); +extern void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); + +extern void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); + +//********************* +// SSE instructions * +//********************* +extern void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ); +extern void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ); + +extern void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); +extern void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern 
void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ); +extern void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ); +extern void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ); +extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from ); +extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ); +extern void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ); + +extern void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVUPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ); + +extern void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); 
+extern void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +extern void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +extern void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ); +extern void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ); +extern void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ); + +extern void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from); +extern void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from); +extern void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from); +extern void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from); + +extern void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ); +extern void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +extern void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ); +extern void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +// VectorPath +extern void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +extern void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_SHUFPD_M128_to_XMM( x86SSERegType 
to, uptr from, u8 imm8 ); + +extern void SSE_STMXCSR( uptr from ); +extern void SSE_LDMXCSR( uptr from ); + + +//********************* +// SSE 2 Instructions* +//********************* + +extern void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from); +extern void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +extern void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from); +extern void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +extern void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void 
SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); + +extern void SSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + 
+//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word +//********************************************************************************** +extern void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from); + +//**********************************************************************************/ +//PUNPCKHWD: Unpack 16bit high +//********************************************************************************** +extern void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +// mult by half words +extern void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from); + + +//**********************************************************************************/ +//PMOVMSKB: Create 16bit mask from signs of 8bit integers +//********************************************************************************** +extern void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +extern void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from); +extern void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +//**********************************************************************************/ +//PEXTRW,PINSRW: Packed Extract/Insert Word * +//********************************************************************************** +extern void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ); +extern void SSE_PINSRW_R32_to_XMM(x86SSERegType from, x86IntRegType to, u8 imm8 ); + + +//**********************************************************************************/ +//PSUBx: Subtract Packed Integers * 
+//********************************************************************************** +extern void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ); +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PCMPxx: Compare Packed Integers * +//********************************************************************************** +extern void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ); + +//**********************************************************************************/ +//MOVD: Move Dword(32bit) to /from XMM reg * +//********************************************************************************** +extern void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); +extern void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); +extern void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from ); +extern void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from ); +extern void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ); + +//**********************************************************************************/ +//MOVD: Move Qword(64bit) to/from MMX/XMM reg * +//********************************************************************************** +extern void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); +extern void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); + + +//**********************************************************************************/ +//POR : SSE Bitwise OR * +//********************************************************************************** +extern void SSE2_POR_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ); +extern void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ); + +extern void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); + +// SSSE3 + +extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); + +// SSE4.1 + +#ifndef _MM_MK_INSERTPS_NDX +#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) +#endif + +extern void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8); +extern void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8); +extern void SSE4_BLENDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8); +extern void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); + +//********************* +// 3DNOW instructions * +//********************* +extern void FEMMS( void ); +extern void PFCMPEQMtoR( x86IntRegType to, uptr from ); +extern void PFCMPGTMtoR( x86IntRegType to, uptr from ); +extern void PFCMPGEMtoR( x86IntRegType to, uptr from ); +extern void PFADDMtoR( x86IntRegType to, uptr from ); +extern void PFADDRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFSUBMtoR( x86IntRegType to, uptr from ); +extern void PFSUBRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMULMtoR( x86IntRegType to, uptr from ); +extern void PFMULRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPMtoR( x86IntRegType to, uptr from ); +extern void PFRCPRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ); +extern void PF2IDMtoR( x86IntRegType to, uptr from ); +extern void PI2FDMtoR( x86IntRegType to, uptr from ); +extern void PI2FDRtoR( 
x86IntRegType to, x86IntRegType from ); +extern void PFMAXMtoR( x86IntRegType to, uptr from ); +extern void PFMAXRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMINMtoR( x86IntRegType to, uptr from ); +extern void PFMINRtoR( x86IntRegType to, x86IntRegType from ); diff --git a/pcsx2/x86/ix86/ix86_3dnow.inl b/pcsx2/x86/ix86/ix86_3dnow.cpp similarity index 94% rename from pcsx2/x86/ix86/ix86_3dnow.inl rename to pcsx2/x86/ix86/ix86_3dnow.cpp index e7a667f409..ae6743cc3d 100644 --- a/pcsx2/x86/ix86/ix86_3dnow.inl +++ b/pcsx2/x86/ix86/ix86_3dnow.cpp @@ -16,7 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#pragma once +#include "PrecompiledHeader.h" +#include "ix86_internal.h" //------------------------------------------------------------------ // 3DNOW instructions diff --git a/pcsx2/x86/ix86/ix86_cpudetect.cpp b/pcsx2/x86/ix86/ix86_cpudetect.cpp index bf88feeb32..7e3323e4a7 100644 --- a/pcsx2/x86/ix86/ix86_cpudetect.cpp +++ b/pcsx2/x86/ix86/ix86_cpudetect.cpp @@ -18,10 +18,8 @@ #include "PrecompiledHeader.h" -#define _EmitterId_ 0 - -#include "ix86.h" -#include "Misc.h" +#include "ix86_internal.h" +#include "System.h" #include "Threading.h" #include "RedtapeWindows.h" diff --git a/pcsx2/x86/ix86/ix86_fpu.inl b/pcsx2/x86/ix86/ix86_fpu.cpp similarity index 94% rename from pcsx2/x86/ix86/ix86_fpu.inl rename to pcsx2/x86/ix86/ix86_fpu.cpp index 7ed607f30c..d7e3a65963 100644 --- a/pcsx2/x86/ix86/ix86_fpu.inl +++ b/pcsx2/x86/ix86/ix86_fpu.cpp @@ -16,8 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#pragma once -//#include "PrecompiledHeader.h" +#include "PrecompiledHeader.h" +#include "ix86_internal.h" //------------------------------------------------------------------ // FPU instructions diff --git a/pcsx2/x86/ix86/ix86_group1.inl b/pcsx2/x86/ix86/ix86_group1.cpp similarity index 67% rename from pcsx2/x86/ix86/ix86_group1.inl rename to pcsx2/x86/ix86/ix86_group1.cpp index 99e1d6d4ff..1af689e82c 100644 --- a/pcsx2/x86/ix86/ix86_group1.inl +++ b/pcsx2/x86/ix86/ix86_group1.cpp @@ -16,7 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#pragma once +#include "PrecompiledHeader.h" +#include "ix86_internal.h" //------------------------------------------------------------------ // x86 Group 1 Instructions @@ -32,85 +33,6 @@ namespace x86Emitter { -static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) -static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) - -// ------------------------------------------------------------------------ -// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the -// instruction ca be encoded as ModRm alone. -emitterT bool NeedsSibMagic( const ModSib& info ) -{ - // no registers? no sibs! - if( info.Base.IsEmpty() && info.Index.IsEmpty() ) return false; - - // A scaled register needs a SIB - if( info.Scale != 0 && !info.Index.IsEmpty() ) return true; - - // two registers needs a SIB - if( !info.Base.IsEmpty() && !info.Index.IsEmpty() ) return true; - - // If register is ESP, then we need a SIB: - if( info.Base == esp || info.Index == esp ) return true; - - return false; -} - -// ------------------------------------------------------------------------ -// Conditionally generates Sib encoding information! -// -// regfield - register field to be written to the ModRm. This is either a register specifier -// or an opcode extension. 
In either case, the instruction determines the value for us. -// -emitterT void EmitSibMagic( int regfield, const ModSib& info ) -{ - int displacement_size = (info.Displacement == 0) ? 0 : - ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); - - if( !NeedsSibMagic( info ) ) - { - // Use ModRm-only encoding, with the rm field holding an index/base register, if - // one has been specified. If neither register is specified then use Disp32 form, - // which is encoded as "EBP w/o displacement" (which is why EBP must always be - // encoded *with* a displacement of 0, if it would otherwise not have one). - - x86Register basereg = info.GetEitherReg(); - - if( basereg.IsEmpty() ) - ModRM( 0, regfield, ModRm_UseDisp32 ); - else - { - if( basereg == ebp && displacement_size == 0 ) - displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - - ModRM( displacement_size, regfield, basereg.Id ); - } - } - else - { - ModRM( displacement_size, regfield, ModRm_UseSib ); - SibSB( info.Index.Id, info.Scale, info.Base.Id ); - } - - switch( displacement_size ) - { - case 0: break; - case 1: write8( info.Displacement ); break; - case 2: write32( info.Displacement ); break; - jNO_DEFAULT - } -} - -// ------------------------------------------------------------------------ -// Conditionally generates Sib encoding information! -// -// regfield - register field to be written to the ModRm. This is either a register specifier -// or an opcode extension. In either case, the instruction determines the value for us. -// -emitterT void EmitSibMagic( x86Register regfield, const ModSib& info ) -{ - EmitSibMagic( regfield.Id, info ); -} - enum Group1InstructionType { G1Type_ADD=0, @@ -245,9 +167,9 @@ static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) emitterT void cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ emitterT void cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ emitterT void cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ - emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset=0 ){ x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ - emitterT void cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ - emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0 ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } + emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset ){ x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ + emitterT void cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ + emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } DEFINE_GROUP1_OPCODE_LEGACY( add, ADD ); DEFINE_GROUP1_OPCODE_LEGACY( cmp, CMP ); diff --git a/pcsx2/x86/ix86/ix86_internal.h b/pcsx2/x86/ix86/ix86_internal.h new file mode 100644 index 0000000000..83ec23a291 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_internal.h @@ -0,0 +1,43 @@ + +#pragma once +#include "ix86.h" + +//------------------------------------------------------------------ +// Helper Macros +//------------------------------------------------------------------ + +#define MEMADDR(addr, oplen) (addr) + +#define Rex(w,r,x,b) assert(0) +#define RexR(w, reg) assert( !(w || (reg)>=8) ) +#define RexB(w, base) assert( !(w || 
(base)>=8) ) +#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) +#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) + +#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) + +static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) +static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) + + +//------------------------------------------------------------------ +// General Emitter Helper functions +//------------------------------------------------------------------ + +namespace x86Emitter +{ + extern void EmitSibMagic( int regfield, const ModSib& info ); + extern void EmitSibMagic( x86Register regfield, const ModSib& info ); + extern bool NeedsSibMagic( const ModSib& info ); +} + +// From here out are the legacy (old) emitter functions... + +extern void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); +extern void ModRM( int mod, int reg, int rm ); +extern void SibSB( int ss, int index, int base ); +extern void SET8R( int cc, int to ); +extern u8* J8Rel( int cc, int to ); +extern u32* J32Rel( int cc, u32 to ); +extern u64 GetCPUTick( void ); +//------------------------------------------------------------------ diff --git a/pcsx2/x86/ix86/ix86.inl b/pcsx2/x86/ix86/ix86_legacy.cpp similarity index 89% rename from pcsx2/x86/ix86/ix86.inl rename to pcsx2/x86/ix86/ix86_legacy.cpp index b29427da43..8a99058c02 100644 --- a/pcsx2/x86/ix86/ix86.inl +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -27,14 +27,12 @@ #pragma once //------------------------------------------------------------------ -// ix86 instructions +// ix86 legacy emitter functions //------------------------------------------------------------------ #include "PrecompiledHeader.h" #include "System.h" -#include "ix86.h" - -#include "ix86_group1.inl" +#include "ix86_internal.h" // Note: the 'to' field can either be a register or a special opcode extension specifier // depending on the opcode's encoding. @@ -46,7 +44,7 @@ emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset ModRM( 0, to, 0x4 ); SibSB( 0, 0x4, 0x4 ); } - else if( offset <= 127 && offset >= -128 ) { + else if( is_s8( offset ) ) { ModRM( 1, to, 0x4 ); SibSB( 0, 0x4, 0x4 ); write8(offset); @@ -61,7 +59,7 @@ emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset if( offset == 0 ) { ModRM( 0, to, from ); } - else if( offset <= 127 && offset >= -128 ) { + else if( is_s8( offset ) ) { ModRM( 1, to, from ); write8(offset); } @@ -136,8 +134,13 @@ emitterT void x86SetPtr( u8* ptr ) x86Ptr = ptr; } -//////////////////////////////////////////////////// -emitterT void x86SetJ8( u8* j8 ) +////////////////////////////////////////////////////////////////////////////////////////// +// Jump Label API (as rough as it might be) +// +// I don't auto-inline these because of the console logging in case of error, which tends +// to cause quite a bit of code bloat. +// +void x86SetJ8( u8* j8 ) { u32 jump = ( x86Ptr - j8 ) - 1; @@ -148,7 +151,7 @@ emitterT void x86SetJ8( u8* j8 ) *j8 = (u8)jump; } -emitterT void x86SetJ8A( u8* j8 ) +void x86SetJ8A( u8* j8 ) { u32 jump = ( x86Ptr - j8 ) - 1; @@ -169,26 +172,6 @@ emitterT void x86SetJ8A( u8* j8 ) *j8 = (u8)jump; } -emitterT void x86SetJ16( u16 *j16 ) -{ - // doesn't work - u32 jump = ( x86Ptr - (u8*)j16 ) - 2; - - if ( jump > 0x7fff ) { - Console::Error( "j16 greater than 0x7fff!!" 
); - assert(0); - } - *j16 = (u16)jump; -} - -emitterT void x86SetJ16A( u16 *j16 ) -{ - if( ((uptr)x86Ptr&0xf) > 4 ) { - while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; - } - x86SetJ16(j16); -} - //////////////////////////////////////////////////// emitterT void x86SetJ32( u32* j32 ) { @@ -211,25 +194,29 @@ emitterT void x86Align( int bytes ) //////////////////////////////////////////////////// // Generates executable code to align to the given alignment (could be useful for the second leg // of if/else conditionals, which usually fall through a jump target label). -emitterT void x86AlignExecutable( int align ) +// +// Note: Left in for now just in case, but usefulness is moot. Only K8's and older (non-Prescott) +// P4s benefit from this, and we don't optimize for those platforms anyway. +// +void x86AlignExecutable( int align ) { uptr newx86 = ( (uptr)x86Ptr + align - 1) & ~( align - 1 ); uptr bytes = ( newx86 - (uptr)x86Ptr ); switch( bytes ) { - case 0: break; + case 0: break; - case 1: NOP(); break; - case 2: MOV32RtoR( ESI, ESI ); break; - case 3: write8(0x08D); write8(0x024); write8(0x024); break; - case 5: NOP(); // falls through to 4... - case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; - case 6: write8(0x08D); write8(0x0B6); write32(0); break; - case 8: NOP(); // falls through to 7... - case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; + case 1: NOP(); break; + case 2: MOV32RtoR( ESI, ESI ); break; + case 3: write8(0x08D); write8(0x024); write8(0x024); break; + case 5: NOP(); // falls through to 4... + case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; + case 6: write8(0x08D); write8(0x0B6); write32(0); break; + case 8: NOP(); // falls through to 7... + case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; - default: + default: { // for larger alignments, just use a JMP... 
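			// The filler sequences above decode as LEA-based multi-byte no-ops
			// (byte values taken from the write8/write32 calls above):
			//   case 2:                     mov esi, esi                   (2 bytes)
			//   case 3:  8D 24 24           lea esp, [esp]                 (3 bytes)
			//   case 4:  8D 64 24 00        lea esp, [esp+0]    disp8      (4 bytes)
			//   case 6:  8D B6 00000000     lea esi, [esi+0]    disp32     (6 bytes)
			//   case 7:  8D 34 35 00000000  lea esi, [esi*1+0]  SIB+disp32 (7 bytes)
			// Cases 5 and 8 prepend a one-byte NOP (90) and fall through to 4 and 7.
			// Beyond 8 bytes a short forward JMP is emitted instead, so the padding
			// bytes it skips never execute and need not be valid instructions.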
u8* aligned_target = JMP8(0); @@ -242,7 +229,7 @@ emitterT void x86AlignExecutable( int align ) } /********************/ -/* IX86 intructions */ +/* IX86 instructions */ /********************/ emitterT void STC( void ) @@ -300,7 +287,7 @@ emitterT void MOV32MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { RexRB(0, to, from); write8( 0x8B ); @@ -308,7 +295,7 @@ emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ) } /* mov [r32+r32*scale] to r32 */ -emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale ) { RexRXB(0,to,from2,from); write8( 0x8B ); @@ -317,7 +304,7 @@ emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType f } // mov r32 to [r32<= -128) + else if(is_s8(imm)) { write8( 0x83 ); ModRM( 3, 0, to ); @@ -860,7 +840,7 @@ emitterT void ADD16ItoR( x86IntRegType to, s16 imm ) emitterT void ADD16ItoM( uptr to, s16 imm ) { write8( 0x66 ); - if(imm <= 127 && imm >= -128) + if(is_s8(imm)) { write8( 0x83 ); ModRM( 0, 0, DISP32 ); @@ -1776,31 +1756,6 @@ emitterT u8* JNO8( u8 to ) { return J8Rel( 0x71, to ); } -/* Untested and slower, use 32bit versions instead -// ja rel16 -emitterT u16* eJA16( u16 to ) -{ -return J16Rel( 0x87, to ); -} - -// jb rel16 -emitterT u16* eJB16( u16 to ) -{ -return J16Rel( 0x82, to ); -} - -// je rel16 -emitterT u16* eJE16( u16 to ) -{ -return J16Rel( 0x84, to ); -} - -// jz rel16 -emitterT u16* eJZ16( u16 to ) -{ -return J16Rel( 0x84, to ); -} -*/ // jb rel32 emitterT u32* JB32( u32 to ) { @@ -2271,7 +2226,7 @@ emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) ModRM(1, to, from); write8(0x24); } - else if( offset <= 127 && offset >= -128 ) { + else if( is_s8(offset) ) { ModRM(1, to, from); write8(0x24); write8(offset); @@ -2286,7 +2241,7 @@ emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) if( offset == 0 && from != EBP && from!=ESP ) { ModRM(0, to, from); } - else if( offset <= 127 && offset >= -128 ) { + else if( is_s8(offset) ) { ModRM(1, to, from); write8(offset); } @@ -2298,7 +2253,7 @@ emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) } // to = from + offset -emitterT void LEA16RtoR(x86IntRegType to, x86IntRegType from, u16 offset) +emitterT void LEA16RtoR(x86IntRegType to, x86IntRegType from, s16 offset) { write8(0x66); LEA32RtoR(to, from, offset); diff --git a/pcsx2/x86/ix86/ix86_mmx.inl b/pcsx2/x86/ix86/ix86_mmx.cpp similarity index 86% rename from pcsx2/x86/ix86/ix86_mmx.inl rename to pcsx2/x86/ix86/ix86_mmx.cpp index f63686e700..77f8f33c97 100644 --- a/pcsx2/x86/ix86/ix86_mmx.inl +++ b/pcsx2/x86/ix86/ix86_mmx.cpp @@ -16,7 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#pragma once +#include "PrecompiledHeader.h" +#include "ix86_internal.h" //------------------------------------------------------------------ // MMX instructions @@ -482,32 +483,16 @@ emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) ModRM( 3, to, from ); } -emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ) +emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { write16( 0x6F0F ); - - if( offset < 128 && offset >= -128) { - ModRM( 1, to, from ); - 
write8(offset); - } - else { - ModRM( 2, to, from ); - write32(offset); - } + WriteRmOffsetFrom( to, from, offset ); } -emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ) +emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { write16( 0x7F0F ); - - if( offset < 128 && offset >= -128) { - ModRM( 1, from , to ); - write8(offset); - } - else { - ModRM( 2, from, to ); - write32(offset); - } + WriteRmOffsetFrom( from, to, offset ); } /* movd m32 to r64 */ @@ -532,24 +517,10 @@ emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) ModRM( 3, to, from ); } -emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from ) +emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) { write16( 0x6E0F ); - ModRM( 0, to, from ); -} - -emitterT void MOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset ) -{ - write16( 0x6E0F ); - - if( offset < 128 ) { - ModRM( 1, to, from ); - write8(offset); - } - else { - ModRM( 2, to, from ); - write32(offset); - } + WriteRmOffsetFrom( to, from, offset ); } emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) @@ -558,46 +529,12 @@ emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) ModRM( 3, from, to ); } -emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from ) +emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { write16( 0x7E0F ); - ModRM( 0, from, to ); - if( to >= 4 ) { - // no idea why - assert( to == ESP ); - write8(0x24); - } - + WriteRmOffsetFrom( from, to, offset ); } -emitterT void MOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ) -{ - write16( 0x7E0F ); - - if( offset < 128 ) { - ModRM( 1, from, to ); - write8(offset); - } - else { - ModRM( 2, from, to ); - write32(offset); - } -} - -///* movd r32 to r64 */ -//emitterT void MOVD32MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) -//{ -// write16( 0x6E0F ); -// ModRM( 3, to, from ); -//} -// -///* movq r64 to r32 */ -//emitterT void MOVD64MMXtoMMX( x86MMXRegType to, x86MMXRegType from ) -//{ -// write16( 0x7E0F ); -// ModRM( 3, from, to ); -//} - // untested emitterT void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from) { diff --git a/pcsx2/x86/ix86/ix86_sse.inl b/pcsx2/x86/ix86/ix86_sse.cpp similarity index 89% rename from pcsx2/x86/ix86/ix86_sse.inl rename to pcsx2/x86/ix86/ix86_sse.cpp index af25c1cbaa..ffeb51365b 100644 --- a/pcsx2/x86/ix86/ix86_sse.inl +++ b/pcsx2/x86/ix86/ix86_sse.cpp @@ -16,7 +16,9 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#pragma once +#include "PrecompiledHeader.h" +#include "ix86_internal.h" +#include "ix86_sse_helpers.h" ////////////////////////////////////////////////////////////////////////////////////////// // AlwaysUseMovaps [const] @@ -144,7 +146,7 @@ static const bool AlwaysUseMovaps = true; write8( op ) /* movups [r32][r32*scale] to xmm1 */ -emitterT void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { RexRXB(0, to, from2, from); write16( 0x100f ); @@ -153,7 +155,7 @@ emitterT void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegT } /* movups xmm1 to [r32][r32*scale] */ -emitterT void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int 
scale ) { RexRXB(1, to, from2, from); write16( 0x110f ); @@ -185,7 +187,7 @@ emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) ModRM( 0, to, from ); } -emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, to, from); write16( 0x120f ); @@ -200,7 +202,7 @@ emitterT void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) ModRM( 0, from, to ); } -emitterT void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, from, to); write16( 0x130f ); @@ -208,7 +210,7 @@ emitterT void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset= } /* movaps [r32][r32*scale] to xmm1 */ -emitterT void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { assert( from != EBP ); RexRXB(0, to, from2, from); @@ -218,7 +220,7 @@ emitterT void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegT } /* movaps xmm1 to [r32][r32*scale] */ -emitterT void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ) +emitterT void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { assert( from != EBP ); RexRXB(0, to, from2, from); @@ -228,7 +230,7 @@ emitterT void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegT } // movaps [r32+offset] to r32 -emitterT void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, to, from); write16( 0x280f ); @@ -236,7 +238,7 @@ emitterT void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset= } // movaps r32 to [r32+offset] -emitterT void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset ) { RexRB(0, from, to); write16( 0x290f ); @@ -244,7 +246,7 @@ emitterT void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset= } // movdqa [r32+offset] to r32 -emitterT void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset ) { if( AlwaysUseMovaps ) SSE_MOVAPSRmtoR( to, from, offset ); @@ -258,7 +260,7 @@ emitterT void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset } // movdqa r32 to [r32+offset] -emitterT void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset ) { if( AlwaysUseMovaps ) SSE_MOVAPSRtoRm( to, from, offset ); @@ -272,7 +274,7 @@ emitterT void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset } // movups [r32+offset] to r32 -emitterT void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, to, from); write16( 0x100f ); @@ -280,7 +282,7 @@ emitterT void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset= } // movups r32 to [r32+offset] -emitterT void SSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, 
int offset ) { RexRB(0, from, to); write16( 0x110f ); @@ -335,7 +337,7 @@ emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } } -emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { write8(0xf3); RexRB(0, to, from); @@ -343,7 +345,7 @@ emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int off WriteRmOffsetFrom(to, from, offset); } -emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { write8(0xf3); RexRB(0, from, to); @@ -358,14 +360,14 @@ emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } -emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, to, from); write16( 0x120f ); WriteRmOffsetFrom(to, from, offset); } -emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { RexRB(0, from, to); write16(0x130f); @@ -379,14 +381,14 @@ emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int of emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); } -emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { RexRB(0, to, from); write16( 0x160f ); WriteRmOffsetFrom(to, from, offset); } -emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { RexRB(0, from, to); write16(0x170f); @@ -900,7 +902,7 @@ emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) ModRM( 0, to, from); } -emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) +emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { write8(0x66); RexRB(0, to, from); @@ -911,7 +913,7 @@ emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int off emitterT void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } emitterT void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); } -emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) +emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { write8(0x66); RexRB(0, from, to); @@ -1400,3 +1402,160 @@ emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) write24(0x28380F); ModRM(3, to, from); } + +////////////////////////////////////////////////////////////////////////////////////////// +// SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) +// This header should always be included 
*after* ix86.h. + +// Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the +// overhead of dynarec instructions that use these, even thought the same check would +// have been done redundantly by the emitter function. + +emitterT void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from); + else SSE_MOVAPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from); + else SSE_MOVAPS_XMM_to_M128(to, from); +} + +emitterT void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from); + else SSE_MOVAPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoR(to, from, offset); + else SSE_MOVAPSRmtoR(to, from, offset); +} + +emitterT void SSEX_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRm(to, from, offset); + else SSE_MOVAPSRtoRm(to, from, offset); +} + +emitterT void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from); + else SSE_MOVUPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ) +{ + if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from); + else SSE_MOVUPS_XMM_to_M128(to, from); +} + +emitterT void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from); + else SSE_MOVSS_M32_to_XMM(to, from); +} + +emitterT void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from); + else SSE_MOVSS_XMM_to_M32(to, from); +} + +emitterT void SSEX_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_Rm_to_XMM(to, from, offset); + else SSE_MOVSS_Rm_to_XMM(to, from, offset); +} + +emitterT void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from, offset); + else SSE_MOVSS_XMM_to_Rm(to, from, offset); +} + +emitterT void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from); + else SSE_ORPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from); + else SSE_ORPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from); + else SSE_XORPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from); + else SSE_XORPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from); + else SSE_ANDPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType 
from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from); + else SSE_ANDPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from); + else SSE_ANDNPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from); + else SSE_ANDNPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from); + else SSE_UNPCKLPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from); + else SSE_UNPCKLPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) +{ + if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from); + else SSE_UNPCKHPS_M128_to_XMM(to, from); +} + +emitterT void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) +{ + if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from); + else SSE_UNPCKHPS_XMM_to_XMM(to, from); +} + +emitterT void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + if( g_xmmtypes[from] == XMMT_INT ) { + SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from); + if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e); + } + else { + SSE_MOVHLPS_XMM_to_XMM(to, from); + } +} diff --git a/pcsx2/x86/ix86/ix86_sse_helpers.h b/pcsx2/x86/ix86/ix86_sse_helpers.h index ccd7ef5373..b198c336b5 100644 --- a/pcsx2/x86/ix86/ix86_sse_helpers.h +++ b/pcsx2/x86/ix86/ix86_sse_helpers.h @@ -22,158 +22,30 @@ // SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) // This header should always be included *after* ix86.h. -#ifndef _ix86_included_ -#error Dependency fail: Please define _EmitterId_ and include ix86.h first. -#endif - // Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the // overhead of dynarec instructions that use these. 
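// These SSEX_* forms pick either the integer encoding (MOVDQA/POR/PXOR/PAND...) or the
// float encoding (MOVAPS/ORPS/XORPS/ANDPS...) of the same logical operation, keyed on the
// g_xmmtypes entry of the xmm register operand (the source register in the reg,reg forms),
// presumably to keep data in its own int/float domain and avoid bypass penalties on CPUs
// that are sensitive to mixing the two.
//
// Illustrative call sequence only (register numbers are made up, not from the original code):
//
//   g_xmmtypes[3] = XMMT_INT;      // xmm3 currently holds packed integer data
//   SSEX_POR_XMM_to_XMM( 2, 3 );   // source is int-typed, so this emits POR xmm2,xmm3;
//                                  // with a float-typed source it would emit ORPS instead.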
-static __forceinline void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQA_M128_to_XMM(to, from); - else SSE_MOVAPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_M128(to, from); - else SSE_MOVAPS_XMM_to_M128(to, from); -} - -static __forceinline void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQA_XMM_to_XMM(to, from); - else SSE_MOVAPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQARmtoR(to, from, offset); - else SSE_MOVAPSRmtoR(to, from, offset); -} - -static __forceinline void SSEX_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQARtoRm(to, from, offset); - else SSE_MOVAPSRtoRm(to, from, offset); -} - -static __forceinline void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[to] == XMMT_INT ) SSE2_MOVDQU_M128_to_XMM(to, from); - else SSE_MOVUPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ) -{ - if( !AlwaysUseMovaps && g_xmmtypes[from] == XMMT_INT ) SSE2_MOVDQU_XMM_to_M128(to, from); - else SSE_MOVUPS_XMM_to_M128(to, from); -} - -static __forceinline void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_M32_to_XMM(to, from); - else SSE_MOVSS_M32_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_M32(to, from); - else SSE_MOVSS_XMM_to_M32(to, from); -} - -static __forceinline void SSEX_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_MOVD_Rm_to_XMM(to, from, offset); - else SSE_MOVSS_Rm_to_XMM(to, from, offset); -} - -static __forceinline void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_MOVD_XMM_to_Rm(to, from, offset); - else SSE_MOVSS_XMM_to_Rm(to, from, offset); -} - -static __forceinline void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_POR_M128_to_XMM(to, from); - else SSE_ORPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_POR_XMM_to_XMM(to, from); - else SSE_ORPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PXOR_M128_to_XMM(to, from); - else SSE_XORPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PXOR_XMM_to_XMM(to, from); - else SSE_XORPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PAND_M128_to_XMM(to, from); - else SSE_ANDPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == 
XMMT_INT ) SSE2_PAND_XMM_to_XMM(to, from); - else SSE_ANDPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PANDN_M128_to_XMM(to, from); - else SSE_ANDNPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PANDN_XMM_to_XMM(to, from); - else SSE_ANDNPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKLDQ_M128_to_XMM(to, from); - else SSE_UNPCKLPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKLDQ_XMM_to_XMM(to, from); - else SSE_UNPCKLPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) -{ - if( g_xmmtypes[to] == XMMT_INT ) SSE2_PUNPCKHDQ_M128_to_XMM(to, from); - else SSE_UNPCKHPS_M128_to_XMM(to, from); -} - -static __forceinline void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - if( g_xmmtypes[from] == XMMT_INT ) SSE2_PUNPCKHDQ_XMM_to_XMM(to, from); - else SSE_UNPCKHPS_XMM_to_XMM(to, from); -} - -static __forceinline void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - if( g_xmmtypes[from] == XMMT_INT ) { - SSE2_PUNPCKHQDQ_XMM_to_XMM(to, from); - if( to != from ) SSE2_PSHUFD_XMM_to_XMM(to, to, 0x4e); - } - else { - SSE_MOVHLPS_XMM_to_XMM(to, from); - } -} +extern void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ); +extern void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSEX_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ); +extern void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSEX_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_tools.cpp b/pcsx2/x86/ix86/ix86_tools.cpp index 5ef532ffcd..13028b7432 100644 --- 
a/pcsx2/x86/ix86/ix86_tools.cpp +++ b/pcsx2/x86/ix86/ix86_tools.cpp @@ -18,7 +18,7 @@ #include "PrecompiledHeader.h" -#include "Misc.h" +#include "System.h" #include "ix86/ix86.h" // used to make sure regs don't get changed while in recompiler diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index c37a084043..a95251ffc0 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -18,7 +18,6 @@ #pragma once #define mVUdebug // Prints Extra Info to Console -#define _EmitterId_ (vuIndex+1) #include "Common.h" #include "VU.h" #include "GS.h" From 920e99145e0b0903d268e80f0822fe29802b123f Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 7 Apr 2009 23:48:43 +0000 Subject: [PATCH 013/143] Implemented the 16 bit forms of Group 1 instructions into the new emitter. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@922 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/ix86.h | 21 +- pcsx2/x86/ix86/ix86_group1.cpp | 103 ++++++---- pcsx2/x86/ix86/ix86_legacy.cpp | 339 +-------------------------------- pcsx2/x86/ix86/ix86_types.h | 4 +- 4 files changed, 85 insertions(+), 382 deletions(-) diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 3151de883f..939ff21e48 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -46,9 +46,16 @@ extern __threadlocal u8 *x86Ptr; extern __threadlocal u8 *j8Ptr[32]; extern __threadlocal u32 *j32Ptr[32]; +template< typename T > +static emitterT void x86write( T val ) +{ + *(T*)x86Ptr = val; + x86Ptr += sizeof(T); +} + static emitterT void write8( u8 val ) { - *x86Ptr = (u8)val; + *x86Ptr = val; x86Ptr++; } @@ -271,9 +278,9 @@ extern void ADD32MtoR( x86IntRegType to, uptr from ); // add r16 to r16 extern void ADD16RtoR( x86IntRegType to , x86IntRegType from ); // add imm16 to r16 -extern void ADD16ItoR( x86IntRegType to, s16 from ); +extern void ADD16ItoR( x86IntRegType to, u16 imm ); // add imm16 to m16 -extern void ADD16ItoM( uptr to, s16 from ); +extern void ADD16ItoM( uptr to, u16 imm ); // add r16 to m16 extern void ADD16RtoM( uptr to, x86IntRegType from ); // add m16 to r16 @@ -313,7 +320,7 @@ extern void SUB32MtoR( x86IntRegType to, uptr from ) ; // sub r32 to m32 extern void SUB32RtoM( uptr to, x86IntRegType from ); // sub r16 to r16 -extern void SUB16RtoR( x86IntRegType to, u16 from ); +extern void SUB16RtoR( x86IntRegType to, x86IntRegType from ); // sub imm16 to r16 extern void SUB16ItoR( x86IntRegType to, u16 from ); // sub imm16 to m16 @@ -643,12 +650,6 @@ extern void CMP32ItoM( uptr to, u32 from ); extern void CMP32RtoR( x86IntRegType to, x86IntRegType from ); // cmp m32 to r32 extern void CMP32MtoR( x86IntRegType to, uptr from ); -// cmp imm32 to [r32] -extern void CMP32I8toRm( x86IntRegType to, u8 from); -// cmp imm32 to [r32+off] -extern void CMP32I8toRm8( x86IntRegType to, u8 from, u8 off); -// cmp imm8 to [r32] -extern void CMP32I8toM( uptr to, u8 from); // cmp imm16 to r16 extern void CMP16ItoR( x86IntRegType to, u16 from ); diff --git a/pcsx2/x86/ix86/ix86_group1.cpp b/pcsx2/x86/ix86/ix86_group1.cpp index 1af689e82c..190550e1eb 100644 --- a/pcsx2/x86/ix86/ix86_group1.cpp +++ b/pcsx2/x86/ix86/ix86_group1.cpp @@ -46,13 +46,27 @@ enum Group1InstructionType }; -emitterT void Group1_32( Group1InstructionType inst, x86Register to, x86Register from ) +static emitterT void Group1( Group1InstructionType inst, x86Register to, x86Register from ) { write8( 0x01 | (inst<<3) ); ModRM( 3, from.Id, to.Id ); } -emitterT void Group1_32( Group1InstructionType inst, x86Register to, u32 imm ) +static emitterT void Group1( 
Group1InstructionType inst, const ModSib& sibdest, x86Register from ) +{ + write8( 0x01 | (inst<<3) ); + EmitSibMagic( from, sibdest ); +} + +/* add m32 to r32 */ +static emitterT void Group1( Group1InstructionType inst, x86Register to, const ModSib& sibsrc ) +{ + write8( 0x03 | (inst<<3) ); + EmitSibMagic( to, sibsrc ); +} + +template< typename T > +static emitterT void Group1_Imm( Group1InstructionType inst, x86Register to, T imm ) { if( is_s8( imm ) ) { @@ -69,11 +83,12 @@ emitterT void Group1_32( Group1InstructionType inst, x86Register to, u32 imm ) write8( 0x81 ); ModRM( 3, inst, to.Id ); } - write32( imm ); + x86write( imm ); } } -emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, u32 imm ) +template< typename T > +static emitterT void Group1_Imm( Group1InstructionType inst, const ModSib& sibdest, T imm ) { write8( is_s8( imm ) ? 0x83 : 0x81 ); @@ -82,23 +97,10 @@ emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, u32 if( is_s8( imm ) ) write8( (s8)imm ); else - write32( imm ); + x86write( imm ); } -emitterT void Group1_32( Group1InstructionType inst, const ModSib& sibdest, x86Register from ) -{ - write8( 0x01 | (inst<<3) ); - EmitSibMagic( from, sibdest ); -} - -/* add m32 to r32 */ -emitterT void Group1_32( Group1InstructionType inst, x86Register to, const ModSib& sibsrc ) -{ - write8( 0x03 | (inst<<3) ); - EmitSibMagic( to, sibsrc ); -} - -emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) +static emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) { if( to == eax ) { @@ -113,28 +115,30 @@ emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) } } +// 16 bit instruction prefix! +static __forceinline void prefix16() { write8(0x66); } +static __forceinline x86Register cvt2reg( x86Register16 src ) { return x86Register( src.Id ); } + ////////////////////////////////////////////////////////////////////////////////////////// // #define DEFINE_GROUP1_OPCODE( lwr, cod ) \ - emitterT void lwr##32( x86Register to, x86Register from ) { Group1_32( G1Type_##cod, to, from ); } \ - emitterT void lwr##32( x86Register to, u32 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ - emitterT void lwr##32( x86Register to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( void* to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( void* to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ - emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } - -/* - emitterT void lwr##16( x86Register16 to, x86Register16 from ) { Group1_32( G1Type_##cod, to, from ); } \ - emitterT void lwr##16( x86Register16 to, u16 imm ) { Group1_32( G1Type_##cod, to, imm ); } \ - emitterT void lwr##16( x86Register16 to, void* from ) { Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##16( void* to, x86Register16 from ) { Group1_32( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##16( void* to, u16 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } \ - emitterT void lwr##16( x86Register16 to, const x86ModRm& from ){ Group1_32( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##16( const x86ModRm& to, x86Register16 from ){ Group1_32( 
G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##16( const x86ModRm& to, u32 imm ) { Group1_32( G1Type_##cod, ptr[to], imm ); } -*/ + emitterT void lwr##32( x86Register to, x86Register from ) { Group1( G1Type_##cod, to, from ); } \ + emitterT void lwr##32( x86Register to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( void* to, x86Register from ) { Group1( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ + emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1( G1Type_##cod, ptr[to], from ); } \ + emitterT void lwr##32( x86Register to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ + emitterT void lwr##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + \ + emitterT void lwr##16( x86Register16 to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, cvt2reg(to), cvt2reg(from) ); } \ + emitterT void lwr##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, cvt2reg(to), ptr[from] ); } \ + emitterT void lwr##16( void* to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, ptr[to], cvt2reg(from) ); } \ + emitterT void lwr##16( x86Register16 to, const x86ModRm& from ){ prefix16(); Group1( G1Type_##cod, cvt2reg(to), ptr[from] ); } \ + emitterT void lwr##16( const x86ModRm& to, x86Register16 from ){ prefix16(); Group1( G1Type_##cod, ptr[to], cvt2reg(from) ); } \ + emitterT void lwr##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, cvt2reg(to), imm ); } \ + emitterT void lwr##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void lwr##16( const x86ModRm& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } DEFINE_GROUP1_OPCODE( add, ADD ); DEFINE_GROUP1_OPCODE( cmp, CMP ); @@ -153,6 +157,10 @@ static __forceinline x86Emitter::x86Register _reghlp( x86IntRegType src ) return x86Emitter::x86Register( src ); } +static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src ) +{ + return x86Emitter::x86Register16( src ); +} static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) { @@ -164,12 +172,21 @@ static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) #define DEFINE_GROUP1_OPCODE_LEGACY( lwr, cod ) \ emitterT void cod##32RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##32( _reghlp(to), _reghlp(from) ); } \ emitterT void cod##32ItoR( x86IntRegType to, u32 imm ) { x86Emitter::lwr##32( _reghlp(to), imm ); } \ - emitterT void cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ - emitterT void cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ + emitterT void cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ + emitterT void cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ emitterT void cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ - emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset ){ x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ + emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ emitterT void cod##32RmtoR( x86IntRegType to, x86IntRegType 
from, int offset ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ - emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } + emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, _reghlp(from) ); } \ + \ + emitterT void cod##16RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##16( _reghlp16(to), _reghlp16(from) ); } \ + emitterT void cod##16ItoR( x86IntRegType to, u16 imm ) { x86Emitter::lwr##16( _reghlp16(to), imm ); } \ + emitterT void cod##16MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##16( _reghlp16(to), (void*)from ); } \ + emitterT void cod##16RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##16( (void*)to, _reghlp16(from) ); } \ + emitterT void cod##16ItoM( uptr to, u16 imm ) { x86Emitter::lwr##16( (void*)to, imm ); } \ + emitterT void cod##16ItoRm( x86IntRegType to, u16 imm, int offset ) { x86Emitter::lwr##16( _mrmhlp(to) + offset, imm ); } \ + emitterT void cod##16RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##16( _reghlp16(to), _mrmhlp(from) + offset ); } \ + emitterT void cod##16RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##16( _mrmhlp(to) + offset, _reghlp16(from) ); } DEFINE_GROUP1_OPCODE_LEGACY( add, ADD ); DEFINE_GROUP1_OPCODE_LEGACY( cmp, CMP ); @@ -180,6 +197,8 @@ DEFINE_GROUP1_OPCODE_LEGACY( and, AND ); DEFINE_GROUP1_OPCODE_LEGACY( sub, SUB ); DEFINE_GROUP1_OPCODE_LEGACY( xor, XOR ); +// Special forms needed by the legacy emitter syntax: + emitterT void AND32I8toR( x86IntRegType to, s8 from ) { x86Emitter::and32( _reghlp(to), from ); diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 8a99058c02..3f4134836c 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -802,80 +802,6 @@ emitterT void CMOVLE32MtoR( x86IntRegType to, uptr from ) // arithmetic instructions / //////////////////////////////////// -// add r16 to r16 -emitterT void ADD16RtoR( x86IntRegType to , x86IntRegType from ) -{ - write8(0x66); - RexRB(0,to,from); - write8( 0x03 ); - ModRM( 3, to, from ); -} - -/* add imm16 to r16 */ -emitterT void ADD16ItoR( x86IntRegType to, s16 imm ) -{ - write8( 0x66 ); - RexB(0,to); - - if (to == EAX) - { - write8( 0x05 ); - write16( imm ); - } - else if(is_s8(imm)) - { - write8( 0x83 ); - ModRM( 3, 0, to ); - write8((u8)imm ); - } - else - { - write8( 0x81 ); - ModRM( 3, 0, to ); - write16( imm ); - } -} - -/* add imm16 to m16 */ -emitterT void ADD16ItoM( uptr to, s16 imm ) -{ - write8( 0x66 ); - if(is_s8(imm)) - { - write8( 0x83 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write8((u8)imm ); - } - else - { - write8( 0x81 ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( imm ); - } -} - -/* add r16 to m16 */ -emitterT void ADD16RtoM(uptr to, x86IntRegType from ) -{ - write8( 0x66 ); - RexR(0,from); - write8( 0x01 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* add m16 to r16 */ -emitterT void ADD16MtoR( x86IntRegType to, uptr from ) -{ - write8( 0x66 ); - RexR(0,to); - write8( 0x03 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - // add m8 to r8 emitterT void ADD8MtoR( x86IntRegType to, uptr from ) { @@ -915,49 +841,6 @@ emitterT void INC16M( u32 to ) write32( MEMADDR(to, 4) ); } - - -// sub r16 to r16 -emitterT void SUB16RtoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - 
RexRB(0,to,from); - write8( 0x2b ); - ModRM( 3, to, from ); -} - -/* sub imm16 to r16 */ -emitterT void SUB16ItoR( x86IntRegType to, u16 from ) { - write8( 0x66 ); - RexB(0,to); - if ( to == EAX ) { - write8( 0x2D ); - } - else { - write8( 0x81 ); - ModRM( 3, 5, to ); - } - write16( from ); -} - -/* sub imm16 to m16 */ -emitterT void SUB16ItoM( uptr to, u16 from ) { - write8( 0x66 ); - write8( 0x81 ); - ModRM( 0, 5, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); -} - -/* sub m16 to r16 */ -emitterT void SUB16MtoR( x86IntRegType to, uptr from ) { - write8( 0x66 ); - RexR(0,to); - write8( 0x2B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - /* dec r32 */ emitterT void DEC32R( x86IntRegType to ) { @@ -1331,60 +1214,6 @@ emitterT void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) // logical instructions / //////////////////////////////////// -// or r16 to r16 -emitterT void OR16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8(0x66); - RexRB(0,from,to); - write8( 0x09 ); - ModRM( 3, from, to ); -} - -// or imm16 to r16 -emitterT void OR16ItoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexB(0,to); - if ( to == EAX ) { - write8( 0x0D ); - } - else { - write8( 0x81 ); - ModRM( 3, 1, to ); - } - write16( from ); -} - -// or imm16 to m316 -emitterT void OR16ItoM( uptr to, u16 from ) -{ - write8(0x66); - write8( 0x81 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); -} - -/* or m16 to r16 */ -emitterT void OR16MtoR( x86IntRegType to, uptr from ) -{ - write8(0x66); - RexR(0,to); - write8( 0x0B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// or r16 to m16 -emitterT void OR16RtoM( uptr to, x86IntRegType from ) -{ - write8(0x66); - RexR(0,from); - write8( 0x09 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - // or r8 to r8 emitterT void OR8RtoR( x86IntRegType to, x86IntRegType from ) { @@ -1420,77 +1249,6 @@ emitterT void OR8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -// and r16 to r16 -emitterT void AND16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8(0x66); - RexRB(0,to,from); - write8( 0x23 ); - ModRM( 3, to, from ); -} - -/* and imm16 to r16 */ -emitterT void AND16ItoR( x86IntRegType to, u16 from ) -{ - write8(0x66); - RexB(0,to); - - if ( to == EAX ) { - write8( 0x25 ); - write16( from ); - } - else if ( from < 0x80 ) { - write8( 0x83 ); - ModRM( 3, 0x4, to ); - write8((u8)from ); - } - else { - write8( 0x81 ); - ModRM( 3, 0x4, to ); - write16( from ); - } -} - -/* and imm16 to m16 */ -emitterT void AND16ItoM( uptr to, u16 from ) -{ - write8(0x66); - if ( from < 0x80 ) { - write8( 0x83 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write8((u8)from ); - } - else - { - write8( 0x81 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); - - } -} - -/* and r16 to m16 */ -emitterT void AND16RtoM( uptr to, x86IntRegType from ) -{ - write8( 0x66 ); - RexR(0,from); - write8( 0x21 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and m16 to r16 */ -emitterT void AND16MtoR( x86IntRegType to, uptr from ) -{ - write8( 0x66 ); - RexR(0,to); - write8( 0x23 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4)); -} - /* and imm8 to r8 */ emitterT void AND8ItoR( x86IntRegType to, u8 from ) { @@ -1906,93 +1664,6 @@ emitterT void CALL32M( u32 to ) // misc instructions / //////////////////////////////////// -// cmp imm8 to [r32] (byte ptr) -emitterT void CMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) -{ - 
RexB(0,to); - write8( 0x80 ); - ModRM( (off != 0), 7, to ); - if( off != 0 ) write8(off); - write8(from); -} - -// cmp imm8 to [r32] -emitterT void CMP32I8toRm( x86IntRegType to, u8 from, s8 off=0 ) -{ - RexB(0,to); - write8( 0x83 ); - ModRM( (off!=0), 7, to ); - if( off != 0 ) write8(off); - write8(from); -} - -// cmp imm32 to [r32] -emitterT void CMP32ItoRm( x86IntRegType to, u32 from, s8 off=0 ) -{ - // fixme : This should use the imm8 form if 'from' is between 127 and -128. - - RexB(0,to); - write8( 0x81 ); - ModRM( (off != 0), 7, to ); - if( off != 0 ) write8(off); - write32(from); -} - -// cmp imm8 to [mem] (dword ptr) -emitterT void CMP32I8toM( uptr to, u8 from ) -{ - write8( 0x83 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -/* cmp imm16 to r16 */ -emitterT void CMP16ItoR( x86IntRegType to, u16 from ) -{ - write8( 0x66 ); - RexB(0,to); - if ( to == EAX ) - { - write8( 0x3D ); - } - else - { - write8( 0x81 ); - ModRM( 3, 7, to ); - } - write16( from ); -} - -/* cmp imm16 to m16 */ -emitterT void CMP16ItoM( uptr to, u16 from ) -{ - write8( 0x66 ); - write8( 0x81 ); - ModRM( 0, 7, DISP32 ); - write32( MEMADDR(to, 6) ); - write16( from ); -} - -/* cmp r16 to r16 */ -emitterT void CMP16RtoR( x86IntRegType to, x86IntRegType from ) -{ - write8( 0x66 ); - RexRB(0,from,to); - write8( 0x39 ); - ModRM( 3, from, to ); -} - -/* cmp m16 to r16 */ -emitterT void CMP16MtoR( x86IntRegType to, uptr from ) -{ - write8( 0x66 ); - RexR(0,to); - write8( 0x3B ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - // cmp imm8 to r8 emitterT void CMP8ItoR( x86IntRegType to, u8 from ) { @@ -2018,6 +1689,16 @@ emitterT void CMP8MtoR( x86IntRegType to, uptr from ) write32( MEMADDR(from, 4) ); } +// cmp imm8 to [r32] (byte ptr) +emitterT void CMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) +{ + RexB(0,to); + write8( 0x80 ); + ModRM( (off != 0), 7, to ); + if( off != 0 ) write8(off); + write8(from); +} + /* test imm32 to r32 */ emitterT void TEST32ItoR( x86IntRegType to, u32 from ) { diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index be0e0d3ec1..589badb78a 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -151,7 +151,9 @@ struct CPUINFO{ extern CPUINFO cpuinfo; //------------------------------------------------------------------ -static __forceinline bool is_s8( u32 imm ) { return (s8)imm == (s32)imm; } +// templated version of is_s8 is required, so that u16's get correct sign extension treatment. +template< typename T > +static __forceinline bool is_s8( T imm ) { return (s8)imm == (s32)imm; } namespace x86Emitter { From 3dd99a09327656d094ae85c61f9940c4c73535a9 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Wed, 8 Apr 2009 06:25:40 +0000 Subject: [PATCH 014/143] More updates to the new emitter: switched over some Push/Pop instructions, did a fully compliant implementation of LEa (both 16 and 32!), and fixed a couple small bugs in the ModRM/Sib encoder regarding EBP as an [index*scale] formation. 
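
For reference, the quirk being handled here: x86 has no direct encoding for a
bare [index*scale] address.  The SIB byte must name EBP as the base with mod=00,
which the CPU reads as "no base + disp32", so a zero 32-bit displacement still
gets emitted.  For example:

    lea eax, [ecx*4]   ->   8D 04 8D 00 00 00 00

Likewise a plain [ebp] base cannot be encoded without a displacement, so it has
to be emitted as [ebp+0] with an explicit disp8 of zero.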
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@923 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVUmicroLower.cpp | 2 +- pcsx2/x86/ix86/ix86.cpp | 265 +++++++++++++++++++++++++++----- pcsx2/x86/ix86/ix86.h | 50 ++++++ pcsx2/x86/ix86/ix86_group1.cpp | 187 ++++++++++++----------- pcsx2/x86/ix86/ix86_internal.h | 2 +- pcsx2/x86/ix86/ix86_legacy.cpp | 271 +++++---------------------------- pcsx2/x86/ix86/ix86_types.h | 111 +++++++++----- 7 files changed, 495 insertions(+), 393 deletions(-) diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 3271d69681..9f9d816cf7 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -354,7 +354,7 @@ void recVUMI_IADD( VURegs *VU, int info ) if( fdreg == fsreg ) ADD32RtoR(fdreg, ftreg); else if( fdreg == ftreg ) ADD32RtoR(fdreg, fsreg); - else LEA16RRtoR(fdreg, fsreg, ftreg); + else LEA32RRtoR(fdreg, fsreg, ftreg); MOVZX32R16toR(fdreg, fdreg); // neeed since don't know if fdreg's upper bits are 0 } } diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index bbdf9da5ff..a13c7b34a7 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -45,16 +45,16 @@ x86IndexerType ptr; ////////////////////////////////////////////////////////////////////////////////////////// // -const x86Register x86Register::Empty( -1 ); +const x86Register32 x86Register32::Empty( -1 ); -const x86Register eax( 0 ); -const x86Register ebx( 3 ); -const x86Register ecx( 1 ); -const x86Register edx( 2 ); -const x86Register esi( 6 ); -const x86Register edi( 7 ); -const x86Register ebp( 5 ); -const x86Register esp( 4 ); +const x86Register32 eax( 0 ); +const x86Register32 ebx( 3 ); +const x86Register32 ecx( 1 ); +const x86Register32 edx( 2 ); +const x86Register32 esi( 6 ); +const x86Register32 edi( 7 ); +const x86Register32 ebp( 5 ); +const x86Register32 esp( 4 ); const x86Register16 ax( 0 ); const x86Register16 bx( 3 ); @@ -77,20 +77,30 @@ const x86Register8 bh( 7 ); ////////////////////////////////////////////////////////////////////////////////////////// // x86Register Method Implementations // -x86ModRm x86Register::operator+( const x86Register& right ) const +x86ModRm x86Register32::operator+( const x86Register32& right ) const { return x86ModRm( *this, right ); } -x86ModRm x86Register::operator+( const x86ModRm& right ) const +x86ModRm x86Register32::operator+( const x86ModRm& right ) const { return right + *this; } +x86ModRm x86Register32::operator+( s32 right ) const +{ + return x86ModRm( *this, right ); +} + +x86ModRm x86Register32::operator*( u32 right ) const +{ + return x86ModRm( Empty, *this, right ); +} + ////////////////////////////////////////////////////////////////////////////////////////// // x86ModRm Method Implementations // -x86ModRm& x86ModRm::Add( const x86Register& src ) +x86ModRm& x86ModRm::Add( const x86IndexReg& src ) { if( src == Index ) { @@ -99,7 +109,7 @@ x86ModRm& x86ModRm::Add( const x86Register& src ) else if( src == Base ) { // Compound the existing register reference into the Index/Scale pair. - Base = x86Register::Empty; + Base = x86IndexReg::Empty; if( src == Index ) Factor++; @@ -153,13 +163,20 @@ x86ModRm& x86ModRm::Add( const x86ModRm& src ) // void ModSib::Reduce() { - // If no index reg, then nothing for us to do... - if( Index.IsEmpty() || Scale == 0 ) return; + // If no index reg, then load the base register into the index slot. 
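+	// (This normalizes a lone base register into the Index field, so that
+	// NeedsSibMagic/EmitSibMagic below only ever have to check Index for
+	// the single-register case.)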
+ if( Index.IsEmpty() ) + { + Index = Base; + Scale = 0; + Base = x86IndexReg::Empty; + return; + } // The Scale has a series of valid forms, all shown here: switch( Scale ) { + case 0: break; case 1: Scale = 0; break; case 2: Scale = 1; break; @@ -203,7 +220,7 @@ ModSib::ModSib( const x86ModRm& src ) : Reduce(); } -ModSib::ModSib( x86Register base, x86Register index, int scale, s32 displacement ) : +ModSib::ModSib( x86IndexReg base, x86IndexReg index, int scale, s32 displacement ) : Base( base ), Index( index ), Scale( scale ), @@ -220,27 +237,24 @@ ModSib::ModSib( s32 displacement ) : { } -x86Register ModSib::GetEitherReg() const -{ - return Base.IsEmpty() ? Base : Index; -} - // ------------------------------------------------------------------------ // returns TRUE if this instruction requires SIB to be encoded, or FALSE if the // instruction ca be encoded as ModRm alone. -emitterT bool NeedsSibMagic( const ModSib& info ) +bool NeedsSibMagic( const ModSib& info ) { // no registers? no sibs! - if( info.Base.IsEmpty() && info.Index.IsEmpty() ) return false; + if( info.Index.IsEmpty() ) return false; // A scaled register needs a SIB - if( info.Scale != 0 && !info.Index.IsEmpty() ) return true; + if( info.Scale != 0 ) return true; // two registers needs a SIB - if( !info.Base.IsEmpty() && !info.Index.IsEmpty() ) return true; + if( !info.Base.IsEmpty() ) return true; - // If register is ESP, then we need a SIB: - if( info.Base == esp || info.Index == esp ) return true; + // If index register is ESP, then we need a SIB: + // (the ModSib::Reduce() ensures that stand-alone ESP will be in the + // index position for us) + if( info.Index == esp ) return true; return false; } @@ -251,7 +265,7 @@ emitterT bool NeedsSibMagic( const ModSib& info ) // regfield - register field to be written to the ModRm. This is either a register specifier // or an opcode extension. In either case, the instruction determines the value for us. // -emitterT void EmitSibMagic( int regfield, const ModSib& info ) +void EmitSibMagic( int regfield, const ModSib& info ) { int displacement_size = (info.Displacement == 0) ? 0 : ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); @@ -263,29 +277,45 @@ emitterT void EmitSibMagic( int regfield, const ModSib& info ) // which is encoded as "EBP w/o displacement" (which is why EBP must always be // encoded *with* a displacement of 0, if it would otherwise not have one). - x86Register basereg = info.GetEitherReg(); - - if( basereg.IsEmpty() ) + if( info.Index.IsEmpty() ) ModRM( 0, regfield, ModRm_UseDisp32 ); else { - if( basereg == ebp && displacement_size == 0 ) + if( info.Index == ebp && displacement_size == 0 ) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - ModRM( displacement_size, regfield, basereg.Id ); + ModRM( displacement_size, regfield, info.Index.Id ); } } else { - ModRM( displacement_size, regfield, ModRm_UseSib ); - SibSB( info.Index.Id, info.Scale, info.Base.Id ); + // In order to encode "just" index*scale (and no base), we have to encode + // it as a special [index*scale + displacement] form, which is done by + // specifying EBP as the base register and setting the displacement field + // to zero. (same as ModRm w/o SIB form above, basically, except the + // ModRm_UseDisp flag is specified in the SIB instead of the ModRM field). 
+ + if( info.Base.IsEmpty() ) + { + ModRM( 0, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); + displacement_size = 2; + } + else + { + if( info.Base == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, info.Base.Id ); + } } switch( displacement_size ) { - case 0: break; - case 1: write8( info.Displacement ); break; - case 2: write32( info.Displacement ); break; + case 0: break; + case 1: write8( info.Displacement ); break; + case 2: write32( info.Displacement ); break; jNO_DEFAULT } } @@ -296,9 +326,166 @@ emitterT void EmitSibMagic( int regfield, const ModSib& info ) // regfield - register field to be written to the ModRm. This is either a register specifier // or an opcode extension. In either case, the instruction determines the value for us. // -emitterT void EmitSibMagic( x86Register regfield, const ModSib& info ) +emitterT void EmitSibMagic( x86Register32 regfield, const ModSib& info ) { EmitSibMagic( regfield.Id, info ); } +template< typename ToReg > +static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false ) +{ + int displacement_size = (src.Displacement == 0) ? 0 : + ( ( src.IsByteSizeDisp() ) ? 1 : 2 ); + + // See EmitSibMagic for commenting on SIB encoding. + + if( !NeedsSibMagic( src ) ) + { + // LEA Land: means we have either 1-register encoding or just an offset. + // offset is encodable as an immediate MOV, and a register is encodable + // as a register MOV. + + if( src.Index.IsEmpty() ) + { + if( is16bit ) + MOV16ItoR( to.Id, src.Displacement ); + else + MOV32ItoR( to.Id, src.Displacement ); + return; + } + else if( displacement_size == 0 ) + { + if( is16bit ) + MOV16RtoR( to.Id, src.Index.Id ); + else + MOV32RtoR( to.Id, src.Index.Id ); + return; + } + else + { + // note: no need to do ebp+0 check since we encode all 0 displacements as + // register assignments above (via MOV) + + write8( 0x8d ); + ModRM( displacement_size, to.Id, src.Index.Id ); + } + } + else + { + if( src.Base.IsEmpty() ) + { + if( displacement_size == 0 ) + { + // Encode [Index*Scale] as a combination of Mov and Shl. + // This is more efficient because of the bloated format which requires + // a 32 bit displacement. + + if( is16bit ) + { + MOV16RtoR( to.Id, src.Index.Id ); + SHL16ItoR( to.Id, src.Scale ); + } + else + { + MOV32RtoR( to.Id, src.Index.Id ); + SHL32ItoR( to.Id, src.Scale ); + } + return; + } + + write8( 0x8d ); + ModRM( 0, to.Id, ModRm_UseSib ); + SibSB( src.Scale, src.Index.Id, ModRm_UseDisp32 ); + displacement_size = 2; // force 32bit displacement. + } + else + { + if( src.Base == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + write8( 0x8d ); + ModRM( displacement_size, to.Id, ModRm_UseSib ); + SibSB( src.Scale, src.Index.Id, src.Base.Id ); + } + } + + switch( displacement_size ) + { + case 0: break; + case 1: write8( src.Displacement ); break; + case 2: write32( src.Displacement ); break; + jNO_DEFAULT + } + +} + +emitterT void LEA32( x86Register32 to, const ModSib& src ) +{ + EmitLeaMagic( to, src ); +} + + +emitterT void LEA16( x86Register16 to, const ModSib& src ) +{ + // fixme: is this right? Does Lea16 use 32 bit displacement and ModRM form? + + write8( 0x66 ); + EmitLeaMagic( to, src ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// Miscellaneous Section! 
+// Various Instructions with no parameter and no special encoding logic. +// +emitterT void RET() { write8( 0xC3 ); } +emitterT void CBW() { write16( 0x9866 ); } +emitterT void CWD() { write8( 0x98 ); } +emitterT void CDQ() { write8( 0x99 ); } +emitterT void CWDE() { write8( 0x98 ); } + +emitterT void LAHF() { write8( 0x9f ); } +emitterT void SAHF() { write8( 0x9e ); } + + +////////////////////////////////////////////////////////////////////////////////////////// +// Push / Pop Emitters +// +// fixme? push/pop instructions always push and pop aligned to whatever mode the cpu +// is running in. So even thought these say push32, they would essentially be push64 on +// an x64 build. Should I rename them accordingly? --air +// +// Note: pushad/popad implementations are intentionally left out. The instructions are +// invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead. + + +emitterT void POP( x86Register32 from ) +{ + write8( 0x58 | from.Id ); +} + +emitterT void POP( const ModSib& from ) +{ + write8( 0x8f ); EmitSibMagic( 0, from ); +} + +emitterT void PUSH( u32 imm ) +{ + write8( 0x68 ); write32( imm ); +} + +emitterT void PUSH( x86Register32 from ) +{ + write8( 0x50 | from.Id ); +} + +emitterT void PUSH( const ModSib& from ) +{ + write8( 0xff ); EmitSibMagic( 6, from ); +} + +// pushes the EFLAGS register onto the stack +emitterT void PUSHFD() { write8( 0x9C ); } +// pops the EFLAGS register from the stack +emitterT void POPFD() { write8( 0x9D ); } + } diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 939ff21e48..bf89a17853 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -100,6 +100,54 @@ extern void x86Align( int bytes ); extern void x86AlignExecutable( int align ); //------------------------------------------------------------------ +////////////////////////////////////////////////////////////////////////////////////////// +// New C++ Emitter! +// +// To use it just include the x86Emitter namespace into your file/class/function off choice. 
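+// A quick usage sketch, purely illustrative, using the operations declared below:
+//
+//     using namespace x86Emitter;
+//     ADD32( eax, 1 );     // add eax, 1
+//     PUSH( ebx );         // push ebx
+//     POP( ecx );          // pop ecx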
+ +namespace x86Emitter +{ + extern void POP( x86Register32 from ); + extern void POP( const ModSib& from ); + + extern void PUSH( u32 imm ); + extern void PUSH( x86Register32 from ); + extern void PUSH( const ModSib& from ); + + extern void LEA32( x86Register32 to, const ModSib& src ); + extern void LEA16( x86Register16 to, const ModSib& src ); + + + static __forceinline void POP( void* from ) { POP( ptr[from] ); } + static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); } + + #define DECLARE_GROUP1_OPCODE_HELPER( lwr, bits ) \ + emitterT void lwr##bits( x86Register##bits to, x86Register##bits from ); \ + emitterT void lwr##bits( x86Register##bits to, void* from ); \ + emitterT void lwr##bits( x86Register##bits to, const ModSib& from ); \ + emitterT void lwr##bits( x86Register##bits to, u##bits imm ); \ + emitterT void lwr##bits( const ModSib& to, x86Register##bits from ); \ + emitterT void lwr##bits( void* to, x86Register##bits from ); \ + emitterT void lwr##bits( void* to, u##bits imm ); \ + emitterT void lwr##bits( const ModSib& to, u##bits imm ); + + #define DECLARE_GROUP1_OPCODE( lwr ) \ + DECLARE_GROUP1_OPCODE_HELPER( lwr, 32 ) + DECLARE_GROUP1_OPCODE_HELPER( lwr, 16 ) + DECLARE_GROUP1_OPCODE_HELPER( lwr, 8 ) + + DECLARE_GROUP1_OPCODE( ADD ) + DECLARE_GROUP1_OPCODE( CMP ) + DECLARE_GROUP1_OPCODE( OR ) + DECLARE_GROUP1_OPCODE( ADC ) + DECLARE_GROUP1_OPCODE( SBB ) + DECLARE_GROUP1_OPCODE( AND ) + DECLARE_GROUP1_OPCODE( SUB ) + DECLARE_GROUP1_OPCODE( XOR ) + +} + + extern void CLC( void ); extern void NOP( void ); @@ -130,6 +178,8 @@ extern void MOV32ItoRm( x86IntRegType to, u32 from, int offset=0); // mov r32 to [r32+off] extern void MOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset=0); +// mov r16 to r16 +extern void MOV16RtoR( x86IntRegType to, x86IntRegType from ) ; // mov r16 to m16 extern void MOV16RtoM( uptr to, x86IntRegType from ); // mov m16 to r16 diff --git a/pcsx2/x86/ix86/ix86_group1.cpp b/pcsx2/x86/ix86/ix86_group1.cpp index 190550e1eb..f76950c0ef 100644 --- a/pcsx2/x86/ix86/ix86_group1.cpp +++ b/pcsx2/x86/ix86/ix86_group1.cpp @@ -33,6 +33,20 @@ namespace x86Emitter { +////////////////////////////////////////////////////////////////////////////////////////// +// x86RegConverter - this class is used internally by the emitter as a helper for +// converting 8 and 16 register forms into 32 bit forms. This way the end-user exposed API +// can use type-safe 8/16/32 bit register types, and the underlying code can use a single +// unified emitter to generate all function variations + prefixes and such. :) +// +class x86RegConverter : public x86Register32 +{ +public: + x86RegConverter( x86Register32 src ) : x86Register32( src ) {} + x86RegConverter( x86Register16 src ) : x86Register32( src.Id ) {} + x86RegConverter( x86Register8 src ) : x86Register32( src.Id ) {} +}; + enum Group1InstructionType { G1Type_ADD=0, @@ -46,29 +60,32 @@ enum Group1InstructionType }; -static emitterT void Group1( Group1InstructionType inst, x86Register to, x86Register from ) +static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, x86RegConverter from, bool bit8form=false ) { - write8( 0x01 | (inst<<3) ); + write8( (bit8form ? 0 : 1) | (inst<<3) ); ModRM( 3, from.Id, to.Id ); } -static emitterT void Group1( Group1InstructionType inst, const ModSib& sibdest, x86Register from ) +static emitterT void Group1( Group1InstructionType inst, const ModSib& sibdest, x86RegConverter from, bool bit8form=false ) { - write8( 0x01 | (inst<<3) ); + write8( (bit8form ? 
0 : 1) | (inst<<3) ); EmitSibMagic( from, sibdest ); } -/* add m32 to r32 */ -static emitterT void Group1( Group1InstructionType inst, x86Register to, const ModSib& sibsrc ) +static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, const ModSib& sibsrc, bool bit8form=false ) { - write8( 0x03 | (inst<<3) ); + write8( (bit8form ? 2 : 3) | (inst<<3) ); EmitSibMagic( to, sibsrc ); } +// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit +// instruction (AX,BX,etc). template< typename T > -static emitterT void Group1_Imm( Group1InstructionType inst, x86Register to, T imm ) +static emitterT void Group1_Imm( Group1InstructionType inst, x86RegConverter to, T imm ) { - if( is_s8( imm ) ) + bool bit8form = (sizeof(T) == 1); + + if( !bit8form && is_s8( imm ) ) { write8( 0x83 ); ModRM( 3, inst, to.Id ); @@ -77,84 +94,81 @@ static emitterT void Group1_Imm( Group1InstructionType inst, x86Register to, T i else { if( to == eax ) - write8( 0x05 | (inst<<3) ); + write8( (bit8form ? 4 : 5) | (inst<<3) ); else { - write8( 0x81 ); + write8( bit8form ? 0x80 : 0x81 ); ModRM( 3, inst, to.Id ); } x86write( imm ); } } +// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit +// instruction (AX,BX,etc). template< typename T > static emitterT void Group1_Imm( Group1InstructionType inst, const ModSib& sibdest, T imm ) { - write8( is_s8( imm ) ? 0x83 : 0x81 ); + bool bit8form = (sizeof(T) == 1); + + write8( bit8form ? 0x80 : (is_s8( imm ) ? 0x83 : 0x81) ); EmitSibMagic( inst, sibdest ); - if( is_s8( imm ) ) + if( !bit8form && is_s8( imm ) ) write8( (s8)imm ); else x86write( imm ); } -static emitterT void Group1_8( Group1InstructionType inst, x86Register to, s8 imm ) -{ - if( to == eax ) - { - write8( 0x04 | (inst<<3) ); - write8( imm ); - } - else - { - write8( 0x80 ); - ModRM( 3, inst, to.Id ); - write8( imm ); - } -} - // 16 bit instruction prefix! 
static __forceinline void prefix16() { write8(0x66); } -static __forceinline x86Register cvt2reg( x86Register16 src ) { return x86Register( src.Id ); } ////////////////////////////////////////////////////////////////////////////////////////// // -#define DEFINE_GROUP1_OPCODE( lwr, cod ) \ - emitterT void lwr##32( x86Register to, x86Register from ) { Group1( G1Type_##cod, to, from ); } \ - emitterT void lwr##32( x86Register to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( void* to, x86Register from ) { Group1( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( x86Register to, const x86ModRm& from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ - emitterT void lwr##32( const x86ModRm& to, x86Register from ) { Group1( G1Type_##cod, ptr[to], from ); } \ - emitterT void lwr##32( x86Register to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ - emitterT void lwr##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ - emitterT void lwr##32( const x86ModRm& to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ +#define DEFINE_GROUP1_OPCODE( cod ) \ + emitterT void cod##32( x86Register32 to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##32( x86Register32 to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ + emitterT void cod##32( x86Register32 to, const ModSib& from ) { Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##32( x86Register32 to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ + emitterT void cod##32( const ModSib& to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##32( void* to, x86Register32 from ) { Group1( G1Type_##cod, ptr[to], from ); } \ + emitterT void cod##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void cod##32( const ModSib& to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ \ - emitterT void lwr##16( x86Register16 to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, cvt2reg(to), cvt2reg(from) ); } \ - emitterT void lwr##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, cvt2reg(to), ptr[from] ); } \ - emitterT void lwr##16( void* to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, ptr[to], cvt2reg(from) ); } \ - emitterT void lwr##16( x86Register16 to, const x86ModRm& from ){ prefix16(); Group1( G1Type_##cod, cvt2reg(to), ptr[from] ); } \ - emitterT void lwr##16( const x86ModRm& to, x86Register16 from ){ prefix16(); Group1( G1Type_##cod, ptr[to], cvt2reg(from) ); } \ - emitterT void lwr##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, cvt2reg(to), imm ); } \ - emitterT void lwr##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ - emitterT void lwr##16( const x86ModRm& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } + emitterT void cod##16( x86Register16 to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, to, ptr[from] ); } \ + emitterT void cod##16( x86Register16 to, const ModSib& from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \ + emitterT void cod##16( const ModSib& to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ + emitterT void cod##16( void* to, x86Register16 from ) { prefix16(); 
Group1( G1Type_##cod, ptr[to], from ); } \ + emitterT void cod##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void cod##16( const ModSib& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \ + \ + emitterT void cod##8( x86Register8 to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \ + emitterT void cod##8( x86Register8 to, void* from ) { Group1( G1Type_##cod, to, ptr[from], true ); } \ + emitterT void cod##8( x86Register8 to, const ModSib& from ) { Group1( G1Type_##cod, to, from , true ); } \ + emitterT void cod##8( x86Register8 to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ + emitterT void cod##8( const ModSib& to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \ + emitterT void cod##8( void* to, x86Register8 from ) { Group1( G1Type_##cod, ptr[to], from , true ); } \ + emitterT void cod##8( void* to, u8 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ + emitterT void cod##8( const ModSib& to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } -DEFINE_GROUP1_OPCODE( add, ADD ); -DEFINE_GROUP1_OPCODE( cmp, CMP ); -DEFINE_GROUP1_OPCODE( or, OR ); -DEFINE_GROUP1_OPCODE( adc, ADC ); -DEFINE_GROUP1_OPCODE( sbb, SBB ); -DEFINE_GROUP1_OPCODE( and, AND ); -DEFINE_GROUP1_OPCODE( sub, SUB ); -DEFINE_GROUP1_OPCODE( xor, XOR ); +DEFINE_GROUP1_OPCODE( ADD ) +DEFINE_GROUP1_OPCODE( CMP ) +DEFINE_GROUP1_OPCODE( OR ) +DEFINE_GROUP1_OPCODE( ADC ) +DEFINE_GROUP1_OPCODE( SBB ) +DEFINE_GROUP1_OPCODE( AND ) +DEFINE_GROUP1_OPCODE( SUB ) +DEFINE_GROUP1_OPCODE( XOR ) } // end namespace x86Emitter -static __forceinline x86Emitter::x86Register _reghlp( x86IntRegType src ) +static __forceinline x86Emitter::x86Register32 _reghlp32( x86IntRegType src ) { - return x86Emitter::x86Register( src ); + return x86Emitter::x86Register32( src ); } static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src ) @@ -162,49 +176,50 @@ static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src ) return x86Emitter::x86Register16( src ); } -static __forceinline x86Emitter::x86ModRm _mrmhlp( x86IntRegType src ) +static __forceinline x86Emitter::x86Register8 _reghlp8( x86IntRegType src ) { - return x86Emitter::x86ModRm( _reghlp(src) ); + return x86Emitter::x86Register8( src ); +} + +static __forceinline x86Emitter::ModSib _mrmhlp( x86IntRegType src ) +{ + return x86Emitter::ModSib( x86Emitter::x86ModRm( _reghlp32(src) ) ); } ////////////////////////////////////////////////////////////////////////////////////////// // -#define DEFINE_GROUP1_OPCODE_LEGACY( lwr, cod ) \ - emitterT void cod##32RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##32( _reghlp(to), _reghlp(from) ); } \ - emitterT void cod##32ItoR( x86IntRegType to, u32 imm ) { x86Emitter::lwr##32( _reghlp(to), imm ); } \ - emitterT void cod##32MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##32( _reghlp(to), (void*)from ); } \ - emitterT void cod##32RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##32( (void*)to, _reghlp(from) ); } \ - emitterT void cod##32ItoM( uptr to, u32 imm ) { x86Emitter::lwr##32( (void*)to, imm ); } \ - emitterT void cod##32ItoRm( x86IntRegType to, u32 imm, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + offset, imm ); } \ - emitterT void cod##32RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _reghlp(to), _mrmhlp(from) + offset ); } \ - emitterT void cod##32RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##32( _mrmhlp(to) + 
offset, _reghlp(from) ); } \ - \ - emitterT void cod##16RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::lwr##16( _reghlp16(to), _reghlp16(from) ); } \ - emitterT void cod##16ItoR( x86IntRegType to, u16 imm ) { x86Emitter::lwr##16( _reghlp16(to), imm ); } \ - emitterT void cod##16MtoR( x86IntRegType to, uptr from ) { x86Emitter::lwr##16( _reghlp16(to), (void*)from ); } \ - emitterT void cod##16RtoM( uptr to, x86IntRegType from ) { x86Emitter::lwr##16( (void*)to, _reghlp16(from) ); } \ - emitterT void cod##16ItoM( uptr to, u16 imm ) { x86Emitter::lwr##16( (void*)to, imm ); } \ - emitterT void cod##16ItoRm( x86IntRegType to, u16 imm, int offset ) { x86Emitter::lwr##16( _mrmhlp(to) + offset, imm ); } \ - emitterT void cod##16RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##16( _reghlp16(to), _mrmhlp(from) + offset ); } \ - emitterT void cod##16RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::lwr##16( _mrmhlp(to) + offset, _reghlp16(from) ); } +#define DEFINE_LEGACY_HELPER( cod, bits ) \ + emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::cod##bits( _reghlp##bits(to), _reghlp##bits(from) ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { x86Emitter::cod##bits( _reghlp##bits(to), imm ); } \ + emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { x86Emitter::cod##bits( _reghlp##bits(to), (void*)from ); } \ + emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { x86Emitter::cod##bits( (void*)to, _reghlp##bits(from) ); } \ + emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { x86Emitter::cod##bits( (void*)to, imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, imm ); } \ + emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _reghlp##bits(to), _mrmhlp(from) + offset ); } \ + emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, _reghlp##bits(from) ); } -DEFINE_GROUP1_OPCODE_LEGACY( add, ADD ); -DEFINE_GROUP1_OPCODE_LEGACY( cmp, CMP ); -DEFINE_GROUP1_OPCODE_LEGACY( or, OR ); -DEFINE_GROUP1_OPCODE_LEGACY( adc, ADC ); -DEFINE_GROUP1_OPCODE_LEGACY( sbb, SBB ); -DEFINE_GROUP1_OPCODE_LEGACY( and, AND ); -DEFINE_GROUP1_OPCODE_LEGACY( sub, SUB ); -DEFINE_GROUP1_OPCODE_LEGACY( xor, XOR ); +#define DEFINE_GROUP1_OPCODE_LEGACY( cod ) \ + DEFINE_LEGACY_HELPER( cod, 32 ) \ + DEFINE_LEGACY_HELPER( cod, 16 ) \ + DEFINE_LEGACY_HELPER( cod, 8 ) + +DEFINE_GROUP1_OPCODE_LEGACY( ADD ) +DEFINE_GROUP1_OPCODE_LEGACY( CMP ) +DEFINE_GROUP1_OPCODE_LEGACY( OR ) +DEFINE_GROUP1_OPCODE_LEGACY( ADC ) +DEFINE_GROUP1_OPCODE_LEGACY( SBB ) +DEFINE_GROUP1_OPCODE_LEGACY( AND ) +DEFINE_GROUP1_OPCODE_LEGACY( SUB ) +DEFINE_GROUP1_OPCODE_LEGACY( XOR ) // Special forms needed by the legacy emitter syntax: emitterT void AND32I8toR( x86IntRegType to, s8 from ) { - x86Emitter::and32( _reghlp(to), from ); + x86Emitter::AND32( _reghlp32(to), from ); } emitterT void AND32I8toM( uptr to, s8 from ) { - x86Emitter::and32( (void*)to, from ); + x86Emitter::AND32( (void*)to, from ); } diff --git a/pcsx2/x86/ix86/ix86_internal.h b/pcsx2/x86/ix86/ix86_internal.h index 83ec23a291..4f3f72f2e4 100644 --- a/pcsx2/x86/ix86/ix86_internal.h +++ b/pcsx2/x86/ix86/ix86_internal.h @@ -27,7 +27,7 @@ static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod fi namespace x86Emitter { extern void 
EmitSibMagic( int regfield, const ModSib& info ); - extern void EmitSibMagic( x86Register regfield, const ModSib& info ); + extern void EmitSibMagic( x86Register32 regfield, const ModSib& info ); extern bool NeedsSibMagic( const ModSib& info ); } diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 3f4134836c..752414a1b2 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -24,8 +24,6 @@ * cottonvibes(@gmail.com) */ -#pragma once - //------------------------------------------------------------------ // ix86 legacy emitter functions //------------------------------------------------------------------ @@ -34,6 +32,8 @@ #include "System.h" #include "ix86_internal.h" +using namespace x86Emitter; + // Note: the 'to' field can either be a register or a special opcode extension specifier // depending on the opcode's encoding. @@ -256,6 +256,8 @@ emitterT void NOP( void ) /* mov r32 to r32 */ emitterT void MOV32RtoR( x86IntRegType to, x86IntRegType from ) { + if( to == from ) return; + RexRB(0, from, to); write8( 0x89 ); ModRM( 3, from, to ); @@ -356,6 +358,18 @@ emitterT void MOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset) WriteRmOffsetFrom(from, to, offset); } + +/* mov r32 to r32 */ +emitterT void MOV16RtoR( x86IntRegType to, x86IntRegType from ) +{ + if( to == from ) return; + + write8( 0x66 ); + RexRB(0, from, to); + write8( 0x89 ); + ModRM( 3, from, to ); +} + /* mov r16 to m16 */ emitterT void MOV16RtoM(uptr to, x86IntRegType from ) { @@ -802,15 +816,6 @@ emitterT void CMOVLE32MtoR( x86IntRegType to, uptr from ) // arithmetic instructions / //////////////////////////////////// -// add m8 to r8 -emitterT void ADD8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x02 ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - /* inc r32 */ emitterT void INC32R( x86IntRegType to ) { @@ -1214,90 +1219,6 @@ emitterT void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) // logical instructions / //////////////////////////////////// -// or r8 to r8 -emitterT void OR8RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,from,to); - write8( 0x08 ); - ModRM( 3, from, to ); -} - -// or r8 to m8 -emitterT void OR8RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x08 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -// or imm8 to m8 -emitterT void OR8ItoM( uptr to, u8 from ) -{ - write8( 0x80 ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -// or m8 to r8 -emitterT void OR8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x0A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* and imm8 to r8 */ -emitterT void AND8ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( to == EAX ) { - write8( 0x24 ); - } - else { - write8( 0x80 ); - ModRM( 3, 0x4, to ); - } - write8( from ); -} - -/* and imm8 to m8 */ -emitterT void AND8ItoM( uptr to, u8 from ) -{ - write8( 0x80 ); - ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(to, 5) ); - write8( from ); -} - -// and r8 to r8 -emitterT void AND8RtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write8( 0x22 ); - ModRM( 3, to, from ); -} - -/* and r8 to m8 */ -emitterT void AND8RtoM( uptr to, x86IntRegType from ) -{ - RexR(0,from); - write8( 0x20 ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* and m8 to r8 */ -emitterT void AND8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x22 ); - ModRM( 0, to, DISP32 ); - 
write32( MEMADDR(from, 4)); -} - /* not r32 */ emitterT void NOT32R( x86IntRegType from ) { @@ -1664,41 +1585,6 @@ emitterT void CALL32M( u32 to ) // misc instructions / //////////////////////////////////// -// cmp imm8 to r8 -emitterT void CMP8ItoR( x86IntRegType to, u8 from ) -{ - RexB(0,to); - if ( to == EAX ) - { - write8( 0x3C ); - } - else - { - write8( 0x80 ); - ModRM( 3, 7, to ); - } - write8( from ); -} - -// cmp m8 to r8 -emitterT void CMP8MtoR( x86IntRegType to, uptr from ) -{ - RexR(0,to); - write8( 0x3A ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// cmp imm8 to [r32] (byte ptr) -emitterT void CMP8I8toRm( x86IntRegType to, s8 from, s8 off=0 ) -{ - RexB(0,to); - write8( 0x80 ); - ModRM( (off != 0), 7, to ); - if( off != 0 ) write8(off); - write8(from); -} - /* test imm32 to r32 */ emitterT void TEST32ItoR( x86IntRegType to, u32 from ) { @@ -1830,31 +1716,19 @@ emitterT void SETZ8R( x86IntRegType to ) { SET8R(0x94, to); } emitterT void SETE8R( x86IntRegType to ) { SET8R(0x94, to); } /* push imm32 */ -emitterT void PUSH32I( u32 from ) -{; -write8( 0x68 ); -write32( from ); -} +emitterT void PUSH32I( u32 from ) { PUSH( from ); } /* push r32 */ -emitterT void PUSH32R( x86IntRegType from ) { write8( 0x50 | from ); } +emitterT void PUSH32R( x86IntRegType from ) { PUSH( x86Register32( from ) ); } /* push m32 */ -emitterT void PUSH32M( u32 from ) +emitterT void PUSH32M( u32 from ) { - write8( 0xFF ); - ModRM( 0, 6, DISP32 ); - write32( MEMADDR(from, 4) ); + PUSH( ptr[from] ); } /* pop r32 */ -emitterT void POP32R( x86IntRegType from ) { write8( 0x58 | from ); } - -/* pushad */ -emitterT void PUSHA32( void ) { write8( 0x60 ); } - -/* popad */ -emitterT void POPA32( void ) { write8( 0x61 ); } +emitterT void POP32R( x86IntRegType from ) { POP( x86Register32( from ) ); } /* pushfd */ emitterT void PUSHFD( void ) { write8( 0x9C ); } @@ -1899,95 +1773,34 @@ emitterT void BSWAP32R( x86IntRegType to ) emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) { - RexRB(0,to,from); - write8(0x8d); - - if( (from&7) == ESP ) { - if( offset == 0 ) { - ModRM(1, to, from); - write8(0x24); - } - else if( is_s8(offset) ) { - ModRM(1, to, from); - write8(0x24); - write8(offset); - } - else { - ModRM(2, to, from); - write8(0x24); - write32(offset); - } - } - else { - if( offset == 0 && from != EBP && from!=ESP ) { - ModRM(0, to, from); - } - else if( is_s8(offset) ) { - ModRM(1, to, from); - write8(offset); - } - else { - ModRM(2, to, from); - write32(offset); - } - } -} - -// to = from + offset -emitterT void LEA16RtoR(x86IntRegType to, x86IntRegType from, s16 offset) -{ - write8(0x66); - LEA32RtoR(to, from, offset); -} - -// to = from0 + from1 -emitterT void LEA16RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) -{ - write8(0x66); - LEA32RRtoR(to, from0, from1); + LEA32( x86Register32( to ), ptr[x86IndexReg(from)+offset] ); } emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) { - RexRXB(0, to, from0, from1); - write8(0x8d); - - if( (from1&7) == EBP ) { - ModRM(1, to, 4); - ModRM(0, from0, from1); - write8(0); - } - else { - ModRM(0, to, 4); - ModRM(0, from0, from1); - } -} - -// to = from << scale (max is 3) -emitterT void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale) -{ - write8(0x66); - LEA32RStoR(to, from, scale); + LEA32( x86Register32( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] ); } // Don't inline recursive functions emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale) { 
- if( to == from ) { - SHL32ItoR(to, scale); - return; - } - - if( from != ESP ) { - RexRXB(0,to,from,0); - write8(0x8d); - ModRM(0, to, 4); - ModRM(scale, from, 5); - write32(0); - } - else { - assert( to != ESP ); - MOV32RtoR(to, from); - LEA32RStoR(to, to, scale); - } + LEA32( x86Register32( to ), ptr[x86IndexReg(from)*(1<= -1 && Id < 8 ); } bool IsEmpty() const { return Id == -1; } - bool operator==( const x86Register& src ) const { return Id == src.Id; } - bool operator!=( const x86Register& src ) const { return Id != src.Id; } + bool operator==( const x86Register32& src ) const { return Id == src.Id; } + bool operator!=( const x86Register32& src ) const { return Id != src.Id; } - x86ModRm operator+( const x86Register& right ) const; + x86ModRm operator+( const x86Register32& right ) const; x86ModRm operator+( const x86ModRm& right ) const; + x86ModRm operator+( s32 right ) const; + + x86ModRm operator*( u32 factor ) const; - x86Register& operator=( const x86Register& src ) + x86Register32& operator=( const x86Register32& src ) { Id = src.Id; return *this; } }; - + ////////////////////////////////////////////////////////////////////////////////////////// // Similar to x86Register, but without the ability to add/combine them with ModSib. // @@ -198,7 +201,7 @@ namespace x86Emitter x86Register16( const x86Register16& src ) : Id( src.Id ) {} x86Register16() : Id( -1 ) {} - explicit x86Register16( int regId ) : Id( regId ) { } + explicit x86Register16( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } bool IsEmpty() const { return Id == -1; } @@ -224,7 +227,7 @@ namespace x86Emitter x86Register8( const x86Register16& src ) : Id( src.Id ) {} x86Register8() : Id( -1 ) {} - explicit x86Register8( int regId ) : Id( regId ) { } + explicit x86Register8( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } bool IsEmpty() const { return Id == -1; } @@ -237,19 +240,22 @@ namespace x86Emitter return *this; } }; + + // Use 32 bit registers as out index register (for ModSig memory address calculations) + typedef x86Register32 x86IndexReg; ////////////////////////////////////////////////////////////////////////////////////////// // class x86ModRm { public: - x86Register Base; // base register (no scale) - x86Register Index; // index reg gets multiplied by the scale + x86IndexReg Base; // base register (no scale) + x86IndexReg Index; // index reg gets multiplied by the scale int Factor; // scale applied to the index register, in factor form (not a shift!) 
s32 Displacement; // address displacement public: - x86ModRm( x86Register base, x86Register index, int factor=1, s32 displacement=0 ) : + x86ModRm( x86IndexReg base, x86IndexReg index, int factor=1, s32 displacement=0 ) : Base( base ), Index( index ), Factor( factor ), @@ -257,7 +263,7 @@ namespace x86Emitter { } - explicit x86ModRm( x86Register base, int displacement=0 ) : + explicit x86ModRm( x86IndexReg base, int displacement=0 ) : Base( base ), Index(), Factor(0), @@ -273,11 +279,11 @@ namespace x86Emitter { } - static x86ModRm FromIndexReg( x86Register index, int scale=0, s32 displacement=0 ); + static x86ModRm FromIndexReg( x86IndexReg index, int scale=0, s32 displacement=0 ); public: bool IsByteSizeDisp() const { return is_s8( Displacement ); } - x86Register GetEitherReg() const; + x86IndexReg GetEitherReg() const; x86ModRm& Add( s32 imm ) { @@ -285,10 +291,10 @@ namespace x86Emitter return *this; } - x86ModRm& Add( const x86Register& src ); + x86ModRm& Add( const x86IndexReg& src ); x86ModRm& Add( const x86ModRm& src ); - x86ModRm operator+( const x86Register& right ) const { return x86ModRm( *this ).Add( right ); } + x86ModRm operator+( const x86IndexReg& right ) const { return x86ModRm( *this ).Add( right ); } x86ModRm operator+( const x86ModRm& right ) const { return x86ModRm( *this ).Add( right ); } x86ModRm operator+( const s32 imm ) const { return x86ModRm( *this ).Add( imm ); } x86ModRm operator-( const s32 imm ) const { return x86ModRm( *this ).Add( -imm ); } @@ -306,18 +312,27 @@ namespace x86Emitter class ModSib { public: - x86Register Base; // base register (no scale) - x86Register Index; // index reg gets multiplied by the scale + x86IndexReg Base; // base register (no scale) + x86IndexReg Index; // index reg gets multiplied by the scale int Scale; // scale applied to the index register, in scale/shift form s32 Displacement; // offset applied to the Base/Index registers. - ModSib( const x86ModRm& src ); - ModSib( x86Register base, x86Register index, int scale=0, s32 displacement=0 ); - ModSib( s32 disp ); + explicit ModSib( const x86ModRm& src ); + explicit ModSib( s32 disp ); + ModSib( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ); - x86Register GetEitherReg() const; + x86IndexReg GetEitherReg() const; bool IsByteSizeDisp() const { return is_s8( Displacement ); } + ModSib& Add( s32 imm ) + { + Displacement += imm; + return *this; + } + + ModSib operator+( const s32 imm ) const { return ModSib( *this ).Add( imm ); } + ModSib operator-( const s32 imm ) const { return ModSib( *this ).Add( -imm ); } + protected: void Reduce(); }; @@ -327,9 +342,13 @@ namespace x86Emitter // struct x86IndexerType { - ModSib operator[]( x86Register src ) const + // passthrough instruction, allows ModSib to pass silently through ptr translation + // without doing anything and without compiler error. 
+ const ModSib& operator[]( const ModSib& src ) const { return src; } + + ModSib operator[]( x86IndexReg src ) const { - return ModSib( src, x86Register::Empty ); + return ModSib( src, x86IndexReg::Empty ); } ModSib operator[]( const x86ModRm& src ) const @@ -349,14 +368,32 @@ namespace x86Emitter }; // ------------------------------------------------------------------------ - extern const x86Register eax; - extern const x86Register ebx; - extern const x86Register ecx; - extern const x86Register edx; - extern const x86Register esi; - extern const x86Register edi; - extern const x86Register ebp; - extern const x86Register esp; - extern x86IndexerType ptr; + + extern const x86Register32 eax; + extern const x86Register32 ebx; + extern const x86Register32 ecx; + extern const x86Register32 edx; + extern const x86Register32 esi; + extern const x86Register32 edi; + extern const x86Register32 ebp; + extern const x86Register32 esp; + + extern const x86Register16 ax; + extern const x86Register16 bx; + extern const x86Register16 cx; + extern const x86Register16 dx; + extern const x86Register16 si; + extern const x86Register16 di; + extern const x86Register16 bp; + extern const x86Register16 sp; + + extern const x86Register8 al; + extern const x86Register8 cl; + extern const x86Register8 dl; + extern const x86Register8 bl; + extern const x86Register8 ah; + extern const x86Register8 ch; + extern const x86Register8 dh; + extern const x86Register8 bh; } \ No newline at end of file From e3a87fecd926fa910f9a84fe9268b5dd97030e28 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Wed, 8 Apr 2009 10:07:14 +0000 Subject: [PATCH 015/143] microVU: -fixed rm instructions to work with Jake's emitter changes -implemented the case where upper and lower instructions write to same reg at once (the lower instruction's result is discarded) -implemented more first pass analyzing stuff -fixed various bugs... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@924 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.cpp | 2 +- pcsx2/x86/microVU.h | 6 +-- pcsx2/x86/microVU_Alloc.h | 31 +++------------- pcsx2/x86/microVU_Alloc.inl | 14 +++---- pcsx2/x86/microVU_Analyze.inl | 33 +++++++++++++++++ pcsx2/x86/microVU_Compile.inl | 48 ++++++++++++++++++++---- pcsx2/x86/microVU_Lower.inl | 70 +++++++++++++++++------------------ pcsx2/x86/microVU_Misc.h | 9 +++-- pcsx2/x86/microVU_Misc.inl | 54 +++++++++++++-------------- 9 files changed, 157 insertions(+), 110 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 39e9697f38..0e537472e6 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -41,7 +41,7 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { mVU->regs = vuRegsPtr; mVU->index = vuIndex; mVU->microSize = (vuIndex ? 0x4000 : 0x1000); - mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 8; + mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4; mVU->cacheAddr = (vuIndex ? 
0x1e840000 : 0x0e840000); mVU->cache = NULL; diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index a95251ffc0..049520d255 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -91,9 +91,9 @@ public: template struct microProgram { - u32 data[progSize]; + u32 data[progSize/4]; u32 used; // Number of times its been used - microBlockManager* block[progSize / 2]; + microBlockManager* block[progSize/8]; microAllocInfo allocInfo; }; @@ -116,7 +116,7 @@ struct microVU { u32 cacheAddr; // VU Cache Start Address static const u32 cacheSize = 0x500000; // VU Cache Size - microProgManager<0x1000> prog; // Micro Program Data + microProgManager<0x4000> prog; // Micro Program Data VURegs* regs; // VU Regs Struct u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to) diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index f039c0a980..e277f12723 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -33,6 +33,8 @@ struct microRegInfo { u8 VI[32]; u8 q; u8 p; + u8 r; + u8 xgkick; }; struct microTempRegInfo { @@ -42,6 +44,8 @@ struct microTempRegInfo { u8 VIreg; // Index of the VI reg u8 q; // Holds cycle info for Q reg u8 p; // Holds cycle info for P reg + u8 r; // Holds cycle info for R reg (Will never cause stalls, but useful to know if R is modified) + u8 xgkick; // Holds the cycle info for XGkick }; template @@ -49,34 +53,9 @@ struct microAllocInfo { microRegInfo regs; // Pipeline info microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR) - u8 divFlag; // 0 = Transfer DS/IS flags normally, 1 = Clear DS/IS Flags, > 1 = set DS/IS flags to bit 2::1 of divFlag - u8 divFlagTimer; // Used to ensure divFlag's contents are merged at the appropriate time. u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u32 cycles; // Cycles for current block u32 curPC; // Current PC u32 startPC; // Start PC for Cur Block - u32 info[pSize]; // bit 00 = Lower Instruction is NOP - // bit 01 - // bit 02 - // bit 03 - // bit 04 - // bit 05 = Write to Q1 or Q2? - // bit 06 = Read Q1 or Q2? - // bit 07 = Read/Write to P1 or P2? - // bit 08 = Update Mac Flags? - // bit 09 = Update Status Flags? - // bit 10 = Used with bit 11 to make a 2-bit key for mac flag instance - // bit 11 - // bit 12 = Used with bit 13 to make a 2-bit key for status flag instance - // bit 13 - // bit 14 = Used with bit 15 to make a 2-bit key for clip flag instance - // bit 15 - // bit 16 = Used with bit 17 to make a 2-bit key for mac flag instance - // bit 17 - // bit 18 = Used with bit 19 to make a 2-bit key for status flag instance - // bit 19 - // bit 20 = Used with bit 21 to make a 2-bit key for clip flag instance - // bit 21 - // bit 22 = Read VI(Fs) from backup memory? - // bit 23 = Read VI(Ft) from backup memory? 
+ u32 info[pSize/8]; // Info for Instructions in current block }; diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index ca6cee9e09..3316d7360c 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -201,11 +201,11 @@ microVUt(void) mVUallocFMAC5b(int& ACC, int& Fs) { // FMAC6 - Normal FMAC Opcodes (I Reg) //------------------------------------------------------------------ -#define getIreg(reg) { \ +#define getIreg(reg, modXYZW) { \ MOV32ItoR(gprT1, mVU->iReg); \ SSE2_MOVD_R_to_XMM(reg, gprT1); \ if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 8); \ - if (!_XYZW_SS) { mVUunpack_xyzw(reg, reg, 0); } \ + if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } \ } microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) { @@ -213,7 +213,7 @@ microVUt(void) mVUallocFMAC6a(int& Fd, int& Fs, int& Ft) { Fs = xmmFs; Ft = xmmFt; Fd = xmmFs; - getIreg(Ft); + getIreg(Ft, 1); getReg6(Fs, _Fs_); } @@ -230,7 +230,7 @@ microVUt(void) mVUallocFMAC7a(int& ACC, int& Fs, int& Ft) { ACC = xmmACC; Fs = (_X_Y_Z_W == 15) ? xmmACC : xmmFs; Ft = xmmFt; - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } @@ -374,7 +374,7 @@ microVUt(void) mVUallocFMAC12a(int& Fd, int& ACC, int& Fs, int& Ft) { Ft = xmmFt; Fd = xmmFs; ACC = xmmACC; - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } @@ -395,7 +395,7 @@ microVUt(void) mVUallocFMAC13a(int& Fd, int& ACC, int& Fs, int& Ft) { Fd = xmmT1; ACC = xmmT1; SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } @@ -480,7 +480,7 @@ microVUt(void) mVUallocFMAC16a(int& ACCw, int& ACCr, int& Fs, int& Ft) { ACCw = xmmACC; ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1; SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); - getIreg(Ft); + getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index a493da8968..dd5918cdf0 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -143,6 +143,25 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { analyzePreg(xCycles); } +//------------------------------------------------------------------ +// R*** - R Reg Opcodes +//------------------------------------------------------------------ + +#define analyzeRreg() { mVUregsTemp.r = 1; } + +microVUt(void) mVUanalyzeR1(int Fs, int Fsf) { + microVU* mVU = mVUx; + analyzeReg5(Fs, Fsf); + analyzeRreg(); +} + +microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { + microVU* mVU = mVUx; + if (!Ft) { mVUinfo |= ((canBeNOP) ? 
_isNOP : _noWriteVF); return; } + analyzeReg2(Ft); + analyzeRreg(); +} + //------------------------------------------------------------------ // Sflag - Status Flag Opcodes //------------------------------------------------------------------ @@ -157,4 +176,18 @@ microVUt(void) mVUanalyzeSflag(int It) { analyzeVIreg2(It, 1); } +//------------------------------------------------------------------ +// XGkick +//------------------------------------------------------------------ + +#define analyzeXGkick1() { mVUstall = aMax(mVUstall, mVUregs.xgkick); } +#define analyzeXGkick2(x) { mVUregsTemp.xgkick = x; } + +microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) { + microVU* mVU = mVUx; + analyzeVIreg1(Fs); + analyzeXGkick1(); + analyzeXGkick2(xCycles); +} + #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 686e4a6563..554cf2612b 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -29,19 +29,51 @@ } \ } -#define curI mVUcurProg.data[iPC] -#define setCode() { mVU->code = curI; } -#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } -#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } +#define curI mVUcurProg.data[iPC] +#define setCode() { mVU->code = curI; } +#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } +#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } +#define incCycles(x) { mVUincCycles(x); } +#define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } + +microVUt(void) mVUincCycles(int x) { + mVUcycles += x; + for (int z = 31; z > 0; z--) { + calcCycles(mVUregs.VF[z].x, x); + calcCycles(mVUregs.VF[z].y, x); + calcCycles(mVUregs.VF[z].z, x); + calcCycles(mVUregs.VF[z].w, x); + } + for (int z = 16; z > 0; z--) { + calcCycles(mVUregs.VI[z], x); + } + if (mVUregs.q) { + calcCycles(mVUregs.q, x); + if (!mVUregs.q) {} // Do Status Flag Merging Stuff? + } + calcCycles(mVUregs.p, x); + calcCycles(mVUregs.r, x); + calcCycles(mVUregs.xgkick, x); +} microVUt(void) mVUsetCycles() { microVU* mVU = mVUx; incCycles(mVUstall); + if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && !mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg + mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP + //mVUregsTemp.VF[1].reg = mVUregsTemp.VF[0]; // Just use cycles from upper Op (incorrect?) + mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector (correct?) + mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y); + mVUregsTemp.VF[1].z = aMax(mVUregsTemp.VF[0].z, mVUregsTemp.VF[1].z); + mVUregsTemp.VF[1].w = aMax(mVUregsTemp.VF[0].w, mVUregsTemp.VF[1].w); + } mVUregs.VF[mVUregsTemp.VFreg[0]].reg = mVUregsTemp.VF[0].reg; - mVUregs.VF[mVUregsTemp.VFreg[1]].reg =(mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1]) ? 
(aMax(mVUregsTemp.VF[0].reg, mVUregsTemp.VF[1].reg)) : (mVUregsTemp.VF[1].reg); + mVUregs.VF[mVUregsTemp.VFreg[1]].reg = mVUregsTemp.VF[1].reg; mVUregs.VI[mVUregsTemp.VIreg] = mVUregsTemp.VI; mVUregs.q = mVUregsTemp.q; mVUregs.p = mVUregsTemp.p; + mVUregs.r = mVUregsTemp.r; + mVUregs.xgkick = mVUregsTemp.xgkick; } microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { @@ -70,14 +102,15 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, else if (branch == 1) { branch = 2; } if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; } incPC(1); + incCycles(1); } // Second Pass - iPC = startPC; + iPC = mVUstartPC; setCode(); for (bool x = 1; x; ) { // - // ToDo: status/mac flag stuff + // ToDo: status/mac flag stuff? // if (isEOB) { x = 0; } else if (isBranch) { mVUopU(); incPC(2); } @@ -85,6 +118,7 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, mVUopU(); if (isNop) { if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } else { incPC(1); mVUopL(); } + if (!isBdelay) { incPC(1); } else { incPC(-2); // Go back to Branch Opcode diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index a7e8438c24..5ac22e06e7 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -648,18 +648,9 @@ microVUf(void) mVU_ISUBIU() { } //------------------------------------------------------------------ -// MOVE/MFIR/MFP/MTIR/MR32 +// MFIR/MFP/MOVE/MR32/MTIR //------------------------------------------------------------------ -microVUf(void) mVU_MOVE() { - microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ } - else { - mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); - mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); - } -} - microVUf(void) mVU_MFIR() { microVU* mVU = mVUx; if (!recPass) { /*If (!_Ft_) nop();*/ } @@ -681,12 +672,12 @@ microVUf(void) mVU_MFP() { } } -microVUf(void) mVU_MTIR() { +microVUf(void) mVU_MOVE() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ } else { - MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); - mVUallocVIb(gprT1, _Ft_); + mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } } @@ -700,6 +691,15 @@ microVUf(void) mVU_MR32() { } } +microVUf(void) mVU_MTIR() { + microVU* mVU = mVUx; + if (!recPass) {} + else { + MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); + mVUallocVIb(gprT1, _Ft_); + } +} + //------------------------------------------------------------------ // ILW/ILWR //------------------------------------------------------------------ @@ -716,7 +716,7 @@ microVUf(void) mVU_ILW() { mVUallocVIa(gprT1, _Fs_); ADD32ItoR(gprT1, _Imm11_); mVUaddrFix(gprT1); - MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works. + MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS); if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff); mVUallocVIb(gprT1, _Ft_); } @@ -728,13 +728,13 @@ microVUf(void) mVU_ILWR() { if (!recPass) { /*If (!_Ft_) nop();*/ } else { if (!_Fs_) { - MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + offsetSS ); + MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS); mVUallocVIb(gprT1, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); mVUaddrFix(gprT1); - MOV32RmSOffsettoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS, 0); // ToDo: check if this works. 
+ MOV32RmtoR(gprT1, gprT1, (uptr)mVU->regs->Mem + offsetSS); if (isMMX(_Ft_)) AND32ItoR(gprT1, 0xffff); mVUallocVIb(gprT1, _Ft_); } @@ -762,10 +762,10 @@ microVUf(void) mVU_ISW() { mVUallocVIa(gprT2, _Ft_); ADD32ItoR(gprT1, _Imm11_); mVUaddrFix(gprT1); - if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem); - if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4); - if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8); - if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12); + if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem); + if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4); + if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8); + if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12); } } } @@ -785,10 +785,10 @@ microVUf(void) mVU_ISWR() { mVUallocVIa(gprT1, _Fs_); mVUallocVIa(gprT2, _Ft_); mVUaddrFix(gprT1); - if (_X) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem); - if (_Y) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+4); - if (_Z) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+8); - if (_W) MOV32RtoRmOffset(gprT1, gprT2, (uptr)mVU->regs->Mem+12); + if (_X) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem); + if (_Y) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+4); + if (_Z) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+8); + if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12); } } } @@ -819,7 +819,7 @@ microVUf(void) mVU_LQD() { microVU* mVU = mVUx; if (!recPass) {} else { - if (!_Fs_ && _Ft_) { + if (!_Fs_ && _Ft_ && !noWriteVF) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } @@ -827,7 +827,7 @@ microVUf(void) mVU_LQD() { mVUallocVIa(gprT1, _Fs_); SUB16ItoR(gprT1, 1); mVUallocVIb(gprT1, _Fs_); // ToDo: Backup to memory check. - if (_Ft_) { + if (_Ft_ && !noWriteVF) { mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -840,13 +840,13 @@ microVUf(void) mVU_LQI() { microVU* mVU = mVUx; if (!recPass) {} else { - if (!_Fs_ && _Ft_) { + if (!_Fs_ && _Ft_ && !noWriteVF) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } else { mVUallocVIa((_Ft_) ? 
gprT1 : gprT2, _Fs_); - if (_Ft_) { + if (_Ft_ && !noWriteVF) { MOV32RtoR(gprT2, gprT1); mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); @@ -925,7 +925,7 @@ microVUf(void) mVU_SQI() { microVUf(void) mVU_RINIT() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeR1(_Fs_, _Fsf_); } else { if (_Fs_ || (_Fsf_ == 3)) { getReg8(gprR, _Fs_, _Fsf_); @@ -938,7 +938,7 @@ microVUf(void) mVU_RINIT() { microVUt(void) mVU_RGET_() { microVU* mVU = mVUx; - if (_Ft_) { + if (!noWriteVF) { if (_X) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[0], gprR); if (_Y) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[1], gprR); if (_Z) MOV32RtoM((uptr)&mVU->regs->VF[_Ft_].UL[2], gprR); @@ -948,13 +948,13 @@ microVUt(void) mVU_RGET_() { microVUf(void) mVU_RGET() { microVU* mVU = mVUx; - if (!recPass) { /*if (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeR2(_Ft_, 1); } else { mVU_RGET_(); } } microVUf(void) mVU_RNEXT() { microVU* mVU = mVUx; - if (!recPass) { /*if (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeR2(_Ft_, 0); } else { // algorithm from www.project-fao.org MOV32RtoR(gprT1, gprR); @@ -976,7 +976,7 @@ microVUf(void) mVU_RNEXT() { microVUf(void) mVU_RXOR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeR1(_Fs_, _Fsf_); } else { if (_Fs_ || (_Fsf_ == 3)) { getReg8(gprT1, _Fs_, _Fsf_); @@ -1039,7 +1039,7 @@ void __fastcall mVU_XGKICK1(u32 addr) { mVU_XGKICK_<1>(addr); } microVUf(void) mVU_XGKICK() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeXGkick(_Fs_, 4); } else { mVUallocVIa(gprT2, _Fs_); // gprT2 = ECX for __fastcall if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0); diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 90232a5088..dd52f1b489 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -152,7 +152,6 @@ declareAllVariables #define mVUstartPC mVUallocInfo.startPC #define iPC mVUallocInfo.curPC #define xPC ((iPC / 2) * 8) -#define incCycles(x) { mVUcycles += x; } #define _isNOP (1<<0) // Skip Lower Instruction #define _isBranch (1<<1) // Cur Instruction is a Branch @@ -168,12 +167,13 @@ declareAllVariables #define _doStatus (1<<9) #define _fmInstance (3<<10) #define _fsInstance (3<<12) -#define _fcInstance (3<<14) -#define _fpmInstance (3<<10) #define _fpsInstance (3<<12) +#define _fcInstance (3<<14) +#define _fpcInstance (3<<14) #define _fvmInstance (3<<16) #define _fvsInstance (3<<18) -#define _fvcInstance (3<<14) +#define _fvcInstance (3<<20) +#define _noWriteVF (1<<21) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) #define isNOP (mVUinfo & (1<<0)) #define isBranch (mVUinfo & (1<<1)) @@ -195,6 +195,7 @@ declareAllVariables #define fvmInstance ((mVUinfo >> 16) & 3) #define fvsInstance ((mVUinfo >> 18) & 3) #define fvcInstance ((mVUinfo >> 20) & 3) +#define noWriteVF (mVUinfo & (1<<21)) //#define getFs (mVUinfo & (1<<13)) //#define getFt (mVUinfo & (1<<14)) diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 6977b765e1..208a3852c7 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -85,11 +85,11 @@ microVUx(void) mVUloadReg(int reg, uptr offset, int xyzw) { microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) { switch( xyzw ) { - case 8: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset); break; // X - case 4: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+4); break; // Y - case 2: SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+8); break; // Z - case 1: 
SSE_MOVSS_RmOffset_to_XMM(reg, gprReg, offset+12); break; // W - default: SSE_MOVAPSRmtoROffset(reg, gprReg, offset); break; + case 8: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset); break; // X + case 4: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+4); break; // Y + case 2: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+8); break; // Z + case 1: SSE_MOVSS_Rm_to_XMM(reg, gprReg, offset+12); break; // W + default: SSE_MOVAPSRmtoR(reg, gprReg, offset); break; } } @@ -142,44 +142,44 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) { switch ( xyzw ) { case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1); SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // YW case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9); - SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1); + SSE_MOVLPS_XMM_to_Rm(gprReg, xmmT1, offset+4); break; // YZ case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW - SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+4, xmmT1); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset+4); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // YZW case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1); else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // XW case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8); break; //XZ - case 11: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); - SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); + case 11: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8); break; //XZW case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW - SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset, xmmT1); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, xmmT1); + SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); break; // XYW case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); - SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, xmmT1); + SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8); break; // XYZ - case 8: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset, reg); break; // X - case 4: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+4, reg); break; // Y - case 2: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // Z - case 1: SSE_MOVSS_XMM_to_RmOffset(gprReg, offset+12, reg); break; // W - case 12: SSE_MOVLPS_XMM_to_RmOffset(gprReg, offset, reg); break; // XY - case 3: SSE_MOVHPS_XMM_to_RmOffset(gprReg, offset+8, reg); break; // ZW - default: SSE_MOVAPSRtoRmOffset(gprReg, offset, reg); break; // XYZW + case 8: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); break; // X + case 4: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); break; // Y + case 2: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+8); break; // Z + case 1: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12); break; // W + case 12: SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); break; // XY + case 3: 
SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8); break; // ZW + default: SSE_MOVAPSRtoRm(gprReg, reg, offset); break; // XYZW } } From 08c6f68abaa47070a272a3e840b6ebf29e3fc303 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 8 Apr 2009 13:34:51 +0000 Subject: [PATCH 016/143] Fix Linux again (and again and again and again...) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@925 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Common.h | 2 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 5 ++++- pcsx2/x86/ix86/Makefile.am | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pcsx2/Common.h b/pcsx2/Common.h index 46135a75ac..4daeee5663 100644 --- a/pcsx2/Common.h +++ b/pcsx2/Common.h @@ -31,8 +31,8 @@ #include "System.h" -#include "SaveState.h" #include "Plugins.h" +#include "SaveState.h" #include "DebugTools/Debug.h" #include "Memory.h" diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index cbe8ca8130..60fa164cc6 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -131,8 +131,11 @@ static void iDumpBlock( int startpc, u8 * ptr ) Console::Status( "dump1 %x:%x, %x", params startpc, pc, cpuRegs.cycle ); Path::CreateDirectory( "dumps" ); +#ifndef __LINUX__ ssprintf( filename, "dumps\\R5900dump%.8X.txt", startpc ); - +#else + ssprintf( filename, "dumps/R5900dump%.8X.txt", startpc ); +#endif fflush( stdout ); // f = fopen( "dump1", "wb" ); // fwrite( ptr, 1, (u32)x86Ptr - (u32)ptr, f ); diff --git a/pcsx2/x86/ix86/Makefile.am b/pcsx2/x86/ix86/Makefile.am index 880d6f18e3..30e16e264a 100644 --- a/pcsx2/x86/ix86/Makefile.am +++ b/pcsx2/x86/ix86/Makefile.am @@ -1,4 +1,5 @@ INCLUDES = -I@srcdir@/.. -I@srcdir@/../../ -I@srcdir@/../../../common/include -I@srcdir@/../../../3rdparty noinst_LIBRARIES = libix86.a -libix86_a_SOURCES = ix86.cpp ix86.inl ix86_3dnow.inl ix86.h ix86_fpu.inl ix86_mmx.inl ix86_sse.inl ix86_tools.cpp ix86_cpudetect.cpp ix86_macros.h ix86_group1.inl \ No newline at end of file +libix86_a_SOURCES = ix86_mmx.cpp ix86_tools.cpp ix86.cpp ix86_3dnow.cpp ix86_fpu.cpp ix86_legacy.cpp ix86_sse.cpp ix86_cpudetect.cpp ix86_group1.cpp \ +ix86_internal.h ix86.h ix86_macros.h ix86_sse_helpers.h ix86_types.h From f4e9178c06009f03bc00e9f0dd44f4c358b027f1 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Wed, 8 Apr 2009 17:15:51 +0000 Subject: [PATCH 017/143] backing up some changes git-svn-id: http://pcsx2.googlecode.com/svn/trunk@926 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/R3000A.cpp | 2 +- pcsx2/R3000A.h | 2 +- pcsx2/R5900.cpp | 14 ++++++-------- pcsx2/R5900.h | 8 ++++---- pcsx2/x86/iR3000A.cpp | 2 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 2 +- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/pcsx2/R3000A.cpp b/pcsx2/R3000A.cpp index 50931387a3..c821733549 100644 --- a/pcsx2/R3000A.cpp +++ b/pcsx2/R3000A.cpp @@ -224,7 +224,7 @@ static __forceinline void _psxTestInterrupts() } } -void psxBranchTest() +__releaseinline void psxBranchTest() { if( psxTestCycle( psxNextsCounter, psxNextCounter ) ) { diff --git a/pcsx2/R3000A.h b/pcsx2/R3000A.h index 7ab13663e8..9a93b851c1 100644 --- a/pcsx2/R3000A.h +++ b/pcsx2/R3000A.h @@ -200,7 +200,7 @@ extern R3000Acpu psxRec; void psxReset(); void psxShutdown(); void psxException(u32 code, u32 step); -void psxBranchTest(); +extern void psxBranchTest(); void psxExecuteBios(); void psxMemReset(); diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp index b19d06dd0b..35e7dda1fe 100644 --- a/pcsx2/R5900.cpp +++ b/pcsx2/R5900.cpp @@ -106,7 +106,7 @@ void cpuShutdown() disR5900FreeSyms(); } 
-__releaseinline void __fastcall cpuException(u32 code, u32 bd) +__releaseinline void cpuException(u32 code, u32 bd) { cpuRegs.branch = 0; // Tells the interpreter that an exception occurred during a branch. bool errLevel2, checkStatus; @@ -244,7 +244,7 @@ void cpuTestMissingHwInts() { } // sets a branch test to occur some time from an arbitrary starting point. -__forceinline int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta ) +__forceinline void cpuSetNextBranch( u32 startCycle, s32 delta ) { // typecast the conditional to signed so that things don't blow up // if startCycle is greater than our next branch cycle. @@ -252,20 +252,18 @@ __forceinline int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta ) if( (int)(g_nextBranchCycle - startCycle) > delta ) { g_nextBranchCycle = startCycle + delta; - return 1; } - return 0; } // sets a branch to occur some time from the current cycle -__forceinline int __fastcall cpuSetNextBranchDelta( s32 delta ) +__forceinline void cpuSetNextBranchDelta( s32 delta ) { - return cpuSetNextBranch( cpuRegs.cycle, delta ); + cpuSetNextBranch( cpuRegs.cycle, delta ); } // tests the cpu cycle agaisnt the given start and delta values. // Returns true if the delta time has passed. -__forceinline int __fastcall cpuTestCycle( u32 startCycle, s32 delta ) +__forceinline int cpuTestCycle( u32 startCycle, s32 delta ) { // typecast the conditional to signed so that things don't explode // if the startCycle is ahead of our current cpu cycle. @@ -279,7 +277,7 @@ __forceinline void cpuSetBranch() g_nextBranchCycle = cpuRegs.cycle; } -void cpuClearInt( uint i ) +__forceinline void cpuClearInt( uint i ) { jASSUME( i < 32 ); cpuRegs.interrupt &= ~(1 << i); diff --git a/pcsx2/R5900.h b/pcsx2/R5900.h index b2dd4db46f..cb482ee6bf 100644 --- a/pcsx2/R5900.h +++ b/pcsx2/R5900.h @@ -257,14 +257,14 @@ extern void cpuInit(); extern void cpuReset(); // can throw Exception::FileNotFound. extern void cpuShutdown(); extern void cpuExecuteBios(); -extern void __fastcall cpuException(u32 code, u32 bd); +extern void cpuException(u32 code, u32 bd); extern void cpuTlbMissR(u32 addr, u32 bd); extern void cpuTlbMissW(u32 addr, u32 bd); extern void cpuTestHwInts(); -extern int __fastcall cpuSetNextBranch( u32 startCycle, s32 delta ); -extern int __fastcall cpuSetNextBranchDelta( s32 delta ); -extern int __fastcall cpuTestCycle( u32 startCycle, s32 delta ); +extern void cpuSetNextBranch( u32 startCycle, s32 delta ); +extern void cpuSetNextBranchDelta( s32 delta ); +extern int cpuTestCycle( u32 startCycle, s32 delta ); extern void cpuSetBranch(); extern bool _cpuBranchTest_Shared(); // for internal use by the Dynarecs and Ints inside R5900: diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index a506bdc4a8..38e2021609 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -798,7 +798,7 @@ void psxSetBranchImm( u32 imm ) // So for now these are new settings that work. // (rama) -static u32 psxScaleBlockCycles() +static __forceinline u32 psxScaleBlockCycles() { return s_psxBlockCycles * (CHECK_IOP_CYCLERATE ? 2 : 1); } diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 60fa164cc6..2dd76b7778 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -683,7 +683,7 @@ static void __naked DispatcherReg() } } -__forceinline void recExecute() +void recExecute() { // Optimization note : Compared pushad against manually pushing the regs one-by-one. // Manually pushing is faster, especially on Core2's and such. 
:) From 57f9c2bc64ff054fc703641836e958b331ce2f76 Mon Sep 17 00:00:00 2001 From: Nneeve Date: Wed, 8 Apr 2009 18:19:48 +0000 Subject: [PATCH 018/143] Disabled a VU recompiler option that caused some SPS in Ratchet and Clank and didn't actually affect speed. Modified VU stalling logic of MR32 and MTIR instructions and modified FDIV stalling. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@927 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VUops.cpp | 14 ++++++++++++-- pcsx2/x86/iMMI.cpp | 11 ++--------- pcsx2/x86/iVUmicro.cpp | 3 ++- pcsx2/x86/iVUzerorec.cpp | 8 ++++---- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index e043760350..299803d2d6 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -2508,13 +2508,23 @@ void _vuRegsMTIR(VURegs * VU, _VURegsNum *VUregsn) { VUregsn->pipe = VUPIPE_FMAC; VUregsn->VFwrite = 0; VUregsn->VFread0 = _Fs_; - VUregsn->VFr0xyzw= _XYZW; + VUregsn->VFr0xyzw= 1 << (3-_Fsf_); VUregsn->VFread1 = 0; VUregsn->VIwrite = 1 << _Ft_; VUregsn->VIread = GET_VF0_FLAG(_Fs_); } -VUREGS_FTFS(MR32); +void _vuRegsMR32(VURegs * VU, _VURegsNum *VUregsn) { + VUregsn->pipe = VUPIPE_FMAC; + VUregsn->VFwrite = _Ft_; + VUregsn->VFwxyzw = _XYZW; + VUregsn->VFread0 = _Fs_; + VUregsn->VFr0xyzw= (_XYZW >> 1) | ((_XYZW << 3) & 0xf); //rotate + VUregsn->VFread1 = 0; + VUregsn->VFr1xyzw = 0xff; + VUregsn->VIwrite = 0; + VUregsn->VIread = (_Ft_ ? GET_VF0_FLAG(_Fs_) : 0); +} void _vuRegsLQ(VURegs * VU, _VURegsNum *VUregsn) { VUregsn->pipe = VUPIPE_FMAC; diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index 8dabe0b5c8..e031a7a6c6 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -2676,9 +2676,6 @@ CPU_SSE_XMMCACHE_END recCall( Interp::PHMADH, _Rd_ ); } -//////////////////////////////////////////////////// -//upper word of each doubleword in LO and HI is undocumented/undefined -//contains the NOT of the upper multiplication result (before the substraction of the lower multiplication result) void recPMSUBH() { CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI) @@ -2740,12 +2737,8 @@ CPU_SSE_XMMCACHE_END } //////////////////////////////////////////////////// - -// rs = ... a1 a0 -// rt = ... b1 b0 -// rd = ... a1*b1 - a0*b0 -// hi = ... -// lo = ... 
(undefined by doc)NOT(a1*b1), a1*b1 - a0*b0 +//upper word of each doubleword in LO and HI is undocumented/undefined +//it contains the NOT of the upper multiplication result (before the substraction of the lower multiplication result) void recPHMSBH() { CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI) diff --git a/pcsx2/x86/iVUmicro.cpp b/pcsx2/x86/iVUmicro.cpp index 2422e5ad10..9335151798 100644 --- a/pcsx2/x86/iVUmicro.cpp +++ b/pcsx2/x86/iVUmicro.cpp @@ -280,6 +280,7 @@ void _recvuIALUTestStall(VURegs * VU, int reg) { VU->ialu[i].enable = 0; vucycle+= cycle; + _recvuTestPipes(VU, true); } void _recvuFMACAdd(VURegs * VU, int reg, int xyzw) { @@ -387,7 +388,7 @@ void _recvuFlushFDIV(VURegs * VU) { if (VU->fdiv.enable == 0) return; - cycle = VU->fdiv.Cycle - (vucycle - VU->fdiv.sCycle); + cycle = VU->fdiv.Cycle + 1 - (vucycle - VU->fdiv.sCycle); //VU->fdiv.Cycle contains the latency minus 1 (6 or 12) // Console::WriteLn("waiting FDIV pipe %d", params cycle); VU->fdiv.enable = 0; vucycle+= cycle; diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 78ab51b4f6..0f454e638e 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -58,7 +58,7 @@ extern void iDumpVU1Registers(); #define SUPERVU_PROPAGATEFLAGS // the correct behavior of VUs, for some reason superman breaks gfx with it on... #ifndef _DEBUG -#define SUPERVU_INTERCACHING // registers won't be flushed at block boundaries (faster) +//#define SUPERVU_INTERCACHING // registers won't be flushed at block boundaries (faster) (nothing noticable speed-wise, causes SPS in Ratchet and clank (Nneeve) ) #endif #define SUPERVU_CHECKCONDITION 0 // has to be 0!! @@ -2060,9 +2060,9 @@ void VuBaseBlock::AssignVFRegs() _freeXMMreg(free1); _freeXMMreg(free2); } - else if( regs->VIwrite & (1<VIwrite & (1<VIwrite & (1<VIwrite & (1<VIread & (1< Date: Wed, 8 Apr 2009 21:19:50 +0000 Subject: [PATCH 019/143] Fixed Gradius V, had to destroy the templates arcum did a bit to get it to work without ape escape crashing (sorry mate lol. Took out my V3_# discovery, ape escape is getting spikey now, so ill just remove it. Also altered V2_# to work slightly different incase the packet starts on the Y vector, it now wont suffer underrunning (possible bad data) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@928 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/DebugTools/Debug.h | 2 + pcsx2/Vif.cpp | 108 ++++++++++++++------------------------- pcsx2/VifDma.cpp | 14 ++--- 3 files changed, 47 insertions(+), 77 deletions(-) diff --git a/pcsx2/DebugTools/Debug.h b/pcsx2/DebugTools/Debug.h index 1e42cea714..5abcd4f2d0 100644 --- a/pcsx2/DebugTools/Debug.h +++ b/pcsx2/DebugTools/Debug.h @@ -190,6 +190,8 @@ extern bool SrcLog_GPU( const char* fmt, ... 
); #define MEMCARDS_LOG 0&& #endif +//#define VIFUNPACKDEBUG //enable unpack debugging output + #ifdef VIFUNPACKDEBUG #define VIFUNPACK_LOG VIF_LOG #else diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 3e6626391f..792cd4ee44 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -333,89 +333,57 @@ static __releaseinline void writeW(u32 &dest, u32 data) template static void _UNPACKpart(u32 offnum, u32 &x, T y) { - if (_vifRegs->offset == offnum) - { - switch (offnum) - { - case OFFSET_X: - writeX(x,y); - break; - case OFFSET_Y: - writeY(x,y); - break; - case OFFSET_Z: - writeZ(x,y); - break; - case OFFSET_W: - writeW(x,y); - break; - default: - break; - } - _vifRegs->offset++; - } -} -template -static void _UNPACKpart(u32 offnum, u32 &x, T y, int size) -{ - if(size == 0) return; - - if (_vifRegs->offset == offnum) + switch (offnum) { - switch (offnum) - { - case OFFSET_X: - writeX(x,y); - break; - case OFFSET_Y: - writeY(x,y); - break; - case OFFSET_Z: - writeZ(x,y); - break; - case OFFSET_W: - writeW(x,y); - break; - default: - break; - } - size--; - _vifRegs->offset++; + case OFFSET_X: + writeX(x,y); + break; + case OFFSET_Y: + writeY(x,y); + break; + case OFFSET_Z: + writeZ(x,y); + break; + case OFFSET_W: + writeW(x,y); + break; + default: + break; } + _vifRegs->offset++; + } template void __fastcall UNPACK_S(u32 *dest, T *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, *data, size); - _UNPACKpart(OFFSET_Y, *dest++, *data, size); - _UNPACKpart(OFFSET_Z, *dest++, *data, size); - _UNPACKpart(OFFSET_W, *dest , *data, size); + if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest , *data); size--; } if (_vifRegs->offset == 4) _vifRegs->offset = 0; } template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, *data++, size); - _UNPACKpart(OFFSET_Y, *dest++, *data--, size); - _UNPACKpart(OFFSET_Z, *dest++, *data++); - _UNPACKpart(OFFSET_W, *dest , *data); + if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data); size--; } + if( _vifRegs->offset == OFFSET_Z )_UNPACKpart(OFFSET_Z, *dest++, *dest-2); + if( _vifRegs->offset == OFFSET_W )_UNPACKpart(OFFSET_W, *dest , *data); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } template void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, *data++, size); - _UNPACKpart(OFFSET_Y, *dest++, *data++, size); - _UNPACKpart(OFFSET_Z, *dest++, *data++, size); + if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data++); size--; } //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) - if((_vif->qwcalign % 24) == 0) - _UNPACKpart(OFFSET_W, *dest, 0); - else - _UNPACKpart(OFFSET_W, *dest, *data); + //Ape Escape doesnt seem to like it tho (what the hell?) 
gonna have to investigate + if( _vifRegs->offset == OFFSET_W )_UNPACKpart(OFFSET_W, *dest, *data); if (_vifRegs->offset == 4) _vifRegs->offset = 0; } @@ -423,19 +391,19 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) template void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { - _UNPACKpart(OFFSET_X, *dest++, *data++, size); - _UNPACKpart(OFFSET_Y, *dest++, *data++, size); - _UNPACKpart(OFFSET_Z, *dest++, *data++, size); - _UNPACKpart(OFFSET_W, *dest , *data, size); + if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data++); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest , *data); size--; } if (_vifRegs->offset == 4) _vifRegs->offset = 0; } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) { - _UNPACKpart(OFFSET_X, *dest++, ((*data & 0x001f) << 3), size); - _UNPACKpart(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2), size); - _UNPACKpart(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7), size); - _UNPACKpart(OFFSET_W, *dest, ((*data & 0x8000) >> 8), size); + if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2)); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7)); size--; } + if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest, ((*data & 0x8000) >> 8)); size--; } if (_vifRegs->offset == 4) _vifRegs->offset = 0; } diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 32840541c0..cd7866e224 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -29,7 +29,7 @@ using namespace std; // for min / max -//#define VIFUNPACKDEBUG //enable unpack debugging output + #define gif ((DMACh*)&PS2MEM_HW[0xA000]) @@ -379,6 +379,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x", VIFdmanum, v->cmd & 0xf, v->size, size, v->addr); + VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset); #ifdef _DEBUG if (v->size != size) { @@ -422,6 +423,8 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma VIFUNPACK_LOG("Aligning packet size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); + if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize) + VIFUNPACK_LOG("Warning! 
Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset); // SSE doesn't handle such small data if (v->size != (size >> 2)) ProcessMemSkip(size, unpackType, VIFdmanum); @@ -439,8 +442,9 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma unpacksize = 0; Console::WriteLn("Unpack align offset = 0"); } + VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset); destinc = (4 - ft->qsize) + unpacksize; - vif->qwcalign += unpacksize * ft->dsize; + func(dest, (u32*)cdata, unpacksize); size -= unpacksize * ft->dsize; cdata += unpacksize * ft->dsize; @@ -482,7 +486,6 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma while ((size >= ft->gsize) && (vifRegs->num > 0)) { - vif->qwcalign += ft->gsize; func(dest, (u32*)cdata, ft->qsize); cdata += ft->gsize; size -= ft->gsize; @@ -596,7 +599,6 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma while ((size >= ft->gsize) && (vifRegs->num > 0)) { - vif->qwcalign += ft->gsize; //Must do this before the transfer, else the confusing packets dont go right :P func(dest, (u32*)cdata, ft->qsize); cdata += ft->gsize; size -= ft->gsize; @@ -651,7 +653,6 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma //VIF_LOG("warning, end with size = %d", size); /* unpack one qword */ - vif->qwcalign += (size / ft->dsize) * ft->dsize; func(dest, (u32*)cdata, size / ft->dsize); size = 0; @@ -789,7 +790,6 @@ static __forceinline void vif0UNPACK(u32 *data) len = ((((32 >> vl) * (vn + 1)) * n) + 31) >> 5; } - vif0.qwcalign = 0; vif0.cl = 0; vif0.tag.cmd = vif0.cmd; vif0.tag.addr &= 0xfff; @@ -1519,7 +1519,7 @@ static __forceinline void vif1UNPACK(u32 *data) else vif1.tag.addr = vif1Regs->code & 0x3ff; - vif1.qwcalign = 0; + vif1Regs->offset = 0; vif1.cl = 0; vif1.tag.addr <<= 4; vif1.tag.cmd = vif1.cmd; From 76b52a97a1b82b2d9979246da10c5fc6628124f9 Mon Sep 17 00:00:00 2001 From: mattmenke Date: Thu, 9 Apr 2009 02:02:49 +0000 Subject: [PATCH 020/143] LilyPad: Small/large motor defaults should work for most devices, when creating new effect bindings. Keyboard queue fixed up a bit, mainly to favor escape down when PCSX2 is dying. Fix for ignore bindings being swapped with the swap pad bindings buttons. Updated version number, thinking of releasing soon. No known bugs, not that much more to do. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@929 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Sio.cpp | 7 +-- plugins/LilyPad/Config.cpp | 55 ++++++++++++++++++++--- plugins/LilyPad/KeyboardQueue.cpp | 75 +++++++++++++++---------------- plugins/LilyPad/LilyPad.cpp | 22 +++++---- plugins/LilyPad/XInput.cpp | 9 ++-- 5 files changed, 105 insertions(+), 63 deletions(-) diff --git a/pcsx2/Sio.cpp b/pcsx2/Sio.cpp index b40fdbccbd..45c3274a2a 100644 --- a/pcsx2/Sio.cpp +++ b/pcsx2/Sio.cpp @@ -451,16 +451,16 @@ void SIO_CommandWrite(u8 value,int way) { break; case 0x21: // Set pad slot. - sio.mtapst = 0x21; + sio.mtapst = value; sio.bufcount = 6; // No idea why this is 6, saved from old code. break; case 0x22: // Set memcard slot. - sio.mtapst = 0x22; + sio.mtapst = value; sio.bufcount = 6; // No idea why this is 6, saved from old code. break; } - // Commented out values are from original code. Break multitap in bios. + // Commented out values are from original code. They break multitap in bios. 
sio.buf[sio.bufcount-1]=0;//'+'; sio.buf[sio.bufcount]=0;//'Z'; return; @@ -554,6 +554,7 @@ void InitializeSIO(u8 value) int port = sio.GetMultitapPort(); if (!IsMtapPresent(port)) { + // If "unplug" multitap mid game, set active slots to 0. sio.activePadSlot[port] = 0; sio.activeMemcardSlot[port] = 0; } diff --git a/plugins/LilyPad/Config.cpp b/plugins/LilyPad/Config.cpp index 39e9e30ad1..2e2050a573 100644 --- a/plugins/LilyPad/Config.cpp +++ b/plugins/LilyPad/Config.cpp @@ -1404,18 +1404,59 @@ INT_PTR CALLBACK DialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, LPARAM l if (i >= 0) { unsigned int index = (unsigned int)SendMessage(GetDlgItem(hWnd, IDC_FORCEFEEDBACK), CB_GETITEMDATA, i, 0); if (index < (unsigned int) dm->numDevices) { + Device *dev = dm->devices[index]; ForceFeedbackBinding *b; - int count = CreateEffectBinding(dm->devices[index], 0, port, slot, cmd-ID_BIG_MOTOR, &b); + int count = CreateEffectBinding(dev, 0, port, slot, cmd-ID_BIG_MOTOR, &b); if (b) { - for (int j=0; j<2 && j devices[index]->numFFAxes; j++) { - b->axes[j].force = BASE_SENSITIVITY; + int needSet = 1; + if (dev->api == XINPUT && dev->numFFAxes == 2) { + needSet = 0; + if (cmd == ID_BIG_MOTOR) { + b->axes[0].force = BASE_SENSITIVITY; + } + else { + b->axes[1].force = BASE_SENSITIVITY; + } + } + else if (dev->api == DI) { + int bigIndex=0, littleIndex=0; + int constantEffect = 0, squareEffect = 0; + int j; + for (j=0; jnumFFAxes; j++) { + // DI object instance. 0 is x-axis, 1 is y-axis. + int instance = (dev->ffAxes[j].id>>8)&0xFFFF; + if (instance == 0) { + bigIndex = j; + } + else if (instance == 1) { + littleIndex = j; + } + } + for (j=0; jnumFFEffectTypes; j++) { + if (!wcsicmp(L"13541C20-8E33-11D0-9AD0-00A0C9A06E35", dev->ffEffectTypes[j].effectID)) constantEffect = j; + if (!wcsicmp(L"13541C22-8E33-11D0-9AD0-00A0C9A06E35", dev->ffEffectTypes[j].effectID)) squareEffect = j; + } + needSet = 0; + if (cmd == ID_BIG_MOTOR) { + b->axes[bigIndex].force = BASE_SENSITIVITY; + b->axes[littleIndex].force = 1; + b->effectIndex = constantEffect; + } + else { + b->axes[bigIndex].force = 1; + b->axes[littleIndex].force = BASE_SENSITIVITY; + b->effectIndex = squareEffect; + } + } + if (needSet) { + for (int j=0; j<2 && j numFFAxes; j++) { + b->axes[j].force = BASE_SENSITIVITY; + } } - } - if (count >= 0) { - PropSheet_Changed(hWndProp, hWnd); UnselectAll(hWndList); ListView_SetItemState(hWndList, count, LVIS_SELECTED, LVIS_SELECTED); } + PropSheet_Changed(hWndProp, hWnd); } } } @@ -1886,6 +1927,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L config.padConfigs[port1][slot1] = config.padConfigs[port2][slot2]; config.padConfigs[port2][slot2] = padCfgTemp; for (int i=0; inumDevices; i++) { + if (dm->devices[i]->type == IGNORE) continue; PadBindings bindings = dm->devices[i]->pads[port1][slot1]; dm->devices[i]->pads[port1][slot1] = dm->devices[i]->pads[port2][slot2]; dm->devices[i]->pads[port2][slot2] = bindings; @@ -1893,6 +1935,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L } else { for (int i=0; inumDevices; i++) { + if (dm->devices[i]->type == IGNORE) continue; free(dm->devices[i]->pads[port1][slot1].bindings); for (int j=0; jdevices[i]->pads[port1][slot1].numFFBindings; j++) { free(dm->devices[i]->pads[port1][slot1].ffBindings[j].axes); diff --git a/plugins/LilyPad/KeyboardQueue.cpp b/plugins/LilyPad/KeyboardQueue.cpp index 39ad231bf3..70badb83de 100644 --- a/plugins/LilyPad/KeyboardQueue.cpp +++ b/plugins/LilyPad/KeyboardQueue.cpp @@ -1,13 
+1,16 @@ // This is undoubtedly completely unnecessary. #include "KeyboardQueue.h" -static int numQueuedEvents = 0; -static keyEvent queuedEvents[20]; - // What MS calls a single process Mutex. Faster, supposedly. // More importantly, can be abbreviated, amusingly, as cSection. static CRITICAL_SECTION cSection; -static int csInitialized = 0; +static u8 csInitialized = 0; + +#define EVENT_QUEUE_LEN 16 +// Actually points one beyond the last queued event. +static u8 lastQueuedEvent = 0; +static u8 nextQueuedEvent = 0; +static keyEvent queuedEvents[EVENT_QUEUE_LEN]; void QueueKeyEvent(int key, int event) { if (!csInitialized) { @@ -15,50 +18,42 @@ void QueueKeyEvent(int key, int event) { InitializeCriticalSection(&cSection); } EnterCriticalSection(&cSection); - if (numQueuedEvents >= 15) { - // Generally shouldn't happen. - for (int i=0; i<15; i++) { - queuedEvents[i] = queuedEvents[i+5]; - } - numQueuedEvents = 15; + + // Don't queue events if escape is on top of queue. This is just for safety + // purposes when a game is killing the emulator for whatever reason. + if (nextQueuedEvent == lastQueuedEvent || + queuedEvents[nextQueuedEvent].key != VK_ESCAPE || + queuedEvents[nextQueuedEvent].evt != KEYPRESS) { + // Clear queue on escape down, bringing escape to front. May do something + // with shift/ctrl/alt and F-keys, later. + if (event == KEYPRESS && key == VK_ESCAPE) { + nextQueuedEvent = lastQueuedEvent; + } + + queuedEvents[lastQueuedEvent].key = key; + queuedEvents[lastQueuedEvent].evt = event; + + lastQueuedEvent = (lastQueuedEvent + 1) % EVENT_QUEUE_LEN; + // If queue wrapped around, remove last element. + if (nextQueuedEvent == lastQueuedEvent) { + nextQueuedEvent = (nextQueuedEvent + 1) % EVENT_QUEUE_LEN; + } } - int index = numQueuedEvents; - // Move escape to top of queue. May do something - // with shift/ctrl/alt and F-keys, later. - if (event == KEYPRESS && key == VK_ESCAPE) { - while (index) { - queuedEvents[index-1] = queuedEvents[index]; - index--; - } - } - queuedEvents[index].key = key; - queuedEvents[index].evt = event; - numQueuedEvents ++; LeaveCriticalSection(&cSection); } int GetQueuedKeyEvent(keyEvent *event) { - int out = 0; - if (numQueuedEvents) { - EnterCriticalSection(&cSection); - // Shouldn't be 0, but just in case... - if (numQueuedEvents) { - *event = queuedEvents[0]; - numQueuedEvents--; - out = 1; - for (int i=0; iAddDevice(new XInputDevice(i, temp)); } From 62d6c0f3e73a09bd66deefdff3602bf82a36cc45 Mon Sep 17 00:00:00 2001 From: mattmenke Date: Thu, 9 Apr 2009 02:07:45 +0000 Subject: [PATCH 021/143] LilyPad: Debug line removed. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@930 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/LilyPad/XInput.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/LilyPad/XInput.cpp b/plugins/LilyPad/XInput.cpp index 00e4900086..68bf7cd087 100644 --- a/plugins/LilyPad/XInput.cpp +++ b/plugins/LilyPad/XInput.cpp @@ -194,7 +194,7 @@ void EnumXInputDevices() { pXInputEnable(1); for (i=0; i<4; i++) { XINPUT_STATE state; - if (!i || ERROR_SUCCESS == pXInputGetState(i, &state)) { + if (ERROR_SUCCESS == pXInputGetState(i, &state)) { wsprintfW(temp, L"XInput Pad %i", i); dm->AddDevice(new XInputDevice(i, temp)); } From 88ae29ac56800afdd63bd943cf857f9eff2bf153 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Thu, 9 Apr 2009 15:22:59 +0000 Subject: [PATCH 022/143] More Vif Unpacking cleanup. (And probably not the last of it.) 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@931 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 136 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 30 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 792cd4ee44..0ae8139a95 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -333,7 +333,6 @@ static __releaseinline void writeW(u32 &dest, u32 data) template static void _UNPACKpart(u32 offnum, u32 &x, T y) { - switch (offnum) { case OFFSET_X: @@ -352,59 +351,136 @@ static void _UNPACKpart(u32 offnum, u32 &x, T y) break; } _vifRegs->offset++; - } template void __fastcall UNPACK_S(u32 *dest, T *data, int size) { - if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest , *data); size--; } - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + while (size > 0) + { + switch (_vifRegs->offset) + { + case OFFSET_X: + case OFFSET_Y: + case OFFSET_Z: + _UNPACKpart(_vifRegs->offset, *dest++, *data); + size--; + break; + case OFFSET_W: + _UNPACKpart(_vifRegs->offset, *dest , *data); + size--; + break; + default: + if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; + break; + } + } } template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data); size--; } - if( _vifRegs->offset == OFFSET_Z )_UNPACKpart(OFFSET_Z, *dest++, *dest-2); - if( _vifRegs->offset == OFFSET_W )_UNPACKpart(OFFSET_W, *dest , *data); - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + while (size > 0) + { + switch (_vifRegs->offset) + { + case OFFSET_X: + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; + break; + case OFFSET_Y: + _UNPACKpart(_vifRegs->offset, *dest++, *data); + size--; + break; + case OFFSET_Z: + _UNPACKpart(_vifRegs->offset, *dest++, *dest-2); + break; + case OFFSET_W: + _UNPACKpart(_vifRegs->offset, *dest , *data); + break; + default: + if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; + break; + } + } } template void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data++); size--; } - //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) - //Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate - if( _vifRegs->offset == OFFSET_W )_UNPACKpart(OFFSET_W, *dest, *data); - - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + while (size > 0) + { + switch (_vifRegs->offset) + { + case OFFSET_X: + case OFFSET_Y: + case OFFSET_Z: + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; + break; + //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) + //Ape Escape doesnt seem to like it tho (what the hell?) 
gonna have to investigate + case OFFSET_W: + _UNPACKpart(_vifRegs->offset, *dest, *data); + break; + default: + if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; + break; + } + } } template void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { - if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, *data++); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest , *data); size--; } - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + while (size > 0) + { + switch (_vifRegs->offset) + { + case OFFSET_X: + case OFFSET_Y: + case OFFSET_Z: + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; + break; + case OFFSET_W: + _UNPACKpart(_vifRegs->offset, *dest , *data); + size--; + break; + default: + if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; + break; + } + } } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) { - if(size > 0 && _vifRegs->offset == OFFSET_X ) { _UNPACKpart(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Y ) { _UNPACKpart(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2)); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_Z ) { _UNPACKpart(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7)); size--; } - if(size > 0 && _vifRegs->offset == OFFSET_W ) { _UNPACKpart(OFFSET_W, *dest, ((*data & 0x8000) >> 8)); size--; } - if (_vifRegs->offset == 4) _vifRegs->offset = 0; + while (size > 0) + { + switch (_vifRegs->offset) + { + case OFFSET_X: + _UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x001f) << 3)); + size--; + break; + case OFFSET_Y: + _UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x03e0) >> 2)); + size--; + break; + case OFFSET_Z: + _UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x7c00) >> 7)); + size--; + break; + case OFFSET_W: + _UNPACKpart(_vifRegs->offset, *dest, ((*data & 0x8000) >> 8)); + size--; + break; + default: + if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; + break; + } + } } void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size) From cf0c393514fac218c965575746d537dfb6320eef Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 9 Apr 2009 20:15:01 +0000 Subject: [PATCH 023/143] Fixed Outlaw Tennis error on the loading bar, as a strange side effect, this fixes the missing textures in Crash N Burn too. What is more annoying is this code use to be in the emulator ages ago (before processing skipping) and it was removed as we didn't think it actually had a use! 
:D git-svn-id: http://pcsx2.googlecode.com/svn/trunk@933 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index cd7866e224..099470d993 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -328,6 +328,16 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int break; } + //Append any skips in to the equasion + + if (vifRegs->cycle.cl > vifRegs->cycle.wl) + { + VIFUNPACK_LOG("Old addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl); + vif->tag.addr += (size / (unpack->gsize*vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl)*16); + VIFUNPACK_LOG("New addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl); + } + + if ((vif->tag.addr & 0xf) == unpack->gsize) { vif->tag.addr += 16 - unpack->gsize; From feaed9f4fa2e409402e02512d20ea94e178389a8 Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 9 Apr 2009 21:02:37 +0000 Subject: [PATCH 024/143] The last changes to clean up Vif wouldn't have worked in some situations, tried to rearrange it and space things out and skipping unnecessary checks git-svn-id: http://pcsx2.googlecode.com/svn/trunk@935 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 145 ++++++++++++++++---------------------------------- 1 file changed, 46 insertions(+), 99 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 0ae8139a95..23587c1f4a 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -356,77 +356,56 @@ static void _UNPACKpart(u32 offnum, u32 &x, T y) template void __fastcall UNPACK_S(u32 *dest, T *data, int size) { - while (size > 0) - { - switch (_vifRegs->offset) - { - case OFFSET_X: - case OFFSET_Y: - case OFFSET_Z: - _UNPACKpart(_vifRegs->offset, *dest++, *data); - size--; - break; - case OFFSET_W: - _UNPACKpart(_vifRegs->offset, *dest , *data); - size--; - break; - default: - if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; - break; - } - } + //S-# will always be a complete packet, no matter what. 
So we can skip the offset bits + writeX(*dest++, *data); + writeY(*dest++, *data); + writeZ(*dest++, *data); + writeW(*dest , *data); } template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - while (size > 0) + if(_vifRegs->offset == OFFSET_X && size > 0) { - switch (_vifRegs->offset) - { - case OFFSET_X: - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; - break; - case OFFSET_Y: - _UNPACKpart(_vifRegs->offset, *dest++, *data); - size--; - break; - case OFFSET_Z: - _UNPACKpart(_vifRegs->offset, *dest++, *dest-2); - break; - case OFFSET_W: - _UNPACKpart(_vifRegs->offset, *dest , *data); - break; - default: - if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; - break; - } + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + } + if(_vifRegs->offset == OFFSET_Y && size > 0) + { + _UNPACKpart(_vifRegs->offset, *dest++, *data); + } + if(_vifRegs->offset == OFFSET_Z) + { + _UNPACKpart(_vifRegs->offset, *dest++, *dest-2); + } + if(_vifRegs->offset == OFFSET_W) + { + _UNPACKpart(_vifRegs->offset, *dest, *data); + _vifRegs->offset = 0; } } template void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - while (size > 0) + if(_vifRegs->offset == OFFSET_X && size > 0) { - switch (_vifRegs->offset) - { - case OFFSET_X: - case OFFSET_Y: - case OFFSET_Z: - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; - break; - //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) - //Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate - case OFFSET_W: - _UNPACKpart(_vifRegs->offset, *dest, *data); - break; - default: - if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; - break; - } + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + } + if(_vifRegs->offset == OFFSET_Y && size > 0) + { + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + } + if(_vifRegs->offset == OFFSET_Z) + { + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + } + if(_vifRegs->offset == OFFSET_W) + { + //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) + //Ape Escape doesnt seem to like it tho (what the hell?) 
gonna have to investigate + _UNPACKpart(_vifRegs->offset, *dest, *data); + _vifRegs->offset = 0; } } @@ -435,52 +414,20 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { while (size > 0) { - switch (_vifRegs->offset) - { - case OFFSET_X: - case OFFSET_Y: - case OFFSET_Z: - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; - break; - case OFFSET_W: - _UNPACKpart(_vifRegs->offset, *dest , *data); - size--; - break; - default: - if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; - break; - } + _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; } + + if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = 0; } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) { - while (size > 0) - { - switch (_vifRegs->offset) - { - case OFFSET_X: - _UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x001f) << 3)); - size--; - break; - case OFFSET_Y: - _UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x03e0) >> 2)); - size--; - break; - case OFFSET_Z: - _UNPACKpart(_vifRegs->offset, *dest++, ((*data & 0x7c00) >> 7)); - size--; - break; - case OFFSET_W: - _UNPACKpart(_vifRegs->offset, *dest, ((*data & 0x8000) >> 8)); - size--; - break; - default: - if ((_vifRegs->offset > OFFSET_W) || (_vifRegs->offset < OFFSET_X)) _vifRegs->offset = 0; - break; - } - } + //As with S-#, this will always be a complete packet + writeX(*dest++, ((*data & 0x001f) << 3)); + writeY(*dest++, ((*data & 0x03e0) >> 2)); + writeZ(*dest++, ((*data & 0x7c00) >> 7)); + writeW(*dest, ((*data & 0x8000) >> 8)); } void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size) From 08d6f10d57fd1baa7a3dfe76a36a059a97fef8ae Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 9 Apr 2009 23:06:11 +0000 Subject: [PATCH 025/143] Slap my wrists for the silliest error ever :p only thing that gave it away was the sirens on top of the heads in Ape Escape 3 had no light lol git-svn-id: http://pcsx2.googlecode.com/svn/trunk@937 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 23587c1f4a..3ae921089d 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -369,10 +369,12 @@ void __fastcall UNPACK_V2(u32 *dest, T *data, int size) if(_vifRegs->offset == OFFSET_X && size > 0) { _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; } if(_vifRegs->offset == OFFSET_Y && size > 0) { _UNPACKpart(_vifRegs->offset, *dest++, *data); + size--; } if(_vifRegs->offset == OFFSET_Z) { @@ -391,10 +393,12 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) if(_vifRegs->offset == OFFSET_X && size > 0) { _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; } if(_vifRegs->offset == OFFSET_Y && size > 0) { _UNPACKpart(_vifRegs->offset, *dest++, *data++); + size--; } if(_vifRegs->offset == OFFSET_Z) { From ccacbedc50b575271d456524366efe4a12732015 Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 9 Apr 2009 23:20:13 +0000 Subject: [PATCH 026/143] last silly mistake, promise :P git-svn-id: http://pcsx2.googlecode.com/svn/trunk@938 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 3ae921089d..147f450188 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -400,7 +400,7 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) _UNPACKpart(_vifRegs->offset, *dest++, *data++); size--; } - if(_vifRegs->offset == OFFSET_Z) + if(_vifRegs->offset == OFFSET_Z && size > 0) { _UNPACKpart(_vifRegs->offset, *dest++, 
*data++); } From 98258eeffe83a5006006c29727940602cfbe6be3 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Thu, 9 Apr 2009 23:57:58 +0000 Subject: [PATCH 027/143] Yes, more Vif work. writeX, writeY, writeZ, and writeW are all merged into one function. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@939 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 334 +++++++++----------------------------------------- pcsx2/Vif.h | 90 ++++++++++++++ 2 files changed, 145 insertions(+), 279 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 147f450188..684927a591 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -44,7 +44,7 @@ enum UnpackOffset OFFSET_X = 0, OFFSET_Y = 1, OFFSET_Z = 2, - OFFSET_W =3 + OFFSET_W = 3 }; #define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) @@ -54,25 +54,29 @@ __forceinline static int _limit(int a, int max) return (a > max) ? max : a; } -static __releaseinline void writeX(u32 &dest, u32 data) +static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) { int n; + u32 vifRowReg = getVifRowRegs(offnum); if (_vifRegs->code & 0x10000000) { switch (_vif->cl) { case 0: - n = (_vifRegs->mask) & 0x3; + if (offnum == OFFSET_X) + n = (_vifRegs->mask) & 0x3; + else + n = (_vifRegs->mask >> (offnum * 2)) & 0x3; break; case 1: - n = (_vifRegs->mask >> 8) & 0x3; + n = (_vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; break; case 2: - n = (_vifRegs->mask >> 16) & 0x3; + n = (_vifRegs->mask >> (16 + (offnum * 2))) & 0x3; break; default: - n = (_vifRegs->mask >> 24) & 0x3; + n = (_vifRegs->mask >> (24 + (offnum * 2))) & 0x3; break; } } @@ -85,271 +89,48 @@ static __releaseinline void writeX(u32 &dest, u32 data) { dest = data; } - else if (_vifRegs->mode == 1) + else switch (_vifRegs->mode) { - dest = data + _vifRegs->r0; - } - else if (_vifRegs->mode == 2) - { - _vifRegs->r0 += data; - dest = _vifRegs->r0; - } - else - { - dest = data; + case 1: + dest = data + vifRowReg; + break; + case 2: + vifRowReg += data; + dest = vifRowReg; + break; + default: + dest = data; + break; } break; case 1: - dest = _vifRegs->r0; + dest = vifRowReg; break; case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: - dest = _vifRegs->c2; - break; - default: - dest = _vifRegs->c3; - break; - } + if (_vif->cl > 2) + dest = getVifColRegs(3); + else + dest = getVifColRegs(_vif->cl); break; } + setVifRowRegs(offnum, vifRowReg); // VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data); } -static __releaseinline void writeY(u32 &dest, u32 data) +template +static __releaseinline void _UNPACKpart(u32 offnum, u32 &x, T y, int size) { - int n; - - if (_vifRegs->code & 0x10000000) + if (size > 0) { - switch (_vif->cl) - { - case 0: - n = (_vifRegs->mask >> 2) & 0x3; - break; - case 1: - n = (_vifRegs->mask >> 10) & 0x3; - break; - case 2: - n = (_vifRegs->mask >> 18) & 0x3; - break; - default: - n = (_vifRegs->mask >> 26) & 0x3; - break; - } + writeXYZW(offnum, x, y); + _vifRegs->offset++; } - else n = 0; - - switch (n) - { - case 0: - if ((_vif->cmd & 0x6F) == 0x6f) - { - dest = data; - } - else if (_vifRegs->mode == 1) - { - dest = data + _vifRegs->r1; - } - else if (_vifRegs->mode == 2) - { - _vifRegs->r1 += data; - dest = _vifRegs->r1; - } - else - { - dest = data; - } - break; - case 1: - dest = _vifRegs->r1; - break; - case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: - dest = _vifRegs->c2; - break; - default: - dest = 
_vifRegs->c3; - break; - } - break; - } -// VIF_LOG("writeY %8.8x : Mode %d, r1 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r1,data); -} - -static __releaseinline void writeZ(u32 &dest, u32 data) -{ - int n; - - if (_vifRegs->code & 0x10000000) - { - switch (_vif->cl) - { - case 0: - n = (_vifRegs->mask >> 4) & 0x3; - break; - case 1: - n = (_vifRegs->mask >> 12) & 0x3; - break; - case 2: - n = (_vifRegs->mask >> 20) & 0x3; - break; - default: - n = (_vifRegs->mask >> 28) & 0x3; - break; - } - } - else n = 0; - - switch (n) - { - case 0: - if ((_vif->cmd & 0x6F) == 0x6f) - { - dest = data; - } - else if (_vifRegs->mode == 1) - { - dest = data + _vifRegs->r2; - } - else if (_vifRegs->mode == 2) - { - _vifRegs->r2 += data; - dest = _vifRegs->r2; - } - else - { - dest = data; - } - break; - case 1: - dest = _vifRegs->r2; - break; - case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: - dest = _vifRegs->c2; - break; - default: - dest = _vifRegs->c3; - break; - } - break; - } -// VIF_LOG("writeZ %8.8x : Mode %d, r2 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r2,data); -} - -static __releaseinline void writeW(u32 &dest, u32 data) -{ - int n; - - if (_vifRegs->code & 0x10000000) - { - switch (_vif->cl) - { - case 0: - n = (_vifRegs->mask >> 6) & 0x3; - break; - case 1: - n = (_vifRegs->mask >> 14) & 0x3; - break; - case 2: - n = (_vifRegs->mask >> 22) & 0x3; - break; - default: - n = (_vifRegs->mask >> 30) & 0x3; - break; - } - } - else n = 0; - - switch (n) - { - case 0: - if ((_vif->cmd & 0x6F) == 0x6f) - { - dest = data; - } - else if (_vifRegs->mode == 1) - { - dest = data + _vifRegs->r3; - } - else if (_vifRegs->mode == 2) - { - _vifRegs->r3 += data; - dest = _vifRegs->r3; - } - else - { - dest = data; - } - break; - case 1: - dest = _vifRegs->r3; - break; - case 2: - switch (_vif->cl) - { - case 0: - dest = _vifRegs->c0; - break; - case 1: - dest = _vifRegs->c1; - break; - case 2: - dest = _vifRegs->c2; - break; - default: - dest = _vifRegs->c3; - break; - } - break; - } -// VIF_LOG("writeW %8.8x : Mode %d, r3 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r3,data); } template -static void _UNPACKpart(u32 offnum, u32 &x, T y) +static __releaseinline void _UNPACKpart(u32 offnum, u32 &x, T y) { - switch (offnum) - { - case OFFSET_X: - writeX(x,y); - break; - case OFFSET_Y: - writeY(x,y); - break; - case OFFSET_Z: - writeZ(x,y); - break; - case OFFSET_W: - writeW(x,y); - break; - default: - break; - } + writeXYZW(offnum, x, y); _vifRegs->offset++; } @@ -357,24 +138,22 @@ template void __fastcall UNPACK_S(u32 *dest, T *data, int size) { //S-# will always be a complete packet, no matter what. 
So we can skip the offset bits - writeX(*dest++, *data); - writeY(*dest++, *data); - writeZ(*dest++, *data); - writeW(*dest , *data); + writeXYZW(OFFSET_X, *dest++, *data); + writeXYZW(OFFSET_Y, *dest++, *data); + writeXYZW(OFFSET_Z, *dest++, *data); + writeXYZW(OFFSET_W, *dest , *data); } template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - if(_vifRegs->offset == OFFSET_X && size > 0) + if(_vifRegs->offset == OFFSET_X) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; + _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); } - if(_vifRegs->offset == OFFSET_Y && size > 0) + if(_vifRegs->offset == OFFSET_Y) { - _UNPACKpart(_vifRegs->offset, *dest++, *data); - size--; + _UNPACKpart(_vifRegs->offset, *dest++, *data, size--); } if(_vifRegs->offset == OFFSET_Z) { @@ -383,24 +162,22 @@ void __fastcall UNPACK_V2(u32 *dest, T *data, int size) if(_vifRegs->offset == OFFSET_W) { _UNPACKpart(_vifRegs->offset, *dest, *data); - _vifRegs->offset = 0; + _vifRegs->offset = OFFSET_X; } } template void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - if(_vifRegs->offset == OFFSET_X && size > 0) + if(_vifRegs->offset == OFFSET_X) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; + _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); } - if(_vifRegs->offset == OFFSET_Y && size > 0) + if(_vifRegs->offset == OFFSET_Y) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; + _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); } - if(_vifRegs->offset == OFFSET_Z && size > 0) + if(_vifRegs->offset == OFFSET_Z) { _UNPACKpart(_vifRegs->offset, *dest++, *data++); } @@ -409,7 +186,7 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) //Ape Escape doesnt seem to like it tho (what the hell?) 
gonna have to investigate _UNPACKpart(_vifRegs->offset, *dest, *data); - _vifRegs->offset = 0; + _vifRegs->offset = OFFSET_X; } } @@ -418,20 +195,19 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { while (size > 0) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++); - size--; + _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); } - if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = 0; + if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = OFFSET_X; } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) { //As with S-#, this will always be a complete packet - writeX(*dest++, ((*data & 0x001f) << 3)); - writeY(*dest++, ((*data & 0x03e0) >> 2)); - writeZ(*dest++, ((*data & 0x7c00) >> 7)); - writeW(*dest, ((*data & 0x8000) >> 8)); + writeXYZW(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); + writeXYZW(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2)); + writeXYZW(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7)); + writeXYZW(OFFSET_W, *dest, ((*data & 0x8000) >> 8)); } void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size) diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index 90c3a6f105..eb26758fbf 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -87,6 +87,96 @@ extern "C" extern u32* _vifCol; } +__forceinline void setVifRowRegs(u32 reg, u32 data) +{ + switch (reg) + { + case 0: + _vifRegs->r0 = data; + break; + case 1: + _vifRegs->r1 = data; + break; + case 2: + _vifRegs->r2 = data; + break; + case 3: + _vifRegs->r3 = data; + break; + default: + assert(0); + break; + } +} + +__forceinline u32 getVifRowRegs(u32 reg) +{ + switch (reg) + { + case 0: + return _vifRegs->r0; + break; + case 1: + return _vifRegs->r1; + break; + case 2: + return _vifRegs->r2; + break; + case 3: + return _vifRegs->r3; + break; + default: + assert(0); + return 0; + break; + } +} + +__forceinline void setVifColRegs(u32 reg, u32 data) +{ + switch (reg) + { + case 0: + _vifRegs->c0 = data; + break; + case 1: + _vifRegs->c1 = data; + break; + case 2: + _vifRegs->c2 = data; + break; + case 3: + _vifRegs->c3 = data; + break; + default: + assert(0); + break; + } +} + +__forceinline u32 getVifColRegs(u32 reg) +{ + switch (reg) + { + case 0: + return _vifRegs->c0; + break; + case 1: + return _vifRegs->c1; + break; + case 2: + return _vifRegs->c2; + break; + case 3: + return _vifRegs->c3; + break; + default: + assert(0); + return 0; + break; + } +} + #define vif0Regs ((VIFregisters*)&PS2MEM_HW[0x3800]) #define vif1Regs ((VIFregisters*)&PS2MEM_HW[0x3c00]) From 19362d3c55729180c426cf0df9da0be19d15492c Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 10 Apr 2009 00:02:42 +0000 Subject: [PATCH 028/143] More unpack changes, tried to simplify a few sums a bit when processing skipping and a few other misc bits git-svn-id: http://pcsx2.googlecode.com/svn/trunk@940 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 76 ++++++++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 099470d993..f0aa2ec660 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -272,39 +272,39 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int switch (unpackType) { case 0x0: - vif->tag.addr += size * 4; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing S-32 skip, size = %d", size); break; case 0x1: - vif->tag.addr += size * 8; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing S-16 skip, size = %d", size); break; case 0x2: - vif->tag.addr += size * 16; + vif->tag.addr += 
(size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing S-8 skip, size = %d", size); break; case 0x4: - vif->tag.addr += size + ((size / unpack->gsize) * 8); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V2-32 skip, size = %d", size); break; case 0x5: - vif->tag.addr += (size * 2) + ((size / unpack->gsize) * 8); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V2-16 skip, size = %d", size); break; case 0x6: - vif->tag.addr += (size * 4) + ((size / unpack->gsize) * 8); - VIFUNPACK_LOG("Processing V2-8 skip, size = %d", size); + vif->tag.addr += (size / unpack->gsize) * 16; + DevCon::Notice("Processing V2-8 skip, size = %d", params size); break; case 0x8: - vif->tag.addr += size + ((size / unpack->gsize) * 4); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V3-32 skip, size = %d", size); break; case 0x9: - vif->tag.addr += (size * 2) + ((size / unpack->gsize) * 4); - VIFUNPACK_LOG("Processing V3-16 skip, size = %d", size); + vif->tag.addr += (size / unpack->gsize) * 16; + DevCon::Notice("Processing V3-16 skip, size = %d", params size); break; case 0xA: - vif->tag.addr += (size * 4) + ((size / unpack->gsize) * 4); + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V3-8 skip, size = %d", size); break; case 0xC: @@ -312,15 +312,15 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFUNPACK_LOG("Processing V4-32 skip, size = %d, CL = %d, WL = %d", size, vif1Regs->cycle.cl, vif1Regs->cycle.wl); break; case 0xD: - vif->tag.addr += size * 2; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V4-16 skip, size = %d", size); break; case 0xE: - vif->tag.addr += size * 4; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V4-8 skip, size = %d", size); break; case 0xF: - vif->tag.addr += size * 8; + vif->tag.addr += (size / unpack->gsize) * 16; VIFUNPACK_LOG("Processing V4-5 skip, size = %d", size); break; default: @@ -337,11 +337,6 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFUNPACK_LOG("New addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl); } - - if ((vif->tag.addr & 0xf) == unpack->gsize) - { - vif->tag.addr += 16 - unpack->gsize; - } } static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) @@ -386,8 +381,8 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma dest = (u32*)(VU->Mem + v->addr); - VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x", - VIFdmanum, v->cmd & 0xf, v->size, size, v->addr); + VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x", + VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num); VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset); #ifdef _DEBUG @@ -427,18 +422,33 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma memsize = size; #endif + if (v->size != (size >> 2)) + ProcessMemSkip(size, unpackType, VIFdmanum); + + + if(vif->tag.addr > (u32)(VIFdmanum ? 0x4000 : 0x1000)) + { + //Sanity Check (memory overflow) + DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 
0x4000 : 0x1000); + + } + if (_vifRegs->offset > 0) { int destinc, unpacksize; + //This is just to make sure the alignment isnt loopy on a split packet + if(_vifRegs->offset != ((vif->tag.addr & 0xf) >> 2)) + { + DevCon::Error("Warning: Unpack alignment error"); + } + VIFUNPACK_LOG("Aligning packet size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize) VIFUNPACK_LOG("Warning! Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset); // SSE doesn't handle such small data - if (v->size != (size >> 2)) - ProcessMemSkip(size, unpackType, VIFdmanum); - + if (vifRegs->offset < (u32)ft->qsize) { if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset)) @@ -473,11 +483,10 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma { dest += destinc; } + VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); } - else if (v->size != (size >> 2)) - ProcessMemSkip(size, unpackType, VIFdmanum); if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write { @@ -614,6 +623,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma size -= ft->gsize; vifRegs->num--; + //if(vifRegs->num == loophere) dest = (u32*)(VU->Mem); ++vif->cl; if (vif->cl == vifRegs->cycle.wl) { @@ -624,6 +634,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma { dest += 4; } + } // have to update @@ -663,9 +674,10 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma //VIF_LOG("warning, end with size = %d", size); /* unpack one qword */ + vif->tag.addr += (size / ft->dsize) * 4; func(dest, (u32*)cdata, size / ft->dsize); size = 0; - + VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr); } @@ -674,8 +686,11 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma { VIF_LOG("VIFunpack - filling write"); + if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0) + DevCon::Notice("Filling write warning! Size < packet size and CL != 0"); + VIFUNPACK_LOG("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType); - while (size >= ft->gsize || vifRegs->num > 0) + while (vifRegs->num > 0) { if (vif->cl == vifRegs->cycle.wl) { @@ -693,6 +708,11 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma { vif->cl = 0; } + if(size < ft->gsize) + { + VIF_LOG("Out of Filling write data"); + break; + } } else { From f127f69b3e9fb108effefcb9f6413ad0ace18437 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 10 Apr 2009 01:34:04 +0000 Subject: [PATCH 029/143] A few tweaks to the unpacking code. _UNPACKPart isn't really neccessary anymore, and optimised writeXYZW a little. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@941 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 88 ++++++++++++++++++++++++++++----------------------- pcsx2/Vif.h | 29 +++++++---------- 2 files changed, 59 insertions(+), 58 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 684927a591..8157efef21 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -47,8 +47,6 @@ enum UnpackOffset OFFSET_W = 3 }; -#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) - __forceinline static int _limit(int a, int max) { return (a > max) ? 
max : a; @@ -95,8 +93,8 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) dest = data + vifRowReg; break; case 2: - vifRowReg += data; - dest = vifRowReg; + // vifRowReg isn't used after this, or I would make it equal to dest here. + dest = setVifRowRegs(offnum, vifRowReg + data); break; default: dest = data; @@ -107,33 +105,13 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) dest = vifRowReg; break; case 2: - if (_vif->cl > 2) - dest = getVifColRegs(3); - else - dest = getVifColRegs(_vif->cl); + dest = getVifColRegs((_vif->cl > 2) ? 3 : _vif->cl); break; + jNO_DEFAULT; } - setVifRowRegs(offnum, vifRowReg); // VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data); } -template -static __releaseinline void _UNPACKpart(u32 offnum, u32 &x, T y, int size) -{ - if (size > 0) - { - writeXYZW(offnum, x, y); - _vifRegs->offset++; - } -} - -template -static __releaseinline void _UNPACKpart(u32 offnum, u32 &x, T y) -{ - writeXYZW(offnum, x, y); - _vifRegs->offset++; -} - template void __fastcall UNPACK_S(u32 *dest, T *data, int size) { @@ -147,21 +125,35 @@ void __fastcall UNPACK_S(u32 *dest, T *data, int size) template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - if(_vifRegs->offset == OFFSET_X) + if (_vifRegs->offset == OFFSET_X) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); + if (size > 0) + { + writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset = OFFSET_Y; + size--; + } } - if(_vifRegs->offset == OFFSET_Y) + + if (_vifRegs->offset == OFFSET_Y) { - _UNPACKpart(_vifRegs->offset, *dest++, *data, size--); + if (size > 0) + { + writeXYZW(_vifRegs->offset, *dest++, *data); + _vifRegs->offset = OFFSET_Z; + size--; + } } - if(_vifRegs->offset == OFFSET_Z) + + if (_vifRegs->offset == OFFSET_Z) { - _UNPACKpart(_vifRegs->offset, *dest++, *dest-2); + writeXYZW(_vifRegs->offset, *dest++, *dest-2); + _vifRegs->offset = OFFSET_W; } - if(_vifRegs->offset == OFFSET_W) + + if (_vifRegs->offset == OFFSET_W) { - _UNPACKpart(_vifRegs->offset, *dest, *data); + writeXYZW(_vifRegs->offset, *dest, *data); _vifRegs->offset = OFFSET_X; } } @@ -171,21 +163,35 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { if(_vifRegs->offset == OFFSET_X) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); + if (size > 0) + { + writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset = OFFSET_Y; + size--; + } } + if(_vifRegs->offset == OFFSET_Y) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); + if (size > 0) + { + writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset = OFFSET_Z; + size--; + } } + if(_vifRegs->offset == OFFSET_Z) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++); + writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset = OFFSET_W; } - if(_vifRegs->offset == OFFSET_W) + + if(_vifRegs->offset == OFFSET_W) { //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) //Ape Escape doesnt seem to like it tho (what the hell?) 
gonna have to investigate - _UNPACKpart(_vifRegs->offset, *dest, *data); + writeXYZW(_vifRegs->offset, *dest, *data); _vifRegs->offset = OFFSET_X; } } @@ -195,7 +201,9 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { while (size > 0) { - _UNPACKpart(_vifRegs->offset, *dest++, *data++, size--); + writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset++; + size--; } if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = OFFSET_X; diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index eb26758fbf..fa318d7618 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -87,7 +87,7 @@ extern "C" extern u32* _vifCol; } -__forceinline void setVifRowRegs(u32 reg, u32 data) +static __forceinline u32 setVifRowRegs(u32 reg, u32 data) { switch (reg) { @@ -103,13 +103,12 @@ __forceinline void setVifRowRegs(u32 reg, u32 data) case 3: _vifRegs->r3 = data; break; - default: - assert(0); - break; + jNO_DEFAULT; } + return data; } -__forceinline u32 getVifRowRegs(u32 reg) +static __forceinline u32 getVifRowRegs(u32 reg) { switch (reg) { @@ -125,14 +124,11 @@ __forceinline u32 getVifRowRegs(u32 reg) case 3: return _vifRegs->r3; break; - default: - assert(0); - return 0; - break; + jNO_DEFAULT; } } -__forceinline void setVifColRegs(u32 reg, u32 data) +static __forceinline u32 setVifColRegs(u32 reg, u32 data) { switch (reg) { @@ -148,13 +144,12 @@ __forceinline void setVifColRegs(u32 reg, u32 data) case 3: _vifRegs->c3 = data; break; - default: - assert(0); - break; + jNO_DEFAULT; } + return data; } -__forceinline u32 getVifColRegs(u32 reg) +static __forceinline u32 getVifColRegs(u32 reg) { switch (reg) { @@ -170,15 +165,13 @@ __forceinline u32 getVifColRegs(u32 reg) case 3: return _vifRegs->c3; break; - default: - assert(0); - return 0; - break; + jNO_DEFAULT; } } #define vif0Regs ((VIFregisters*)&PS2MEM_HW[0x3800]) #define vif1Regs ((VIFregisters*)&PS2MEM_HW[0x3c00]) +#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) void dmaVIF0(); void dmaVIF1(); From b1769a2061e22e7bc59ce5842fd631099ac9886a Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 10 Apr 2009 01:50:31 +0000 Subject: [PATCH 030/143] Take care of Issue 139. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@942 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/ix86.cpp | 2 +- pcsx2/x86/ix86/ix86_types.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index a13c7b34a7..f62cabf789 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -41,7 +41,7 @@ XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; namespace x86Emitter { -x86IndexerType ptr; +const x86IndexerType ptr; ////////////////////////////////////////////////////////////////////////////////////////// // diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 0ab5d6bcf9..ede42f92af 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -365,10 +365,12 @@ namespace x86Emitter { return ModSib( (uptr)src ); } + + x86IndexerType() {} }; // ------------------------------------------------------------------------ - extern x86IndexerType ptr; + extern const x86IndexerType ptr; extern const x86Register32 eax; extern const x86Register32 ebx; From b0da55cb891d0a444ad2526b99312d275b49847d Mon Sep 17 00:00:00 2001 From: gabest11 Date: Fri, 10 Apr 2009 07:12:29 +0000 Subject: [PATCH 031/143] GSdx: this should probably fix taking snapshots with dx9, also upped the version to .15, since the revision number has passed what the last release still had from the old repository. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@943 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GS.h | 2 +- plugins/GSdx/GSRendererHW.h | 4 ++-- plugins/GSdx/GSTexture9.cpp | 18 +++++++----------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index 388af54658..f670531aa4 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -26,7 +26,7 @@ #pragma once -#define PLUGIN_VERSION 14 +#define PLUGIN_VERSION 15 #include "GSVector.h" diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index 2ad320f9e8..e343852712 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -349,8 +349,6 @@ protected: OverrideOutput(); - m_tc->InvalidateTextures(context->FRAME, context->ZBUF); - if(s_dump) { CString str; @@ -360,6 +358,8 @@ protected: if(s_savez) ds->m_texture.Save(str); // if(s_savez) m_dev.SaveToFileD32S8X24(ds->m_texture, str); // TODO } + + m_tc->InvalidateTextures(context->FRAME, context->ZBUF); } virtual void Draw(int prim, Texture& rt, Texture& ds, typename GSTextureCache::GSTexture* tex) = 0; diff --git a/plugins/GSdx/GSTexture9.cpp b/plugins/GSdx/GSTexture9.cpp index 8d75cc2845..bff9df141c 100644 --- a/plugins/GSdx/GSTexture9.cpp +++ b/plugins/GSdx/GSTexture9.cpp @@ -140,8 +140,8 @@ void GSTexture9::Unmap() bool GSTexture9::Save(CString fn, bool dds) { - CComPtr res; - + CComPtr surface; + if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) { HRESULT hr; @@ -153,8 +153,6 @@ bool GSTexture9::Save(CString fn, bool dds) if(desc.Format != D3DFMT_D32F_LOCKABLE) return false; - CComPtr surface; - hr = m_dev->CreateOffscreenPlainSurface(desc.Width, desc.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surface, NULL); D3DLOCKED_RECT slr, dlr; @@ -175,24 +173,22 @@ bool GSTexture9::Save(CString fn, bool dds) m_surface->UnlockRect(); surface->UnlockRect(); - - res = surface; } else { - res = m_surface; + surface = m_surface; } - if(CComQIPtr surface = res) + if(surface != NULL) { return SUCCEEDED(D3DXSaveSurfaceToFile(fn, dds ? D3DXIFF_DDS : D3DXIFF_BMP, surface, NULL, NULL)); } - - if(CComQIPtr texture = res) +/* + if(CComQIPtr texture = surface) { return SUCCEEDED(D3DXSaveTextureToFile(fn, dds ? D3DXIFF_DDS : D3DXIFF_BMP, texture, NULL)); } - +*/ return false; } From 432b060109a3b7da0ad8566bbc75424b1d3ef842 Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 10 Apr 2009 13:08:19 +0000 Subject: [PATCH 032/143] Fix for one small bug, doesnt fix tekken 5 tho :( git-svn-id: http://pcsx2.googlecode.com/svn/trunk@944 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 8157efef21..48efdf60fc 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -183,8 +183,12 @@ void __fastcall UNPACK_V3(u32 *dest, T *data, int size) if(_vifRegs->offset == OFFSET_Z) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset = OFFSET_W; + if (size > 0) + { + writeXYZW(_vifRegs->offset, *dest++, *data++); + _vifRegs->offset = OFFSET_W; + size--; + } } if(_vifRegs->offset == OFFSET_W) From e1bf40546f434180c5482de4d0b273ec19a29e39 Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 10 Apr 2009 13:15:52 +0000 Subject: [PATCH 033/143] Why this broke Tekken 5 i don't know! 
(answers on a postcard) anyhow, fixed :) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@945 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 48efdf60fc..f20103e72d 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -107,7 +107,9 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) case 2: dest = getVifColRegs((_vif->cl > 2) ? 3 : _vif->cl); break; - jNO_DEFAULT; + case 3: + //Masked so don't do anything + break; } // VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data); } From 756127d09602cadc61634f207bbea473a89849e2 Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 10 Apr 2009 13:29:20 +0000 Subject: [PATCH 034/143] Fixed recently discovered bug from VIF which could have potentially happened anywhere jNO_DEFAULT is used (nobody noticed lol) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@946 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/PS2Etypes.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/common/include/PS2Etypes.h b/common/include/PS2Etypes.h index 705d22aa46..6f652e8736 100644 --- a/common/include/PS2Etypes.h +++ b/common/include/PS2Etypes.h @@ -55,8 +55,6 @@ // disable the default case in a switch #define jNO_DEFAULT \ { \ - break; \ - \ default: \ jASSUME(0); \ break; \ From 8738f75ba72296e1aa3dfc354f750e5f7c361a07 Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 10 Apr 2009 13:42:37 +0000 Subject: [PATCH 035/143] Fixed alignment problems noticed in Digital Devil Saga git-svn-id: http://pcsx2.googlecode.com/svn/trunk@947 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index f0aa2ec660..4d2ec15a8c 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -336,7 +336,9 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int vif->tag.addr += (size / (unpack->gsize*vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl)*16); VIFUNPACK_LOG("New addr %x CL %x WL %x", vif->tag.addr, vifRegs->cycle.cl, vifRegs->cycle.wl); } - + + //This is sorted out later + vif->tag.addr &= ~0xf; } static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) From 6775b8a5cc047ac9267d119780273b92d632e9a9 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Fri, 10 Apr 2009 19:45:27 +0000 Subject: [PATCH 036/143] microVU: - added microVU_Execution.inl - dispatcher stuff is now recompiled with pcsx2's emitter instead of using inline asm, it's cleaner than inline asm and it's more portable since the asm won't have to be ported to GCC. - lots of first-pass implementation for lower opcodes - implemented documented branch behavior (first pass stuff only) Note: there's some undocumented stuff branches do according to Nneeve's tests, but I won't implement those for now since 99% of games shouldn't need it, and according to the tests, the behavior seems kind of random/erratic.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@948 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/Pcsx2Config.h | 3 +- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 + pcsx2/x86/iVU0micro.cpp | 2 + pcsx2/x86/iVU1micro.cpp | 2 + pcsx2/x86/ix86/ix86_cpudetect.cpp | 1 + pcsx2/x86/microVU.cpp | 101 ++------------------- pcsx2/x86/microVU.h | 29 +++--- pcsx2/x86/microVU_Alloc.inl | 18 ++-- pcsx2/x86/microVU_Analyze.inl | 57 +++++++++++- pcsx2/x86/microVU_Compile.inl | 14 +-- pcsx2/x86/microVU_Lower.inl | 49 +++++++--- pcsx2/x86/microVU_Misc.h | 31 +++++-- 12 files changed, 154 insertions(+), 157 deletions(-) diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index d99f47debe..2da21f9f11 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -44,7 +44,8 @@ extern SessionOverrideFlags g_Session; ////////////////////////////////////////////////////////////////////////// // Pcsx2 User Configuration Options! -//#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs +//#define PCSX2_MICROVU // Use Micro VU recs instead of Zero VU Recs +//#define PCSX2_MICROVU_ // Fully enable Micro VU recs (temporary option for now) #define PCSX2_GSMULTITHREAD 1 // uses multi-threaded gs #define PCSX2_EEREC 0x10 #define PCSX2_VU0REC 0x20 diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index b013011d39..ba2df10aca 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2506,6 +2506,10 @@ RelativePath="..\..\x86\microVU_Compile.inl" > + + diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index 2dfaac7669..55af7a5c54 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -83,8 +83,10 @@ namespace VU0micro if((VU0.VI[REG_VPU_STAT].UL & 1) == 0) return; FreezeXMMRegs(1); + FreezeMMXRegs(1); runVUrec(VU0.VI[REG_TPC].UL & 0xfff, 0xffffffff, 0); FreezeXMMRegs(0); + FreezeMMXRegs(0); } } diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index 87f3099d7b..7debdcfa8e 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -142,8 +142,10 @@ namespace VU1micro assert( (VU1.VI[REG_TPC].UL&7) == 0 ); FreezeXMMRegs(1); + FreezeMMXRegs(0); runVUrec(VU1.VI[REG_TPC].UL & 0x3fff, 0xffffffff, 1); FreezeXMMRegs(0); + FreezeMMXRegs(0); } } #endif diff --git a/pcsx2/x86/ix86/ix86_cpudetect.cpp b/pcsx2/x86/ix86/ix86_cpudetect.cpp index 7e3323e4a7..b1fc04a96e 100644 --- a/pcsx2/x86/ix86/ix86_cpudetect.cpp +++ b/pcsx2/x86/ix86/ix86_cpudetect.cpp @@ -398,6 +398,7 @@ void cpudetectInit() cpudetectSSE3(recSSE); HostSys::Munmap( recSSE, 0x1000 ); } + else { Console::Error("Error: Failed to allocate memory for SSE3 State detection."); } ////////////////////////////////////// // Core Counting! diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 0e537472e6..d2aef9b3bd 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -64,7 +64,12 @@ microVUt(void) mVUreset() { // Dynarec Cache mVU->cache = SysMmapEx(mVU->cacheAddr, mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! 
(addr: 0x%x)", params (u32)mVU->cache)); - + mVU->ptr = mVU->cache; + + // Setup Entrance/Exit Points + mVUdispatcherA(); + mVUdispatcherB(); + // Other Variables memset(&mVU->prog, 0, sizeof(mVU->prog)); mVU->prog.finished = 1; @@ -99,33 +104,6 @@ microVUt(void) mVUclear(u32 addr, u32 size) { // that its probably not worth it... } -// Executes for number of cycles -microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { -/* - Pseudocode: (ToDo: implement # of cycles) - 1) Search for existing program - 2) If program not found, goto 5 - 3) Search for recompiled block - 4) If recompiled block found, goto 6 - 5) Recompile as much blocks as possible - 6) Return start execution address of block -*/ - microVU* mVU = mVUx; - if ( mVUsearchProg(mVU) ) { // Found Program - //microBlock* block = mVU->prog.prog[mVU->prog.cur].block[startPC]->search(mVU->prog.lastPipelineState); - //if (block) return block->x86ptrStart; // Found Block - } - // Recompile code - return NULL; -} - -void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { - return mVUexecute<0>(startPC, cycles); -} -void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { - return mVUexecute<1>(startPC, cycles); -} - //------------------------------------------------------------------ // Micro VU - Private Functions //------------------------------------------------------------------ @@ -206,73 +184,6 @@ __forceinline void mVUinvalidateBlock(microVU* mVU, u32 addr, u32 size) { } } -//------------------------------------------------------------------ -// Dispatcher Functions -//------------------------------------------------------------------ - -#ifdef _MSC_VER -// Runs VU0 for number of cycles -__declspec(naked) void __fastcall startVU0(u32 startPC, u32 cycles) { - __asm { - // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. - call mVUexecuteVU0 - - /*backup cpu state*/ - push ebx; - push ebp; - push esi; - push edi; - - ldmxcsr g_sseVUMXCSR - /* Should set xmmZ? */ - jmp eax - } -} - -// Runs VU1 for number of cycles -__declspec(naked) void __fastcall startVU1(u32 startPC, u32 cycles) { - __asm { - - call mVUexecuteVU1 - - /*backup cpu state*/ - push ebx; - push ebp; - push esi; - push edi; - - ldmxcsr g_sseVUMXCSR - - jmp eax - } -} - -// Exit point -__declspec(naked) void __fastcall endVU0(u32 startPC, u32 cycles) { - __asm { - - //call mVUcleanUpVU0 - - /*restore cpu state*/ - pop edi; - pop esi; - pop ebp; - pop ebx; - - ldmxcsr g_sseMXCSR - emms - - ret - } -} -#else -extern "C" { - extern void __fastcall startVU0(u32 startPC, u32 cycles); - extern void __fastcall startVU1(u32 startPC, u32 cycles); - extern void __fastcall endVU0(u32 startPC, u32 cycles); -} -#endif - //------------------------------------------------------------------ // Wrapper Functions - Called by other parts of the Emu //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 049520d255..41412f70bf 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -125,17 +125,7 @@ struct microVU { u32 iReg; // iReg (only used in recompilation, not execution) u32 clipFlag[4]; // 4 instances of clip flag (used in execution) u32 divFlag; // 1 instance of I/D flags - -/* - uptr x86eax; // Accumulator register. Used in arithmetic operations. - uptr x86ecx; // Counter register. Used in shift/rotate instructions. - uptr x86edx; // Data register. Used in arithmetic operations and I/O operations. 
- uptr x86ebx; // Base register. Used as a pointer to data (located in DS in segmented mode). - uptr x86esp; // Stack Pointer register. Pointer to the top of the stack. - uptr x86ebp; // Stack Base Pointer register. Used to point to the base of the stack. - uptr x86esi; // Source register. Used as a pointer to a source in stream operations. - uptr x86edi; // Destination register. Used as a pointer to a destination in stream operations. -*/ + u32 VIbackup[2]; // Holds a backup of a VI reg if modified before a branch }; // microVU rec structs @@ -146,14 +136,24 @@ extern PCSX2_ALIGNED16(microVU microVU1); extern void (*mVU_UPPER_OPCODE[64])( VURegs* VU, s32 info ); extern void (*mVU_LOWER_OPCODE[128])( VURegs* VU, s32 info ); +// Main Functions +microVUt(void) mVUinit(VURegs*); +microVUt(void) mVUreset(); +microVUt(void) mVUclose(); +microVUt(void) mVUclear(u32, u32); + +// Private Functions __forceinline void mVUclearProg(microVU* mVU, int progIndex); __forceinline int mVUfindLeastUsedProg(microVU* mVU); __forceinline int mVUsearchProg(microVU* mVU); __forceinline void mVUcacheProg(microVU* mVU, int progIndex); +void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles); +void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles); -#ifdef __LINUX__ -microVUt(void) mVUreset(); -microVUt(void) mVUclose(); +#ifndef __LINUX__ +typedef void (__fastcall *mVUrecCall)(u32, u32); +#else +typedef void (*mVUrecCall)(u32, u32) __attribute__((__fastcall)); // Not sure if this is correct syntax (should be close xD) #endif // Include all the *.inl files (Needed because C++ sucks with templates and *.cpp files) @@ -162,3 +162,4 @@ microVUt(void) mVUclose(); #include "microVU_Alloc.inl" #include "microVU_Tables.inl" #include "microVU_Compile.inl" +#include "microVU_Execute.inl" diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 3316d7360c..dee76e8f92 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -708,19 +708,7 @@ microVUt(void) mVUallocCFLAGb(int reg, int fInstance) { microVU* mVU = mVUx; MOV32RtoM(mVU->clipFlag[fInstance], reg); } -/* -microVUt(void) mVUallocDFLAGa(int reg) { - microVU* mVU = mVUx; - //if (!mVUdivFlag) { MOV32MtoR(reg, (uptr)&mVU->divFlag[readQ]); AND32ItoR(reg, 0xc00); } - //else if (mVUdivFlag & 1) { XOR32RtoR(reg, reg); } - //else { MOV32ItoR(reg, (u32)((mVUdivFlag << 9) & 0xc00)); } -} -microVUt(void) mVUallocDFLAGb(int reg) { - microVU* mVU = mVUx; - //MOV32RtoM((uptr)&mVU->divFlag[writeQ], reg); -} -*/ //------------------------------------------------------------------ // VI Reg Allocators //------------------------------------------------------------------ @@ -734,6 +722,12 @@ microVUt(void) mVUallocVIa(int GPRreg, int _reg_) { microVUt(void) mVUallocVIb(int GPRreg, int _reg_) { microVU* mVU = mVUx; + if (backupVI) { // Backs up reg to memory (used when VI is modified b4 a branch) + MOV32RtoM((uptr)&mVU->VIbackup[1], GPRreg); + mVUallocVIa(GPRreg, _reg_); + MOV32RtoM((uptr)&mVU->VIbackup[0], GPRreg); + MOV32MtoR(GPRreg, (uptr)&mVU->VIbackup[1]); + } if (_reg_ == 0) { return; } else if (_reg_ < 9) { MOVD32RtoMMX(mmVI(_reg_), GPRreg); } else { MOV16RtoM((uptr)&mVU->regs->VI[_reg_].UL, GPRreg); } diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index dd5918cdf0..6768153aee 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -143,6 +143,32 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { analyzePreg(xCycles); } 
+//------------------------------------------------------------------ +// LQx - LQ/LQD/LQI Opcodes +//------------------------------------------------------------------ + +#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } +#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; mVUinfo |= _writesVI; mVU->VIbackup[0] = reg; } } + +microVUt(void) mVUanalyzeLQ(int Ft, int Is, bool writeIs) { + microVU* mVU = mVUx; + analyzeVIreg1(Is); + analyzeReg2(Ft); + if (!Ft) { mVUinfo |= (writeIs && Is) ? _noWriteVF : _isNOP; } + if (writeIs) { analyzeVIreg2(Is, 1); } +} + +//------------------------------------------------------------------ +// SQx - SQ/SQD/SQI Opcodes +//------------------------------------------------------------------ + +microVUt(void) mVUanalyzeSQ(int Fs, int It, bool writeIt) { + microVU* mVU = mVUx; + analyzeReg1(Fs); + analyzeVIreg1(It); + if (writeIt) { analyzeVIreg2(It, 1); } +} + //------------------------------------------------------------------ // R*** - R Reg Opcodes //------------------------------------------------------------------ @@ -166,9 +192,6 @@ microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { // Sflag - Status Flag Opcodes //------------------------------------------------------------------ -#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } -#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; } } - microVUt(void) mVUanalyzeSflag(int It) { microVU* mVU = mVUx; if (!It) { mVUinfo |= _isNOP; return; } @@ -190,4 +213,32 @@ microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) { analyzeXGkick2(xCycles); } +//------------------------------------------------------------------ +// Branches - Branch Opcodes +//------------------------------------------------------------------ + +#define analyzeBranchVI(reg, infoVal) { \ + if (reg && (mVUcycles > 1)) { /* Ensures branch is not first opcode in block */ \ + incPC(-2); \ + if (writesVI && (reg == mVU->VIbackup[0])) { /* If prev Op modified VI reg */ \ + mVUinfo |= _backupVI; \ + incPC(2); \ + mVUinfo |= infoVal; \ + } \ + else { incPC(2); } \ + } \ +} + +microVUt(void) mVUanalyzeBranch1(int Is) { + microVU* mVU = mVUx; + if (mVUregs.VI[Is]) { analyzeVIreg1(Is); } + else { analyzeBranchVI(Is, _memReadIs); } +} + +microVUt(void) mVUanalyzeBranch2(int Is, int It) { + microVU* mVU = mVUx; + if (mVUregs.VI[Is] || mVUregs.VI[It]) { analyzeVIreg1(Is); analyzeVIreg1(It); } + else { analyzeBranchVI(Is, _memReadIs); analyzeBranchVI(It, _memReadIt);} +} + #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 554cf2612b..c9a8217641 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -29,11 +29,7 @@ } \ } -#define curI mVUcurProg.data[iPC] -#define setCode() { mVU->code = curI; } -#define startLoop() { mVUdebugStuff1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } -#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } -#define incCycles(x) { mVUincCycles(x); } +#define startLoop() { mVUdebug1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } #define calcCycles(reg, x) { reg = ((reg > x) ? 
(reg - x) : 0); } microVUt(void) mVUincCycles(int x) { @@ -115,10 +111,10 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, if (isEOB) { x = 0; } else if (isBranch) { mVUopU(); incPC(2); } - mVUopU(); - if (isNop) { if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } - else { incPC(1); mVUopL(); } - + if (isNop) { mVUopU(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } + else if (!swapOps) { mVUopU(); incPC(1); mVUopL(); } + else { incPC(1); mVUopL(); incPC(-1); mVUopU(); incPC(1); } + if (!isBdelay) { incPC(1); } else { incPC(-2); // Go back to Branch Opcode diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 5ac22e06e7..18e73ebde7 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -799,7 +799,7 @@ microVUf(void) mVU_ISWR() { microVUf(void) mVU_LQ() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 0); } else { if (!_Fs_) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W); @@ -817,9 +817,9 @@ microVUf(void) mVU_LQ() { microVUf(void) mVU_LQD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 1); } else { - if (!_Fs_ && _Ft_ && !noWriteVF) { + if (!_Fs_ && !noWriteVF) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } @@ -827,7 +827,7 @@ microVUf(void) mVU_LQD() { mVUallocVIa(gprT1, _Fs_); SUB16ItoR(gprT1, 1); mVUallocVIb(gprT1, _Fs_); // ToDo: Backup to memory check. - if (_Ft_ && !noWriteVF) { + if (!noWriteVF) { mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -838,15 +838,15 @@ microVUf(void) mVU_LQD() { microVUf(void) mVU_LQI() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 1); } else { - if (!_Fs_ && _Ft_ && !noWriteVF) { + if (!_Fs_ && !noWriteVF) { mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } else { mVUallocVIa((_Ft_) ? 
gprT1 : gprT2, _Fs_); - if (_Ft_ && !noWriteVF) { + if (!noWriteVF) { MOV32RtoR(gprT2, gprT1); mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); @@ -864,7 +864,7 @@ microVUf(void) mVU_LQI() { microVUf(void) mVU_SQ() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 0); } else { if (!_Ft_) { getReg7(xmmFs, _Fs_); @@ -882,7 +882,7 @@ microVUf(void) mVU_SQ() { microVUf(void) mVU_SQD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 1); } else { if (!_Ft_) { getReg7(xmmFs, _Fs_); @@ -901,7 +901,7 @@ microVUf(void) mVU_SQD() { microVUf(void) mVU_SQI() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 1); } else { if (!_Ft_) { getReg7(xmmFs, _Fs_); @@ -1042,8 +1042,10 @@ microVUf(void) mVU_XGKICK() { if (!recPass) { mVUanalyzeXGkick(_Fs_, 4); } else { mVUallocVIa(gprT2, _Fs_); // gprT2 = ECX for __fastcall + PUSH32R(gprR); // gprR = EDX is volatile so backup if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0); else CALLFunc((uptr)mVU_XGKICK1); + POP32R(gprR); // Restore } } @@ -1058,7 +1060,8 @@ microVUf(void) mVU_B() { microVUf(void) mVU_BAL() { microVU* mVU = mVUx; mVUbranch = 1; - if (recPass) { + if (!recPass) { analyzeVIreg2(_Ft_, 1); } + else { MOV32ItoR(gprT1, (xPC + (2 * 8)) & 0xffff); mVUallocVIb(gprT1, _Ft_); } @@ -1066,34 +1069,50 @@ microVUf(void) mVU_BAL() { microVUf(void) mVU_IBEQ() { microVU* mVU = mVUx; mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } + else {} } microVUf(void) mVU_IBGEZ() { microVU* mVU = mVUx; mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else {} } microVUf(void) mVU_IBGTZ() { microVU* mVU = mVUx; mVUbranch = 2; -} -microVUf(void) mVU_IBLTZ() { - microVU* mVU = mVUx; - mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else {} } microVUf(void) mVU_IBLEZ() { microVU* mVU = mVUx; mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else {} +} +microVUf(void) mVU_IBLTZ() { + microVU* mVU = mVUx; + mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else {} } microVUf(void) mVU_IBNE() { microVU* mVU = mVUx; mVUbranch = 2; + if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } + else {} } microVUf(void) mVU_JR() { microVU* mVU = mVUx; mVUbranch = 3; + if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else {} } microVUf(void) mVU_JALR() { microVU* mVU = mVUx; mVUbranch = 3; + if (!recPass) { mVUanalyzeBranch1(_Fs_); analyzeVIreg2(_Ft_, 1); } + else {} } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index dd52f1b489..5a1267a556 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -144,14 +144,16 @@ declareAllVariables #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles #define mVUstall mVUallocInfo.maxStall -//#define mVUdivFlag mVUallocInfo.divFlag -//#define mVUdivFlagT mVUallocInfo.divFlagTimer #define mVUregs mVUallocInfo.regs #define mVUregsTemp mVUallocInfo.regsTemp #define mVUinfo mVUallocInfo.info[mVUallocInfo.curPC / 2] #define mVUstartPC mVUallocInfo.startPC #define iPC mVUallocInfo.curPC #define xPC ((iPC / 2) * 8) +#define curI mVUcurProg.data[iPC] +#define setCode() { mVU->code = curI; } +#define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } +#define incCycles(x) { mVUincCycles(x); } #define _isNOP (1<<0) // Skip Lower Instruction #define _isBranch (1<<1) // Cur Instruction is a Branch @@ -174,6 +176,11 @@ declareAllVariables #define _fvsInstance (3<<18) #define _fvcInstance 
(3<<20) #define _noWriteVF (1<<21) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) +#define _backupVI (1<<22) // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR) +#define _memReadIs (1<<23) // Read Is (VI reg) from memory (used by branches) +#define _memReadIt (1<<24) // Read If (VI reg) from memory (used by branches) +#define _writesVI (1<<25) // Current Instruction writes to VI +#define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction #define isNOP (mVUinfo & (1<<0)) #define isBranch (mVUinfo & (1<<1)) @@ -196,16 +203,18 @@ declareAllVariables #define fvsInstance ((mVUinfo >> 18) & 3) #define fvcInstance ((mVUinfo >> 20) & 3) #define noWriteVF (mVUinfo & (1<<21)) - -//#define getFs (mVUinfo & (1<<13)) -//#define getFt (mVUinfo & (1<<14)) -//#define fpmInstance (((u8)((mVUinfo & (3<<10)) >> 10) - 1) & 0x3) +#define backupVI (mVUinfo & (1<<22)) +#define memReadIs (mVUinfo & (1<<23)) +#define memReadIt (mVUinfo & (1<<24)) +#define writesVI (mVUinfo & (1<<25)) +#define swapOps (mVUinfo & (1<<26)) #define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) #define mmVI(_VIreg_) (_VIreg_ - 1) #ifdef mVUdebug -#define mVUdebugStuff1() { \ +#define mVUlog Console::Notice +#define mVUdebug1() { \ if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \ if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \ @@ -213,5 +222,11 @@ declareAllVariables if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \ } #else -#define mVUdebugStuff1() {} +#define mVUlog 0&& +#define mVUdebug1() {} #endif + +#define mVUcachCheck(x) { \ + uptr diff = mVU->ptr - mVU->cache; \ + if (diff > x) { Console::Error("microVU Error: Program went over it's cache limit. Size = %x", params diff); } \ +} From 653286a6922497afac6839a3aeb8c50b66e12f83 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Fri, 10 Apr 2009 19:47:13 +0000 Subject: [PATCH 037/143] forgot to add microVU_Execution.inl in the last commit xD git-svn-id: http://pcsx2.googlecode.com/svn/trunk@949 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Execute.inl | 162 ++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 pcsx2/x86/microVU_Execute.inl diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl new file mode 100644 index 0000000000..61347d3898 --- /dev/null +++ b/pcsx2/x86/microVU_Execute.inl @@ -0,0 +1,162 @@ +/* Pcsx2 - Pc Ps2 Emulator +* Copyright (C) 2009 Pcsx2-Playground Team +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ +#pragma once +#ifdef PCSX2_MICROVU + +//------------------------------------------------------------------ +// Dispatcher Functions +//------------------------------------------------------------------ + +// Generates the code for entering recompiled blocks +microVUt(void) mVUdispatcherA() { + static u32 PCSX2_ALIGNED16(vuMXCSR); + microVU* mVU = mVUx; + x86SetPtr(mVU->ptr); + + // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. + if (!vuIndex) { CALLFunc((uptr)mVUexecuteVU0); } + else { CALLFunc((uptr)mVUexecuteVU1); } + + // Backup cpu state + PUSH32R(EBX); + PUSH32R(EBP); + PUSH32R(ESI); + PUSH32R(EDI); + + // Load VU's MXCSR state + vuMXCSR = g_sseVUMXCSR; + SSE_LDMXCSR((uptr)&vuMXCSR); + + // Load Regs + MOV32MtoR(gprR, (uptr)&mVU->regs->VI[REG_R]); + MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG]); + MOV32MtoR(gprF1, (uptr)&mVU->regs->VI[REG_MAC_FLAG]); + SHL32ItoR(gprF0, 16); + AND32ItoR(gprF1, 0xffff); + OR32RtoR (gprF0, gprF1); + MOV32RtoR(gprF1, gprF0); + MOV32RtoR(gprF2, gprF0); + MOV32RtoR(gprF3, gprF0); + + for (int i = 0; i < 8; i++) { + MOVQMtoR(i, (uptr)&mVU->regs->VI[i+1]); + } + + SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC); + SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals); + SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals); + SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P]); + SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q]); + SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ + + // Jump to Recompiled Code Block + JMPR(EAX); + mVU->ptr = x86Ptr; +} + +// Generates the code to exit from recompiled blocks +microVUt(void) mVUdispatcherB() { + static u32 PCSX2_ALIGNED16(eeMXCSR); + microVU* mVU = mVUx; + x86SetPtr(mVU->ptr); + + // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. 
+ if (!vuIndex) { CALLFunc((uptr)mVUcleanUpVU0); } + else { CALLFunc((uptr)mVUcleanUpVU1); } + + // Load EE's MXCSR state + eeMXCSR = g_sseMXCSR; + SSE_LDMXCSR((uptr)&eeMXCSR); + + // Save Regs + MOV32RtoR(gprT1, gprF0); // ToDo: Ensure Correct Flag instances + AND32ItoR(gprT1, 0xffff); + SHR32ItoR(gprF0, 16); + MOV32RtoM((uptr)&mVU->regs->VI[REG_R], gprR); + MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG], gprT1); + MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG], gprF0); + + for (int i = 0; i < 8; i++) { + MOVDMMXtoM((uptr)&mVU->regs->VI[i+1], i); + } + + SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC, xmmACC); + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); // ToDo: Ensure Correct Q/P instances + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0); // wzyx = PPPP + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); + + // Restore cpu state + POP32R(EDI); + POP32R(ESI); + POP32R(EBP); + POP32R(EBX); + + EMMS(); + RET(); + + mVU->ptr = x86Ptr; + mVUcachCheck(512); +} + +//------------------------------------------------------------------ +// Execution Functions +//------------------------------------------------------------------ + +// Executes for number of cycles +microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { +/* + Pseudocode: (ToDo: implement # of cycles) + 1) Search for existing program + 2) If program not found, goto 5 + 3) Search for recompiled block + 4) If recompiled block found, goto 6 + 5) Recompile as much blocks as possible + 6) Return start execution address of block +*/ + microVU* mVU = mVUx; + mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles); + if ( mVUsearchProg(mVU) ) { // Found Program + //microBlock* block = mVU->prog.prog[mVU->prog.cur].block[startPC]->search(mVU->prog.lastPipelineState); + //if (block) return block->x86ptrStart; // Found Block + } + // Recompile code + return NULL; +} + +//------------------------------------------------------------------ +// Cleanup Functions +//------------------------------------------------------------------ + +microVUt(void) mVUcleanUp() { + microVU* mVU = mVUx; + mVU->ptr = x86Ptr; + mVUcachCheck(1024); // ToDo: Implement Program Cache Limits +} + +//------------------------------------------------------------------ +// Caller Functions +//------------------------------------------------------------------ + +void __fastcall startVU0(u32 startPC, u32 cycles) { ((mVUrecCall)microVU0.cache)(startPC, cycles); } +void __fastcall startVU1(u32 startPC, u32 cycles) { ((mVUrecCall)microVU1.cache)(startPC, cycles); } +void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { return mVUexecute<0>(startPC, cycles); } +void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { return mVUexecute<1>(startPC, cycles); } +void mVUcleanUpVU0() { mVUcleanUp<0>(); } +void mVUcleanUpVU1() { mVUcleanUp<1>(); } + +#endif //PCSX2_MICROVU From cf995d07163e56551a9dc712eec3072b045adfa3 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Fri, 10 Apr 2009 19:55:05 +0000 Subject: [PATCH 038/143] GSdx: GoW2 fix, 16 bit drawing that caused the green overlay is skipped (character shadow) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@950 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSRendererHW.h | 29 +++++++++++++++++++++++++++++ plugins/GSdx/GSState.cpp | 18 +++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index e343852712..e7b46f7c6f 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ 
-507,6 +507,35 @@ protected: #pragma endregion + #pragma region GoW2 z buffer clear + + if(m_game.title == CRC::GodOfWar2) + { + DWORD FBP = m_context->FRAME.Block(); + DWORD FBW = m_context->FRAME.FBW; + DWORD FPSM = m_context->FRAME.PSM; + + if(FBP == 0x00f00 && FPSM == PSM_PSMZ24) + { + GIFRegTEX0 TEX0; + + TEX0.TBP0 = FBP; + TEX0.TBW = FBW; + TEX0.PSM = FPSM; + + if(GSTextureCache::GSDepthStencil* ds = m_tc->GetDepthStencil(TEX0, m_width, m_height)) + { + m_dev.ClearDepth(ds->m_texture, 0); + } + + return false; + } + + return true; + } + + #pragma endregion + return true; } diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index ff17150ec4..de18465acd 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -2081,6 +2081,22 @@ bool GSC_GodOfWar(const GSFrameInfo& fi, int& skip) return true; } +bool GSC_GodOfWar2(const GSFrameInfo& fi, int& skip) +{ + if(skip == 0) + { + if(fi.TME && fi.FBP == 0x00100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00100 && fi.TPSM == PSM_PSMCT16) + { + skip = 30; // shadows + } + } + else + { + } + + return true; +} + bool GSC_GiTS(const GSFrameInfo& fi, int& skip) { if(skip == 0) @@ -2172,7 +2188,7 @@ bool GSState::IsBadFrame(int& skip) map[CRC::Tekken5] = GSC_Tekken5; map[CRC::IkkiTousen] = GSC_IkkiTousen; map[CRC::GodOfWar] = GSC_GodOfWar; - map[CRC::GodOfWar2] = GSC_GodOfWar; + map[CRC::GodOfWar2] = GSC_GodOfWar2; map[CRC::GiTS] = GSC_GiTS; map[CRC::Onimusha3] = GSC_Onimusha3; map[CRC::TalesOfAbyss] = GSC_TalesOfAbyss; From 9c8a9712a9673f6d7156927f4d28dc8451626dec Mon Sep 17 00:00:00 2001 From: gabest11 Date: Sat, 11 Apr 2009 08:54:48 +0000 Subject: [PATCH 039/143] GSdx: GoW2 fix #2, pal version this time git-svn-id: http://pcsx2.googlecode.com/svn/trunk@952 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSRendererHW.h | 2 +- plugins/GSdx/GSState.cpp | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index e7b46f7c6f..c5aa8e1f88 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -515,7 +515,7 @@ protected: DWORD FBW = m_context->FRAME.FBW; DWORD FPSM = m_context->FRAME.PSM; - if(FBP == 0x00f00 && FPSM == PSM_PSMZ24) + if((FBP == 0x00f00 || FBP == 0x00100) && FPSM == PSM_PSMZ24) // ntsc 0xf00, pal 0x100 { GIFRegTEX0 TEX0; diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index de18465acd..0664a1fff6 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -2085,10 +2085,15 @@ bool GSC_GodOfWar2(const GSFrameInfo& fi, int& skip) { if(skip == 0) { - if(fi.TME && fi.FBP == 0x00100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00100 && fi.TPSM == PSM_PSMCT16) + if(fi.TME && fi.FBP == 0x00100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00100 && fi.TPSM == PSM_PSMCT16 // ntsc + || fi.TME && fi.FBP == 0x02100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x02100 && fi.TPSM == PSM_PSMCT16) // pal { skip = 30; // shadows } + else if(fi.TME && fi.FBP == 0x00500 && fi.FPSM == PSM_PSMCT24 && fi.TBP0 == 0x02100 && fi.TPSM == PSM_PSMCT32) // pal + { + // skip = 17; // only looks correct at native resolution + } } else { From 076e9e5386eb170c1d2bd9108302e8fbd3c0d26a Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sat, 11 Apr 2009 09:25:47 +0000 Subject: [PATCH 040/143] more microVU stuff... 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@953 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.cpp | 17 +++-- pcsx2/x86/microVU.h | 13 +++- pcsx2/x86/microVU_Alloc.h | 3 +- pcsx2/x86/microVU_Analyze.inl | 75 ++++++++++++++++++++-- pcsx2/x86/microVU_Compile.inl | 86 ++++++++++++++++++++----- pcsx2/x86/microVU_Execute.inl | 18 +++--- pcsx2/x86/microVU_Lower.inl | 116 ++++++++++++++++++++-------------- pcsx2/x86/microVU_Misc.h | 10 ++- pcsx2/x86/microVU_Misc.inl | 2 +- 9 files changed, 252 insertions(+), 88 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index d2aef9b3bd..d54d8367ab 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -42,7 +42,6 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { mVU->index = vuIndex; mVU->microSize = (vuIndex ? 0x4000 : 0x1000); mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4; - mVU->cacheAddr = (vuIndex ? 0x1e840000 : 0x0e840000); mVU->cache = NULL; mVUreset(); @@ -62,7 +61,7 @@ microVUt(void) mVUreset() { } // Dynarec Cache - mVU->cache = SysMmapEx(mVU->cacheAddr, mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); + mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); mVU->ptr = mVU->cache; @@ -70,12 +69,21 @@ microVUt(void) mVUreset() { mVUdispatcherA(); mVUdispatcherB(); - // Other Variables + // Program Variables memset(&mVU->prog, 0, sizeof(mVU->prog)); mVU->prog.finished = 1; mVU->prog.cleared = 1; mVU->prog.cur = -1; mVU->prog.total = -1; + + // Setup Dynarec Cache Limits for Each Program + u8* z = (mVU->cache + 512); // Dispatcher Code is in first 512 bytes + for (int i = 0; i <= mVU->prog.max; i++) { + mVU->prog.prog[i].x86start = z; + mVU->prog.prog[i].x86ptr = z; + z += (mVU->cacheSize / (mVU->prog.max + 1)); + mVU->prog.prog[i].x86end = z; + } } // Free Allocated Resources @@ -111,6 +119,7 @@ microVUt(void) mVUclear(u32 addr, u32 size) { // Clears program data (Sets used to 1 because calling this function implies the program will be used at least once) __forceinline void mVUclearProg(microVU* mVU, int progIndex) { mVU->prog.prog[progIndex].used = 1; + mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start; for (u32 i = 0; i < (mVU->progSize / 2); i++) { mVU->prog.prog[progIndex].block[i]->reset(); } @@ -149,7 +158,7 @@ __forceinline int mVUsearchProg(microVU* mVU) { for (int i = 0; i <= mVU->prog.total; i++) { //if (i == mVU->prog.cur) continue; // We can skip the current program. 
(ToDo: Verify that games don't clear, and send the same microprogram :/) if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) { - if (i == mVU->prog.cur) SysPrintf("microVU: Same micro program sent!\n"); + if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); } mVU->prog.cur = i; mVU->prog.cleared = 0; mVU->prog.prog[i].used++; diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 41412f70bf..81294fbe28 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -92,7 +92,10 @@ public: template struct microProgram { u32 data[progSize/4]; - u32 used; // Number of times its been used + u32 used; // Number of times its been used + u8* x86ptr; // Pointer to program's recompilation code + u8* x86start; // Start of program's rec-cache + u8* x86end; // Limit of program's rec-cache microBlockManager* block[progSize/8]; microAllocInfo allocInfo; }; @@ -112,20 +115,24 @@ struct microProgManager { struct microVU { u32 index; // VU Index (VU0 or VU1) u32 microSize; // VU Micro Memory Size - u32 progSize; // VU Micro Program Size (microSize/8) - u32 cacheAddr; // VU Cache Start Address + u32 progSize; // VU Micro Program Size (microSize/4) static const u32 cacheSize = 0x500000; // VU Cache Size microProgManager<0x4000> prog; // Micro Program Data VURegs* regs; // VU Regs Struct u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to) + u8* startFunct; // Ptr Function to the Start code for recompiled programs + u8* exitFunct; // Ptr Function to the Exit code for recompiled programs u8* ptr; // Pointer to next place to write recompiled code to u32 code; // Contains the current Instruction u32 iReg; // iReg (only used in recompilation, not execution) u32 clipFlag[4]; // 4 instances of clip flag (used in execution) u32 divFlag; // 1 instance of I/D flags u32 VIbackup[2]; // Holds a backup of a VI reg if modified before a branch + u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump to (JALR/JR) + u32 p; // Holds current P instance index + u32 q; // Holds current Q instance index }; // microVU rec structs diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index e277f12723..cb6bbbbb5f 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -52,9 +52,10 @@ template struct microAllocInfo { microRegInfo regs; // Pipeline info microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) - u8 branch; // 0 = No Branch, 1 = Branch, 2 = Conditional Branch, 3 = Jump (JALR/JR) + u8 branch; // 0 = No Branch, 1 = B. 
2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u32 cycles; // Cycles for current block + u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block) u32 curPC; // Current PC u32 startPC; // Start PC for Cur Block u32 info[pSize/8]; // Info for Instructions in current block diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 6768153aee..224ebbbd09 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -102,6 +102,49 @@ microVUt(void) mVUanalyzeFMAC4(int Fs, int Ft) { analyzeReg4(Ft); } +//------------------------------------------------------------------ +// IALU - IALU Opcodes +//------------------------------------------------------------------ + +#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } +#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; mVUinfo |= _writesVI; mVU->VIbackup[0] = reg; } } + +microVUt(void) mVUanalyzeIALU1(int Id, int Is, int It) { + microVU* mVU = mVUx; + if (!Id) { mVUinfo |= _isNOP; } + analyzeVIreg1(Is); + analyzeVIreg1(It); + analyzeVIreg2(Id, 1); +} + +microVUt(void) mVUanalyzeIALU2(int Is, int It) { + microVU* mVU = mVUx; + if (!It) { mVUinfo |= _isNOP; } + analyzeVIreg1(Is); + analyzeVIreg2(It, 1); +} + +//------------------------------------------------------------------ +// MR32 - MR32 Opcode +//------------------------------------------------------------------ + +// Flips xyzw stalls to yzwx +#define analyzeReg6(reg) { \ + if (reg) { \ + if (_X) { mVUstall = aMax(mVUstall, aReg(reg).y); } \ + if (_Y) { mVUstall = aMax(mVUstall, aReg(reg).z); } \ + if (_Z) { mVUstall = aMax(mVUstall, aReg(reg).w); } \ + if (_W) { mVUstall = aMax(mVUstall, aReg(reg).x); } \ + } \ +} + +microVUt(void) mVUanalyzeMR32(int Fs, int Ft) { + microVU* mVU = mVUx; + if (!Ft) { mVUinfo |= _isNOP; } + analyzeReg6(Fs); + analyzeReg2(Ft); +} + //------------------------------------------------------------------ // FDIV - DIV/SQRT/RSQRT Opcodes //------------------------------------------------------------------ @@ -144,11 +187,18 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { } //------------------------------------------------------------------ -// LQx - LQ/LQD/LQI Opcodes +// MFP - MFP Opcode //------------------------------------------------------------------ -#define analyzeVIreg1(reg) { if (reg) { mVUstall = aMax(mVUstall, mVUregs.VI[reg]); } } -#define analyzeVIreg2(reg, aCycles) { if (reg) { mVUregsTemp.VIreg = reg; mVUregsTemp.VI = aCycles; mVUinfo |= _writesVI; mVU->VIbackup[0] = reg; } } +microVUt(void) mVUanalyzeMFP(int Ft) { + microVU* mVU = mVUx; // ToDo: Needs special info for P reg? + if (!Ft) { mVUinfo |= _isNOP; } + analyzeReg2(Ft); +} + +//------------------------------------------------------------------ +// LQx - LQ/LQD/LQI Opcodes +//------------------------------------------------------------------ microVUt(void) mVUanalyzeLQ(int Ft, int Is, bool writeIs) { microVU* mVU = mVUx; @@ -183,7 +233,7 @@ microVUt(void) mVUanalyzeR1(int Fs, int Fsf) { microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { microVU* mVU = mVUx; - if (!Ft) { mVUinfo |= ((canBeNOP) ? _isNOP : _noWriteVF); return; } + if (!Ft) { mVUinfo |= ((canBeNOP) ? 
_isNOP : _noWriteVF); } analyzeReg2(Ft); analyzeRreg(); } @@ -194,11 +244,22 @@ microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { microVUt(void) mVUanalyzeSflag(int It) { microVU* mVU = mVUx; - if (!It) { mVUinfo |= _isNOP; return; } - mVUinfo |= _isSflag; + if (!It) { mVUinfo |= _isNOP; } + else { mVUinfo |= _isSflag | _swapOps; } // ToDo: set s flag at right time analyzeVIreg2(It, 1); } +microVUt(void) mVUanalyzeFSSET() { + microVU* mVU = mVUx; + int i, curPC = iPC; + for (i = mVUcount; i > 0; i--) { + incPC2(-2); + if (isSflag) break; + mVUinfo &= ~_doStatus; + } + iPC = curPC; +} + //------------------------------------------------------------------ // XGkick //------------------------------------------------------------------ @@ -218,7 +279,7 @@ microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) { //------------------------------------------------------------------ #define analyzeBranchVI(reg, infoVal) { \ - if (reg && (mVUcycles > 1)) { /* Ensures branch is not first opcode in block */ \ + if (reg && (mVUcount > 0)) { /* Ensures branch is not first opcode in block */ \ incPC(-2); \ if (writesVI && (reg == mVU->VIbackup[0])) { /* If prev Op modified VI reg */ \ mVUinfo |= _backupVI; \ diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index c9a8217641..85c4938aa7 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -29,10 +29,25 @@ } \ } +#define branchCase(Xcmp) \ + CMP16ItoM((uptr)mVU->branch, 0); \ + ajmp = Xcmp((uptr)0); \ + break + +#define branchCase2() { \ + incPC(-2); \ + MOV32ItoR(gprT1, (xPC + (2 * 8)) & ((vuIndex) ? 0x3fff:0xfff)); \ + mVUallocVIb(gprT1, _Ft_); \ + incPC(+2); \ +} + #define startLoop() { mVUdebug1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } #define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } +#define incP() { mVU->p = (mVU->p+1) & 1; } +#define incQ() { mVU->q = (mVU->q+1) & 1; } microVUt(void) mVUincCycles(int x) { + microVU* mVU = mVUx; mVUcycles += x; for (int z = 31; z > 0; z--) { calcCycles(mVUregs.VF[z].x, x); @@ -45,9 +60,12 @@ microVUt(void) mVUincCycles(int x) { } if (mVUregs.q) { calcCycles(mVUregs.q, x); - if (!mVUregs.q) {} // Do Status Flag Merging Stuff? + if (!mVUregs.q) { incQ(); } // Do Status Flag Merging Stuff? + } + if (mVUregs.p) { + calcCycles(mVUregs.p, x); + if (!mVUregs.p) { incP(); } } - calcCycles(mVUregs.p, x); calcCycles(mVUregs.r, x); calcCycles(mVUregs.xgkick, x); } @@ -57,8 +75,7 @@ microVUt(void) mVUsetCycles() { incCycles(mVUstall); if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && !mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP - //mVUregsTemp.VF[1].reg = mVUregsTemp.VF[0]; // Just use cycles from upper Op (incorrect?) - mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector (correct?) 
+ mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y); mVUregsTemp.VF[1].z = aMax(mVUregsTemp.VF[0].z, mVUregsTemp.VF[1].z); mVUregsTemp.VF[1].w = aMax(mVUregsTemp.VF[0].w, mVUregsTemp.VF[1].w); @@ -72,20 +89,28 @@ microVUt(void) mVUsetCycles() { mVUregs.xgkick = mVUregsTemp.xgkick; } -microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { +//------------------------------------------------------------------ +// Recompiler +//------------------------------------------------------------------ + +microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { microVU* mVU = mVUx; microBlock block; + u8* thisPtr = mVUcurProg.x86Ptr; iPC = startPC / 4; // Searches for Existing Compiled Block (if found, then returns; else, compile) - microBlock* pblock = mVUblock[iPC]->search(pipelineState, pState); - if (block) { x86SetPtr(pblock->x86ptrEnd); return; } + microBlock* pblock = mVUblock[iPC/2]->search(pipelineState, pState); + if (block) { return pblock->x86ptrStart; } // First Pass setCode(); mVUbranch = 0; mVUstartPC = iPC; + mVUcount = 0; mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage + mVU->p = 0; // All blocks start at p index #0 + mVU->q = 0; // All blocks start at q index #0 for (int branch = 0;; ) { startLoop(); mVUopU(); @@ -94,11 +119,15 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, if (curI & _Ibit_) { incPC(1); mVUinfo |= _isNOP; } else { incPC(1); mVUopL(); } mVUsetCycles(); + if (mVU->p) { mVUinfo |= _readP; } + if (mVU->q) { mVUinfo |= _readQ; } + else { mVUinfo |= _writeQ; } if (branch >= 2) { mVUinfo |= _isEOB | ((branch == 3) ? _isBdelay : 0); if (mVUbranch) { Console::Error("microVU Warning: Branch in E-bit/Branch delay slot!"); mVUinfo |= _isNOP; } break; } else if (branch == 1) { branch = 2; } if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; } incPC(1); incCycles(1); + mVUcount++; } // Second Pass @@ -109,24 +138,51 @@ microVUx(void) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, // ToDo: status/mac flag stuff? // if (isEOB) { x = 0; } - else if (isBranch) { mVUopU(); incPC(2); } + //if (isBranch2) { mVUopU(); incPC(2); } if (isNop) { mVUopU(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } else if (!swapOps) { mVUopU(); incPC(1); mVUopL(); } else { incPC(1); mVUopL(); incPC(-1); mVUopU(); incPC(1); } if (!isBdelay) { incPC(1); } - else { - incPC(-2); // Go back to Branch Opcode - mVUopL(); // Run Branch Opcode + else { + u32* ajmp; switch (mVUbranch) { - case 1: break; - case 2: break; - case 3: break; + case 3: branchCase(JZ32); // IBEQ + case 4: branchCase(JGE32); // IBGEZ + case 5: branchCase(JG32); // IBGTZ + case 6: branchCase(JLE32); // IBLEQ + case 7: branchCase(JL32); // IBLTZ + case 8: branchCase(JNZ32); // IBNEQ + case 2: branchCase2(); // BAL + case 1: + // search for block + ajmp = JMP32((uptr)0); + + break; // B/BAL + case 9: branchCase2(); // JALR + case 10: break; // JR/JALR + //mVUcurProg.x86Ptr } - break; + return thisPtr; } } + // Do E-bit end stuff here + + incCycles(55); // Ensures Valid P/Q instances + mVUcycles -= 55; + if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); } + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 
3 : 2); + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); + + MOV32ItoM((uptr)&mVU->p, mVU->p); + MOV32ItoM((uptr)&mVU->q, mVU->q); + AND32ItoM((uptr)µVU0.regs.VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag + AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Not sure what this does but zerorecs do it... + MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC], xPC); + JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5)); + return thisPtr; } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 61347d3898..a1a875e39a 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -27,6 +27,7 @@ microVUt(void) mVUdispatcherA() { static u32 PCSX2_ALIGNED16(vuMXCSR); microVU* mVU = mVUx; x86SetPtr(mVU->ptr); + mVU->startFunct = mVU->ptr; // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. if (!vuIndex) { CALLFunc((uptr)mVUexecuteVU0); } @@ -74,6 +75,7 @@ microVUt(void) mVUdispatcherB() { static u32 PCSX2_ALIGNED16(eeMXCSR); microVU* mVU = mVUx; x86SetPtr(mVU->ptr); + mVU->exitFunct = mVU->ptr; // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. if (!vuIndex) { CALLFunc((uptr)mVUcleanUpVU0); } @@ -96,9 +98,9 @@ microVUt(void) mVUdispatcherB() { } SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC, xmmACC); - SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); // ToDo: Ensure Correct Q/P instances - SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0); // wzyx = PPPP - SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); + //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); // ToDo: Ensure Correct Q/P instances + //SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0); // wzyx = PPPP + //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); // Restore cpu state POP32R(EDI); @@ -110,7 +112,7 @@ microVUt(void) mVUdispatcherB() { RET(); mVU->ptr = x86Ptr; - mVUcachCheck(512); + mVUcachCheck(mVU->cache, 512); } //------------------------------------------------------------------ @@ -144,16 +146,16 @@ microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { microVUt(void) mVUcleanUp() { microVU* mVU = mVUx; - mVU->ptr = x86Ptr; - mVUcachCheck(1024); // ToDo: Implement Program Cache Limits + mVU->ptr = mVUcurProg.x86ptr; + mVUcachCheck(mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start)); } //------------------------------------------------------------------ // Caller Functions //------------------------------------------------------------------ -void __fastcall startVU0(u32 startPC, u32 cycles) { ((mVUrecCall)microVU0.cache)(startPC, cycles); } -void __fastcall startVU1(u32 startPC, u32 cycles) { ((mVUrecCall)microVU1.cache)(startPC, cycles); } +void __fastcall startVU0(u32 startPC, u32 cycles) { ((mVUrecCall)microVU0.startFunct)(startPC, cycles); } +void __fastcall startVU1(u32 startPC, u32 cycles) { ((mVUrecCall)microVU1.startFunct)(startPC, cycles); } void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { return mVUexecute<0>(startPC, cycles); } void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { return mVUexecute<1>(startPC, cycles); } void mVUcleanUpVU0() { mVUcleanUp<0>(); } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 18e73ebde7..a247f00a6c 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -545,14 +545,12 @@ microVUf(void) mVU_FSOR() { microVUf(void) 
mVU_FSSET() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeFSSET(); } else { int flagReg; getFlagReg(flagReg, fsInstance); - MOV16ItoR(gprT1, (_Imm12_ & 0xfc0)); - //if (_Imm12_ & 0xc00) { mVUdivFlag = _Imm12_ >> 9; } - //else { mVUdivFlag = 1; } - //mVUdivFlagT = 4; + AND32ItoR(flagReg, 0x03f); + OR32ItoR(flagReg, (_Imm12_ & 0xfc0)); } } @@ -562,7 +560,7 @@ microVUf(void) mVU_FSSET() { microVUf(void) mVU_IADD() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { @@ -576,7 +574,7 @@ microVUf(void) mVU_IADD() { microVUf(void) mVU_IADDI() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); ADD16ItoR(gprT1, _Imm5_); @@ -586,7 +584,7 @@ microVUf(void) mVU_IADDI() { microVUf(void) mVU_IADDIU() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); ADD16ItoR(gprT1, _Imm12_); @@ -596,7 +594,7 @@ microVUf(void) mVU_IADDIU() { microVUf(void) mVU_IAND() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { @@ -609,7 +607,7 @@ microVUf(void) mVU_IAND() { microVUf(void) mVU_IOR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { @@ -622,7 +620,7 @@ microVUf(void) mVU_IOR() { microVUf(void) mVU_ISUB() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { if (_Ft_ != _Fs_) { mVUallocVIa(gprT1, _Fs_); @@ -639,7 +637,7 @@ microVUf(void) mVU_ISUB() { microVUf(void) mVU_ISUBIU() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { mVUallocVIa(gprT1, _Fs_); SUB16ItoR(gprT1, _Imm12_); @@ -653,7 +651,7 @@ microVUf(void) mVU_ISUBIU() { microVUf(void) mVU_MFIR() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeReg2(_Ft_); } else { mVUallocVIa(gprT1, _Fs_); MOVSX32R16toR(gprT1, gprT1); @@ -665,7 +663,7 @@ microVUf(void) mVU_MFIR() { microVUf(void) mVU_MFP() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeMFP(_Ft_); } else { getPreg(xmmFt); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -674,7 +672,7 @@ microVUf(void) mVU_MFP() { microVUf(void) mVU_MOVE() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_ || (_Ft_ == _Fs_)) nop();*/ } + if (!recPass) { if (!_Ft_ || (_Ft_ == _Fs_)) { mVUinfo |= _isNOP; } analyzeReg1(_Fs_); analyzeReg2(_Ft_); } else { mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -683,7 +681,7 @@ microVUf(void) mVU_MOVE() { microVUf(void) mVU_MR32() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { mVUanalyzeMR32(_Fs_, _Ft_); } else { mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], (_X_Y_Z_W == 8) ? 
4 : 15); if (_X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x39); } @@ -693,7 +691,7 @@ microVUf(void) mVU_MR32() { microVUf(void) mVU_MTIR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeReg5(_Fs_, _Fsf_); analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); mVUallocVIb(gprT1, _Ft_); @@ -706,7 +704,7 @@ microVUf(void) mVU_MTIR() { microVUf(void) mVU_ILW() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } else { if (!_Fs_) { MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS ); @@ -725,7 +723,7 @@ microVUf(void) mVU_ILW() { microVUf(void) mVU_ILWR() { microVU* mVU = mVUx; - if (!recPass) { /*If (!_Ft_) nop();*/ } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } else { if (!_Fs_) { MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS); @@ -747,7 +745,7 @@ microVUf(void) mVU_ILWR() { microVUf(void) mVU_ISW() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } else { if (!_Fs_) { int imm = getVUmem(_Imm11_); @@ -772,7 +770,7 @@ microVUf(void) mVU_ISW() { microVUf(void) mVU_ISWR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } else { if (!_Fs_) { mVUallocVIa(gprT1, _Ft_); @@ -1006,7 +1004,7 @@ microVUf(void) mVU_WAITQ() { microVUf(void) mVU_XTOP() { microVU* mVU = mVUx; - if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; return; } analyzeVIreg2(_Ft_, 1); } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->top); mVUallocVIb(gprT1, _Ft_); @@ -1015,7 +1013,7 @@ microVUf(void) mVU_XTOP() { microVUf(void) mVU_XITOP() { microVU* mVU = mVUx; - if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; return; } analyzeVIreg2(_Ft_, 1); } + if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); } else { MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->itop ); mVUallocVIb(gprT1, _Ft_); @@ -1055,64 +1053,90 @@ microVUf(void) mVU_XGKICK() { microVUf(void) mVU_B() { microVU* mVU = mVUx; - mVUbranch = 1; + mVUbranch = 1; + if (!recPass) { /*mVUinfo |= _isBranch2;*/ } } microVUf(void) mVU_BAL() { microVU* mVU = mVUx; - mVUbranch = 1; - if (!recPass) { analyzeVIreg2(_Ft_, 1); } - else { - MOV32ItoR(gprT1, (xPC + (2 * 8)) & 0xffff); - mVUallocVIb(gprT1, _Ft_); - } + mVUbranch = 2; + if (!recPass) { /*mVUinfo |= _isBranch2;*/ analyzeVIreg2(_Ft_, 1); } + else {} } microVUf(void) mVU_IBEQ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 3; if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } - else {} + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); } + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBGEZ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 4; if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else {} + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + //SHR32ItoR(gprT1, 15); + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBGTZ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 5; if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else {} + else { + if (memReadIs) 
MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBLEZ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 6; if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else {} + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBLTZ() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 7; if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else {} + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + //SHR32ItoR(gprT1, 15); + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_IBNE() { microVU* mVU = mVUx; - mVUbranch = 2; + mVUbranch = 8; if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } - else {} + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); } + MOV32RtoM((uptr)mVU->branch, gprT1); + } } microVUf(void) mVU_JR() { microVU* mVU = mVUx; - mVUbranch = 3; + mVUbranch = 9; if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else {} } microVUf(void) mVU_JALR() { microVU* mVU = mVUx; - mVUbranch = 3; + mVUbranch = 10; if (!recPass) { mVUanalyzeBranch1(_Fs_); analyzeVIreg2(_Ft_, 1); } - else {} } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 5a1267a556..aaf45425d4 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -143,6 +143,7 @@ declareAllVariables #define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles +#define mVUcount mVUallocInfo.count #define mVUstall mVUallocInfo.maxStall #define mVUregs mVUallocInfo.regs #define mVUregsTemp mVUallocInfo.regsTemp @@ -153,6 +154,7 @@ declareAllVariables #define curI mVUcurProg.data[iPC] #define setCode() { mVU->code = curI; } #define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } +#define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); } #define incCycles(x) { mVUincCycles(x); } #define _isNOP (1<<0) // Skip Lower Instruction @@ -181,6 +183,7 @@ declareAllVariables #define _memReadIt (1<<24) // Read If (VI reg) from memory (used by branches) #define _writesVI (1<<25) // Current Instruction writes to VI #define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction +//#define _isBranch2 (1<<27) // Cur Instruction is a Branch that writes VI regs (BAL/JALR) #define isNOP (mVUinfo & (1<<0)) #define isBranch (mVUinfo & (1<<1)) @@ -208,6 +211,7 @@ declareAllVariables #define memReadIt (mVUinfo & (1<<24)) #define writesVI (mVUinfo & (1<<25)) #define swapOps (mVUinfo & (1<<26)) +//#define isBranch2 (mVUinfo & (1<<27)) #define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) #define mmVI(_VIreg_) (_VIreg_ - 1) @@ -226,7 +230,7 @@ declareAllVariables #define mVUdebug1() {} #endif -#define mVUcachCheck(x) { \ - uptr diff = mVU->ptr - mVU->cache; \ - if (diff > x) { Console::Error("microVU Error: Program went over it's cache limit. Size = %x", params diff); } \ +#define mVUcachCheck(start, limit) { \ + uptr diff = mVU->ptr - start; \ + if (diff >= limit) { Console::Error("microVU Error: Program went over it's cache limit. 
Size = %x", params diff); } \ } diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 208a3852c7..4154048046 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -251,7 +251,7 @@ microVUt(void) mVUaddrFix(int gprReg) { u8 *jmpA, *jmpB; CMP32ItoR(EAX, 0x400); jmpA = JL8(0); // if addr >= 0x4000, reads VU1's VF regs and VI regs - AND32ItoR(EAX, 0x43f); + AND32ItoR(EAX, 0x43f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs! jmpB = JMP8(0); x86SetJ8(jmpA); AND32ItoR(EAX, 0xff); // if addr < 0x4000, wrap around From a388d2969c172b9266309e665d800f8148969ba6 Mon Sep 17 00:00:00 2001 From: refraction Date: Sat, 11 Apr 2009 13:23:05 +0000 Subject: [PATCH 041/143] Fixed bug from Issue 144. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@954 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 4d2ec15a8c..e6155bc4ea 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -607,7 +607,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if ((unpackType == 0xC) && (vifRegs->cycle.cl == vifRegs->cycle.wl)) //No use when SSE is available { // v4-32 - if ((vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0)) + if (v->size == (size >> 2) && (vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0)) { vifRegs->num -= size >> 4; memcpy_fast((u8*)dest, cdata, size); From 4df3f80d30acbbab927c87ff7a31a7275ae28420 Mon Sep 17 00:00:00 2001 From: refraction Date: Sat, 11 Apr 2009 17:45:14 +0000 Subject: [PATCH 042/143] Optimized and split up the unpack call a bit so less checks are being run, should bring an overall speed increase. Also got rid of some duplicate pointer rubbish which was all over the place. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@955 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Vif.cpp | 89 +++--- pcsx2/Vif.h | 38 +-- pcsx2/VifDma.cpp | 463 +++++++++++++++-------------- pcsx2/x86/aVif.S | 20 +- pcsx2/x86/ix86-32/aVif_proc-32.asm | 20 +- 5 files changed, 331 insertions(+), 299 deletions(-) diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index f20103e72d..cb81e5f6b2 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -25,10 +25,10 @@ #include "Vif.h" #include "VifDma.h" -VIFregisters *_vifRegs; -u32* _vifRow = NULL, *_vifCol = NULL; -u32* _vifMaskRegs = NULL; -vifStruct *_vif; +VIFregisters *vifRegs; +u32* vifRow = NULL, *vifCol = NULL; +u32* vifMaskRegs = NULL; +vifStruct *vif; PCSX2_ALIGNED16(u32 g_vifRow0[4]); PCSX2_ALIGNED16(u32 g_vifCol0[4]); @@ -57,24 +57,24 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) int n; u32 vifRowReg = getVifRowRegs(offnum); - if (_vifRegs->code & 0x10000000) + if (vifRegs->code & 0x10000000) { - switch (_vif->cl) + switch (vif->cl) { case 0: if (offnum == OFFSET_X) - n = (_vifRegs->mask) & 0x3; + n = (vifRegs->mask) & 0x3; else - n = (_vifRegs->mask >> (offnum * 2)) & 0x3; + n = (vifRegs->mask >> (offnum * 2)) & 0x3; break; case 1: - n = (_vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; + n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; break; case 2: - n = (_vifRegs->mask >> (16 + (offnum * 2))) & 0x3; + n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3; break; default: - n = (_vifRegs->mask >> (24 + (offnum * 2))) & 0x3; + n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3; break; } } @@ -83,11 +83,11 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) switch (n) { case 0: - if ((_vif->cmd & 0x6F) == 0x6f) + if ((vif->cmd & 0x6F) == 0x6f) { dest = data; } - else switch (_vifRegs->mode) + else switch (vifRegs->mode) { case 1: dest = data + vifRowReg; @@ -105,13 +105,12 @@ static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) dest = vifRowReg; break; case 2: - dest = getVifColRegs((_vif->cl > 2) ? 3 : _vif->cl); + dest = getVifColRegs((vif->cl > 2) ? 
3 : vif->cl); break; case 3: - //Masked so don't do anything break; } -// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,_vifRegs->mode,_vifRegs->r0,data); +// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,vifRegs->mode,vifRegs->r0,data); } template @@ -127,78 +126,78 @@ void __fastcall UNPACK_S(u32 *dest, T *data, int size) template void __fastcall UNPACK_V2(u32 *dest, T *data, int size) { - if (_vifRegs->offset == OFFSET_X) + if (vifRegs->offset == OFFSET_X) { if (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset = OFFSET_Y; + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_Y; size--; } } - if (_vifRegs->offset == OFFSET_Y) + if (vifRegs->offset == OFFSET_Y) { if (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data); - _vifRegs->offset = OFFSET_Z; + writeXYZW(vifRegs->offset, *dest++, *data); + vifRegs->offset = OFFSET_Z; size--; } } - if (_vifRegs->offset == OFFSET_Z) + if (vifRegs->offset == OFFSET_Z) { - writeXYZW(_vifRegs->offset, *dest++, *dest-2); - _vifRegs->offset = OFFSET_W; + writeXYZW(vifRegs->offset, *dest++, *dest-2); + vifRegs->offset = OFFSET_W; } - if (_vifRegs->offset == OFFSET_W) + if (vifRegs->offset == OFFSET_W) { - writeXYZW(_vifRegs->offset, *dest, *data); - _vifRegs->offset = OFFSET_X; + writeXYZW(vifRegs->offset, *dest, *data); + vifRegs->offset = OFFSET_X; } } template void __fastcall UNPACK_V3(u32 *dest, T *data, int size) { - if(_vifRegs->offset == OFFSET_X) + if(vifRegs->offset == OFFSET_X) { if (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset = OFFSET_Y; + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_Y; size--; } } - if(_vifRegs->offset == OFFSET_Y) + if(vifRegs->offset == OFFSET_Y) { if (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset = OFFSET_Z; + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_Z; size--; } } - if(_vifRegs->offset == OFFSET_Z) + if(vifRegs->offset == OFFSET_Z) { if (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset = OFFSET_W; + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_W; size--; } } - if(_vifRegs->offset == OFFSET_W) + if(vifRegs->offset == OFFSET_W) { //V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!) //Ape Escape doesnt seem to like it tho (what the hell?) 
gonna have to investigate - writeXYZW(_vifRegs->offset, *dest, *data); - _vifRegs->offset = OFFSET_X; + writeXYZW(vifRegs->offset, *dest, *data); + vifRegs->offset = OFFSET_X; } } @@ -207,12 +206,12 @@ void __fastcall UNPACK_V4(u32 *dest, T *data , int size) { while (size > 0) { - writeXYZW(_vifRegs->offset, *dest++, *data++); - _vifRegs->offset++; + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset++; size--; } - if (_vifRegs->offset > OFFSET_W) _vifRegs->offset = OFFSET_X; + if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X; } void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) @@ -391,7 +390,7 @@ static __forceinline int mfifoVIF1rbTransfer() return ret; } -static __forceinline int mfifoVIF1chain() +static __forceinline int mfifo_VIF1chain() { int ret; @@ -531,7 +530,7 @@ void vifMFIFOInterrupt() { g_vifCycles = 0; - if (vif1.inprogress == 1) mfifoVIF1chain(); + if (vif1.inprogress == 1) mfifo_VIF1chain(); if (vif1.irq && vif1.tag.size == 0) { diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index fa318d7618..e01cb32bd8 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -81,9 +81,9 @@ struct VIFregisters { extern "C" { // these use cdecl for Asm code references. - extern VIFregisters *_vifRegs; - extern u32* _vifMaskRegs; - extern u32* _vifRow; + extern VIFregisters *vifRegs; + extern u32* vifMaskRegs; + extern u32* vifRow; extern u32* _vifCol; } @@ -92,16 +92,16 @@ static __forceinline u32 setVifRowRegs(u32 reg, u32 data) switch (reg) { case 0: - _vifRegs->r0 = data; + vifRegs->r0 = data; break; case 1: - _vifRegs->r1 = data; + vifRegs->r1 = data; break; case 2: - _vifRegs->r2 = data; + vifRegs->r2 = data; break; case 3: - _vifRegs->r3 = data; + vifRegs->r3 = data; break; jNO_DEFAULT; } @@ -113,16 +113,16 @@ static __forceinline u32 getVifRowRegs(u32 reg) switch (reg) { case 0: - return _vifRegs->r0; + return vifRegs->r0; break; case 1: - return _vifRegs->r1; + return vifRegs->r1; break; case 2: - return _vifRegs->r2; + return vifRegs->r2; break; case 3: - return _vifRegs->r3; + return vifRegs->r3; break; jNO_DEFAULT; } @@ -133,16 +133,16 @@ static __forceinline u32 setVifColRegs(u32 reg, u32 data) switch (reg) { case 0: - _vifRegs->c0 = data; + vifRegs->c0 = data; break; case 1: - _vifRegs->c1 = data; + vifRegs->c1 = data; break; case 2: - _vifRegs->c2 = data; + vifRegs->c2 = data; break; case 3: - _vifRegs->c3 = data; + vifRegs->c3 = data; break; jNO_DEFAULT; } @@ -154,16 +154,16 @@ static __forceinline u32 getVifColRegs(u32 reg) switch (reg) { case 0: - return _vifRegs->c0; + return vifRegs->c0; break; case 1: - return _vifRegs->c1; + return vifRegs->c1; break; case 2: - return _vifRegs->c2; + return vifRegs->c2; break; case 3: - return _vifRegs->c3; + return vifRegs->c3; break; jNO_DEFAULT; } diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index e6155bc4ea..8efee3181d 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -37,10 +37,10 @@ using namespace std; // for min / max extern "C" { // Need cdecl on these for ASM references. 
- extern VIFregisters *_vifRegs; - extern u32* _vifMaskRegs; - extern u32* _vifRow; - extern u32* _vifCol; + extern VIFregisters *vifRegs; + extern u32* vifMaskRegs; + extern u32* vifRow; + extern u32* vifCol; } PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]); @@ -48,7 +48,7 @@ PCSX2_ALIGNED16_EXTERN(u32 g_vifCol0[4]); PCSX2_ALIGNED16_EXTERN(u32 g_vifRow1[4]); PCSX2_ALIGNED16_EXTERN(u32 g_vifCol1[4]); -extern vifStruct *_vif; +extern vifStruct *vif; vifStruct vif0, vif1; @@ -254,21 +254,9 @@ __forceinline static int _limit(int a, int max) static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int VIFdmanum) { const VIFUnpackFuncTable *unpack; - vifStruct *vif; - VIFregisters *vifRegs; + unpack = &VIFfuncTable[ unpackType ]; - if (VIFdmanum == 0) - { - vif = &vif0; - vifRegs = vif0Regs; - } - else - { - vif = &vif1; - vifRegs = vif1Regs; - } - switch (unpackType) { case 0x0: @@ -338,85 +326,49 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int } //This is sorted out later - vif->tag.addr &= ~0xf; + if((vif->tag.addr & 0xf) != (vifRegs->offset * 4)) + { + VIFUNPACK_LOG("addr aligned to %x", vif->tag.addr); + vif->tag.addr = (vif->tag.addr & ~0xf) + (vifRegs->offset * 4); + } + } -static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) { u32 *dest; u32 unpackType; UNPACKFUNCTYPE func; const VIFUnpackFuncTable *ft; - vifStruct *vif; - VIFregisters *vifRegs; VURegs * VU; u8 *cdata = (u8*)data; + #ifdef _DEBUG u32 memsize = VIFdmanum ? 0x4000 : 0x1000; #endif - _mm_prefetch((char*)data, _MM_HINT_NTA); - if (VIFdmanum == 0) { VU = &VU0; - vif = &vif0; - vifRegs = vif0Regs; assert(v->addr < memsize); } else { - VU = &VU1; - vif = &vif1; - vifRegs = vif1Regs; assert(v->addr < memsize); - - if (vu1MicroIsSkipping()) - { - // don't process since the frame is dummy - vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16); - return; - } } dest = (u32*)(VU->Mem + v->addr); - VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x", + VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x", VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num); - - VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset); -#ifdef _DEBUG - if (v->size != size) - { - VIF_LOG("*PCSX2*: warning v->size != size"); - } - - if ((v->addr + size*4) > memsize) - { - Console::Notice("*PCSX2*: fixme unpack overflow"); - Console::WriteLn("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x", - params VIFdmanum, v->cmd & 0xf, v->size, size, v->addr); - } -#endif // The unpack type unpackType = v->cmd & 0xf; - - if (size == 0) - { - VIFUNPACK_LOG("*PCSX2*: Unpack %x with size 0!! v->size = %d cl = %d, wl = %d, mode %d mask %x", v->cmd, v->size, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mode, vifRegs->mask); - } - - _mm_prefetch((char*)data + 128, _MM_HINT_NTA); - _vifRegs = (VIFregisters*)vifRegs; - _vifMaskRegs = VIFdmanum ? g_vif1Masks : g_vif0Masks; - _vif = vif; - _vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0; ft = &VIFfuncTable[ unpackType ]; - func = _vif->usn ? ft->funcU : ft->funcS; + func = vif->usn ? 
ft->funcU : ft->funcS; size <<= 2; @@ -424,23 +376,12 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma memsize = size; #endif - if (v->size != (size >> 2)) - ProcessMemSkip(size, unpackType, VIFdmanum); - - - if(vif->tag.addr > (u32)(VIFdmanum ? 0x4000 : 0x1000)) - { - //Sanity Check (memory overflow) - DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 0x4000 : 0x1000); - - } - - if (_vifRegs->offset > 0) - { - int destinc, unpacksize; + if(vif1Regs->offset != 0) + { + int unpacksize; //This is just to make sure the alignment isnt loopy on a split packet - if(_vifRegs->offset != ((vif->tag.addr & 0xf) >> 2)) + if(vifRegs->offset != ((vif->tag.addr & 0xf) >> 2)) { DevCon::Error("Warning: Unpack alignment error"); } @@ -449,48 +390,50 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if(((size / ft->dsize) + vifRegs->offset) < (u32)ft->qsize) VIFUNPACK_LOG("Warning! Size needed to align %x size chunks available %x offset %x", ft->qsize - ((size / ft->dsize) + vifRegs->offset), vifRegs->offset); - // SSE doesn't handle such small data - if (vifRegs->offset < (u32)ft->qsize) + if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset)) { - if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset)) - { - Console::WriteLn("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset)); - } + DevCon::Error("Wasn't enough left size/dsize = %x left to write %x", params(size / ft->dsize), (ft->qsize - vifRegs->offset)); + } unpacksize = min(((u32)size / (u32)ft->dsize), ((u32)ft->qsize - vifRegs->offset)); - } - else - { - unpacksize = 0; - Console::WriteLn("Unpack align offset = 0"); - } - VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset); - destinc = (4 - ft->qsize) + unpacksize; + + VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset); + func(dest, (u32*)cdata, unpacksize); size -= unpacksize * ft->dsize; - cdata += unpacksize * ft->dsize; - + vifRegs->num--; ++vif->cl; + if (vif->cl == vifRegs->cycle.wl) { if (vifRegs->cycle.cl != vifRegs->cycle.wl) - dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc; + { + vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft->qsize) + unpacksize)) * 4; + //dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + destinc; + } else - dest += destinc; + { + vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4; + //dest += destinc; + } vif->cl = 0; + VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); + return size >> 2; + } else { - dest += destinc; + vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4; + dest += (4 - ft->qsize) + unpacksize; + cdata += unpacksize * ft->dsize; + VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); } - - VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr); - } + - if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write + if (vif->cl != 0) //Check alignment for SSE unpacks { #ifdef _DEBUG @@ -499,7 +442,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma int incdest; - if (vif->cl != 0) + if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write { // continuation from last stream @@ -516,22 +459,109 @@ static void 
VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if (vif->cl == vifRegs->cycle.wl) { dest += incdest; + vif->tag.addr += incdest * 4; vif->cl = 0; break; } dest += 4; + vif->tag.addr += 16; } - // have to update - _vifRow[0] = _vifRegs->r0; - _vifRow[1] = _vifRegs->r1; - _vifRow[2] = _vifRegs->r2; - _vifRow[3] = _vifRegs->r3; + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow = VIFdmanum ? g_vifRow1 : g_vifRow0; + vifRow[0] = vifRegs->r0; + vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } } + } + return size>>2; +} - if ((size >= ft->gsize) && !(v->addr&0xf)) + +static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +{ + u32 *dest; + u32 unpackType; + UNPACKFUNCTYPE func; + const VIFUnpackFuncTable *ft; + VURegs * VU; + u8 *cdata = (u8*)data; + +#ifdef _DEBUG + u32 memsize = VIFdmanum ? 0x4000 : 0x1000; +#endif + + _mm_prefetch((char*)data, _MM_HINT_NTA); + + if (VIFdmanum == 0) + { + VU = &VU0; + //vifRegs = vif0Regs; + assert(v->addr < memsize); + } + else + { + + VU = &VU1; + //vifRegs = vif1Regs; + assert(v->addr < memsize); + + if (vu1MicroIsSkipping()) + { + // don't process since the frame is dummy + vif->tag.addr += (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16); + return; + } + } + + dest = (u32*)(VU->Mem + v->addr); + + VIF_LOG("VIF%d UNPACK: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x", + VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num); + + VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset); + + // The unpack type + unpackType = v->cmd & 0xf; + + _mm_prefetch((char*)data + 128, _MM_HINT_NTA); + + ft = &VIFfuncTable[ unpackType ]; + func = vif->usn ? ft->funcU : ft->funcS; + + size <<= 2; + +#ifdef _DEBUG + memsize = size; +#endif + + +#ifdef VIFUNPACKDEBUG + + if()vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * + ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) > (u32)(VIFdmanum ? 0x4000 : 0x1000)) + { + //Sanity Check (memory overflow) + DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 
0x4000 : 0x1000); + + } +#endif + + if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write + { + +#ifdef _DEBUG + static int s_count = 0; +#endif + + + if (size >= ft->gsize) { const UNPACKPARTFUNCTYPESSE* pfn; int writemask; @@ -579,6 +609,16 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if (oldcycle != -1) *(u32*)&vifRegs->cycle = oldcycle; + if(vifRegs->mode == 2) + { + //Update the reg rows for non SSE + vifRegs->r0 = vifRow[0]; + vifRegs->r1 = vifRow[1]; + vifRegs->r2 = vifRow[2]; + vifRegs->r3 = vifRow[3]; + } + + // if size is left over, update the src,dst pointers if (writemask > 0) { @@ -586,92 +626,38 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma cdata += left * ft->gsize; dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16); vifRegs->num -= left; - _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + size = writemask; + + if (size >= ft->dsize && vifRegs->num > 0) + { + //VIF_LOG("warning, end with size = %d", size); + + /* unpack one qword */ + vif->tag.addr += (size / ft->dsize) * 4; + func(dest, (u32*)cdata, size / ft->dsize); + size = 0; + + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow[0] = vifRegs->r0; + vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } + VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr); + } } else { vifRegs->num -= size / ft->gsize; - if (vifRegs->num > 0) _vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + if (vifRegs->num > 0) vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize; + size = 0; } - size = writemask; - - _vifRegs->r0 = _vifRow[0]; - _vifRegs->r1 = _vifRow[1]; - _vifRegs->r2 = _vifRow[2]; - _vifRegs->r3 = _vifRow[3]; - } - else - { - - if ((unpackType == 0xC) && (vifRegs->cycle.cl == vifRegs->cycle.wl)) //No use when SSE is available - { - // v4-32 - if (v->size == (size >> 2) && (vifRegs->mode == 0) && !(vifRegs->code & 0x10000000) && (vif->usn == 0)) - { - vifRegs->num -= size >> 4; - memcpy_fast((u8*)dest, cdata, size); - size = 0; - return; - } - } - - incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4; - - while ((size >= ft->gsize) && (vifRegs->num > 0)) - { - func(dest, (u32*)cdata, ft->qsize); - cdata += ft->gsize; - size -= ft->gsize; - - vifRegs->num--; - //if(vifRegs->num == loophere) dest = (u32*)(VU->Mem); - ++vif->cl; - if (vif->cl == vifRegs->cycle.wl) - { - dest += incdest; - vif->cl = 0; - } - else - { - dest += 4; - } - - } - - // have to update - _vifRow[0] = _vifRegs->r0; - _vifRow[1] = _vifRegs->r1; - _vifRow[2] = _vifRegs->r2; - _vifRow[3] = _vifRegs->r3; - } - - // used for debugging vif -// { -// int i, j, k; -// u32* curdest = olddest; -// FILE* ftemp = fopen("temp.txt", s_count?"a+":"w"); -// fprintf(ftemp, "%x %x %x\n", s_count, size, vif->tag.addr); -// fprintf(ftemp, "%x %x %x\n", vifRegs->code>>24, vifRegs->mode, *(u32*)&vifRegs->cycle); -// fprintf(ftemp, "row: %x %x %x %x\n", _vifRow[0], _vifRow[1], _vifRow[2], _vifRow[3]); -// //fprintf(ftemp, "row2: %x %x %x %x\n", _vifRegs->r0, _vifRegs->r1, _vifRegs->r2, _vifRegs->r3); -// -// for(i = 0; i < memsize; ) { -// for(k = 0; k < vifRegs->cycle.wl; ++k) { -// for(j = 0; j <= ((vifRegs->code>>26)&3); ++j) { -// fprintf(ftemp, "%x ", curdest[4*k+j]); -// } -// } -// -// fprintf(ftemp, "\n"); -// curdest += 
4*vifRegs->cycle.cl; -// i += (((vifRegs->code>>26)&3)+1)*ft->dsize*vifRegs->cycle.wl; -// } -// fclose(ftemp); -// } -// s_count++; - - if (size >= ft->dsize && vifRegs->num > 0) + } + else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have { //VIF_LOG("warning, end with size = %d", size); @@ -679,14 +665,20 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma vif->tag.addr += (size / ft->dsize) * 4; func(dest, (u32*)cdata, size / ft->dsize); size = 0; - + + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow[0] = vifRegs->r0; + vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, vif->tag.addr); } - } else /* filling write */ { - VIF_LOG("VIFunpack - filling write"); if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0) DevCon::Notice("Filling write warning! Size < packet size and CL != 0"); @@ -827,11 +819,16 @@ static __forceinline void vif0UNPACK(u32 *data) vif0.tag.addr &= 0xfff; vif0.tag.size = len; vif0Regs->offset = 0; + + vifRegs = (VIFregisters*)vif0Regs; + vifMaskRegs = g_vif0Masks; + vif = &vif0; + vifRow = g_vifRow0; } -static __forceinline void _vif0mpgTransfer(u32 addr, u32 *data, int size) +static __forceinline void vif0mpgTransfer(u32 addr, u32 *data, int size) { - /* Console::WriteLn("_vif0mpgTransfer addr=%x; size=%x", params addr, size); + /* Console::WriteLn("vif0mpgTransfer addr=%x; size=%x", params addr, size); { FILE *f = fopen("vu1.raw", "wb"); fwrite(data, 1, size*4, f); @@ -935,7 +932,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG { if (vif0.vifpacketsize < vif0.tag.size) { - _vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize); + vif0mpgTransfer(vif0.tag.addr, data, vif0.vifpacketsize); vif0.tag.addr += vif0.vifpacketsize << 2; vif0.tag.size -= vif0.vifpacketsize; return vif0.vifpacketsize; @@ -944,7 +941,7 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG { int ret; - _vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size); + vif0mpgTransfer(vif0.tag.addr, data, vif0.tag.size); ret = vif0.tag.size; vif0.tag.size = 0; vif0.cmd = 0; @@ -959,6 +956,9 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK { /* size is less that the total size, transfer is 'in pieces' */ VIFunpack(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum); + + ProcessMemSkip(vif0.vifpacketsize << 2, (vif0.cmd & 0xf), VIF0dmanum); + vif0.tag.size -= vif0.vifpacketsize; FreezeXMMRegs(0); return vif0.vifpacketsize; @@ -966,14 +966,27 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK else { /* we got all the data, transfer it fully */ - int ret; + int ret = vif0.tag.size; - VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); - ret = vif0.tag.size; - vif0.tag.size = 0; - vif0.cmd = 0; - FreezeXMMRegs(0); - return ret; + //Align data after a split transfer first + if(vif0Regs->offset != 0 || vif0.cl != 0) + { + vif0.tag.size = VIFalign(data, &vif0.tag, vif0.tag.size, VIF0dmanum); + data += ret - vif0.tag.size; + if(vif0.tag.size > 0) VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); + vif0.tag.size = 0; + vif0.cmd = 0; + FreezeXMMRegs(0); + return ret; + } + else + { + VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); + vif0.tag.size = 0; + vif0.cmd = 0; + FreezeXMMRegs(0); + return ret; + } } } @@ -1555,11 +1568,16 @@ static __forceinline void vif1UNPACK(u32 *data) vif1.cl = 0; vif1.tag.addr <<= 4; vif1.tag.cmd = vif1.cmd; + + vifRegs = 
(VIFregisters*)vif1Regs; + vifMaskRegs = g_vif1Masks; + vif = &vif1; + vifRow = g_vifRow1; } -static __forceinline void _vif1mpgTransfer(u32 addr, u32 *data, int size) +static __forceinline void vif1mpgTransfer(u32 addr, u32 *data, int size) { - /* Console::WriteLn("_vif1mpgTransfer addr=%x; size=%x", params addr, size); + /* Console::WriteLn("vif1mpgTransfer addr=%x; size=%x", params addr, size); { FILE *f = fopen("vu1.raw", "wb"); fwrite(data, 1, size*4, f); @@ -1661,7 +1679,7 @@ static int __fastcall Vif1TransMPG(u32 *data) { if (vif1.vifpacketsize < vif1.tag.size) { - _vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize); + vif1mpgTransfer(vif1.tag.addr, data, vif1.vifpacketsize); vif1.tag.addr += vif1.vifpacketsize << 2; vif1.tag.size -= vif1.vifpacketsize; return vif1.vifpacketsize; @@ -1669,7 +1687,7 @@ static int __fastcall Vif1TransMPG(u32 *data) else { int ret; - _vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size); + vif1mpgTransfer(vif1.tag.addr, data, vif1.tag.size); ret = vif1.tag.size; vif1.tag.size = 0; vif1.cmd = 0; @@ -1770,20 +1788,35 @@ static int __fastcall Vif1TransUnpack(u32 *data) /* size is less that the total size, transfer is 'in pieces' */ VIFunpack(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum); + + ProcessMemSkip(vif1.vifpacketsize << 2, (vif1.cmd & 0xf), VIF1dmanum); vif1.tag.size -= vif1.vifpacketsize; FreezeXMMRegs(0); return vif1.vifpacketsize; } else { - int ret; - /* we got all the data, transfer it fully */ - VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum); - ret = vif1.tag.size; - vif1.tag.size = 0; - vif1.cmd = 0; - FreezeXMMRegs(0); - return ret; + int ret = vif1.tag.size; + + if(vif1Regs->offset != 0 || vif1.cl != 0) + { + vif1.tag.size = VIFalign(data, &vif1.tag, vif1.tag.size, VIF1dmanum); + data += ret - vif1.tag.size; + if(vif1.tag.size > 0) VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum); + vif1.tag.size = 0; + vif1.cmd = 0; + FreezeXMMRegs(0); + return ret; + } + else + { + /* we got all the data, transfer it fully */ + VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum); + vif1.tag.size = 0; + vif1.cmd = 0; + FreezeXMMRegs(0); + return ret; + } } } diff --git a/pcsx2/x86/aVif.S b/pcsx2/x86/aVif.S index 05a2e9248f..e4b64685f6 100644 --- a/pcsx2/x86/aVif.S +++ b/pcsx2/x86/aVif.S @@ -18,9 +18,9 @@ */ .intel_syntax noprefix -.extern _vifRegs -.extern _vifMaskRegs -.extern _vifRow +.extern vifRegs +.extern vifMaskRegs +.extern vifRow #define VIF_ESP esp #define VIF_SRC esi @@ -108,7 +108,7 @@ // setting up masks #define UNPACK_Setup_Mask_SSE(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \ movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(CL)]; \ @@ -118,7 +118,7 @@ #define UNPACK_Start_Setup_Mask_SSE_0(CL) UNPACK_Setup_Mask_SSE(CL) #define UNPACK_Start_Setup_Mask_SSE_1(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 16]; \ movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(CL) + 32]; \ pand XMM_ROWMASK, XMM_ROW; \ @@ -129,12 +129,12 @@ #define UNPACK_Setup_Mask_SSE_0_1(CL) #define UNPACK_Setup_Mask_SSE_1_1(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \ // ignore CL, since vif.cycle.wl == 1 #define UNPACK_Setup_Mask_SSE_2_1(CL) \ - mov VIF_TMPADDR, _vifMaskRegs; \ + mov VIF_TMPADDR, vifMaskRegs; \ 
movdqa XMM_ROWMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 16]; \ movdqa XMM_ROWCOLMASK, xmmword ptr [VIF_TMPADDR + 64*(0) + 32]; \ movdqa XMM_WRITEMASK, xmmword ptr [VIF_TMPADDR + 64*(0)]; \ @@ -1312,9 +1312,9 @@ #pragma warning(disable:4731) #define SAVE_ROW_REG_BASE \ - mov VIF_TMPADDR, _vifRow; \ + mov VIF_TMPADDR, vifRow; \ movdqa xmmword ptr [VIF_TMPADDR], XMM_ROW; \ - mov VIF_TMPADDR, _vifRegs; \ + mov VIF_TMPADDR, vifRegs; \ movss dword ptr [VIF_TMPADDR+0x100], XMM_ROW; \ psrldq XMM_ROW, 4; \ movss dword ptr [VIF_TMPADDR+0x110], XMM_ROW; \ @@ -1349,7 +1349,7 @@ .globl UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType; \ UNPACK_SkippingWrite_##name##_##sign##_##MaskType##_##ModeType: \ INIT_ARGS(); \ - mov VIF_TMPADDR, _vifRegs; \ + mov VIF_TMPADDR, vifRegs; \ movzx VIF_INC, byte ptr [VIF_TMPADDR + 0x40]; \ movzx VIF_SAVEEBX, byte ptr [VIF_TMPADDR + 0x41]; \ sub VIF_INC, VIF_SAVEEBX; \ diff --git a/pcsx2/x86/ix86-32/aVif_proc-32.asm b/pcsx2/x86/ix86-32/aVif_proc-32.asm index 12c8b969b4..62fd377795 100644 --- a/pcsx2/x86/ix86-32/aVif_proc-32.asm +++ b/pcsx2/x86/ix86-32/aVif_proc-32.asm @@ -5,9 +5,9 @@ .xmm -extern _vifRegs:ptr -extern _vifMaskRegs:ptr -extern _vifRow:ptr +extern vifRegs:ptr +extern vifMaskRegs:ptr +extern vifRow:ptr extern s_TempDecompress:ptr @@ -104,7 +104,7 @@ UNPACK_Regular_SSE_2 macro r0 UNPACK_Setup_Mask_SSE macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm4, [eax + 64*(CL) + 16] movdqa xmm5, [eax + 64*(CL) + 32] movdqa xmm3, [eax + 64*(CL)] @@ -118,7 +118,7 @@ UNPACK_Start_Setup_Mask_SSE_0 macro CL endm UNPACK_Start_Setup_Mask_SSE_1 macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm4, [eax + 64*(CL) + 16] movdqa xmm5, [eax + 64*(CL) + 32] pand xmm4, xmm6 @@ -132,14 +132,14 @@ UNPACK_Start_Setup_Mask_SSE_2 macro CL UNPACK_Setup_Mask_SSE_0_1 macro CL endm UNPACK_Setup_Mask_SSE_1_1 macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm3, [eax + 64*(0)] endm UNPACK_Setup_Mask_SSE_2_1 macro CL - mov eax, [_vifMaskRegs] + mov eax, [vifMaskRegs] movdqa xmm4, [eax + 64*(0) + 16] movdqa xmm5, [eax + 64*(0) + 32] movdqa xmm3, [eax + 64*(0)] @@ -1521,9 +1521,9 @@ UNPACK_V4_5SSE_1A macro CL, TOTALCL, MaskType, ModeType SAVE_ROW_REG_BASE macro - mov eax, [_vifRow] + mov eax, [vifRow] movdqa [eax], xmm6 - mov eax, [_vifRegs] + mov eax, [vifRegs] movss dword ptr [eax+0100h], xmm6 psrldq xmm6, 4 movss dword ptr [eax+0110h], xmm6 @@ -1557,7 +1557,7 @@ defUNPACK_SkippingWrite macro name, MaskType, ModeType, qsize, sign, SAVE_ROW_RE push ebx INIT_ARGS - mov eax, [_vifRegs] + mov eax, [vifRegs] movzx ecx, byte ptr [eax + 040h] movzx ebx, byte ptr [eax + 041h] sub ecx, ebx From e100933069a8894771b44378f9207a912755557b Mon Sep 17 00:00:00 2001 From: refraction Date: Sat, 11 Apr 2009 19:23:39 +0000 Subject: [PATCH 043/143] Look over there! A THREE HEADED MONKEY! 
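For readability, the split-transfer handling that Vif0TransUnpack and Vif1TransUnpack gain in the VifDma.cpp hunks above can be condensed as follows (VIF1 names shown; the VIF0 path is identical apart from its register set, and this is only a restatement of the diff, not additional code):

static int Vif1TransUnpack_whole(u32 *data)     // illustrative name; globals as declared in VifDma.cpp
{
	int ret = vif1.tag.size;                    // the whole tag fits in this packet

	if (vif1Regs->offset != 0 || vif1.cl != 0)  // an earlier packet left a vector half-written
	{
		// Finish the partially written vector(s) first...
		vif1.tag.size = VIFalign(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
		data += ret - vif1.tag.size;            // skip the words VIFalign consumed
		// ...then push whatever remains through the normal unpack path.
		if (vif1.tag.size > 0) VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
	}
	else
	{
		VIFunpack(data, &vif1.tag, vif1.tag.size, VIF1dmanum);
	}

	vif1.tag.size = 0;
	vif1.cmd = 0;
	FreezeXMMRegs(0);
	return ret;
}

The 'in pieces' branch is also where ProcessMemSkip is now called after each partial VIFunpack, which presumably keeps the skipping-write address bookkeeping in step when only part of the packet could be unpacked.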
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@956 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 8efee3181d..4d466169cc 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -281,7 +281,7 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int break; case 0x6: vif->tag.addr += (size / unpack->gsize) * 16; - DevCon::Notice("Processing V2-8 skip, size = %d", params size); + VIFUNPACK_LOG("Processing V2-8 skip, size = %d", size); break; case 0x8: vif->tag.addr += (size / unpack->gsize) * 16; @@ -289,7 +289,7 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int break; case 0x9: vif->tag.addr += (size / unpack->gsize) * 16; - DevCon::Notice("Processing V3-16 skip, size = %d", params size); + VIFUNPACK_LOG("Processing V3-16 skip, size = %d", size); break; case 0xA: vif->tag.addr += (size / unpack->gsize) * 16; From 97fac9e6357836bead6c65a4c4bec22f8d65b7d6 Mon Sep 17 00:00:00 2001 From: refraction Date: Sun, 12 Apr 2009 04:56:06 +0000 Subject: [PATCH 044/143] resolves Issue 143 Altered Beast git-svn-id: http://pcsx2.googlecode.com/svn/trunk@958 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/SPR.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pcsx2/SPR.cpp b/pcsx2/SPR.cpp index bb47768ba8..e33fb05e79 100644 --- a/pcsx2/SPR.cpp +++ b/pcsx2/SPR.cpp @@ -67,11 +67,12 @@ int _SPR0chain() if ((psHu32(DMAC_CTRL) & 0xC) >= 0x8) // 0x8 VIF1 MFIFO, 0xC GIF MFIFO { if ((spr0->madr & ~psHu32(DMAC_RBSR)) != psHu32(DMAC_RBOR)) Console::WriteLn("SPR MFIFO Write outside MFIFO area"); + else mfifotransferred += spr0->qwc; hwMFIFOWrite(spr0->madr, (u8*)&PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4); spr0->madr += spr0->qwc << 4; spr0->madr = psHu32(DMAC_RBOR) + (spr0->madr & psHu32(DMAC_RBSR)); - mfifotransferred += spr0->qwc; + } else { @@ -226,16 +227,17 @@ void SPRFROMinterrupt() { _dmaSPR0(); - if ((psHu32(DMAC_CTRL) & 0xC) == 0xC) // GIF MFIFO + if(mfifotransferred != 0) { - if ((spr0->madr & ~psHu32(DMAC_RBSR)) != psHu32(DMAC_RBOR)) Console::WriteLn("GIF MFIFO Write outside MFIFO area"); - spr0->madr = psHu32(DMAC_RBOR) + (spr0->madr & psHu32(DMAC_RBSR)); - //Console::WriteLn("mfifoGIFtransfer %x madr %x, tadr %x", params gif->chcr, gif->madr, gif->tadr); - mfifoGIFtransfer(mfifotransferred); - mfifotransferred = 0; - } - else - if ((psHu32(DMAC_CTRL) & 0xC) == 0x8) // VIF1 MFIFO + if ((psHu32(DMAC_CTRL) & 0xC) == 0xC) // GIF MFIFO + { + if ((spr0->madr & ~psHu32(DMAC_RBSR)) != psHu32(DMAC_RBOR)) Console::WriteLn("GIF MFIFO Write outside MFIFO area"); + spr0->madr = psHu32(DMAC_RBOR) + (spr0->madr & psHu32(DMAC_RBSR)); + //Console::WriteLn("mfifoGIFtransfer %x madr %x, tadr %x", params gif->chcr, gif->madr, gif->tadr); + mfifoGIFtransfer(mfifotransferred); + mfifotransferred = 0; + } + else if ((psHu32(DMAC_CTRL) & 0xC) == 0x8) // VIF1 MFIFO { if ((spr0->madr & ~psHu32(DMAC_RBSR)) != psHu32(DMAC_RBOR)) Console::WriteLn("VIF MFIFO Write outside MFIFO area"); spr0->madr = psHu32(DMAC_RBOR) + (spr0->madr & psHu32(DMAC_RBSR)); @@ -243,6 +245,7 @@ void SPRFROMinterrupt() mfifoVIF1transfer(mfifotransferred); mfifotransferred = 0; } + } if (spr0finished == 0) return; spr0->chcr &= ~0x100; hwDmacIrq(8); From 5b0d9b6723580302e48efcb8406ba81c6fe9c48e Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sun, 12 Apr 2009 08:29:53 +0000 Subject: [PATCH 045/143] microVU: fried my brain 
with some very-complex VU flag-handling logic/algorithms (hopefully they work as expected) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@959 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/ix86_legacy.cpp | 2 +- pcsx2/x86/microVU_Alloc.h | 3 +- pcsx2/x86/microVU_Alloc.inl | 3 +- pcsx2/x86/microVU_Analyze.inl | 37 +++++++-- pcsx2/x86/microVU_Compile.inl | 134 ++++++++++++++++++++++++++++++--- pcsx2/x86/microVU_Lower.inl | 10 ++- pcsx2/x86/microVU_Misc.h | 13 ++-- 7 files changed, 173 insertions(+), 29 deletions(-) diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 752414a1b2..eec040ac33 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -359,7 +359,7 @@ emitterT void MOV32RtoRm( x86IntRegType to, x86IntRegType from, int offset) } -/* mov r32 to r32 */ +/* mov r16 to r16 */ emitterT void MOV16RtoR( x86IntRegType to, x86IntRegType from ) { if( to == from ) return; diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index cb6bbbbb5f..8de5d0737c 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -53,10 +53,11 @@ struct microAllocInfo { microRegInfo regs; // Pipeline info microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) u8 branch; // 0 = No Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR - u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) + //u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) u32 cycles; // Cycles for current block u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block) u32 curPC; // Current PC u32 startPC; // Start PC for Cur Block u32 info[pSize/8]; // Info for Instructions in current block + u8 stall[pSize/8]; // Info on how much each instruction stalled }; diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index dee76e8f92..c591e88082 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -682,8 +682,7 @@ microVUt(void) mVUallocSFLAGa(int reg, int fInstance) { microVUt(void) mVUallocSFLAGb(int reg, int fInstance) { getFlagReg(fInstance, fInstance); - AND32ItoR(fInstance, 0xffff0000); - OR16RtoR(fInstance, reg); + MOV16RtoR(fInstance, reg); } microVUt(void) mVUallocMFLAGa(int reg, int fInstance) { diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 224ebbbd09..571435b4af 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -245,19 +245,42 @@ microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { microVUt(void) mVUanalyzeSflag(int It) { microVU* mVU = mVUx; if (!It) { mVUinfo |= _isNOP; } - else { mVUinfo |= _isSflag | _swapOps; } // ToDo: set s flag at right time + else { // Sets _isSflag at instruction that FSxxx opcode reads it's status flag from + mVUinfo |= _swapOps; + if (mVUcount >= 4) { incPC2(-8); mVUinfo |= _isSflag; incPC2(8); } + //else { incPC2((mVUcount*-2)); mVUinfo |= _isSflag; incPC2(mVUcount*-2); } + } analyzeVIreg2(It, 1); } microVUt(void) mVUanalyzeFSSET() { microVU* mVU = mVUx; - int i, curPC = iPC; - for (i = mVUcount; i > 0; i--) { - incPC2(-2); - if (isSflag) break; - mVUinfo &= ~_doStatus; + mVUinfo |= _isFSSSET; +} + +//------------------------------------------------------------------ +// Mflag - Mac Flag Opcodes 
+//------------------------------------------------------------------ + +microVUt(void) mVUanalyzeMflag(int Is, int It) { + microVU* mVU = mVUx; + if (!It) { mVUinfo |= _isNOP; } + else if (mVUcount >= 4) { + incPC2(-8); + if (doStatus) { mVUinfo |= _doMac; } + else { + int curPC = iPC; + int i = mVUcount; + for (; i > 0; i--) { + incPC2(-2); + if (doStatus) { mVUinfo |= _doMac; break; } + } + iPC = curPC; + } + incPC2(8); } - iPC = curPC; + analyzeVIreg1(Is); + analyzeVIreg2(It, 1); } //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 85c4938aa7..56228309ba 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -19,6 +19,10 @@ #pragma once #ifdef PCSX2_MICROVU +//------------------------------------------------------------------ +// Helper Macros +//------------------------------------------------------------------ + #define createBlock(blockEndPtr) { \ block.pipelineState = pipelineState; \ block.x86ptrStart = x86ptrStart; \ @@ -46,6 +50,115 @@ #define incP() { mVU->p = (mVU->p+1) & 1; } #define incQ() { mVU->q = (mVU->q+1) & 1; } +//------------------------------------------------------------------ +// Helper Functions +//------------------------------------------------------------------ + +// Optimizes out unneeded status flag updates +microVUt(void) mVUstatusFlagOp() { + microVU* mVU = mVUx; + int curPC = iPC; + int i = mVUcount; + if (doStatus) { mVUinfo |= _isSflag; } + else { + for (; i > 0; i--) { + incPC2(-2); + if (doStatus) { mVUinfo |= _isSflag; break; } + } + } + for (; i > 0; i--) { + incPC2(-2); + if (isSflag) break; + mVUinfo &= ~_doStatus; + } + iPC = curPC; +} + +// Note: Flag handling is 'very' complex, it requires full knowledge of how microVU recs work, so don't touch! +microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { + microVU* mVU = mVUx; + + // Ensure last ~4+ instructions update mac flags + int endPC = iPC; + int aCount = 1; // Amount of instructions needed to get 4 valid status/mac flag instances + for (int i = mVUcount; i > 0; i++, aCount++;) { + if (doStatus) { mVUinfo |= _doMac; if (i >= 4) { break; } } + incPC2(-2); + } + + // Status/Mac Flags Setup Code + int xStatus = 0; // Status Instance starts at #0 on every block + int xMac = 0; // Mac Instance starts at #0 on every block + int pStatus = 0; + int pMac = 0; + int xCount = mVUcount; // Backup count + mVUcount = 0; + iPC = mVUstartPC; + for (int i = 0; i < xCount; i++) { + if ((xCount - i) > aCount) mVUstatusFlagOp(); // Don't Optimize out on the last ~4+ instructions + if (doStatus||isFSSET) { mVUinfo |= xStatus << 12; } // _fsInstance + if (doMac) { mVUinfo |= xMac << 10; } // _fmInstance + pStatus = (xStatus + ((mVUstall > 3) ? 3 : mVUstall)) & 3; + pMac = (xMac + ((mVUstall > 3) ? 
3 : mVUstall)) & 3; + mVUinfo |= pStatus << 18; // _fvsInstance + mVUinfo |= pMac << 16; // _fvmInstance + if (doStatus||isFSSET) { xStatus = (xStatus+1) & 3; } + if (doMac) { xMac = (xMac+1) & 3; } + incPC2(2); + } + mVUcount = xCount; // Restore count + + // Setup Last 4 instances of Status/Mac flags (needed for accurate block linking) + iPC = endPC; + for (int i = 3, int j = 3, int ii = 1, int jj = 3; aCount > 0; ii++, aCount--) { + if (doStatus && (i >= 0)) { + for (; (ii > 0 && i >= 0); ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; i--; } + } + if (doMac && (j >= 0)) { + for (; (jj > 0 && j >= 0); jj--) { xMac = (xMac-1) & 3; bMac[i] = xMac; j--; } + } + incPC2(-2); + } +} + +#define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0))) +#define getFlagReg2(x) ((x == bStatus[3]) ? gprESP : ((x == bStatus[2]) ? gprR : ((x == bStatus[1]) ? gprT2 : gprT1))) + +// Recompiles Code for Proper Flags on Block Linkings +microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) { + + PUSHR(gprR); // Backup gprR + PUSHR(gprESP); // Backup gprESP + + MOV32RtoR(gprT1, getFlagReg1(bStatus[0])); + MOV32RtoR(gprT2, getFlagReg1(bStatus[1])); + MOV32RtoR(gprR, getFlagReg1(bStatus[2])); + MOV32RtoR(gprESP, getFlagReg1(bStatus[3])); + + MOV32RtoR(gprF0, gprT1); + MOV32RtoR(gprF1, gprT2); + MOV32RtoR(gprF2, gprR); + MOV32RtoR(gprF3, gprESP); + + AND32ItoR(gprT1, 0xffff0000); + AND32ItoR(gprT2, 0xffff0000); + AND32ItoR(gprR, 0xffff0000); + AND32ItoR(gprESP, 0xffff0000); + + AND32ItoR(gprF0, 0x0000ffff); + AND32ItoR(gprF1, 0x0000ffff); + AND32ItoR(gprF2, 0x0000ffff); + AND32ItoR(gprF3, 0x0000ffff); + + OR32RtoR(gprF0, getFlagReg2(bMac[0])); + OR32RtoR(gprF1, getFlagReg2(bMac[1])); + OR32RtoR(gprF2, getFlagReg2(bMac[2])); + OR32RtoR(gprF3, getFlagReg2(bMac[3])); + + POPR(gprESP); // Restore gprESP + POPR(gprR); // Restore gprR +} + microVUt(void) mVUincCycles(int x) { microVU* mVU = mVUx; mVUcycles += x; @@ -130,13 +243,14 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, mVUcount++; } + // Sets Up Flag instances + int bStatus[4]; int bMac[4]; + mVUsetFlags(bStatus, bMac); + // Second Pass iPC = mVUstartPC; setCode(); for (bool x = 1; x; ) { - // - // ToDo: status/mac flag stuff? - // if (isEOB) { x = 0; } //if (isBranch2) { mVUopU(); incPC(2); } @@ -156,9 +270,10 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, case 8: branchCase(JNZ32); // IBNEQ case 2: branchCase2(); // BAL case 1: - // search for block + // ToDo: search for block + // (remember about global variables and recursion!) + mVUsetFlagsRec(bStatus, bMac); ajmp = JMP32((uptr)0); - break; // B/BAL case 9: branchCase2(); // JALR case 10: break; // JR/JALR @@ -167,8 +282,8 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, return thisPtr; } } - // Do E-bit end stuff here + // Do E-bit end stuff here incCycles(55); // Ensures Valid P/Q instances mVUcycles -= 55; if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); } @@ -176,10 +291,11 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2); SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); - MOV32ItoM((uptr)&mVU->p, mVU->p); - MOV32ItoM((uptr)&mVU->q, mVU->q); + //MOV32ItoM((uptr)&mVU->p, mVU->p); + //MOV32ItoM((uptr)&mVU->q, mVU->q); + AND32ItoM((uptr)µVU0.regs.VI[REG_VPU_STAT].UL, (vuIndex ? 
~0x100 : ~0x001)); // VBS0/VBS1 flag - AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Not sure what this does but zerorecs do it... + AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC], xPC); JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5)); return thisPtr; diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index a247f00a6c..61f94a8b8c 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -547,10 +547,12 @@ microVUf(void) mVU_FSSET() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeFSSET(); } else { - int flagReg; - getFlagReg(flagReg, fsInstance); - AND32ItoR(flagReg, 0x03f); - OR32ItoR(flagReg, (_Imm12_ & 0xfc0)); + int flagReg = gprT1; + if (doStatus) { getFlagReg(flagReg, fsInstance); } // Get status result from upper instruction + else { mVUallocSFLAGa(flagReg, fpsInstance); } // Get status result from last status setting instruction + AND16ItoR(flagReg, 0x03f); // Remember not to modify upper 16 bits because of mac flag + OR16ItoR(flagReg, (_Imm12_ & 0xfc0)); + if (!doStatus) { mVUallocSFLAGb(flagReg, fsInstance); } } } diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index aaf45425d4..c5e6c046cc 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -144,12 +144,13 @@ declareAllVariables #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles #define mVUcount mVUallocInfo.count -#define mVUstall mVUallocInfo.maxStall +//#define mVUstall mVUallocInfo.maxStall #define mVUregs mVUallocInfo.regs #define mVUregsTemp mVUallocInfo.regsTemp -#define mVUinfo mVUallocInfo.info[mVUallocInfo.curPC / 2] -#define mVUstartPC mVUallocInfo.startPC #define iPC mVUallocInfo.curPC +#define mVUinfo mVUallocInfo.info[iPC / 2] +#define mVUstall mVUallocInfo.stall[iPC / 2] +#define mVUstartPC mVUallocInfo.startPC #define xPC ((iPC / 2) * 8) #define curI mVUcurProg.data[iPC] #define setCode() { mVU->code = curI; } @@ -183,7 +184,8 @@ declareAllVariables #define _memReadIt (1<<24) // Read If (VI reg) from memory (used by branches) #define _writesVI (1<<25) // Current Instruction writes to VI #define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction -//#define _isBranch2 (1<<27) // Cur Instruction is a Branch that writes VI regs (BAL/JALR) +#define _isFSSSET (1<<27) // Cur Instruction is FSSET +//#define _isBranch2 (1<<28) // Cur Instruction is a Branch that writes VI regs (BAL/JALR) #define isNOP (mVUinfo & (1<<0)) #define isBranch (mVUinfo & (1<<1)) @@ -211,7 +213,8 @@ declareAllVariables #define memReadIt (mVUinfo & (1<<24)) #define writesVI (mVUinfo & (1<<25)) #define swapOps (mVUinfo & (1<<26)) -//#define isBranch2 (mVUinfo & (1<<27)) +#define isFSSET (mVUinfo & (1<<27)) +//#define isBranch2 (mVUinfo & (1<<28)) #define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) #define mmVI(_VIreg_) (_VIreg_ - 1) From a3c166902401b0225954f45e1d38a82dfc45e8e0 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sun, 12 Apr 2009 10:23:25 +0000 Subject: [PATCH 046/143] microVU: more flag stuff (div/sqrt/rsqrt flags set at proper time) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@960 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Alloc.h | 1 + pcsx2/x86/microVU_Alloc.inl | 6 +++--- pcsx2/x86/microVU_Compile.inl | 35 +++++++++++++++++++++++------------ pcsx2/x86/microVU_Lower.inl | 17 ++++++++--------- pcsx2/x86/microVU_Misc.h | 7 +++++-- 5 files changed, 40 insertions(+), 26 deletions(-) diff --git 
a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index 8de5d0737c..3bf038b7c3 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -54,6 +54,7 @@ struct microAllocInfo { microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) u8 branch; // 0 = No Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR //u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) + //u8 divFlag; u32 cycles; // Cycles for current block u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block) u32 curPC; // Current PC diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index c591e88082..0fb6a1c155 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -665,13 +665,13 @@ microVUt(void) mVUallocFMAC26b(int& ACCw, int& ACCr) { // Flag Allocators //------------------------------------------------------------------ -#define getFlagReg(regX, fInst) { \ - switch (fInst) { \ +#define getFlagReg(regX, fInst) { \ + switch (fInst) { \ case 0: regX = gprF0; break; \ case 1: regX = gprF1; break; \ case 2: regX = gprF2; break; \ case 3: regX = gprF3; break; \ - } \ + } \ } microVUt(void) mVUallocSFLAGa(int reg, int fInstance) { diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 56228309ba..2c1846c2bc 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -49,6 +49,7 @@ #define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } #define incP() { mVU->p = (mVU->p+1) & 1; } #define incQ() { mVU->q = (mVU->q+1) & 1; } +#define doUpperOp() { mVUopU(); mVUdivSet(); } //------------------------------------------------------------------ // Helper Functions @@ -59,7 +60,7 @@ microVUt(void) mVUstatusFlagOp() { microVU* mVU = mVUx; int curPC = iPC; int i = mVUcount; - if (doStatus) { mVUinfo |= _isSflag; } + if (doStatus) { mVUinfo |= _isSflag; } else { for (; i > 0; i--) { incPC2(-2); @@ -69,7 +70,7 @@ microVUt(void) mVUstatusFlagOp() { for (; i > 0; i--) { incPC2(-2); if (isSflag) break; - mVUinfo &= ~_doStatus; + mVUinfo &= ~(_doStatus|_doDivFlag); } iPC = curPC; } @@ -96,14 +97,14 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { iPC = mVUstartPC; for (int i = 0; i < xCount; i++) { if ((xCount - i) > aCount) mVUstatusFlagOp(); // Don't Optimize out on the last ~4+ instructions - if (doStatus||isFSSET) { mVUinfo |= xStatus << 12; } // _fsInstance - if (doMac) { mVUinfo |= xMac << 10; } // _fmInstance + if (doStatus||isFSSET||doDivFlag) { mVUinfo |= xStatus << 12; } // _fsInstance + if (doMac) { mVUinfo |= xMac << 10; } // _fmInstance pStatus = (xStatus + ((mVUstall > 3) ? 3 : mVUstall)) & 3; pMac = (xMac + ((mVUstall > 3) ? 
3 : mVUstall)) & 3; mVUinfo |= pStatus << 18; // _fvsInstance mVUinfo |= pMac << 16; // _fvmInstance - if (doStatus||isFSSET) { xStatus = (xStatus+1) & 3; } - if (doMac) { xMac = (xMac+1) & 3; } + if (doStatus||isFSSET||doDivFlag) { xStatus = (xStatus+1) & 3; } + if (doMac) { xMac = (xMac+1) & 3; } incPC2(2); } mVUcount = xCount; // Restore count @@ -111,7 +112,7 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { // Setup Last 4 instances of Status/Mac flags (needed for accurate block linking) iPC = endPC; for (int i = 3, int j = 3, int ii = 1, int jj = 3; aCount > 0; ii++, aCount--) { - if (doStatus && (i >= 0)) { + if ((doStatus||isFSSET||doDivFlag) && (i >= 0)) { for (; (ii > 0 && i >= 0); ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; i--; } } if (doMac && (j >= 0)) { @@ -172,8 +173,9 @@ microVUt(void) mVUincCycles(int x) { calcCycles(mVUregs.VI[z], x); } if (mVUregs.q) { - calcCycles(mVUregs.q, x); - if (!mVUregs.q) { incQ(); } // Do Status Flag Merging Stuff? + if (mVUregs.q > 4) { calcCycles(mVUregs.q, x); if (mVUregs.q <= 4) { mVUinfo |= _doDivFlag; } } + else { calcCycles(mVUregs.q, x); } + if (!mVUregs.q) { incQ(); } } if (mVUregs.p) { calcCycles(mVUregs.p, x); @@ -202,6 +204,15 @@ microVUt(void) mVUsetCycles() { mVUregs.xgkick = mVUregsTemp.xgkick; } +microVUt(void) mVUdivSet() { + microVU* mVU = mVUx; + int flagReg1, flagReg2; + getFlagReg(flagReg1, fsInstance); + if (!doStatus) { getFlagReg(flagReg2, fpsInstance); MOV16RtoR(flagReg1, flagReg2); } + AND16ItoR(flagReg1, 0xfcf); + OR16MtoR (flagReg1, (uptr)&mVU->divFlag); +} + //------------------------------------------------------------------ // Recompiler //------------------------------------------------------------------ @@ -254,9 +265,9 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, if (isEOB) { x = 0; } //if (isBranch2) { mVUopU(); incPC(2); } - if (isNop) { mVUopU(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } - else if (!swapOps) { mVUopU(); incPC(1); mVUopL(); } - else { incPC(1); mVUopL(); incPC(-1); mVUopU(); incPC(1); } + if (isNop) { doUpperOp(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } + else if (!swapOps) { doUpperOp(); incPC(1); mVUopL(); } + else { incPC(1); mVUopL(); incPC(-1); doUpperOp(); incPC(1); } if (!isBdelay) { incPC(1); } else { diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 61f94a8b8c..21634de639 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -474,7 +474,7 @@ microVUf(void) mVU_FCSET() { microVUf(void) mVU_FMAND() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeMflag(_Fs_, _Ft_); } else { mVUallocMFLAGa(gprT1, fvmInstance); mVUallocVIa(gprT2, _Fs_); @@ -485,7 +485,7 @@ microVUf(void) mVU_FMAND() { microVUf(void) mVU_FMEQ() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeMflag(_Fs_, _Ft_); } else { mVUallocMFLAGa(gprT1, fvmInstance); mVUallocVIa(gprT2, _Fs_); @@ -498,7 +498,7 @@ microVUf(void) mVU_FMEQ() { microVUf(void) mVU_FMOR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) { mVUanalyzeMflag(_Fs_, _Ft_); } else { mVUallocMFLAGa(gprT1, fvmInstance); mVUallocVIa(gprT2, _Fs_); @@ -547,12 +547,11 @@ microVUf(void) mVU_FSSET() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeFSSET(); } else { - int flagReg = gprT1; - if (doStatus) { getFlagReg(flagReg, fsInstance); } // Get status result from upper instruction - else { mVUallocSFLAGa(flagReg, fpsInstance); } // Get status result 
from last status setting instruction - AND16ItoR(flagReg, 0x03f); // Remember not to modify upper 16 bits because of mac flag - OR16ItoR(flagReg, (_Imm12_ & 0xfc0)); - if (!doStatus) { mVUallocSFLAGb(flagReg, fsInstance); } + int flagReg1, flagReg2; + getFlagReg(flagReg1, fsInstance); + if (!(doStatus||doDivFlag)) { getFlagReg(flagReg2, fpsInstance); MOV16RtoR(flagReg1, flagReg2); } // Get status result from last status setting instruction + AND16ItoR(flagReg1, 0x03f); // Remember not to modify upper 16 bits because of mac flag + OR16ItoR (flagReg1, (_Imm12_ & 0xfc0)); } } diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index c5e6c046cc..0017e98438 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -185,7 +185,9 @@ declareAllVariables #define _writesVI (1<<25) // Current Instruction writes to VI #define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction #define _isFSSSET (1<<27) // Cur Instruction is FSSET -//#define _isBranch2 (1<<28) // Cur Instruction is a Branch that writes VI regs (BAL/JALR) +#define _doDivFlag (1<<28) // Transfer Div flag to Status Flag + +//#define _isBranch2 (1<<31) // Cur Instruction is a Branch that writes VI regs (BAL/JALR) #define isNOP (mVUinfo & (1<<0)) #define isBranch (mVUinfo & (1<<1)) @@ -214,7 +216,8 @@ declareAllVariables #define writesVI (mVUinfo & (1<<25)) #define swapOps (mVUinfo & (1<<26)) #define isFSSET (mVUinfo & (1<<27)) -//#define isBranch2 (mVUinfo & (1<<28)) +#define doDivFlag (mVUinfo & (1<<28)) +//#define isBranch2 (mVUinfo & (1<<31)) #define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) #define mmVI(_VIreg_) (_VIreg_ - 1) From 85cf1ab6dc14b5b2a6d432e978a91f89006b225c Mon Sep 17 00:00:00 2001 From: refraction Date: Sun, 12 Apr 2009 15:51:24 +0000 Subject: [PATCH 047/143] Fix for MGS3 corruption from r955, i don't know why but where vifRegs was set previously, it was completely ignored, regardless of the fact the code has run through there before doing anything else O_o git-svn-id: http://pcsx2.googlecode.com/svn/trunk@961 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 4d466169cc..be2cd2e5b7 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -351,11 +351,13 @@ static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanu if (VIFdmanum == 0) { VU = &VU0; + vifRegs = vif0Regs; assert(v->addr < memsize); } else { VU = &VU1; + vifRegs = vif1Regs; assert(v->addr < memsize); } @@ -502,14 +504,14 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma if (VIFdmanum == 0) { VU = &VU0; - //vifRegs = vif0Regs; + vifRegs = vif0Regs; assert(v->addr < memsize); } else { VU = &VU1; - //vifRegs = vif1Regs; + vifRegs = vif1Regs; assert(v->addr < memsize); if (vu1MicroIsSkipping()) @@ -544,7 +546,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma #ifdef VIFUNPACKDEBUG - if()vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * + if((vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) > (u32)(VIFdmanum ? 
0x4000 : 0x1000)) { //Sanity Check (memory overflow) @@ -820,7 +822,6 @@ static __forceinline void vif0UNPACK(u32 *data) vif0.tag.size = len; vif0Regs->offset = 0; - vifRegs = (VIFregisters*)vif0Regs; vifMaskRegs = g_vif0Masks; vif = &vif0; vifRow = g_vifRow0; @@ -1569,7 +1570,6 @@ static __forceinline void vif1UNPACK(u32 *data) vif1.tag.addr <<= 4; vif1.tag.cmd = vif1.cmd; - vifRegs = (VIFregisters*)vif1Regs; vifMaskRegs = g_vif1Masks; vif = &vif1; vifRow = g_vifRow1; From 6830f07f27151a27f78e62e8128587e4e9398393 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Sun, 12 Apr 2009 19:46:42 +0000 Subject: [PATCH 048/143] GSdx: GoW2, try #3 git-svn-id: http://pcsx2.googlecode.com/svn/trunk@962 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GPURenderer.h | 13 ++++--------- plugins/GSdx/GSRenderer.h | 39 +++++++++++--------------------------- plugins/GSdx/GSState.cpp | 2 +- 3 files changed, 16 insertions(+), 38 deletions(-) diff --git a/plugins/GSdx/GPURenderer.h b/plugins/GSdx/GPURenderer.h index 96ff0df01f..17df468a44 100644 --- a/plugins/GSdx/GPURenderer.h +++ b/plugins/GSdx/GPURenderer.h @@ -56,20 +56,15 @@ protected: { if(message == WM_KEYUP) { - if(wParam == VK_DELETE) + switch(wParam) { + case VK_DELETE: m_filter = (m_filter + 1) % 3; return 0; - } - - if(wParam == VK_END) - { + case VK_END: m_dither = m_dither ? 0 : 1; return 0; - } - - if(wParam == VK_NEXT) - { + case VK_NEXT: m_aspectratio = (m_aspectratio + 1) % 3; return 0; } diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index f7a5b05df0..ad56de020f 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -67,36 +67,25 @@ protected: { int step = (::GetAsyncKeyState(VK_SHIFT) & 0x8000) ? -1 : 1; - if(msg.wParam == VK_F5) + switch(msg.wParam) { + case VK_F5: m_interlace = (m_interlace + 7 + step) % 7; return true; - } - - if(msg.wParam == VK_F6) - { + case VK_F6: m_aspectratio = (m_aspectratio + 3 + step) % 3; return true; - } - - if(msg.wParam == VK_F7) - { + case VK_F7: m_wnd.SetWindowText(_T("PCSX2")); m_osd = !m_osd; return true; - } - - if(msg.wParam == VK_DELETE) - { + case VK_DELETE: m_aa1 = !m_aa1; return true; - } - - if(msg.wParam == VK_END) - { + case VK_END: m_blur = !m_blur; return true; - } + } } return false; @@ -398,17 +387,11 @@ protected: { if(msg.message == WM_KEYDOWN) { - if(msg.wParam == VK_F12) + switch(msg.wParam) { - if(m_capture.IsCapturing()) - { - m_capture.EndCapture(); - } - else - { - m_capture.BeginCapture(GetFPS()); - } - + case VK_F12: + if(m_capture.IsCapturing()) m_capture.EndCapture(); + else m_capture.BeginCapture(GetFPS()); return true; } } diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index 0664a1fff6..bf147881b0 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -2088,7 +2088,7 @@ bool GSC_GodOfWar2(const GSFrameInfo& fi, int& skip) if(fi.TME && fi.FBP == 0x00100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00100 && fi.TPSM == PSM_PSMCT16 // ntsc || fi.TME && fi.FBP == 0x02100 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x02100 && fi.TPSM == PSM_PSMCT16) // pal { - skip = 30; // shadows + skip = 29; // shadows } else if(fi.TME && fi.FBP == 0x00500 && fi.FPSM == PSM_PSMCT24 && fi.TBP0 == 0x02100 && fi.TPSM == PSM_PSMCT32) // pal { From ad6dce5e9cb3323493e338e507f597004b5d781f Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Sun, 12 Apr 2009 21:19:29 +0000 Subject: [PATCH 049/143] SPU2-X: Worked on savestate support a bit. It now remembers an update timing variable more (could fix a few crashes). 
This increases the savestate version though, so make sure you have a memory card save ready before upgrading! Also implemented a way of delaying audio output after loading states. This masks the ugly noise that some games produce directly after loading, keeping your valuable speakers intact :p git-svn-id: http://pcsx2.googlecode.com/svn/trunk@963 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/spu2-x/src/SaveStateSPU.cpp | 10 +++++++--- plugins/spu2-x/src/SndOut.cpp | 15 ++++++++++++++- plugins/spu2-x/src/SndOut.h | 8 +++++--- plugins/spu2-x/src/Spu2.cpp | 3 --- plugins/spu2-x/src/Spu2.h | 1 - plugins/spu2-x/src/Timestretcher.cpp | 19 +++++++++++++++++-- 6 files changed, 43 insertions(+), 13 deletions(-) diff --git a/plugins/spu2-x/src/SaveStateSPU.cpp b/plugins/spu2-x/src/SaveStateSPU.cpp index 59445df78f..d8ea6b31a1 100644 --- a/plugins/spu2-x/src/SaveStateSPU.cpp +++ b/plugins/spu2-x/src/SaveStateSPU.cpp @@ -35,6 +35,7 @@ struct SPU2freezeData s16 OutPos; s16 InputPos; u32 Cycles; + u32 lClocks; int PlayMode; // Used as a base pointer to a series PcmCache blocks. @@ -47,7 +48,7 @@ static const u32 SAVE_ID = 0x1227521; // versioning for saves. // Increment this when changes to the savestate system are made. -static const u32 SAVE_VERSION = 0x0004; +static const u32 SAVE_VERSION = 0x0005; static void wipe_the_cache() { @@ -71,6 +72,7 @@ s32 __fastcall FreezeIt( SPU2freezeData& spud ) spud.OutPos = OutPos; spud.InputPos = InputPos; spud.Cycles = Cycles; + spud.lClocks = lClocks; spud.PlayMode = PlayMode; // Save our cache: @@ -115,8 +117,6 @@ s32 __fastcall ThawIt( SPU2freezeData& spud ) printf("\tAudio may not recover correctly. Save your game to memorycard, reset,\n\n"); printf(" and then continue from there.\n\n"); - resetClock = true; - // Do *not* reset the cores. // We'll need some "hints" as to how the cores should be initialized, // and the only way to get that is to use the game's existing core settings @@ -141,6 +141,7 @@ s32 __fastcall ThawIt( SPU2freezeData& spud ) OutPos = spud.OutPos; InputPos = spud.InputPos; Cycles = spud.Cycles; + lClocks = spud.lClocks; PlayMode = spud.PlayMode; // Load the ADPCM cache: @@ -172,6 +173,9 @@ s32 __fastcall ThawIt( SPU2freezeData& spud ) Cores[c].Voices[v].SBuffer = pcm_cache_data[cacheIdx].Sampledata; } } + + SndBuffer::ClearContents(); + } return 0; } diff --git a/plugins/spu2-x/src/SndOut.cpp b/plugins/spu2-x/src/SndOut.cpp index 5d6ebe7b5e..55f035e692 100644 --- a/plugins/spu2-x/src/SndOut.cpp +++ b/plugins/spu2-x/src/SndOut.cpp @@ -303,6 +303,13 @@ int SndBuffer::m_dsp_progress = 0; int SndBuffer::m_dsp_writepos = 0; int SndBuffer::m_timestretch_progress = 0; +int SndBuffer::ssFreeze = 0; + +void SndBuffer::ClearContents() +{ + SndBuffer::soundtouchClearContents(); + SndBuffer::ssFreeze = 30; //Delays sound output for about half a second. +} void SndBuffer::Write( const StereoOut32& Sample ) { @@ -320,7 +327,13 @@ void SndBuffer::Write( const StereoOut32& Sample ) if(sndTempProgress < SndOutPacketSize) return; sndTempProgress = 0; - if( dspPluginEnabled ) + //Don't play anything directly after loading a savestate, avoids static killing your speakers. + if ( ssFreeze > 0 ) + { + ssFreeze--; + return; + } + else if( dspPluginEnabled ) { // Convert in, send to winamp DSP, and convert out. 
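Taken together, the SPU2-X hunks above implement the delayed-output trick described in the commit message: ThawIt() calls SndBuffer::ClearContents(), which resets the timestretcher and arms a 30-packet countdown, and SndBuffer::Write() then swallows whole packets until that countdown expires, so the garbage the cores produce right after a state load never reaches the output module. A minimal sketch of the mechanism, using the names from SndOut.cpp/.h (the comment in the diff puts the 30 packets at roughly half a second of audio):

void SndBuffer::ClearContents()
{
	SndBuffer::soundtouchClearContents();  // reset tempo tracking too (see the Timestretcher.cpp hunk below)
	SndBuffer::ssFreeze = 30;              // ~half a second worth of packets
}

// ...and inside SndBuffer::Write(), once a full packet has been accumulated:
//     if (ssFreeze > 0) { ssFreeze--; return; }   // drop the packet instead of playing it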
diff --git a/plugins/spu2-x/src/SndOut.h b/plugins/spu2-x/src/SndOut.h index b057f1bc3d..ac9a03d2cb 100644 --- a/plugins/spu2-x/src/SndOut.h +++ b/plugins/spu2-x/src/SndOut.h @@ -251,13 +251,14 @@ private: static float cTempo; static float eTempo; static int freezeTempo; - + static int ssFreeze; static void _InitFail(); static void _WriteSamples(StereoOut32* bData, int nSamples); static bool CheckUnderrunStatus( int& nSamples, int& quietSampleCount ); static void soundtouchInit(); + static void soundtouchClearContents(); static void soundtouchCleanup(); static void timeStretchWrite(); static void timeStretchUnderrun(); @@ -266,13 +267,14 @@ private: static void PredictDataWrite( int samples ); static float GetStatusPct(); static void UpdateTempoChange(); - + public: static void Init(); static void Cleanup(); static void Write( const StereoOut32& Sample ); static s32 Test(); - + static void ClearContents(); + #ifdef _MSC_VER static void Configure(HWND parent, u32 module ); #else diff --git a/plugins/spu2-x/src/Spu2.cpp b/plugins/spu2-x/src/Spu2.cpp index 0deae90f3a..563179752e 100644 --- a/plugins/spu2-x/src/Spu2.cpp +++ b/plugins/spu2-x/src/Spu2.cpp @@ -36,9 +36,6 @@ void InitADSR(); DWORD CALLBACK TimeThread(PVOID /* unused param */); #endif -// [Air]: fixed the hacky part of UpdateTimer with this: -bool resetClock = true; - void (* _irqcallback)(); void (* dma4callback)(); void (* dma7callback)(); diff --git a/plugins/spu2-x/src/Spu2.h b/plugins/spu2-x/src/Spu2.h index b347a28484..374ff59d37 100644 --- a/plugins/spu2-x/src/Spu2.h +++ b/plugins/spu2-x/src/Spu2.h @@ -174,7 +174,6 @@ extern int recording; extern u32 lClocks; extern u32* cPtr; extern bool hasPtr; -extern bool resetClock; extern void SPU2writeLog( const char* action, u32 rmem, u16 value ); diff --git a/plugins/spu2-x/src/Timestretcher.cpp b/plugins/spu2-x/src/Timestretcher.cpp index ff5a097f8e..08ab039337 100644 --- a/plugins/spu2-x/src/Timestretcher.cpp +++ b/plugins/spu2-x/src/Timestretcher.cpp @@ -128,7 +128,7 @@ void SndBuffer::UpdateTempoChange() // "non-emergency" deadzone: In this area stretching will be strongly discouraged. // Note: due tot he nature of timestretch latency, it's always a wee bit harder to - // cope with low fps (underruns) tha it is high fps (overruns). So to help out a + // cope with low fps (underruns) than it is high fps (overruns). So to help out a // little, the low-end portions of this check are less forgiving than the high-sides. if( cTempo < 0.965f || cTempo > 1.060f || @@ -323,7 +323,22 @@ void SndBuffer::soundtouchInit() // just freeze tempo changes for a while at startup. // the driver buffers are bogus anyway. - freezeTempo = 8; + freezeTempo = 16; + m_predictData = 0; +} + +// reset timestretch management vars, and delay updates a bit: +void SndBuffer::soundtouchClearContents() +{ + pSoundTouch->clear(); + pSoundTouch->setTempo(1); + + cTempo = 1.0; + eTempo = 1.0; + lastPct = 0; + lastEmergencyAdj = 0; + + freezeTempo = 16; m_predictData = 0; } From 7471f251d7ecc8fe501da95840e068f621df4d96 Mon Sep 17 00:00:00 2001 From: msakhtar Date: Sun, 12 Apr 2009 23:31:27 +0000 Subject: [PATCH 050/143] Added a check to to clear QWC register if the upper 16bits are set. This fixes most of the broken backgrounds in movies. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@964 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/HwWrite.cpp | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/pcsx2/HwWrite.cpp b/pcsx2/HwWrite.cpp index d3ef5196dd..d78295d669 100644 --- a/pcsx2/HwWrite.cpp +++ b/pcsx2/HwWrite.cpp @@ -40,11 +40,22 @@ using namespace R5900; // dark cloud2 uses 8 bit DMAs register writes static __forceinline void DmaExec8( void (*func)(), u32 mem, u8 value ) { + u32 qwcRegister = mem | 0x20; + //Its invalid for the hardware to write a DMA while it is active, not without Suspending the DMAC if((value & 0x1) && (psHu8(mem) & 0x1) == 0x1 && (psHu32(DMAC_CTRL) & 0x1) == 1) { DMA_LOG( "DMAExec8 Attempt to run DMA while one is already active mem = %x", mem ); - return; } + + // Upper 16bits of QWC should not be written since QWC is 16bits in size. + if ((psHu32(qwcRegister) >> 16) != 0) + { + DMA_LOG("DMA QWC (%x) upper 16bits set to %x\n", + qwcRegister, + psHu32(qwcRegister) >> 16); + psHu32(qwcRegister) = 0; + } + psHu8(mem) = (u8)value; if ((psHu8(mem) & 0x1) && (psHu32(DMAC_CTRL) & 0x1)) { @@ -55,11 +66,22 @@ static __forceinline void DmaExec8( void (*func)(), u32 mem, u8 value ) static __forceinline void DmaExec16( void (*func)(), u32 mem, u16 value ) { + u32 qwcRegister = mem | 0x20; + //Its invalid for the hardware to write a DMA while it is active, not without Suspending the DMAC if((value & 0x100) && (psHu32(mem) & 0x100) == 0x100 && (psHu32(DMAC_CTRL) & 0x1) == 1) { DMA_LOG( "DMAExec16 Attempt to run DMA while one is already active mem = %x", mem); - return; } + + // Upper 16bits of QWC should not be written since QWC is 16bits in size. + if ((psHu32(qwcRegister) >> 16) != 0) + { + DMA_LOG("DMA QWC (%x) upper 16bits set to %x\n", + qwcRegister, + psHu32(qwcRegister) >> 16); + psHu32(qwcRegister) = 0; + } + psHu16(mem) = (u16)value; if ((psHu16(mem) & 0x100) && (psHu32(DMAC_CTRL) & 0x1)) { @@ -70,11 +92,22 @@ static __forceinline void DmaExec16( void (*func)(), u32 mem, u16 value ) static void DmaExec( void (*func)(), u32 mem, u32 value ) { + u32 qwcRegister = mem | 0x20; + //Its invalid for the hardware to write a DMA while it is active, not without Suspending the DMAC if((value & 0x100) && (psHu32(mem) & 0x100) == 0x100 && (psHu32(DMAC_CTRL) & 0x1) == 1) { DMA_LOG( "DMAExec32 Attempt to run DMA while one is already active mem = %x", mem ); - return; } + + // Upper 16bits of QWC should not be written since QWC is 16bits in size. + if ((psHu32(qwcRegister) >> 16) != 0) + { + DMA_LOG("DMA QWC (%x) upper 16bits set to %x\n", + qwcRegister, + psHu32(qwcRegister) >> 16); + psHu32(qwcRegister) = 0; + } + /* Keep the old tag if in chain mode and hw doesnt set it*/ if( (value & 0xc) == 0x4 && (value & 0xffff0000) == 0) psHu32(mem) = (psHu32(mem) & 0xFFFF0000) | (u16)value; From c43412948f018e304b845f3a2a49ba931b557a79 Mon Sep 17 00:00:00 2001 From: msakhtar Date: Sun, 12 Apr 2009 23:31:59 +0000 Subject: [PATCH 051/143] Added a check to to clear QWC register if the upper 16bits are set. This fixes most of the broken backgrounds in movies. 
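The same guard is pasted into all three DmaExec variants in the HwWrite.cpp hunks above; pulled out into one helper it reads roughly as below (psHu32 and DMA_LOG behave as in the surrounding code, and the helper name is made up for illustration). QWC is architecturally a 16-bit quadword counter, so anything in the upper half is treated as garbage and the register is cleared before the channel is kicked off. A later patch in this series (r969) additionally masks off bit 0 of the computed address in the 8-bit variant, since per its comment the clear could otherwise end up hitting TADR.

static __forceinline void checkQwcGarbage(u32 chcrAddr)
{
	const u32 qwcAddr = chcrAddr | 0x20;    // Dn_QWC sits at Dn_CHCR + 0x20 on every channel
	if ((psHu32(qwcAddr) >> 16) != 0)
	{
		DMA_LOG("DMA QWC (%x) upper 16bits set to %x", qwcAddr, psHu32(qwcAddr) >> 16);
		psHu32(qwcAddr) = 0;
	}
}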
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@965 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/IPU/IPU.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index 748aa2fcb4..8c9ca709b9 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -1404,8 +1404,6 @@ int IPU1dma() return totalqwc; } - g_nDMATransfer &= ~(IPU_DMA_ACTV1 | IPU_DMA_DOTIE1); - if ((ipu1dma->chcr&0xc) == 0) { IPU_INT_TO(totalqwc*BIAS); @@ -1449,6 +1447,8 @@ int IPU1dma() return totalqwc; } } + + g_nDMATransfer &= ~(IPU_DMA_ACTV1 | IPU_DMA_DOTIE1); } if ((ipu1dma->chcr & 0xc) == 0 && ipu1dma->qwc == 0) // Normal Mode From 557564edc2284bd4d3354ac1e0386b3666ca882f Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Mon, 13 Apr 2009 03:46:19 +0000 Subject: [PATCH 052/143] microVU: changed flag handling algorithms some more... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@966 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Compile.inl | 55 +++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 2c1846c2bc..cba20c9136 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -88,35 +88,60 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { } // Status/Mac Flags Setup Code - int xStatus = 0; // Status Instance starts at #0 on every block - int xMac = 0; // Mac Instance starts at #0 on every block - int pStatus = 0; - int pMac = 0; + int xStatus = 8; // Status Instance starts at #0 on every block ((8&3) == 0) + int xMac = 8; // Mac Instance starts at #0 on every block ((8&3) == 0) + int pStatus = 3; + int pMac = 3; + int yStatus = 0; + int xS = 0, yS = 1, zS = 0; + int xM = 0, yM = 1, zM = 0; int xCount = mVUcount; // Backup count mVUcount = 0; iPC = mVUstartPC; for (int i = 0; i < xCount; i++) { if ((xCount - i) > aCount) mVUstatusFlagOp(); // Don't Optimize out on the last ~4+ instructions - if (doStatus||isFSSET||doDivFlag) { mVUinfo |= xStatus << 12; } // _fsInstance - if (doMac) { mVUinfo |= xMac << 10; } // _fmInstance - pStatus = (xStatus + ((mVUstall > 3) ? 3 : mVUstall)) & 3; - pMac = (xMac + ((mVUstall > 3) ? 3 : mVUstall)) & 3; - mVUinfo |= pStatus << 18; // _fvsInstance - mVUinfo |= pMac << 16; // _fvmInstance - if (doStatus||isFSSET||doDivFlag) { xStatus = (xStatus+1) & 3; } - if (doMac) { xMac = (xMac+1) & 3; } + if (doStatus||isFSSET||doDivFlag) { mVUinfo |= (xStatus&3) << 12; } // _fsInstance + if (doMac) { mVUinfo |= (xMac&3) << 10; } // _fmInstance + + yS += (mVUstall > 3) ? 3 : mVUstall; + if (yS > zS) { + pStatus += (yS-zS); + if (pStatus >= xStatus) pStatus = (xStatus-1); + zS++; + xS = (yS-zS); + zS = yS; + yS -= xS; + } + yS++; + + yM += (mVUstall > 3) ? 
3 : mVUstall; + if (yM > zM) { + pMac += (yM-zM); + if (pMac >= xMac) pMac = (xMac-1); + zM++; + xM = (yM-zM); + zM = yM; + yM -= xM; + } + yM++; + + mVUinfo |= (pStatus&3) << 18; // _fvsInstance + mVUinfo |= (pMac&3) << 16; // _fvmInstance + + if (doStatus||isFSSET||doDivFlag) { xStatus = (xStatus+1); } + if (doMac) { xMac = (xMac+1); } incPC2(2); } mVUcount = xCount; // Restore count // Setup Last 4 instances of Status/Mac flags (needed for accurate block linking) iPC = endPC; - for (int i = 3, int j = 3, int ii = 1, int jj = 3; aCount > 0; ii++, aCount--) { + for (int i = 3, int j = 3, int ii = 1, int jj = 1; aCount > 0; ii++, jj++, aCount--) { if ((doStatus||isFSSET||doDivFlag) && (i >= 0)) { - for (; (ii > 0 && i >= 0); ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; i--; } + for (; (ii > 0 && i >= 0); i--, ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; } } if (doMac && (j >= 0)) { - for (; (jj > 0 && j >= 0); jj--) { xMac = (xMac-1) & 3; bMac[i] = xMac; j--; } + for (; (jj > 0 && j >= 0); j--, jj--) { xMac = (xMac-1) & 3; bMac[i] = xMac; } } incPC2(-2); } From 6e1805de36432fbde16a97764f548c1dc753bdca Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Mon, 13 Apr 2009 05:39:18 +0000 Subject: [PATCH 053/143] microVU: minor changes... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@967 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Alloc.h | 4 +--- pcsx2/x86/microVU_Compile.inl | 7 +++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index 3bf038b7c3..37aec058aa 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -53,12 +53,10 @@ struct microAllocInfo { microRegInfo regs; // Pipeline info microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) u8 branch; // 0 = No Branch, 1 = B. 
2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR - //u8 maxStall; // Helps in computing stalls (stores the max amount of cycles to stall for the current opcodes) - //u8 divFlag; u32 cycles; // Cycles for current block u32 count; // Number of VU 64bit instructions ran (starts at 0 for each block) u32 curPC; // Current PC u32 startPC; // Start PC for Cur Block u32 info[pSize/8]; // Info for Instructions in current block - u8 stall[pSize/8]; // Info on how much each instruction stalled + u8 stall[pSize/8]; // Info on how much each instruction stalled (stores the max amount of cycles to stall for the current opcodes) }; diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index cba20c9136..69fbc72724 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -82,7 +82,7 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { // Ensure last ~4+ instructions update mac flags int endPC = iPC; int aCount = 1; // Amount of instructions needed to get 4 valid status/mac flag instances - for (int i = mVUcount; i > 0; i++, aCount++;) { + for (int i = mVUcount; i > 0; i--, aCount++) { if (doStatus) { mVUinfo |= _doMac; if (i >= 4) { break; } } incPC2(-2); } @@ -96,10 +96,9 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { int xS = 0, yS = 1, zS = 0; int xM = 0, yM = 1, zM = 0; int xCount = mVUcount; // Backup count - mVUcount = 0; iPC = mVUstartPC; - for (int i = 0; i < xCount; i++) { - if ((xCount - i) > aCount) mVUstatusFlagOp(); // Don't Optimize out on the last ~4+ instructions + for (mVUcount = 0; mVUcount < xCount; mVUcount++) { + if ((xCount - mVUcount) > aCount) mVUstatusFlagOp(); // Don't Optimize out on the last ~4+ instructions if (doStatus||isFSSET||doDivFlag) { mVUinfo |= (xStatus&3) << 12; } // _fsInstance if (doMac) { mVUinfo |= (xMac&3) << 10; } // _fmInstance From b14881769a42ec074137cf518f591b08ccecabb2 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Mon, 13 Apr 2009 09:48:28 +0000 Subject: [PATCH 054/143] SPU2-X: Small cleanup and made it a bit faster. 
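The microVU_Alloc.h hunk just above settles the per-instruction bookkeeping introduced a few patches earlier: the commented-out maxStall byte is dropped for good, and stall[] now records the worst-case stall for each instruction alongside its packed analysis bits in info[]. A rough sketch of the resulting layout and of how both arrays are addressed (per the iPC defines in microVU_Misc.h earlier in the series; the size constant here is only illustrative, the real arrays are sized with pSize/8):

typedef unsigned int  u32;   // stand-ins for the emulator's own fixed-width typedefs
typedef unsigned char u8;

static const unsigned kSlots = 0x4000 / 8;     // illustrative; one slot per 64-bit VU instruction

struct MicroAllocInfoSketch
{
	u32 info [kSlots];   // packed per-instruction analysis bits (isNOP, doStatus, ...)
	u8  stall[kSlots];   // worst-case stall cycles recorded for the same instruction
};

// Both arrays are indexed with half the current PC, i.e. info[iPC / 2] and stall[iPC / 2],
// because iPC counts 32-bit words while each VU instruction is 64 bits (8 bytes) wide.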
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@968 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/spu2-x/src/Mixer.cpp | 2 +- plugins/spu2-x/src/Spu2.cpp | 5 +---- plugins/spu2-x/src/Spu2.h | 4 +++- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/plugins/spu2-x/src/Mixer.cpp b/plugins/spu2-x/src/Mixer.cpp index b02282827c..61598816c3 100644 --- a/plugins/spu2-x/src/Mixer.cpp +++ b/plugins/spu2-x/src/Mixer.cpp @@ -579,7 +579,7 @@ struct VoiceMixSet const VoiceMixSet VoiceMixSet::Empty( StereoOut32::Empty, StereoOut32::Empty ); -static void __fastcall MixCoreVoices( VoiceMixSet& dest, const uint coreidx ) +static __forceinline void MixCoreVoices( VoiceMixSet& dest, const uint coreidx ) { V_Core& thiscore( Cores[coreidx] ); diff --git a/plugins/spu2-x/src/Spu2.cpp b/plugins/spu2-x/src/Spu2.cpp index 563179752e..cccd18f8ce 100644 --- a/plugins/spu2-x/src/Spu2.cpp +++ b/plugins/spu2-x/src/Spu2.cpp @@ -42,7 +42,6 @@ void (* dma7callback)(); short *spu2regs; short *_spu2mem; -s32 uTicks; u8 callirq; @@ -61,8 +60,6 @@ bool hasPtr=false; int PlayMode; -s16 attrhack[2]={0,0}; - #ifdef _MSC_VER HINSTANCE hInstance; CRITICAL_SECTION threadSync; @@ -376,7 +373,7 @@ static __forceinline void RestoreMMXRegs() #endif } -void __fastcall TimeUpdate(u32 cClocks) +__forceinline void TimeUpdate(u32 cClocks) { u32 dClocks = cClocks-lClocks; diff --git a/plugins/spu2-x/src/Spu2.h b/plugins/spu2-x/src/Spu2.h index 374ff59d37..9d3bcaf70c 100644 --- a/plugins/spu2-x/src/Spu2.h +++ b/plugins/spu2-x/src/Spu2.h @@ -97,7 +97,9 @@ EXPORT_C_(s32) SPU2test(); #include "Spu2replay.h" +#ifdef SPU2X_DEVBUILD #define SPU2_LOG +#endif #include "Debug.h" @@ -177,7 +179,7 @@ extern bool hasPtr; extern void SPU2writeLog( const char* action, u32 rmem, u16 value ); -extern void __fastcall TimeUpdate(u32 cClocks); +extern void TimeUpdate(u32 cClocks); extern u16 SPU_ps1_read(u32 mem); extern void SPU_ps1_write(u32 mem, u16 value); extern void SPU2_FastWrite( u32 rmem, u16 value ); From 44446b47e70f248b49beab57f65524bee8fedba6 Mon Sep 17 00:00:00 2001 From: refraction Date: Mon, 13 Apr 2009 12:15:31 +0000 Subject: [PATCH 055/143] fixed a minor bug from saqib's earlier commit git-svn-id: http://pcsx2.googlecode.com/svn/trunk@969 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/HwWrite.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/HwWrite.cpp b/pcsx2/HwWrite.cpp index d78295d669..4a389a4061 100644 --- a/pcsx2/HwWrite.cpp +++ b/pcsx2/HwWrite.cpp @@ -40,7 +40,7 @@ using namespace R5900; // dark cloud2 uses 8 bit DMAs register writes static __forceinline void DmaExec8( void (*func)(), u32 mem, u8 value ) { - u32 qwcRegister = mem | 0x20; + u32 qwcRegister = (mem | 0x20) & ~0x1; //Need to remove the lower bit else we end up clearing TADR //Its invalid for the hardware to write a DMA while it is active, not without Suspending the DMAC if((value & 0x1) && (psHu8(mem) & 0x1) == 0x1 && (psHu32(DMAC_CTRL) & 0x1) == 1) { From 2711d14f0c4d171967288538a18650d12094cbde Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Tue, 14 Apr 2009 00:15:18 +0000 Subject: [PATCH 056/143] microVU: bug fixes on some flag handling stuff. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@970 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.h | 2 +- pcsx2/x86/microVU_Analyze.inl | 15 ++++++--------- pcsx2/x86/microVU_Compile.inl | 14 +++++++------- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 81294fbe28..0493b38327 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -46,7 +46,7 @@ private: microBlock callersList[mMaxBlocks]; // Foreign Blocks that call Local Blocks public: - microBlockManager() { init(); } + microBlockManager() { init(); } ~microBlockManager() { close(); } void init() { listSize = -1; diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 571435b4af..375ebcbcaa 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -265,19 +265,16 @@ microVUt(void) mVUanalyzeFSSET() { microVUt(void) mVUanalyzeMflag(int Is, int It) { microVU* mVU = mVUx; if (!It) { mVUinfo |= _isNOP; } - else if (mVUcount >= 4) { - incPC2(-8); - if (doStatus) { mVUinfo |= _doMac; } - else { + else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed) + mVUinfo |= _swapOps; + if (mVUcount > 1) { int curPC = iPC; - int i = mVUcount; - for (; i > 0; i--) { - incPC2(-2); - if (doStatus) { mVUinfo |= _doMac; break; } + for (int i = mVUcount, j = 0; i > 1; i--, j++) { + incPC(-2); + if (doStatus) { mVUinfo |= _doMac; if (j >= 3) { break; } } } iPC = curPC; } - incPC2(8); } analyzeVIreg1(Is); analyzeVIreg2(It, 1); diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 69fbc72724..340406960f 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -82,8 +82,8 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { // Ensure last ~4+ instructions update mac flags int endPC = iPC; int aCount = 1; // Amount of instructions needed to get 4 valid status/mac flag instances - for (int i = mVUcount; i > 0; i--, aCount++) { - if (doStatus) { mVUinfo |= _doMac; if (i >= 4) { break; } } + for (int i = mVUcount, int iX = 0; i > 0; i--, aCount++) { + if (doStatus) { mVUinfo |= _doMac; iX++; if ((iX >= 4) || (aCount > 4)) { break; } } incPC2(-2); } @@ -99,9 +99,7 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { iPC = mVUstartPC; for (mVUcount = 0; mVUcount < xCount; mVUcount++) { if ((xCount - mVUcount) > aCount) mVUstatusFlagOp(); // Don't Optimize out on the last ~4+ instructions - if (doStatus||isFSSET||doDivFlag) { mVUinfo |= (xStatus&3) << 12; } // _fsInstance - if (doMac) { mVUinfo |= (xMac&3) << 10; } // _fmInstance - + yS += (mVUstall > 3) ? 3 : mVUstall; if (yS > zS) { pStatus += (yS-zS); @@ -124,8 +122,10 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { } yM++; - mVUinfo |= (pStatus&3) << 18; // _fvsInstance - mVUinfo |= (pMac&3) << 16; // _fvmInstance + mVUinfo |= (xStatus&3) << 12; // _fsInstance + mVUinfo |= (xMac&3) << 10; // _fmInstance + mVUinfo |= (pStatus&3) << 18; // _fvsInstance + mVUinfo |= (pMac&3) << 16; // _fvmInstance if (doStatus||isFSSET||doDivFlag) { xStatus = (xStatus+1); } if (doMac) { xMac = (xMac+1); } From 6cceed6268e30debbfe1f73e01e2a5ec66d71998 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 14 Apr 2009 01:26:57 +0000 Subject: [PATCH 057/143] Many Emitter updates: * added implementations for MOV and Shift instructions (SHL, SHR, ROL, ROR, etc). 
* Improved compilation optimization considerably, by improving inlining selection in cases where constant propagation can be resolved reliably. * Moved lots of code around, so that the new emitter and the legacy emitter are more clearly separated; and renamed some vars. * Changed recompilers to initialize the recBlocks array to 0xcc instead of 0xcd (fills the blocks with the single-byte instruction INT3, which fixes the misalignment mess that would sometimes happen when using disasm views on the RecBlocks contents). * Switched back to /O2 (Optimize for Speed) instead of /Ox, since MSVC (for me) generally fails to optimize Thread-Local storage in /Ox mode. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@971 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Memory.cpp | 4 +- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 23 +- pcsx2/x86/iR3000A.cpp | 2 +- pcsx2/x86/iVUmicroLower.cpp | 21 +- pcsx2/x86/iVUzerorec.cpp | 8 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 2 +- pcsx2/x86/ix86-32/recVTLB.cpp | 26 +- pcsx2/x86/ix86/ix86.cpp | 871 ++++++----- pcsx2/x86/ix86/ix86.h | 1541 +------------------- pcsx2/x86/ix86/ix86_3dnow.cpp | 2 +- pcsx2/x86/ix86/ix86_cpudetect.cpp | 3 +- pcsx2/x86/ix86/ix86_fpu.cpp | 2 +- pcsx2/x86/ix86/ix86_inlines.inl | 237 +++ pcsx2/x86/ix86/ix86_instructions.h | 107 ++ pcsx2/x86/ix86/ix86_internal.h | 59 +- pcsx2/x86/ix86/ix86_legacy.cpp | 692 ++------- pcsx2/x86/ix86/ix86_legacy_instructions.h | 1422 ++++++++++++++++++ pcsx2/x86/ix86/ix86_legacy_internal.h | 78 + pcsx2/x86/ix86/ix86_legacy_types.h | 140 ++ pcsx2/x86/ix86/ix86_mmx.cpp | 2 +- pcsx2/x86/ix86/ix86_sse.cpp | 2 +- pcsx2/x86/ix86/ix86_tools.cpp | 2 +- pcsx2/x86/ix86/ix86_types.h | 679 +++++++-- 23 files changed, 3286 insertions(+), 2639 deletions(-) create mode 100644 pcsx2/x86/ix86/ix86_inlines.inl create mode 100644 pcsx2/x86/ix86/ix86_instructions.h create mode 100644 pcsx2/x86/ix86/ix86_legacy_instructions.h create mode 100644 pcsx2/x86/ix86/ix86_legacy_internal.h create mode 100644 pcsx2/x86/ix86/ix86_legacy_types.h diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index b30a545335..8d993e7b70 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -517,8 +517,8 @@ void __fastcall vuMicroRead128(u32 addr,mem128_t* data) data[1]=*(u64*)&vu->Micro[addr+8]; } -// [TODO] : Profile this code and see how often the VUs get written, and how -// often it changes the values being written (invoking a cpuClear). +// Profiled VU writes: Happen very infrequently, with exception of BIOS initialization (at most twice per +// frame in-game, and usually none at all after BIOS), so cpu clears aren't much of a big deal. 
template void __fastcall vuMicroWrite8(u32 addr,mem8_t data) diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index ba2df10aca..6ed073af1e 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -217,7 +217,6 @@ /> + + + + + + + + @@ -2959,6 +2974,10 @@ > + + diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 38e2021609..66a78c0105 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -538,7 +538,7 @@ void recResetIOP() DevCon::Status( "iR3000A Resetting recompiler memory and structures" ); - memset_8<0xcd,RECMEM_SIZE>( recMem ); + memset_8<0xcc,RECMEM_SIZE>( recMem ); // 0xcc is INT3 iopClearRecLUT((BASEBLOCK*)m_recBlockAlloc, (((Ps2MemSize::IopRam + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4))); diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 9f9d816cf7..0c2ab15768 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -799,17 +799,22 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) else MOV32ItoM(offset+(_W?12:(_Z?8:(_Y?4:0))), c); } else { + + // (this is one of my test cases for the new emitter --air) + using namespace x86Emitter; + if ( x86reg >= 0 ) { - if ( _X ) MOV32ItoRm(x86reg, 0x00000000, offset); - if ( _Y ) MOV32ItoRm(x86reg, 0x00000000, offset+4); - if ( _Z ) MOV32ItoRm(x86reg, 0x00000000, offset+8); - if ( _W ) MOV32ItoRm(x86reg, 0x3f800000, offset+12); + x86IndexReg thisreg( x86reg ); + if ( _X ) MOV(ptr32[thisreg+offset], 0x00000000 ); + if ( _Y ) MOV(ptr32[thisreg+offset+4], 0x00000000 ); + if ( _Z ) MOV(ptr32[thisreg+offset+8], 0x00000000 ); + if ( _W ) MOV(ptr32[thisreg+offset+12], 0x3f800000); } else { - if ( _X ) MOV32ItoM(offset, 0x00000000); - if ( _Y ) MOV32ItoM(offset+4, 0x00000000); - if ( _Z ) MOV32ItoM(offset+8, 0x00000000); - if ( _W ) MOV32ItoM(offset+12, 0x3f800000); + if ( _X ) MOV(ptr32[offset], 0x00000000); + if ( _Y ) MOV(ptr32[offset+4], 0x00000000); + if ( _Z ) MOV(ptr32[offset+8], 0x00000000); + if ( _W ) MOV(ptr32[offset+14], 0x3f800000); } } return; diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 0f454e638e..bebcd3cdf3 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -3569,7 +3569,7 @@ void recVUMI_BranchHandle() if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc); - MOV32ItoR(s_JumpX86, 0); + MOV32ItoR(s_JumpX86, 1); // use 1 to disable optimization to XOR s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr-1; if( !(s_pCurInst->type & INST_BRANCH_DELAY) ) { @@ -3578,7 +3578,7 @@ void recVUMI_BranchHandle() if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION ) MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), pc+8); - MOV32ItoR(s_JumpX86, 0); + MOV32ItoR(s_JumpX86, 1); // use 1 to disable optimization to XOR s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr-1; x86SetJ8( j8Ptr[ 1 ] ); @@ -3815,7 +3815,7 @@ void recVUMI_B( VURegs* vuu, s32 info ) if( s_pCurBlock->blocks.size() > 1 ) { s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - MOV32ItoR(s_JumpX86, 0); + MOV32ItoR(s_JumpX86, 1); s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1; s_UnconditionalDelay = 1; } @@ -3841,7 +3841,7 @@ void recVUMI_BAL( VURegs* vuu, s32 info ) if( s_pCurBlock->blocks.size() > 1 ) { s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - MOV32ItoR(s_JumpX86, 0); + MOV32ItoR(s_JumpX86, 
1); s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr-1; s_UnconditionalDelay = 1; } diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 2dd76b7778..a95147c1b4 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -531,7 +531,7 @@ void recResetEE( void ) maxrecmem = 0; - memset_8<0xcd, REC_CACHEMEM>(recMem); + memset_8<0xcc, REC_CACHEMEM>(recMem); // 0xcc is INT3 memzero_ptr( m_recBlockAlloc ); ClearRecLUT((BASEBLOCK*)m_recBlockAlloc, (((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4))); diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index dd45f51d82..564a636c3e 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -31,14 +31,24 @@ using namespace vtlb_private; // (used as an equivalent to movaps, when a free XMM register is unavailable for some reason) void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm ) { - MOV32RmtoR(EAX,srcRm); - MOV32RtoRm(destRm,EAX); - MOV32RmtoR(EAX,srcRm,4); - MOV32RtoRm(destRm,EAX,4); - MOV32RmtoR(EAX,srcRm,8); - MOV32RtoRm(destRm,EAX,8); - MOV32RmtoR(EAX,srcRm,12); - MOV32RtoRm(destRm,EAX,12); + // (this is one of my test cases for the new emitter --air) + + using namespace x86Emitter; + + x86IndexReg src( srcRm ); + x86IndexReg dest( destRm ); + + MOV( eax, ptr[src] ); + MOV( ptr[dest], eax ); + + MOV( eax, ptr[src+4] ); + MOV( ptr[dest+4], eax ); + + MOV( eax, ptr[src+8] ); + MOV( ptr[dest+8], eax ); + + MOV( eax, ptr[src+12] ); + MOV( ptr[dest+12], eax ); } /* diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index f62cabf789..b3de07a04a 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -15,13 +15,20 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + /* - * ix86 core v0.6.2 - * Authors: linuzappz - * alexey silinov - * goldfinger - * zerofrog(@gmail.com) - * cottonvibes(@gmail.com) + * ix86 core v0.9.0 + * + * Original Authors (v0.6.2 and prior): + * linuzappz + * alexey silinov + * goldfinger + * zerofrog(@gmail.com) + * + * Authors of v0.9.0: + * Jake.Stine(@gmail.com) + * cottonvibes(@gmail.com) + * sudonim(1@gmail.com) */ #include "PrecompiledHeader.h" @@ -29,310 +36,363 @@ #include "System.h" #include "ix86_internal.h" +// ------------------------------------------------------------------------ +// Notes on Thread Local Storage: +// * TLS is pretty simple, and "just works" from a programmer perspective, with only +// some minor additional computational overhead (see performance notes below). +// +// * MSVC and GCC handle TLS differently internally, but behavior to the programmer is +// generally identical. +// +// Performance Considerations: +// * GCC's implementation involves an extra dereference from normal storage. +// +// * MSVC's implementation involves *two* extra dereferences from normal storage because +// it has to look up the TLS heap pointer from the Windows Thread Storage Area. (in +// generated ASM code, this dereference is denoted by access to the fs:[2ch] address). +// +// * However, in either case, the optimizer usually optimizes it to a register so the +// extra overhead is minimal over a series of instructions. (Note!! the Full Opt- +// imization [/Ox] option effectively disables TLS optimizations in MSVC, causing +// generally significant code bloat). 
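For reference, the TLS-qualified declarations that follow rely on a small compiler-abstraction macro; a minimal sketch of it (an assumption here, mirroring the mapping the legacy ix86.h header declares) looks like this:

// Sketch only, not part of this patch: the __threadlocal wrapper assumed by the
// per-thread emitter state declared below.
#ifdef _MSC_VER
#	define __threadlocal __declspec(thread)   // MSVC keyword; resolved through the thread's TEB
#else
#	define __threadlocal __thread             // GCC keyword; one extra dereference vs. normal storage
#endif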
+// + + __threadlocal u8 *x86Ptr; __threadlocal u8 *j8Ptr[32]; __threadlocal u32 *j32Ptr[32]; -PCSX2_ALIGNED16(u32 p[4]); -PCSX2_ALIGNED16(u32 p2[4]); -PCSX2_ALIGNED16(float f[4]); - XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; namespace x86Emitter { const x86IndexerType ptr; - -////////////////////////////////////////////////////////////////////////////////////////// -// -const x86Register32 x86Register32::Empty( -1 ); - -const x86Register32 eax( 0 ); -const x86Register32 ebx( 3 ); -const x86Register32 ecx( 1 ); -const x86Register32 edx( 2 ); -const x86Register32 esi( 6 ); -const x86Register32 edi( 7 ); -const x86Register32 ebp( 5 ); -const x86Register32 esp( 4 ); - -const x86Register16 ax( 0 ); -const x86Register16 bx( 3 ); -const x86Register16 cx( 1 ); -const x86Register16 dx( 2 ); -const x86Register16 si( 6 ); -const x86Register16 di( 7 ); -const x86Register16 bp( 5 ); -const x86Register16 sp( 4 ); - -const x86Register8 al( 0 ); -const x86Register8 cl( 1 ); -const x86Register8 dl( 2 ); -const x86Register8 bl( 3 ); -const x86Register8 ah( 4 ); -const x86Register8 ch( 5 ); -const x86Register8 dh( 6 ); -const x86Register8 bh( 7 ); - -////////////////////////////////////////////////////////////////////////////////////////// -// x86Register Method Implementations -// -x86ModRm x86Register32::operator+( const x86Register32& right ) const -{ - return x86ModRm( *this, right ); -} - -x86ModRm x86Register32::operator+( const x86ModRm& right ) const -{ - return right + *this; -} - -x86ModRm x86Register32::operator+( s32 right ) const -{ - return x86ModRm( *this, right ); -} - -x86ModRm x86Register32::operator*( u32 right ) const -{ - return x86ModRm( Empty, *this, right ); -} - -////////////////////////////////////////////////////////////////////////////////////////// -// x86ModRm Method Implementations -// -x86ModRm& x86ModRm::Add( const x86IndexReg& src ) -{ - if( src == Index ) - { - Factor++; - } - else if( src == Base ) - { - // Compound the existing register reference into the Index/Scale pair. - Base = x86IndexReg::Empty; - - if( src == Index ) - Factor++; - else - { - jASSUME( Index.IsEmpty() ); // or die if we already have an index! - Index = src; - Factor = 2; - } - } - else if( Base.IsEmpty() ) - Base = src; - else if( Index.IsEmpty() ) - Index = src; - else - assert( false ); // oops, only 2 regs allowed per ModRm! - - return *this; -} - -x86ModRm& x86ModRm::Add( const x86ModRm& src ) -{ - Add( src.Base ); - Add( src.Displacement ); - - // If the factor is 1, we can just treat index like a base register also. - if( src.Factor == 1 ) - { - Add( src.Index ); - } - else if( Index.IsEmpty() ) - { - Index = src.Index; - Factor = 1; - } - else if( Index == src.Index ) - Factor++; - else - assert( false ); // oops, only 2 regs allowed! - - return *this; -} - -////////////////////////////////////////////////////////////////////////////////////////// -// ModSib Method Implementations -// +const x86IndexerTypeExplicit<4> ptr32; +const x86IndexerTypeExplicit<2> ptr16; +const x86IndexerTypeExplicit<1> ptr8; // ------------------------------------------------------------------------ -// Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values. -// Necessary because by default ModSib compounds registers into Index when possible. 
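To make the bit packing above concrete, here is a minimal standalone sketch (illustration only, not part of the patch, using the standard x86 register numbering eax=0, ecx=1, ebx=3, ebp=5) that reproduces the same ModRM/SIB layout for two of the cases EmitSibMagic distinguishes: a scaled-index form that needs a SIB byte, and the special-cased [ebp] form that must carry an explicit zero displacement:

// Standalone sketch of the ModRM/SIB byte layout the helpers above write out.
#include <cstdio>

typedef unsigned char u8;

static u8 ModRM( u8 mod, u8 reg, u8 rm )     { return (mod << 6) | (reg << 3) | rm; }
static u8 SibSB( u8 ss, u8 index, u8 base )  { return (ss << 6) | (index << 3) | base; }

int main()
{
	// mov eax, [ebx + ecx*4 + 0x10]  ->  8B 44 8B 10
	// mod=01 (disp8), reg=eax, rm=100 (SIB follows); SIB scale=4, index=ecx, base=ebx
	u8 a[] = { 0x8B, ModRM(1, 0, 4), SibSB(2, 1, 3), 0x10 };

	// mov eax, [ebp]  ->  8B 45 00
	// EBP must be encoded with an explicit displacement of 0, because mod=00/rm=101
	// is reserved for the displacement-only (disp32) form.
	u8 b[] = { 0x8B, ModRM(1, 0, 5), 0x00 };

	for( unsigned i = 0; i < sizeof(a); ++i ) printf( "%02X ", a[i] );
	printf( "\n" );
	for( unsigned i = 0; i < sizeof(b); ++i ) printf( "%02X ", b[i] );
	printf( "\n" );
	return 0;
}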
-// -void ModSib::Reduce() +const x86Register32 x86Register32::Empty; +const x86Register16 x86Register16::Empty; +const x86Register8 x86Register8::Empty; +const x86IndexReg x86IndexReg::Empty; + +const x86Register32 + eax( 0 ), ebx( 3 ), + ecx( 1 ), edx( 2 ), + esi( 6 ), edi( 7 ), + ebp( 5 ), esp( 4 ); + +const x86Register16 + ax( 0 ), bx( 3 ), + cx( 1 ), dx( 2 ), + si( 6 ), di( 7 ), + bp( 5 ), sp( 4 ); + +const x86Register8 + al( 0 ), cl( 1 ), + dl( 2 ), bl( 3 ), + ah( 4 ), ch( 5 ), + dh( 6 ), bh( 7 ); + +namespace Internal { - // If no index reg, then load the base register into the index slot. - if( Index.IsEmpty() ) + const Group1ImplAll ADD; + const Group1ImplAll OR; + const Group1ImplAll ADC; + const Group1ImplAll SBB; + const Group1ImplAll AND; + const Group1ImplAll SUB; + const Group1ImplAll XOR; + const Group1ImplAll CMP; + + const Group2ImplAll ROL; + const Group2ImplAll ROR; + const Group2ImplAll RCL; + const Group2ImplAll RCR; + const Group2ImplAll SHL; + const Group2ImplAll SHR; + const Group2ImplAll SAR; + + // Performance note: VC++ wants to use byte/word register form for the following + // ModRM/SibSB constructors if we use iWrite, and furthermore unrolls the + // the shift using a series of ADDs for the following results: + // add cl,cl + // add cl,cl + // add cl,cl + // or cl,bl + // add cl,cl + // ... etc. + // + // This is unquestionably bad optimization by Core2 standard, an generates tons of + // register aliases and false dependencies. (although may have been ideal for early- + // brand P4s with a broken barrel shifter?). The workaround is to do our own manual + // x86Ptr access and update using a u32 instead of u8. Thanks to little endianness, + // the same end result is achieved and no false dependencies are generated. + // + // (btw, I know this isn't a critical performance item by any means, but it's + // annoying simply because it *should* be an easy thing to optimize) + + __forceinline void ModRM( uint mod, uint reg, uint rm ) + { + *(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm; + x86Ptr++; + } + + __forceinline void SibSB( u32 ss, u32 index, u32 base ) + { + *(u32*)x86Ptr = (ss << 6) | (index << 3) | base; + x86Ptr++; + } + + // ------------------------------------------------------------------------ + // returns TRUE if this instruction requires SIB to be encoded, or FALSE if the + // instruction ca be encoded as ModRm alone. + static __forceinline bool NeedsSibMagic( const ModSibBase& info ) + { + // If base register is ESP, then we need a SIB: + if( info.Base.IsStackPointer() ) return true; + + // no registers? no sibs! + // (ModSibBase::Reduce + if( info.Index.IsEmpty() ) return false; + + // A scaled register needs a SIB + if( info.Scale != 0 ) return true; + + // two registers needs a SIB + if( !info.Base.IsEmpty() ) return true; + + return false; + } + + ////////////////////////////////////////////////////////////////////////////////////////// + // Conditionally generates Sib encoding information! + // + // regfield - register field to be written to the ModRm. This is either a register specifier + // or an opcode extension. In either case, the instruction determines the value for us. + // + __forceinline void EmitSibMagic( uint regfield, const ModSibBase& info ) + { + jASSUME( regfield < 8 ); + + int displacement_size = (info.Displacement == 0) ? 0 : + ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); + + if( !NeedsSibMagic( info ) ) + { + // Use ModRm-only encoding, with the rm field holding an index/base register, if + // one has been specified. 
If neither register is specified then use Disp32 form, + // which is encoded as "EBP w/o displacement" (which is why EBP must always be + // encoded *with* a displacement of 0, if it would otherwise not have one). + + if( info.Index.IsEmpty() ) + { + ModRM( 0, regfield, ModRm_UseDisp32 ); + iWrite( info.Displacement ); + return; + } + else + { + if( info.Index == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, info.Index.Id ); + } + } + else + { + // In order to encode "just" index*scale (and no base), we have to encode + // it as a special [index*scale + displacement] form, which is done by + // specifying EBP as the base register and setting the displacement field + // to zero. (same as ModRm w/o SIB form above, basically, except the + // ModRm_UseDisp flag is specified in the SIB instead of the ModRM field). + + if( info.Base.IsEmpty() ) + { + ModRM( 0, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); + iWrite( info.Displacement ); + return; + } + else + { + if( info.Base == ebp && displacement_size == 0 ) + displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + + ModRM( displacement_size, regfield, ModRm_UseSib ); + SibSB( info.Scale, info.Index.Id, info.Base.Id ); + } + } + + if( displacement_size != 0 ) + { + *(u32*)x86Ptr = info.Displacement; + x86Ptr += (displacement_size == 1) ? 1 : 4; + } + } +} + +using namespace Internal; + +/* +emitterT void x86SetPtr( u8* ptr ) +{ + x86Ptr = ptr; +} + +////////////////////////////////////////////////////////////////////////////////////////// +// x86Ptr Label API +// + +class x86Label +{ +public: + class Entry + { + protected: + u8* (*m_emit)( u8* emitTo, u8* label_target, int cc ); // callback for the instruction to emit (cc = comparison type) + u8* m_base; // base address of the instruction (passed to the instruction) + int m_cc; // comparison type of the instruction + + public: + explicit Entry( int cc ) : + m_base( x86Ptr ) + , m_writebackpos( writebackidx ) + { + } + + void Commit( const u8* target ) const + { + //uptr reltarget = (uptr)m_base - (uptr)target; + //*((u32*)&m_base[m_writebackpos]) = reltarget; + jASSUME( m_emit != NULL ); + jASSUME( m_base != NULL ); + return m_emit( m_base, target, m_cc ); + } + }; + +protected: + u8* m_target; // x86Ptr target address of this label + Entry m_writebacks[8]; + int m_writeback_curpos; + +public: + // creates a label list with no valid target. + // Use x86LabelList::Set() to set a target prior to class destruction. + x86Label() : m_target() + { + } + + x86Label( EmitPtrCache& src ) : m_target( src.GetPtr() ) { - Index = Base; - Scale = 0; - Base = x86IndexReg::Empty; - return; } - // The Scale has a series of valid forms, all shown here: + // Performs all address writebacks on destruction. + virtual ~x86Label() + { + IssueWritebacks(); + } + + void SetTarget() { m_address = x86Ptr; } + void SetTarget( void* addr ) { m_address = (u8*)addr; } + + void Clear() + { + m_writeback_curpos = 0; + } - switch( Scale ) + // Adds a jump/call instruction to this label for writebacks. 
+ void AddWriteback( void* emit_addr, u8* (*instruction)(), int cc ) { - case 0: break; - case 1: Scale = 0; break; - case 2: Scale = 1; break; - - case 3: // becomes [reg*2+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 1; - break; - - case 4: Scale = 2; break; - - case 5: // becomes [reg*4+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 2; - break; - - case 6: // invalid! - assert( false ); - break; - - case 7: // so invalid! - assert( false ); - break; - - case 8: Scale = 3; break; - case 9: // becomes [reg*8+reg] - jASSUME( Base.IsEmpty() ); - Base = Index; - Scale = 3; - break; + jASSUME( m_writeback_curpos < MaxWritebacks ); + m_writebacks[m_writeback_curpos] = Entry( (u8*)instruction, addrpart ) ); + m_writeback_curpos++; } -} - -ModSib::ModSib( const x86ModRm& src ) : - Base( src.Base ), - Index( src.Index ), - Scale( src.Factor ), - Displacement( src.Displacement ) -{ - Reduce(); -} - -ModSib::ModSib( x86IndexReg base, x86IndexReg index, int scale, s32 displacement ) : - Base( base ), - Index( index ), - Scale( scale ), - Displacement( displacement ) -{ - Reduce(); -} - -ModSib::ModSib( s32 displacement ) : - Base(), - Index(), - Scale(0), - Displacement( displacement ) -{ -} - -// ------------------------------------------------------------------------ -// returns TRUE if this instruction requires SIB to be encoded, or FALSE if the -// instruction ca be encoded as ModRm alone. -bool NeedsSibMagic( const ModSib& info ) -{ - // no registers? no sibs! - if( info.Index.IsEmpty() ) return false; - - // A scaled register needs a SIB - if( info.Scale != 0 ) return true; - - // two registers needs a SIB - if( !info.Base.IsEmpty() ) return true; - - // If index register is ESP, then we need a SIB: - // (the ModSib::Reduce() ensures that stand-alone ESP will be in the - // index position for us) - if( info.Index == esp ) return true; - - return false; -} - -// ------------------------------------------------------------------------ -// Conditionally generates Sib encoding information! -// -// regfield - register field to be written to the ModRm. This is either a register specifier -// or an opcode extension. In either case, the instruction determines the value for us. -// -void EmitSibMagic( int regfield, const ModSib& info ) -{ - int displacement_size = (info.Displacement == 0) ? 0 : - ( ( info.IsByteSizeDisp() ) ? 1 : 2 ); - - if( !NeedsSibMagic( info ) ) + + void IssueWritebacks() const { - // Use ModRm-only encoding, with the rm field holding an index/base register, if - // one has been specified. If neither register is specified then use Disp32 form, - // which is encoded as "EBP w/o displacement" (which is why EBP must always be - // encoded *with* a displacement of 0, if it would otherwise not have one). - - if( info.Index.IsEmpty() ) - ModRM( 0, regfield, ModRm_UseDisp32 ); - else + const std::list::const_iterator& start = m_list_writebacks. + for( ; start!=end; start++ ) { - if( info.Index == ebp && displacement_size == 0 ) - displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! + Entry& current = *start; + u8* donespot = current.Commit(); + + // Copy the data from the m_nextinst to the current location, + // and update any additional writebacks (but what about multiple labels?!?) 
- ModRM( displacement_size, regfield, info.Index.Id ); } } - else - { - // In order to encode "just" index*scale (and no base), we have to encode - // it as a special [index*scale + displacement] form, which is done by - // specifying EBP as the base register and setting the displacement field - // to zero. (same as ModRm w/o SIB form above, basically, except the - // ModRm_UseDisp flag is specified in the SIB instead of the ModRM field). +}; +#endif - if( info.Base.IsEmpty() ) - { - ModRM( 0, regfield, ModRm_UseSib ); - SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); - displacement_size = 2; - } - else - { - if( info.Base == ebp && displacement_size == 0 ) - displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - - ModRM( displacement_size, regfield, ModRm_UseSib ); - SibSB( info.Scale, info.Index.Id, info.Base.Id ); - } - } - - switch( displacement_size ) - { - case 0: break; - case 1: write8( info.Displacement ); break; - case 2: write32( info.Displacement ); break; - jNO_DEFAULT - } +void JMP( x86Label& dest ) +{ + dest.AddWriteback( x86Ptr, emitJMP, 0 ); } +void JLE( x86Label& dest ) +{ + dest.AddWriteback( x86Ptr, emitJCC, 0 ); +} + +void x86SetJ8( u8* j8 ) +{ + u32 jump = ( x86Ptr - j8 ) - 1; + + if ( jump > 0x7f ) { + Console::Error( "j8 greater than 0x7f!!" ); + assert(0); + } + *j8 = (u8)jump; +} + +void x86SetJ8A( u8* j8 ) +{ + u32 jump = ( x86Ptr - j8 ) - 1; + + if ( jump > 0x7f ) { + Console::Error( "j8 greater than 0x7f!!" ); + assert(0); + } + + if( ((uptr)x86Ptr&0xf) > 4 ) { + + uptr newjump = jump + 16-((uptr)x86Ptr&0xf); + + if( newjump <= 0x7f ) { + jump = newjump; + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + } + } + *j8 = (u8)jump; +} + +emitterT void x86SetJ32( u32* j32 ) +{ + *j32 = ( x86Ptr - (u8*)j32 ) - 4; +} + +emitterT void x86SetJ32A( u32* j32 ) +{ + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + x86SetJ32(j32); +} + +emitterT void x86Align( int bytes ) +{ + // forward align + x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); +} +*/ + // ------------------------------------------------------------------------ -// Conditionally generates Sib encoding information! +// Internal implementation of EmitSibMagic which has been custom tailored +// to optimize special forms of the Lea instructions accordingly, such +// as when a LEA can be replaced with a "MOV reg,imm" or "MOV reg,reg". // -// regfield - register field to be written to the ModRm. This is either a register specifier -// or an opcode extension. In either case, the instruction determines the value for us. -// -emitterT void EmitSibMagic( x86Register32 regfield, const ModSib& info ) -{ - EmitSibMagic( regfield.Id, info ); -} - template< typename ToReg > -static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false ) +static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool is16bit=false ) { int displacement_size = (src.Displacement == 0) ? 0 : ( ( src.IsByteSizeDisp() ) ? 
1 : 2 ); @@ -348,17 +408,17 @@ static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false ) if( src.Index.IsEmpty() ) { if( is16bit ) - MOV16ItoR( to.Id, src.Displacement ); + MOV( to, src.Displacement ); else - MOV32ItoR( to.Id, src.Displacement ); + MOV( to, src.Displacement ); return; } else if( displacement_size == 0 ) { if( is16bit ) - MOV16RtoR( to.Id, src.Index.Id ); + MOV( to, ToReg( src.Index.Id ) ); else - MOV32RtoR( to.Id, src.Index.Id ); + MOV( to, ToReg( src.Index.Id ) ); return; } else @@ -366,7 +426,7 @@ static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false ) // note: no need to do ebp+0 check since we encode all 0 displacements as // register assignments above (via MOV) - write8( 0x8d ); + iWrite( 0x8d ); ModRM( displacement_size, to.Id, src.Index.Id ); } } @@ -377,115 +437,236 @@ static void EmitLeaMagic( ToReg to, const ModSib& src, bool is16bit=false ) if( displacement_size == 0 ) { // Encode [Index*Scale] as a combination of Mov and Shl. - // This is more efficient because of the bloated format which requires - // a 32 bit displacement. + // This is more efficient because of the bloated LEA format which requires + // a 32 bit displacement, and the compact nature of the alterntive. + // + // (this does not apply to older model P4s with the broken barrel shifter, + // but we currently aren't optimizing for that target anyway). - if( is16bit ) - { - MOV16RtoR( to.Id, src.Index.Id ); - SHL16ItoR( to.Id, src.Scale ); - } - else - { - MOV32RtoR( to.Id, src.Index.Id ); - SHL32ItoR( to.Id, src.Scale ); - } + MOV( to, ToReg( src.Index.Id ) ); + SHL( to, src.Scale ); return; } - - write8( 0x8d ); + iWrite( 0x8d ); ModRM( 0, to.Id, ModRm_UseSib ); SibSB( src.Scale, src.Index.Id, ModRm_UseDisp32 ); - displacement_size = 2; // force 32bit displacement. + iWrite( src.Displacement ); + return; } else { if( src.Base == ebp && displacement_size == 0 ) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - write8( 0x8d ); + iWrite( 0x8d ); ModRM( displacement_size, to.Id, ModRm_UseSib ); SibSB( src.Scale, src.Index.Id, src.Base.Id ); + + /*switch( displacement_size ) + { + case 0: break; + case 1: emit.write( src.Displacement ); break; + case 2: emit.write( src.Displacement ); break; + jNO_DEFAULT + }*/ } } - - switch( displacement_size ) - { - case 0: break; - case 1: write8( src.Displacement ); break; - case 2: write32( src.Displacement ); break; - jNO_DEFAULT - } + if( displacement_size != 0 ) + { + *(u32*)x86Ptr = src.Displacement; + x86Ptr += (displacement_size == 1) ? 1 : 4; + } } -emitterT void LEA32( x86Register32 to, const ModSib& src ) +__emitinline void LEA( x86Register32 to, const ModSibBase& src ) { EmitLeaMagic( to, src ); } -emitterT void LEA16( x86Register16 to, const ModSib& src ) +__emitinline void LEA( x86Register16 to, const ModSibBase& src ) { - // fixme: is this right? Does Lea16 use 32 bit displacement and ModRM form? - write8( 0x66 ); EmitLeaMagic( to, src ); } +////////////////////////////////////////////////////////////////////////////////////////// +// MOV instruction Implementation + +template< typename ImmType, typename SibMagicType > +class MovImpl +{ +public: + static const uint OperandSize = sizeof(ImmType); + +protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + +public: + static __forceinline void Emit( const x86Register& to, const x86Register& from ) + { + if( to == from ) return; // ignore redundant MOVs. 
+ + prefix16(); + iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + ModRM( 3, from.Id, to.Id ); + } + + static __forceinline void Emit( const ModSibBase& dest, const x86Register& from ) + { + prefix16(); + + // mov eax has a special from when writing directly to a DISP32 address + // (sans any register index/base registers). + + if( from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty() ) + { + iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); + iWrite( dest.Displacement ); + } + else + { + iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + SibMagicType::Emit( from.Id, dest ); + } + } + + static __forceinline void Emit( const x86Register& to, const ModSibBase& src ) + { + prefix16(); + + // mov eax has a special from when reading directly from a DISP32 address + // (sans any register index/base registers). + + if( to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty() ) + { + iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); + iWrite( src.Displacement ); + } + else + { + iWrite( Is8BitOperand() ? 0x8a : 0x8b ); + SibMagicType::Emit( to.Id, src ); + } + } + + static __forceinline void Emit( const x86Register& to, ImmType imm ) + { + // Note: MOV does not have (reg16/32,imm8) forms. + + if( imm == 0 ) + XOR( to, to ); + else + { + prefix16(); + iWrite( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); + iWrite( imm ); + } + } + + static __forceinline void Emit( ModSibStrict dest, ImmType imm ) + { + prefix16(); + iWrite( Is8BitOperand() ? 0xc6 : 0xc7 ); + SibMagicType::Emit( 0, dest ); + iWrite( imm ); + } +}; + +namespace Internal +{ + typedef MovImpl MOV32; + typedef MovImpl MOV16; + typedef MovImpl MOV8; + + typedef MovImpl MOV32i; + typedef MovImpl MOV16i; + typedef MovImpl MOV8i; +} + +// Inlining Notes: +// I've set up the inlining to be as practical and intelligent as possible, which means +// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to +// virtually no code. In the case of (Reg, Imm) forms, the inlinign is up to the dis- +// cretion of the compiler. +// + +// TODO : Turn this into a macro after it's been debugged and accuracy-approved! 
:D + +// ---------- 32 Bit Interface ----------- +__forceinline void MOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); } +__forceinline void MOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); } +__forceinline void MOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); } +__noinline void MOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); } +__noinline void MOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); } +__noinline void MOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); } + +void MOV( const x86Register32& to, u32 imm ) { MOV32i::Emit( to, imm ); } + + +// ---------- 16 Bit Interface ----------- +__forceinline void MOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); } +__forceinline void MOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); } +__forceinline void MOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); } +__noinline void MOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); } +__noinline void MOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); } +__noinline void MOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); } + +void MOV( const x86Register16& to, u16 imm ) { MOV16i::Emit( to, imm ); } + + +// ---------- 8 Bit Interface ----------- +__forceinline void MOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); } +__forceinline void MOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); } +__forceinline void MOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); } +__noinline void MOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); } +__noinline void MOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); } +__noinline void MOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); } + +void MOV( const x86Register8& to, u8 imm ) { MOV8i::Emit( to, imm ); } + + ////////////////////////////////////////////////////////////////////////////////////////// // Miscellaneous Section! // Various Instructions with no parameter and no special encoding logic. // -emitterT void RET() { write8( 0xC3 ); } -emitterT void CBW() { write16( 0x9866 ); } -emitterT void CWD() { write8( 0x98 ); } -emitterT void CDQ() { write8( 0x99 ); } -emitterT void CWDE() { write8( 0x98 ); } +__forceinline void RET() { write8( 0xC3 ); } +__forceinline void CBW() { write16( 0x9866 ); } +__forceinline void CWD() { write8( 0x98 ); } +__forceinline void CDQ() { write8( 0x99 ); } +__forceinline void CWDE() { write8( 0x98 ); } -emitterT void LAHF() { write8( 0x9f ); } -emitterT void SAHF() { write8( 0x9e ); } +__forceinline void LAHF() { write8( 0x9f ); } +__forceinline void SAHF() { write8( 0x9e ); } ////////////////////////////////////////////////////////////////////////////////////////// // Push / Pop Emitters // -// fixme? push/pop instructions always push and pop aligned to whatever mode the cpu -// is running in. So even thought these say push32, they would essentially be push64 on -// an x64 build. Should I rename them accordingly? --air -// // Note: pushad/popad implementations are intentionally left out. 
The instructions are // invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead. -emitterT void POP( x86Register32 from ) +__forceinline void POP( x86Register32 from ) { write8( 0x58 | from.Id ); } + +__emitinline void POP( const ModSibBase& from ) { - write8( 0x58 | from.Id ); + iWrite( 0x8f ); Internal::EmitSibMagic( 0, from ); } -emitterT void POP( const ModSib& from ) -{ - write8( 0x8f ); EmitSibMagic( 0, from ); -} +__forceinline void PUSH( u32 imm ) { write8( 0x68 ); write32( imm ); } +__forceinline void PUSH( x86Register32 from ) { write8( 0x50 | from.Id ); } -emitterT void PUSH( u32 imm ) +__emitinline void PUSH( const ModSibBase& from ) { - write8( 0x68 ); write32( imm ); -} - -emitterT void PUSH( x86Register32 from ) -{ - write8( 0x50 | from.Id ); -} - -emitterT void PUSH( const ModSib& from ) -{ - write8( 0xff ); EmitSibMagic( 6, from ); + iWrite( 0xff ); Internal::EmitSibMagic( 6, from ); } // pushes the EFLAGS register onto the stack -emitterT void PUSHFD() { write8( 0x9C ); } +__forceinline void PUSHFD() { write8( 0x9C ); } // pops the EFLAGS register from the stack -emitterT void POPFD() { write8( 0x9D ); } +__forceinline void POPFD() { write8( 0x9D ); } } diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index bf89a17853..6f2cb31575 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -15,1512 +15,49 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + /* - * ix86 definitions v0.6.2 - * Authors: linuzappz - * alexey silinov - * goldfinger - * shadow < shadow@pcsx2.net > - * cottonvibes(@gmail.com) + * ix86 public header v0.9.0 + * + * Original Authors (v0.6.2 and prior): + * linuzappz + * alexey silinov + * goldfinger + * zerofrog(@gmail.com) + * + * Authors of v0.9.0: + * Jake.Stine(@gmail.com) + * cottonvibes(@gmail.com) + * sudonim(1@gmail.com) */ -#pragma once - -#include "ix86_types.h" - -#ifdef _MSC_VER -#define __threadlocal __declspec(thread) -#else -#define __threadlocal __thread -#endif - -#define MMXONLY(code) code - - -//------------------------------------------------------------------ -// write functions -//------------------------------------------------------------------ -#define emitterT __forceinline - -extern __threadlocal u8 *x86Ptr; -extern __threadlocal u8 *j8Ptr[32]; -extern __threadlocal u32 *j32Ptr[32]; - -template< typename T > -static emitterT void x86write( T val ) -{ - *(T*)x86Ptr = val; - x86Ptr += sizeof(T); -} - -static emitterT void write8( u8 val ) -{ - *x86Ptr = val; - x86Ptr++; -} - -static emitterT void write16( u16 val ) -{ - *(u16*)x86Ptr = val; - x86Ptr += 2; -} - -static emitterT void write24( u32 val ) -{ - *x86Ptr++ = (u8)(val & 0xff); - *x86Ptr++ = (u8)((val >> 8) & 0xff); - *x86Ptr++ = (u8)((val >> 16) & 0xff); -} - -static emitterT void write32( u32 val ) -{ - *(u32*)x86Ptr = val; - x86Ptr += 4; -} - -static emitterT void write64( u64 val ) -{ - *(u64*)x86Ptr = val; - x86Ptr += 8; -} - -//------------------------------------------------------------------ - -//------------------------------------------------------------------ -// jump/align functions -//------------------------------------------------------------------ -extern void x86SetPtr( u8 *ptr ); -extern void x86SetJ8( u8 *j8 ); -extern void x86SetJ8A( u8 *j8 ); -extern void x86SetJ16( u16 *j16 ); -extern void x86SetJ16A( u16 *j16 ); -extern void x86SetJ32( u32 *j32 ); -extern void x86SetJ32A( u32 *j32 ); -extern void 
x86Align( int bytes ); -extern void x86AlignExecutable( int align ); -//------------------------------------------------------------------ - ////////////////////////////////////////////////////////////////////////////////////////// // New C++ Emitter! // // To use it just include the x86Emitter namespace into your file/class/function off choice. - -namespace x86Emitter -{ - extern void POP( x86Register32 from ); - extern void POP( const ModSib& from ); - - extern void PUSH( u32 imm ); - extern void PUSH( x86Register32 from ); - extern void PUSH( const ModSib& from ); - - extern void LEA32( x86Register32 to, const ModSib& src ); - extern void LEA16( x86Register16 to, const ModSib& src ); - - - static __forceinline void POP( void* from ) { POP( ptr[from] ); } - static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); } - - #define DECLARE_GROUP1_OPCODE_HELPER( lwr, bits ) \ - emitterT void lwr##bits( x86Register##bits to, x86Register##bits from ); \ - emitterT void lwr##bits( x86Register##bits to, void* from ); \ - emitterT void lwr##bits( x86Register##bits to, const ModSib& from ); \ - emitterT void lwr##bits( x86Register##bits to, u##bits imm ); \ - emitterT void lwr##bits( const ModSib& to, x86Register##bits from ); \ - emitterT void lwr##bits( void* to, x86Register##bits from ); \ - emitterT void lwr##bits( void* to, u##bits imm ); \ - emitterT void lwr##bits( const ModSib& to, u##bits imm ); - - #define DECLARE_GROUP1_OPCODE( lwr ) \ - DECLARE_GROUP1_OPCODE_HELPER( lwr, 32 ) - DECLARE_GROUP1_OPCODE_HELPER( lwr, 16 ) - DECLARE_GROUP1_OPCODE_HELPER( lwr, 8 ) - - DECLARE_GROUP1_OPCODE( ADD ) - DECLARE_GROUP1_OPCODE( CMP ) - DECLARE_GROUP1_OPCODE( OR ) - DECLARE_GROUP1_OPCODE( ADC ) - DECLARE_GROUP1_OPCODE( SBB ) - DECLARE_GROUP1_OPCODE( AND ) - DECLARE_GROUP1_OPCODE( SUB ) - DECLARE_GROUP1_OPCODE( XOR ) - -} - - -extern void CLC( void ); -extern void NOP( void ); - -//////////////////////////////////// -// mov instructions // -//////////////////////////////////// - -// mov r32 to r32 -extern void MOV32RtoR( x86IntRegType to, x86IntRegType from ); -// mov r32 to m32 -extern void MOV32RtoM( uptr to, x86IntRegType from ); -// mov m32 to r32 -extern void MOV32MtoR( x86IntRegType to, uptr from ); -// mov [r32] to r32 -extern void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset=0 ); -// mov [r32][r32< subtract ST(0) from ST(1), store in ST(1) and POP stack -extern void FSUBP( void ); -// fmul ST(src) to fpu reg stack ST(0) -extern void FMUL32Rto0( x86IntRegType src ); -// fmul ST(0) to fpu reg stack ST(src) -extern void FMUL320toR( x86IntRegType src ); -// fdiv ST(src) to fpu reg stack ST(0) -extern void FDIV32Rto0( x86IntRegType src ); -// fdiv ST(0) to fpu reg stack ST(src) -extern void FDIV320toR( x86IntRegType src ); -// fdiv ST(0) to fpu reg stack ST(src), pop stack, store in ST(src) -extern void FDIV320toRP( x86IntRegType src ); - -// fadd m32 to fpu reg stack -extern void FADD32( u32 from ); -// fsub m32 to fpu reg stack -extern void FSUB32( u32 from ); -// fmul m32 to fpu reg stack -extern void FMUL32( u32 from ); -// fdiv m32 to fpu reg stack -extern void FDIV32( u32 from ); -// fcomi st, st( i) -extern void FCOMI( x86IntRegType src ); -// fcomip st, st( i) -extern void FCOMIP( x86IntRegType src ); -// fucomi st, st( i) -extern void FUCOMI( x86IntRegType src ); -// fucomip st, st( i) -extern void FUCOMIP( x86IntRegType src ); -// fcom m32 to fpu reg stack -extern void FCOM32( u32 from ); -// fabs fpu reg stack -extern void FABS( void ); -// fsqrt fpu reg stack 
-extern void FSQRT( void ); -// ftan fpu reg stack -extern void FPATAN( void ); -// fsin fpu reg stack -extern void FSIN( void ); -// fchs fpu reg stack -extern void FCHS( void ); - -// fcmovb fpu reg to fpu reg stack -extern void FCMOVB32( x86IntRegType from ); -// fcmove fpu reg to fpu reg stack -extern void FCMOVE32( x86IntRegType from ); -// fcmovbe fpu reg to fpu reg stack -extern void FCMOVBE32( x86IntRegType from ); -// fcmovu fpu reg to fpu reg stack -extern void FCMOVU32( x86IntRegType from ); -// fcmovnb fpu reg to fpu reg stack -extern void FCMOVNB32( x86IntRegType from ); -// fcmovne fpu reg to fpu reg stack -extern void FCMOVNE32( x86IntRegType from ); -// fcmovnbe fpu reg to fpu reg stack -extern void FCMOVNBE32( x86IntRegType from ); -// fcmovnu fpu reg to fpu reg stack -extern void FCMOVNU32( x86IntRegType from ); -extern void FCOMP32( u32 from ); -extern void FNSTSWtoAX( void ); - -#define MMXONLY(code) code - -//****************** -// MMX instructions -//****************** - -// r64 = mm - -// movq m64 to r64 -extern void MOVQMtoR( x86MMXRegType to, uptr from ); -// movq r64 to m64 -extern void MOVQRtoM( uptr to, x86MMXRegType from ); - -// pand r64 to r64 -extern void PANDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); -// pand m64 to r64 ; -extern void PANDMtoR( x86MMXRegType to, uptr from ); -// pandn r64 to r64 -extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); -// pandn r64 to r64 -extern void PANDNMtoR( x86MMXRegType to, uptr from ); -// por r64 to r64 -extern void PORRtoR( x86MMXRegType to, x86MMXRegType from ); -// por m64 to r64 -extern void PORMtoR( x86MMXRegType to, uptr from ); -// pxor r64 to r64 -extern void PXORRtoR( x86MMXRegType to, x86MMXRegType from ); -// pxor m64 to r64 -extern void PXORMtoR( x86MMXRegType to, uptr from ); - -// psllq r64 to r64 -extern void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ); -// psllq m64 to r64 -extern void PSLLQMtoR( x86MMXRegType to, uptr from ); -// psllq imm8 to r64 -extern void PSLLQItoR( x86MMXRegType to, u8 from ); -// psrlq r64 to r64 -extern void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ); -// psrlq m64 to r64 -extern void PSRLQMtoR( x86MMXRegType to, uptr from ); -// psrlq imm8 to r64 -extern void PSRLQItoR( x86MMXRegType to, u8 from ); - -// paddusb r64 to r64 -extern void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddusb m64 to r64 -extern void PADDUSBMtoR( x86MMXRegType to, uptr from ); -// paddusw r64 to r64 -extern void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddusw m64 to r64 -extern void PADDUSWMtoR( x86MMXRegType to, uptr from ); - -// paddb r64 to r64 -extern void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddb m64 to r64 -extern void PADDBMtoR( x86MMXRegType to, uptr from ); -// paddw r64 to r64 -extern void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddw m64 to r64 -extern void PADDWMtoR( x86MMXRegType to, uptr from ); -// paddd r64 to r64 -extern void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddd m64 to r64 -extern void PADDDMtoR( x86MMXRegType to, uptr from ); -extern void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ); - -// paddq m64 to r64 (sse2 only?) -extern void PADDQMtoR( x86MMXRegType to, uptr from ); -// paddq r64 to r64 (sse2 only?) 
-extern void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ); - -extern void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ); - -extern void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSUBDMtoR( x86MMXRegType to, uptr from ); - -// psubq m64 to r64 (sse2 only?) -extern void PSUBQMtoR( x86MMXRegType to, uptr from ); -// psubq r64 to r64 (sse2 only?) -extern void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ); - -// pmuludq m64 to r64 (sse2 only?) -extern void PMULUDQMtoR( x86MMXRegType to, uptr from ); -// pmuludq r64 to r64 (sse2 only?) -extern void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ); - -extern void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPEQDMtoR( x86MMXRegType to, uptr from ); -extern void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PCMPGTDMtoR( x86MMXRegType to, uptr from ); -extern void PSRLWItoR( x86MMXRegType to, u8 from ); -extern void PSRLDItoR( x86MMXRegType to, u8 from ); -extern void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSLLWItoR( x86MMXRegType to, u8 from ); -extern void PSLLDItoR( x86MMXRegType to, u8 from ); -extern void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PSRAWItoR( x86MMXRegType to, u8 from ); -extern void PSRADItoR( x86MMXRegType to, u8 from ); -extern void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ); -extern void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ); -extern void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8 -extern void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ); -extern void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ); -extern void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); -extern void MOVDMtoMMX( x86MMXRegType to, uptr from ); -extern void MOVDMMXtoM( uptr to, x86MMXRegType from ); -extern void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ); -extern void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset=0 ); -extern void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ); -extern void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); -extern void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ); -extern void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8); -extern void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8); -extern void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); - -// emms -extern void EMMS( void ); - -//**********************************************************************************/ -//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits -//********************************************************************************** -extern void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); -extern void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); - -extern void 
PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); - -//********************* -// SSE instructions * -//********************* -extern void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ); -extern void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ); - -extern void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ); -extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); -extern void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); - -extern void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ); -extern void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); - -extern void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ); -extern void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); - -extern void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ); -extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from ); -extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); - -extern void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); -extern void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); -extern void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); -extern void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); -extern void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); -extern void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ); -extern void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ); - -extern void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVUPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); - -extern void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); -extern void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); - -extern void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ); - -extern void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void 
SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); -extern void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); -extern void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ); -extern void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ); -extern void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ); - -extern void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from); -extern void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from); -extern void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from); -extern void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from); - -extern void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void 
SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -extern void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); -extern void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ); -extern void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void 
SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -// VectorPath -extern void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -extern void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); - -extern void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -extern void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); -extern void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -extern void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); - -extern void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -extern void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); - -extern void SSE_STMXCSR( uptr from ); -extern void SSE_LDMXCSR( uptr from ); - - -//********************* -// SSE 2 Instructions* -//********************* - -extern void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from); -extern void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from); - -extern void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from); -extern void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from); - -extern void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); -extern void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PMAXSW_M128_to_XMM( 
x86SSERegType to, uptr from ); -extern void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); - -extern void SSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_XORPD_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ); -extern void SSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -//**********************************************************************************/ -//PACKSSWB,PACKSSDW: Pack Saturate Signed Word -//********************************************************************************** -extern void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from); - -//**********************************************************************************/ -//PUNPCKHWD: Unpack 16bit high -//********************************************************************************** -extern void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from); - -// mult by half words -extern void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from); - - -//**********************************************************************************/ -//PMOVMSKB: Create 16bit mask from signs of 8bit integers -//********************************************************************************** -extern void 
SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from); - -extern void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from); -extern void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from); - -//**********************************************************************************/ -//PEXTRW,PINSRW: Packed Extract/Insert Word * -//********************************************************************************** -extern void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ); -extern void SSE_PINSRW_R32_to_XMM(x86SSERegType from, x86IntRegType to, u8 imm8 ); - - -//**********************************************************************************/ -//PSUBx: Subtract Packed Integers * -//********************************************************************************** -extern void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ); -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PCMPxx: Compare Packed Integers * -//********************************************************************************** -extern void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ); -extern void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ); - -//**********************************************************************************/ -//MOVD: Move Dword(32bit) to /from XMM reg * -//********************************************************************************** -extern void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); -extern void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); -extern void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); -extern void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); -extern void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); - -extern void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -extern void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from ); -extern void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from ); -extern void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ); -extern void 
SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ); - -//**********************************************************************************/ -//MOVD: Move Qword(64bit) to/from MMX/XMM reg * -//********************************************************************************** -extern void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); -extern void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); - - -//**********************************************************************************/ -//POR : SSE Bitwise OR * -//********************************************************************************** -extern void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ); - -extern void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from); - -extern void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); - -// SSSE3 - -extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); - -// SSE4.1 - -#ifndef _MM_MK_INSERTPS_NDX -#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) -#endif - -extern void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); -extern void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8); -extern void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); -extern void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8); -extern void SSE4_BLENDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); -extern void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8); -extern void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from); -extern void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); - -//********************* -// 3DNOW instructions * -//********************* -extern void FEMMS( void ); -extern void PFCMPEQMtoR( x86IntRegType to, uptr from ); -extern void PFCMPGTMtoR( x86IntRegType to, uptr from ); -extern void PFCMPGEMtoR( x86IntRegType to, uptr from ); -extern void PFADDMtoR( x86IntRegType to, uptr from ); -extern void PFADDRtoR( x86IntRegType to, x86IntRegType from ); -extern void PFSUBMtoR( 
x86IntRegType to, uptr from );
-extern void PFSUBRtoR( x86IntRegType to, x86IntRegType from );
-extern void PFMULMtoR( x86IntRegType to, uptr from );
-extern void PFMULRtoR( x86IntRegType to, x86IntRegType from );
-extern void PFRCPMtoR( x86IntRegType to, uptr from );
-extern void PFRCPRtoR( x86IntRegType to, x86IntRegType from );
-extern void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from );
-extern void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from );
-extern void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from );
-extern void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from );
-extern void PF2IDMtoR( x86IntRegType to, uptr from );
-extern void PI2FDMtoR( x86IntRegType to, uptr from );
-extern void PI2FDRtoR( x86IntRegType to, x86IntRegType from );
-extern void PFMAXMtoR( x86IntRegType to, uptr from );
-extern void PFMAXRtoR( x86IntRegType to, x86IntRegType from );
-extern void PFMINMtoR( x86IntRegType to, uptr from );
-extern void PFMINRtoR( x86IntRegType to, x86IntRegType from );
+//
+// This header file is intended for use by public code. It includes the appropriate
+// inlines and class definitions for efficient codegen. (code internal to the emitter
+// should usually use ix86_internal.h instead, and manually include the
+// ix86_inlines.inl file when it is known that inlining of ModSib functions is
+// wanted).
+//
+//
+// Important when Using the New Emitter:
+// Make sure there is *no* data in use or of importance past the end of the
+// current x86Ptr. Ie, don't do fancy x86Ptr rewind tricks of your own. The
+// emitter uses optimized writes which will clobber data past the end of the
+// instruction it's emitting, so even if you know for sure the instruction you
+// are writing is 5 bytes, the emitter will likely emit 9 bytes and then rewind
+// the x86Ptr to the end of the instruction.
+//
+
+#pragma once
+
+#include "ix86_types.h"
+#include "ix86_instructions.h"
+
+// Including legacy items for now, but these should be removed eventually,
+// once most code is no longer dependent on them.
+#include "ix86_legacy_types.h" +#include "ix86_legacy_instructions.h" diff --git a/pcsx2/x86/ix86/ix86_3dnow.cpp b/pcsx2/x86/ix86/ix86_3dnow.cpp index ae6743cc3d..4f053ff37c 100644 --- a/pcsx2/x86/ix86/ix86_3dnow.cpp +++ b/pcsx2/x86/ix86/ix86_3dnow.cpp @@ -17,7 +17,7 @@ */ #include "PrecompiledHeader.h" -#include "ix86_internal.h" +#include "ix86_legacy_internal.h" //------------------------------------------------------------------ // 3DNOW instructions diff --git a/pcsx2/x86/ix86/ix86_cpudetect.cpp b/pcsx2/x86/ix86/ix86_cpudetect.cpp index b1fc04a96e..083d72e1b2 100644 --- a/pcsx2/x86/ix86/ix86_cpudetect.cpp +++ b/pcsx2/x86/ix86/ix86_cpudetect.cpp @@ -18,9 +18,8 @@ #include "PrecompiledHeader.h" -#include "ix86_internal.h" #include "System.h" -#include "Threading.h" +#include "ix86_legacy_internal.h" #include "RedtapeWindows.h" diff --git a/pcsx2/x86/ix86/ix86_fpu.cpp b/pcsx2/x86/ix86/ix86_fpu.cpp index d7e3a65963..f3f9631e01 100644 --- a/pcsx2/x86/ix86/ix86_fpu.cpp +++ b/pcsx2/x86/ix86/ix86_fpu.cpp @@ -17,7 +17,7 @@ */ #include "PrecompiledHeader.h" -#include "ix86_internal.h" +#include "ix86_legacy_internal.h" //------------------------------------------------------------------ // FPU instructions diff --git a/pcsx2/x86/ix86/ix86_inlines.inl b/pcsx2/x86/ix86/ix86_inlines.inl new file mode 100644 index 0000000000..69cc3b03f5 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_inlines.inl @@ -0,0 +1,237 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* + * ix86 core v0.9.0 + * + * Original Authors (v0.6.2 and prior): + * linuzappz + * alexey silinov + * goldfinger + * zerofrog(@gmail.com) + * + * Authors of v0.9.0: + * Jake.Stine(@gmail.com) + * cottonvibes(@gmail.com) + * sudonim(1@gmail.com) + */ + +#pragma once + +// This header module contains functions which, under most circumstances, inline +// nicely with constant propagation from the compiler, resulting in little or +// no actual codegen in the majority of emitter statements. (common forms include: +// RegToReg, PointerToReg, RegToPointer). These cannot be included in the class +// definitions in the .h file because of inter-dependencies with other classes. +// (score one for C++!!) +// +// In order for MSVC to work correctly with __forceinline on class members, +// however, we need to include these methods into all source files which might +// reference them. Without this MSVC generates linker errors. Or, in other words, +// global optimization fails to resolve the externals and junk. +// (score one for MSVC!) 
+ +namespace x86Emitter +{ + ////////////////////////////////////////////////////////////////////////////////////////// + // x86Register Method Implementations + // + __forceinline x86AddressInfo x86IndexReg::operator+( const x86IndexReg& right ) const + { + return x86AddressInfo( *this, right ); + } + + __forceinline x86AddressInfo x86IndexReg::operator+( const x86AddressInfo& right ) const + { + return right + *this; + } + + __forceinline x86AddressInfo x86IndexReg::operator+( s32 right ) const + { + return x86AddressInfo( *this, right ); + } + + __forceinline x86AddressInfo x86IndexReg::operator*( u32 right ) const + { + return x86AddressInfo( Empty, *this, right ); + } + + __forceinline x86AddressInfo x86IndexReg::operator<<( u32 shift ) const + { + return x86AddressInfo( Empty, *this, 1< + * alexey silinov + * goldfinger + * zerofrog(@gmail.com) + * + * Authors of v0.9.0: + * Jake.Stine(@gmail.com) + * cottonvibes(@gmail.com) + * sudonim(1@gmail.com) + */ + +#pragma once + +namespace x86Emitter +{ + // ----- Lea Instructions (Load Effective Address) ----- + // Note: alternate (void*) forms of these instructions are not provided since those + // forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs + // instead. + + extern void LEA( x86Register32 to, const ModSibBase& src ); + extern void LEA( x86Register16 to, const ModSibBase& src ); + + // ----- Push / Pop Instructions ----- + + extern void POP( x86Register32 from ); + extern void POP( const ModSibBase& from ); + + extern void PUSH( u32 imm ); + extern void PUSH( x86Register32 from ); + extern void PUSH( const ModSibBase& from ); + + static __forceinline void POP( void* from ) { POP( ptr[from] ); } + static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); } + + // ------------------------------------------------------------------------ + using Internal::ADD; + using Internal::OR; + using Internal::ADC; + using Internal::SBB; + using Internal::AND; + using Internal::SUB; + using Internal::XOR; + using Internal::CMP; + + using Internal::ROL; + using Internal::ROR; + using Internal::RCL; + using Internal::RCR; + using Internal::SHL; + using Internal::SHR; + using Internal::SAR; + + // ---------- 32 Bit Interface ----------- + extern void MOV( const x86Register32& to, const x86Register32& from ); + extern void MOV( const ModSibBase& sibdest, const x86Register32& from ); + extern void MOV( const x86Register32& to, const ModSibBase& sibsrc ); + extern void MOV( const x86Register32& to, const void* src ); + extern void MOV( const void* dest, const x86Register32& from ); + + extern void MOV( const x86Register32& to, u32 imm ); + extern void MOV( const ModSibStrict<4>& sibdest, u32 imm ); + + // ---------- 16 Bit Interface ----------- + extern void MOV( const x86Register16& to, const x86Register16& from ); + extern void MOV( const ModSibBase& sibdest, const x86Register16& from ); + extern void MOV( const x86Register16& to, const ModSibBase& sibsrc ); + extern void MOV( const x86Register16& to, const void* src ); + extern void MOV( const void* dest, const x86Register16& from ); + + extern void MOV( const x86Register16& to, u16 imm ); + extern void MOV( const ModSibStrict<2>& sibdest, u16 imm ); + + // ---------- 8 Bit Interface ----------- + extern void MOV( const x86Register8& to, const x86Register8& from ); + extern void MOV( const ModSibBase& sibdest, const x86Register8& from ); + extern void MOV( const x86Register8& to, const ModSibBase& sibsrc ); + extern void MOV( const x86Register8& to, const void* 
src ); + extern void MOV( const void* dest, const x86Register8& from ); + + extern void MOV( const x86Register8& to, u8 imm ); + extern void MOV( const ModSibStrict<1>& sibdest, u8 imm ); + +} + diff --git a/pcsx2/x86/ix86/ix86_internal.h b/pcsx2/x86/ix86/ix86_internal.h index 4f3f72f2e4..b8ed9269ca 100644 --- a/pcsx2/x86/ix86/ix86_internal.h +++ b/pcsx2/x86/ix86/ix86_internal.h @@ -1,43 +1,22 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ #pragma once -#include "ix86.h" -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ - -#define MEMADDR(addr, oplen) (addr) - -#define Rex(w,r,x,b) assert(0) -#define RexR(w, reg) assert( !(w || (reg)>=8) ) -#define RexB(w, base) assert( !(w || (base)>=8) ) -#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) -#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) - -#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) - -static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) -static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) - - -//------------------------------------------------------------------ -// General Emitter Helper functions -//------------------------------------------------------------------ - -namespace x86Emitter -{ - extern void EmitSibMagic( int regfield, const ModSib& info ); - extern void EmitSibMagic( x86Register32 regfield, const ModSib& info ); - extern bool NeedsSibMagic( const ModSib& info ); -} - -// From here out are the legacy (old) emitter functions... 
- -extern void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); -extern void ModRM( int mod, int reg, int rm ); -extern void SibSB( int ss, int index, int base ); -extern void SET8R( int cc, int to ); -extern u8* J8Rel( int cc, int to ); -extern u32* J32Rel( int cc, u32 to ); -extern u64 GetCPUTick( void ); -//------------------------------------------------------------------ +#include "ix86_types.h" +#include "ix86_instructions.h" diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index eec040ac33..722aba5089 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -30,10 +30,119 @@ #include "PrecompiledHeader.h" #include "System.h" -#include "ix86_internal.h" +#include "ix86_legacy_internal.h" using namespace x86Emitter; +template< int OperandSize > +static __forceinline x86Register _reghlp( x86IntRegType src ) +{ + return x86Register( src ); +} + +static __forceinline ModSibBase _mrmhlp( x86IntRegType src ) +{ + return ptr[_reghlp<4>(src)]; +} + +template< int OperandSize > +static __forceinline ModSibStrict _mhlp( x86IntRegType src ) +{ + return ModSibStrict( x86IndexReg::Empty, x86IndexReg(src) ); +} + +template< int OperandSize > +static __forceinline ModSibStrict _mhlp2( x86IntRegType src1, x86IntRegType src2 ) +{ + return ModSibStrict( x86IndexReg(src2), x86IndexReg(src1) ); +} + +#define DEFINE_LEGACY_HELPER( cod, bits ) \ + emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { cod( _reghlp(to), _reghlp(from) ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { cod( _reghlp(to), imm ); } \ + emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { cod( _reghlp(to), (void*)from ); } \ + emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { cod( (void*)to, _reghlp(from) ); } \ + emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { cod( ptr##bits[to], imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { cod( _mhlp(to) + offset, imm ); } \ + emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { cod( _reghlp(to), _mhlp(from) + offset ); } \ + emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { cod( _mhlp(to) + offset, _reghlp(from) ); } \ + emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ + { cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ + emitterT void cod##bits##RmStoR( x86IntRegType to, x86IntRegType from1, x86IntRegType from2, int offset ) \ + { cod( _reghlp(to), _mhlp2(from1,from2) + offset ); } + +#define DEFINE_LEGACY_SHIFT_HELPER( cod, bits ) \ + emitterT void cod##bits##CLtoR( x86IntRegType to ) { cod( _reghlp(to), cl ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { cod( _reghlp(to), imm ); } \ + emitterT void cod##bits##CLtoM( uptr to ) { cod( ptr##bits[to], cl ); } \ + emitterT void cod##bits##ItoM( uptr to, u8 imm ) { cod( ptr##bits[to], imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { cod( _mhlp(to) + offset, imm ); } \ + emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { cod( _mhlp(to) + offset, cl ); } + +//emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ +// { cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ + +#define DEFINE_OPCODE_LEGACY( cod ) \ + DEFINE_LEGACY_HELPER( cod, 32 ) \ + DEFINE_LEGACY_HELPER( cod, 16 ) \ + DEFINE_LEGACY_HELPER( cod, 8 ) 
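To make the macro above concrete, here is roughly what DEFINE_OPCODE_LEGACY( ADD ) generates for the 32-bit register and immediate forms. The angle-bracket template arguments were lost in this copy of the patch, so the <4> operand-size argument below (matching the surviving _reghlp<4> in _mrmhlp) is an assumption, not a quote.

// Approximate expansion, for illustration only -- not part of the patch.
// The <4> template argument (operand size in bytes) is assumed; the extracted
// patch text has dropped the original angle-bracket arguments.
emitterT void ADD32RtoR( x86IntRegType to, x86IntRegType from )
{
	// Legacy entry point forwards to the new type-safe ADD overloads.
	ADD( _reghlp<4>( to ), _reghlp<4>( from ) );
}

emitterT void ADD32ItoR( x86IntRegType to, u32 imm )
{
	ADD( _reghlp<4>( to ), imm );
}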
+ +#define DEFINE_OPCODE_SHIFT_LEGACY( cod ) \ + DEFINE_LEGACY_SHIFT_HELPER( cod, 32 ) \ + DEFINE_LEGACY_SHIFT_HELPER( cod, 16 ) \ + DEFINE_LEGACY_SHIFT_HELPER( cod, 8 ) + +////////////////////////////////////////////////////////////////////////////////////////// +// +DEFINE_OPCODE_LEGACY( ADD ) +DEFINE_OPCODE_LEGACY( CMP ) +DEFINE_OPCODE_LEGACY( OR ) +DEFINE_OPCODE_LEGACY( ADC ) +DEFINE_OPCODE_LEGACY( SBB ) +DEFINE_OPCODE_LEGACY( AND ) +DEFINE_OPCODE_LEGACY( SUB ) +DEFINE_OPCODE_LEGACY( XOR ) + +DEFINE_OPCODE_SHIFT_LEGACY( ROL ) +DEFINE_OPCODE_SHIFT_LEGACY( ROR ) +DEFINE_OPCODE_SHIFT_LEGACY( RCL ) +DEFINE_OPCODE_SHIFT_LEGACY( RCR ) +DEFINE_OPCODE_SHIFT_LEGACY( SHL ) +DEFINE_OPCODE_SHIFT_LEGACY( SHR ) +DEFINE_OPCODE_SHIFT_LEGACY( SAR ) + +DEFINE_OPCODE_LEGACY( MOV ) + +// mov r32 to [r32<(to), from ); +} + +emitterT void AND32I8toM( uptr to, s8 from ) +{ + AND( ptr8[to], from ); +} + + + // Note: the 'to' field can either be a register or a special opcode extension specifier // depending on the opcode's encoding. @@ -70,16 +179,6 @@ emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset } } -emitterT void ModRM( s32 mod, s32 reg, s32 rm ) -{ - write8( ( mod << 6 ) | ( (reg & 7) << 3 ) | ( rm & 7 ) ); -} - -emitterT void SibSB( s32 ss, s32 index, s32 base ) -{ - write8( ( ss << 6 ) | ( (index & 7) << 3 ) | ( base & 7 ) ); -} - emitterT void SET8R( int cc, int to ) { RexB(0, to); @@ -191,43 +290,6 @@ emitterT void x86Align( int bytes ) x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); } -//////////////////////////////////////////////////// -// Generates executable code to align to the given alignment (could be useful for the second leg -// of if/else conditionals, which usually fall through a jump target label). -// -// Note: Left in for now just in case, but usefulness is moot. Only K8's and older (non-Prescott) -// P4s benefit from this, and we don't optimize for those platforms anyway. -// -void x86AlignExecutable( int align ) -{ - uptr newx86 = ( (uptr)x86Ptr + align - 1) & ~( align - 1 ); - uptr bytes = ( newx86 - (uptr)x86Ptr ); - - switch( bytes ) - { - case 0: break; - - case 1: NOP(); break; - case 2: MOV32RtoR( ESI, ESI ); break; - case 3: write8(0x08D); write8(0x024); write8(0x024); break; - case 5: NOP(); // falls through to 4... - case 4: write8(0x08D); write8(0x064); write8(0x024); write8(0); break; - case 6: write8(0x08D); write8(0x0B6); write32(0); break; - case 8: NOP(); // falls through to 7... - case 7: write8(0x08D); write8(0x034); write8(0x035); write32(0); break; - - default: - { - // for larger alignments, just use a JMP... 
- u8* aligned_target = JMP8(0); - x86Ptr = (u8*)newx86; - x86SetJ8( aligned_target ); - } - } - - jASSUME( x86Ptr == (u8*)newx86 ); -} - /********************/ /* IX86 instructions */ /********************/ @@ -249,281 +311,6 @@ emitterT void NOP( void ) } -//////////////////////////////////// -// mov instructions / -//////////////////////////////////// - -/* mov r32 to r32 */ -emitterT void MOV32RtoR( x86IntRegType to, x86IntRegType from ) -{ - if( to == from ) return; - - RexRB(0, from, to); - write8( 0x89 ); - ModRM( 3, from, to ); -} - -/* mov r32 to m32 */ -emitterT void MOV32RtoM( uptr to, x86IntRegType from ) -{ - RexR(0, from); - if (from == EAX) { - write8(0xA3); - } else { - write8( 0x89 ); - ModRM( 0, from, DISP32 ); - } - write32( MEMADDR(to, 4) ); -} - -/* mov m32 to r32 */ -emitterT void MOV32MtoR( x86IntRegType to, uptr from ) -{ - RexR(0, to); - if (to == EAX) { - write8(0xA1); - } else { - write8( 0x8B ); - ModRM( 0, to, DISP32 ); - } - write32( MEMADDR(from, 4) ); -} - -emitterT void MOV32RmtoR( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write8( 0x8B ); - WriteRmOffsetFrom(to, from, offset); -} - -/* mov [r32+r32*scale] to r32 */ -emitterT void MOV32RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale ) -{ - RexRXB(0,to,from2,from); - write8( 0x8B ); - ModRM( 0, to, 0x4 ); - SibSB(scale, from2, from ); -} - -// mov r32 to [r32< subtract ST(0) from ST(1), store in ST(1) and POP stack +extern void FSUBP( void ); +// fmul ST(src) to fpu reg stack ST(0) +extern void FMUL32Rto0( x86IntRegType src ); +// fmul ST(0) to fpu reg stack ST(src) +extern void FMUL320toR( x86IntRegType src ); +// fdiv ST(src) to fpu reg stack ST(0) +extern void FDIV32Rto0( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src) +extern void FDIV320toR( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src), pop stack, store in ST(src) +extern void FDIV320toRP( x86IntRegType src ); + +// fadd m32 to fpu reg stack +extern void FADD32( u32 from ); +// fsub m32 to fpu reg stack +extern void FSUB32( u32 from ); +// fmul m32 to fpu reg stack +extern void FMUL32( u32 from ); +// fdiv m32 to fpu reg stack +extern void FDIV32( u32 from ); +// fcomi st, st( i) +extern void FCOMI( x86IntRegType src ); +// fcomip st, st( i) +extern void FCOMIP( x86IntRegType src ); +// fucomi st, st( i) +extern void FUCOMI( x86IntRegType src ); +// fucomip st, st( i) +extern void FUCOMIP( x86IntRegType src ); +// fcom m32 to fpu reg stack +extern void FCOM32( u32 from ); +// fabs fpu reg stack +extern void FABS( void ); +// fsqrt fpu reg stack +extern void FSQRT( void ); +// ftan fpu reg stack +extern void FPATAN( void ); +// fsin fpu reg stack +extern void FSIN( void ); +// fchs fpu reg stack +extern void FCHS( void ); + +// fcmovb fpu reg to fpu reg stack +extern void FCMOVB32( x86IntRegType from ); +// fcmove fpu reg to fpu reg stack +extern void FCMOVE32( x86IntRegType from ); +// fcmovbe fpu reg to fpu reg stack +extern void FCMOVBE32( x86IntRegType from ); +// fcmovu fpu reg to fpu reg stack +extern void FCMOVU32( x86IntRegType from ); +// fcmovnb fpu reg to fpu reg stack +extern void FCMOVNB32( x86IntRegType from ); +// fcmovne fpu reg to fpu reg stack +extern void FCMOVNE32( x86IntRegType from ); +// fcmovnbe fpu reg to fpu reg stack +extern void FCMOVNBE32( x86IntRegType from ); +// fcmovnu fpu reg to fpu reg stack +extern void FCMOVNU32( x86IntRegType from ); +extern void FCOMP32( u32 from ); +extern void FNSTSWtoAX( void ); + +#define MMXONLY(code) 
code + +//****************** +// MMX instructions +//****************** + +// r64 = mm + +// movq m64 to r64 +extern void MOVQMtoR( x86MMXRegType to, uptr from ); +// movq r64 to m64 +extern void MOVQRtoM( uptr to, x86MMXRegType from ); + +// pand r64 to r64 +extern void PANDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pand m64 to r64 ; +extern void PANDMtoR( x86MMXRegType to, uptr from ); +// pandn r64 to r64 +extern void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pandn r64 to r64 +extern void PANDNMtoR( x86MMXRegType to, uptr from ); +// por r64 to r64 +extern void PORRtoR( x86MMXRegType to, x86MMXRegType from ); +// por m64 to r64 +extern void PORMtoR( x86MMXRegType to, uptr from ); +// pxor r64 to r64 +extern void PXORRtoR( x86MMXRegType to, x86MMXRegType from ); +// pxor m64 to r64 +extern void PXORMtoR( x86MMXRegType to, uptr from ); + +// psllq r64 to r64 +extern void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psllq m64 to r64 +extern void PSLLQMtoR( x86MMXRegType to, uptr from ); +// psllq imm8 to r64 +extern void PSLLQItoR( x86MMXRegType to, u8 from ); +// psrlq r64 to r64 +extern void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psrlq m64 to r64 +extern void PSRLQMtoR( x86MMXRegType to, uptr from ); +// psrlq imm8 to r64 +extern void PSRLQItoR( x86MMXRegType to, u8 from ); + +// paddusb r64 to r64 +extern void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusb m64 to r64 +extern void PADDUSBMtoR( x86MMXRegType to, uptr from ); +// paddusw r64 to r64 +extern void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusw m64 to r64 +extern void PADDUSWMtoR( x86MMXRegType to, uptr from ); + +// paddb r64 to r64 +extern void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddb m64 to r64 +extern void PADDBMtoR( x86MMXRegType to, uptr from ); +// paddw r64 to r64 +extern void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddw m64 to r64 +extern void PADDWMtoR( x86MMXRegType to, uptr from ); +// paddd r64 to r64 +extern void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddd m64 to r64 +extern void PADDDMtoR( x86MMXRegType to, uptr from ); +extern void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +// paddq m64 to r64 (sse2 only?) +extern void PADDQMtoR( x86MMXRegType to, uptr from ); +// paddq r64 to r64 (sse2 only?) +extern void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSUBDMtoR( x86MMXRegType to, uptr from ); + +// psubq m64 to r64 (sse2 only?) +extern void PSUBQMtoR( x86MMXRegType to, uptr from ); +// psubq r64 to r64 (sse2 only?) +extern void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ); + +// pmuludq m64 to r64 (sse2 only?) +extern void PMULUDQMtoR( x86MMXRegType to, uptr from ); +// pmuludq r64 to r64 (sse2 only?) 
+extern void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +extern void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPEQDMtoR( x86MMXRegType to, uptr from ); +extern void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PCMPGTDMtoR( x86MMXRegType to, uptr from ); +extern void PSRLWItoR( x86MMXRegType to, u8 from ); +extern void PSRLDItoR( x86MMXRegType to, u8 from ); +extern void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSLLWItoR( x86MMXRegType to, u8 from ); +extern void PSLLDItoR( x86MMXRegType to, u8 from ); +extern void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PSRAWItoR( x86MMXRegType to, u8 from ); +extern void PSRADItoR( x86MMXRegType to, u8 from ); +extern void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ); +extern void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ); +extern void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8 +extern void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ); +extern void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ); +extern void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); +extern void MOVDMtoMMX( x86MMXRegType to, uptr from ); +extern void MOVDMMXtoM( uptr to, x86MMXRegType from ); +extern void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ); +extern void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset=0 ); +extern void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ); +extern void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); +extern void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ); +extern void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8); +extern void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8); +extern void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); + +// emms +extern void EMMS( void ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits +//********************************************************************************** +extern void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); +extern void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); + +extern void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); + +//********************* +// SSE instructions * +//********************* +extern void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ); +extern void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ); + +extern void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); +extern void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern 
void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ); +extern void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ); +extern void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ); +extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from ); +extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); +extern void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ); +extern void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ); + +extern void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE_MOVUPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); +extern void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); + +extern void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ); + +extern void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); 
+extern void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +extern void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +extern void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ); +extern void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ); +extern void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ); + +extern void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from); +extern void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from); +extern void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from); +extern void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from); + +extern void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ); +extern void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +extern void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ); +extern void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +// VectorPath +extern void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +extern void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +extern void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +extern void SSE2_SHUFPD_M128_to_XMM( x86SSERegType 
to, uptr from, u8 imm8 ); + +extern void SSE_STMXCSR( uptr from ); +extern void SSE_LDMXCSR( uptr from ); + + +//********************* +// SSE 2 Instructions* +//********************* + +extern void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from); +extern void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +extern void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from); +extern void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +extern void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +extern void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void 
SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); + +extern void SSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + 
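For illustration only (not part of this patch): the wrappers declared in this header are the legacy, C-style emitter interface, where each call writes the named instruction's bytes at the current x86Ptr position and advances the pointer. The sketch below assumes a hypothetical helper name, caller-supplied buffer/operand addresses, and a bare "ix86.h" include; a real caller must also point x86Ptr at executable memory and terminate the block (for instance with the legacy RET() helper, declared elsewhere) before running it.

#include "ix86.h"	// assumed include path for the legacy emitter declarations

// Hypothetical usage sketch: emit "movdqa xmm0,[srcA]; paddd xmm0,[srcB]; movdqa [dst],xmm0"
// with the SSE2 wrappers declared above.
void EmitVectorAddExample( u8* codeBuffer, uptr srcA, uptr srcB, uptr dst )
{
	x86Ptr = codeBuffer;                      // emit into the caller's (assumed executable) buffer

	SSE2_MOVDQA_M128_to_XMM( XMM0, srcA );    // movdqa xmm0, [srcA]
	SSE2_PADDD_M128_to_XMM ( XMM0, srcB );    // paddd  xmm0, [srcB]
	SSE2_MOVDQA_XMM_to_M128( dst, XMM0 );     // movdqa [dst], xmm0

	// ...emit a return here, and only then execute the buffer.
}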
+//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word +//********************************************************************************** +extern void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from); + +//**********************************************************************************/ +//PUNPCKHWD: Unpack 16bit high +//********************************************************************************** +extern void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +// mult by half words +extern void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from); + + +//**********************************************************************************/ +//PMOVMSKB: Create 16bit mask from signs of 8bit integers +//********************************************************************************** +extern void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +extern void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from); +extern void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +//**********************************************************************************/ +//PEXTRW,PINSRW: Packed Extract/Insert Word * +//********************************************************************************** +extern void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ); +extern void SSE_PINSRW_R32_to_XMM(x86SSERegType from, x86IntRegType to, u8 imm8 ); + + +//**********************************************************************************/ +//PSUBx: Subtract Packed Integers * 
+//********************************************************************************** +extern void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ); +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PCMPxx: Compare Packed Integers * +//********************************************************************************** +extern void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ); +extern void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +extern void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ); + +//**********************************************************************************/ +//MOVD: Move Dword(32bit) to /from XMM reg * +//********************************************************************************** +extern void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); +extern void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset=0 ); +extern void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +extern void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); +extern void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset=0 ); + +extern void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +extern void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from ); +extern void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from ); +extern void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ); +extern void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +extern void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ); + +//**********************************************************************************/ +//MOVD: Move Qword(64bit) to/from MMX/XMM reg * +//********************************************************************************** +extern void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); +extern void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); + + +//**********************************************************************************/ +//POR : SSE Bitwise OR * +//********************************************************************************** +extern void SSE2_POR_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ); +extern void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ); + +extern void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from); + +extern void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); + +// SSSE3 + +extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); + +// SSE4.1 + +#ifndef _MM_MK_INSERTPS_NDX +#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) +#endif + +extern void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8); +extern void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8); +extern void SSE4_BLENDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +extern void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8); +extern void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from); +extern void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); + +//********************* +// 3DNOW instructions * +//********************* +extern void FEMMS( void ); +extern void PFCMPEQMtoR( x86IntRegType to, uptr from ); +extern void PFCMPGTMtoR( x86IntRegType to, uptr from ); +extern void PFCMPGEMtoR( x86IntRegType to, uptr from ); +extern void PFADDMtoR( x86IntRegType to, uptr from ); +extern void PFADDRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFSUBMtoR( x86IntRegType to, uptr from ); +extern void PFSUBRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMULMtoR( x86IntRegType to, uptr from ); +extern void PFMULRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPMtoR( x86IntRegType to, uptr from ); +extern void PFRCPRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ); +extern void PF2IDMtoR( x86IntRegType to, uptr from ); +extern void PI2FDMtoR( x86IntRegType to, uptr from ); +extern void PI2FDRtoR( 
x86IntRegType to, x86IntRegType from ); +extern void PFMAXMtoR( x86IntRegType to, uptr from ); +extern void PFMAXRtoR( x86IntRegType to, x86IntRegType from ); +extern void PFMINMtoR( x86IntRegType to, uptr from ); +extern void PFMINRtoR( x86IntRegType to, x86IntRegType from ); + diff --git a/pcsx2/x86/ix86/ix86_legacy_internal.h b/pcsx2/x86/ix86/ix86_legacy_internal.h new file mode 100644 index 0000000000..92aab168f0 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_legacy_internal.h @@ -0,0 +1,78 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +#include "ix86_internal.h" + +//------------------------------------------------------------------ +// Legacy Helper Macros and Functions (depreciated) +//------------------------------------------------------------------ + +#include "ix86_legacy_types.h" +#include "ix86_legacy_instructions.h" + +#define MEMADDR(addr, oplen) (addr) + +#define Rex(w,r,x,b) assert(0) +#define RexR(w, reg) assert( !(w || (reg)>=8) ) +#define RexB(w, base) assert( !(w || (base)>=8) ) +#define RexRB(w, reg, base) assert( !(w || (reg) >= 8 || (base)>=8) ) +#define RexRXB(w, reg, index, base) assert( !(w || (reg) >= 8 || (index) >= 8 || (base) >= 8) ) + +#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) + +extern void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); +extern void ModRM( uint mod, uint reg, uint rm ); +extern void SibSB( uint ss, uint index, uint base ); +extern void SET8R( int cc, int to ); +extern u8* J8Rel( int cc, int to ); +extern u32* J32Rel( int cc, u32 to ); +extern u64 GetCPUTick( void ); + + +////////////////////////////////////////////////////////////////////////////////////////// +// +emitterT void ModRM( uint mod, uint reg, uint rm ) +{ + // Note: Following ASSUMEs are for legacy support only. + // The new emitter performs these sanity checks during operand construction, so these + // assertions can probably be removed once all legacy emitter code has been removed. + jASSUME( mod < 4 ); + jASSUME( reg < 8 ); + jASSUME( rm < 8 ); + //write8( (mod << 6) | (reg << 3) | rm ); + + *(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm; + x86Ptr++; + +} + +emitterT void SibSB( uint ss, uint index, uint base ) +{ + // Note: Following ASSUMEs are for legacy support only. + // The new emitter performs these sanity checks during operand construction, so these + // assertions can probably be removed once all legacy emitter code has been removed. 
+ jASSUME( ss < 4 ); + jASSUME( index < 8 ); + jASSUME( base < 8 ); + //write8( (ss << 6) | (index << 3) | base ); + + *(u32*)x86Ptr = (ss << 6) | (index << 3) | base; + x86Ptr++; +} diff --git a/pcsx2/x86/ix86/ix86_legacy_types.h b/pcsx2/x86/ix86/ix86_legacy_types.h new file mode 100644 index 0000000000..eb936c3366 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_legacy_types.h @@ -0,0 +1,140 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +#define SIB 4 // maps to ESP +#define SIBDISP 5 // maps to EBP +#define DISP32 5 // maps to EBP + +// general types +typedef int x86IntRegType; + +#define EAX 0 +#define EBX 3 +#define ECX 1 +#define EDX 2 +#define ESI 6 +#define EDI 7 +#define EBP 5 +#define ESP 4 + +#define X86ARG1 EAX +#define X86ARG2 ECX +#define X86ARG3 EDX +#define X86ARG4 EBX + +#define MM0 0 +#define MM1 1 +#define MM2 2 +#define MM3 3 +#define MM4 4 +#define MM5 5 +#define MM6 6 +#define MM7 7 + +typedef int x86MMXRegType; + +#define XMM0 0 +#define XMM1 1 +#define XMM2 2 +#define XMM3 3 +#define XMM4 4 +#define XMM5 5 +#define XMM6 6 +#define XMM7 7 +#define XMM8 8 +#define XMM9 9 +#define XMM10 10 +#define XMM11 11 +#define XMM12 12 +#define XMM13 13 +#define XMM14 14 +#define XMM15 15 + +typedef int x86SSERegType; +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +#define SIB 4 // maps to ESP +#define SIBDISP 5 // maps to EBP +#define DISP32 5 // maps to EBP + +// general types +typedef int x86IntRegType; + +#define EAX 0 +#define EBX 3 +#define ECX 1 +#define EDX 2 +#define ESI 6 +#define EDI 7 +#define EBP 5 +#define ESP 4 + +#define X86ARG1 EAX +#define X86ARG2 ECX +#define X86ARG3 EDX +#define X86ARG4 EBX + +#define MM0 0 +#define MM1 1 +#define MM2 2 +#define MM3 3 +#define MM4 4 +#define MM5 5 +#define MM6 6 +#define MM7 7 + +typedef int x86MMXRegType; + +#define XMM0 0 +#define XMM1 1 +#define XMM2 2 +#define XMM3 3 +#define XMM4 4 +#define XMM5 5 +#define XMM6 6 +#define XMM7 7 +#define XMM8 8 +#define XMM9 9 +#define XMM10 10 +#define XMM11 11 +#define XMM12 12 +#define XMM13 13 +#define XMM14 14 +#define XMM15 15 + +typedef int x86SSERegType; diff --git a/pcsx2/x86/ix86/ix86_mmx.cpp b/pcsx2/x86/ix86/ix86_mmx.cpp index 77f8f33c97..74abe3e5df 100644 --- a/pcsx2/x86/ix86/ix86_mmx.cpp +++ b/pcsx2/x86/ix86/ix86_mmx.cpp @@ -17,7 +17,7 @@ */ #include "PrecompiledHeader.h" -#include "ix86_internal.h" +#include "ix86_legacy_internal.h" //------------------------------------------------------------------ // MMX instructions diff --git a/pcsx2/x86/ix86/ix86_sse.cpp b/pcsx2/x86/ix86/ix86_sse.cpp index ffeb51365b..6eb149dca3 100644 --- a/pcsx2/x86/ix86/ix86_sse.cpp +++ b/pcsx2/x86/ix86/ix86_sse.cpp @@ -17,7 +17,7 @@ */ #include "PrecompiledHeader.h" -#include "ix86_internal.h" +#include "ix86_legacy_internal.h" #include "ix86_sse_helpers.h" ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_tools.cpp b/pcsx2/x86/ix86/ix86_tools.cpp index 13028b7432..9218c76260 100644 --- a/pcsx2/x86/ix86/ix86_tools.cpp +++ b/pcsx2/x86/ix86/ix86_tools.cpp @@ -19,7 +19,7 @@ #include "PrecompiledHeader.h" #include "System.h" -#include "ix86/ix86.h" +#include "ix86.h" // used to make sure regs don't get changed while in recompiler // use FreezeMMXRegs, FreezeXMMRegs diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index ede42f92af..6c91b2a9d7 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -23,57 +23,6 @@ #define X86REGS 8 #define MMXREGS 8 -#define SIB 4 -#define SIBDISP 5 -#define DISP32 5 - -// general types -typedef int x86IntRegType; - -#define EAX 0 -#define EBX 3 -#define ECX 1 -#define EDX 2 -#define ESI 6 -#define EDI 7 -#define EBP 5 -#define ESP 4 - -#define X86ARG1 EAX -#define X86ARG2 ECX -#define X86ARG3 EDX -#define X86ARG4 EBX - -#define MM0 0 -#define MM1 1 -#define MM2 2 -#define MM3 3 -#define MM4 4 -#define MM5 5 -#define MM6 6 -#define MM7 7 - -typedef int x86MMXRegType; - -#define XMM0 0 -#define XMM1 1 -#define XMM2 2 -#define XMM3 3 -#define XMM4 4 -#define XMM5 5 -#define XMM6 6 -#define XMM7 7 -#define XMM8 8 -#define XMM9 9 -#define XMM10 10 -#define XMM11 11 -#define XMM12 12 -#define XMM13 13 -#define XMM14 14 -#define XMM15 15 - -typedef int x86SSERegType; - enum XMMSSEType { XMMT_INT = 0, // integer (sse2 only) @@ -149,104 +98,164 @@ struct CPUINFO{ }; extern CPUINFO cpuinfo; -//------------------------------------------------------------------ +//------------------------------------------------------------------ +#ifdef _MSC_VER +#define __threadlocal __declspec(thread) +#else 
+#define __threadlocal __thread +#endif + +extern __threadlocal u8 *x86Ptr; +extern __threadlocal u8 *j8Ptr[32]; +extern __threadlocal u32 *j32Ptr[32]; + + +//------------------------------------------------------------------ // templated version of is_s8 is required, so that u16's get correct sign extension treatment. template< typename T > static __forceinline bool is_s8( T imm ) { return (s8)imm == (s32)imm; } +template< typename T > +static __forceinline void iWrite( T val ) +{ + *(T*)x86Ptr = val; + x86Ptr += sizeof(T); +} + namespace x86Emitter { - class x86ModRm; - ////////////////////////////////////////////////////////////////////////////////////////// - // - struct x86Register32 +///////////////////////////////////////////////////////////////////////////////////////////// +// __emitline - preprocessors definition +// +// This is configured to inline emitter functions appropriately for release builds, and +// disables some of the more aggressive inlines for dev builds (which can be helpful when +// debugging). +// +// Note: I use __forceinline directly for most single-line class members, when needed. +// There's no point in using __emitline in these cases since the debugger can't trace into +// single-line functions anyway. +// +#ifdef PCSX2_DEVBUILD +#define __emitinline +#else +#define __emitinline __forceinline +#endif + +#ifdef _MSC_VER +# define __noinline __declspec(noinline) +#else +# define __noinline +#endif + + + static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) + static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) + + class x86AddressInfo; + class ModSibBase; + + static __forceinline void write8( u8 val ) { - static const x86Register32 Empty; // defined as an empty/unused value (-1) - - int Id; + iWrite( val ); + } - x86Register32( const x86Register32& src ) : Id( src.Id ) {} - x86Register32() : Id( -1 ) {} - explicit x86Register32( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + static __forceinline void write16( u16 val ) + { + iWrite( val ); + } - bool IsEmpty() const { return Id == -1; } + static __forceinline void write24( u32 val ) + { + *(u32*)x86Ptr = val; + x86Ptr += 3; + } - bool operator==( const x86Register32& src ) const { return Id == src.Id; } - bool operator!=( const x86Register32& src ) const { return Id != src.Id; } - - x86ModRm operator+( const x86Register32& right ) const; - x86ModRm operator+( const x86ModRm& right ) const; - x86ModRm operator+( s32 right ) const; + static __forceinline void write32( u32 val ) + { + iWrite( val ); + } + + static __forceinline void write64( u64 val ) + { + iWrite( val ); + } - x86ModRm operator*( u32 factor ) const; - - x86Register32& operator=( const x86Register32& src ) - { - Id = src.Id; - return *this; - } - }; - ////////////////////////////////////////////////////////////////////////////////////////// - // Similar to x86Register, but without the ability to add/combine them with ModSib. 
// - class x86Register16 + template< int OperandSize > + class x86Register { public: - static const x86Register16 Empty; + static const x86Register Empty; // defined as an empty/unused value (-1) int Id; - x86Register16( const x86Register16& src ) : Id( src.Id ) {} - x86Register16() : Id( -1 ) {} - explicit x86Register16( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + x86Register( const x86Register& src ) : Id( src.Id ) {} + x86Register(): Id( -1 ) {} + explicit x86Register( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } bool IsEmpty() const { return Id == -1; } - bool operator==( const x86Register16& src ) const { return Id == src.Id; } - bool operator!=( const x86Register16& src ) const { return Id != src.Id; } + // Returns true if the register is a valid accumulator: Eax, Ax, Al. + bool IsAccumulator() const { return Id == 0; } - x86Register16& operator=( const x86Register16& src ) + bool operator==( const x86Register& src ) const + { + return (Id == src.Id); + } + + bool operator!=( const x86Register& src ) const + { + return (Id != src.Id); + } + + x86Register& operator=( const x86Register& src ) { Id = src.Id; return *this; } }; + typedef x86Register<4> x86Register32; + typedef x86Register<2> x86Register16; + typedef x86Register<1> x86Register8; + ////////////////////////////////////////////////////////////////////////////////////////// - // Similar to x86Register, but without the ability to add/combine them with ModSib. - // - class x86Register8 + // Use 32 bit registers as out index register (for ModSib memory address calculations) + // Only x86IndexReg provides operators for constructing x86AddressInfo types. + class x86IndexReg : public x86Register32 { public: - static const x86Register8 Empty; + static const x86IndexReg Empty; // defined as an empty/unused value (-1) + + public: + x86IndexReg(): x86Register32() {} + x86IndexReg( const x86IndexReg& src ) : x86Register32( src.Id ) {} + x86IndexReg( const x86Register32& src ) : x86Register32( src ) {} + explicit x86IndexReg( int regId ) : x86Register32( regId ) {} - int Id; + // Returns true if the register is the stack pointer: ESP. 
+ bool IsStackPointer() const { return Id == 4; } - x86Register8( const x86Register16& src ) : Id( src.Id ) {} - x86Register8() : Id( -1 ) {} - explicit x86Register8( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + x86AddressInfo operator+( const x86IndexReg& right ) const; + x86AddressInfo operator+( const x86AddressInfo& right ) const; + x86AddressInfo operator+( s32 right ) const; - bool IsEmpty() const { return Id == -1; } - - bool operator==( const x86Register8& src ) const { return Id == src.Id; } - bool operator!=( const x86Register8& src ) const { return Id != src.Id; } - - x86Register8& operator=( const x86Register8& src ) + x86AddressInfo operator*( u32 factor ) const; + x86AddressInfo operator<<( u32 shift ) const; + + x86IndexReg& operator=( const x86Register32& src ) { Id = src.Id; return *this; } }; - - // Use 32 bit registers as out index register (for ModSig memory address calculations) - typedef x86Register32 x86IndexReg; ////////////////////////////////////////////////////////////////////////////////////////// // - class x86ModRm + class x86AddressInfo { public: x86IndexReg Base; // base register (no scale) @@ -255,7 +264,7 @@ namespace x86Emitter s32 Displacement; // address displacement public: - x86ModRm( x86IndexReg base, x86IndexReg index, int factor=1, s32 displacement=0 ) : + __forceinline x86AddressInfo( const x86IndexReg& base, const x86IndexReg& index, int factor=1, s32 displacement=0 ) : Base( base ), Index( index ), Factor( factor ), @@ -263,7 +272,7 @@ namespace x86Emitter { } - explicit x86ModRm( x86IndexReg base, int displacement=0 ) : + __forceinline explicit x86AddressInfo( const x86IndexReg& base, int displacement=0 ) : Base( base ), Index(), Factor(0), @@ -271,7 +280,7 @@ namespace x86Emitter { } - explicit x86ModRm( s32 displacement ) : + __forceinline explicit x86AddressInfo( s32 displacement ) : Base(), Index(), Factor(0), @@ -279,62 +288,87 @@ namespace x86Emitter { } - static x86ModRm FromIndexReg( x86IndexReg index, int scale=0, s32 displacement=0 ); + static x86AddressInfo FromIndexReg( const x86IndexReg& index, int scale=0, s32 displacement=0 ); public: bool IsByteSizeDisp() const { return is_s8( Displacement ); } - x86IndexReg GetEitherReg() const; - x86ModRm& Add( s32 imm ) + __forceinline x86AddressInfo& Add( s32 imm ) { Displacement += imm; return *this; } - x86ModRm& Add( const x86IndexReg& src ); - x86ModRm& Add( const x86ModRm& src ); + __forceinline x86AddressInfo& Add( const x86IndexReg& src ); + __forceinline x86AddressInfo& Add( const x86AddressInfo& src ); - x86ModRm operator+( const x86IndexReg& right ) const { return x86ModRm( *this ).Add( right ); } - x86ModRm operator+( const x86ModRm& right ) const { return x86ModRm( *this ).Add( right ); } - x86ModRm operator+( const s32 imm ) const { return x86ModRm( *this ).Add( imm ); } - x86ModRm operator-( const s32 imm ) const { return x86ModRm( *this ).Add( -imm ); } + __forceinline x86AddressInfo operator+( const x86IndexReg& right ) const { return x86AddressInfo( *this ).Add( right ); } + __forceinline x86AddressInfo operator+( const x86AddressInfo& right ) const { return x86AddressInfo( *this ).Add( right ); } + __forceinline x86AddressInfo operator+( s32 imm ) const { return x86AddressInfo( *this ).Add( imm ); } + __forceinline x86AddressInfo operator-( s32 imm ) const { return x86AddressInfo( *this ).Add( -imm ); } }; ////////////////////////////////////////////////////////////////////////////////////////// // ModSib - Internal low-level representation of the 
ModRM/SIB information. // - // This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means that - // the Base, Index, Scale, and Displacement values are all valid, and it serves as a type- - // safe layer between the x86Register's operators (which generate x86ModRm types) and the - // emitter's ModSib instruction forms. Without this, the x86Register would pass as a - // ModSib type implicitly, and that would cause ambiguity on a number of instructions. + // This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means + // that the Base, Index, Scale, and Displacement values are all in the correct arrange- + // ments, and it serves as a type-safe layer between the x86Register's operators (which + // generate x86AddressInfo types) and the emitter's ModSib instruction forms. Without this, + // the x86Register would pass as a ModSib type implicitly, and that would cause ambiguity + // on a number of instructions. // - class ModSib + // End users should always use x86AddressInfo instead. + // + class ModSibBase { public: x86IndexReg Base; // base register (no scale) x86IndexReg Index; // index reg gets multiplied by the scale - int Scale; // scale applied to the index register, in scale/shift form + uint Scale; // scale applied to the index register, in scale/shift form s32 Displacement; // offset applied to the Base/Index registers. - explicit ModSib( const x86ModRm& src ); - explicit ModSib( s32 disp ); - ModSib( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ); + public: + explicit ModSibBase( const x86AddressInfo& src ); + explicit ModSibBase( s32 disp ); + ModSibBase( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ); - x86IndexReg GetEitherReg() const; bool IsByteSizeDisp() const { return is_s8( Displacement ); } - ModSib& Add( s32 imm ) + __forceinline ModSibBase& Add( s32 imm ) { Displacement += imm; return *this; } - ModSib operator+( const s32 imm ) const { return ModSib( *this ).Add( imm ); } - ModSib operator-( const s32 imm ) const { return ModSib( *this ).Add( -imm ); } + __forceinline ModSibBase operator+( const s32 imm ) const { return ModSibBase( *this ).Add( imm ); } + __forceinline ModSibBase operator-( const s32 imm ) const { return ModSibBase( *this ).Add( -imm ); } protected: - void Reduce(); + __forceinline void Reduce(); + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // Strictly-typed version of ModSibBase, which is used to apply operand size information + // to ImmToMem operations. 
+ // + template< int OperandSize > + class ModSibStrict : public ModSibBase + { + public: + __forceinline explicit ModSibStrict( const x86AddressInfo& src ) : ModSibBase( src ) {} + __forceinline explicit ModSibStrict( s32 disp ) : ModSibBase( disp ) {} + __forceinline ModSibStrict( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) : + ModSibBase( base, index, scale, displacement ) {} + + __forceinline ModSibStrict& Add( s32 imm ) + { + Displacement += imm; + return *this; + } + + __forceinline ModSibStrict operator+( const s32 imm ) const { return ModSibStrict( *this ).Add( imm ); } + __forceinline ModSibStrict operator-( const s32 imm ) const { return ModSibStrict( *this ).Add( -imm ); } }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -344,33 +378,390 @@ namespace x86Emitter { // passthrough instruction, allows ModSib to pass silently through ptr translation // without doing anything and without compiler error. - const ModSib& operator[]( const ModSib& src ) const { return src; } + const ModSibBase& operator[]( const ModSibBase& src ) const { return src; } - ModSib operator[]( x86IndexReg src ) const + __forceinline ModSibBase operator[]( x86IndexReg src ) const { - return ModSib( src, x86IndexReg::Empty ); + return ModSibBase( src, x86IndexReg::Empty ); } - ModSib operator[]( const x86ModRm& src ) const + __forceinline ModSibBase operator[]( const x86AddressInfo& src ) const { - return ModSib( src ); + return ModSibBase( src ); } - ModSib operator[]( uptr src ) const + __forceinline ModSibBase operator[]( uptr src ) const { - return ModSib( src ); + return ModSibBase( src ); } - ModSib operator[]( void* src ) const + __forceinline ModSibBase operator[]( const void* src ) const { - return ModSib( (uptr)src ); + return ModSibBase( (uptr)src ); } x86IndexerType() {} }; - // ------------------------------------------------------------------------ + ////////////////////////////////////////////////////////////////////////////////////////// + // Explicit version of ptr[], in the form of ptr32[], ptr16[], etc. which allows + // specification of the operand size for ImmToMem operations. + // + template< int OperandSize > + struct x86IndexerTypeExplicit + { + // passthrough instruction, allows ModSib to pass silently through ptr translation + // without doing anything and without compiler error. 
+ const ModSibStrict& operator[]( const ModSibStrict& src ) const { return src; } + + __forceinline ModSibStrict operator[]( x86IndexReg src ) const + { + return ModSibStrict( src, x86IndexReg::Empty ); + } + + __forceinline ModSibStrict operator[]( const x86AddressInfo& src ) const + { + return ModSibStrict( src ); + } + + __forceinline ModSibStrict operator[]( uptr src ) const + { + return ModSibStrict( src ); + } + + __forceinline ModSibStrict operator[]( const void* src ) const + { + return ModSibStrict( (uptr)src ); + } + }; + extern const x86IndexerType ptr; + extern const x86IndexerTypeExplicit<4> ptr32; + extern const x86IndexerTypeExplicit<2> ptr16; + extern const x86IndexerTypeExplicit<1> ptr8; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + namespace Internal + { + extern void ModRM( uint mod, uint reg, uint rm ); + extern void SibSB( u32 ss, u32 index, u32 base ); + extern void EmitSibMagic( uint regfield, const ModSibBase& info ); + + struct SibMagic + { + static void Emit( uint regfield, const ModSibBase& info ) + { + EmitSibMagic( regfield, info ); + } + }; + + struct SibMagicInline + { + static __forceinline void Emit( uint regfield, const ModSibBase& info ) + { + EmitSibMagic( regfield, info ); + } + }; + + + enum G1Type + { + G1Type_ADD=0, + G1Type_OR, + G1Type_ADC, + G1Type_SBB, + G1Type_AND, + G1Type_SUB, + G1Type_XOR, + G1Type_CMP + }; + + enum G2Type + { + G2Type_ROL=0, + G2Type_ROR, + G2Type_RCL, + G2Type_RCR, + G2Type_SHL, + G2Type_SHR, + G2Type_Unused, + G2Type_SAR + }; + + // ------------------------------------------------------------------- + template< typename ImmType, G1Type InstType, typename SibMagicType > + class Group1Impl + { + public: + static const uint OperandSize = sizeof(ImmType); + + protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + + public: + static __emitinline void Emit( const x86Register& to, const x86Register& from ) + { + prefix16(); + iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + ModRM( 3, from.Id, to.Id ); + } + + static __emitinline void Emit( const ModSibBase& sibdest, const x86Register& from ) + { + prefix16(); + iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + SibMagicType::Emit( from.Id, sibdest ); + } + + static __emitinline void Emit( const x86Register& to, const ModSibBase& sibsrc ) + { + prefix16(); + iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); + SibMagicType::Emit( to.Id, sibsrc ); + } + + static __emitinline void Emit( const x86Register& to, ImmType imm ) + { + if( !Is8BitOperand() && is_s8( imm ) ) + { + iWrite( 0x83 ); + ModRM( 3, InstType, to.Id ); + iWrite( imm ); + } + else + { + prefix16(); + if( to.IsAccumulator() ) + iWrite( (Is8BitOperand() ? 4 : 5) | (InstType<<3) ); + else + { + iWrite( Is8BitOperand() ? 0x80 : 0x81 ); + ModRM( 3, InstType, to.Id ); + } + iWrite( imm ); + } + } + + static __emitinline void Emit( const ModSibStrict& sibdest, ImmType imm ) + { + if( Is8BitOperand() ) + { + iWrite( 0x80 ); + SibMagicType::Emit( InstType, sibdest ); + iWrite( imm ); + } + else + { + prefix16(); + iWrite( is_s8( imm ) ? 0x83 : 0x81 ); + SibMagicType::Emit( InstType, sibdest ); + if( is_s8( imm ) ) + iWrite( imm ); + else + iWrite( imm ); + } + } + }; + + // ------------------------------------------------------------------- + // Group 2 (shift) instructions have no Sib/ModRM forms. 
+ // Note: For Imm forms, we ignore the instruction if the shift count is zero. This + // is a safe optimization since any zero-value shift does not affect any flags. + // + template< typename ImmType, G2Type InstType, typename SibMagicType > + class Group2Impl + { + public: + static const uint OperandSize = sizeof(ImmType); + + protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + + public: + static __emitinline void Emit( const x86Register& to, const x86Register8& from ) + { + jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?) + + prefix16(); + iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); + ModRM( 3, InstType, to.Id ); + } + + static __emitinline void Emit( const x86Register& to, u8 imm ) + { + if( imm == 0 ) return; + + prefix16(); + if( imm == 1 ) + { + // special encoding of 1's + iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); + ModRM( 3, InstType, to.Id ); + } + else + { + iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); + ModRM( 3, InstType, to.Id ); + iWrite( imm ); + } + } + + static __emitinline void Emit( const ModSibStrict& sibdest, const x86Register8& from ) + { + jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?) + + prefix16(); + iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); + SibMagicType::Emit( from.Id, sibdest ); + } + + static __emitinline void Emit( const ModSibStrict& sibdest, u8 imm ) + { + if( imm == 0 ) return; + + prefix16(); + if( imm == 1 ) + { + // special encoding of 1's + iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); + SibMagicType::Emit( InstType, sibdest ); + } + else + { + iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); + SibMagicType::Emit( InstType, sibdest ); + iWrite( imm ); + } + } + }; + + // ------------------------------------------------------------------- + // + template< G1Type InstType > + class Group1ImplAll + { + protected: + typedef Group1Impl m_32; + typedef Group1Impl m_16; + typedef Group1Impl m_8; + + typedef Group1Impl m_32i; + typedef Group1Impl m_16i; + typedef Group1Impl m_8i; + + // Inlining Notes: + // I've set up the inlining to be as practical and intelligent as possible, which means + // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to + // virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis- + // cretion of the compiler. 
+ // + + // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) + + public: + // ---------- 32 Bit Interface ----------- + __forceinline void operator()( const x86Register32& to, const x86Register32& from ) const { m_32i::Emit( to, from ); } + __forceinline void operator()( const x86Register32& to, const void* src ) const { m_32i::Emit( to, ptr32[src] ); } + __forceinline void operator()( const void* dest, const x86Register32& from ) const { m_32i::Emit( ptr32[dest], from ); } + __noinline void operator()( const ModSibBase& sibdest, const x86Register32& from ) const { m_32::Emit( sibdest, from ); } + __noinline void operator()( const x86Register32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); } + __noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); } + + void operator()( const x86Register32& to, u32 imm ) const { m_32i::Emit( to, imm ); } + + // ---------- 16 Bit Interface ----------- + __forceinline void operator()( const x86Register16& to, const x86Register16& from ) const { m_16i::Emit( to, from ); } + __forceinline void operator()( const x86Register16& to, const void* src ) const { m_16i::Emit( to, ptr16[src] ); } + __forceinline void operator()( const void* dest, const x86Register16& from ) const { m_16i::Emit( ptr16[dest], from ); } + __noinline void operator()( const ModSibBase& sibdest, const x86Register16& from ) const { m_16::Emit( sibdest, from ); } + __noinline void operator()( const x86Register16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); } + __noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); } + + void operator()( const x86Register16& to, u16 imm ) const { m_16i::Emit( to, imm ); } + + // ---------- 8 Bit Interface ----------- + __forceinline void operator()( const x86Register8& to, const x86Register8& from ) const { m_8i::Emit( to, from ); } + __forceinline void operator()( const x86Register8& to, const void* src ) const { m_8i::Emit( to, ptr8[src] ); } + __forceinline void operator()( const void* dest, const x86Register8& from ) const { m_8i::Emit( ptr8[dest], from ); } + __noinline void operator()( const ModSibBase& sibdest, const x86Register8& from ) const { m_8::Emit( sibdest, from ); } + __noinline void operator()( const x86Register8& to, const ModSibBase& sibsrc ) const { m_8::Emit( to, sibsrc ); } + __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } + + void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); } + }; + + + // ------------------------------------------------------------------- + // + template< G2Type InstType > + class Group2ImplAll + { + protected: + typedef Group2Impl m_32; + typedef Group2Impl m_16; + typedef Group2Impl m_8; + + typedef Group2Impl m_32i; + typedef Group2Impl m_16i; + typedef Group2Impl m_8i; + + // Inlining Notes: + // I've set up the inlining to be as practical and intelligent as possible, which means + // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to + // virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis- + // cretion of the compiler. 
+		//
+
+		// (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution)
+
+	public:
+		// ---------- 32 Bit Interface -----------
+		__forceinline void operator()( const x86Register32& to, const x86Register8& from ) const{ m_32i::Emit( to, from ); }
+		__noinline void operator()( const ModSibStrict<4>& sibdest, const x86Register8& from ) const{ m_32::Emit( sibdest, from ); }
+		__noinline void operator()( const ModSibStrict<4>& sibdest, u8 imm ) const { m_32::Emit( sibdest, imm ); }
+		void operator()( const x86Register32& to, u8 imm ) const { m_32i::Emit( to, imm ); }
+
+		// ---------- 16 Bit Interface -----------
+		__forceinline void operator()( const x86Register16& to, const x86Register8& from ) const{ m_16i::Emit( to, from ); }
+		__noinline void operator()( const ModSibStrict<2>& sibdest, const x86Register8& from ) const{ m_16::Emit( sibdest, from ); }
+		__noinline void operator()( const ModSibStrict<2>& sibdest, u8 imm ) const { m_16::Emit( sibdest, imm ); }
+		void operator()( const x86Register16& to, u8 imm ) const { m_16i::Emit( to, imm ); }
+
+		// ---------- 8 Bit Interface -----------
+		__forceinline void operator()( const x86Register8& to, const x86Register8& from ) const{ m_8i::Emit( to, from ); }
+		__noinline void operator()( const ModSibStrict<1>& sibdest, const x86Register8& from ) const{ m_8::Emit( sibdest, from ); }
+		__noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); }
+		void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); }
+	};
+
+	// Define the externals for Group1/2 instructions here (inside the Internal namespace).
+	// and then import them into the x86Emitter namespace later.  Done because it saves a
+	// lot of Internal:: namespace resolution mess, and is better than the alternative of
+	// importing Internal into x86Emitter, which done at the header file level would defeat
+	// the purpose!)
+
+	extern const Group1ImplAll<G1Type_ADD> ADD;
+	extern const Group1ImplAll<G1Type_OR>  OR;
+	extern const Group1ImplAll<G1Type_ADC> ADC;
+	extern const Group1ImplAll<G1Type_SBB> SBB;
+	extern const Group1ImplAll<G1Type_AND> AND;
+	extern const Group1ImplAll<G1Type_SUB> SUB;
+	extern const Group1ImplAll<G1Type_XOR> XOR;
+	extern const Group1ImplAll<G1Type_CMP> CMP;
+
+	extern const Group2ImplAll<G2Type_ROL> ROL;
+	extern const Group2ImplAll<G2Type_ROR> ROR;
+	extern const Group2ImplAll<G2Type_RCL> RCL;
+	extern const Group2ImplAll<G2Type_RCR> RCR;
+	extern const Group2ImplAll<G2Type_SHL> SHL;
+	extern const Group2ImplAll<G2Type_SHR> SHR;
+	extern const Group2ImplAll<G2Type_SAR> SAR;
+	}
+
+	// ------------------------------------------------------------------------
 	extern const x86Register32 eax;
 	extern const x86Register32 ebx;
@@ -398,4 +789,6 @@ namespace x86Emitter
 	extern const x86Register8 ch;
 	extern const x86Register8 dh;
 	extern const x86Register8 bh;
-}
\ No newline at end of file
+}
+
+#include "ix86_inlines.inl"

From ac1ecd58128e673ac223b67b7c3f342d33b5647c Mon Sep 17 00:00:00 2001
From: cottonvibes
Date: Tue, 14 Apr 2009 03:34:47 +0000
Subject: [PATCH 058/143] microVU: more flag "stuff" xD

cotton: your commits always contain "stuff"
I think you should rename one of your files to "microVU_stuff"
And maybe others called "microVU_junk" and "microVU_crap"
Then you could be much clearer
"Fixed some junk, broke some crap, added some stuff...."
lol lol git-svn-id: http://pcsx2.googlecode.com/svn/trunk@972 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Analyze.inl | 7 ++++++- pcsx2/x86/microVU_Compile.inl | 14 +++++++++----- pcsx2/x86/microVU_Misc.h | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 375ebcbcaa..044a9b4baf 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -255,7 +255,12 @@ microVUt(void) mVUanalyzeSflag(int It) { microVUt(void) mVUanalyzeFSSET() { microVU* mVU = mVUx; - mVUinfo |= _isFSSSET; + mVUinfo |= _isFSSET; + // mVUinfo &= ~_doStatus; + // Note: I'm not entirely sure if the non-sticky flags + // should be taken from the current upper instruction + // or if they should be taken from the previous instruction + // Uncomment the above line if the latter-case is true } //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 340406960f..116d2eae9b 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -60,17 +60,21 @@ microVUt(void) mVUstatusFlagOp() { microVU* mVU = mVUx; int curPC = iPC; int i = mVUcount; + bool runLoop = 1; if (doStatus) { mVUinfo |= _isSflag; } else { for (; i > 0; i--) { incPC2(-2); + if (isSflag) { runLoop = 0; break; } if (doStatus) { mVUinfo |= _isSflag; break; } } } - for (; i > 0; i--) { - incPC2(-2); - if (isSflag) break; - mVUinfo &= ~(_doStatus|_doDivFlag); + if (runLoop) { + for (; i > 0; i--) { + incPC2(-2); + if (isSflag) break; + mVUinfo &= ~_doStatus; + } } iPC = curPC; } @@ -98,7 +102,7 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { int xCount = mVUcount; // Backup count iPC = mVUstartPC; for (mVUcount = 0; mVUcount < xCount; mVUcount++) { - if ((xCount - mVUcount) > aCount) mVUstatusFlagOp(); // Don't Optimize out on the last ~4+ instructions + if (((xCount - mVUcount) > aCount) && isFSSET) mVUstatusFlagOp(); // Don't Optimize out on the last ~4+ instructions yS += (mVUstall > 3) ? 3 : mVUstall; if (yS > zS) { diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 0017e98438..6044f0100d 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -184,7 +184,7 @@ declareAllVariables #define _memReadIt (1<<24) // Read If (VI reg) from memory (used by branches) #define _writesVI (1<<25) // Current Instruction writes to VI #define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction -#define _isFSSSET (1<<27) // Cur Instruction is FSSET +#define _isFSSET (1<<27) // Cur Instruction is FSSET #define _doDivFlag (1<<28) // Transfer Div flag to Status Flag //#define _isBranch2 (1<<31) // Cur Instruction is a Branch that writes VI regs (BAL/JALR) From be430d5d893ec34dd3f0cee0c845234d28c5ef16 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 14 Apr 2009 12:37:48 +0000 Subject: [PATCH 059/143] Fixed a bug in the Emitter the caused the VU1 to screw up a bit (bad gfx and freezeups and stuff). Also: Resolved some GCC/C++ troubles. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@974 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 + pcsx2/x86/iVUmicroLower.cpp | 20 +- pcsx2/x86/ix86/ix86.cpp | 223 ++++----------------- pcsx2/x86/ix86/ix86_instructions.h | 14 +- pcsx2/x86/ix86/ix86_jmp.cpp | 192 ++++++++++++++++++ pcsx2/x86/ix86/ix86_types.h | 163 ++++++++++----- 6 files changed, 363 insertions(+), 253 deletions(-) create mode 100644 pcsx2/x86/ix86/ix86_jmp.cpp diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 6ed073af1e..fc01b41d0c 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2937,6 +2937,10 @@ RelativePath="..\..\x86\ix86\ix86_internal.h" > + + diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 0c2ab15768..d057cd9791 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -355,7 +355,7 @@ void recVUMI_IADD( VURegs *VU, int info ) if( fdreg == fsreg ) ADD32RtoR(fdreg, ftreg); else if( fdreg == ftreg ) ADD32RtoR(fdreg, fsreg); else LEA32RRtoR(fdreg, fsreg, ftreg); - MOVZX32R16toR(fdreg, fdreg); // neeed since don't know if fdreg's upper bits are 0 + MOVZX32R16toR(fdreg, fdreg); // needed since don't know if fdreg's upper bits are 0 } } //------------------------------------------------------------------ @@ -802,20 +802,12 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) // (this is one of my test cases for the new emitter --air) using namespace x86Emitter; + x86IndexReg thisreg( x86reg ); - if ( x86reg >= 0 ) { - x86IndexReg thisreg( x86reg ); - if ( _X ) MOV(ptr32[thisreg+offset], 0x00000000 ); - if ( _Y ) MOV(ptr32[thisreg+offset+4], 0x00000000 ); - if ( _Z ) MOV(ptr32[thisreg+offset+8], 0x00000000 ); - if ( _W ) MOV(ptr32[thisreg+offset+12], 0x3f800000); - } - else { - if ( _X ) MOV(ptr32[offset], 0x00000000); - if ( _Y ) MOV(ptr32[offset+4], 0x00000000); - if ( _Z ) MOV(ptr32[offset+8], 0x00000000); - if ( _W ) MOV(ptr32[offset+14], 0x3f800000); - } + if ( _X ) MOV(ptr32[thisreg+offset], 0x00000000); + if ( _Y ) MOV(ptr32[thisreg+offset+4], 0x00000000); + if ( _Z ) MOV(ptr32[thisreg+offset+8], 0x00000000); + if ( _W ) MOV(ptr32[thisreg+offset+12], 0x3f800000); } return; } diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index b3de07a04a..35bd1791b5 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -62,7 +62,7 @@ __threadlocal u8 *x86Ptr; __threadlocal u8 *j8Ptr[32]; __threadlocal u32 *j32Ptr[32]; -XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; +__threadlocal XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; namespace x86Emitter { @@ -72,9 +72,8 @@ const x86IndexerTypeExplicit<2> ptr16; const x86IndexerTypeExplicit<1> ptr8; // ------------------------------------------------------------------------ -const x86Register32 x86Register32::Empty; -const x86Register16 x86Register16::Empty; -const x86Register8 x86Register8::Empty; + +template< int OperandSize > const x86Register x86Register::Empty; const x86IndexReg x86IndexReg::Empty; const x86Register32 @@ -235,164 +234,16 @@ namespace Internal using namespace Internal; -/* -emitterT void x86SetPtr( u8* ptr ) -{ - x86Ptr = ptr; -} - -////////////////////////////////////////////////////////////////////////////////////////// -// x86Ptr Label API -// - -class x86Label -{ -public: - class Entry - { - protected: - u8* (*m_emit)( u8* emitTo, u8* label_target, int cc ); // callback for the instruction to emit (cc = comparison 
type) - u8* m_base; // base address of the instruction (passed to the instruction) - int m_cc; // comparison type of the instruction - - public: - explicit Entry( int cc ) : - m_base( x86Ptr ) - , m_writebackpos( writebackidx ) - { - } - - void Commit( const u8* target ) const - { - //uptr reltarget = (uptr)m_base - (uptr)target; - //*((u32*)&m_base[m_writebackpos]) = reltarget; - jASSUME( m_emit != NULL ); - jASSUME( m_base != NULL ); - return m_emit( m_base, target, m_cc ); - } - }; - -protected: - u8* m_target; // x86Ptr target address of this label - Entry m_writebacks[8]; - int m_writeback_curpos; - -public: - // creates a label list with no valid target. - // Use x86LabelList::Set() to set a target prior to class destruction. - x86Label() : m_target() - { - } - - x86Label( EmitPtrCache& src ) : m_target( src.GetPtr() ) - { - } - - // Performs all address writebacks on destruction. - virtual ~x86Label() - { - IssueWritebacks(); - } - - void SetTarget() { m_address = x86Ptr; } - void SetTarget( void* addr ) { m_address = (u8*)addr; } - - void Clear() - { - m_writeback_curpos = 0; - } - - // Adds a jump/call instruction to this label for writebacks. - void AddWriteback( void* emit_addr, u8* (*instruction)(), int cc ) - { - jASSUME( m_writeback_curpos < MaxWritebacks ); - m_writebacks[m_writeback_curpos] = Entry( (u8*)instruction, addrpart ) ); - m_writeback_curpos++; - } - - void IssueWritebacks() const - { - const std::list::const_iterator& start = m_list_writebacks. - for( ; start!=end; start++ ) - { - Entry& current = *start; - u8* donespot = current.Commit(); - - // Copy the data from the m_nextinst to the current location, - // and update any additional writebacks (but what about multiple labels?!?) - - } - } -}; -#endif - -void JMP( x86Label& dest ) -{ - dest.AddWriteback( x86Ptr, emitJMP, 0 ); -} - -void JLE( x86Label& dest ) -{ - dest.AddWriteback( x86Ptr, emitJCC, 0 ); -} - -void x86SetJ8( u8* j8 ) -{ - u32 jump = ( x86Ptr - j8 ) - 1; - - if ( jump > 0x7f ) { - Console::Error( "j8 greater than 0x7f!!" ); - assert(0); - } - *j8 = (u8)jump; -} - -void x86SetJ8A( u8* j8 ) -{ - u32 jump = ( x86Ptr - j8 ) - 1; - - if ( jump > 0x7f ) { - Console::Error( "j8 greater than 0x7f!!" ); - assert(0); - } - - if( ((uptr)x86Ptr&0xf) > 4 ) { - - uptr newjump = jump + 16-((uptr)x86Ptr&0xf); - - if( newjump <= 0x7f ) { - jump = newjump; - while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; - } - } - *j8 = (u8)jump; -} - -emitterT void x86SetJ32( u32* j32 ) -{ - *j32 = ( x86Ptr - (u8*)j32 ) - 4; -} - -emitterT void x86SetJ32A( u32* j32 ) -{ - while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; - x86SetJ32(j32); -} - -emitterT void x86Align( int bytes ) -{ - // forward align - x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); -} -*/ - // ------------------------------------------------------------------------ // Internal implementation of EmitSibMagic which has been custom tailored // to optimize special forms of the Lea instructions accordingly, such // as when a LEA can be replaced with a "MOV reg,imm" or "MOV reg,reg". // +// preserve_flags - set to ture to disable use of SHL on [Index*Base] forms +// of LEA, which alters flags states. +// template< typename ToReg > -static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool is16bit=false ) +static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags ) { int displacement_size = (src.Displacement == 0) ? 0 : ( ( src.IsByteSizeDisp() ) ? 
1 : 2 ); @@ -407,18 +258,12 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool is16bit=false ) if( src.Index.IsEmpty() ) { - if( is16bit ) - MOV( to, src.Displacement ); - else - MOV( to, src.Displacement ); + MOV( to, src.Displacement ); return; } else if( displacement_size == 0 ) { - if( is16bit ) - MOV( to, ToReg( src.Index.Id ) ); - else - MOV( to, ToReg( src.Index.Id ) ); + MOV( to, ToReg( src.Index.Id ) ); return; } else @@ -434,11 +279,11 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool is16bit=false ) { if( src.Base.IsEmpty() ) { - if( displacement_size == 0 ) + if( !preserve_flags && (displacement_size == 0) ) { // Encode [Index*Scale] as a combination of Mov and Shl. // This is more efficient because of the bloated LEA format which requires - // a 32 bit displacement, and the compact nature of the alterntive. + // a 32 bit displacement, and the compact nature of the alternative. // // (this does not apply to older model P4s with the broken barrel shifter, // but we currently aren't optimizing for that target anyway). @@ -479,16 +324,16 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool is16bit=false ) } } -__emitinline void LEA( x86Register32 to, const ModSibBase& src ) +__emitinline void LEA( x86Register32 to, const ModSibBase& src, bool preserve_flags ) { - EmitLeaMagic( to, src ); + EmitLeaMagic( to, src, preserve_flags ); } -__emitinline void LEA( x86Register16 to, const ModSibBase& src ) +__emitinline void LEA( x86Register16 to, const ModSibBase& src, bool preserve_flags ) { write8( 0x66 ); - EmitLeaMagic( to, src ); + EmitLeaMagic( to, src, preserve_flags ); } ////////////////////////////////////////////////////////////////////////////////////////// @@ -555,15 +400,10 @@ public: static __forceinline void Emit( const x86Register& to, ImmType imm ) { // Note: MOV does not have (reg16/32,imm8) forms. - - if( imm == 0 ) - XOR( to, to ); - else - { - prefix16(); - iWrite( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); - iWrite( imm ); - } + + prefix16(); + iWrite( (Is8BitOperand() ? 
0xb0 : 0xb8) | to.Id ); + iWrite( imm ); } static __forceinline void Emit( ModSibStrict dest, ImmType imm ) @@ -603,7 +443,13 @@ __noinline void MOV( const ModSibBase& sibdest, const x86Register32& from ) { M __noinline void MOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); } __noinline void MOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); } -void MOV( const x86Register32& to, u32 imm ) { MOV32i::Emit( to, imm ); } +void MOV( const x86Register32& to, u32 imm, bool preserve_flags ) +{ + if( !preserve_flags && (imm == 0) ) + XOR( to, to ); + else + MOV32i::Emit( to, imm ); +} // ---------- 16 Bit Interface ----------- @@ -614,8 +460,13 @@ __noinline void MOV( const ModSibBase& sibdest, const x86Register16& from ) { M __noinline void MOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); } __noinline void MOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); } -void MOV( const x86Register16& to, u16 imm ) { MOV16i::Emit( to, imm ); } - +void MOV( const x86Register16& to, u16 imm, bool preserve_flags ) +{ + if( !preserve_flags && (imm == 0) ) + XOR( to, to ); + else + MOV16i::Emit( to, imm ); +} // ---------- 8 Bit Interface ----------- __forceinline void MOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); } @@ -625,7 +476,13 @@ __noinline void MOV( const ModSibBase& sibdest, const x86Register8& from ) { MO __noinline void MOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); } __noinline void MOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); } -void MOV( const x86Register8& to, u8 imm ) { MOV8i::Emit( to, imm ); } +void MOV( const x86Register8& to, u8 imm, bool preserve_flags ) +{ + if( !preserve_flags && (imm == 0) ) + XOR( to, to ); + else + MOV8i::Emit( to, imm ); +} ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 9115597f1b..0f218d1a0b 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -40,8 +40,8 @@ namespace x86Emitter // forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs // instead. - extern void LEA( x86Register32 to, const ModSibBase& src ); - extern void LEA( x86Register16 to, const ModSibBase& src ); + extern void LEA( x86Register32 to, const ModSibBase& src, bool preserve_flags=false ); + extern void LEA( x86Register16 to, const ModSibBase& src, bool preserve_flags=false ); // ----- Push / Pop Instructions ----- @@ -80,7 +80,9 @@ namespace x86Emitter extern void MOV( const x86Register32& to, const void* src ); extern void MOV( const void* dest, const x86Register32& from ); - extern void MOV( const x86Register32& to, u32 imm ); + // preserve_flags - set to true to disable optimizations which could alter the state of + // the flags (namely replacing mov reg,0 with xor). 
+ extern void MOV( const x86Register32& to, u32 imm, bool preserve_flags=false ); extern void MOV( const ModSibStrict<4>& sibdest, u32 imm ); // ---------- 16 Bit Interface ----------- @@ -90,7 +92,9 @@ namespace x86Emitter extern void MOV( const x86Register16& to, const void* src ); extern void MOV( const void* dest, const x86Register16& from ); - extern void MOV( const x86Register16& to, u16 imm ); + // preserve_flags - set to true to disable optimizations which could alter the state of + // the flags (namely replacing mov reg,0 with xor). + extern void MOV( const x86Register16& to, u16 imm, bool preserve_flags=false ); extern void MOV( const ModSibStrict<2>& sibdest, u16 imm ); // ---------- 8 Bit Interface ----------- @@ -100,7 +104,7 @@ namespace x86Emitter extern void MOV( const x86Register8& to, const void* src ); extern void MOV( const void* dest, const x86Register8& from ); - extern void MOV( const x86Register8& to, u8 imm ); + extern void MOV( const x86Register8& to, u8 imm, bool preserve_flags=false ); extern void MOV( const ModSibStrict<1>& sibdest, u8 imm ); } diff --git a/pcsx2/x86/ix86/ix86_jmp.cpp b/pcsx2/x86/ix86/ix86_jmp.cpp new file mode 100644 index 0000000000..4b2c11eb17 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_jmp.cpp @@ -0,0 +1,192 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +/* + * ix86 core v0.9.0 + * + * Original Authors (v0.6.2 and prior): + * linuzappz + * alexey silinov + * goldfinger + * zerofrog(@gmail.com) + * + * Authors of v0.9.0: + * Jake.Stine(@gmail.com) + * cottonvibes(@gmail.com) + * sudonim(1@gmail.com) + */ + +#include "PrecompiledHeader.h" + +#include "System.h" +#include "ix86_internal.h" + + +// Another Work-in-Progress!! + + +/* +emitterT void x86SetPtr( u8* ptr ) +{ + x86Ptr = ptr; +} + +////////////////////////////////////////////////////////////////////////////////////////// +// x86Ptr Label API +// + +class x86Label +{ +public: + class Entry + { + protected: + u8* (*m_emit)( u8* emitTo, u8* label_target, int cc ); // callback for the instruction to emit (cc = comparison type) + u8* m_base; // base address of the instruction (passed to the instruction) + int m_cc; // comparison type of the instruction + + public: + explicit Entry( int cc ) : + m_base( x86Ptr ) + , m_writebackpos( writebackidx ) + { + } + + void Commit( const u8* target ) const + { + //uptr reltarget = (uptr)m_base - (uptr)target; + //*((u32*)&m_base[m_writebackpos]) = reltarget; + jASSUME( m_emit != NULL ); + jASSUME( m_base != NULL ); + return m_emit( m_base, target, m_cc ); + } + }; + +protected: + u8* m_target; // x86Ptr target address of this label + Entry m_writebacks[8]; + int m_writeback_curpos; + +public: + // creates a label list with no valid target. 
+ // Use x86LabelList::Set() to set a target prior to class destruction. + x86Label() : m_target() + { + } + + x86Label( EmitPtrCache& src ) : m_target( src.GetPtr() ) + { + } + + // Performs all address writebacks on destruction. + virtual ~x86Label() + { + IssueWritebacks(); + } + + void SetTarget() { m_address = x86Ptr; } + void SetTarget( void* addr ) { m_address = (u8*)addr; } + + void Clear() + { + m_writeback_curpos = 0; + } + + // Adds a jump/call instruction to this label for writebacks. + void AddWriteback( void* emit_addr, u8* (*instruction)(), int cc ) + { + jASSUME( m_writeback_curpos < MaxWritebacks ); + m_writebacks[m_writeback_curpos] = Entry( (u8*)instruction, addrpart ) ); + m_writeback_curpos++; + } + + void IssueWritebacks() const + { + const std::list::const_iterator& start = m_list_writebacks. + for( ; start!=end; start++ ) + { + Entry& current = *start; + u8* donespot = current.Commit(); + + // Copy the data from the m_nextinst to the current location, + // and update any additional writebacks (but what about multiple labels?!?) + + } + } +}; +#endif + +void JMP( x86Label& dest ) +{ + dest.AddWriteback( x86Ptr, emitJMP, 0 ); +} + +void JLE( x86Label& dest ) +{ + dest.AddWriteback( x86Ptr, emitJCC, 0 ); +} + +void x86SetJ8( u8* j8 ) +{ + u32 jump = ( x86Ptr - j8 ) - 1; + + if ( jump > 0x7f ) { + Console::Error( "j8 greater than 0x7f!!" ); + assert(0); + } + *j8 = (u8)jump; +} + +void x86SetJ8A( u8* j8 ) +{ + u32 jump = ( x86Ptr - j8 ) - 1; + + if ( jump > 0x7f ) { + Console::Error( "j8 greater than 0x7f!!" ); + assert(0); + } + + if( ((uptr)x86Ptr&0xf) > 4 ) { + + uptr newjump = jump + 16-((uptr)x86Ptr&0xf); + + if( newjump <= 0x7f ) { + jump = newjump; + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + } + } + *j8 = (u8)jump; +} + +emitterT void x86SetJ32( u32* j32 ) +{ + *j32 = ( x86Ptr - (u8*)j32 ) - 4; +} + +emitterT void x86SetJ32A( u32* j32 ) +{ + while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; + x86SetJ32(j32); +} + +emitterT void x86Align( int bytes ) +{ + // forward align + x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); +} +*/ diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 6c91b2a9d7..b1497dd139 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -18,20 +18,6 @@ #pragma once -// x86 opcode descriptors -#define XMMREGS 8 -#define X86REGS 8 -#define MMXREGS 8 - -enum XMMSSEType -{ - XMMT_INT = 0, // integer (sse2 only) - XMMT_FPS = 1, // floating point - //XMMT_FPD = 3, // double -}; - -extern XMMSSEType g_xmmtypes[XMMREGS]; - extern void cpudetectInit( void );//this is all that needs to be called and will fill up the below structs typedef struct CAPABILITIES CAPABILITIES; @@ -106,10 +92,23 @@ extern CPUINFO cpuinfo; #define __threadlocal __thread #endif +// x86 opcode descriptors +#define XMMREGS 8 +#define X86REGS 8 +#define MMXREGS 8 + +enum XMMSSEType +{ + XMMT_INT = 0, // integer (sse2 only) + XMMT_FPS = 1, // floating point + //XMMT_FPD = 3, // double +}; + extern __threadlocal u8 *x86Ptr; extern __threadlocal u8 *j8Ptr[32]; extern __threadlocal u32 *j32Ptr[32]; +extern __threadlocal XMMSSEType g_xmmtypes[XMMREGS]; //------------------------------------------------------------------ // templated version of is_s8 is required, so that u16's get correct sign extension treatment. 
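A minimal sketch of the templated is_s8 mentioned in the trailing comment above -- illustrative only, assuming the project's s8/s32 fixed-width typedefs; the real declaration is not shown in this patch. The point is that the immediate widens through s32 before the range check, so a u16 of 0xFFFF is not mistaken for -1:

template< typename T >
static bool is_s8( T imm )
{
	// Widen to s32 first: u16 0xFFFF becomes 65535 and correctly fails the test,
	// while a genuine -1 (e.g. an s32/u32 of 0xFFFFFFFF) still passes.
	return (s8)imm == (s32)imm;
}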
@@ -218,10 +217,45 @@ namespace x86Emitter } }; + // ------------------------------------------------------------------------ + // Note: GCC parses templates ahead of time apparently as a 'favor' to the programmer, which + // means it finds undeclared variables when MSVC does not (Since MSVC compiles templates + // when they are actually used). In practice this sucks since it means we have to move all' + // our variable and function prototypes from a nicely/neatly unified location to being strewn + // all about the the templated code in haphazard fashion. Yay.. >_< + // + typedef x86Register<4> x86Register32; typedef x86Register<2> x86Register16; typedef x86Register<1> x86Register8; - + + extern const x86Register32 eax; + extern const x86Register32 ebx; + extern const x86Register32 ecx; + extern const x86Register32 edx; + extern const x86Register32 esi; + extern const x86Register32 edi; + extern const x86Register32 ebp; + extern const x86Register32 esp; + + extern const x86Register16 ax; + extern const x86Register16 bx; + extern const x86Register16 cx; + extern const x86Register16 dx; + extern const x86Register16 si; + extern const x86Register16 di; + extern const x86Register16 bp; + extern const x86Register16 sp; + + extern const x86Register8 al; + extern const x86Register8 cl; + extern const x86Register8 dl; + extern const x86Register8 bl; + extern const x86Register8 ah; + extern const x86Register8 ch; + extern const x86Register8 dh; + extern const x86Register8 bh; + ////////////////////////////////////////////////////////////////////////////////////////// // Use 32 bit registers as out index register (for ModSib memory address calculations) // Only x86IndexReg provides operators for constructing x86AddressInfo types. @@ -400,7 +434,7 @@ namespace x86Emitter return ModSibBase( (uptr)src ); } - x86IndexerType() {} + x86IndexerType() {} // applease the GCC gods }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -433,6 +467,8 @@ namespace x86Emitter { return ModSibStrict( (uptr)src ); } + + x86IndexerTypeExplicit() {} // GCC initialization dummy }; extern const x86IndexerType ptr; @@ -496,6 +532,8 @@ namespace x86Emitter public: static const uint OperandSize = sizeof(ImmType); + Group1Impl() {} // because GCC doesn't like static classes + protected: static bool Is8BitOperand() { return OperandSize == 1; } static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } @@ -524,6 +562,7 @@ namespace x86Emitter static __emitinline void Emit( const x86Register& to, ImmType imm ) { + prefix16(); if( !Is8BitOperand() && is_s8( imm ) ) { iWrite( 0x83 ); @@ -532,7 +571,6 @@ namespace x86Emitter } else { - prefix16(); if( to.IsAccumulator() ) iWrite( (Is8BitOperand() ? 4 : 5) | (InstType<<3) ); else @@ -576,6 +614,8 @@ namespace x86Emitter public: static const uint OperandSize = sizeof(ImmType); + Group2Impl() {} // For the love of GCC. + protected: static bool Is8BitOperand() { return OperandSize == 1; } static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } @@ -637,6 +677,13 @@ namespace x86Emitter } } }; + + // if the immediate is zero, we can replace the instruction, or ignore it + // entirely, depending on the instruction being issued. That's what we do here. + // (returns FALSE if no optimization is performed) + // [TODO] : Work-in-progress! 
+ //template< G1Type InstType, typename RegType > + //static __forceinline void _optimize_imm0( RegType to ); // ------------------------------------------------------------------- // @@ -656,7 +703,7 @@ namespace x86Emitter // I've set up the inlining to be as practical and intelligent as possible, which means // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to // virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis- - // cretion of the compiler. + // creation of the compiler. // // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) @@ -670,7 +717,11 @@ namespace x86Emitter __noinline void operator()( const x86Register32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); } __noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); } - void operator()( const x86Register32& to, u32 imm ) const { m_32i::Emit( to, imm ); } + void operator()( const x86Register32& to, u32 imm, bool needs_flags=false ) const + { + //if( needs_flags || (imm != 0) || !_optimize_imm0() ) + m_32i::Emit( to, imm ); + } // ---------- 16 Bit Interface ----------- __forceinline void operator()( const x86Register16& to, const x86Register16& from ) const { m_16i::Emit( to, from ); } @@ -680,7 +731,7 @@ namespace x86Emitter __noinline void operator()( const x86Register16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); } __noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); } - void operator()( const x86Register16& to, u16 imm ) const { m_16i::Emit( to, imm ); } + void operator()( const x86Register16& to, u16 imm, bool needs_flags=false ) const { m_16i::Emit( to, imm ); } // ---------- 8 Bit Interface ----------- __forceinline void operator()( const x86Register8& to, const x86Register8& from ) const { m_8i::Emit( to, from ); } @@ -690,7 +741,9 @@ namespace x86Emitter __noinline void operator()( const x86Register8& to, const ModSibBase& sibsrc ) const { m_8::Emit( to, sibsrc ); } __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } - void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); } + void operator()( const x86Register8& to, u8 imm, bool needs_flags=false ) const { m_8i::Emit( to, imm ); } + + Group1ImplAll() {} // Why does GCC need these? }; @@ -712,7 +765,7 @@ namespace x86Emitter // I've set up the inlining to be as practical and intelligent as possible, which means // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to // virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis- - // cretion of the compiler. + // creation of the compiler. // // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) @@ -735,6 +788,9 @@ namespace x86Emitter __noinline void operator()( const ModSibStrict<1>& sibdest, const x86Register8& from ) const{ m_8::Emit( sibdest, from ); } __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); } + + + Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds. }; // Define the externals for Group1/2 instructions here (inside the Internal namespace). 
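The zero-immediate handling threaded through MOV in this patch comes down to an encoding trade-off: xor reg,reg is a 2-byte encoding versus 5 bytes for mov reg,imm32, but it writes EFLAGS, which is the whole reason for the new preserve_flags parameter. A condensed sketch of the pattern, using the MOV/XOR emitter forms added above (hypothetical wrapper name, not a function in the tree):

// mov eax,0   ->  B8 00 00 00 00  (5 bytes, flags untouched)
// xor eax,eax ->  31 C0           (2 bytes, OF/CF cleared, ZF/PF set)
void EmitLoadImm( const x86Register32& to, u32 imm, bool preserve_flags )
{
	if( !preserve_flags && (imm == 0) )
		XOR( to, to );          // smaller, but clobbers the flags state
	else
		MOV( to, imm, true );   // force the literal mov so flags survive
}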
@@ -759,36 +815,41 @@ namespace x86Emitter extern const Group2ImplAll SHL; extern const Group2ImplAll SHR; extern const Group2ImplAll SAR; + + /*template< G1Type InstType, typename RegType > + static __forceinline void _optimize_imm0( const RegType& to ) + { + switch( InstType ) + { + // ADD, SUB, and OR can be ignored if the imm is zero.. + case G1Type_ADD: + case G1Type_SUB: + case G1Type_OR: + return true; + + // ADC and SBB can never be ignored (could have carry bits) + // XOR behavior is distinct as well [or is it the same as NEG or NOT?] + case G1Type_ADC: + case G1Type_SBB: + case G1Type_XOR: + return false; + + // replace AND with XOR (or SUB works too.. whatever!) + case G1Type_AND: + XOR( to, to ); + return true; + + // replace CMP with OR reg,reg: + case G1Type_CMP: + OR( to, to ); + return true; + + jNO_DEFAULT + } + return false; + }*/ + } - - // ------------------------------------------------------------------------ - - extern const x86Register32 eax; - extern const x86Register32 ebx; - extern const x86Register32 ecx; - extern const x86Register32 edx; - extern const x86Register32 esi; - extern const x86Register32 edi; - extern const x86Register32 ebp; - extern const x86Register32 esp; - - extern const x86Register16 ax; - extern const x86Register16 bx; - extern const x86Register16 cx; - extern const x86Register16 dx; - extern const x86Register16 si; - extern const x86Register16 di; - extern const x86Register16 bp; - extern const x86Register16 sp; - - extern const x86Register8 al; - extern const x86Register8 cl; - extern const x86Register8 dl; - extern const x86Register8 bl; - extern const x86Register8 ah; - extern const x86Register8 ch; - extern const x86Register8 dh; - extern const x86Register8 bh; } #include "ix86_inlines.inl" From b748068b38584b6b3a0f96a6b4395347334db765 Mon Sep 17 00:00:00 2001 From: mattmenke Date: Tue, 14 Apr 2009 13:40:28 +0000 Subject: [PATCH 060/143] LilyPad: Changed how device updates are handled to be more multithreaded friendly. Mutexes when "read input in GS thread" is disabled removed, as they should (hopefully) no longer be needed. May just ditch the option entirely in the future, since enabling it doesn't seem to make much difference, and slows things down for some people. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@975 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/LilyPad/LilyPad.cpp | 48 +++++++++++-------------------------- plugins/LilyPad/LilyPad.rc | 2 +- 2 files changed, 15 insertions(+), 35 deletions(-) diff --git a/plugins/LilyPad/LilyPad.cpp b/plugins/LilyPad/LilyPad.cpp index 3c2a2ca412..067ed73fb2 100644 --- a/plugins/LilyPad/LilyPad.cpp +++ b/plugins/LilyPad/LilyPad.cpp @@ -25,11 +25,6 @@ // LilyPad version. #define VERSION ((0<<8) | 10 | (0<<24)) -// Used to prevent reading input and cleaning up input devices at the same time. -// Only an issue when not reading input in GS thread and disabling devices due to -// lost focus. 
-CRITICAL_SECTION readInputCriticalSection; - HINSTANCE hInst; HWND hWnd; @@ -241,11 +236,9 @@ void UpdateEnabledDevices(int updateList = 0) { BOOL WINAPI DllMain(HINSTANCE hInstance, DWORD fdwReason, void* lpvReserved) { hInst = hInstance; if (fdwReason == DLL_PROCESS_ATTACH) { - InitializeCriticalSection(&readInputCriticalSection); DisableThreadLibraryCalls(hInstance); } else if (fdwReason == DLL_PROCESS_DETACH) { - DeleteCriticalSection(&readInputCriticalSection); while (openCount) PADclose(); PADshutdown(); @@ -353,8 +346,17 @@ u8 padReadKeyUpdated = 0; #define LOCK_BUTTONS 4 #define LOCK_BOTH 1 +int deviceUpdateQueued = 0; +void QueueDeviceUpdate(int updateList=0) { + deviceUpdateQueued = deviceUpdateQueued | 1 | (updateList<<1); +}; + void Update(unsigned int port, unsigned int slot) { + if (deviceUpdateQueued) { + UpdateEnabledDevices((deviceUpdateQueued & 0x2)==0x2); + deviceUpdateQueued = 0; + } if (port > 2) return; u8 *stateUpdated; if (port < 2) @@ -383,9 +385,6 @@ void Update(unsigned int port, unsigned int slot) { 0, hWnd, hWnd, 0 }; - if (!config.GSThreadUpdates) { - EnterCriticalSection(&readInputCriticalSection); - } dm->Update(&info); static int turbo = 0; turbo++; @@ -408,7 +407,7 @@ void Update(unsigned int port, unsigned int slot) { else if ((state>>15) && !(dev->oldVirtualControlState[b->controlIndex]>>15)) { if (cmd == 0x0F) { miceEnabled = !miceEnabled; - UpdateEnabledDevices(); + QueueDeviceUpdate(); } else if (cmd == 0x0C) { lockStateChanged[port][slot] |= LOCK_BUTTONS; @@ -435,10 +434,6 @@ void Update(unsigned int port, unsigned int slot) { } dm->PostRead(); - if (!config.GSThreadUpdates) { - LeaveCriticalSection(&readInputCriticalSection); - } - { for (int port=0; port<2; port++) { for (int slot=0; slot<4; slot++) { @@ -750,30 +745,15 @@ ExtraWndProcResult HackWndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lPara break; case WM_DEVICECHANGE: if (wParam == DBT_DEVNODES_CHANGED) { - // Need to do this when not reading input from gs thread. - // Checking for that case not worth the effort. - EnterCriticalSection(&readInputCriticalSection); - UpdateEnabledDevices(1); - LeaveCriticalSection(&readInputCriticalSection); + QueueDeviceUpdate(1); } break; case WM_ACTIVATEAPP: // Release any buttons PCSX2 may think are down when // losing/gaining focus. ReleaseModifierKeys(); - - // Need to do this when not reading input from gs thread. - // Checking for that case not worth the effort. 
- EnterCriticalSection(&readInputCriticalSection); - if (!wParam) { - activeWindow = 0; - UpdateEnabledDevices(); - } - else { - activeWindow = 1; - UpdateEnabledDevices(); - } - LeaveCriticalSection(&readInputCriticalSection); + activeWindow = wParam != 0; + QueueDeviceUpdate(); break; case WM_CLOSE: if (config.closeHacks & 1) { @@ -871,7 +851,7 @@ s32 CALLBACK PADopen(void *pDsp) { // activeWindow = (GetAncestor(hWnd, GA_ROOT) == GetAncestor(GetForegroundWindow(), GA_ROOT)); activeWindow = 1; - UpdateEnabledDevices(); + QueueDeviceUpdate(); return 0; } diff --git a/plugins/LilyPad/LilyPad.rc b/plugins/LilyPad/LilyPad.rc index 8c3b7ada71..5a6b1ad24c 100644 --- a/plugins/LilyPad/LilyPad.rc +++ b/plugins/LilyPad/LilyPad.rc @@ -235,7 +235,7 @@ BEGIN PUSHBUTTON "Test Device",ID_TEST,86,289,57,15 PUSHBUTTON "Refresh",ID_REFRESH,152,289,48,15 GROUPBOX "Miscellaneous",IDC_STATIC,216,211,201,34 - CONTROL "Use GS thread (Recommended)",IDC_GS_THREAD_INPUT,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,221,221,116,10 + CONTROL "Use GS thread",IDC_GS_THREAD_INPUT,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,221,221,62,10 CONTROL "Disable screensaver",IDC_DISABLE_SCREENSAVER,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,221,232,80,10 CONTROL "Local volume control",IDC_VISTA_VOLUME,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,336,221,77,10 CONTROL "Enable logging",IDC_DEBUG_FILE,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,336,232,62,10 From 881b01c8d75839bf3f5f874c4424af9f4a55c085 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Tue, 14 Apr 2009 14:14:40 +0000 Subject: [PATCH 061/143] Linux: Fix the Makefile. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@976 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/Makefile.am | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pcsx2/x86/ix86/Makefile.am b/pcsx2/x86/ix86/Makefile.am index 30e16e264a..6943135b1b 100644 --- a/pcsx2/x86/ix86/Makefile.am +++ b/pcsx2/x86/ix86/Makefile.am @@ -1,5 +1,6 @@ INCLUDES = -I@srcdir@/.. -I@srcdir@/../../ -I@srcdir@/../../../common/include -I@srcdir@/../../../3rdparty noinst_LIBRARIES = libix86.a -libix86_a_SOURCES = ix86_mmx.cpp ix86_tools.cpp ix86.cpp ix86_3dnow.cpp ix86_fpu.cpp ix86_legacy.cpp ix86_sse.cpp ix86_cpudetect.cpp ix86_group1.cpp \ -ix86_internal.h ix86.h ix86_macros.h ix86_sse_helpers.h ix86_types.h +libix86_a_SOURCES = libix86_a_SOURCES = ix86_mmx.cpp ix86_tools.cpp ix86.cpp ix86_3dnow.cpp \ +ix86_fpu.cpp ix86_legacy.cpp ix86_sse.cpp ix86_cpudetect.cpp ix86_jmp.cpp \ + ix86_internal.h ix86.h ix86_macros.h ix86_sse_helpers.h ix86_types.h From 12adbcc061783c7d11a2686a037d5bc952da16ac Mon Sep 17 00:00:00 2001 From: mattmenke Date: Tue, 14 Apr 2009 18:46:59 +0000 Subject: [PATCH 062/143] Port 2 Multitap should be fixed (PCSX2 bug). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@977 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Sio.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/Sio.cpp b/pcsx2/Sio.cpp index 45c3274a2a..44f2666b3f 100644 --- a/pcsx2/Sio.cpp +++ b/pcsx2/Sio.cpp @@ -527,7 +527,7 @@ void InitializeSIO(u8 value) } break; case 0x2002: - if (!PAD2setSlot(1, 1+sio.activePadSlot[1])) { + if (!PAD2setSlot(2, 1+sio.activePadSlot[1])) { // Pad is not present. Don't send poll, just return a bunch of 0's. sio2.packet.recvVal1 = 0x1D100; sio.padst = 3; From fa1a79b3681d11d9e3d27cfccb98f418b90d907d Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 15 Apr 2009 01:25:11 +0000 Subject: [PATCH 063/143] Cleaned a few things up, and moved a few things around. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@978 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/GS.cpp | 34 +++++++++------------- pcsx2/Gif.cpp | 16 ++++------ pcsx2/Hw.h | 63 +++++++++++++++++----------------------- pcsx2/IPU/IPU.cpp | 5 ---- pcsx2/Memory.h | 34 ++++++++++++++++++++-- pcsx2/SPR.cpp | 53 +++++++++++++-------------------- pcsx2/Sif.cpp | 21 +++++++++----- pcsx2/Vif.cpp | 6 ++-- pcsx2/Vif.h | 1 - pcsx2/VifDma.cpp | 35 ++++++++++------------ pcsx2/VifDma.h | 3 -- pcsx2/x86/iFPUd.cpp | 2 +- pcsx2/x86/iR3000A.cpp | 2 +- pcsx2/x86/iVUzerorec.cpp | 4 +-- 14 files changed, 136 insertions(+), 143 deletions(-) diff --git a/pcsx2/GS.cpp b/pcsx2/GS.cpp index 8772c51056..4ff204c57d 100644 --- a/pcsx2/GS.cpp +++ b/pcsx2/GS.cpp @@ -24,16 +24,27 @@ #include "GS.h" #include "iR5900.h" #include "Counters.h" - #include "VifDma.h" using namespace Threading; using namespace std; - using namespace R5900; static bool m_gsOpened = false; +u32 CSRw; + +PCSX2_ALIGNED16( u8 g_RealGSMem[0x2000] ); +extern int m_nCounters[]; + +// FrameSkipping Stuff +// Yuck, iSlowStart is needed by the MTGS, so can't make it static yet. + +u64 m_iSlowStart=0; +static s64 m_iSlowTicks=0; +static bool m_justSkipped = false; +static bool m_StrictSkipping = false; + #ifdef PCSX2_DEVBUILD // GS Playback @@ -98,21 +109,6 @@ __forceinline void GSVSYNC(void) { } #endif -u32 CSRw; - -PCSX2_ALIGNED16( u8 g_RealGSMem[0x2000] ); -#define PS2GS_BASE(mem) (g_RealGSMem+(mem&0x13ff)) - -extern int m_nCounters[]; - -// FrameSkipping Stuff -// Yuck, iSlowStart is needed by the MTGS, so can't make it static yet. - -u64 m_iSlowStart=0; -static s64 m_iSlowTicks=0; -static bool m_justSkipped = false; -static bool m_StrictSkipping = false; - void _gs_ChangeTimings( u32 framerate, u32 iTicks ) { m_iSlowStart = GetCPUTicks(); @@ -839,8 +835,6 @@ void RunGSState( gzLoadingState& f ) list::iterator it = packets.begin(); g_SaveGSStream = 3; - //int skipfirst = 1; - // first extract the data while(1) { @@ -877,4 +871,4 @@ void RunGSState( gzLoadingState& f ) #endif -#undef GIFchain +//#undef GIFchain diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index a36c17f775..4bc894a97f 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -28,9 +28,6 @@ using std::min; -#define gif ((DMACh*)&psH[0xA000]) -#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) - enum gifstate_t { GIF_STATE_EMPTY = 0, @@ -49,6 +46,7 @@ static int gspath3done = 0; static u32 gscycles = 0, prevcycles = 0, mfifocycles = 0; static u32 gifqwc = 0; +bool gifmfifoirq = FALSE; __forceinline void gsInterrupt() { GIF_LOG("gsInterrupt: %8.8x", cpuRegs.cycle); @@ -151,7 +149,7 @@ int _GIFchain() { return (qwc)*2; } -__forceinline void GIFchain() +static __forceinline void GIFchain() { FreezeRegs(1); if (gif->qwc) gscycles+= _GIFchain(); /* guessing */ @@ -243,14 +241,10 @@ void GIFdma() } } } - // When MTGS is enabled, Gifchain calls WRITERING_DMA, which calls GSRINGBUF_DONECOPY, which freezes - // the registers inside of the FreezeXMMRegs calls here and in the other two below.. - // I'm not really sure that is intentional. --arcum42 + GIFchain(); - // Theres a comment below that says not to unfreeze the xmm regs, so not sure about freezing and unfreezing in GIFchain. if((gif->qwc == 0) && ((gspath3done == 1) || (gif->chcr & 0xc) == 0)){ - //if(gif->qwc > 0) Console::WriteLn("Hurray!"); // We *know* it is 0! 
gspath3done = 0; gif->chcr &= ~0x100; GSCSRr &= ~0xC000; @@ -445,7 +439,6 @@ static __forceinline int mfifoGIFchain() { return 0; } -bool gifmfifoirq = FALSE; void mfifoGIFtransfer(int qwc) { u32 *ptag; @@ -521,12 +514,15 @@ void mfifoGIFtransfer(int qwc) { gifmfifoirq = TRUE; } } + FreezeRegs(1); + if (mfifoGIFchain() == -1) { Console::WriteLn("GIF dmaChain error size=%d, madr=%lx, tadr=%lx", params gif->qwc, gif->madr, gif->tadr); gifstate = GIF_STATE_STALL; } + FreezeRegs(0); if(gif->qwc == 0 && gifstate == GIF_STATE_DONE) gifstate = GIF_STATE_STALL; diff --git a/pcsx2/Hw.h b/pcsx2/Hw.h index 07f21049ce..97a981c82b 100644 --- a/pcsx2/Hw.h +++ b/pcsx2/Hw.h @@ -21,25 +21,16 @@ extern u8 *psH; // hw mem -#define psHs8(mem) (*(s8 *)&PS2MEM_HW[(mem) & 0xffff]) -#define psHs16(mem) (*(s16*)&PS2MEM_HW[(mem) & 0xffff]) -#define psHs32(mem) (*(s32*)&PS2MEM_HW[(mem) & 0xffff]) -#define psHs64(mem) (*(s64*)&PS2MEM_HW[(mem) & 0xffff]) -#define psHu8(mem) (*(u8 *)&PS2MEM_HW[(mem) & 0xffff]) -#define psHu16(mem) (*(u16*)&PS2MEM_HW[(mem) & 0xffff]) -#define psHu32(mem) (*(u32*)&PS2MEM_HW[(mem) & 0xffff]) -#define psHu64(mem) (*(u64*)&PS2MEM_HW[(mem) & 0xffff]) - extern void CPU_INT( u32 n, s32 ecycle ); ////////////////////////////////////////////////////////////////////////// // Hardware FIFOs (128 bit access only!) // -// VIF0 -- 0x10004000 -- psH[0x4000] -// VIF1 -- 0x10005000 -- psH[0x5000] -// GIF -- 0x10006000 -- psH[0x6000] -// IPUout -- 0x10007000 -- psH[0x7000] -// IPUin -- 0x10007010 -- psH[0x7010] +// VIF0 -- 0x10004000 -- PS2MEM_HW[0x4000] +// VIF1 -- 0x10005000 -- PS2MEM_HW[0x5000] +// GIF -- 0x10006000 -- PS2MEM_HW[0x6000] +// IPUout -- 0x10007000 -- PS2MEM_HW[0x7000] +// IPUin -- 0x10007010 -- PS2MEM_HW[0x7010] void __fastcall ReadFIFO_page_4(u32 mem, mem128_t *out); void __fastcall ReadFIFO_page_5(u32 mem, mem128_t *out); @@ -215,8 +206,8 @@ struct DMACh { #define INTC_GS 0 #define INTC_SBUS 1 -#define INTC_VBLANK_S 2 -#define INTC_VBLANK_E 3 +#define INTC_VBLANK_S 2 +#define INTC_VBLANK_E 3 #define INTC_VIF0 4 #define INTC_VIF1 5 #define INTC_VU0 6 @@ -290,21 +281,6 @@ struct DMACh { #define SISintr (0x20002000) #define MEISintr (0x40004000) -#define DMAend(dma, num) { \ - dma->chcr &= ~0x100; \ - psHu32(DMAC_STAT)|= 1<chcr &= ~0x100; + } + + return ptr; +} void hwInit(); void hwReset(); diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index 8c9ca709b9..a1b7626df5 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -48,11 +48,6 @@ using namespace std; // for min / max # define IPU_FORCEINLINE __forceinline #endif -//IPUregisters g_ipuRegsReal; - -#define ipu0dma ((DMACh *)&PS2MEM_HW[0xb000]) -#define ipu1dma ((DMACh *)&PS2MEM_HW[0xb400]) - #define IPU_DMA_GIFSTALL 1 #define IPU_DMA_TIE0 2 #define IPU_DMA_TIE1 4 diff --git a/pcsx2/Memory.h b/pcsx2/Memory.h index 1ac1e085b7..7acd4c3fb9 100644 --- a/pcsx2/Memory.h +++ b/pcsx2/Memory.h @@ -23,6 +23,7 @@ #endif //#define ENABLECACHE +#include "vtlb.h" namespace Ps2MemSize { @@ -57,9 +58,40 @@ extern u8 *psS; //0.015 mb, scratch pad extern u8 g_RealGSMem[Ps2MemSize::GSregs]; #define PS2MEM_GS g_RealGSMem +#define PS2GS_BASE(mem) (g_RealGSMem+(mem&0x13ff)) + +// Various useful locations +#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) +#define spr1 ((DMACh*)&PS2MEM_HW[0xD400]) + +#define gif ((DMACh*)&PS2MEM_HW[0xA000]) + +#define vif0ch ((DMACh*)&PS2MEM_HW[0x8000]) +#define vif1ch ((DMACh*)&PS2MEM_HW[0x9000]) + +#define sif0dma ((DMACh*)&PS2MEM_HW[0xc000]) +#define sif1dma ((DMACh*)&PS2MEM_HW[0xc400]) +#define sif2dma 
((DMACh*)&PS2MEM_HW[0xc800]) + +#define ipu0dma ((DMACh *)&PS2MEM_HW[0xb000]) +#define ipu1dma ((DMACh *)&PS2MEM_HW[0xb400]) + +// From Gif.h +#define GSCSRr *((u64*)(g_RealGSMem+0x1000)) +#define GSIMR *((u32*)(g_RealGSMem+0x1010)) +#define GSSIGLBLID ((GSRegSIGBLID*)(g_RealGSMem+0x1080)) #define PSM(mem) (vtlb_GetPhyPtr((mem)&0x1fffffff)) //pcsx2 is a competition.The one with most hacks wins :D +#define psHs8(mem) (*(s8 *)&PS2MEM_HW[(mem) & 0xffff]) +#define psHs16(mem) (*(s16*)&PS2MEM_HW[(mem) & 0xffff]) +#define psHs32(mem) (*(s32*)&PS2MEM_HW[(mem) & 0xffff]) +#define psHs64(mem) (*(s64*)&PS2MEM_HW[(mem) & 0xffff]) +#define psHu8(mem) (*(u8 *)&PS2MEM_HW[(mem) & 0xffff]) +#define psHu16(mem) (*(u16*)&PS2MEM_HW[(mem) & 0xffff]) +#define psHu32(mem) (*(u32*)&PS2MEM_HW[(mem) & 0xffff]) +#define psHu64(mem) (*(u64*)&PS2MEM_HW[(mem) & 0xffff]) + #define psMs8(mem) (*(s8 *)&PS2MEM_BASE[(mem) & 0x1ffffff]) #define psMs16(mem) (*(s16*)&PS2MEM_BASE[(mem) & 0x1ffffff]) #define psMs32(mem) (*(s32*)&PS2MEM_BASE[(mem) & 0x1ffffff]) @@ -134,8 +166,6 @@ extern void memClearPageAddr(u32 vaddr); extern void memMapVUmicro(); -#include "vtlb.h" - extern int mmap_GetRamPageInfo(void* ptr); extern void mmap_MarkCountedRamPage(void* ptr,u32 vaddr); extern void mmap_ResetBlockTracking(); diff --git a/pcsx2/SPR.cpp b/pcsx2/SPR.cpp index e33fb05e79..71bfc6b948 100644 --- a/pcsx2/SPR.cpp +++ b/pcsx2/SPR.cpp @@ -23,10 +23,6 @@ #include "iR5900.h" #include "VUmicro.h" -#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) -#define spr1 ((DMACh*)&PS2MEM_HW[0xD400]) -#define gif ((DMACh*)&PS2MEM_HW[0xA000]) - extern void mfifoGIFtransfer(int); /* Both of these should be bools. Again, next savestate break. --arcum42 */ @@ -66,8 +62,10 @@ int _SPR0chain() if ((psHu32(DMAC_CTRL) & 0xC) >= 0x8) // 0x8 VIF1 MFIFO, 0xC GIF MFIFO { - if ((spr0->madr & ~psHu32(DMAC_RBSR)) != psHu32(DMAC_RBOR)) Console::WriteLn("SPR MFIFO Write outside MFIFO area"); - else mfifotransferred += spr0->qwc; + if ((spr0->madr & ~psHu32(DMAC_RBSR)) != psHu32(DMAC_RBOR)) + Console::WriteLn("SPR MFIFO Write outside MFIFO area"); + else + mfifotransferred += spr0->qwc; hwMFIFOWrite(spr0->madr, (u8*)&PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4); spr0->madr += spr0->qwc << 4; @@ -89,9 +87,11 @@ int _SPR0chain() return (spr0->qwc) * BIAS; // bus is 1/2 the ee speed } -#define SPR0chain() \ - cycles += _SPR0chain(); \ +__forceinline void SPR0chain() +{ + _SPR0chain(); spr0->qwc = 0; +} void _SPR0interleave() @@ -99,7 +99,6 @@ void _SPR0interleave() int qwc = spr0->qwc; int sqwc = psHu32(DMAC_SQWC) & 0xff; int tqwc = (psHu32(DMAC_SQWC) >> 16) & 0xff; - //int cycles = 0; u32 *pMem; if (tqwc == 0) tqwc = qwc; @@ -124,9 +123,8 @@ void _SPR0interleave() TestClearVUs(spr0->madr, spr0->qwc << 2); memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4); } - //cycles += tqwc * BIAS; spr0->sadr += spr0->qwc * 16; - spr0->madr += (sqwc + spr0->qwc) * 16; //qwc-= sqwc; + spr0->madr += (sqwc + spr0->qwc) * 16; } spr0->qwc = 0; @@ -143,15 +141,12 @@ static __forceinline void _dmaSPR0() // Transfer Dn_QWC from SPR to Dn_MADR if ((spr0->chcr & 0xc) == 0x0) // Normal Mode { - int cycles = 0; SPR0chain(); spr0finished = 1; - return; } else if ((spr0->chcr & 0xc) == 0x4) { - int cycles = 0; u32 *ptag; int id; bool done = FALSE; @@ -167,7 +162,6 @@ static __forceinline void _dmaSPR0() spr0->sadr += 16; // Transfer dma tag if tte is set - spr0->chcr = (spr0->chcr & 0xFFFF) | ((*ptag) & 0xFFFF0000); //Transfer upper part of tag to CHCR bits 31-15 id = 
(ptag[0] >> 28) & 0x7; //ID for DmaChain copied from bit 28 of the tag @@ -201,7 +195,6 @@ static __forceinline void _dmaSPR0() { //Console::WriteLn("SPR0 TIE"); done = TRUE; - spr0->qwc = 0; } spr0finished = (done) ? 1 : 0; @@ -209,9 +202,7 @@ static __forceinline void _dmaSPR0() if (!done) { ptag = (u32*) & PS2MEM_SCRATCH[spr0->sadr & 0x3fff]; //Set memory pointer to SADR - //spr0->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag - CPU_INT(8, ((u16)ptag[0]) / BIAS); //spr0->qwc / BIAS); - spr0->qwc = 0; + CPU_INT(8, ((u16)ptag[0]) / BIAS); // the lower 16bits of the tag / BIAS); return; } SPR_LOG("spr0 dmaChain complete %8.8x_%8.8x size=%d, id=%d, addr=%lx spr=%lx", @@ -292,9 +283,11 @@ int _SPR1chain() return (spr1->qwc) * BIAS; } -#define SPR1chain() \ - cycles += _SPR1chain(); \ - spr1->qwc = 0; +__forceinline void SPR1chain() +{ + _SPR1chain(); + spr1->qwc = 0; +} void _SPR1interleave() @@ -302,7 +295,6 @@ void _SPR1interleave() int qwc = spr1->qwc; int sqwc = psHu32(DMAC_SQWC) & 0xff; int tqwc = (psHu32(DMAC_SQWC) >> 16) & 0xff; - //int cycles = 0; u32 *pMem; if (tqwc == 0) tqwc = qwc; @@ -316,8 +308,7 @@ void _SPR1interleave() pMem = (u32*)dmaGetAddr(spr1->madr); memcpy_fast(&PS2MEM_SCRATCH[spr1->sadr & 0x3fff], (u8*)pMem, spr1->qwc << 4); spr1->sadr += spr1->qwc * 16; - //cycles += spr1->qwc * BIAS; - spr1->madr += (sqwc + spr1->qwc) * 16; //qwc-= sqwc; + spr1->madr += (sqwc + spr1->qwc) * 16; } spr1->qwc = 0; @@ -328,7 +319,7 @@ void _dmaSPR1() // toSPR work function { if ((spr1->chcr & 0xc) == 0) // Normal Mode { - int cycles = 0; + //int cycles = 0; // Transfer Dn_QWC from Dn_MADR to SPR1 SPR1chain(); spr1finished = 1; @@ -336,7 +327,6 @@ void _dmaSPR1() // toSPR work function } else if ((spr1->chcr & 0xc) == 0x4) { - int cycles = 0; u32 *ptag; int id; bool done = FALSE; @@ -363,7 +353,7 @@ void _dmaSPR1() // toSPR work function spr1->chcr = (spr1->chcr & 0xFFFF) | ((*ptag) & 0xFFFF0000); //Transfer upper part of tag to CHCR bits 31-15 id = (ptag[0] >> 28) & 0x7; //ID for DmaChain copied from bit 28 of the tag - spr1->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag + spr1->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag until SPR1chain is called in a few lines. 
spr1->madr = ptag[1]; //MADR = ADDR field // Transfer dma tag if tte is set @@ -384,7 +374,6 @@ void _dmaSPR1() // toSPR work function SPR_LOG("dmaIrq Set"); //Console::WriteLn("SPR1 TIE"); - spr1->qwc = 0; done = TRUE; } @@ -392,9 +381,7 @@ void _dmaSPR1() // toSPR work function if (!done) { ptag = (u32*)dmaGetAddr(spr1->tadr); //Set memory pointer to TADR - //spr1->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag - CPU_INT(9, (((u16)ptag[0]) / BIAS));// spr1->qwc / BIAS); - spr1->qwc = 0; + CPU_INT(9, (((u16)ptag[0]) / BIAS));// the lower 16 bits of the tag / BIAS); } } else // Interleave Mode @@ -411,7 +398,7 @@ void dmaSPR1() // toSPR spr1->chcr, spr1->madr, spr1->qwc, spr1->tadr, spr1->sadr); - if ((spr1->chcr & 0xc) == 0x4 && spr1->qwc == 0) + if (((spr1->chcr & 0xc) == 0x4) && (spr1->qwc == 0)) { u32 *ptag; ptag = (u32*)dmaGetAddr(spr1->tadr); //Set memory pointer to TADR diff --git a/pcsx2/Sif.cpp b/pcsx2/Sif.cpp index 796be3f9c7..23576f767f 100644 --- a/pcsx2/Sif.cpp +++ b/pcsx2/Sif.cpp @@ -25,10 +25,6 @@ using namespace std; -#define sif0dma ((DMACh*)&PS2MEM_HW[0xc000]) -#define sif1dma ((DMACh*)&PS2MEM_HW[0xc400]) -#define sif2dma ((DMACh*)&PS2MEM_HW[0xc800]) - DMACh *sif0ch; DMACh *sif1ch; DMACh *sif2ch; @@ -206,7 +202,10 @@ __forceinline void SIF0Dma() //SIF_LOG(" EE SIF doing transfer %04Xqw to %08X", readSize, sif0dma->madr); SIF_LOG("----------- %lX of %lX", readSize << 2, size << 2); - _dmaGetAddr(sif0dma, ptag, sif0dma->madr, 5); + ptag = _dmaGetAddr(sif0dma, sif0dma->madr, 5); + if (ptag == NULL) return; + + //_dmaGetAddr(sif0dma, *ptag, sif0dma->madr, 5); SIF0read((u32*)ptag, readSize << 2); @@ -285,7 +284,12 @@ __forceinline void SIF1Dma() { // Process DMA tag at sif1dma->tadr done = FALSE; - _dmaGetAddr(sif1dma, ptag, sif1dma->tadr, 6); + ptag = _dmaGetAddr(sif1dma, sif1dma->tadr, 6); + if (ptag == NULL) return; + + //_dmaGetAddr(sif1dma, *ptag, sif1dma->tadr, 6); + + sif1dma->chcr = (sif1dma->chcr & 0xFFFF) | ((*ptag) & 0xFFFF0000); // Copy the tag sif1dma->qwc = (u16)ptag[0]; @@ -348,7 +352,10 @@ __forceinline void SIF1Dma() int qwTransfer = sif1dma->qwc; u32 *data; - _dmaGetAddr(sif1dma, data, sif1dma->madr, 6); + data = _dmaGetAddr(sif1dma, sif1dma->madr, 6); + if (data == NULL) return; + + //_dmaGetAddr(sif1dma, *data, sif1dma->madr, 6); if (qwTransfer > (FIFO_SIF1_W - sif1.fifoSize) / 4) // Copy part of sif1dma into FIFO qwTransfer = (FIFO_SIF1_W - sif1.fifoSize) / 4; diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index cb81e5f6b2..da8bd88aaf 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -427,7 +427,7 @@ void mfifoVIF1transfer(int qwc) { u32 *ptag; int id; - int ret, temp; + int ret; g_vifCycles = 0; @@ -493,12 +493,14 @@ void mfifoVIF1transfer(int qwc) break; case 2: // Next - Transfer QWC following tag. 
TADR = ADDR - temp = vif1ch->madr; //Temporarily Store ADDR + { + int temp = vif1ch->madr; //Temporarily Store ADDR vif1ch->madr = psHu32(DMAC_RBOR) + ((vif1ch->tadr + 16) & psHu32(DMAC_RBSR)); //Set MADR to QW following the tag vif1ch->tadr = temp; //Copy temporarily stored ADDR to Tag if ((temp & psHu32(DMAC_RBSR)) != psHu32(DMAC_RBOR)) Console::WriteLn("Next tag = %x outside ring %x size %x", params temp, psHu32(DMAC_RBOR), psHu32(DMAC_RBSR)); vif1.done = 0; break; + } case 3: // Ref - Transfer QWC from ADDR field case 4: // Refs - Transfer QWC from ADDR field (Stall Control) diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index e01cb32bd8..64335c4dde 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -171,7 +171,6 @@ static __forceinline u32 getVifColRegs(u32 reg) #define vif0Regs ((VIFregisters*)&PS2MEM_HW[0x3800]) #define vif1Regs ((VIFregisters*)&PS2MEM_HW[0x3c00]) -#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) void dmaVIF0(); void dmaVIF1(); diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index be2cd2e5b7..0c67d44a01 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -29,10 +29,6 @@ using namespace std; // for min / max - - -#define gif ((DMACh*)&PS2MEM_HW[0xA000]) - // Extern variables extern "C" { @@ -66,6 +62,9 @@ int g_vifCycles = 0; bool path3hack = FALSE; bool Path3transfer = FALSE; +u32 splittransfer[4]; +u32 splitptr = 0; + typedef void (__fastcall *UNPACKFUNCTYPE)(u32 *dest, u32 *data, int size); typedef int (*UNPACKPARTFUNCTYPESSE)(u32 *dest, u32 *data, int size); extern void (*Vif1CMDTLB[82])(); @@ -952,6 +951,8 @@ static int __fastcall Vif0TransMPG(u32 *data) // MPG static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK { + int ret; + FreezeXMMRegs(1); if (vif0.vifpacketsize < vif0.tag.size) { @@ -960,35 +961,32 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK ProcessMemSkip(vif0.vifpacketsize << 2, (vif0.cmd & 0xf), VIF0dmanum); - vif0.tag.size -= vif0.vifpacketsize; - FreezeXMMRegs(0); - return vif0.vifpacketsize; + ret = vif0.vifpacketsize; + vif0.tag.size -= ret; } else { /* we got all the data, transfer it fully */ - int ret = vif0.tag.size; + ret = vif0.tag.size; //Align data after a split transfer first - if(vif0Regs->offset != 0 || vif0.cl != 0) + if ((vif0Regs->offset != 0) || (vif0.cl != 0)) { vif0.tag.size = VIFalign(data, &vif0.tag, vif0.tag.size, VIF0dmanum); data += ret - vif0.tag.size; if(vif0.tag.size > 0) VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); - vif0.tag.size = 0; - vif0.cmd = 0; - FreezeXMMRegs(0); - return ret; } else { VIFunpack(data, &vif0.tag, vif0.tag.size, VIF0dmanum); - vif0.tag.size = 0; - vif0.cmd = 0; - FreezeXMMRegs(0); - return ret; } + + vif0.tag.size = 0; + vif0.cmd = 0; } + + FreezeXMMRegs(0); + return ret; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1694,8 +1692,6 @@ static int __fastcall Vif1TransMPG(u32 *data) return ret; } } -u32 splittransfer[4]; -u32 splitptr = 0; static int __fastcall Vif1TransDirectHL(u32 *data) { @@ -2371,7 +2367,6 @@ __forceinline void vif1Interrupt() if (vif1Regs->mskpath3 == 0 || (vif1ch->chcr & 0x1) == 0x1)vif1Regs->stat &= ~0x1F000000; // FQC=0 } -#define spr0 ((DMACh*)&PS2MEM_HW[0xD000]) void dmaVIF1() { diff --git a/pcsx2/VifDma.h b/pcsx2/VifDma.h index 6af78214e7..7a1023a3a8 100644 --- a/pcsx2/VifDma.h +++ b/pcsx2/VifDma.h @@ -50,9 +50,6 @@ struct vifStruct { extern vifStruct vif0, vif1; extern bool Path3transfer; -#define vif0ch ((DMACh*)&PS2MEM_HW[0x8000]) -#define vif1ch 
((DMACh*)&PS2MEM_HW[0x9000]) - void __fastcall UNPACK_S_32( u32 *dest, u32 *data, int size ); void __fastcall UNPACK_S_16u( u32 *dest, u32 *data, int size ); diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp index ec7b489b76..33a9693cc3 100644 --- a/pcsx2/x86/iFPUd.cpp +++ b/pcsx2/x86/iFPUd.cpp @@ -116,7 +116,7 @@ static u64 PCSX2_ALIGNED16(dbl_ps2_overflow) = DOUBLE(0,1152,0); //overflow & cl static u64 PCSX2_ALIGNED16(dbl_underflow) = DOUBLE(0,897,0); //underflow if below static u64 PCSX2_ALIGNED16(dbl_s_pos[2]) = {0x7fffffffffffffffULL, 0}; -static u64 PCSX2_ALIGNED16(dbl_s_neg[2]) = {0x8000000000000000ULL, 0}; +//static u64 PCSX2_ALIGNED16(dbl_s_neg[2]) = {0x8000000000000000ULL, 0}; // converts small normal numbers to double equivalent // converts large normal numbers (which represent NaN/inf in IEEE) to double equivalent diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 66a78c0105..9f79701e7a 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -837,7 +837,7 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch) x86SetJ8( j8Ptr[0] ); } -static const int *s_pCode; +//static const int *s_pCode; #if !defined(_MSC_VER) static void checkcodefn() diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index bebcd3cdf3..ce9549be0c 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -2963,8 +2963,8 @@ int VuInstruction::SetCachedRegs(int upper, u32 vuxyz) void VuInstruction::Recompile(list::iterator& itinst, u32 vuxyz) { - static PCSX2_ALIGNED16(VECTOR _VF); - static PCSX2_ALIGNED16(VECTOR _VFc); + //static PCSX2_ALIGNED16(VECTOR _VF); + //static PCSX2_ALIGNED16(VECTOR _VFc); u32 *ptr; u8* pjmp; int vfregstore=0; From a661c80a4a6eabead8403bf40d172db56a30a3bc Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 15 Apr 2009 04:41:42 +0000 Subject: [PATCH 064/143] Some work on Vif & Hw. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@979 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Gif.cpp | 30 ++--- pcsx2/Hw.cpp | 178 +++++++++++++------------ pcsx2/Hw.h | 327 +++++++++++++++++++++++++--------------------- pcsx2/HwWrite.cpp | 6 +- pcsx2/Memory.h | 9 -- pcsx2/Vif.cpp | 32 ++--- pcsx2/VifDma.cpp | 92 ++++++------- pcsx2/VifDma.h | 9 +- 8 files changed, 352 insertions(+), 331 deletions(-) diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index 4bc894a97f..dd6056effa 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -55,8 +55,8 @@ __forceinline void gsInterrupt() { //Console::WriteLn("Eh? why are you still interrupting! chcr %x, qwc %x, done = %x", params gif->chcr, gif->qwc, done); return; } - if(gif->qwc > 0 || gspath3done == 0) { - if( !(psHu32(DMAC_CTRL) & 0x1) ) { + if (gif->qwc > 0 || gspath3done == 0) { + if (!(psHu32(DMAC_CTRL) & 0x1)) { Console::Notice("gs dma masked, re-scheduling..."); // re-raise the int shortly in the future CPU_INT( 2, 64 ); @@ -71,7 +71,7 @@ __forceinline void gsInterrupt() { /*if (!(vif1Regs->mskpath3 && (vif1ch->chcr & 0x100)) || (psHu32(GIF_MODE) & 0x1)) CPU_INT( 2, 64 );*/ #endif - if(gspath3done == 0) return; + if (gspath3done == 0) return; } gspath3done = 0; @@ -223,7 +223,7 @@ void GIFdma() ptag = (u32*)dmaGetAddr(gif->tadr); //Set memory pointer to TADR if (ptag == NULL) { //Is ptag empty? 
- psHu32(DMAC_STAT)|= 1<<15; //If yes, set BEIS (BUSERR) in DMAC_STAT register + psHu32(DMAC_STAT) |= DMAC_STAT_BEIS; //If yes, set BEIS (BUSERR) in DMAC_STAT register return; } gscycles += 2; @@ -276,7 +276,7 @@ void GIFdma() while ((gspath3done == 0) && (gif->qwc == 0)) { //Loop if the transfers aren't intermittent ptag = (u32*)dmaGetAddr(gif->tadr); //Set memory pointer to TADR if (ptag == NULL) { //Is ptag empty? - psHu32(DMAC_STAT)|= 1<<15; //If yes, set BEIS (BUSERR) in DMAC_STAT register + psHu32(DMAC_STAT)|= DMAC_STAT_BEIS; //If yes, set BEIS (BUSERR) in DMAC_STAT register return; } gscycles+=2; // Add 1 cycles from the QW read for the tag @@ -294,7 +294,7 @@ void GIFdma() if ((psHu32(DMAC_CTRL) & 0xC0) == 0x80) { // STD == GIF // there are still bugs, need to also check if gif->madr +16*qwc >= stadr, if not, stall - if(!gspath3done && gif->madr + (gif->qwc * 16) > psHu32(DMAC_STADR) && id == 4) { + if(!gspath3done && ((gif->madr + (gif->qwc * 16)) > psHu32(DMAC_STADR)) && (id == 4)) { // stalled Console::WriteLn("GS Stall Control Source = %x, Drain = %x\n MADR = %x, STADR = %x", params (psHu32(0xe000) >> 4) & 0x3, (psHu32(0xe000) >> 6) & 0x3,gif->madr, psHu32(DMAC_STADR)); prevcycles = gscycles; @@ -307,7 +307,7 @@ void GIFdma() } GIFchain(); //Transfers the data set by the switch - if ((gif->chcr & 0x80) && ptag[0] >> 31) { //Check TIE bit of CHCR and IRQ bit of tag + if ((gif->chcr & 0x80) && (ptag[0] >> 31)) { //Check TIE bit of CHCR and IRQ bit of tag GIF_LOG("dmaIrq Set"); gspath3done = 1; } @@ -408,8 +408,8 @@ static __forceinline int mfifoGIFrbTransfer() { gifqwc -= mfifoqwc; gif->qwc -= mfifoqwc; - gif->madr+= mfifoqwc*16; - mfifocycles+= (mfifoqwc) * 2; /* guessing */ + gif->madr += mfifoqwc*16; + mfifocycles += (mfifoqwc) * 2; /* guessing */ return 0; } @@ -432,9 +432,9 @@ static __forceinline int mfifoGIFchain() { if (pMem == NULL) return -1; WRITERING_DMA(pMem, mfifoqwc); - gif->madr+= mfifoqwc*16; + gif->madr += mfifoqwc*16; gif->qwc -= mfifoqwc; - mfifocycles+= (mfifoqwc) * 2; /* guessing */ + mfifocycles += (mfifoqwc) * 2; /* guessing */ } return 0; @@ -450,14 +450,14 @@ void mfifoGIFtransfer(int qwc) { if(qwc > 0 ) { gifqwc += qwc; - if(!(gif->chcr & 0x100))return; - if(gifstate == GIF_STATE_STALL) return; + if (!(gif->chcr & 0x100)) return; + if (gifstate == GIF_STATE_STALL) return; } SPR_LOG("mfifoGIFtransfer %x madr %x, tadr %x", gif->chcr, gif->madr, gif->tadr); - if(gif->qwc == 0){ - if(gif->tadr == spr0->madr) { + if (gif->qwc == 0) { + if (gif->tadr == spr0->madr) { //if( gifqwc > 1 ) DevCon::WriteLn("gif mfifo tadr==madr but qwc = %d", params gifqwc); //hwDmacIrq(14); diff --git a/pcsx2/Hw.cpp b/pcsx2/Hw.cpp index bb3087bfce..159c121328 100644 --- a/pcsx2/Hw.cpp +++ b/pcsx2/Hw.cpp @@ -148,28 +148,28 @@ int hwMFIFOWrite(u32 addr, u8 *data, u32 size) { } -int hwDmacSrcChainWithStack(DMACh *dma, int id) { +bool hwDmacSrcChainWithStack(DMACh *dma, int id) { u32 temp; switch (id) { case 0: // Refe - Transfer Packet According to ADDR field - return 1; //End Transfer + return true; //End Transfer case 1: // CNT - Transfer QWC following the tag. dma->madr = dma->tadr + 16; //Set MADR to QW after Tag dma->tadr = dma->madr + (dma->qwc << 4); //Set TADR to QW following the data - return 0; + return false; case 2: // Next - Transfer QWC following tag. 
TADR = ADDR temp = dma->madr; //Temporarily Store ADDR dma->madr = dma->tadr + 16; //Set MADR to QW following the tag dma->tadr = temp; //Copy temporarily stored ADDR to Tag - return 0; + return false; case 3: // Ref - Transfer QWC from ADDR field case 4: // Refs - Transfer QWC from ADDR field (Stall Control) dma->tadr += 16; //Set TADR to next tag - return 0; + return false; case 5: // Call - Transfer QWC following the tag, save succeeding tag temp = dma->madr; //Temporarily Store ADDR @@ -185,11 +185,11 @@ int hwDmacSrcChainWithStack(DMACh *dma, int id) { dma->asr1 = dma->madr + (dma->qwc << 4); //If no store Succeeding tag in ASR1 }else { Console::Notice("Call Stack Overflow (report if it fixes/breaks anything)"); - return 1; //Return done + return true; //Return done } dma->tadr = temp; //Set TADR to temporarily stored ADDR - return 0; + return false; case 6: // Ret - Transfer QWC following the tag, load next tag dma->madr = dma->tadr + 16; //Set MADR to data following the tag @@ -209,47 +209,47 @@ int hwDmacSrcChainWithStack(DMACh *dma, int id) { return 1; //End Transfer } } - return 0; + return false; case 7: // End - Transfer QWC following the tag dma->madr = dma->tadr + 16; //Set MADR to data following the tag //Dont Increment tadr, breaks Soul Calibur II and III - return 1; //End Transfer + return true; //End Transfer } - return -1; + return false; } -int hwDmacSrcChain(DMACh *dma, int id) { +bool hwDmacSrcChain(DMACh *dma, int id) { u32 temp; switch (id) { case 0: // Refe - Transfer Packet According to ADDR field - return 1; //End Transfer + return true; //End Transfer case 1: // CNT - Transfer QWC following the tag. dma->madr = dma->tadr + 16; //Set MADR to QW after Tag dma->tadr = dma->madr + (dma->qwc << 4); //Set TADR to QW following the data - return 0; + return false; case 2: // Next - Transfer QWC following tag. TADR = ADDR temp = dma->madr; //Temporarily Store ADDR dma->madr = dma->tadr + 16; //Set MADR to QW following the tag dma->tadr = temp; //Copy temporarily stored ADDR to Tag - return 0; + return false; case 3: // Ref - Transfer QWC from ADDR field case 4: // Refs - Transfer QWC from ADDR field (Stall Control) dma->tadr += 16; //Set TADR to next tag - return 0; + return false; case 7: // End - Transfer QWC following the tag dma->madr = dma->tadr + 16; //Set MADR to data following the tag //Dont Increment tadr, breaks Soul Calibur II and III - return 1; //End Transfer + return true; //End Transfer } - return -1; + return false; } // Original hwRead/Write32 functions .. left in for now, for troubleshooting purposes. 
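The chain handling in hwDmacSrcChain above -- and in the SPR, SIF, and GIF handlers earlier in this commit -- always extracts the same fields from the first quadword of a source-chain DMA tag: QWC from the low 16 bits, ID from bits 28-30, the IRQ flag from bit 31, ADDR from the second word, and the upper 16 bits copied into CHCR. For reference, here is that decode gathered into one hypothetical helper (a sketch; no such function exists in the tree):

// Field layout used by the source-chain DMA handlers above (sketch only).
struct DmaSrcTag
{
	u16  qwc;   // quadword count: low 16 bits of the tag
	u8   id;    // 0=REFE 1=CNT 2=NEXT 3=REF 4=REFS 5=CALL 6=RET 7=END
	bool irq;   // bit 31; ends the chain when CHCR.TIE is also set
	u32  addr;  // ADDR field: next tag or data address, depending on id
};

static DmaSrcTag hwDecodeSrcTag( const u32* ptag )
{
	DmaSrcTag t;
	t.qwc  = (u16)ptag[0];
	t.id   = (u8)((ptag[0] >> 28) & 0x7);
	t.irq  = (ptag[0] >> 31) != 0;
	t.addr = ptag[1];
	return t;
}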
@@ -301,23 +301,27 @@ mem32_t __fastcall hwRead32(u32 mem) } #if 0 // Counters Reference Block (original case setup) - case 0x10000000: return (u16)rcntRcount(0); - case 0x10000010: return (u16)counters[0].modeval; - case 0x10000020: return (u16)counters[0].target; - case 0x10000030: return (u16)counters[0].hold; + // 0x10000000 - 0x10000030 + case RCNT0_COUNT: return (u16)rcntRcount(0); + case RCNT0_MODE: return (u16)counters[0].modeval; + case RCNT0_TARGET: return (u16)counters[0].target; + case RCNT0_HOLD: return (u16)counters[0].hold; - case 0x10000800: return (u16)rcntRcount(1); - case 0x10000810: return (u16)counters[1].modeval; - case 0x10000820: return (u16)counters[1].target; - case 0x10000830: return (u16)counters[1].hold; + // 0x10000800 - 0x10000830 + case RCNT1_COUNT: return (u16)rcntRcount(1); + case RCNT1_MODE: return (u16)counters[1].modeval; + case RCNT1_TARGET: return (u16)counters[1].target; + case RCNT1_HOLD: return (u16)counters[1].hold; - case 0x10001000: return (u16)rcntRcount(2); - case 0x10001010: return (u16)counters[2].modeval; - case 0x10001020: return (u16)counters[2].target; + // 0x10001000 - 0x10001020 + case RCNT2_COUNT: return (u16)rcntRcount(2); + case RCNT2_MODE: return (u16)counters[2].modeval; + case RCNT2_TARGET: return (u16)counters[2].target; - case 0x10001800: return (u16)rcntRcount(3); - case 0x10001810: return (u16)counters[3].modeval; - case 0x10001820: return (u16)counters[3].target; + // 0x10001800 - 0x10001820 + case RCNT3_COUNT: return (u16)rcntRcount(3); + case RCNT3_MODE: return (u16)counters[3].modeval; + case RCNT3_TARGET: return (u16)counters[3].target; #endif break; @@ -386,7 +390,7 @@ mem32_t __fastcall hwRead32(u32 mem) { const char* regName = "Unknown"; - switch( mem ) + switch (mem) { case D2_CHCR: regName = "DMA2_CHCR"; break; case D2_MADR: regName = "DMA2_MADR"; break; @@ -426,8 +430,8 @@ mem32_t __fastcall hwRead32(u32 mem) __forceinline void __fastcall hwWrite32(u32 mem, u32 value) { - - if ((mem>=0x10002000) && (mem<0x10003000)) { //IPU regs + // Would ((mem >= IPU_CMD) && (mem <= IPU_TOP)) be better? 
-arcum42 + if ((mem >= IPU_CMD) && (mem < GIF_CTRL)) { //IPU regs ipuWrite32(mem,value); return; } @@ -439,25 +443,25 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) vif1Write32(mem, value); return; } - + switch (mem) { - case 0x10000000: rcntWcount(0, value); break; - case 0x10000010: rcntWmode(0, value); break; - case 0x10000020: rcntWtarget(0, value); break; - case 0x10000030: rcntWhold(0, value); break; + case RCNT0_COUNT: rcntWcount(0, value); break; + case RCNT0_MODE: rcntWmode(0, value); break; + case RCNT0_TARGET: rcntWtarget(0, value); break; + case RCNT0_TARGET: rcntWhold(0, value); break; - case 0x10000800: rcntWcount(1, value); break; - case 0x10000810: rcntWmode(1, value); break; - case 0x10000820: rcntWtarget(1, value); break; - case 0x10000830: rcntWhold(1, value); break; + case RCNT1_COUNT: rcntWcount(1, value); break; + case RCNT1_MODE: rcntWmode(1, value); break; + case RCNT1_TARGET: rcntWtarget(1, value); break; + case RCNT1_HOLD: rcntWhold(1, value); break; - case 0x10001000: rcntWcount(2, value); break; - case 0x10001010: rcntWmode(2, value); break; - case 0x10001020: rcntWtarget(2, value); break; + case RCNT2_COUNT: rcntWcount(2, value); break; + case RCNT2_MODE: rcntWmode(2, value); break; + case RCNT2_TARGET: rcntWtarget(2, value); break; - case 0x10001800: rcntWcount(3, value); break; - case 0x10001810: rcntWmode(3, value); break; - case 0x10001820: rcntWtarget(3, value); break; + case RCNT3_COUNT: rcntWcount(3, value); break; + case RCNT3_MODE: rcntWmode(3, value); break; + case RCNT3_TARGET: rcntWtarget(3, value); break; case GIF_CTRL: //Console::WriteLn("GIF_CTRL write %x", params value); @@ -492,150 +496,150 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) Console::WriteLn("Gifstat write value = %x", params value); return; - case 0x10008000: // dma0 - vif0 + case D0_CHCR: // dma0 - vif0 DMA_LOG("VIF0dma %lx", value); DmaExec(dmaVIF0, mem, value); break; - case 0x10009000: // dma1 - vif1 - chcr + case D1_CHCR: // dma1 - vif1 - chcr DMA_LOG("VIF1dma CHCR %lx", value); DmaExec(dmaVIF1, mem, value); break; #ifdef PCSX2_DEVBUILD - case 0x10009010: // dma1 - vif1 - madr + case D1_MADR: // dma1 - vif1 - madr HW_LOG("VIF1dma Madr %lx", value); psHu32(mem) = value;//dma1 madr break; - case 0x10009020: // dma1 - vif1 - qwc + case D1_QWC: // dma1 - vif1 - qwc HW_LOG("VIF1dma QWC %lx", value); psHu32(mem) = value;//dma1 qwc break; - case 0x10009030: // dma1 - vif1 - tadr + case D1_TADR: // dma1 - vif1 - tadr HW_LOG("VIF1dma TADR %lx", value); psHu32(mem) = value;//dma1 tadr break; - case 0x10009040: // dma1 - vif1 - asr0 + case D1_ASR0: // dma1 - vif1 - asr0 HW_LOG("VIF1dma ASR0 %lx", value); psHu32(mem) = value;//dma1 asr0 break; - case 0x10009050: // dma1 - vif1 - asr1 + case D1_ASR1: // dma1 - vif1 - asr1 HW_LOG("VIF1dma ASR1 %lx", value); psHu32(mem) = value;//dma1 asr1 break; - case 0x10009080: // dma1 - vif1 - sadr + case D1_SADR: // dma1 - vif1 - sadr HW_LOG("VIF1dma SADR %lx", value); psHu32(mem) = value;//dma1 sadr break; #endif - case 0x1000a000: // dma2 - gif + case D2_CHCR: // dma2 - gif DMA_LOG("0x%8.8x hwWrite32: GSdma %lx", cpuRegs.cycle, value); DmaExec(dmaGIF, mem, value); break; #ifdef PCSX2_DEVBUILD - case 0x1000a010: + case D2_MADR: psHu32(mem) = value;//dma2 madr HW_LOG("Hardware write DMA2_MADR 32bit at %x with value %x",mem,value); break; - case 0x1000a020: + case D2_QWC: psHu32(mem) = value;//dma2 qwc HW_LOG("Hardware write DMA2_QWC 32bit at %x with value %x",mem,value); break; - case 0x1000a030: + case D2_TADR: 
psHu32(mem) = value;//dma2 taddr HW_LOG("Hardware write DMA2_TADDR 32bit at %x with value %x",mem,value); break; - case 0x1000a040: + case D2_ASR0: psHu32(mem) = value;//dma2 asr0 HW_LOG("Hardware write DMA2_ASR0 32bit at %x with value %x",mem,value); break; - case 0x1000a050: + case D2_ASR1: psHu32(mem) = value;//dma2 asr1 HW_LOG("Hardware write DMA2_ASR1 32bit at %x with value %x",mem,value); break; - case 0x1000a080: + case D2_SADR: psHu32(mem) = value;//dma2 saddr HW_LOG("Hardware write DMA2_SADDR 32bit at %x with value %x",mem,value); break; #endif - case 0x1000b000: // dma3 - fromIPU + case D3_CHCR: // dma3 - fromIPU DMA_LOG("IPU0dma %lx", value); DmaExec(dmaIPU0, mem, value); break; #ifdef PCSX2_DEVBUILD - case 0x1000b010: + case D3_MADR: psHu32(mem) = value;//dma2 madr HW_LOG("Hardware write IPU0DMA_MADR 32bit at %x with value %x",mem,value); break; - case 0x1000b020: + case D3_QWC: psHu32(mem) = value;//dma2 madr HW_LOG("Hardware write IPU0DMA_QWC 32bit at %x with value %x",mem,value); break; - case 0x1000b030: + case D3_TADR: psHu32(mem) = value;//dma2 tadr HW_LOG("Hardware write IPU0DMA_TADR 32bit at %x with value %x",mem,value); break; - case 0x1000b080: + case D3_SADR: psHu32(mem) = value;//dma2 saddr HW_LOG("Hardware write IPU0DMA_SADDR 32bit at %x with value %x",mem,value); break; #endif - case 0x1000b400: // dma4 - toIPU + case D4_CHCR: // dma4 - toIPU DMA_LOG("IPU1dma %lx", value); DmaExec(dmaIPU1, mem, value); break; #ifdef PCSX2_DEVBUILD - case 0x1000b410: + case D4_MADR: psHu32(mem) = value;//dma2 madr HW_LOG("Hardware write IPU1DMA_MADR 32bit at %x with value %x",mem,value); break; - case 0x1000b420: + case D4_QWC: psHu32(mem) = value;//dma2 madr HW_LOG("Hardware write IPU1DMA_QWC 32bit at %x with value %x",mem,value); break; - case 0x1000b430: + case D4_TADR: psHu32(mem) = value;//dma2 tadr HW_LOG("Hardware write IPU1DMA_TADR 32bit at %x with value %x",mem,value); break; - case 0x1000b480: + case D4_SADR: psHu32(mem) = value;//dma2 saddr HW_LOG("Hardware write IPU1DMA_SADDR 32bit at %x with value %x",mem,value); break; #endif - case 0x1000c000: // dma5 - sif0 + case D5_CHCR: // dma5 - sif0 DMA_LOG("SIF0dma %lx", value); DmaExec(dmaSIF0, mem, value); break; - case 0x1000c400: // dma6 - sif1 + case D6_CHCR: // dma6 - sif1 DMA_LOG("SIF1dma %lx", value); DmaExec(dmaSIF1, mem, value); break; #ifdef PCSX2_DEVBUILD - case 0x1000c420: // dma6 - sif1 - qwc + case D6_QWC: // dma6 - sif1 - qwc HW_LOG("SIF1dma QWC = %lx", value); psHu32(mem) = value; break; @@ -645,12 +649,12 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) psHu32(mem) = value; break; #endif - case 0x1000c800: // dma7 - sif2 + case D7_CHCR: // dma7 - sif2 DMA_LOG("SIF2dma %lx", value); DmaExec(dmaSIF2, mem, value); break; - case 0x1000d000: // dma8 - fromSPR + case D8_CHCR: // dma8 - fromSPR DMA_LOG("fromSPRdma %lx", value); DmaExec(dmaSPR0, mem, value); break; @@ -660,12 +664,12 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) DmaExec(dmaSPR1, mem, value); break; - case 0x1000e000: // DMAC_CTRL + case DMAC_CTRL: // DMAC_CTRL HW_LOG("DMAC_CTRL Write 32bit %x", value); psHu32(0xe000) = value; break; - case 0x1000e010: // DMAC_STAT + case DMAC_STAT: // DMAC_STAT HW_LOG("DMAC_STAT Write 32bit %x", value); psHu16(0xe010)&= ~(value & 0xffff); // clear on 1 psHu16(0xe012) ^= (u16)(value >> 16); @@ -673,12 +677,12 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) cpuTestDMACInts(); break; - case 0x1000f000: // INTC_STAT + case INTC_STAT: // INTC_STAT HW_LOG("INTC_STAT Write 32bit 
%x", value); psHu32(0xf000)&=~value; break; - case 0x1000f010: // INTC_MASK + case INTC_MASK: // INTC_MASK HW_LOG("INTC_MASK Write 32bit %x", value); psHu32(0xf010) ^= (u16)value; cpuTestINTCInts(); @@ -694,7 +698,7 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) psHu32(mem) = value; break; - case 0x1000f590: // DMAC_ENABLEW + case DMAC_ENABLEW: // DMAC_ENABLEW HW_LOG("DMAC_ENABLEW Write 32bit %lx", value); psHu32(0xf590) = value; psHu32(0xf520) = value; @@ -704,15 +708,15 @@ __forceinline void __fastcall hwWrite32(u32 mem, u32 value) psHu32(mem) = value; break; - case 0x1000f220: + case SBUS_F220: psHu32(mem) |= value; break; - case 0x1000f230: + case SBUS_SMFLG: psHu32(mem) &= ~value; break; - case 0x1000f240: + case SBUS_F240: if(!(value & 0x100)) psHu32(mem) &= ~0x100; else diff --git a/pcsx2/Hw.h b/pcsx2/Hw.h index 97a981c82b..996cbbc3a8 100644 --- a/pcsx2/Hw.h +++ b/pcsx2/Hw.h @@ -64,177 +64,183 @@ struct DMACh { }; // HW defines +enum HWaddress +{ + RCNT0_COUNT = 0x10000000, + RCNT0_MODE = 0x10000010, + RCNT0_TARGET = 0x10000020, + RCNT0_HOLD = 0x10000030, -#define RCNT0_COUNT 0x10000000 -#define RCNT0_MODE 0x10000010 -#define RCNT0_TARGET 0x10000020 -#define RCNT0_HOLD 0x10000030 + RCNT1_COUNT = 0x10000800, + RCNT1_MODE = 0x10000810, + RCNT1_TARGET = 0x10000820, + RCNT1_HOLD = 0x10000830, -#define RCNT1_COUNT 0x10000800 -#define RCNT1_MODE 0x10000810 -#define RCNT1_TARGET 0x10000820 -#define RCNT1_HOLD 0x10000830 + RCNT2_COUNT = 0x10001000, + RCNT2_MODE = 0x10001010, + RCNT2_TARGET = 0x10001020, -#define RCNT2_COUNT 0x10001000 -#define RCNT2_MODE 0x10001010 -#define RCNT2_TARGET 0x10001020 + RCNT3_COUNT = 0x10001800, + RCNT3_MODE = 0x10001810, + RCNT3_TARGET = 0x10001820, -#define RCNT3_COUNT 0x10001800 -#define RCNT3_MODE 0x10001810 -#define RCNT3_TARGET 0x10001820 + IPU_CMD = 0x10002000, + IPU_CTRL = 0x10002010, + IPU_BP = 0x10002020, + IPU_TOP = 0x10002030, -#define IPU_CMD 0x10002000 -#define IPU_CTRL 0x10002010 -#define IPU_BP 0x10002020 -#define IPU_TOP 0x10002030 + GIF_CTRL = 0x10003000, + GIF_MODE = 0x10003010, + GIF_STAT = 0x10003020, + GIF_TAG0 = 0x10003040, + GIF_TAG1 = 0x10003050, + GIF_TAG2 = 0x10003060, + GIF_TAG3 = 0x10003070, + GIF_CNT = 0x10003080, + GIF_P3CNT = 0x10003090, + GIF_P3TAG = 0x100030A0, + GIF_FIFO = 0x10006000, -#define GIF_CTRL 0x10003000 -#define GIF_MODE 0x10003010 -#define GIF_STAT 0x10003020 -#define GIF_TAG0 0x10003040 -#define GIF_TAG1 0x10003050 -#define GIF_TAG2 0x10003060 -#define GIF_TAG3 0x10003070 -#define GIF_CNT 0x10003080 -#define GIF_P3CNT 0x10003090 -#define GIF_P3TAG 0x100030A0 - -#define GIF_FIFO 0x10006000 - -#define IPUout_FIFO 0x10007000 -#define IPUin_FIFO 0x10007010 + IPUout_FIFO = 0x10007000, + IPUin_FIFO = 0x10007010, //VIF0 -#define D0_CHCR 0x10008000 -#define D0_MADR 0x10008010 -#define D0_QWC 0x10008020 + D0_CHCR = 0x10008000, + D0_MADR = 0x10008010, + D0_QWC = 0x10008020, //VIF1 -#define D1_CHCR 0x10009000 -#define D1_MADR 0x10009010 -#define D1_QWC 0x10009020 -#define D1_TADR 0x10009030 -#define D1_ASR0 0x10009040 -#define D1_ASR1 0x10009050 -#define D1_SADR 0x10009080 + D1_CHCR = 0x10009000, + D1_MADR = 0x10009010, + D1_QWC = 0x10009020, + D1_TADR = 0x10009030, + D1_ASR0 = 0x10009040, + D1_ASR1 = 0x10009050, + D1_SADR = 0x10009080, //GS -#define D2_CHCR 0x1000A000 -#define D2_MADR 0x1000A010 -#define D2_QWC 0x1000A020 -#define D2_TADR 0x1000A030 -#define D2_ASR0 0x1000A040 -#define D2_ASR1 0x1000A050 -#define D2_SADR 0x1000A080 + D2_CHCR = 0x1000A000, + D2_MADR = 0x1000A010, + D2_QWC = 0x1000A020, + 
D2_TADR = 0x1000A030, + D2_ASR0 = 0x1000A040, + D2_ASR1 = 0x1000A050, + D2_SADR = 0x1000A080, //fromIPU -#define D3_CHCR 0x1000B000 -#define D3_MADR 0x1000B010 -#define D3_QWC 0x1000B020 -#define D3_TADR 0x1000B030 -#define D3_SADR 0x1000B080 + D3_CHCR = 0x1000B000, + D3_MADR = 0x1000B010, + D3_QWC = 0x1000B020, + D3_TADR = 0x1000B030, + D3_SADR = 0x1000B080, //toIPU -#define D4_CHCR 0x1000B400 -#define D4_MADR 0x1000B410 -#define D4_QWC 0x1000B420 -#define D4_TADR 0x1000B430 -#define D4_SADR 0x1000B480 + D4_CHCR = 0x1000B400, + D4_MADR = 0x1000B410, + D4_QWC = 0x1000B420, + D4_TADR = 0x1000B430, + D4_SADR = 0x1000B480, //SIF0 -#define D5_CHCR 0x1000C000 -#define D5_MADR 0x1000C010 -#define D5_QWC 0x1000C020 + D5_CHCR = 0x1000C000, + D5_MADR = 0x1000C010, + D5_QWC = 0x1000C020, //SIF1 -#define D6_CHCR 0x1000C400 -#define D6_MADR 0x1000C410 -#define D6_QWC 0x1000C420 + D6_CHCR = 0x1000C400, + D6_MADR = 0x1000C410, + D6_QWC = 0x1000C420, //SIF2 -#define D7_CHCR 0x1000C800 -#define D7_MADR 0x1000C810 -#define D7_QWC 0x1000C820 + D7_CHCR = 0x1000C800, + D7_MADR = 0x1000C810, + D7_QWC = 0x1000C820, //fromSPR -#define D8_CHCR 0x1000D000 -#define D8_MADR 0x1000D010 -#define D8_QWC 0x1000D020 -#define D8_SADR 0x1000D080 + D8_CHCR = 0x1000D000, + D8_MADR = 0x1000D010, + D8_QWC = 0x1000D020, + D8_SADR = 0x1000D080, + DMAC_CTRL = 0x1000E000, + DMAC_STAT = 0x1000E010, + DMAC_PCR = 0x1000E020, + DMAC_SQWC = 0x1000E030, + DMAC_RBSR = 0x1000E040, + DMAC_RBOR = 0x1000E050, + DMAC_STADR = 0x1000E060, -#define DMAC_CTRL 0x1000E000 -#define DMAC_STAT 0x1000E010 -#define DMAC_PCR 0x1000E020 -#define DMAC_SQWC 0x1000E030 -#define DMAC_RBSR 0x1000E040 -#define DMAC_RBOR 0x1000E050 -#define DMAC_STADR 0x1000E060 + INTC_STAT = 0x1000F000, + INTC_MASK = 0x1000F010, -#define INTC_STAT 0x1000F000 -#define INTC_MASK 0x1000F010 + SBUS_F220 = 0x1000F220, + SBUS_SMFLG = 0x1000F230, + SBUS_F240 = 0x1000F240, -#define SBUS_F220 0x1000F220 -#define SBUS_SMFLG 0x1000F230 -#define SBUS_F240 0x1000F240 + DMAC_ENABLER = 0x1000F520, + DMAC_ENABLEW = 0x1000F590, -#define DMAC_ENABLER 0x1000F520 -#define DMAC_ENABLEW 0x1000F590 + GS_PMODE = 0x12000000, + GS_SMODE1 = 0x12000010, + GS_SMODE2 = 0x12000020, + GS_SRFSH = 0x12000030, + GS_SYNCH1 = 0x12000040, + GS_SYNCH2 = 0x12000050, + GS_SYNCV = 0x12000060, + GS_DISPFB1 = 0x12000070, + GS_DISPLAY1 = 0x12000080, + GS_DISPFB2 = 0x12000090, + GS_DISPLAY2 = 0x120000A0, + GS_EXTBUF = 0x120000B0, + GS_EXTDATA = 0x120000C0, + GS_EXTWRITE = 0x120000D0, + GS_BGCOLOR = 0x120000E0, + GS_CSR = 0x12001000, + GS_IMR = 0x12001010, + GS_BUSDIR = 0x12001040, + GS_SIGLBLID = 0x12001080 +}; -#define SBFLG_IOPALIVE 0x10000 -#define SBFLG_IOPSYNC 0x40000 - -#define GS_PMODE 0x12000000 -#define GS_SMODE1 0x12000010 -#define GS_SMODE2 0x12000020 -#define GS_SRFSH 0x12000030 -#define GS_SYNCH1 0x12000040 -#define GS_SYNCH2 0x12000050 -#define GS_SYNCV 0x12000060 -#define GS_DISPFB1 0x12000070 -#define GS_DISPLAY1 0x12000080 -#define GS_DISPFB2 0x12000090 -#define GS_DISPLAY2 0x120000A0 -#define GS_EXTBUF 0x120000B0 -#define GS_EXTDATA 0x120000C0 -#define GS_EXTWRITE 0x120000D0 -#define GS_BGCOLOR 0x120000E0 -#define GS_CSR 0x12001000 -#define GS_IMR 0x12001010 -#define GS_BUSDIR 0x12001040 -#define GS_SIGLBLID 0x12001080 - -#define INTC_GS 0 -#define INTC_SBUS 1 -#define INTC_VBLANK_S 2 -#define INTC_VBLANK_E 3 -#define INTC_VIF0 4 -#define INTC_VIF1 5 -#define INTC_VU0 6 -#define INTC_VU1 7 -#define INTC_IPU 8 -#define INTC_TIM0 9 -#define INTC_TIM1 10 -#define INTC_TIM2 11 -#define INTC_TIM3 12 +#define 
SBFLG_IOPALIVE 0x10000 +#define SBFLG_IOPSYNC 0x40000 +enum INTCIrqs +{ + INTC_GS = 0, + INTC_SBUS, + INTC_VBLANK_S, + INTC_VBLANK_E, + INTC_VIF0, + INTC_VIF1, + INTC_VU0, + INTC_VU1, + INTC_IPU, + INTC_TIM0, + INTC_TIM1, + INTC_TIM2, + INTC_TIM3, +}; + #define DMAC_STAT_SIS (1<<13) // stall condition #define DMAC_STAT_MEIS (1<<14) // mfifo empty #define DMAC_STAT_BEIS (1<<15) // bus error #define DMAC_STAT_SIM (1<<29) // stall mask #define DMAC_STAT_MEIM (1<<30) // mfifo mask -#define DMAC_VIF0 0 -#define DMAC_VIF1 1 -#define DMAC_GIF 2 -#define DMAC_FROM_IPU 3 -#define DMAC_TO_IPU 4 -#define DMAC_SIF0 5 -#define DMAC_SIF1 6 -#define DMAC_SIF2 7 -#define DMAC_FROM_SPR 8 -#define DMAC_TO_SPR 9 -#define DMAC_ERROR 15 +enum DMACIrqs +{ + DMAC_VIF0 = 0, + DMAC_VIF1, + DMAC_GIF, + DMAC_FROM_IPU, + DMAC_TO_IPU, + DMAC_SIF0, + DMAC_SIF1, + DMAC_SIF2, + DMAC_FROM_SPR, + DMAC_TO_SPR, + DMAC_ERROR = 15, +}; #define VIF0_STAT_VPS_W (1) #define VIF0_STAT_VPS_D (2) @@ -266,20 +272,39 @@ struct DMACh { #define VIF1_STAT_ER1 (1<<13) #define VIF1_STAT_FDR (1<<23) +#define VIF_STAT_VPS_W (1) +#define VIF_STAT_VPS_D (2) +#define VIF_STAT_VPS_T (3) +#define VIF_STAT_VPS (3) +#define VIF_STAT_VEW (1<<2) +#define VIF_STAT_VGW (1<<3) +#define VIF_STAT_MRK (1<<6) +#define VIF_STAT_DBF (1<<7) +#define VIF_STAT_VSS (1<<8) +#define VIF_STAT_VFS (1<<9) +#define VIF_STAT_VIS (1<<10) +#define VIF_STAT_INT (1<<11) +#define VIF_STAT_ER0 (1<<12) +#define VIF_STAT_ER1 (1<<13) +#define VIF_STAT_FDR (1<<23) + //DMA interrupts & masks -#define BEISintr (0x8000) -#define VIF0intr (0x10001) -#define VIF1intr (0x20002) -#define GIFintr (0x40004) -#define IPU0intr (0x80008) -#define IPU1intr (0x100010) -#define SIF0intr (0x200020) -#define SIF1intr (0x400040) -#define SIF2intr (0x800080) -#define SPR0intr (0x1000100) -#define SPR1intr (0x2000200) -#define SISintr (0x20002000) -#define MEISintr (0x40004000) +enum DMAInter +{ + BEISintr = 0x8000, + VIF0intr = 0x10001, + VIF1intr = 0x20002, + GIFintr = 0x40004, + IPU0intr = 0x80008, + IPU1intr = 0x100010, + SIF0intr = 0x200020, + SIF1intr =0x400040, + SIF2intr = 0x800080, + SPR0intr = 0x1000100, + SPR1intr = 0x2000200, + SISintr = 0x20002000, + MEISintr = 0x40004000 +}; #ifdef PCSX2_VIRTUAL_MEM @@ -344,10 +369,10 @@ static __forceinline u32 *_dmaGetAddr(DMACh *dma, u32 addr, u32 num) if (ptr == NULL) { // DMA Error - psHu32(DMAC_STAT)|= 1<<15; /* BUS error */ + psHu32(DMAC_STAT) |= DMAC_STAT_BEIS; /* BUS error */ // DMA End - psHu32(DMAC_STAT)|= 1<chcr &= ~0x100; } @@ -413,8 +438,8 @@ void hwDmacIrq(int n); int hwMFIFORead(u32 addr, u8 *data, u32 size); int hwMFIFOWrite(u32 addr, u8 *data, u32 size); -int hwDmacSrcChainWithStack(DMACh *dma, int id); -int hwDmacSrcChain(DMACh *dma, int id); +bool hwDmacSrcChainWithStack(DMACh *dma, int id); +bool hwDmacSrcChain(DMACh *dma, int id); int hwConstRead8 (u32 x86reg, u32 mem, u32 sign); int hwConstRead16(u32 x86reg, u32 mem, u32 sign); diff --git a/pcsx2/HwWrite.cpp b/pcsx2/HwWrite.cpp index 4a389a4061..9c56df9e6d 100644 --- a/pcsx2/HwWrite.cpp +++ b/pcsx2/HwWrite.cpp @@ -199,7 +199,7 @@ void hwWrite8(u32 mem, u8 value) { DevCon::Notice("8 bit VIF1 DMA Start while DMAC Disabled\n"); QueuedDMA |= 0x2; } - if(value & 0x1) vif1.done = 0; //This must be done here! some games (ala Crash of the Titans) pause the dma to start MFIFO + if(value & 0x1) vif1.done = false; //This must be done here! 
some games (ala Crash of the Titans) pause the dma to start MFIFO DmaExec8(dmaVIF1, mem, value); break; @@ -356,7 +356,7 @@ __forceinline void hwWrite16(u32 mem, u16 value) DevCon::Notice("16 bit VIF1 DMA Start while DMAC Disabled\n"); QueuedDMA |= 0x2; } - if(value & 0x100) vif1.done = 0; //This must be done here! some games (ala Crash of the Titans) pause the dma to start MFIFO + if(value & 0x100) vif1.done = false; //This must be done here! some games (ala Crash of the Titans) pause the dma to start MFIFO DmaExec16(dmaVIF1, mem, value); break; @@ -859,7 +859,7 @@ void __fastcall hwWrite32_generic( u32 mem, u32 value ) } if(value & 0x100) { - vif1.done = 0; //This must be done here! some games (ala Crash of the Titans) pause the dma to start MFIFO + vif1.done = false; //This must be done here! some games (ala Crash of the Titans) pause the dma to start MFIFO } DmaExec(dmaVIF1, mem, value); return; diff --git a/pcsx2/Memory.h b/pcsx2/Memory.h index 7acd4c3fb9..18ff8f20fc 100644 --- a/pcsx2/Memory.h +++ b/pcsx2/Memory.h @@ -146,15 +146,6 @@ extern u8 g_RealGSMem[Ps2MemSize::GSregs]; #define psSu32(mem) (*(u32*)&PS2MEM_SCRATCH[(mem) & 0x3fff]) #define psSu64(mem) (*(u64*)&PS2MEM_SCRATCH[(mem) & 0x3fff]) -//#define PSMs8(mem) (*(s8 *)PSM(mem)) -//#define PSMs16(mem) (*(s16*)PSM(mem)) -//#define PSMs32(mem) (*(s32*)PSM(mem)) -//#define PSMs64(mem) (*(s64*)PSM(mem)) -//#define PSMu8(mem) (*(u8 *)PSM(mem)) -//#define PSMu16(mem) (*(u16*)PSM(mem)) -//#define PSMu32(mem) (*(u32*)PSM(mem)) -//#define PSMu64(mem) (*(u64*)PSM(mem)) - extern void memAlloc(); extern void memReset(); // clears PS2 ram and loads the bios. Throws Exception::FileNotFound on error. extern void memShutdown(); diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index da8bd88aaf..56e399dbc6 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -37,7 +37,7 @@ PCSX2_ALIGNED16(u32 g_vifCol1[4]); extern int g_vifCycles; u16 vifqwc = 0; -bool mfifodmairq = FALSE; +bool mfifodmairq = false; enum UnpackOffset { @@ -357,7 +357,7 @@ static __forceinline int mfifoVIF1rbTransfer() src = (u32*)PSM(vif1ch->madr); if (src == NULL) return -1; - if (vif1.vifstalled == 1) + if (vif1.vifstalled) ret = VIF1transfer(src + vif1.irqoffset, s1 - vif1.irqoffset, 0); else ret = VIF1transfer(src, s1, 0); @@ -379,7 +379,7 @@ static __forceinline int mfifoVIF1rbTransfer() src = (u32*)PSM(vif1ch->madr); if (src == NULL) return -1; - if (vif1.vifstalled == 1) + if (vif1.vifstalled) ret = VIF1transfer(src + vif1.irqoffset, mfifoqwc * 4 - vif1.irqoffset, 0); else ret = VIF1transfer(src, mfifoqwc << 2, 0); @@ -395,7 +395,7 @@ static __forceinline int mfifo_VIF1chain() int ret; /* Is QWC = 0? 
if so there is nothing to transfer */ - if (vif1ch->qwc == 0 && vif1.vifstalled == 0) + if ((vif1ch->qwc == 0) && (!vif1.vifstalled)) { vif1.inprogress = 0; return 0; @@ -414,7 +414,7 @@ static __forceinline int mfifo_VIF1chain() SPR_LOG("Non-MFIFO Location"); if (pMem == NULL) return -1; - if (vif1.vifstalled == 1) + if (vif1.vifstalled) ret = VIF1transfer(pMem + vif1.irqoffset, vif1ch->qwc * 4 - vif1.irqoffset, 0); else ret = VIF1transfer(pMem, vif1ch->qwc << 2, 0); @@ -448,7 +448,7 @@ void mfifoVIF1transfer(int qwc) return; } - mfifodmairq = FALSE; //Clear any previous TIE interrupt + mfifodmairq = false; //Clear any previous TIE interrupt if (vif1ch->qwc == 0) { @@ -456,7 +456,7 @@ void mfifoVIF1transfer(int qwc) if (vif1ch->chcr & 0x40) { - if (vif1.stallontag == 1) + if (vif1.stallontag) ret = VIF1transfer(ptag + (2 + vif1.irqoffset), 2 - vif1.irqoffset, 1); //Transfer Tag on Stall else ret = VIF1transfer(ptag + 2, 2, 1); //Transfer Tag @@ -464,7 +464,7 @@ void mfifoVIF1transfer(int qwc) if (ret == -2) { VIF_LOG("MFIFO Stallon tag"); - vif1.stallontag = 1; + vif1.stallontag = true; return; //IRQ set by VIFTransfer } } @@ -483,13 +483,13 @@ void mfifoVIF1transfer(int qwc) { case 0: // Refe - Transfer Packet According to ADDR field vif1ch->tadr = psHu32(DMAC_RBOR) + ((vif1ch->tadr + 16) & psHu32(DMAC_RBSR)); - vif1.done = 1; //End Transfer + vif1.done = true; //End Transfer break; case 1: // CNT - Transfer QWC following the tag. vif1ch->madr = psHu32(DMAC_RBOR) + ((vif1ch->tadr + 16) & psHu32(DMAC_RBSR)); //Set MADR to QW after Tag vif1ch->tadr = psHu32(DMAC_RBOR) + ((vif1ch->madr + (vif1ch->qwc << 4)) & psHu32(DMAC_RBSR)); //Set TADR to QW following the data - vif1.done = 0; + vif1.done = false; break; case 2: // Next - Transfer QWC following tag. 
TADR = ADDR @@ -498,28 +498,28 @@ void mfifoVIF1transfer(int qwc) vif1ch->madr = psHu32(DMAC_RBOR) + ((vif1ch->tadr + 16) & psHu32(DMAC_RBSR)); //Set MADR to QW following the tag vif1ch->tadr = temp; //Copy temporarily stored ADDR to Tag if ((temp & psHu32(DMAC_RBSR)) != psHu32(DMAC_RBOR)) Console::WriteLn("Next tag = %x outside ring %x size %x", params temp, psHu32(DMAC_RBOR), psHu32(DMAC_RBSR)); - vif1.done = 0; + vif1.done = false; break; } case 3: // Ref - Transfer QWC from ADDR field case 4: // Refs - Transfer QWC from ADDR field (Stall Control) vif1ch->tadr = psHu32(DMAC_RBOR) + ((vif1ch->tadr + 16) & psHu32(DMAC_RBSR)); //Set TADR to next tag - vif1.done = 0; + vif1.done = false; break; case 7: // End - Transfer QWC following the tag vif1ch->madr = psHu32(DMAC_RBOR) + ((vif1ch->tadr + 16) & psHu32(DMAC_RBSR)); //Set MADR to data following the tag vif1ch->tadr = psHu32(DMAC_RBOR) + ((vif1ch->madr + (vif1ch->qwc << 4)) & psHu32(DMAC_RBSR)); //Set TADR to QW following the data - vif1.done = 1; //End Transfer + vif1.done = true; //End Transfer break; } if ((vif1ch->chcr & 0x80) && (ptag[0] >> 31)) { VIF_LOG("dmaIrq Set"); - vif1.done = 1; - mfifodmairq = TRUE; //Let the handler know we have prematurely ended MFIFO + vif1.done = true; + mfifodmairq = true; //Let the handler know we have prematurely ended MFIFO } } @@ -547,7 +547,7 @@ void vifMFIFOInterrupt() } } - if (vif1.done != 1 || vif1.inprogress & 1) + if (!vif1.done || vif1.inprogress & 1) { if (vifqwc <= 0) { diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 0c67d44a01..16d33d1b08 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -59,8 +59,8 @@ static const unsigned int VIF0dmanum = 0; static const unsigned int VIF1dmanum = 1; int g_vifCycles = 0; -bool path3hack = FALSE; -bool Path3transfer = FALSE; +bool path3hack = false; +bool Path3transfer = false; u32 splittransfer[4]; u32 splitptr = 0; @@ -1081,8 +1081,8 @@ int VIF0transfer(u32 *data, int size, int istag) int transferred = vif0.vifstalled ? 
vif0.irqoffset : 0; // irqoffset necessary to add up the right qws, or else will spin (spiderman) VIF_LOG("VIF0transfer: size %x (vif0.cmd %x)", size, vif0.cmd); - vif0.stallontag = 0; - vif0.vifstalled = 0; + vif0.stallontag = false; + vif0.vifstalled = false; vif0.vifpacketsize = size; while (vif0.vifpacketsize > 0) @@ -1144,7 +1144,7 @@ int VIF0transfer(u32 *data, int size, int istag) ++vif0.irq; - if (istag && vif0.tag.size <= vif0.vifpacketsize) vif0.stallontag = 1; + if (istag && vif0.tag.size <= vif0.vifpacketsize) vif0.stallontag = true; if (vif0.tag.size == 0) break; } @@ -1158,7 +1158,7 @@ int VIF0transfer(u32 *data, int size, int istag) if (vif0.irq && (vif0.tag.size == 0)) { - vif0.vifstalled = 1; + vif0.vifstalled = true; if (((vif0Regs->code >> 24) & 0x7f) != 0x7)vif0Regs->stat |= VIF0_STAT_VIS; //else Console::WriteLn("VIF0 IRQ on MARK"); @@ -1194,7 +1194,7 @@ int _VIF0chain() u32 *pMem; u32 ret; - if (vif0ch->qwc == 0 && vif0.vifstalled == 0) return 0; + if ((vif0ch->qwc == 0) && !vif0.vifstalled) return 0; pMem = (u32*)dmaGetAddr(vif0ch->madr); if (pMem == NULL) return -1; @@ -1207,7 +1207,7 @@ int _VIF0chain() return ret; } -int _chainVIF0() +bool _chainVIF0() { int id, ret; @@ -1232,8 +1232,10 @@ int _chainVIF0() if (vif0ch->chcr & 0x40) { - if (vif0.vifstalled == 1) ret = VIF0transfer(vif0ptag + (2 + vif0.irqoffset), 2 - vif0.irqoffset, 1); //Transfer Tag on stall - else ret = VIF0transfer(vif0ptag + 2, 2, 1); //Transfer Tag + if (vif0.vifstalled) + ret = VIF0transfer(vif0ptag + (2 + vif0.irqoffset), 2 - vif0.irqoffset, 1); //Transfer Tag on stall + else + ret = VIF0transfer(vif0ptag + 2, 2, 1); //Transfer Tag if (ret == -1) return -1; //There has been an error if (ret == -2) return -2; //IRQ set by VIFTransfer } @@ -1249,7 +1251,7 @@ int _chainVIF0() { VIF_LOG("dmaIrq Set\n"); - vif0.done = 1; + vif0.done = true; return vif0.done; //End Transfer } return vif0.done; //Return Done @@ -1274,7 +1276,7 @@ void vif0Interrupt() } if (vif0ch->qwc > 0 || vif0.irqoffset > 0) { - if (vif0.stallontag == 1) + if (vif0.stallontag) _chainVIF0(); else _VIF0chain(); @@ -1286,7 +1288,7 @@ void vif0Interrupt() if ((vif0ch->chcr & 0x100) == 0) Console::WriteLn("Vif0 running when CHCR = %x", params vif0ch->chcr); - if ((vif0ch->chcr & 0x4) && (vif0.done == 0) && (vif0.vifstalled == 0)) + if ((vif0ch->chcr & 0x4) && (!vif0.done) && (!vif0.vifstalled)) { if (!(psHu32(DMAC_CTRL) & 0x1)) @@ -1363,16 +1365,16 @@ void dmaVIF0() if (_VIF0chain() == -2) { Console::WriteLn("Stall on normal %x", params vif0Regs->stat); - vif0.vifstalled = 1; + vif0.vifstalled = true; return; } - vif0.done = 1; + vif0.done = true; CPU_INT(0, g_vifCycles); return; } // Chain Mode - vif0.done = 0; + vif0.done = false; CPU_INT(0, 0); } @@ -1400,7 +1402,7 @@ void vif0Write32(u32 mem, u32 value) cpuRegs.interrupt &= ~1; //Stop all vif0 DMA's psHu64(0x10004000) = 0; psHu64(0x10004008) = 0; - vif0.done = 1; + vif0.done = true; vif0Regs->err = 0; vif0Regs->stat &= ~(0xF000000 | VIF0_STAT_INT | VIF0_STAT_VSS | VIF0_STAT_VIS | VIF0_STAT_VFS | VIF0_STAT_VPS); // FQC=0 } @@ -1411,7 +1413,7 @@ void vif0Write32(u32 mem, u32 value) cpuRegs.interrupt &= ~1; //Stop all vif0 DMA's vif0Regs->stat |= VIF0_STAT_VFS; vif0Regs->stat &= ~VIF0_STAT_VPS; - vif0.vifstalled = 1; + vif0.vifstalled = true; Console::WriteLn("vif0 force break"); } if (value & 0x4) @@ -1421,15 +1423,15 @@ void vif0Write32(u32 mem, u32 value) // just stoppin the VIF (linuz). 
vif0Regs->stat |= VIF0_STAT_VSS; vif0Regs->stat &= ~VIF0_STAT_VPS; - vif0.vifstalled = 1; + vif0.vifstalled = true; } if (value & 0x8) { - bool cancel = FALSE; + bool cancel = false; /* Cancel stall, first check if there is a stall to cancel, and then clear VIF0_STAT VSS|VFS|VIS|INT|ER0|ER1 bits */ if (vif0Regs->stat & (VIF0_STAT_VSS | VIF0_STAT_VIS | VIF0_STAT_VFS)) - cancel = TRUE; + cancel = true; vif0Regs->stat &= ~(VIF0_STAT_VSS | VIF0_STAT_VFS | VIF0_STAT_VIS | VIF0_STAT_INT | VIF0_STAT_ER0 | VIF0_STAT_ER1); @@ -1440,7 +1442,7 @@ void vif0Write32(u32 mem, u32 value) g_vifCycles = 0; // loop necessary for spiderman - if (vif0.stallontag == 1) + if (vif0.stallontag) _chainVIF0(); else _VIF0chain(); @@ -1490,7 +1492,7 @@ void vif0Reset() psHu64(0x10004000) = 0; psHu64(0x10004008) = 0; vif0Regs->stat &= ~VIF0_STAT_VPS; - vif0.done = 1; + vif0.done = true; vif0Regs->stat &= ~0xF000000; // FQC=0 } @@ -2027,8 +2029,8 @@ int VIF1transfer(u32 *data, int size, int istag) VIF_LOG("VIF1transfer: size %x (vif1.cmd %x)", size, vif1.cmd); vif1.irqoffset = 0; - vif1.vifstalled = 0; - vif1.stallontag = 0; + vif1.vifstalled = false; + vif1.stallontag = false; vif1.vifpacketsize = size; while (vif1.vifpacketsize > 0) @@ -2086,7 +2088,7 @@ int VIF1transfer(u32 *data, int size, int istag) ++vif1.irq; - if (istag && vif1.tag.size <= vif1.vifpacketsize) vif1.stallontag = 1; + if (istag && vif1.tag.size <= vif1.vifpacketsize) vif1.stallontag = true; if (vif1.tag.size == 0) break; } @@ -2100,7 +2102,7 @@ int VIF1transfer(u32 *data, int size, int istag) if (vif1.irq && vif1.cmd == 0) { - vif1.vifstalled = 1; + vif1.vifstalled = true; if (((vif1Regs->code >> 24) & 0x7f) != 0x7)vif1Regs->stat |= VIF1_STAT_VIS; // Note: commenting this out fixes WALL-E @@ -2145,7 +2147,7 @@ void vif1TransferFromMemory() { Console::WriteLn("Vif1 Tag BUSERR"); psHu32(DMAC_STAT) |= 1 << 15; //If yes, set BEIS (BUSERR) in DMAC_STAT register - vif1.done = 1; + vif1.done = true; vif1Regs->stat &= ~0x1f000000; vif1ch->qwc = 0; CPU_INT(1, 0); @@ -2221,7 +2223,7 @@ int _VIF1chain() return ret; } -int _chainVIF1() +bool _chainVIF1() { return vif1.done;//Return Done } @@ -2233,7 +2235,7 @@ __forceinline void vif1SetupTransfer() case 0: //Normal case 1: //Normal (From memory) vif1.inprogress = 1; - vif1.done = 1; + vif1.done = true; break; case 2: //Chain @@ -2278,7 +2280,7 @@ __forceinline void vif1SetupTransfer() if (vif1ch->chcr & 0x40) { - if (vif1.vifstalled == 1) + if (vif1.vifstalled) ret = VIF1transfer(vif1ptag + (2 + vif1.irqoffset), 2 - vif1.irqoffset, 1); //Transfer Tag on stall else ret = VIF1transfer(vif1ptag + 2, 2, 1); //Transfer Tag @@ -2297,7 +2299,7 @@ __forceinline void vif1SetupTransfer() { VIF_LOG("dmaIrq Set"); - vif1.done = 1; + vif1.done = true; return; //End Transfer } break; @@ -2327,16 +2329,16 @@ __forceinline void vif1Interrupt() } else if ((vif1ch->qwc > 0) || (vif1.irqoffset > 0)) { - if (vif1.stallontag == 1) + if (vif1.stallontag) vif1SetupTransfer(); else _VIF1chain();//CPU_INT(13, vif1ch->qwc * BIAS); } } - if (vif1.inprogress == 1) _VIF1chain(); + if (vif1.inprogress) _VIF1chain(); - if ((vif1.done == 0) || (vif1.inprogress == 1)) + if ((!vif1.done) || (vif1.inprogress)) { if (!(psHu32(DMAC_CTRL) & 0x1)) @@ -2351,7 +2353,7 @@ __forceinline void vif1Interrupt() return; } - if(vif1.vifstalled && vif1.irq) + if (vif1.vifstalled && vif1.irq) { CPU_INT(1, 0); return; //Dont want to end if vif is stalled. 
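The FBRST stall-cancel handling touched in the vif0Write32 hunk above and the vif1Write32 hunk below follows the same shape for both channels. Roughly, in plain C++ (fbrstCancelSketch and the kStat* constants are illustrative names; the bit values mirror the VIF0_STAT_*/VIF1_STAT_* defines shown earlier in this patch):

#include <cstdint>

// Stall/interrupt bits checked and cleared by the STC (value & 0x8) handler.
static const uint32_t kStatVSS = 1u << 8;
static const uint32_t kStatVFS = 1u << 9;
static const uint32_t kStatVIS = 1u << 10;
static const uint32_t kStatINT = 1u << 11;
static const uint32_t kStatER0 = 1u << 12;
static const uint32_t kStatER1 = 1u << 13;

// Returns true when a previously stalled transfer should be resumed after the STC write.
static bool fbrstCancelSketch(uint32_t& stat)
{
    const bool wasStalled = (stat & (kStatVSS | kStatVIS | kStatVFS)) != 0;
    stat &= ~(kStatVSS | kStatVFS | kStatVIS | kStatINT | kStatER0 | kStatER1);
    return wasStalled;   // caller restarts the chain (tag transfer if stalled on a tag, plain chain otherwise)
}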
@@ -2413,7 +2415,7 @@ void dmaVIF1() } // Chain Mode - vif1.done = 0; + vif1.done = false; CPU_INT(1, 0); } @@ -2441,7 +2443,7 @@ void vif1Write32(u32 mem, u32 value) vif1ch->qwc = 0; //? psHu64(0x10005000) = 0; psHu64(0x10005008) = 0; - vif1.done = 1; + vif1.done = true; vif1Regs->err = 0; vif1.inprogress = 0; vif1Regs->stat &= ~(0x1F800000 | VIF1_STAT_INT | VIF1_STAT_VSS | VIF1_STAT_VIS | VIF1_STAT_VFS | VIF1_STAT_VPS); // FQC=0 @@ -2453,7 +2455,7 @@ void vif1Write32(u32 mem, u32 value) vif1Regs->stat |= VIF1_STAT_VFS; vif1Regs->stat &= ~VIF1_STAT_VPS; cpuRegs.interrupt &= ~((1 << 1) | (1 << 10)); //Stop all vif1 DMA's - vif1.vifstalled = 1; + vif1.vifstalled = true; Console::WriteLn("vif1 force break"); } if (value & 0x4) @@ -2464,16 +2466,16 @@ void vif1Write32(u32 mem, u32 value) vif1Regs->stat |= VIF1_STAT_VSS; vif1Regs->stat &= ~VIF1_STAT_VPS; cpuRegs.interrupt &= ~((1 << 1) | (1 << 10)); //Stop all vif1 DMA's - vif1.vifstalled = 1; + vif1.vifstalled = true; } if (value & 0x8) { - bool cancel = FALSE; + bool cancel = false; /* Cancel stall, first check if there is a stall to cancel, and then clear VIF1_STAT VSS|VFS|VIS|INT|ER0|ER1 bits */ if (vif1Regs->stat & (VIF1_STAT_VSS | VIF1_STAT_VIS | VIF1_STAT_VFS)) { - cancel = TRUE; + cancel = true; } vif1Regs->stat &= ~(VIF1_STAT_VSS | VIF1_STAT_VFS | VIF1_STAT_VIS | @@ -2531,8 +2533,8 @@ void vif1Write32(u32 mem, u32 value) else { vif1ch->qwc = 0; - vif1.vifstalled = 0; - vif1.done = 1; + vif1.vifstalled = false; + vif1.done = true; vif1Regs->stat &= ~0x1F000000; // FQC=0 } break; @@ -2570,7 +2572,7 @@ void vif1Reset() psHu64(0x10005000) = 0; psHu64(0x10005008) = 0; vif1Regs->stat &= ~VIF1_STAT_VPS; - vif1.done = 1; + vif1.done = true; cpuRegs.interrupt &= ~((1 << 1) | (1 << 10)); //Stop all vif1 DMA's vif1Regs->stat &= ~0x1F000000; // FQC=0 } diff --git a/pcsx2/VifDma.h b/pcsx2/VifDma.h index 7a1023a3a8..21d45e4a4e 100644 --- a/pcsx2/VifDma.h +++ b/pcsx2/VifDma.h @@ -35,13 +35,12 @@ struct vifStruct { int qwcalign; u8 usn; - // The next three should be boolean, and will be next time I break savestate compatability. --arcum42 - u8 done; - u8 vifstalled; - u8 stallontag; + bool done; + bool vifstalled; + bool stallontag; u8 irqoffset; // 32bit offset where next vif code is - u32 savedtag; // need this for backwards compat with save states + u32 savedtag; // need this for backwards compat with save states u32 vifpacketsize; u8 inprogress; u8 dmamode; From 5e87ea31274c54c6e5f3f273a425fa86303d16fb Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 15 Apr 2009 05:12:03 +0000 Subject: [PATCH 065/143] Fix compiler warning. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@980 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 16d33d1b08..6e594881fe 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -1207,7 +1207,7 @@ int _VIF0chain() return ret; } -bool _chainVIF0() +int _chainVIF0() { int id, ret; @@ -1252,9 +1252,9 @@ bool _chainVIF0() VIF_LOG("dmaIrq Set\n"); vif0.done = true; - return vif0.done; //End Transfer + return (vif0.done)?1:0; //End Transfer } - return vif0.done; //Return Done + return (vif0.done)?1:0; //Return Done } void vif0Interrupt() From af792b769473385bbda9e40c0a6a44ead1610ca1 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Wed, 15 Apr 2009 10:13:48 +0000 Subject: [PATCH 066/143] zeroRecs: -minor change microVU: -fixed a lot of various errors -partially implemented some clip flag stuff -partially implemented some branch/jump stuff git-svn-id: http://pcsx2.googlecode.com/svn/trunk@981 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVUmicroLower.cpp | 4 +- pcsx2/x86/microVU.h | 6 +- pcsx2/x86/microVU_Alloc.h | 1 + pcsx2/x86/microVU_Alloc.inl | 4 +- pcsx2/x86/microVU_Analyze.inl | 6 +- pcsx2/x86/microVU_Compile.inl | 115 +++++++++++++++++----------------- pcsx2/x86/microVU_Execute.inl | 4 +- pcsx2/x86/microVU_Lower.inl | 56 +++++++++++------ pcsx2/x86/microVU_Misc.h | 23 ++++--- pcsx2/x86/microVU_Misc.inl | 84 ++++++++++++------------- pcsx2/x86/microVU_Upper.inl | 2 +- 11 files changed, 162 insertions(+), 143 deletions(-) diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index d057cd9791..65f2f04ab1 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -866,12 +866,10 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); break; case 9: // XW - SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); - if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - else SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); + SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0xff); //WWWW if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 0493b38327..90096299de 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -64,8 +64,8 @@ public: blockList[listSize].x86ptrStart = x86ptrStart; } }*/ - microBlock* search(u32 pipelineState, microRegInfo* pState) { - if (pipelineState & 1) { // Needs Detailed Search (Exact Match of Pipeline State) + microBlock* search(/*u32 pipelineState,*/ microRegInfo* pState) { + /*if (pipelineState & 1) { // Needs Detailed Search (Exact Match of Pipeline State) for (int i = 0; i < listSize; i++) { if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo))) return &blockList[i]; } @@ -74,7 +74,7 @@ public: for (int i = 0; i < listSize; i++) { if (blockList[i].pipelineState == pipelineState) return &blockList[i]; } - } + }*/ return NULL; } void clearFast() { diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index 37aec058aa..8a827eea3f 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -35,6 +35,7 @@ struct microRegInfo { u8 p; u8 r; u8 xgkick; + u8 needExactMatch; // This block needs an exact match of pipeline state }; struct microTempRegInfo { 
diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 0fb6a1c155..c256d71753 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -700,12 +700,12 @@ microVUt(void) mVUallocMFLAGb(int reg, int fInstance) { microVUt(void) mVUallocCFLAGa(int reg, int fInstance) { microVU* mVU = mVUx; - MOV32MtoR(reg, mVU->clipFlag[fInstance]); + MOV32MtoR(reg, (uptr)&mVU->clipFlag[fInstance]); } microVUt(void) mVUallocCFLAGb(int reg, int fInstance) { microVU* mVU = mVUx; - MOV32RtoM(mVU->clipFlag[fInstance], reg); + MOV32RtoM((uptr)&mVU->clipFlag[fInstance], reg); } //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 044a9b4baf..94f75d0f2f 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -247,8 +247,10 @@ microVUt(void) mVUanalyzeSflag(int It) { if (!It) { mVUinfo |= _isNOP; } else { // Sets _isSflag at instruction that FSxxx opcode reads it's status flag from mVUinfo |= _swapOps; - if (mVUcount >= 4) { incPC2(-8); mVUinfo |= _isSflag; incPC2(8); } - //else { incPC2((mVUcount*-2)); mVUinfo |= _isSflag; incPC2(mVUcount*-2); } + if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); } + // Note: _isSflag is used for status flag optimizations. + // Do to stalls, it can only be set one instruction prior to the status flag read instruction + // if we were guaranteed no-stalls were to happen, it could be set 4 instruction prior. } analyzeVIreg2(It, 1); } diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 116d2eae9b..775a2aec5d 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -33,16 +33,22 @@ } \ } -#define branchCase(Xcmp) \ +#define branchCase(JMPcc) \ CMP16ItoM((uptr)mVU->branch, 0); \ - ajmp = Xcmp((uptr)0); \ + ajmp = JMPcc((uptr)0); \ break -#define branchCase2() { \ - incPC(-2); \ - MOV32ItoR(gprT1, (xPC + (2 * 8)) & ((vuIndex) ? 0x3fff:0xfff)); \ - mVUallocVIb(gprT1, _Ft_); \ - incPC(+2); \ +#define flagSetMacro(xFlag, pFlag, xF, yF, zF) { \ + yF += (mVUstall > 3) ? 
3 : mVUstall; \ + if (yF > zF) { \ + pFlag += (yF-zF); \ + if (pFlag >= xFlag) pFlag = (xFlag-1); \ + zF++; \ + xF = (yF-zF); \ + zF = yF; \ + yF -= xF; \ + } \ + yF++; \ } #define startLoop() { mVUdebug1(); mVUstall = 0; memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); } @@ -85,61 +91,45 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { // Ensure last ~4+ instructions update mac flags int endPC = iPC; - int aCount = 1; // Amount of instructions needed to get 4 valid status/mac flag instances - for (int i = mVUcount, int iX = 0; i > 0; i--, aCount++) { + u32 aCount = 1; // Amount of instructions needed to get 4 valid status/mac flag instances + for (int i = mVUcount, iX = 0; i > 0; i--, aCount++) { if (doStatus) { mVUinfo |= _doMac; iX++; if ((iX >= 4) || (aCount > 4)) { break; } } incPC2(-2); } // Status/Mac Flags Setup Code - int xStatus = 8; // Status Instance starts at #0 on every block ((8&3) == 0) - int xMac = 8; // Mac Instance starts at #0 on every block ((8&3) == 0) - int pStatus = 3; - int pMac = 3; - int yStatus = 0; + int xStatus = 8, xMac = 8, xClip = 8; // Flag Instances start at #0 on every block ((8&3) == 0) + int pStatus = 3, pMac = 3, pClip = 3; int xS = 0, yS = 1, zS = 0; int xM = 0, yM = 1, zM = 0; - int xCount = mVUcount; // Backup count + int xC = 0, yC = 1, zC = 0; + u32 xCount = mVUcount; // Backup count iPC = mVUstartPC; for (mVUcount = 0; mVUcount < xCount; mVUcount++) { if (((xCount - mVUcount) > aCount) && isFSSET) mVUstatusFlagOp(); // Don't Optimize out on the last ~4+ instructions - yS += (mVUstall > 3) ? 3 : mVUstall; - if (yS > zS) { - pStatus += (yS-zS); - if (pStatus >= xStatus) pStatus = (xStatus-1); - zS++; - xS = (yS-zS); - zS = yS; - yS -= xS; - } - yS++; - - yM += (mVUstall > 3) ? 3 : mVUstall; - if (yM > zM) { - pMac += (yM-zM); - if (pMac >= xMac) pMac = (xMac-1); - zM++; - xM = (yM-zM); - zM = yM; - yM -= xM; - } - yM++; + flagSetMacro(xStatus, pStatus, xS, yS, zS); // Handles _fvsinstances + flagSetMacro(xMac, pMac, xM, yM, zM); // Handles _fvminstances + flagSetMacro(xClip, pClip, xC, yC, zC); // Handles _fvcinstances mVUinfo |= (xStatus&3) << 12; // _fsInstance mVUinfo |= (xMac&3) << 10; // _fmInstance + mVUinfo |= (xClip&3) << 14; // _fcInstance + mVUinfo |= (pStatus&3) << 18; // _fvsInstance mVUinfo |= (pMac&3) << 16; // _fvmInstance + mVUinfo |= (pClip&3) << 20; // _fvcInstance if (doStatus||isFSSET||doDivFlag) { xStatus = (xStatus+1); } if (doMac) { xMac = (xMac+1); } + if (doClip) { xClip = (xClip+1); } incPC2(2); } mVUcount = xCount; // Restore count // Setup Last 4 instances of Status/Mac flags (needed for accurate block linking) iPC = endPC; - for (int i = 3, int j = 3, int ii = 1, int jj = 1; aCount > 0; ii++, jj++, aCount--) { + for (int i = 3, j = 3, ii = 1, jj = 1; aCount > 0; ii++, jj++, aCount--) { if ((doStatus||isFSSET||doDivFlag) && (i >= 0)) { for (; (ii > 0 && i >= 0); i--, ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; } } @@ -156,8 +146,8 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { // Recompiles Code for Proper Flags on Block Linkings microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) { - PUSHR(gprR); // Backup gprR - PUSHR(gprESP); // Backup gprESP + PUSH32R(gprR); // Backup gprR + PUSH32R(gprESP); // Backup gprESP MOV32RtoR(gprT1, getFlagReg1(bStatus[0])); MOV32RtoR(gprT2, getFlagReg1(bStatus[1])); @@ -184,8 +174,8 @@ microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) { OR32RtoR(gprF2, getFlagReg2(bMac[2])); OR32RtoR(gprF3, getFlagReg2(bMac[3])); - POPR(gprESP); // Restore gprESP - 
POPR(gprR); // Restore gprR + POP32R(gprESP); // Restore gprESP + POP32R(gprR); // Restore gprR } microVUt(void) mVUincCycles(int x) { @@ -245,15 +235,14 @@ microVUt(void) mVUdivSet() { // Recompiler //------------------------------------------------------------------ -microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, u8* x86ptrStart) { +microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { microVU* mVU = mVUx; - microBlock block; - u8* thisPtr = mVUcurProg.x86Ptr; + u8* thisPtr = mVUcurProg.x86ptr; iPC = startPC / 4; // Searches for Existing Compiled Block (if found, then returns; else, compile) - microBlock* pblock = mVUblock[iPC/2]->search(pipelineState, pState); - if (block) { return pblock->x86ptrStart; } + microBlock* pblock = mVUblock[iPC/2]->search((microRegInfo*)pState); + if (pblock) { return pblock->x86ptrStart; } // First Pass setCode(); @@ -291,9 +280,7 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, setCode(); for (bool x = 1; x; ) { if (isEOB) { x = 0; } - //if (isBranch2) { mVUopU(); incPC(2); } - - if (isNop) { doUpperOp(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } + if (isNOP) { doUpperOp(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } else if (!swapOps) { doUpperOp(); incPC(1); mVUopL(); } else { incPC(1); mVUopL(); incPC(-1); doUpperOp(); incPC(1); } @@ -307,17 +294,30 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, case 6: branchCase(JLE32); // IBLEQ case 7: branchCase(JL32); // IBLTZ case 8: branchCase(JNZ32); // IBNEQ - case 2: branchCase2(); // BAL - case 1: + case 1: case 2: // B/BAL // ToDo: search for block // (remember about global variables and recursion!) mVUsetFlagsRec(bStatus, bMac); ajmp = JMP32((uptr)0); - break; // B/BAL - case 9: branchCase2(); // JALR - case 10: break; // JR/JALR - //mVUcurProg.x86Ptr + break; + case 9: case 10: // JR/JALR + + mVUsetFlagsRec(bStatus, bMac); + + PUSH32R(gprR); // Backup EDX + MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall) + AND32ItoR(gprT2, (vuIndex) ? 0x3ff8 : 0xff8); + MOV32ItoR(gprR, (u32)&pblock->pState); // Get pState (EDX second argument for __fastcall) + + //ToDo: Add block to block manager and use its address instead of pblock! + + if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState) + else CALLFunc((uptr)mVUcompileVU1); + POP32R(gprR); // Restore + JMPR(gprT1); // Jump to rec-code address + break; } + //mVUcurProg.x86Ptr return thisPtr; } } @@ -333,11 +333,14 @@ microVUx(void*) mVUcompile(u32 startPC, u32 pipelineState, microRegInfo* pState, //MOV32ItoM((uptr)&mVU->p, mVU->p); //MOV32ItoM((uptr)&mVU->q, mVU->q); - AND32ItoM((uptr)µVU0.regs.VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag + AND32ItoM((uptr)µVU0.regs->VI[REG_VPU_STAT].UL, (vuIndex ? 
~0x100 : ~0x001)); // VBS0/VBS1 flag AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC], xPC); JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5)); return thisPtr; } +void* __fastcall mVUcompileVU0(u32 startPC, uptr pState) { return mVUcompile<0>(startPC, pState); } +void* __fastcall mVUcompileVU1(u32 startPC, uptr pState) { return mVUcompile<1>(startPC, pState); } + #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index a1a875e39a..c467464241 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -59,8 +59,8 @@ microVUt(void) mVUdispatcherA() { } SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC); - SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals); - SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals); + SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)&mVU_maxvals[0]); + SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)&mVU_minvals[0]); SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P]); SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q]); SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 21634de639..35827af0ed 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -1055,24 +1055,29 @@ microVUf(void) mVU_XGKICK() { microVUf(void) mVU_B() { microVU* mVU = mVUx; mVUbranch = 1; - if (!recPass) { /*mVUinfo |= _isBranch2;*/ } } microVUf(void) mVU_BAL() { microVU* mVU = mVUx; mVUbranch = 2; - if (!recPass) { /*mVUinfo |= _isBranch2;*/ analyzeVIreg2(_Ft_, 1); } - else {} + if (!recPass) { analyzeVIreg2(_Ft_, 1); } + else { + MOV32ItoR(gprT1, bSaveAddr); + mVUallocVIb(gprT1, _Ft_); + // Note: Not sure if the lower instruction in the branch-delay slot + // should read the previous VI-value or the VI-value resulting from this branch. + // This code does the latter... 
+ } } microVUf(void) mVU_IBEQ() { microVU* mVU = mVUx; mVUbranch = 3; if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } else { - if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); - if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + if (memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); } - MOV32RtoM((uptr)mVU->branch, gprT1); + MOV32RtoM((uptr)&mVU->branch, gprT1); } } microVUf(void) mVU_IBGEZ() { @@ -1080,10 +1085,9 @@ microVUf(void) mVU_IBGEZ() { mVUbranch = 4; if (!recPass) { mVUanalyzeBranch1(_Fs_); } else { - if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); - //SHR32ItoR(gprT1, 15); - MOV32RtoM((uptr)mVU->branch, gprT1); + MOV32RtoM((uptr)&mVU->branch, gprT1); } } microVUf(void) mVU_IBGTZ() { @@ -1091,9 +1095,9 @@ microVUf(void) mVU_IBGTZ() { mVUbranch = 5; if (!recPass) { mVUanalyzeBranch1(_Fs_); } else { - if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); - MOV32RtoM((uptr)mVU->branch, gprT1); + MOV32RtoM((uptr)&mVU->branch, gprT1); } } microVUf(void) mVU_IBLEZ() { @@ -1101,9 +1105,9 @@ microVUf(void) mVU_IBLEZ() { mVUbranch = 6; if (!recPass) { mVUanalyzeBranch1(_Fs_); } else { - if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); - MOV32RtoM((uptr)mVU->branch, gprT1); + MOV32RtoM((uptr)&mVU->branch, gprT1); } } microVUf(void) mVU_IBLTZ() { @@ -1111,10 +1115,9 @@ microVUf(void) mVU_IBLTZ() { mVUbranch = 7; if (!recPass) { mVUanalyzeBranch1(_Fs_); } else { - if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); - //SHR32ItoR(gprT1, 15); - MOV32RtoM((uptr)mVU->branch, gprT1); + MOV32RtoM((uptr)&mVU->branch, gprT1); } } microVUf(void) mVU_IBNE() { @@ -1122,22 +1125,37 @@ microVUf(void) mVU_IBNE() { mVUbranch = 8; if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } else { - if (memReadIs) MOV32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); - if (memReadIt) XOR32MtoR(gprT1, (uptr)mVU->VIbackup[0]); + if (memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); } - MOV32RtoM((uptr)mVU->branch, gprT1); + MOV32RtoM((uptr)&mVU->branch, gprT1); } } microVUf(void) mVU_JR() { microVU* mVU = mVUx; mVUbranch = 9; if (!recPass) { mVUanalyzeBranch1(_Fs_); } + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + MOV32RtoM((uptr)&mVU->branch, gprT1); + } } microVUf(void) mVU_JALR() { microVU* mVU = mVUx; mVUbranch = 10; if (!recPass) { mVUanalyzeBranch1(_Fs_); analyzeVIreg2(_Ft_, 1); } + else { + if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); + else mVUallocVIa(gprT1, _Fs_); + MOV32RtoM((uptr)&mVU->branch, gprT1); + MOV32ItoR(gprT1, bSaveAddr); + mVUallocVIb(gprT1, _Ft_); + // Note: Not sure if the lower instruction in the branch-delay slot + // should read the previous VI-value or the VI-value resulting from this branch. + // This code does the latter... 
+ } } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 6044f0100d..31e6cefab4 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -144,7 +144,6 @@ declareAllVariables #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles #define mVUcount mVUallocInfo.count -//#define mVUstall mVUallocInfo.maxStall #define mVUregs mVUallocInfo.regs #define mVUregsTemp mVUallocInfo.regsTemp #define iPC mVUallocInfo.curPC @@ -157,6 +156,7 @@ declareAllVariables #define incPC(x) { iPC = ((iPC + x) & (mVU->progSize-1)); setCode(); } #define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); } #define incCycles(x) { mVUincCycles(x); } +#define bSaveAddr ((xPC + (2 * 8)) & ((vuIndex) ? 0x3ff8:0xff8)) #define _isNOP (1<<0) // Skip Lower Instruction #define _isBranch (1<<1) // Cur Instruction is a Branch @@ -170,14 +170,14 @@ declareAllVariables #define _doFlags (3<<8) #define _doMac (1<<8) #define _doStatus (1<<9) -#define _fmInstance (3<<10) -#define _fsInstance (3<<12) -#define _fpsInstance (3<<12) -#define _fcInstance (3<<14) -#define _fpcInstance (3<<14) -#define _fvmInstance (3<<16) -#define _fvsInstance (3<<18) -#define _fvcInstance (3<<20) +#define _fmInstance (3<<10) // Mac Write Instance +#define _fsInstance (3<<12) // Status Write Instance +#define _fcInstance (3<<14) // Clip Write Instance +#define _fpsInstance (3<<12) // Prev.S. Write Instance +#define _fpcInstance (3<<14) // Prev.C. Write Instance +#define _fvmInstance (3<<16) // Mac Read Instance (at T-stage for lower instruction) +#define _fvsInstance (3<<18) // Status Read Instance (at T-stage for lower instruction) +#define _fvcInstance (3<<20) // Clip Read Instance (at T-stage for lower instruction) #define _noWriteVF (1<<21) // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) #define _backupVI (1<<22) // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR) #define _memReadIs (1<<23) // Read Is (VI reg) from memory (used by branches) @@ -186,8 +186,7 @@ declareAllVariables #define _swapOps (1<<26) // Runs Lower Instruction Before Upper Instruction #define _isFSSET (1<<27) // Cur Instruction is FSSET #define _doDivFlag (1<<28) // Transfer Div flag to Status Flag - -//#define _isBranch2 (1<<31) // Cur Instruction is a Branch that writes VI regs (BAL/JALR) +#define _doClip (1<<29) #define isNOP (mVUinfo & (1<<0)) #define isBranch (mVUinfo & (1<<1)) @@ -217,7 +216,7 @@ declareAllVariables #define swapOps (mVUinfo & (1<<26)) #define isFSSET (mVUinfo & (1<<27)) #define doDivFlag (mVUinfo & (1<<28)) -//#define isBranch2 (mVUinfo & (1<<31)) +#define doClip (mVUinfo & (1<<29)) #define isMMX(_VIreg_) (_VIreg_ >= 1 && _VIreg_ <=9) #define mmVI(_VIreg_) (_VIreg_ - 1) diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 4154048046..26034fa598 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -93,40 +93,39 @@ microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) { } } +// Modifies the Source Reg! 
microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) { switch ( xyzw ) { - case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1); - SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); + case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY SSE_MOVSS_XMM_to_M32(offset+4, reg); - SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); + SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW + SSE_MOVSS_XMM_to_M32(offset+12, reg); break; // YW - case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9); - SSE_MOVLPS_XMM_to_M64(offset+4, xmmT1); + case 6: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xc9); + SSE_MOVLPS_XMM_to_M64(offset+4, reg); break; // YZ - case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW - SSE_MOVHPS_XMM_to_M64(offset+4, xmmT1); - SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); + case 7: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x93); //ZYXW + SSE_MOVHPS_XMM_to_M64(offset+4, reg); + SSE_MOVSS_XMM_to_M32(offset+12, reg); break; // YZW - case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_M32(offset, reg); - if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1); - else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55); - SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); + case 9: SSE_MOVSS_XMM_to_M32(offset, reg); + SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW + SSE_MOVSS_XMM_to_M32(offset+12, reg); break; // XW - case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_M32(offset, reg); - SSE_MOVSS_XMM_to_M32(offset+8, xmmT1); + case 10: SSE_MOVSS_XMM_to_M32(offset, reg); + SSE_MOVHLPS_XMM_to_XMM(reg, reg); + SSE_MOVSS_XMM_to_M32(offset+8, reg); break; //XZ case 11: SSE_MOVSS_XMM_to_M32(offset, reg); SSE_MOVHPS_XMM_to_M64(offset+8, reg); break; //XZW - case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW - SSE_MOVHPS_XMM_to_M64(offset, xmmT1); - SSE_MOVSS_XMM_to_M32(offset+12, xmmT1); + case 13: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x4b); //YXZW + SSE_MOVHPS_XMM_to_M64(offset, reg); + SSE_MOVSS_XMM_to_M32(offset+12, reg); break; // XYW - case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVLPS_XMM_to_M64(offset, reg); - SSE_MOVSS_XMM_to_M32(offset+8, xmmT1); + case 14: SSE_MOVLPS_XMM_to_M64(offset, reg); + SSE_MOVHLPS_XMM_to_XMM(reg, reg); + SSE_MOVSS_XMM_to_M32(offset+8, reg); break; // XYZ case 8: SSE_MOVSS_XMM_to_M32(offset, reg); break; // X case 4: SSE_MOVSS_XMM_to_M32(offset+4, reg); break; // Y @@ -138,39 +137,38 @@ microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) { } } +// Modifies the Source Reg! 
microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) { switch ( xyzw ) { - case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xB1); - SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); + case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); - SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); + SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12); break; // YW - case 6: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9); - SSE_MOVLPS_XMM_to_Rm(gprReg, xmmT1, offset+4); + case 6: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xc9); + SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset+4); break; // YZ - case 7: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x93); //ZYXW - SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset+4); - SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); + case 7: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x93); //ZYXW + SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+4); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12); break; // YZW - case 9: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); - if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(xmmT1, xmmT1); - else SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x55); - SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); + case 9: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); + SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xff); //WWWW + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12); break; // XW - case 10: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); - SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8); + case 10: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVHLPS_XMM_to_XMM(reg, reg); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+8); break; //XZ case 11: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset+8); break; //XZW - case 13: SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x4b); //YXZW - SSE_MOVHPS_XMM_to_Rm(gprReg, xmmT1, offset); - SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+12); + case 13: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0x4b); //YXZW + SSE_MOVHPS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+12); break; // XYW - case 14: SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg); - SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); + case 14: SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); + SSE_MOVHLPS_XMM_to_XMM(reg, reg); SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8); break; // XYZ case 8: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); break; // X diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index 1b02f1f37e..a8ae64ace7 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -31,7 +31,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX microVU* mVU = mVUx; int sReg, mReg = gprT1; static u8 *pjmp, *pjmp2; - static const int flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; + static const u16 flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; if (!doFlags) return; if (!doMac) { regT1 = reg; } From 74db1c818942badbf637a6c28c5bd4581afc104d Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Wed, 15 Apr 2009 15:45:52 +0000 Subject: [PATCH 067/143] Implemented Jmp/Jcc and MOVSX/ZX instructions, and added 'i' prefix to most things (will add 'i' to a few more soon -- I think iRegister will be nicer than 'x86Register'). 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@982 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVUmicroLower.cpp | 8 +- pcsx2/x86/ix86-32/recVTLB.cpp | 55 ++++--- pcsx2/x86/ix86/ix86.cpp | 208 +++++++++++++++-------- pcsx2/x86/ix86/ix86_group1.cpp | 225 ------------------------- pcsx2/x86/ix86/ix86_inlines.inl | 54 ++++++ pcsx2/x86/ix86/ix86_instructions.h | 172 +++++++++++++++---- pcsx2/x86/ix86/ix86_jmp.cpp | 190 +++++++-------------- pcsx2/x86/ix86/ix86_legacy.cpp | 153 ++++------------- pcsx2/x86/ix86/ix86_types.h | 255 ++++++++++++++++++++++++----- 9 files changed, 673 insertions(+), 647 deletions(-) delete mode 100644 pcsx2/x86/ix86/ix86_group1.cpp diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 65f2f04ab1..a54cefe86a 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -804,10 +804,10 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) using namespace x86Emitter; x86IndexReg thisreg( x86reg ); - if ( _X ) MOV(ptr32[thisreg+offset], 0x00000000); - if ( _Y ) MOV(ptr32[thisreg+offset+4], 0x00000000); - if ( _Z ) MOV(ptr32[thisreg+offset+8], 0x00000000); - if ( _W ) MOV(ptr32[thisreg+offset+12], 0x3f800000); + if ( _X ) iMOV(ptr32[thisreg+offset], 0x00000000); + if ( _Y ) iMOV(ptr32[thisreg+offset+4], 0x00000000); + if ( _Z ) iMOV(ptr32[thisreg+offset+8], 0x00000000); + if ( _W ) iMOV(ptr32[thisreg+offset+12], 0x3f800000); } return; } diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 564a636c3e..99ac820221 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -25,6 +25,7 @@ #include "iR5900.h" using namespace vtlb_private; +using namespace x86Emitter; // NOTICE: This function *destroys* EAX!! // Moves 128 bits of memory from the source register ptr to the dest register ptr. 
@@ -33,22 +34,20 @@ void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm ) { // (this is one of my test cases for the new emitter --air) - using namespace x86Emitter; - x86IndexReg src( srcRm ); x86IndexReg dest( destRm ); - MOV( eax, ptr[src] ); - MOV( ptr[dest], eax ); + iMOV( eax, ptr[src] ); + iMOV( ptr[dest], eax ); - MOV( eax, ptr[src+4] ); - MOV( ptr[dest+4], eax ); + iMOV( eax, ptr[src+4] ); + iMOV( ptr[dest+4], eax ); - MOV( eax, ptr[src+8] ); - MOV( ptr[dest+8], eax ); + iMOV( eax, ptr[src+8] ); + iMOV( ptr[dest+8], eax ); - MOV( eax, ptr[src+12] ); - MOV( ptr[dest+12], eax ); + iMOV( eax, ptr[src+12] ); + iMOV( ptr[dest+12], eax ); } /* @@ -166,6 +165,7 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign ) } } +// ------------------------------------------------------------------------ static void _vtlb_DynGen_IndirectRead( u32 bits ) { int szidx; @@ -188,6 +188,7 @@ static void _vtlb_DynGen_IndirectRead( u32 bits ) CALL32R(EAX); } +// ------------------------------------------------------------------------ // Recompiled input registers: // ecx = source addr to read from // edx = ptr to dest to write to @@ -199,17 +200,18 @@ void vtlb_DynGenRead64(u32 bits) SHR32ItoR(EAX,VTLB_PAGE_BITS); MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); - u8* _fullread = JS8(0); + iForwardJS8 _fullread; _vtlb_DynGen_DirectRead( bits, false ); - u8* cont = JMP8(0); + iForwardJump8 cont; - x86SetJ8(_fullread); + _fullread.SetTarget(); + _vtlb_DynGen_IndirectRead( bits ); - - x86SetJ8(cont); + cont.SetTarget(); } +// ------------------------------------------------------------------------ // Recompiled input registers: // ecx - source address to read from // Returns read value in eax. @@ -221,12 +223,12 @@ void vtlb_DynGenRead32(u32 bits, bool sign) SHR32ItoR(EAX,VTLB_PAGE_BITS); MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); - u8* _fullread = JS8(0); + iForwardJS8 _fullread; _vtlb_DynGen_DirectRead( bits, sign ); - u8* cont = JMP8(0); + iForwardJump8 cont; - x86SetJ8(_fullread); + _fullread.SetTarget(); _vtlb_DynGen_IndirectRead( bits ); // perform sign extension on the result: @@ -245,11 +247,10 @@ void vtlb_DynGenRead32(u32 bits, bool sign) else MOVZX32R16toR(EAX,EAX); } - - x86SetJ8(cont); + cont.SetTarget(); } -// +// ------------------------------------------------------------------------ // TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the // recompiler if the TLB is changed. void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) @@ -317,6 +318,7 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ) } } +// ------------------------------------------------------------------------ // Recompiled input registers: // ecx - source address to read from // Returns read value in eax. 
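Side note (not part of the patch): the recVTLB changes above replace the old "u8* label = JS8(0); ... x86SetJ8(label);" pairs with self-patching jump objects. Assuming the new ix86 headers are in scope, the pattern the generators now follow looks like this (the function name is hypothetical and the emitted paths are elided):

    // 1. Constructing the object emits the Jcc/JMP opcode and reserves space
    //    for its displacement at the current x86Ptr.
    // 2. The fall-through (direct) path is emitted next.
    // 3. SetTarget() back-patches the displacement to point at the current x86Ptr.
    static void ExampleDynGenStub()
    {
        iForwardJS8 onFullLookup;   // "js" with a displacement to be patched
        // ... emit the direct/fast path here ...
        iForwardJump8 done;         // unconditional "jmp", also patched later
        onFullLookup.SetTarget();   // slow path begins at the current x86Ptr
        // ... emit the indirect/handler path here ...
        done.SetTarget();           // both paths rejoin here
    }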
@@ -449,6 +451,7 @@ static void _vtlb_DynGen_DirectWrite( u32 bits ) } } +// ------------------------------------------------------------------------ static void _vtlb_DynGen_IndirectWrite( u32 bits ) { int szidx=0; @@ -468,24 +471,26 @@ static void _vtlb_DynGen_IndirectWrite( u32 bits ) CALL32R(EAX); } +// ------------------------------------------------------------------------ void vtlb_DynGenWrite(u32 sz) { MOV32RtoR(EAX,ECX); SHR32ItoR(EAX,VTLB_PAGE_BITS); MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); - u8* _full=JS8(0); + iForwardJS8 _full; _vtlb_DynGen_DirectWrite( sz ); - u8* cont = JMP8(0); + iForwardJump8 cont; - x86SetJ8(_full); + _full.SetTarget(); _vtlb_DynGen_IndirectWrite( sz ); - x86SetJ8(cont); + cont.SetTarget(); } +// ------------------------------------------------------------------------ // Generates code for a store instruction, where the address is a known constant. // TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the // recompiler if the TLB is changed. diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 35bd1791b5..1f4656d778 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -96,22 +96,25 @@ const x86Register8 namespace Internal { - const Group1ImplAll ADD; - const Group1ImplAll OR; - const Group1ImplAll ADC; - const Group1ImplAll SBB; - const Group1ImplAll AND; - const Group1ImplAll SUB; - const Group1ImplAll XOR; - const Group1ImplAll CMP; + const Group1ImplAll iADD; + const Group1ImplAll iOR; + const Group1ImplAll iADC; + const Group1ImplAll iSBB; + const Group1ImplAll iAND; + const Group1ImplAll iSUB; + const Group1ImplAll iXOR; + const Group1ImplAll iCMP; - const Group2ImplAll ROL; - const Group2ImplAll ROR; - const Group2ImplAll RCL; - const Group2ImplAll RCR; - const Group2ImplAll SHL; - const Group2ImplAll SHR; - const Group2ImplAll SAR; + const Group2ImplAll iROL; + const Group2ImplAll iROR; + const Group2ImplAll iRCL; + const Group2ImplAll iRCR; + const Group2ImplAll iSHL; + const Group2ImplAll iSHR; + const Group2ImplAll iSAR; + + const MovExtendImplAll iMOVSX; + const MovExtendImplAll iMOVZX; // Performance note: VC++ wants to use byte/word register form for the following // ModRM/SibSB constructors if we use iWrite, and furthermore unrolls the @@ -149,11 +152,9 @@ namespace Internal // instruction ca be encoded as ModRm alone. static __forceinline bool NeedsSibMagic( const ModSibBase& info ) { - // If base register is ESP, then we need a SIB: - if( info.Base.IsStackPointer() ) return true; - // no registers? no sibs! - // (ModSibBase::Reduce + // (ModSibBase::Reduce always places a register in Index, and optionally leaves + // Base empty if only register is specified) if( info.Index.IsEmpty() ) return false; // A scaled register needs a SIB @@ -188,7 +189,7 @@ namespace Internal if( info.Index.IsEmpty() ) { ModRM( 0, regfield, ModRm_UseDisp32 ); - iWrite( info.Displacement ); + iWrite( info.Displacement ); return; } else @@ -211,7 +212,7 @@ namespace Internal { ModRM( 0, regfield, ModRm_UseSib ); SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); - iWrite( info.Displacement ); + iWrite( info.Displacement ); return; } else @@ -226,7 +227,7 @@ namespace Internal if( displacement_size != 0 ) { - *(u32*)x86Ptr = info.Displacement; + *(s32*)x86Ptr = info.Displacement; x86Ptr += (displacement_size == 1) ? 
1 : 4; } } @@ -234,6 +235,44 @@ namespace Internal using namespace Internal; +// ------------------------------------------------------------------------ +// Assigns the current emitter buffer target address. +// This is provided instead of using x86Ptr directly, since we may in the future find +// a need to change the storage class system for the x86Ptr 'under the hood.' +__emitinline void iSetPtr( void* ptr ) +{ + x86Ptr = (u8*)ptr; +} + +// ------------------------------------------------------------------------ +// Retrieves the current emitter buffer target address. +// This is provided instead of using x86Ptr directly, since we may in the future find +// a need to change the storage class system for the x86Ptr 'under the hood.' +__emitinline u8* iGetPtr() +{ + return x86Ptr; +} + +// ------------------------------------------------------------------------ +__emitinline void iAlignPtr( uint bytes ) +{ + // forward align + x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~(bytes - 1) ); +} + +// ------------------------------------------------------------------------ +__emitinline void iAdvancePtr( uint bytes ) +{ + if( IsDevBuild ) + { + // common debugger courtesy: advance with INT3 as filler. + for( uint i=0; i( 0xcc ); + } + else + x86Ptr += bytes; +} + // ------------------------------------------------------------------------ // Internal implementation of EmitSibMagic which has been custom tailored // to optimize special forms of the Lea instructions accordingly, such @@ -258,21 +297,33 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags ) if( src.Index.IsEmpty() ) { - MOV( to, src.Displacement ); + iMOV( to, src.Displacement ); return; } else if( displacement_size == 0 ) { - MOV( to, ToReg( src.Index.Id ) ); + iMOV( to, ToReg( src.Index.Id ) ); return; } else { - // note: no need to do ebp+0 check since we encode all 0 displacements as - // register assignments above (via MOV) + if( !preserve_flags ) + { + // encode as MOV and ADD combo. Make sure to use the immediate on the + // ADD since it can encode as an 8-bit sign-extended value. + + iMOV( to, ToReg( src.Index.Id ) ); + iADD( to, src.Displacement ); + return; + } + else + { + // note: no need to do ebp+0 check since we encode all 0 displacements as + // register assignments above (via MOV) - iWrite( 0x8d ); - ModRM( displacement_size, to.Id, src.Index.Id ); + iWrite( 0x8d ); + ModRM( displacement_size, to.Id, src.Index.Id ); + } } } else @@ -288,8 +339,8 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags ) // (this does not apply to older model P4s with the broken barrel shifter, // but we currently aren't optimizing for that target anyway). - MOV( to, ToReg( src.Index.Id ) ); - SHL( to, src.Scale ); + iMOV( to, ToReg( src.Index.Id ) ); + iSHL( to, src.Scale ); return; } iWrite( 0x8d ); @@ -300,26 +351,46 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags ) } else { + if( src.Scale == 0 ) + { + if( !preserve_flags ) + { + if( src.Index == esp ) + { + // ESP is not encodable as an index (ix86 ignores it), thus: + iMOV( to, ToReg( src.Base.Id ) ); // will do the trick! + iADD( to, src.Displacement ); + return; + } + else if( src.Displacement == 0 ) + { + iMOV( to, ToReg( src.Base.Id ) ); + iADD( to, ToReg( src.Index.Id ) ); + return; + } + } + else if( (src.Index == esp) && (src.Displacement == 0) ) + { + // special case handling of ESP as Index, which is replaceable with + // a single MOV even when preserve_flags is set! 
:D + + iMOV( to, ToReg( src.Base.Id ) ); + return; + } + } + if( src.Base == ebp && displacement_size == 0 ) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! iWrite( 0x8d ); ModRM( displacement_size, to.Id, ModRm_UseSib ); SibSB( src.Scale, src.Index.Id, src.Base.Id ); - - /*switch( displacement_size ) - { - case 0: break; - case 1: emit.write( src.Displacement ); break; - case 2: emit.write( src.Displacement ); break; - jNO_DEFAULT - }*/ } } if( displacement_size != 0 ) { - *(u32*)x86Ptr = src.Displacement; + *(s32*)x86Ptr = src.Displacement; x86Ptr += (displacement_size == 1) ? 1 : 4; } } @@ -350,6 +421,7 @@ protected: static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } public: + // ------------------------------------------------------------------------ static __forceinline void Emit( const x86Register& to, const x86Register& from ) { if( to == from ) return; // ignore redundant MOVs. @@ -359,6 +431,7 @@ public: ModRM( 3, from.Id, to.Id ); } + // ------------------------------------------------------------------------ static __forceinline void Emit( const ModSibBase& dest, const x86Register& from ) { prefix16(); @@ -378,6 +451,7 @@ public: } } + // ------------------------------------------------------------------------ static __forceinline void Emit( const x86Register& to, const ModSibBase& src ) { prefix16(); @@ -397,6 +471,7 @@ public: } } + // ------------------------------------------------------------------------ static __forceinline void Emit( const x86Register& to, ImmType imm ) { // Note: MOV does not have (reg16/32,imm8) forms. @@ -406,6 +481,7 @@ public: iWrite( imm ); } + // ------------------------------------------------------------------------ static __forceinline void Emit( ModSibStrict dest, ImmType imm ) { prefix16(); @@ -436,50 +512,50 @@ namespace Internal // TODO : Turn this into a macro after it's been debugged and accuracy-approved! 
:D // ---------- 32 Bit Interface ----------- -__forceinline void MOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); } -__forceinline void MOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); } -__forceinline void MOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); } -__noinline void MOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); } -__noinline void MOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); } -__noinline void MOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); } +__forceinline void iMOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); } +__forceinline void iMOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); } +__forceinline void iMOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); } +__noinline void iMOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); } +__noinline void iMOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); } +__noinline void iMOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); } -void MOV( const x86Register32& to, u32 imm, bool preserve_flags ) +void iMOV( const x86Register32& to, u32 imm, bool preserve_flags ) { if( !preserve_flags && (imm == 0) ) - XOR( to, to ); + iXOR( to, to ); else MOV32i::Emit( to, imm ); } // ---------- 16 Bit Interface ----------- -__forceinline void MOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); } -__forceinline void MOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); } -__forceinline void MOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); } -__noinline void MOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); } -__noinline void MOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); } -__noinline void MOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); } +__forceinline void iMOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); } +__forceinline void iMOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); } +__forceinline void iMOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); } +__noinline void iMOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); } +__noinline void iMOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); } +__noinline void iMOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); } -void MOV( const x86Register16& to, u16 imm, bool preserve_flags ) +void iMOV( const x86Register16& to, u16 imm, bool preserve_flags ) { if( !preserve_flags && (imm == 0) ) - XOR( to, to ); + iXOR( to, to ); else MOV16i::Emit( to, imm ); } // ---------- 8 Bit Interface ----------- -__forceinline void MOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); } -__forceinline void MOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); } -__forceinline void MOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); } -__noinline void MOV( const ModSibBase& sibdest, const x86Register8& from ) { 
MOV8::Emit( sibdest, from ); } -__noinline void MOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); } -__noinline void MOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); } +__forceinline void iMOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); } +__forceinline void iMOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); } +__forceinline void iMOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); } +__noinline void iMOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); } +__noinline void iMOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); } +__noinline void iMOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); } -void MOV( const x86Register8& to, u8 imm, bool preserve_flags ) +void iMOV( const x86Register8& to, u8 imm, bool preserve_flags ) { if( !preserve_flags && (imm == 0) ) - XOR( to, to ); + iXOR( to, to ); else MOV8i::Emit( to, imm ); } @@ -510,7 +586,8 @@ __forceinline void POP( x86Register32 from ) { write8( 0x58 | from.Id ); } __emitinline void POP( const ModSibBase& from ) { - iWrite( 0x8f ); Internal::EmitSibMagic( 0, from ); + iWrite( 0x8f ); + Internal::EmitSibMagic( 0, from ); } __forceinline void PUSH( u32 imm ) { write8( 0x68 ); write32( imm ); } @@ -518,7 +595,8 @@ __forceinline void PUSH( x86Register32 from ) { write8( 0x50 | from.Id ); } __emitinline void PUSH( const ModSibBase& from ) { - iWrite( 0xff ); Internal::EmitSibMagic( 6, from ); + iWrite( 0xff ); + Internal::EmitSibMagic( 6, from ); } // pushes the EFLAGS register onto the stack diff --git a/pcsx2/x86/ix86/ix86_group1.cpp b/pcsx2/x86/ix86/ix86_group1.cpp deleted file mode 100644 index f76950c0ef..0000000000 --- a/pcsx2/x86/ix86/ix86_group1.cpp +++ /dev/null @@ -1,225 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "PrecompiledHeader.h" -#include "ix86_internal.h" - -//------------------------------------------------------------------ -// x86 Group 1 Instructions -//------------------------------------------------------------------ -// Group 1 instructions all adhere to the same encoding scheme, and so they all -// share the same emitter which has been coded here. -// -// Group 1 Table: [column value is the Reg field of the ModRM byte] -// -// 0 1 2 3 4 5 6 7 -// ADD OR ADC SBB AND SUB XOR CMP -// - -namespace x86Emitter { - -////////////////////////////////////////////////////////////////////////////////////////// -// x86RegConverter - this class is used internally by the emitter as a helper for -// converting 8 and 16 register forms into 32 bit forms. 
This way the end-user exposed API -// can use type-safe 8/16/32 bit register types, and the underlying code can use a single -// unified emitter to generate all function variations + prefixes and such. :) -// -class x86RegConverter : public x86Register32 -{ -public: - x86RegConverter( x86Register32 src ) : x86Register32( src ) {} - x86RegConverter( x86Register16 src ) : x86Register32( src.Id ) {} - x86RegConverter( x86Register8 src ) : x86Register32( src.Id ) {} -}; - -enum Group1InstructionType -{ - G1Type_ADD=0, - G1Type_OR, - G1Type_ADC, - G1Type_SBB, - G1Type_AND, - G1Type_SUB, - G1Type_XOR, - G1Type_CMP -}; - - -static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, x86RegConverter from, bool bit8form=false ) -{ - write8( (bit8form ? 0 : 1) | (inst<<3) ); - ModRM( 3, from.Id, to.Id ); -} - -static emitterT void Group1( Group1InstructionType inst, const ModSib& sibdest, x86RegConverter from, bool bit8form=false ) -{ - write8( (bit8form ? 0 : 1) | (inst<<3) ); - EmitSibMagic( from, sibdest ); -} - -static emitterT void Group1( Group1InstructionType inst, x86RegConverter to, const ModSib& sibsrc, bool bit8form=false ) -{ - write8( (bit8form ? 2 : 3) | (inst<<3) ); - EmitSibMagic( to, sibsrc ); -} - -// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit -// instruction (AX,BX,etc). -template< typename T > -static emitterT void Group1_Imm( Group1InstructionType inst, x86RegConverter to, T imm ) -{ - bool bit8form = (sizeof(T) == 1); - - if( !bit8form && is_s8( imm ) ) - { - write8( 0x83 ); - ModRM( 3, inst, to.Id ); - write8( (s8)imm ); - } - else - { - if( to == eax ) - write8( (bit8form ? 4 : 5) | (inst<<3) ); - else - { - write8( bit8form ? 0x80 : 0x81 ); - ModRM( 3, inst, to.Id ); - } - x86write( imm ); - } -} - -// Note: this function emits based on the operand size of imm, so 16 bit imms generate a 16 bit -// instruction (AX,BX,etc). -template< typename T > -static emitterT void Group1_Imm( Group1InstructionType inst, const ModSib& sibdest, T imm ) -{ - bool bit8form = (sizeof(T) == 1); - - write8( bit8form ? 0x80 : (is_s8( imm ) ? 0x83 : 0x81) ); - - EmitSibMagic( inst, sibdest ); - - if( !bit8form && is_s8( imm ) ) - write8( (s8)imm ); - else - x86write( imm ); -} - -// 16 bit instruction prefix! 
-static __forceinline void prefix16() { write8(0x66); } - -////////////////////////////////////////////////////////////////////////////////////////// -// -#define DEFINE_GROUP1_OPCODE( cod ) \ - emitterT void cod##32( x86Register32 to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##32( x86Register32 to, void* from ) { Group1( G1Type_##cod, to, ptr[from] ); } \ - emitterT void cod##32( x86Register32 to, const ModSib& from ) { Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##32( x86Register32 to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ - emitterT void cod##32( const ModSib& to, x86Register32 from ) { Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##32( void* to, x86Register32 from ) { Group1( G1Type_##cod, ptr[to], from ); } \ - emitterT void cod##32( void* to, u32 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ - emitterT void cod##32( const ModSib& to, u32 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ - \ - emitterT void cod##16( x86Register16 to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##16( x86Register16 to, void* from ) { prefix16(); Group1( G1Type_##cod, to, ptr[from] ); } \ - emitterT void cod##16( x86Register16 to, const ModSib& from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##16( x86Register16 to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \ - emitterT void cod##16( const ModSib& to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, to, from ); } \ - emitterT void cod##16( void* to, x86Register16 from ) { prefix16(); Group1( G1Type_##cod, ptr[to], from ); } \ - emitterT void cod##16( void* to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ - emitterT void cod##16( const ModSib& to, u16 imm ) { prefix16(); Group1_Imm( G1Type_##cod, to, imm ); } \ - \ - emitterT void cod##8( x86Register8 to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \ - emitterT void cod##8( x86Register8 to, void* from ) { Group1( G1Type_##cod, to, ptr[from], true ); } \ - emitterT void cod##8( x86Register8 to, const ModSib& from ) { Group1( G1Type_##cod, to, from , true ); } \ - emitterT void cod##8( x86Register8 to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } \ - emitterT void cod##8( const ModSib& to, x86Register8 from ) { Group1( G1Type_##cod, to, from , true ); } \ - emitterT void cod##8( void* to, x86Register8 from ) { Group1( G1Type_##cod, ptr[to], from , true ); } \ - emitterT void cod##8( void* to, u8 imm ) { Group1_Imm( G1Type_##cod, ptr[to], imm ); } \ - emitterT void cod##8( const ModSib& to, u8 imm ) { Group1_Imm( G1Type_##cod, to, imm ); } - -DEFINE_GROUP1_OPCODE( ADD ) -DEFINE_GROUP1_OPCODE( CMP ) -DEFINE_GROUP1_OPCODE( OR ) -DEFINE_GROUP1_OPCODE( ADC ) -DEFINE_GROUP1_OPCODE( SBB ) -DEFINE_GROUP1_OPCODE( AND ) -DEFINE_GROUP1_OPCODE( SUB ) -DEFINE_GROUP1_OPCODE( XOR ) - -} // end namespace x86Emitter - - -static __forceinline x86Emitter::x86Register32 _reghlp32( x86IntRegType src ) -{ - return x86Emitter::x86Register32( src ); -} - -static __forceinline x86Emitter::x86Register16 _reghlp16( x86IntRegType src ) -{ - return x86Emitter::x86Register16( src ); -} - -static __forceinline x86Emitter::x86Register8 _reghlp8( x86IntRegType src ) -{ - return x86Emitter::x86Register8( src ); -} - -static __forceinline x86Emitter::ModSib _mrmhlp( x86IntRegType src ) -{ - return x86Emitter::ModSib( x86Emitter::x86ModRm( _reghlp32(src) ) ); -} - 
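Side note (not part of the patch): both the deleted file and the Group1ImplAll templates that replace it lean on the regular Group 1 layout shown in the table at the top of this file: for register/register forms the operation index lands in bits 3..5 of the opcode byte, for immediate forms it lands in the ModRM reg field behind 0x80/0x81/0x83. A small sketch of the reg,reg opcode byte (names are hypothetical):

    #include <cstdint>

    // Column order of the Group 1 table: ADD OR ADC SBB AND SUB XOR CMP.
    enum Group1Op { G1_ADD = 0, G1_OR, G1_ADC, G1_SBB, G1_AND, G1_SUB, G1_XOR, G1_CMP };

    // First opcode byte of the "r/m, reg" form: bit 0 selects 16/32-bit vs
    // 8-bit operands, bits 3..5 carry the operation.
    constexpr uint8_t group1_rr_opcode(Group1Op op, bool bit8form)
    {
        return static_cast<uint8_t>((bit8form ? 0x00 : 0x01) | (op << 3));
    }

    static_assert(group1_rr_opcode(G1_ADD, false) == 0x01, "add r/m32, r32");
    static_assert(group1_rr_opcode(G1_SUB, false) == 0x29, "sub r/m32, r32");
    static_assert(group1_rr_opcode(G1_XOR, false) == 0x31, "xor r/m32, r32");
    static_assert(group1_rr_opcode(G1_CMP, true)  == 0x38, "cmp r/m8, r8");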
-////////////////////////////////////////////////////////////////////////////////////////// -// -#define DEFINE_LEGACY_HELPER( cod, bits ) \ - emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { x86Emitter::cod##bits( _reghlp##bits(to), _reghlp##bits(from) ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { x86Emitter::cod##bits( _reghlp##bits(to), imm ); } \ - emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { x86Emitter::cod##bits( _reghlp##bits(to), (void*)from ); } \ - emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { x86Emitter::cod##bits( (void*)to, _reghlp##bits(from) ); } \ - emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { x86Emitter::cod##bits( (void*)to, imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, imm ); } \ - emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _reghlp##bits(to), _mrmhlp(from) + offset ); } \ - emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x86Emitter::cod##bits( _mrmhlp(to) + offset, _reghlp##bits(from) ); } - -#define DEFINE_GROUP1_OPCODE_LEGACY( cod ) \ - DEFINE_LEGACY_HELPER( cod, 32 ) \ - DEFINE_LEGACY_HELPER( cod, 16 ) \ - DEFINE_LEGACY_HELPER( cod, 8 ) - -DEFINE_GROUP1_OPCODE_LEGACY( ADD ) -DEFINE_GROUP1_OPCODE_LEGACY( CMP ) -DEFINE_GROUP1_OPCODE_LEGACY( OR ) -DEFINE_GROUP1_OPCODE_LEGACY( ADC ) -DEFINE_GROUP1_OPCODE_LEGACY( SBB ) -DEFINE_GROUP1_OPCODE_LEGACY( AND ) -DEFINE_GROUP1_OPCODE_LEGACY( SUB ) -DEFINE_GROUP1_OPCODE_LEGACY( XOR ) - -// Special forms needed by the legacy emitter syntax: - -emitterT void AND32I8toR( x86IntRegType to, s8 from ) -{ - x86Emitter::AND32( _reghlp32(to), from ); -} - -emitterT void AND32I8toM( uptr to, s8 from ) -{ - x86Emitter::AND32( (void*)to, from ); -} diff --git a/pcsx2/x86/ix86/ix86_inlines.inl b/pcsx2/x86/ix86/ix86_inlines.inl index 69cc3b03f5..3fe2ec54fb 100644 --- a/pcsx2/x86/ix86/ix86_inlines.inl +++ b/pcsx2/x86/ix86/ix86_inlines.inl @@ -234,4 +234,58 @@ namespace x86Emitter return *this; } + + ////////////////////////////////////////////////////////////////////////////////////////// + // + + // ------------------------------------------------------------------------ + template< typename OperandType > + iForwardJump::iForwardJump( JccComparisonType cctype ) : + BasePtr( (s8*)iGetPtr() + + ((OperandSize == 1) ? 2 : // j8's are always 2 bytes. + ((cctype==Jcc_Unconditional) ? 5 : 6 )) // j32's are either 5 or 6 bytes + ) + { + jASSUME( cctype != Jcc_Unknown ); + jASSUME( OperandSize == 1 || OperandSize == 4 ); + + if( OperandSize == 1 ) + iWrite( (cctype == Jcc_Unconditional) ? 
0xeb : (0x70 | cctype) ); + else + { + if( cctype == Jcc_Unconditional ) + iWrite( 0xe9 ); + else + { + iWrite( 0x0f ); + iWrite( 0x80 | cctype ); + } + } + + iAdvancePtr( OperandSize ); + } + + // ------------------------------------------------------------------------ + template< typename OperandType > + void iForwardJump::SetTarget() const + { + jASSUME( BasePtr != NULL ); + + sptr displacement = (sptr)iGetPtr() - (sptr)BasePtr; + if( OperandSize == 1 ) + { + if( !is_s8( displacement ) ) + { + assert( false ); + Console::Error( "Emitter Error: Invalid short jump displacement = 0x%x", params (int)displacement ); + } + BasePtr[-1] = (s8)displacement; + } + else + { + // full displacement, no sanity checks needed :D + ((s32*)BasePtr)[-1] = displacement; + } + } + } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 0f218d1a0b..48aa07808e 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -35,6 +35,9 @@ namespace x86Emitter { + extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward=false ); + + // ----- Lea Instructions (Load Effective Address) ----- // Note: alternate (void*) forms of these instructions are not provided since those // forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs @@ -56,56 +59,153 @@ namespace x86Emitter static __forceinline void PUSH( void* from ) { PUSH( ptr[from] ); } // ------------------------------------------------------------------------ - using Internal::ADD; - using Internal::OR; - using Internal::ADC; - using Internal::SBB; - using Internal::AND; - using Internal::SUB; - using Internal::XOR; - using Internal::CMP; + using Internal::iADD; + using Internal::iOR; + using Internal::iADC; + using Internal::iSBB; + using Internal::iAND; + using Internal::iSUB; + using Internal::iXOR; + using Internal::iCMP; - using Internal::ROL; - using Internal::ROR; - using Internal::RCL; - using Internal::RCR; - using Internal::SHL; - using Internal::SHR; - using Internal::SAR; + using Internal::iROL; + using Internal::iROR; + using Internal::iRCL; + using Internal::iRCR; + using Internal::iSHL; + using Internal::iSHR; + using Internal::iSAR; + using Internal::iMOVSX; + using Internal::iMOVZX; + + ////////////////////////////////////////////////////////////////////////////////////////// + // MOV instructions! // ---------- 32 Bit Interface ----------- - extern void MOV( const x86Register32& to, const x86Register32& from ); - extern void MOV( const ModSibBase& sibdest, const x86Register32& from ); - extern void MOV( const x86Register32& to, const ModSibBase& sibsrc ); - extern void MOV( const x86Register32& to, const void* src ); - extern void MOV( const void* dest, const x86Register32& from ); + extern void iMOV( const x86Register32& to, const x86Register32& from ); + extern void iMOV( const ModSibBase& sibdest, const x86Register32& from ); + extern void iMOV( const x86Register32& to, const ModSibBase& sibsrc ); + extern void iMOV( const x86Register32& to, const void* src ); + extern void iMOV( const void* dest, const x86Register32& from ); // preserve_flags - set to true to disable optimizations which could alter the state of // the flags (namely replacing mov reg,0 with xor). 
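Side note (not part of the patch): the reason preserve_flags exists at all is that the zero-immediate shortcut trades a 5-byte mov for a 2-byte xor, and xor rewrites EFLAGS -- a caller sitting between a CMP and its Jcc must opt out. One valid pair of encodings for eax, purely for illustration:

    // What iMOV(eax, 0) may reduce to, depending on preserve_flags:
    const unsigned char mov_eax_0[]   = { 0xB8, 0x00, 0x00, 0x00, 0x00 };  // mov eax, 0   (5 bytes, EFLAGS untouched)
    const unsigned char xor_eax_eax[] = { 0x31, 0xC0 };                    // xor eax, eax (2 bytes, EFLAGS modified)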
- extern void MOV( const x86Register32& to, u32 imm, bool preserve_flags=false ); - extern void MOV( const ModSibStrict<4>& sibdest, u32 imm ); + extern void iMOV( const x86Register32& to, u32 imm, bool preserve_flags=false ); + extern void iMOV( const ModSibStrict<4>& sibdest, u32 imm ); // ---------- 16 Bit Interface ----------- - extern void MOV( const x86Register16& to, const x86Register16& from ); - extern void MOV( const ModSibBase& sibdest, const x86Register16& from ); - extern void MOV( const x86Register16& to, const ModSibBase& sibsrc ); - extern void MOV( const x86Register16& to, const void* src ); - extern void MOV( const void* dest, const x86Register16& from ); + extern void iMOV( const x86Register16& to, const x86Register16& from ); + extern void iMOV( const ModSibBase& sibdest, const x86Register16& from ); + extern void iMOV( const x86Register16& to, const ModSibBase& sibsrc ); + extern void iMOV( const x86Register16& to, const void* src ); + extern void iMOV( const void* dest, const x86Register16& from ); // preserve_flags - set to true to disable optimizations which could alter the state of // the flags (namely replacing mov reg,0 with xor). - extern void MOV( const x86Register16& to, u16 imm, bool preserve_flags=false ); - extern void MOV( const ModSibStrict<2>& sibdest, u16 imm ); + extern void iMOV( const x86Register16& to, u16 imm, bool preserve_flags=false ); + extern void iMOV( const ModSibStrict<2>& sibdest, u16 imm ); // ---------- 8 Bit Interface ----------- - extern void MOV( const x86Register8& to, const x86Register8& from ); - extern void MOV( const ModSibBase& sibdest, const x86Register8& from ); - extern void MOV( const x86Register8& to, const ModSibBase& sibsrc ); - extern void MOV( const x86Register8& to, const void* src ); - extern void MOV( const void* dest, const x86Register8& from ); + extern void iMOV( const x86Register8& to, const x86Register8& from ); + extern void iMOV( const ModSibBase& sibdest, const x86Register8& from ); + extern void iMOV( const x86Register8& to, const ModSibBase& sibsrc ); + extern void iMOV( const x86Register8& to, const void* src ); + extern void iMOV( const void* dest, const x86Register8& from ); - extern void MOV( const x86Register8& to, u8 imm, bool preserve_flags=false ); - extern void MOV( const ModSibStrict<1>& sibdest, u8 imm ); + extern void iMOV( const x86Register8& to, u8 imm, bool preserve_flags=false ); + extern void iMOV( const ModSibStrict<1>& sibdest, u8 imm ); + ////////////////////////////////////////////////////////////////////////////////////////// + // JMP / Jcc Instructions! + +#define DEFINE_FORWARD_JUMP( label, cond ) \ + template< typename OperandType > \ + class iForward##label : public iForwardJump \ + { \ + public: \ + iForward##label() : iForwardJump( cond ) {} \ + }; + + // ------------------------------------------------------------------------ + // Note: typedefs below are defined individually in order to appease Intellisense + // resolution. Including them into the class definition macro above breaks it. 
+ + typedef iForwardJump iForwardJump8; + typedef iForwardJump iForwardJump32; + + + DEFINE_FORWARD_JUMP( JA, Jcc_Above ); + DEFINE_FORWARD_JUMP( JB, Jcc_Below ); + DEFINE_FORWARD_JUMP( JAE, Jcc_AboveOrEqual ); + DEFINE_FORWARD_JUMP( JBE, Jcc_BelowOrEqual ); + + typedef iForwardJA iForwardJA8; + typedef iForwardJA iForwardJA32; + typedef iForwardJB iForwardJB8; + typedef iForwardJB iForwardJB32; + typedef iForwardJAE iForwardJAE8; + typedef iForwardJAE iForwardJAE32; + typedef iForwardJBE iForwardJBE8; + typedef iForwardJBE iForwardJBE32; + + DEFINE_FORWARD_JUMP( JG, Jcc_Greater ); + DEFINE_FORWARD_JUMP( JL, Jcc_Less ); + DEFINE_FORWARD_JUMP( JGE, Jcc_GreaterOrEqual ); + DEFINE_FORWARD_JUMP( JLE, Jcc_LessOrEqual ); + + typedef iForwardJG iForwardJG8; + typedef iForwardJG iForwardJG32; + typedef iForwardJL iForwardJL8; + typedef iForwardJL iForwardJL32; + typedef iForwardJGE iForwardJGE8; + typedef iForwardJGE iForwardJGE32; + typedef iForwardJLE iForwardJLE8; + typedef iForwardJLE iForwardJLE32; + + DEFINE_FORWARD_JUMP( JZ, Jcc_Zero ); + DEFINE_FORWARD_JUMP( JE, Jcc_Equal ); + DEFINE_FORWARD_JUMP( JNZ, Jcc_NotZero ); + DEFINE_FORWARD_JUMP( JNE, Jcc_NotEqual ); + + typedef iForwardJZ iForwardJZ8; + typedef iForwardJZ iForwardJZ32; + typedef iForwardJE iForwardJE8; + typedef iForwardJE iForwardJE32; + typedef iForwardJNZ iForwardJNZ8; + typedef iForwardJNZ iForwardJNZ32; + typedef iForwardJNE iForwardJNE8; + typedef iForwardJNE iForwardJNE32; + + DEFINE_FORWARD_JUMP( JS, Jcc_Signed ); + DEFINE_FORWARD_JUMP( JNS, Jcc_Unsigned ); + + typedef iForwardJS iForwardJS8; + typedef iForwardJS iForwardJS32; + typedef iForwardJNS iForwardJNS8; + typedef iForwardJNS iForwardJNS32; + + DEFINE_FORWARD_JUMP( JO, Jcc_Overflow ); + DEFINE_FORWARD_JUMP( JNO, Jcc_NotOverflow ); + + typedef iForwardJO iForwardJO8; + typedef iForwardJO iForwardJO32; + typedef iForwardJNO iForwardJNO8; + typedef iForwardJNO iForwardJNO32; + + DEFINE_FORWARD_JUMP( JC, Jcc_Carry ); + DEFINE_FORWARD_JUMP( JNC, Jcc_NotCarry ); + + typedef iForwardJC iForwardJC8; + typedef iForwardJC iForwardJC32; + typedef iForwardJNC iForwardJNC8; + typedef iForwardJNC iForwardJNC32; + + DEFINE_FORWARD_JUMP( JPE, Jcc_ParityEven ); + DEFINE_FORWARD_JUMP( JPO, Jcc_ParityOdd ); + + typedef iForwardJPE iForwardJPE8; + typedef iForwardJPE iForwardJPE32; + typedef iForwardJPO iForwardJPO8; + typedef iForwardJPO iForwardJPO32; } diff --git a/pcsx2/x86/ix86/ix86_jmp.cpp b/pcsx2/x86/ix86/ix86_jmp.cpp index 4b2c11eb17..20fde0d093 100644 --- a/pcsx2/x86/ix86/ix86_jmp.cpp +++ b/pcsx2/x86/ix86/ix86_jmp.cpp @@ -36,157 +36,83 @@ #include "System.h" #include "ix86_internal.h" +namespace x86Emitter { -// Another Work-in-Progress!! - - -/* -emitterT void x86SetPtr( u8* ptr ) +// ------------------------------------------------------------------------ +void iSmartJump::SetTarget() { - x86Ptr = ptr; + jASSUME( !m_written ); + if( m_written ) + throw Exception::InvalidOperation( "Attempted to set SmartJump label multiple times." ); + + m_target = iGetPtr(); + if( m_baseptr == NULL ) return; + + iSetPtr( m_baseptr ); + u8* const saveme = m_baseptr + GetMaxInstructionSize(); + iJccKnownTarget( m_cc, m_target, true ); + + // Copy recompiled data inward if the jump instruction didn't fill the + // alloted buffer (means that we optimized things to a j8!) 
+ + const int spacer = (sptr)saveme - (sptr)iGetPtr(); + if( spacer != 0 ) + { + u8* destpos = iGetPtr(); + const int copylen = (sptr)m_target - (sptr)saveme; + + memcpy_fast( destpos, saveme, copylen ); + iSetPtr( m_target - spacer ); + } + + m_written = true; } ////////////////////////////////////////////////////////////////////////////////////////// -// x86Ptr Label API // -class x86Label +// ------------------------------------------------------------------------ +// Writes a jump at the current x86Ptr, which targets a pre-established target address. +// (usually a backwards jump) +// +// slideForward - used internally by iSmartJump to indicate that the jump target is going +// to slide forward in the event of an 8 bit displacement. +// +__emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ) { -public: - class Entry - { - protected: - u8* (*m_emit)( u8* emitTo, u8* label_target, int cc ); // callback for the instruction to emit (cc = comparison type) - u8* m_base; // base address of the instruction (passed to the instruction) - int m_cc; // comparison type of the instruction - - public: - explicit Entry( int cc ) : - m_base( x86Ptr ) - , m_writebackpos( writebackidx ) - { - } + // Calculate the potential j8 displacement first, assuming an instruction length of 2: + sptr displacement8 = (sptr)target - ((sptr)iGetPtr() + 2); - void Commit( const u8* target ) const - { - //uptr reltarget = (uptr)m_base - (uptr)target; - //*((u32*)&m_base[m_writebackpos]) = reltarget; - jASSUME( m_emit != NULL ); - jASSUME( m_base != NULL ); - return m_emit( m_base, target, m_cc ); - } - }; + const int slideVal = slideForward ? ((comparison == Jcc_Unconditional) ? 3 : 4) : 0; + displacement8 -= slideVal; -protected: - u8* m_target; // x86Ptr target address of this label - Entry m_writebacks[8]; - int m_writeback_curpos; - -public: - // creates a label list with no valid target. - // Use x86LabelList::Set() to set a target prior to class destruction. - x86Label() : m_target() - { - } - - x86Label( EmitPtrCache& src ) : m_target( src.GetPtr() ) - { - } + // if the following assert fails it means we accidentally used slideForard on a backward + // jump (which is an invalid operation since there's nothing to slide forward). + if( slideForward ) jASSUME( displacement8 >= 0 ); - // Performs all address writebacks on destruction. - virtual ~x86Label() + if( is_s8( displacement8 ) ) { - IssueWritebacks(); + iWrite( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) ); + iWrite( displacement8 ); } + else + { + // Perform a 32 bit jump instead. :( - void SetTarget() { m_address = x86Ptr; } - void SetTarget( void* addr ) { m_address = (u8*)addr; } - - void Clear() - { - m_writeback_curpos = 0; - } - - // Adds a jump/call instruction to this label for writebacks. - void AddWriteback( void* emit_addr, u8* (*instruction)(), int cc ) - { - jASSUME( m_writeback_curpos < MaxWritebacks ); - m_writebacks[m_writeback_curpos] = Entry( (u8*)instruction, addrpart ) ); - m_writeback_curpos++; - } - - void IssueWritebacks() const - { - const std::list::const_iterator& start = m_list_writebacks. - for( ; start!=end; start++ ) + if( comparison == Jcc_Unconditional ) + iWrite( 0xe9 ); + else { - Entry& current = *start; - u8* donespot = current.Commit(); - - // Copy the data from the m_nextinst to the current location, - // and update any additional writebacks (but what about multiple labels?!?) 
- + iWrite( 0x0f ); + iWrite( 0x80 | comparison ); } + iWrite( (sptr)target - ((sptr)iGetPtr() + 4) ); } -}; -#endif - -void JMP( x86Label& dest ) -{ - dest.AddWriteback( x86Ptr, emitJMP, 0 ); } -void JLE( x86Label& dest ) +__emitinline void iJcc( JccComparisonType comparison, void* target ) { - dest.AddWriteback( x86Ptr, emitJCC, 0 ); + iJccKnownTarget( comparison, target ); } -void x86SetJ8( u8* j8 ) -{ - u32 jump = ( x86Ptr - j8 ) - 1; - - if ( jump > 0x7f ) { - Console::Error( "j8 greater than 0x7f!!" ); - assert(0); - } - *j8 = (u8)jump; -} - -void x86SetJ8A( u8* j8 ) -{ - u32 jump = ( x86Ptr - j8 ) - 1; - - if ( jump > 0x7f ) { - Console::Error( "j8 greater than 0x7f!!" ); - assert(0); - } - - if( ((uptr)x86Ptr&0xf) > 4 ) { - - uptr newjump = jump + 16-((uptr)x86Ptr&0xf); - - if( newjump <= 0x7f ) { - jump = newjump; - while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; - } - } - *j8 = (u8)jump; -} - -emitterT void x86SetJ32( u32* j32 ) -{ - *j32 = ( x86Ptr - (u8*)j32 ) - 4; -} - -emitterT void x86SetJ32A( u32* j32 ) -{ - while((uptr)x86Ptr&0xf) *x86Ptr++ = 0x90; - x86SetJ32(j32); -} - -emitterT void x86Align( int bytes ) -{ - // forward align - x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~( bytes - 1 ) ); -} -*/ +} \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 722aba5089..f89443112c 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -57,27 +57,29 @@ static __forceinline ModSibStrict _mhlp2( x86IntRegType src1, x86In return ModSibStrict( x86IndexReg(src2), x86IndexReg(src1) ); } +////////////////////////////////////////////////////////////////////////////////////////// +// #define DEFINE_LEGACY_HELPER( cod, bits ) \ - emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { cod( _reghlp(to), _reghlp(from) ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { cod( _reghlp(to), imm ); } \ - emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { cod( _reghlp(to), (void*)from ); } \ - emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { cod( (void*)to, _reghlp(from) ); } \ - emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { cod( ptr##bits[to], imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { cod( _mhlp(to) + offset, imm ); } \ - emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { cod( _reghlp(to), _mhlp(from) + offset ); } \ - emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { cod( _mhlp(to) + offset, _reghlp(from) ); } \ + emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { i##cod( _reghlp(to), _reghlp(from) ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { i##cod( _reghlp(to), imm ); } \ + emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { i##cod( _reghlp(to), (void*)from ); } \ + emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { i##cod( (void*)to, _reghlp(from) ); } \ + emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { i##cod( ptr##bits[to], imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { i##cod( _mhlp(to) + offset, imm ); } \ + emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _reghlp(to), _mhlp(from) + offset ); } \ + emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _mhlp(to) + offset, _reghlp(from) ); } \ emitterT void 
cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ - { cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ + { i##cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ emitterT void cod##bits##RmStoR( x86IntRegType to, x86IntRegType from1, x86IntRegType from2, int offset ) \ - { cod( _reghlp(to), _mhlp2(from1,from2) + offset ); } + { i##cod( _reghlp(to), _mhlp2(from1,from2) + offset ); } #define DEFINE_LEGACY_SHIFT_HELPER( cod, bits ) \ - emitterT void cod##bits##CLtoR( x86IntRegType to ) { cod( _reghlp(to), cl ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { cod( _reghlp(to), imm ); } \ - emitterT void cod##bits##CLtoM( uptr to ) { cod( ptr##bits[to], cl ); } \ - emitterT void cod##bits##ItoM( uptr to, u8 imm ) { cod( ptr##bits[to], imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { cod( _mhlp(to) + offset, imm ); } \ - emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { cod( _mhlp(to) + offset, cl ); } + emitterT void cod##bits##CLtoR( x86IntRegType to ) { i##cod( _reghlp(to), cl ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { i##cod( _reghlp(to), imm ); } \ + emitterT void cod##bits##CLtoM( uptr to ) { i##cod( ptr##bits[to], cl ); } \ + emitterT void cod##bits##ItoM( uptr to, u8 imm ) { i##cod( ptr##bits[to], imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { i##cod( _mhlp(to) + offset, imm ); } \ + emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { i##cod( _mhlp(to) + offset, cl ); } //emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ // { cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ @@ -113,32 +115,44 @@ DEFINE_OPCODE_SHIFT_LEGACY( SAR ) DEFINE_OPCODE_LEGACY( MOV ) +// ------------------------------------------------------------------------ +#define DEFINE_LEGACY_MOVEXTEND( form, srcbits ) \ + emitterT void MOV##form##X32R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form##X( x86Register32( to ), x86Register##srcbits( from ) ); } \ + emitterT void MOV##form##X32Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \ + emitterT void MOV##form##X32M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[from] ); } + +DEFINE_LEGACY_MOVEXTEND( S, 16 ) +DEFINE_LEGACY_MOVEXTEND( Z, 16 ) +DEFINE_LEGACY_MOVEXTEND( S, 8 ) +DEFINE_LEGACY_MOVEXTEND( Z, 8 ) + + // mov r32 to [r32<(to), from ); + iAND( _reghlp<4>(to), from ); } emitterT void AND32I8toM( uptr to, s8 from ) { - AND( ptr8[to], from ); + iAND( ptr8[to], from ); } @@ -310,103 +324,6 @@ emitterT void NOP( void ) write8(0x90); } - -/* movsx r8 to r32 */ -emitterT void MOVSX32R8toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xBE0F ); - ModRM( 3, to, from ); -} - -emitterT void MOVSX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xBE0F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movsx m8 to r32 */ -emitterT void MOVSX32M8toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xBE0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* movsx r16 to r32 */ -emitterT void MOVSX32R16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xBF0F ); - ModRM( 3, to, from ); -} - -emitterT void MOVSX32Rm16toR( x86IntRegType to, 
x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xBF0F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movsx m16 to r32 */ -emitterT void MOVSX32M16toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xBF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* movzx r8 to r32 */ -emitterT void MOVZX32R8toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB60F ); - ModRM( 3, to, from ); -} - -emitterT void MOVZX32Rm8toR( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xB60F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movzx m8 to r32 */ -emitterT void MOVZX32M8toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xB60F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* movzx r16 to r32 */ -emitterT void MOVZX32R16toR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0,to,from); - write16( 0xB70F ); - ModRM( 3, to, from ); -} - -emitterT void MOVZX32Rm16toR( x86IntRegType to, x86IntRegType from, int offset ) -{ - RexRB(0,to,from); - write16( 0xB70F ); - WriteRmOffsetFrom(to,from,offset); -} - -/* movzx m16 to r32 */ -emitterT void MOVZX32M16toR( x86IntRegType to, u32 from ) -{ - RexR(0,to); - write16( 0xB70F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - /* cmovbe r32 to r32 */ emitterT void CMOVBE32RtoR( x86IntRegType to, x86IntRegType from ) { diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index b1497dd139..bba16e87ed 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -18,9 +18,8 @@ #pragma once -extern void cpudetectInit( void );//this is all that needs to be called and will fill up the below structs +extern void cpudetectInit();//this is all that needs to be called and will fill up the below structs -typedef struct CAPABILITIES CAPABILITIES; //cpu capabilities structure struct CAPABILITIES { u32 hasFloatingPointUnit; @@ -137,9 +136,9 @@ namespace x86Emitter // single-line functions anyway. // #ifdef PCSX2_DEVBUILD -#define __emitinline +# define __emitinline #else -#define __emitinline __forceinline +# define __emitinline __forceinline #endif #ifdef _MSC_VER @@ -148,13 +147,18 @@ namespace x86Emitter # define __noinline #endif - static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) class x86AddressInfo; class ModSibBase; + extern void iSetPtr( void* ptr ); + extern u8* iGetPtr(); + extern void iAlignPtr( uint bytes ); + extern void iAdvancePtr( uint bytes ); + + static __forceinline void write8( u8 val ) { iWrite( val ); @@ -195,7 +199,7 @@ namespace x86Emitter x86Register(): Id( -1 ) {} explicit x86Register( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } - bool IsEmpty() const { return Id == -1; } + bool IsEmpty() const { return Id < 0; } // Returns true if the register is a valid accumulator: Eax, Ax, Al. bool IsAccumulator() const { return Id == 0; } @@ -220,7 +224,7 @@ namespace x86Emitter // ------------------------------------------------------------------------ // Note: GCC parses templates ahead of time apparently as a 'favor' to the programmer, which // means it finds undeclared variables when MSVC does not (Since MSVC compiles templates - // when they are actually used). In practice this sucks since it means we have to move all' + // when they are actually used). 
In practice this sucks since it means we have to move all // our variable and function prototypes from a nicely/neatly unified location to being strewn // all about the the templated code in haphazard fashion. Yay.. >_< // @@ -476,6 +480,118 @@ namespace x86Emitter extern const x86IndexerTypeExplicit<2> ptr16; extern const x86IndexerTypeExplicit<1> ptr8; + ////////////////////////////////////////////////////////////////////////////////////////// + // JccComparisonType - enumerated possibilities for inspired code branching! + // + enum JccComparisonType + { + Jcc_Unknown = -2, + Jcc_Unconditional = -1, + Jcc_Overflow = 0x0, + Jcc_NotOverflow = 0x1, + Jcc_Below = 0x2, + Jcc_Carry = 0x2, + Jcc_AboveOrEqual = 0x3, + Jcc_NotCarry = 0x3, + Jcc_Zero = 0x4, + Jcc_Equal = 0x4, + Jcc_NotZero = 0x5, + Jcc_NotEqual = 0x5, + Jcc_BelowOrEqual = 0x6, + Jcc_Above = 0x7, + Jcc_Signed = 0x8, + Jcc_Unsigned = 0x9, + Jcc_ParityEven = 0xa, + Jcc_ParityOdd = 0xb, + Jcc_Less = 0xc, + Jcc_GreaterOrEqual = 0xd, + Jcc_LessOrEqual = 0xe, + Jcc_Greater = 0xf, + }; + + // Not supported yet: + //E3 cb JECXZ rel8 Jump short if ECX register is 0. + + + ////////////////////////////////////////////////////////////////////////////////////////// + // iSmartJump + // This class provides an interface for generating forward-based j8's or j32's "smartly" + // as per the measured displacement distance. If the displacement is a valid s8, then + // a j8 is inserted, else a j32. + // + // Performance Analysis: j8's use 4 less byes per opcode, and thus can provide + // minor speed benefits in the form of L1/L2 cache clutter. They're also notably faster + // on P4's, and mildly faster on AMDs. (Core2's and i7's don't care) + // + class iSmartJump + { + protected: + u8* m_target; // x86Ptr target address of this label + u8* m_baseptr; // base address of the instruction (passed to the instruction emitter) + JccComparisonType m_cc; // comparison type of the instruction + bool m_written; // set true when the jump is written (at which point the object becomes invalid) + + public: + + const int GetMaxInstructionSize() const + { + jASSUME( m_cc != Jcc_Unknown ); + return ( m_cc == Jcc_Unconditional ) ? 5 : 6; + } + + // Creates a backward jump label which will be passed into a Jxx instruction (or few!) + // later on, and the current x86Ptr is recorded as the target [thus making the class + // creation point the jump target]. + iSmartJump() + { + m_target = iGetPtr(); + m_baseptr = NULL; + m_cc = Jcc_Unknown; + m_written = false; + } + + // ccType - Comparison type to be written back to the jump instruction position. + // + iSmartJump( JccComparisonType ccType ) + { + jASSUME( ccType != Jcc_Unknown ); + m_target = NULL; + m_baseptr = iGetPtr(); + m_cc = ccType; + m_written = false; + iAdvancePtr( GetMaxInstructionSize() ); + } + + JccComparisonType GetCondition() const + { + return m_cc; + } + + u8* GetTarget() const + { + return m_target; + } + + void SetTarget(); + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + template< typename OperandType > + class iForwardJump + { + public: + static const uint OperandSize = sizeof( OperandType ); + + // pointer to base of the instruction *Following* the jump. The jump address will be + // relative to this address. 
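Side note (not part of the patch): the numeric values in JccComparisonType are exactly the low nibble of the corresponding Jcc opcodes, which is why iJccKnownTarget can emit (0x70 | cc) for the short form and 0x0F, (0x80 | cc) for the near form. A standalone spot check:

    #include <cstdint>

    constexpr uint8_t jcc_short(int cc) { return static_cast<uint8_t>(0x70 | cc); }

    static_assert(jcc_short(0x2) == 0x72, "Jcc_Below/Carry -> jb/jc rel8");
    static_assert(jcc_short(0x4) == 0x74, "Jcc_Zero/Equal  -> jz/je rel8");
    static_assert(jcc_short(0x8) == 0x78, "Jcc_Signed      -> js rel8");
    static_assert(jcc_short(0xf) == 0x7f, "Jcc_Greater     -> jg rel8");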
+ s8* const BasePtr; + + public: + iForwardJump( JccComparisonType cctype = Jcc_Unconditional ); + void SetTarget() const; + }; + ////////////////////////////////////////////////////////////////////////////////////////// // namespace Internal @@ -678,13 +794,6 @@ namespace x86Emitter } }; - // if the immediate is zero, we can replace the instruction, or ignore it - // entirely, depending on the instruction being issued. That's what we do here. - // (returns FALSE if no optimization is performed) - // [TODO] : Work-in-progress! - //template< G1Type InstType, typename RegType > - //static __forceinline void _optimize_imm0( RegType to ); - // ------------------------------------------------------------------- // template< G1Type InstType > @@ -789,7 +898,6 @@ namespace x86Emitter __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); } - Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds. }; @@ -799,22 +907,85 @@ namespace x86Emitter // importing Internal into x86Emitter, which done at the header file level would defeat // the purpose!) - extern const Group1ImplAll ADD; - extern const Group1ImplAll OR; - extern const Group1ImplAll ADC; - extern const Group1ImplAll SBB; - extern const Group1ImplAll AND; - extern const Group1ImplAll SUB; - extern const Group1ImplAll XOR; - extern const Group1ImplAll CMP; + extern const Group1ImplAll iADD; + extern const Group1ImplAll iOR; + extern const Group1ImplAll iADC; + extern const Group1ImplAll iSBB; + extern const Group1ImplAll iAND; + extern const Group1ImplAll iSUB; + extern const Group1ImplAll iXOR; + extern const Group1ImplAll iCMP; - extern const Group2ImplAll ROL; - extern const Group2ImplAll ROR; - extern const Group2ImplAll RCL; - extern const Group2ImplAll RCR; - extern const Group2ImplAll SHL; - extern const Group2ImplAll SHR; - extern const Group2ImplAll SAR; + extern const Group2ImplAll iROL; + extern const Group2ImplAll iROR; + extern const Group2ImplAll iRCL; + extern const Group2ImplAll iRCR; + extern const Group2ImplAll iSHL; + extern const Group2ImplAll iSHR; + extern const Group2ImplAll iSAR; + + ////////////////////////////////////////////////////////////////////////////////////////// + // Mov with sign/zero extension implementations: + // + template< int DestOperandSize, int SrcOperandSize > + class MovExtendImpl + { + protected: + static bool Is8BitOperand() { return SrcOperandSize == 1; } + static void prefix16() { if( DestOperandSize == 2 ) iWrite( 0x66 ); } + static __forceinline void emit_base( bool SignExtend ) + { + prefix16(); + iWrite( 0x0f ); + iWrite( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) ); + } + + public: + MovExtendImpl() {} // For the love of GCC. 
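Side note (not part of the patch): emit_base() above builds the second opcode byte arithmetically, and the four combinations land on the standard MOVZX/MOVSX encodings (0F B6/B7/BE/BF). A standalone check of that arithmetic:

    #include <cstdint>

    constexpr uint8_t movextend_op2(bool sign_extend, bool src_is_8bit)
    {
        return static_cast<uint8_t>(0xb6 | (src_is_8bit ? 0 : 1) | (sign_extend ? 8 : 0));
    }

    static_assert(movextend_op2(false, true)  == 0xb6, "movzx r32, r/m8  -> 0F B6");
    static_assert(movextend_op2(false, false) == 0xb7, "movzx r32, r/m16 -> 0F B7");
    static_assert(movextend_op2(true,  true)  == 0xbe, "movsx r32, r/m8  -> 0F BE");
    static_assert(movextend_op2(true,  false) == 0xbf, "movsx r32, r/m16 -> 0F BF");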
+ + static __emitinline void Emit( const x86Register& to, const x86Register& from, bool SignExtend ) + { + emit_base( SignExtend ); + ModRM( 3, from.Id, to.Id ); + } + + static __emitinline void Emit( const x86Register& to, const ModSibStrict& sibsrc, bool SignExtend ) + { + emit_base( SignExtend ); + EmitSibMagic( to.Id, sibsrc ); + } + }; + + // ------------------------------------------------------------------------ + template< bool SignExtend > + class MovExtendImplAll + { + protected: + typedef MovExtendImpl<4, 2> m_16to32; + typedef MovExtendImpl<4, 1> m_8to32; + + public: + __forceinline void operator()( const x86Register32& to, const x86Register16& from ) const { m_16to32::Emit( to, from, SignExtend ); } + __noinline void operator()( const x86Register32& to, const ModSibStrict<2>& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); } + + __forceinline void operator()( const x86Register32& to, const x86Register8& from ) const { m_8to32::Emit( to, from, SignExtend ); } + __noinline void operator()( const x86Register32& to, const ModSibStrict<1>& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); } + + MovExtendImplAll() {} // don't ask. + }; + + // ------------------------------------------------------------------------ + + extern const MovExtendImplAll iMOVSX; + extern const MovExtendImplAll iMOVZX; + + + // if the immediate is zero, we can replace the instruction, or ignore it + // entirely, depending on the instruction being issued. That's what we do here. + // (returns FALSE if no optimization is performed) + // [TODO] : Work-in-progress! + //template< G1Type InstType, typename RegType > + //static __forceinline void _optimize_imm0( RegType to ); /*template< G1Type InstType, typename RegType > static __forceinline void _optimize_imm0( const RegType& to ) @@ -822,26 +993,26 @@ namespace x86Emitter switch( InstType ) { // ADD, SUB, and OR can be ignored if the imm is zero.. - case G1Type_ADD: - case G1Type_SUB: - case G1Type_OR: - return true; + case G1Type_ADD: + case G1Type_SUB: + case G1Type_OR: + return true; // ADC and SBB can never be ignored (could have carry bits) // XOR behavior is distinct as well [or is it the same as NEG or NOT?] - case G1Type_ADC: - case G1Type_SBB: - case G1Type_XOR: - return false; + case G1Type_ADC: + case G1Type_SBB: + case G1Type_XOR: + return false; // replace AND with XOR (or SUB works too.. whatever!) - case G1Type_AND: - XOR( to, to ); + case G1Type_AND: + iXOR( to, to ); return true; // replace CMP with OR reg,reg: - case G1Type_CMP: - OR( to, to ); + case G1Type_CMP: + iOR( to, to ); return true; jNO_DEFAULT From fca65ba2ef35517409f15a3dc42acb2c253f3949 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 15 Apr 2009 17:22:34 +0000 Subject: [PATCH 068/143] Add an include so things compile. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@983 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/HwWrite.cpp | 56 ++++++++++++++++----------------- pcsx2/x86/ix86/ix86_inlines.inl | 2 ++ 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/pcsx2/HwWrite.cpp b/pcsx2/HwWrite.cpp index 9c56df9e6d..3739c23619 100644 --- a/pcsx2/HwWrite.cpp +++ b/pcsx2/HwWrite.cpp @@ -403,26 +403,26 @@ __forceinline void hwWrite16(u32 mem, u16 value) psHu16(mem) = value;//dma2 madr HW_LOG("Hardware write DMA2_MADR 32bit at %x with value %x",mem,value); break; - case 0x1000a020: - psHu16(mem) = value;//dma2 qwc - HW_LOG("Hardware write DMA2_QWC 32bit at %x with value %x",mem,value); - break; - case 0x1000a030: - psHu16(mem) = value;//dma2 taddr - HW_LOG("Hardware write DMA2_TADDR 32bit at %x with value %x",mem,value); - break; - case 0x1000a040: - psHu16(mem) = value;//dma2 asr0 - HW_LOG("Hardware write DMA2_ASR0 32bit at %x with value %x",mem,value); - break; - case 0x1000a050: - psHu16(mem) = value;//dma2 asr1 - HW_LOG("Hardware write DMA2_ASR1 32bit at %x with value %x",mem,value); - break; - case 0x1000a080: - psHu16(mem) = value;//dma2 saddr - HW_LOG("Hardware write DMA2_SADDR 32bit at %x with value %x",mem,value); - break; + case 0x1000a020: + psHu16(mem) = value;//dma2 qwc + HW_LOG("Hardware write DMA2_QWC 32bit at %x with value %x",mem,value); + break; + case 0x1000a030: + psHu16(mem) = value;//dma2 taddr + HW_LOG("Hardware write DMA2_TADDR 32bit at %x with value %x",mem,value); + break; + case 0x1000a040: + psHu16(mem) = value;//dma2 asr0 + HW_LOG("Hardware write DMA2_ASR0 32bit at %x with value %x",mem,value); + break; + case 0x1000a050: + psHu16(mem) = value;//dma2 asr1 + HW_LOG("Hardware write DMA2_ASR1 32bit at %x with value %x",mem,value); + break; + case 0x1000a080: + psHu16(mem) = value;//dma2 saddr + HW_LOG("Hardware write DMA2_SADDR 32bit at %x with value %x",mem,value); + break; #endif case 0x1000b000: // dma3 - fromIPU @@ -441,7 +441,7 @@ __forceinline void hwWrite16(u32 mem, u16 value) HW_LOG("Hardware write IPU0DMA_MADR 32bit at %x with value %x",mem,value); break; case 0x1000b020: - psHu16(mem) = value;//dma2 madr + psHu16(mem) = value;//dma2 madr HW_LOG("Hardware write IPU0DMA_QWC 32bit at %x with value %x",mem,value); break; case 0x1000b030: @@ -466,11 +466,11 @@ __forceinline void hwWrite16(u32 mem, u16 value) #ifdef PCSX2_DEVBUILD case 0x1000b410: - psHu16(mem) = value;//dma2 madr + psHu16(mem) = value;//dma2 madr HW_LOG("Hardware write IPU1DMA_MADR 32bit at %x with value %x",mem,value); break; case 0x1000b420: - psHu16(mem) = value;//dma2 madr + psHu16(mem) = value;//dma2 madr HW_LOG("Hardware write IPU1DMA_QWC 32bit at %x with value %x",mem,value); break; case 0x1000b430: @@ -883,11 +883,11 @@ void __fastcall hwWrite32_generic( u32 mem, u32 value ) return; case D2_MADR: regName = "GIFdma MADR"; break; - case D2_QWC: regName = "GIFdma QWC"; break; - case D2_TADR: regName = "GIFdma TADDR"; break; - case D2_ASR0: regName = "GIFdma ASR0"; break; - case D2_ASR1: regName = "GIFdma ASR1"; break; - case D2_SADR: regName = "GIFdma SADDR"; break; + case D2_QWC: regName = "GIFdma QWC"; break; + case D2_TADR: regName = "GIFdma TADDR"; break; + case D2_ASR0: regName = "GIFdma ASR0"; break; + case D2_ASR1: regName = "GIFdma ASR1"; break; + case D2_SADR: regName = "GIFdma SADDR"; break; //------------------------------------------------------------------ case 0x1000c000: // dma5 - sif0 diff --git a/pcsx2/x86/ix86/ix86_inlines.inl b/pcsx2/x86/ix86/ix86_inlines.inl index 
3fe2ec54fb..a101a3e386 100644 --- a/pcsx2/x86/ix86/ix86_inlines.inl +++ b/pcsx2/x86/ix86/ix86_inlines.inl @@ -46,6 +46,8 @@ // global optimization fails to resolve the externals and junk. // (score one for MSVC!) +#include "System.h" + namespace x86Emitter { ////////////////////////////////////////////////////////////////////////////////////////// From 4d2adcae9f4634f0c212a97846edea1554a5e03e Mon Sep 17 00:00:00 2001 From: sudonim1 Date: Wed, 15 Apr 2009 20:31:58 +0000 Subject: [PATCH 069/143] New speed hack mainly targeting 3D geometry. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@984 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/Pcsx2Config.h | 1 + pcsx2/Linux/Pref.cpp | 4 ++++ pcsx2/windows/HacksDlg.cpp | 41 +++++++++++++++++++++++++++++++++++- pcsx2/windows/ini.cpp | 3 +++ pcsx2/windows/pcsx2.rc | 5 +++++ pcsx2/windows/resource.h | 5 ++++- pcsx2/x86/iVUzerorec.cpp | 1 + 7 files changed, 58 insertions(+), 2 deletions(-) diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index 2da21f9f11..cfff17a591 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -137,6 +137,7 @@ public: int Patch; int CustomFps; int Hacks; + int VUCycleHack; int GameFixes; int CustomFrameSkip; int CustomConsecutiveFrames; diff --git a/pcsx2/Linux/Pref.cpp b/pcsx2/Linux/Pref.cpp index f2097abce9..90f513353b 100644 --- a/pcsx2/Linux/Pref.cpp +++ b/pcsx2/Linux/Pref.cpp @@ -101,6 +101,9 @@ int LoadConfig() #endif GetValuel("Options", Config.Options); GetValuel("Hacks", Config.Hacks); + GetValuel("VUCycleHack", Config.VUCycleHack); + if (Config.VUCycleHack < 0 || Config.VUCycleHack > 4) + Config.VUCycleHack = 0; GetValuel("Fixes", Config.GameFixes); GetValuel("CustomFps", Config.CustomFps); @@ -163,6 +166,7 @@ void SaveConfig() SetValuel("Options", Config.Options); SetValuel("Hacks", Config.Hacks); + SetValuel("VUCycleHack", Config.VUCycleHack); SetValuel("Fixes", Config.GameFixes); SetValuel("Patch", Config.Patch); diff --git a/pcsx2/windows/HacksDlg.cpp b/pcsx2/windows/HacksDlg.cpp index 045705f6bd..3bead95237 100644 --- a/pcsx2/windows/HacksDlg.cpp +++ b/pcsx2/windows/HacksDlg.cpp @@ -19,6 +19,23 @@ #include "win32.h" +static _TCHAR *VUCycleHackLevels[] = { + _T("Speedup for 3D games.\nCurrently off"), + _T("Slight speedup for 3D geometry, should work with most games."), + _T("Moderate speedup for 3D geometry, should work with most games with minor problems."), + _T("Large speedup for 3D geometry, may break many games and make others skip frames."), + _T("Very large speedup for 3D geometry, will break games in interesting ways."), +}; + +static void CheckVUCycleHack(HWND hDlg, int &vucyclehack) +{ + if (vucyclehack < 0 || vucyclehack > 4) { + vucyclehack = 0; + SendDlgItemMessage(hDlg, IDC_VUCYCLE, TBM_SETPOS, TRUE, vucyclehack); + } + SetDlgItemText(hDlg, IDC_VUCYCLEDESC, VUCycleHackLevels[vucyclehack]); +} + BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) { switch (message) { @@ -31,8 +48,26 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) if(CHECK_INTC_STAT_HACK) CheckDlgButton(hDlg, IDC_INTCSTATHACK, TRUE); if(CHECK_ESCAPE_HACK) CheckDlgButton(hDlg, IDC_ESCHACK, TRUE); + SendDlgItemMessage(hDlg, IDC_VUCYCLE, TBM_SETRANGE, TRUE, MAKELONG(0, 4)); + CheckVUCycleHack(hDlg, Config.VUCycleHack); + SendDlgItemMessage(hDlg, IDC_VUCYCLE, TBM_SETPOS, TRUE, Config.VUCycleHack); + return TRUE; + case WM_HSCROLL: { + HWND slider = (HWND)lParam; + int curpos = HIWORD(wParam); + switch (LOWORD(wParam)) 
{ + case TB_THUMBTRACK: + case TB_THUMBPOSITION: + break; + default: + curpos = SendMessage(slider, TBM_GETPOS, 0, 0); + } + CheckVUCycleHack(hDlg, curpos); + return FALSE; + } + case WM_COMMAND: switch (LOWORD(wParam)) { @@ -53,12 +88,16 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) newhacks |= IsDlgButtonChecked(hDlg, IDC_INTCSTATHACK) << 5; newhacks |= IsDlgButtonChecked(hDlg, IDC_ESCHACK) << 10; + int newvucyclehack = SendDlgItemMessage(hDlg, IDC_VUCYCLE, TBM_GETPOS, 0, 0); + CheckVUCycleHack(hDlg, newvucyclehack); + EndDialog(hDlg, TRUE); - if( newhacks != Config.Hacks ) + if( newhacks != Config.Hacks || newvucyclehack != Config.VUCycleHack) { SysRestorableReset(); Config.Hacks = newhacks; + Config.VUCycleHack = newvucyclehack; SaveConfig(); } } diff --git a/pcsx2/windows/ini.cpp b/pcsx2/windows/ini.cpp index 7fe4942aea..6a1016afef 100644 --- a/pcsx2/windows/ini.cpp +++ b/pcsx2/windows/ini.cpp @@ -223,6 +223,9 @@ void IniFile::DoConfig( PcsxConfig& Conf ) Entry( "eeOptions", Conf.eeOptions, DEFAULT_eeOptions ); Entry( "vuOptions", Conf.vuOptions, DEFAULT_vuOptions ); Entry( "SpeedHacks", Conf.Hacks ); + Entry( "VUCycleHack", Conf.VUCycleHack, 0 ); + if (Conf.VUCycleHack < 0 || Conf.VUCycleHack > 4) + Conf.VUCycleHack = 0; } ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/windows/pcsx2.rc b/pcsx2/windows/pcsx2.rc index 63316593a2..65623bd61d 100644 --- a/pcsx2/windows/pcsx2.rc +++ b/pcsx2/windows/pcsx2.rc @@ -371,6 +371,9 @@ BEGIN LTEXT "Moderate speedup and works well with most games.",IDC_STATIC,25,90,129,19 CONTROL "INTC Sync Hack (experimental)",IDC_INTCSTATHACK,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,174,43,127,10 LTEXT "Huge speedup in many games, and a pretty high compatibility rate (some games still work better with EE sync hacks).",IDC_STATIC,186,55,140,28 + CONTROL "",IDC_VUCYCLE,"msctls_trackbar32",TBS_BOTH | TBS_NOTICKS | WS_TABSTOP,174,176,36,15 + LTEXT "This space intentionally left blank",IDC_VUCYCLEDESC,186,194,142,30 + LTEXT "VU Cycle Stealing (experimental)",IDC_STATIC,210,180,105,8 END @@ -393,6 +396,8 @@ BEGIN LEFTMARGIN, 7 RIGHTMARGIN, 328 VERTGUIDE, 13 + VERTGUIDE, 174 + VERTGUIDE, 186 TOPMARGIN, 7 BOTTOMMARGIN, 256 END diff --git a/pcsx2/windows/resource.h b/pcsx2/windows/resource.h index 6e9f1ab34c..24e02c3816 100644 --- a/pcsx2/windows/resource.h +++ b/pcsx2/windows/resource.h @@ -272,6 +272,9 @@ #define IDC_MCD_LABEL2 1325 #define IDC_INTCSTATHACK 1326 #define IDC_EE_CHECK3 1327 +#define IDC_SLIDER1 1327 +#define IDC_VUCYCLE 1327 +#define IDC_VUCYCLEDESC 1328 #define IDC_CPULOG 1500 #define IDC_MEMLOG 1501 #define IDC_HWLOG 1502 @@ -405,7 +408,7 @@ #ifndef APSTUDIO_READONLY_SYMBOLS #define _APS_NEXT_RESOURCE_VALUE 141 #define _APS_NEXT_COMMAND_VALUE 40018 -#define _APS_NEXT_CONTROL_VALUE 1326 +#define _APS_NEXT_CONTROL_VALUE 1329 #define _APS_NEXT_SYMED_VALUE 104 #endif #endif diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index ce9549be0c..d2fbdf1e17 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -2294,6 +2294,7 @@ void SuperVUCleanupProgram(u32 startpc, int vuindex) VU = vuindex ? 
&VU1 : &VU0; VU->cycle += s_TotalVUCycles; + cpuRegs.cycle += s_TotalVUCycles * Config.VUCycleHack; if( (int)s_writeQ > 0 ) VU->VI[REG_Q] = VU->q; if( (int)s_writeP > 0 ) { assert(VU == &VU1); From f228a91c939ce62050fd97dd2a7c9ea336d16086 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Wed, 15 Apr 2009 21:00:32 +0000 Subject: [PATCH 070/143] Added CMOV to the emitter, renamed x86Struct stuff to iStruct, renamed XMMREGS / X86REGS / MMXREGS defines to iRegCnt_XMM / iRegCnt_GPR / iRegCnt_MMX, and undid a couple u32 optimizations which could have caused unexpected behavior in the future, if we ever decided to employ some particularly obscure case of self-modifying code. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@985 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 12 + pcsx2/x86/iCore.cpp | 84 +-- pcsx2/x86/iCore.h | 6 +- pcsx2/x86/iVUmicroUpper.cpp | 2 +- pcsx2/x86/iVUzerorec.cpp | 30 +- pcsx2/x86/ix86-32/iCore-32.cpp | 68 +-- pcsx2/x86/ix86-32/iR5900-32.cpp | 14 +- pcsx2/x86/ix86/ix86.cpp | 261 +++++---- pcsx2/x86/ix86/ix86.h | 9 - pcsx2/x86/ix86/ix86_impl_group1.h | 179 +++++++ pcsx2/x86/ix86/ix86_impl_group2.h | 151 ++++++ pcsx2/x86/ix86/ix86_impl_movs.h | 157 ++++++ pcsx2/x86/ix86/ix86_inlines.inl | 26 +- pcsx2/x86/ix86/ix86_instructions.h | 94 ++-- pcsx2/x86/ix86/ix86_legacy.cpp | 298 ++--------- pcsx2/x86/ix86/ix86_legacy_internal.h | 11 +- pcsx2/x86/ix86/ix86_sse.cpp | 20 +- pcsx2/x86/ix86/ix86_tools.cpp | 2 +- pcsx2/x86/ix86/ix86_types.h | 592 +++++---------------- 19 files changed, 1014 insertions(+), 1002 deletions(-) create mode 100644 pcsx2/x86/ix86/ix86_impl_group1.h create mode 100644 pcsx2/x86/ix86/ix86_impl_group2.h create mode 100644 pcsx2/x86/ix86/ix86_impl_movs.h diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index fc01b41d0c..f238a8c12e 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2925,6 +2925,18 @@ RelativePath="..\..\x86\ix86\ix86_fpu.cpp" > + + + + + + diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index 0ce3a2c5b5..4cb6dc6bc6 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -40,10 +40,10 @@ u32 g_recWriteback = 0; char g_globalXMMLocked = 0; #endif -_xmmregs xmmregs[XMMREGS], s_saveXMMregs[XMMREGS]; +_xmmregs xmmregs[iREGCNT_XMM], s_saveXMMregs[iREGCNT_XMM]; // X86 caching -_x86regs x86regs[X86REGS], s_saveX86regs[X86REGS]; +_x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR]; #include using namespace std; @@ -119,16 +119,16 @@ int _getFreeXMMreg() int i, tempi; u32 bestcount = 0x10000; - for (i=0; iregs[xmmregs[i].reg] & (EEINST_LIVE0|EEINST_LIVE1|EEINST_LIVE2)) ) { @@ -139,7 +139,7 @@ int _getFreeXMMreg() } // check for future xmm usage - for (i=0; iregs[xmmregs[i].reg] & EEINST_XMM) ) { @@ -151,7 +151,7 @@ int _getFreeXMMreg() tempi = -1; bestcount = 0xffff; - for (i=0; iregs[xmmregs[i].reg]&EEINST_USED) ) { @@ -951,12 +951,12 @@ void _moveXMMreg(int xmmreg) int i; if( !xmmregs[xmmreg].inuse ) return; - for (i=0; i& blocks, int vuindex) if( (*itblock)->nStartx86 >= 0 ) { pregs = &s_vecRegArray[(*itblock)->nStartx86]; fprintf(f, "STR: "); - for(i = 0; i < X86REGS; ++i) { + for(i = 0; i < iREGCNT_GPR; ++i) { if( pregs[i].inuse ) fprintf(f, "%.2d ", pregs[i].reg); else fprintf(f, "-1 "); } @@ -581,7 +581,7 @@ void SuperVUDumpBlock(list& blocks, int vuindex) if( (*itblock)->nEndx86 >= 0 ) { fprintf(f, "END: "); pregs = &s_vecRegArray[(*itblock)->nEndx86]; - for(i = 0; i < X86REGS; 
++i) { + for(i = 0; i < iREGCNT_GPR; ++i) { if( pregs[i].inuse ) fprintf(f, "%.2d ", pregs[i].reg); else fprintf(f, "-1 "); } @@ -1879,14 +1879,14 @@ void VuBaseBlock::AssignVFRegs() if( type & BLOCKTYPE_ANALYZED ) { // check if changed - for(i = 0; i < XMMREGS; ++i) { + for(i = 0; i < iREGCNT_XMM; ++i) { if( xmmregs[i].inuse != startregs[i].inuse ) break; if( xmmregs[i].inuse && (xmmregs[i].reg != startregs[i].reg || xmmregs[i].type != startregs[i].type) ) break; } - if( i == XMMREGS ) return; // nothing changed + if( i == iREGCNT_XMM ) return; // nothing changed } u8* oldX86 = x86Ptr; @@ -1904,7 +1904,7 @@ void VuBaseBlock::AssignVFRegs() // redo the counters so that the proper regs are released - for(int j = 0; j < XMMREGS; ++j) { + for(int j = 0; j < iREGCNT_XMM; ++j) { if( xmmregs[j].inuse ) { if( xmmregs[j].type == XMMTYPE_VFREG ) { int count = 0; @@ -2119,10 +2119,10 @@ void VuBaseBlock::AssignVIRegs(int parent) // child assert( allocX86Regs == -1 ); allocX86Regs = s_vecRegArray.size(); - s_vecRegArray.resize(allocX86Regs+X86REGS); + s_vecRegArray.resize(allocX86Regs+iREGCNT_GPR); _x86regs* pregs = &s_vecRegArray[allocX86Regs]; - memset(pregs, 0, sizeof(_x86regs)*X86REGS); + memset(pregs, 0, sizeof(_x86regs)*iREGCNT_GPR); assert( parents.size() > 0 ); @@ -2210,10 +2210,10 @@ static void SuperVUAssignRegs() // assign the regs int regid = s_vecRegArray.size(); - s_vecRegArray.resize(regid+X86REGS); + s_vecRegArray.resize(regid+iREGCNT_GPR); _x86regs* mergedx86 = &s_vecRegArray[regid]; - memset(mergedx86, 0, sizeof(_x86regs)*X86REGS); + memset(mergedx86, 0, sizeof(_x86regs)*iREGCNT_GPR); if( !bfirst ) { *(u32*)usedregs = *((u32*)usedregs+1) = *((u32*)usedregs+2) = *((u32*)usedregs+3) = 0; @@ -2221,7 +2221,7 @@ static void SuperVUAssignRegs() FORIT(itblock2, s_markov.children) { assert( (*itblock2)->allocX86Regs >= 0 ); _x86regs* pregs = &s_vecRegArray[(*itblock2)->allocX86Regs]; - for(int i = 0; i < X86REGS; ++i) { + for(int i = 0; i < iREGCNT_GPR; ++i) { if( pregs[i].inuse && pregs[i].reg < 16) { //assert( pregs[i].reg < 16); usedregs[pregs[i].reg]++; @@ -2237,7 +2237,7 @@ static void SuperVUAssignRegs() mergedx86[num].reg = i; mergedx86[num].type = (s_vu?X86TYPE_VU1:0)|X86TYPE_VI; mergedx86[num].mode = MODE_READ; - if( ++num >= X86REGS ) + if( ++num >= iREGCNT_GPR ) break; if( num == ESP ) ++num; @@ -2559,7 +2559,7 @@ void svudispfntemp() // frees all regs taking into account the livevars void SuperVUFreeXMMregs(u32* livevars) { - for(int i = 0; i < XMMREGS; ++i) { + for(int i = 0; i < iREGCNT_XMM; ++i) { if( xmmregs[i].inuse ) { // same reg if( (xmmregs[i].mode & MODE_WRITE) ) { @@ -2772,7 +2772,7 @@ void VuBaseBlock::Recompile() #ifdef SUPERVU_X86CACHING if( nEndx86 >= 0 ) { _x86regs* endx86 = &s_vecRegArray[nEndx86]; - for(int i = 0; i < X86REGS; ++i) { + for(int i = 0; i < iREGCNT_GPR; ++i) { if( endx86[i].inuse ) { if( s_JumpX86 == i && x86regs[s_JumpX86].inuse ) { @@ -3239,7 +3239,7 @@ void VuInstruction::Recompile(list::iterator& itinst, u32 vuxyz) #ifdef SUPERVU_X86CACHING // redo the counters so that the proper regs are released - for(int j = 0; j < X86REGS; ++j) { + for(int j = 0; j < iREGCNT_GPR; ++j) { if( x86regs[j].inuse && X86_ISVI(x86regs[j].type) ) { int count = 0; itinst2 = itinst; diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 9904f342a1..e90e051a22 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -78,16 +78,16 @@ int _getFreeX86reg(int mode) int i, tempi; u32 bestcount = 0x10000; - int maxreg = 
(mode&MODE_8BITREG)?4:X86REGS; + int maxreg = (mode&MODE_8BITREG)?4:iREGCNT_GPR; - for (i=0; i= maxreg ) continue; if( (mode&MODE_NOFRAME) && reg==EBP ) continue; if (x86regs[reg].inuse == 0) { - g_x86checknext = (reg+1)%X86REGS; + g_x86checknext = (reg+1)%iREGCNT_GPR; return reg; } } @@ -207,16 +207,16 @@ int _allocX86reg(int x86reg, int type, int reg, int mode) // don't alloc EAX and ESP,EBP if MODE_NOFRAME int oldmode = mode; int noframe = mode&MODE_NOFRAME; - int maxreg = (mode&MODE_8BITREG)?4:X86REGS; + int maxreg = (mode&MODE_8BITREG)?4:iREGCNT_GPR; mode &= ~(MODE_NOFRAME|MODE_8BITREG); int readfromreg = -1; if( type != X86TYPE_TEMP ) { - if( maxreg < X86REGS ) { + if( maxreg < iREGCNT_GPR ) { // make sure reg isn't in the higher regs - for(i = maxreg; i < X86REGS; ++i) { + for(i = maxreg; i < iREGCNT_GPR; ++i) { if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg) continue; if( mode & MODE_READ ) { @@ -324,7 +324,7 @@ int _checkX86reg(int type, int reg, int mode) { int i; - for (i=0; i= 0 && x86reg < X86REGS ); + assert( x86reg >= 0 && x86reg < iREGCNT_GPR ); if( x86regs[x86reg].inuse && (x86regs[x86reg].mode&MODE_WRITE) ) { x86regs[x86reg].mode &= ~MODE_WRITE; @@ -419,7 +419,7 @@ void _freeX86reg(int x86reg) void _freeX86regs() { int i; - for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { // mmxregs[i] is unsigned, and MMX_GPR == 0, so the first part is always true. if( !(g_pCurInstInfo->regs[mmxregs[i].reg-MMX_GPR] & (EEINST_LIVE0|EEINST_LIVE1)) ) { @@ -483,7 +483,7 @@ int _getFreeMMXreg() } // check for future xmm usage - for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { if( !(g_pCurInstInfo->regs[mmxregs[i].reg] & EEINST_MMX) ) { @@ -493,7 +493,7 @@ int _getFreeMMXreg() } } - for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { if( !EEINST_ISLIVE64(mmxregs[i].reg-MMX_GPR) ) { @@ -721,7 +721,7 @@ u8 _hasFreeMMXreg() } // check for dead regs - for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { if( !(g_pCurInstInfo->regs[mmxregs[i].reg-MMX_GPR]&EEINST_USED) ) { @@ -735,7 +735,7 @@ u8 _hasFreeMMXreg() void _freeMMXreg(int mmxreg) { - assert( mmxreg < MMXREGS ); + assert( mmxreg < iREGCNT_MMX ); if (!mmxregs[mmxreg].inuse) return; if (mmxregs[mmxreg].mode & MODE_WRITE ) { @@ -762,12 +762,12 @@ void _moveMMXreg(int mmxreg) int i; if( !mmxregs[mmxreg].inuse ) return; - for (i=0; i 0 ) xmmregs[i].counter = 1000-count; @@ -1587,7 +1587,7 @@ StartRecomp: // see how many stores there are u32 j; // use xmmregs since only supporting lwc1,lq,swc1,sq - for(j = i+8; j < s_nEndBlock && j < i+4*XMMREGS; j += 4 ) { + for(j = i+8; j < s_nEndBlock && j < i+4*iREGCNT_XMM; j += 4 ) { u32 nncode = *(u32*)PSM(j); if( (nncode>>26) != (curcode>>26) || ((curcode>>21)&0x1f) != ((nncode>>21)&0x1f) || _eeLoadWritesRs(nncode)) @@ -1596,7 +1596,7 @@ StartRecomp: if( j > i+8 ) { u32 num = (j-i)>>2; // number of stores that can coissue - assert( num <= XMMREGS ); + assert( num <= iREGCNT_XMM ); g_pCurInstInfo[0].numpeeps = num-1; g_pCurInstInfo[0].info |= EEINSTINFO_COREC; diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 1f4656d778..c1aed679ea 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -62,7 +62,7 @@ __threadlocal u8 *x86Ptr; __threadlocal u8 *j8Ptr[32]; __threadlocal u32 *j32Ptr[32]; -__threadlocal XMMSSEType g_xmmtypes[XMMREGS] = { XMMT_INT }; +__threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT }; namespace x86Emitter { @@ -73,22 +73,22 @@ const x86IndexerTypeExplicit<1> ptr8; // 
------------------------------------------------------------------------ -template< int OperandSize > const x86Register x86Register::Empty; +template< int OperandSize > const iRegister iRegister::Empty; const x86IndexReg x86IndexReg::Empty; -const x86Register32 +const iRegister32 eax( 0 ), ebx( 3 ), ecx( 1 ), edx( 2 ), esi( 6 ), edi( 7 ), ebp( 5 ), esp( 4 ); -const x86Register16 +const iRegister16 ax( 0 ), bx( 3 ), cx( 1 ), dx( 2 ), si( 6 ), di( 7 ), bp( 5 ), sp( 4 ); -const x86Register8 +const iRegister8 al( 0 ), cl( 1 ), dl( 2 ), bl( 3 ), ah( 4 ), ch( 5 ), @@ -96,28 +96,8 @@ const x86Register8 namespace Internal { - const Group1ImplAll iADD; - const Group1ImplAll iOR; - const Group1ImplAll iADC; - const Group1ImplAll iSBB; - const Group1ImplAll iAND; - const Group1ImplAll iSUB; - const Group1ImplAll iXOR; - const Group1ImplAll iCMP; - - const Group2ImplAll iROL; - const Group2ImplAll iROR; - const Group2ImplAll iRCL; - const Group2ImplAll iRCR; - const Group2ImplAll iSHL; - const Group2ImplAll iSHR; - const Group2ImplAll iSAR; - - const MovExtendImplAll iMOVSX; - const MovExtendImplAll iMOVZX; - // Performance note: VC++ wants to use byte/word register form for the following - // ModRM/SibSB constructors if we use iWrite, and furthermore unrolls the + // ModRM/SibSB constructors when we use iWrite, and furthermore unrolls the // the shift using a series of ADDs for the following results: // add cl,cl // add cl,cl @@ -130,21 +110,38 @@ namespace Internal // register aliases and false dependencies. (although may have been ideal for early- // brand P4s with a broken barrel shifter?). The workaround is to do our own manual // x86Ptr access and update using a u32 instead of u8. Thanks to little endianness, - // the same end result is achieved and no false dependencies are generated. + // the same end result is achieved and no false dependencies are generated. The draw- + // back is that it clobbers 3 bytes past the end of the write, which could cause a + // headache for someone who himself is doing some kind of headache-inducing amount of + // recompiler SMC. So we don't do a work-around, and just hope for the compiler to + // stop sucking someday instead. :) // // (btw, I know this isn't a critical performance item by any means, but it's // annoying simply because it *should* be an easy thing to optimize) __forceinline void ModRM( uint mod, uint reg, uint rm ) { - *(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm; - x86Ptr++; + iWrite( (mod << 6) | (reg << 3) | rm ); + //*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm; + //x86Ptr++; } __forceinline void SibSB( u32 ss, u32 index, u32 base ) { - *(u32*)x86Ptr = (ss << 6) | (index << 3) | base; - x86Ptr++; + iWrite( (ss << 6) | (index << 3) | base ); + //*(u32*)x86Ptr = (ss << 6) | (index << 3) | base; + //x86Ptr++; + } + + __forceinline void iWriteDisp( int regfield, s32 displacement ) + { + ModRM( 0, regfield, ModRm_UseDisp32 ); + iWrite( displacement ); + } + + __forceinline void iWriteDisp( int regfield, const void* address ) + { + iWriteDisp( regfield, (s32)address ); } // ------------------------------------------------------------------------ @@ -172,7 +169,7 @@ namespace Internal // regfield - register field to be written to the ModRm. This is either a register specifier // or an opcode extension. In either case, the instruction determines the value for us. 
// - __forceinline void EmitSibMagic( uint regfield, const ModSibBase& info ) + void EmitSibMagic( uint regfield, const ModSibBase& info ) { jASSUME( regfield < 8 ); @@ -188,8 +185,7 @@ namespace Internal if( info.Index.IsEmpty() ) { - ModRM( 0, regfield, ModRm_UseDisp32 ); - iWrite( info.Displacement ); + iWriteDisp( regfield, info.Displacement ); return; } else @@ -227,14 +223,63 @@ namespace Internal if( displacement_size != 0 ) { - *(s32*)x86Ptr = info.Displacement; - x86Ptr += (displacement_size == 1) ? 1 : 4; + if( displacement_size == 1 ) + iWrite( info.Displacement ); + else + iWrite( info.Displacement ); } } } using namespace Internal; +const Group1ImplAll iADD; +const Group1ImplAll iOR; +const Group1ImplAll iADC; +const Group1ImplAll iSBB; +const Group1ImplAll iAND; +const Group1ImplAll iSUB; +const Group1ImplAll iXOR; +const Group1ImplAll iCMP; + +const Group2ImplAll iROL; +const Group2ImplAll iROR; +const Group2ImplAll iRCL; +const Group2ImplAll iRCR; +const Group2ImplAll iSHL; +const Group2ImplAll iSHR; +const Group2ImplAll iSAR; + +const MovExtendImplAll iMOVSX; +const MovExtendImplAll iMOVZX; + +const CMovImplGeneric iCMOV; + +const CMovImplAll iCMOVA; +const CMovImplAll iCMOVAE; +const CMovImplAll iCMOVB; +const CMovImplAll iCMOVBE; + +const CMovImplAll iCMOVG; +const CMovImplAll iCMOVGE; +const CMovImplAll iCMOVL; +const CMovImplAll iCMOVLE; + +const CMovImplAll iCMOVZ; +const CMovImplAll iCMOVE; +const CMovImplAll iCMOVNZ; +const CMovImplAll iCMOVNE; + +const CMovImplAll iCMOVO; +const CMovImplAll iCMOVNO; +const CMovImplAll iCMOVC; +const CMovImplAll iCMOVNC; + +const CMovImplAll iCMOVS; +const CMovImplAll iCMOVNS; +const CMovImplAll iCMOVPE; +const CMovImplAll iCMOVPO; + // ------------------------------------------------------------------------ // Assigns the current emitter buffer target address. // This is provided instead of using x86Ptr directly, since we may in the future find @@ -390,18 +435,20 @@ static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags ) if( displacement_size != 0 ) { - *(s32*)x86Ptr = src.Displacement; - x86Ptr += (displacement_size == 1) ? 1 : 4; + if( displacement_size == 1 ) + iWrite( src.Displacement ); + else + iWrite( src.Displacement ); } } -__emitinline void LEA( x86Register32 to, const ModSibBase& src, bool preserve_flags ) +__emitinline void iLEA( iRegister32 to, const ModSibBase& src, bool preserve_flags ) { EmitLeaMagic( to, src, preserve_flags ); } -__emitinline void LEA( x86Register16 to, const ModSibBase& src, bool preserve_flags ) +__emitinline void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_flags ) { write8( 0x66 ); EmitLeaMagic( to, src, preserve_flags ); @@ -410,7 +457,7 @@ __emitinline void LEA( x86Register16 to, const ModSibBase& src, bool preserve_fl ////////////////////////////////////////////////////////////////////////////////////////// // MOV instruction Implementation -template< typename ImmType, typename SibMagicType > +template< typename ImmType > class MovImpl { public: @@ -422,7 +469,7 @@ protected: public: // ------------------------------------------------------------------------ - static __forceinline void Emit( const x86Register& to, const x86Register& from ) + static __forceinline void Emit( const iRegister& to, const iRegister& from ) { if( to == from ) return; // ignore redundant MOVs. 
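For readers less familiar with x86 encodings, the ModRM byte that ModRM() and EmitSibMagic() assemble above packs three fields: mod (2 bits, addressing form), reg (3 bits, register id or opcode extension), and rm (3 bits, the other register or memory operand). A small illustrative helper, using hypothetical names rather than the emitter's own, makes the layout explicit:

    #include <cstdint>

    constexpr std::uint8_t make_modrm(unsigned mod, unsigned reg, unsigned rm)
    {
        return static_cast<std::uint8_t>((mod << 6) | (reg << 3) | rm);
    }

    // "mov ecx, edx" is opcode 8B /r with mod=3 (register direct), reg=ECX (1), rm=EDX (2):
    static const std::uint8_t mov_ecx_edx[2] = { 0x8B, make_modrm(3, 1, 2) };

With mod=3 the rm field names a register directly, which is the path the register-to-register Emit overloads above take; the other mod values bring in displacements and the SIB byte handled by EmitSibMagic.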
@@ -432,7 +479,7 @@ public: } // ------------------------------------------------------------------------ - static __forceinline void Emit( const ModSibBase& dest, const x86Register& from ) + static __forceinline void Emit( const ModSibBase& dest, const iRegister& from ) { prefix16(); @@ -447,12 +494,12 @@ public: else { iWrite( Is8BitOperand() ? 0x88 : 0x89 ); - SibMagicType::Emit( from.Id, dest ); + EmitSibMagic( from.Id, dest ); } } // ------------------------------------------------------------------------ - static __forceinline void Emit( const x86Register& to, const ModSibBase& src ) + static __forceinline void Emit( const iRegister& to, const ModSibBase& src ) { prefix16(); @@ -467,12 +514,50 @@ public: else { iWrite( Is8BitOperand() ? 0x8a : 0x8b ); - SibMagicType::Emit( to.Id, src ); + EmitSibMagic( to.Id, src ); } } // ------------------------------------------------------------------------ - static __forceinline void Emit( const x86Register& to, ImmType imm ) + static __forceinline void Emit( void* dest, const iRegister& from ) + { + prefix16(); + + // mov eax has a special from when writing directly to a DISP32 address + + if( from.IsAccumulator() ) + { + iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); + iWrite( (s32)dest ); + } + else + { + iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + iWriteDisp( from.Id, dest ); + } + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, const void* src ) + { + prefix16(); + + // mov eax has a special from when reading directly from a DISP32 address + + if( to.IsAccumulator() ) + { + iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); + iWrite( (s32)src ); + } + else + { + iWrite( Is8BitOperand() ? 0x8a : 0x8b ); + iWriteDisp( to.Id, src ); + } + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, ImmType imm ) { // Note: MOV does not have (reg16/32,imm8) forms. @@ -486,20 +571,16 @@ public: { prefix16(); iWrite( Is8BitOperand() ? 0xc6 : 0xc7 ); - SibMagicType::Emit( 0, dest ); + EmitSibMagic( 0, dest ); iWrite( imm ); } }; namespace Internal { - typedef MovImpl MOV32; - typedef MovImpl MOV16; - typedef MovImpl MOV8; - - typedef MovImpl MOV32i; - typedef MovImpl MOV16i; - typedef MovImpl MOV8i; + typedef MovImpl MOV32; + typedef MovImpl MOV16; + typedef MovImpl MOV8; } // Inlining Notes: @@ -512,96 +593,72 @@ namespace Internal // TODO : Turn this into a macro after it's been debugged and accuracy-approved! 
:D // ---------- 32 Bit Interface ----------- -__forceinline void iMOV( const x86Register32& to, const x86Register32& from ) { MOV32i::Emit( to, from ); } -__forceinline void iMOV( const x86Register32& to, const void* src ) { MOV32i::Emit( to, ptr32[src] ); } -__forceinline void iMOV( const void* dest, const x86Register32& from ) { MOV32i::Emit( ptr32[dest], from ); } -__noinline void iMOV( const ModSibBase& sibdest, const x86Register32& from ) { MOV32::Emit( sibdest, from ); } -__noinline void iMOV( const x86Register32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); } +__forceinline void iMOV( const iRegister32& to, const iRegister32& from ) { MOV32::Emit( to, from ); } +__forceinline void iMOV( const iRegister32& to, const void* src ) { MOV32::Emit( to, ptr32[src] ); } +__forceinline void iMOV( void* dest, const iRegister32& from ) { MOV32::Emit( ptr32[dest], from ); } +__noinline void iMOV( const ModSibBase& sibdest, const iRegister32& from ) { MOV32::Emit( sibdest, from ); } +__noinline void iMOV( const iRegister32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); } __noinline void iMOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); } -void iMOV( const x86Register32& to, u32 imm, bool preserve_flags ) +void iMOV( const iRegister32& to, u32 imm, bool preserve_flags ) { if( !preserve_flags && (imm == 0) ) iXOR( to, to ); else - MOV32i::Emit( to, imm ); + MOV32::Emit( to, imm ); } // ---------- 16 Bit Interface ----------- -__forceinline void iMOV( const x86Register16& to, const x86Register16& from ) { MOV16i::Emit( to, from ); } -__forceinline void iMOV( const x86Register16& to, const void* src ) { MOV16i::Emit( to, ptr16[src] ); } -__forceinline void iMOV( const void* dest, const x86Register16& from ) { MOV16i::Emit( ptr16[dest], from ); } -__noinline void iMOV( const ModSibBase& sibdest, const x86Register16& from ) { MOV16::Emit( sibdest, from ); } -__noinline void iMOV( const x86Register16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); } +__forceinline void iMOV( const iRegister16& to, const iRegister16& from ) { MOV16::Emit( to, from ); } +__forceinline void iMOV( const iRegister16& to, const void* src ) { MOV16::Emit( to, ptr16[src] ); } +__forceinline void iMOV( void* dest, const iRegister16& from ) { MOV16::Emit( ptr16[dest], from ); } +__noinline void iMOV( const ModSibBase& sibdest, const iRegister16& from ) { MOV16::Emit( sibdest, from ); } +__noinline void iMOV( const iRegister16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); } __noinline void iMOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); } -void iMOV( const x86Register16& to, u16 imm, bool preserve_flags ) +void iMOV( const iRegister16& to, u16 imm, bool preserve_flags ) { if( !preserve_flags && (imm == 0) ) iXOR( to, to ); else - MOV16i::Emit( to, imm ); + MOV16::Emit( to, imm ); } // ---------- 8 Bit Interface ----------- -__forceinline void iMOV( const x86Register8& to, const x86Register8& from ) { MOV8i::Emit( to, from ); } -__forceinline void iMOV( const x86Register8& to, const void* src ) { MOV8i::Emit( to, ptr8[src] ); } -__forceinline void iMOV( const void* dest, const x86Register8& from ) { MOV8i::Emit( ptr8[dest], from ); } -__noinline void iMOV( const ModSibBase& sibdest, const x86Register8& from ) { MOV8::Emit( sibdest, from ); } -__noinline void iMOV( const x86Register8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); } +__forceinline void iMOV( const iRegister8& to, const iRegister8& from ) { 
MOV8::Emit( to, from ); } +__forceinline void iMOV( const iRegister8& to, const void* src ) { MOV8::Emit( to, ptr8[src] ); } +__forceinline void iMOV( void* dest, const iRegister8& from ) { MOV8::Emit( ptr8[dest], from ); } +__noinline void iMOV( const ModSibBase& sibdest, const iRegister8& from ) { MOV8::Emit( sibdest, from ); } +__noinline void iMOV( const iRegister8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); } __noinline void iMOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); } -void iMOV( const x86Register8& to, u8 imm, bool preserve_flags ) +void iMOV( const iRegister8& to, u8 imm, bool preserve_flags ) { if( !preserve_flags && (imm == 0) ) iXOR( to, to ); else - MOV8i::Emit( to, imm ); + MOV8::Emit( to, imm ); } -////////////////////////////////////////////////////////////////////////////////////////// -// Miscellaneous Section! -// Various Instructions with no parameter and no special encoding logic. -// -__forceinline void RET() { write8( 0xC3 ); } -__forceinline void CBW() { write16( 0x9866 ); } -__forceinline void CWD() { write8( 0x98 ); } -__forceinline void CDQ() { write8( 0x99 ); } -__forceinline void CWDE() { write8( 0x98 ); } - -__forceinline void LAHF() { write8( 0x9f ); } -__forceinline void SAHF() { write8( 0x9e ); } - - ////////////////////////////////////////////////////////////////////////////////////////// // Push / Pop Emitters // // Note: pushad/popad implementations are intentionally left out. The instructions are // invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead. - -__forceinline void POP( x86Register32 from ) { write8( 0x58 | from.Id ); } - -__emitinline void POP( const ModSibBase& from ) +__emitinline void iPOP( const ModSibBase& from ) { iWrite( 0x8f ); Internal::EmitSibMagic( 0, from ); } -__forceinline void PUSH( u32 imm ) { write8( 0x68 ); write32( imm ); } -__forceinline void PUSH( x86Register32 from ) { write8( 0x50 | from.Id ); } - -__emitinline void PUSH( const ModSibBase& from ) +__emitinline void iPUSH( const ModSibBase& from ) { iWrite( 0xff ); Internal::EmitSibMagic( 6, from ); } -// pushes the EFLAGS register onto the stack -__forceinline void PUSHFD() { write8( 0x9C ); } -// pops the EFLAGS register from the stack -__forceinline void POPFD() { write8( 0x9D ); } } diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index 6f2cb31575..502cfcded9 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -42,15 +42,6 @@ // ix86_inlines.inl file when it is known that inlining of ModSib functions are // wanted). // -// -// Important when Using the New Emitter: -// Make sure there is *no* data in use or of importance past the end of the -// current x86Ptr. Ie, don't do fancy x86Ptr rewind tricks of your own. The -// emitter uses optimized writes which will clobber data past the end of the -// instruction it's emitting, so even if you know for sure the instruction you -// are writing is 5 bytes, the emitter will likely emit 9 bytes and the re- -// wind the x86Ptr to the end of the instruction. 
-// #pragma once diff --git a/pcsx2/x86/ix86/ix86_impl_group1.h b/pcsx2/x86/ix86/ix86_impl_group1.h new file mode 100644 index 0000000000..b948700b9b --- /dev/null +++ b/pcsx2/x86/ix86/ix86_impl_group1.h @@ -0,0 +1,179 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +// Note: This header is meant to be included from within the x86Emitter::Internal namespace. + +// Instructions implemented in this header are as follows -->> + +enum G1Type +{ + G1Type_ADD=0, + G1Type_OR, + G1Type_ADC, + G1Type_SBB, + G1Type_AND, + G1Type_SUB, + G1Type_XOR, + G1Type_CMP +}; + +// ------------------------------------------------------------------- +template< typename ImmType, G1Type InstType > +class Group1Impl +{ +public: + static const uint OperandSize = sizeof(ImmType); + + Group1Impl() {} // because GCC doesn't like static classes + +protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + +public: + static __emitinline void Emit( const iRegister& to, const iRegister& from ) + { + prefix16(); + iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + ModRM( 3, from.Id, to.Id ); + } + + static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from ) + { + prefix16(); + iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + EmitSibMagic( from.Id, sibdest ); + } + + static __emitinline void Emit( const iRegister& to, const ModSibBase& sibsrc ) + { + prefix16(); + iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); + EmitSibMagic( to.Id, sibsrc ); + } + + static __emitinline void Emit( void* dest, const iRegister& from ) + { + prefix16(); + iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + iWriteDisp( from.Id, dest ); + } + + static __emitinline void Emit( const iRegister& to, const void* src ) + { + prefix16(); + iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); + iWriteDisp( to.Id, src ); + } + + static __emitinline void Emit( const iRegister& to, ImmType imm ) + { + prefix16(); + if( !Is8BitOperand() && is_s8( imm ) ) + { + iWrite( 0x83 ); + ModRM( 3, InstType, to.Id ); + iWrite( imm ); + } + else + { + if( to.IsAccumulator() ) + iWrite( (Is8BitOperand() ? 4 : 5) | (InstType<<3) ); + else + { + iWrite( Is8BitOperand() ? 0x80 : 0x81 ); + ModRM( 3, InstType, to.Id ); + } + iWrite( imm ); + } + } + + static __emitinline void Emit( const ModSibStrict& sibdest, ImmType imm ) + { + if( Is8BitOperand() ) + { + iWrite( 0x80 ); + EmitSibMagic( InstType, sibdest ); + iWrite( imm ); + } + else + { + prefix16(); + iWrite( is_s8( imm ) ? 
0x83 : 0x81 ); + EmitSibMagic( InstType, sibdest ); + if( is_s8( imm ) ) + iWrite( imm ); + else + iWrite( imm ); + } + } +}; + + +// ------------------------------------------------------------------- +// +template< G1Type InstType > +class Group1ImplAll +{ +protected: + typedef Group1Impl m_32; + typedef Group1Impl m_16; + typedef Group1Impl m_8; + + // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) + +public: + // ---------- 32 Bit Interface ----------- + __forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( to, from ); } + __forceinline void operator()( const iRegister32& to, const void* src ) const { m_32::Emit( to, src ); } + __forceinline void operator()( void* dest, const iRegister32& from ) const { m_32::Emit( dest, from ); } + __noinline void operator()( const ModSibBase& sibdest, const iRegister32& from ) const { m_32::Emit( sibdest, from ); } + __noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const{ m_32::Emit( to, sibsrc ); } + __noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); } + + void operator()( const iRegister32& to, u32 imm, bool needs_flags=false ) const + { + //if( needs_flags || (imm != 0) || !_optimize_imm0() ) + m_32::Emit( to, imm ); + } + + // ---------- 16 Bit Interface ----------- + __forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( to, from ); } + __forceinline void operator()( const iRegister16& to, const void* src ) const { m_16::Emit( to, src ); } + __forceinline void operator()( void* dest, const iRegister16& from ) const { m_16::Emit( dest, from ); } + __noinline void operator()( const ModSibBase& sibdest, const iRegister16& from ) const { m_16::Emit( sibdest, from ); } + __noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const{ m_16::Emit( to, sibsrc ); } + __noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); } + + void operator()( const iRegister16& to, u16 imm, bool needs_flags=false ) const { m_16::Emit( to, imm ); } + + // ---------- 8 Bit Interface ----------- + __forceinline void operator()( const iRegister8& to, const iRegister8& from ) const { m_8::Emit( to, from ); } + __forceinline void operator()( const iRegister8& to, const void* src ) const { m_8::Emit( to, src ); } + __forceinline void operator()( void* dest, const iRegister8& from ) const { m_8::Emit( dest, from ); } + __noinline void operator()( const ModSibBase& sibdest, const iRegister8& from ) const { m_8::Emit( sibdest, from ); } + __noinline void operator()( const iRegister8& to, const ModSibBase& sibsrc ) const{ m_8::Emit( to, sibsrc ); } + __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } + + void operator()( const iRegister8& to, u8 imm, bool needs_flags=false ) const { m_8::Emit( to, imm ); } + + Group1ImplAll() {} // Why does GCC need these? 
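The immediate-size logic in Group1Impl::Emit above boils down to three cases: opcode 0x83 with a sign-extended imm8 whenever the value fits in a signed byte, the one-byte accumulator form when the target is EAX, and 0x81 with a full imm32 otherwise. A rough self-contained sketch of the same decision for ADD (Group1 extension /0), with hypothetical helper names:

    #include <cstdint>
    #include <vector>

    static bool fits_s8(std::int32_t imm) { return imm >= -128 && imm <= 127; }

    // Encodes "add <r32>, imm" the way the 32-bit Group1 path above does.
    static std::vector<std::uint8_t> encode_add_r32_imm(unsigned reg, std::int32_t imm)
    {
        std::vector<std::uint8_t> out;
        const std::uint8_t modrm = std::uint8_t(0xC0 | (0 << 3) | reg);   // mod=3, /0 = ADD
        if (fits_s8(imm)) {
            out.push_back(0x83); out.push_back(modrm); out.push_back(std::uint8_t(imm));
        } else if (reg == 0) {                                            // EAX: short accumulator form
            out.push_back(0x05);
            for (int i = 0; i < 4; ++i) out.push_back(std::uint8_t(imm >> (8 * i)));
        } else {
            out.push_back(0x81); out.push_back(modrm);
            for (int i = 0; i < 4; ++i) out.push_back(std::uint8_t(imm >> (8 * i)));
        }
        return out;
    }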
+}; + diff --git a/pcsx2/x86/ix86/ix86_impl_group2.h b/pcsx2/x86/ix86/ix86_impl_group2.h new file mode 100644 index 0000000000..23cb8c78a3 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_impl_group2.h @@ -0,0 +1,151 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +// Note: This header is meant to be included from within the x86Emitter::Internal namespace. + +// Instructions implemented in this header are as follows -->> + +enum G2Type +{ + G2Type_ROL=0, + G2Type_ROR, + G2Type_RCL, + G2Type_RCR, + G2Type_SHL, + G2Type_SHR, + G2Type_Unused, + G2Type_SAR +}; + +// ------------------------------------------------------------------- +// Group 2 (shift) instructions have no Sib/ModRM forms. +// Optimization Note: For Imm forms, we ignore the instruction if the shift count is zero. +// This is a safe optimization since any zero-value shift does not affect any flags. +// +template< typename ImmType, G2Type InstType > +class Group2Impl +{ +public: + static const uint OperandSize = sizeof(ImmType); + + Group2Impl() {} // For the love of GCC. + +protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + +public: + static __emitinline void Emit( const iRegister& to, const iRegister8& from ) + { + jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?) + + prefix16(); + iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); + ModRM( 3, InstType, to.Id ); + } + + static __emitinline void Emit( const iRegister& to, u8 imm ) + { + if( imm == 0 ) return; + + prefix16(); + if( imm == 1 ) + { + // special encoding of 1's + iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); + ModRM( 3, InstType, to.Id ); + } + else + { + iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); + ModRM( 3, InstType, to.Id ); + iWrite( imm ); + } + } + + static __emitinline void Emit( const ModSibStrict& sibdest, const iRegister8& from ) + { + jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?) + + prefix16(); + iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); + EmitSibMagic( from.Id, sibdest ); + } + + static __emitinline void Emit( const ModSibStrict& sibdest, u8 imm ) + { + if( imm == 0 ) return; + + prefix16(); + if( imm == 1 ) + { + // special encoding of 1's + iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); + EmitSibMagic( InstType, sibdest ); + } + else + { + iWrite( Is8BitOperand() ? 
0xc0 : 0xc1 ); + EmitSibMagic( InstType, sibdest ); + iWrite( imm ); + } + } +}; + +// ------------------------------------------------------------------- +// +template< G2Type InstType > +class Group2ImplAll +{ +protected: + typedef Group2Impl m_32; + typedef Group2Impl m_16; + typedef Group2Impl m_8; + + // Inlining Notes: + // I've set up the inlining to be as practical and intelligent as possible, which means + // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to + // virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis- + // creation of the compiler. + // + + // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) + +public: + // ---------- 32 Bit Interface ----------- + __forceinline void operator()( const iRegister32& to, const iRegister8& from ) const { m_32::Emit( to, from ); } + __noinline void operator()( const ModSibStrict<4>& sibdest, const iRegister8& from ) const { m_32::Emit( sibdest, from ); } + __noinline void operator()( const ModSibStrict<4>& sibdest, u8 imm ) const { m_32::Emit( sibdest, imm ); } + void operator()( const iRegister32& to, u8 imm ) const { m_32::Emit( to, imm ); } + + // ---------- 16 Bit Interface ----------- + __forceinline void operator()( const iRegister16& to, const iRegister8& from ) const { m_16::Emit( to, from ); } + __noinline void operator()( const ModSibStrict<2>& sibdest, const iRegister8& from ) const { m_16::Emit( sibdest, from ); } + __noinline void operator()( const ModSibStrict<2>& sibdest, u8 imm ) const { m_16::Emit( sibdest, imm ); } + void operator()( const iRegister16& to, u8 imm ) const { m_16::Emit( to, imm ); } + + // ---------- 8 Bit Interface ----------- + __forceinline void operator()( const iRegister8& to, const iRegister8& from ) const { m_8::Emit( to, from ); } + __noinline void operator()( const ModSibStrict<1>& sibdest, const iRegister8& from ) const { m_8::Emit( sibdest, from ); } + __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } + void operator()( const iRegister8& to, u8 imm ) const { m_8::Emit( to, imm ); } + + Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds. +}; diff --git a/pcsx2/x86/ix86/ix86_impl_movs.h b/pcsx2/x86/ix86/ix86_impl_movs.h new file mode 100644 index 0000000000..b9b67460b9 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_impl_movs.h @@ -0,0 +1,157 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +// Header: ix86_impl_movs.h -- covers cmov and movsx/movzx. +// Note: This header is meant to be included from within the x86Emitter::Internal namespace. 
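Returning briefly to the Group2 shift encodings just above, before the new movs header continues: the emitter skips zero shift counts entirely (they change no flags), uses the short 0xD0/0xD1 forms for a count of 1, and the 0xC0/0xC1 imm8 forms otherwise. A rough standalone illustration for 32-bit SHL (Group2 extension /4, matching G2Type_SHL above), with hypothetical names:

    #include <cstdint>
    #include <vector>

    static std::vector<std::uint8_t> encode_shl_r32_imm(unsigned reg, std::uint8_t count)
    {
        std::vector<std::uint8_t> out;
        if (count == 0) return out;                                        // emit nothing, flags untouched
        const std::uint8_t modrm = std::uint8_t(0xC0 | (4 << 3) | reg);    // mod=3, /4 = SHL
        if (count == 1) { out.push_back(0xD1); out.push_back(modrm); }
        else            { out.push_back(0xC1); out.push_back(modrm); out.push_back(count); }
        return out;
    }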
+ +////////////////////////////////////////////////////////////////////////////////////////// +// CMOV !! [in all of it's disappointing lack-of glory] +// +template< int OperandSize > +class CMovImpl +{ +protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + + static __forceinline void emit_base( JccComparisonType cc ) + { + jASSUME( cc >= 0 && cc <= 0x0f ); + prefix16(); + write8( 0x0f ); + write8( 0x40 | cc ); + } + +public: + CMovImpl() {} + + static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const iRegister& from ) + { + emit_base( cc ); + ModRM( 3, to.Id, from.Id ); + } + + static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const void* src ) + { + emit_base( cc ); + iWriteDisp( to.Id, src ); + } + + static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const ModSibBase& sibsrc ) + { + emit_base( cc ); + EmitSibMagic( to.Id, sibsrc ); + } + +}; + +// ------------------------------------------------------------------------ +class CMovImplGeneric +{ +protected: + typedef CMovImpl<4> m_32; + typedef CMovImpl<2> m_16; + +public: + __forceinline void operator()( JccComparisonType ccType, const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); } + __forceinline void operator()( JccComparisonType ccType, const iRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); } + __noinline void operator()( JccComparisonType ccType, const iRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); } + + __forceinline void operator()( JccComparisonType ccType, const iRegister16& to, const iRegister16& from ) const { m_16::Emit( ccType, to, from ); } + __forceinline void operator()( JccComparisonType ccType, const iRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); } + __noinline void operator()( JccComparisonType ccType, const iRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); } + + CMovImplGeneric() {} // don't ask. +}; + +// ------------------------------------------------------------------------ +template< JccComparisonType ccType > +class CMovImplAll +{ +protected: + typedef CMovImpl<4> m_32; + typedef CMovImpl<2> m_16; + +public: + __forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); } + __forceinline void operator()( const iRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); } + __noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); } + + __forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( ccType, to, from ); } + __forceinline void operator()( const iRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); } + __noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); } + + CMovImplAll() {} // don't ask. 
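CMovImpl::emit_base above shows the whole trick behind CMOVcc: the opcode is simply 0x0F followed by 0x40 plus the condition code, reusing the same 0x0..0xF codes as the JccComparisonType enum from earlier in this series. A minimal sketch of the register-to-register case, using hypothetical names rather than the emitter's interface:

    #include <cstdint>

    // Emits "cmovcc to, from" for two 32-bit registers at 'p', advancing the pointer.
    // 'cc' uses the 0x0..0xF condition codes (e.g. Jcc_Zero == 0x4).
    static void emit_cmov_r32_r32(std::uint8_t*& p, unsigned cc, unsigned to, unsigned from)
    {
        *p++ = 0x0F;
        *p++ = std::uint8_t(0x40 | cc);                   // CMOVcc r32, r/m32
        *p++ = std::uint8_t(0xC0 | (to << 3) | from);     // ModRM: mod=3, reg=dest, rm=src
    }
    // e.g. cmovz eax, ecx  ->  0F 44 C1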
+}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Mov with sign/zero extension implementations (movsx / movzx) +// +template< int DestOperandSize, int SrcOperandSize > +class MovExtendImpl +{ +protected: + static bool Is8BitOperand() { return SrcOperandSize == 1; } + static void prefix16() { if( DestOperandSize == 2 ) iWrite( 0x66 ); } + static __forceinline void emit_base( bool SignExtend ) + { + prefix16(); + iWrite( 0x0f ); + iWrite( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) ); + } + +public: + MovExtendImpl() {} // For the love of GCC. + + static __emitinline void Emit( const iRegister& to, const iRegister& from, bool SignExtend ) + { + emit_base( SignExtend ); + ModRM( 3, from.Id, to.Id ); + } + + static __emitinline void Emit( const iRegister& to, const ModSibStrict& sibsrc, bool SignExtend ) + { + emit_base( SignExtend ); + EmitSibMagic( to.Id, sibsrc ); + } +}; + +// ------------------------------------------------------------------------ +template< bool SignExtend > +class MovExtendImplAll +{ +protected: + typedef MovExtendImpl<4, 2> m_16to32; + typedef MovExtendImpl<4, 1> m_8to32; + typedef MovExtendImpl<2, 1> m_8to16; + +public: + __forceinline void operator()( const iRegister32& to, const iRegister16& from ) const { m_16to32::Emit( to, from, SignExtend ); } + __noinline void operator()( const iRegister32& to, const ModSibStrict<2>& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); } + + __forceinline void operator()( const iRegister32& to, const iRegister8& from ) const { m_8to32::Emit( to, from, SignExtend ); } + __noinline void operator()( const iRegister32& to, const ModSibStrict<1>& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); } + + __forceinline void operator()( const iRegister16& to, const iRegister8& from ) const { m_8to16::Emit( to, from, SignExtend ); } + __noinline void operator()( const iRegister16& to, const ModSibStrict<1>& sibsrc ) const { m_8to16::Emit( to, sibsrc, SignExtend ); } + + MovExtendImplAll() {} // don't ask. 
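The opcode arithmetic in MovExtendImpl::emit_base above is worth spelling out: starting from 0x0F 0xB6 (MOVZX r, r/m8), bit 0 selects the 16-bit source form and bit 3 selects the sign-extending MOVSX variants. A small sketch of just that selection, with a hypothetical helper name:

    #include <cstdint>

    static std::uint8_t movext_second_opcode(bool src_is_8bit, bool sign_extend)
    {
        // 0F B6 = movzx r, r/m8     0F B7 = movzx r, r/m16
        // 0F BE = movsx r, r/m8     0F BF = movsx r, r/m16
        return std::uint8_t(0xB6 | (src_is_8bit ? 0 : 1) | (sign_extend ? 8 : 0));
    }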
+}; + diff --git a/pcsx2/x86/ix86/ix86_inlines.inl b/pcsx2/x86/ix86/ix86_inlines.inl index a101a3e386..4eed2390f5 100644 --- a/pcsx2/x86/ix86/ix86_inlines.inl +++ b/pcsx2/x86/ix86/ix86_inlines.inl @@ -53,29 +53,29 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // x86Register Method Implementations // - __forceinline x86AddressInfo x86IndexReg::operator+( const x86IndexReg& right ) const + __forceinline iAddressInfo x86IndexReg::operator+( const x86IndexReg& right ) const { - return x86AddressInfo( *this, right ); + return iAddressInfo( *this, right ); } - __forceinline x86AddressInfo x86IndexReg::operator+( const x86AddressInfo& right ) const + __forceinline iAddressInfo x86IndexReg::operator+( const iAddressInfo& right ) const { return right + *this; } - __forceinline x86AddressInfo x86IndexReg::operator+( s32 right ) const + __forceinline iAddressInfo x86IndexReg::operator+( s32 right ) const { - return x86AddressInfo( *this, right ); + return iAddressInfo( *this, right ); } - __forceinline x86AddressInfo x86IndexReg::operator*( u32 right ) const + __forceinline iAddressInfo x86IndexReg::operator*( u32 right ) const { - return x86AddressInfo( Empty, *this, right ); + return iAddressInfo( Empty, *this, right ); } - __forceinline x86AddressInfo x86IndexReg::operator<<( u32 shift ) const + __forceinline iAddressInfo x86IndexReg::operator<<( u32 shift ) const { - return x86AddressInfo( Empty, *this, 1<& sibdest, u32 imm ); // ---------- 16 Bit Interface ----------- - extern void iMOV( const x86Register16& to, const x86Register16& from ); - extern void iMOV( const ModSibBase& sibdest, const x86Register16& from ); - extern void iMOV( const x86Register16& to, const ModSibBase& sibsrc ); - extern void iMOV( const x86Register16& to, const void* src ); - extern void iMOV( const void* dest, const x86Register16& from ); + extern void iMOV( const iRegister16& to, const iRegister16& from ); + extern void iMOV( const ModSibBase& sibdest, const iRegister16& from ); + extern void iMOV( const iRegister16& to, const ModSibBase& sibsrc ); + extern void iMOV( const iRegister16& to, const void* src ); + extern void iMOV( void* dest, const iRegister16& from ); // preserve_flags - set to true to disable optimizations which could alter the state of // the flags (namely replacing mov reg,0 with xor). 
- extern void iMOV( const x86Register16& to, u16 imm, bool preserve_flags=false ); + extern void iMOV( const iRegister16& to, u16 imm, bool preserve_flags=false ); extern void iMOV( const ModSibStrict<2>& sibdest, u16 imm ); // ---------- 8 Bit Interface ----------- - extern void iMOV( const x86Register8& to, const x86Register8& from ); - extern void iMOV( const ModSibBase& sibdest, const x86Register8& from ); - extern void iMOV( const x86Register8& to, const ModSibBase& sibsrc ); - extern void iMOV( const x86Register8& to, const void* src ); - extern void iMOV( const void* dest, const x86Register8& from ); + extern void iMOV( const iRegister8& to, const iRegister8& from ); + extern void iMOV( const ModSibBase& sibdest, const iRegister8& from ); + extern void iMOV( const iRegister8& to, const ModSibBase& sibsrc ); + extern void iMOV( const iRegister8& to, const void* src ); + extern void iMOV( void* dest, const iRegister8& from ); - extern void iMOV( const x86Register8& to, u8 imm, bool preserve_flags=false ); + extern void iMOV( const iRegister8& to, u8 imm, bool preserve_flags=false ); extern void iMOV( const ModSibStrict<1>& sibdest, u8 imm ); ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index f89443112c..d0fd220455 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -35,9 +35,9 @@ using namespace x86Emitter; template< int OperandSize > -static __forceinline x86Register _reghlp( x86IntRegType src ) +static __forceinline iRegister _reghlp( x86IntRegType src ) { - return x86Register( src ); + return iRegister( src ); } static __forceinline ModSibBase _mrmhlp( x86IntRegType src ) @@ -116,31 +116,34 @@ DEFINE_OPCODE_SHIFT_LEGACY( SAR ) DEFINE_OPCODE_LEGACY( MOV ) // ------------------------------------------------------------------------ -#define DEFINE_LEGACY_MOVEXTEND( form, srcbits ) \ - emitterT void MOV##form##X32R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form##X( x86Register32( to ), x86Register##srcbits( from ) ); } \ - emitterT void MOV##form##X32Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \ - emitterT void MOV##form##X32M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form##X( x86Register32( to ), ptr##srcbits[from] ); } +#define DEFINE_LEGACY_MOVEXTEND( form, destbits, srcbits ) \ + emitterT void MOV##form##destbits##R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form##( iRegister##destbits( to ), iRegister##srcbits( from ) ); } \ + emitterT void MOV##form##destbits##Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form##( iRegister##destbits( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \ + emitterT void MOV##form##destbits##M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form##( iRegister##destbits( to ), ptr##srcbits[from] ); } -DEFINE_LEGACY_MOVEXTEND( S, 16 ) -DEFINE_LEGACY_MOVEXTEND( Z, 16 ) -DEFINE_LEGACY_MOVEXTEND( S, 8 ) -DEFINE_LEGACY_MOVEXTEND( Z, 8 ) +DEFINE_LEGACY_MOVEXTEND( SX, 32, 16 ) +DEFINE_LEGACY_MOVEXTEND( ZX, 32, 16 ) +DEFINE_LEGACY_MOVEXTEND( SX, 32, 8 ) +DEFINE_LEGACY_MOVEXTEND( ZX, 32, 8 ) + +DEFINE_LEGACY_MOVEXTEND( SX, 16, 8 ) +DEFINE_LEGACY_MOVEXTEND( ZX, 16, 8 ) // mov r32 to [r32< - class x86Register + class iRegister { public: - static const x86Register Empty; // defined as an empty/unused value (-1) + 
static const iRegister Empty; // defined as an empty/unused value (-1) int Id; - x86Register( const x86Register& src ) : Id( src.Id ) {} - x86Register(): Id( -1 ) {} - explicit x86Register( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + iRegister( const iRegister& src ) : Id( src.Id ) {} + iRegister(): Id( -1 ) {} + explicit iRegister( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } bool IsEmpty() const { return Id < 0; } // Returns true if the register is a valid accumulator: Eax, Ax, Al. bool IsAccumulator() const { return Id == 0; } - bool operator==( const x86Register& src ) const + bool operator==( const iRegister& src ) const { return (Id == src.Id); } - bool operator!=( const x86Register& src ) const + bool operator!=( const iRegister& src ) const { return (Id != src.Id); } - x86Register& operator=( const x86Register& src ) + iRegister& operator=( const iRegister& src ) { Id = src.Id; return *this; @@ -229,62 +229,62 @@ namespace x86Emitter // all about the the templated code in haphazard fashion. Yay.. >_< // - typedef x86Register<4> x86Register32; - typedef x86Register<2> x86Register16; - typedef x86Register<1> x86Register8; + typedef iRegister<4> iRegister32; + typedef iRegister<2> iRegister16; + typedef iRegister<1> iRegister8; - extern const x86Register32 eax; - extern const x86Register32 ebx; - extern const x86Register32 ecx; - extern const x86Register32 edx; - extern const x86Register32 esi; - extern const x86Register32 edi; - extern const x86Register32 ebp; - extern const x86Register32 esp; + extern const iRegister32 eax; + extern const iRegister32 ebx; + extern const iRegister32 ecx; + extern const iRegister32 edx; + extern const iRegister32 esi; + extern const iRegister32 edi; + extern const iRegister32 ebp; + extern const iRegister32 esp; - extern const x86Register16 ax; - extern const x86Register16 bx; - extern const x86Register16 cx; - extern const x86Register16 dx; - extern const x86Register16 si; - extern const x86Register16 di; - extern const x86Register16 bp; - extern const x86Register16 sp; + extern const iRegister16 ax; + extern const iRegister16 bx; + extern const iRegister16 cx; + extern const iRegister16 dx; + extern const iRegister16 si; + extern const iRegister16 di; + extern const iRegister16 bp; + extern const iRegister16 sp; - extern const x86Register8 al; - extern const x86Register8 cl; - extern const x86Register8 dl; - extern const x86Register8 bl; - extern const x86Register8 ah; - extern const x86Register8 ch; - extern const x86Register8 dh; - extern const x86Register8 bh; + extern const iRegister8 al; + extern const iRegister8 cl; + extern const iRegister8 dl; + extern const iRegister8 bl; + extern const iRegister8 ah; + extern const iRegister8 ch; + extern const iRegister8 dh; + extern const iRegister8 bh; ////////////////////////////////////////////////////////////////////////////////////////// // Use 32 bit registers as out index register (for ModSib memory address calculations) - // Only x86IndexReg provides operators for constructing x86AddressInfo types. - class x86IndexReg : public x86Register32 + // Only x86IndexReg provides operators for constructing iAddressInfo types. 
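(Editorial aside, not part of the patch: a minimal sketch of how the renamed register types are meant to be used at a call site. The iMOV / iMOVZX / iMOVSX forms referenced here are the ones declared elsewhere in this header; which exact overload each line resolves to is an assumption based only on the declarations visible in this diff.)

    iMOV( ax, bx );          // 16-bit reg->reg; the 0x66 prefix is emitted internally
    iMOV( bx, 0x1234 );      // 16-bit immediate form (preserve_flags defaults to false)
    iMOVZX( eax, ax );       // zero-extend 16 -> 32
    iMOVSX( eax, cl );       // sign-extend 8 -> 32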
+ class x86IndexReg : public iRegister32 { public: static const x86IndexReg Empty; // defined as an empty/unused value (-1) public: - x86IndexReg(): x86Register32() {} - x86IndexReg( const x86IndexReg& src ) : x86Register32( src.Id ) {} - x86IndexReg( const x86Register32& src ) : x86Register32( src ) {} - explicit x86IndexReg( int regId ) : x86Register32( regId ) {} + x86IndexReg(): iRegister32() {} + x86IndexReg( const x86IndexReg& src ) : iRegister32( src.Id ) {} + x86IndexReg( const iRegister32& src ) : iRegister32( src ) {} + explicit x86IndexReg( int regId ) : iRegister32( regId ) {} // Returns true if the register is the stack pointer: ESP. bool IsStackPointer() const { return Id == 4; } - x86AddressInfo operator+( const x86IndexReg& right ) const; - x86AddressInfo operator+( const x86AddressInfo& right ) const; - x86AddressInfo operator+( s32 right ) const; + iAddressInfo operator+( const x86IndexReg& right ) const; + iAddressInfo operator+( const iAddressInfo& right ) const; + iAddressInfo operator+( s32 right ) const; - x86AddressInfo operator*( u32 factor ) const; - x86AddressInfo operator<<( u32 shift ) const; + iAddressInfo operator*( u32 factor ) const; + iAddressInfo operator<<( u32 shift ) const; - x86IndexReg& operator=( const x86Register32& src ) + x86IndexReg& operator=( const iRegister32& src ) { Id = src.Id; return *this; @@ -293,7 +293,7 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // - class x86AddressInfo + class iAddressInfo { public: x86IndexReg Base; // base register (no scale) @@ -302,7 +302,7 @@ namespace x86Emitter s32 Displacement; // address displacement public: - __forceinline x86AddressInfo( const x86IndexReg& base, const x86IndexReg& index, int factor=1, s32 displacement=0 ) : + __forceinline iAddressInfo( const x86IndexReg& base, const x86IndexReg& index, int factor=1, s32 displacement=0 ) : Base( base ), Index( index ), Factor( factor ), @@ -310,7 +310,7 @@ namespace x86Emitter { } - __forceinline explicit x86AddressInfo( const x86IndexReg& base, int displacement=0 ) : + __forceinline explicit iAddressInfo( const x86IndexReg& base, int displacement=0 ) : Base( base ), Index(), Factor(0), @@ -318,7 +318,7 @@ namespace x86Emitter { } - __forceinline explicit x86AddressInfo( s32 displacement ) : + __forceinline explicit iAddressInfo( s32 displacement ) : Base(), Index(), Factor(0), @@ -326,24 +326,24 @@ namespace x86Emitter { } - static x86AddressInfo FromIndexReg( const x86IndexReg& index, int scale=0, s32 displacement=0 ); + static iAddressInfo FromIndexReg( const x86IndexReg& index, int scale=0, s32 displacement=0 ); public: bool IsByteSizeDisp() const { return is_s8( Displacement ); } - __forceinline x86AddressInfo& Add( s32 imm ) + __forceinline iAddressInfo& Add( s32 imm ) { Displacement += imm; return *this; } - __forceinline x86AddressInfo& Add( const x86IndexReg& src ); - __forceinline x86AddressInfo& Add( const x86AddressInfo& src ); + __forceinline iAddressInfo& Add( const x86IndexReg& src ); + __forceinline iAddressInfo& Add( const iAddressInfo& src ); - __forceinline x86AddressInfo operator+( const x86IndexReg& right ) const { return x86AddressInfo( *this ).Add( right ); } - __forceinline x86AddressInfo operator+( const x86AddressInfo& right ) const { return x86AddressInfo( *this ).Add( right ); } - __forceinline x86AddressInfo operator+( s32 imm ) const { return x86AddressInfo( *this ).Add( imm ); } - __forceinline x86AddressInfo operator-( s32 imm ) const { return 
x86AddressInfo( *this ).Add( -imm ); } + __forceinline iAddressInfo operator+( const x86IndexReg& right ) const { return iAddressInfo( *this ).Add( right ); } + __forceinline iAddressInfo operator+( const iAddressInfo& right ) const { return iAddressInfo( *this ).Add( right ); } + __forceinline iAddressInfo operator+( s32 imm ) const { return iAddressInfo( *this ).Add( imm ); } + __forceinline iAddressInfo operator-( s32 imm ) const { return iAddressInfo( *this ).Add( -imm ); } }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -351,12 +351,12 @@ namespace x86Emitter // // This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means // that the Base, Index, Scale, and Displacement values are all in the correct arrange- - // ments, and it serves as a type-safe layer between the x86Register's operators (which - // generate x86AddressInfo types) and the emitter's ModSib instruction forms. Without this, - // the x86Register would pass as a ModSib type implicitly, and that would cause ambiguity + // ments, and it serves as a type-safe layer between the iRegister's operators (which + // generate iAddressInfo types) and the emitter's ModSib instruction forms. Without this, + // the iRegister would pass as a ModSib type implicitly, and that would cause ambiguity // on a number of instructions. // - // End users should always use x86AddressInfo instead. + // End users should always use iAddressInfo instead. // class ModSibBase { @@ -367,7 +367,7 @@ namespace x86Emitter s32 Displacement; // offset applied to the Base/Index registers. public: - explicit ModSibBase( const x86AddressInfo& src ); + explicit ModSibBase( const iAddressInfo& src ); explicit ModSibBase( s32 disp ); ModSibBase( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ); @@ -394,7 +394,7 @@ namespace x86Emitter class ModSibStrict : public ModSibBase { public: - __forceinline explicit ModSibStrict( const x86AddressInfo& src ) : ModSibBase( src ) {} + __forceinline explicit ModSibStrict( const iAddressInfo& src ) : ModSibBase( src ) {} __forceinline explicit ModSibStrict( s32 disp ) : ModSibBase( disp ) {} __forceinline ModSibStrict( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) : ModSibBase( base, index, scale, displacement ) {} @@ -423,7 +423,7 @@ namespace x86Emitter return ModSibBase( src, x86IndexReg::Empty ); } - __forceinline ModSibBase operator[]( const x86AddressInfo& src ) const + __forceinline ModSibBase operator[]( const iAddressInfo& src ) const { return ModSibBase( src ); } @@ -457,7 +457,7 @@ namespace x86Emitter return ModSibStrict( src, x86IndexReg::Empty ); } - __forceinline ModSibStrict operator[]( const x86AddressInfo& src ) const + __forceinline ModSibStrict operator[]( const iAddressInfo& src ) const { return ModSibStrict( src ); } @@ -598,387 +598,14 @@ namespace x86Emitter { extern void ModRM( uint mod, uint reg, uint rm ); extern void SibSB( u32 ss, u32 index, u32 base ); + extern void iWriteDisp( int regfield, s32 displacement ); + extern void iWriteDisp( int regfield, const void* address ); + extern void EmitSibMagic( uint regfield, const ModSibBase& info ); - struct SibMagic - { - static void Emit( uint regfield, const ModSibBase& info ) - { - EmitSibMagic( regfield, info ); - } - }; - - struct SibMagicInline - { - static __forceinline void Emit( uint regfield, const ModSibBase& info ) - { - EmitSibMagic( regfield, info ); - } - }; - - - enum G1Type - { - G1Type_ADD=0, - G1Type_OR, - G1Type_ADC, 
- G1Type_SBB, - G1Type_AND, - G1Type_SUB, - G1Type_XOR, - G1Type_CMP - }; - - enum G2Type - { - G2Type_ROL=0, - G2Type_ROR, - G2Type_RCL, - G2Type_RCR, - G2Type_SHL, - G2Type_SHR, - G2Type_Unused, - G2Type_SAR - }; - - // ------------------------------------------------------------------- - template< typename ImmType, G1Type InstType, typename SibMagicType > - class Group1Impl - { - public: - static const uint OperandSize = sizeof(ImmType); - - Group1Impl() {} // because GCC doesn't like static classes - - protected: - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } - - public: - static __emitinline void Emit( const x86Register& to, const x86Register& from ) - { - prefix16(); - iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); - ModRM( 3, from.Id, to.Id ); - } - - static __emitinline void Emit( const ModSibBase& sibdest, const x86Register& from ) - { - prefix16(); - iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); - SibMagicType::Emit( from.Id, sibdest ); - } - - static __emitinline void Emit( const x86Register& to, const ModSibBase& sibsrc ) - { - prefix16(); - iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); - SibMagicType::Emit( to.Id, sibsrc ); - } - - static __emitinline void Emit( const x86Register& to, ImmType imm ) - { - prefix16(); - if( !Is8BitOperand() && is_s8( imm ) ) - { - iWrite( 0x83 ); - ModRM( 3, InstType, to.Id ); - iWrite( imm ); - } - else - { - if( to.IsAccumulator() ) - iWrite( (Is8BitOperand() ? 4 : 5) | (InstType<<3) ); - else - { - iWrite( Is8BitOperand() ? 0x80 : 0x81 ); - ModRM( 3, InstType, to.Id ); - } - iWrite( imm ); - } - } - - static __emitinline void Emit( const ModSibStrict& sibdest, ImmType imm ) - { - if( Is8BitOperand() ) - { - iWrite( 0x80 ); - SibMagicType::Emit( InstType, sibdest ); - iWrite( imm ); - } - else - { - prefix16(); - iWrite( is_s8( imm ) ? 0x83 : 0x81 ); - SibMagicType::Emit( InstType, sibdest ); - if( is_s8( imm ) ) - iWrite( imm ); - else - iWrite( imm ); - } - } - }; - - // ------------------------------------------------------------------- - // Group 2 (shift) instructions have no Sib/ModRM forms. - // Note: For Imm forms, we ignore the instruction if the shift count is zero. This - // is a safe optimization since any zero-value shift does not affect any flags. - // - template< typename ImmType, G2Type InstType, typename SibMagicType > - class Group2Impl - { - public: - static const uint OperandSize = sizeof(ImmType); - - Group2Impl() {} // For the love of GCC. - - protected: - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } - - public: - static __emitinline void Emit( const x86Register& to, const x86Register8& from ) - { - jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?) - - prefix16(); - iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); - ModRM( 3, InstType, to.Id ); - } - - static __emitinline void Emit( const x86Register& to, u8 imm ) - { - if( imm == 0 ) return; - - prefix16(); - if( imm == 1 ) - { - // special encoding of 1's - iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); - ModRM( 3, InstType, to.Id ); - } - else - { - iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); - ModRM( 3, InstType, to.Id ); - iWrite( imm ); - } - } - - static __emitinline void Emit( const ModSibStrict& sibdest, const x86Register8& from ) - { - jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?) 
- - prefix16(); - iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); - SibMagicType::Emit( from.Id, sibdest ); - } - - static __emitinline void Emit( const ModSibStrict& sibdest, u8 imm ) - { - if( imm == 0 ) return; - - prefix16(); - if( imm == 1 ) - { - // special encoding of 1's - iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); - SibMagicType::Emit( InstType, sibdest ); - } - else - { - iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); - SibMagicType::Emit( InstType, sibdest ); - iWrite( imm ); - } - } - }; - - // ------------------------------------------------------------------- - // - template< G1Type InstType > - class Group1ImplAll - { - protected: - typedef Group1Impl m_32; - typedef Group1Impl m_16; - typedef Group1Impl m_8; - - typedef Group1Impl m_32i; - typedef Group1Impl m_16i; - typedef Group1Impl m_8i; - - // Inlining Notes: - // I've set up the inlining to be as practical and intelligent as possible, which means - // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to - // virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis- - // creation of the compiler. - // - - // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) - - public: - // ---------- 32 Bit Interface ----------- - __forceinline void operator()( const x86Register32& to, const x86Register32& from ) const { m_32i::Emit( to, from ); } - __forceinline void operator()( const x86Register32& to, const void* src ) const { m_32i::Emit( to, ptr32[src] ); } - __forceinline void operator()( const void* dest, const x86Register32& from ) const { m_32i::Emit( ptr32[dest], from ); } - __noinline void operator()( const ModSibBase& sibdest, const x86Register32& from ) const { m_32::Emit( sibdest, from ); } - __noinline void operator()( const x86Register32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); } - __noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); } - - void operator()( const x86Register32& to, u32 imm, bool needs_flags=false ) const - { - //if( needs_flags || (imm != 0) || !_optimize_imm0() ) - m_32i::Emit( to, imm ); - } - - // ---------- 16 Bit Interface ----------- - __forceinline void operator()( const x86Register16& to, const x86Register16& from ) const { m_16i::Emit( to, from ); } - __forceinline void operator()( const x86Register16& to, const void* src ) const { m_16i::Emit( to, ptr16[src] ); } - __forceinline void operator()( const void* dest, const x86Register16& from ) const { m_16i::Emit( ptr16[dest], from ); } - __noinline void operator()( const ModSibBase& sibdest, const x86Register16& from ) const { m_16::Emit( sibdest, from ); } - __noinline void operator()( const x86Register16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); } - __noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); } - - void operator()( const x86Register16& to, u16 imm, bool needs_flags=false ) const { m_16i::Emit( to, imm ); } - - // ---------- 8 Bit Interface ----------- - __forceinline void operator()( const x86Register8& to, const x86Register8& from ) const { m_8i::Emit( to, from ); } - __forceinline void operator()( const x86Register8& to, const void* src ) const { m_8i::Emit( to, ptr8[src] ); } - __forceinline void operator()( const void* dest, const x86Register8& from ) const { m_8i::Emit( ptr8[dest], from ); } - __noinline void operator()( const ModSibBase& sibdest, const x86Register8& from ) const { 
m_8::Emit( sibdest, from ); } - __noinline void operator()( const x86Register8& to, const ModSibBase& sibsrc ) const { m_8::Emit( to, sibsrc ); } - __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } - - void operator()( const x86Register8& to, u8 imm, bool needs_flags=false ) const { m_8i::Emit( to, imm ); } - - Group1ImplAll() {} // Why does GCC need these? - }; - - - // ------------------------------------------------------------------- - // - template< G2Type InstType > - class Group2ImplAll - { - protected: - typedef Group2Impl m_32; - typedef Group2Impl m_16; - typedef Group2Impl m_8; - - typedef Group2Impl m_32i; - typedef Group2Impl m_16i; - typedef Group2Impl m_8i; - - // Inlining Notes: - // I've set up the inlining to be as practical and intelligent as possible, which means - // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to - // virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis- - // creation of the compiler. - // - - // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) - - public: - // ---------- 32 Bit Interface ----------- - __forceinline void operator()( const x86Register32& to, const x86Register8& from ) const{ m_32i::Emit( to, from ); } - __noinline void operator()( const ModSibStrict<4>& sibdest, const x86Register8& from ) const{ m_32::Emit( sibdest, from ); } - __noinline void operator()( const ModSibStrict<4>& sibdest, u8 imm ) const { m_32::Emit( sibdest, imm ); } - void operator()( const x86Register32& to, u8 imm ) const { m_32i::Emit( to, imm ); } - - // ---------- 16 Bit Interface ----------- - __forceinline void operator()( const x86Register16& to, const x86Register8& from ) const{ m_16i::Emit( to, from ); } - __noinline void operator()( const ModSibStrict<2>& sibdest, const x86Register8& from ) const{ m_16::Emit( sibdest, from ); } - __noinline void operator()( const ModSibStrict<2>& sibdest, u8 imm ) const { m_16::Emit( sibdest, imm ); } - void operator()( const x86Register16& to, u8 imm ) const { m_16i::Emit( to, imm ); } - - // ---------- 8 Bit Interface ----------- - __forceinline void operator()( const x86Register8& to, const x86Register8& from ) const{ m_8i::Emit( to, from ); } - __noinline void operator()( const ModSibStrict<1>& sibdest, const x86Register8& from ) const{ m_8::Emit( sibdest, from ); } - __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } - void operator()( const x86Register8& to, u8 imm ) const { m_8i::Emit( to, imm ); } - - Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds. - }; - - // Define the externals for Group1/2 instructions here (inside the Internal namespace). - // and then import then into the x86Emitter namespace later. Done because it saves a - // lot of Internal:: namespace resolution mess, and is better than the alternative of - // importing Internal into x86Emitter, which done at the header file level would defeat - // the purpose!) 
- - extern const Group1ImplAll iADD; - extern const Group1ImplAll iOR; - extern const Group1ImplAll iADC; - extern const Group1ImplAll iSBB; - extern const Group1ImplAll iAND; - extern const Group1ImplAll iSUB; - extern const Group1ImplAll iXOR; - extern const Group1ImplAll iCMP; - - extern const Group2ImplAll iROL; - extern const Group2ImplAll iROR; - extern const Group2ImplAll iRCL; - extern const Group2ImplAll iRCR; - extern const Group2ImplAll iSHL; - extern const Group2ImplAll iSHR; - extern const Group2ImplAll iSAR; - - ////////////////////////////////////////////////////////////////////////////////////////// - // Mov with sign/zero extension implementations: - // - template< int DestOperandSize, int SrcOperandSize > - class MovExtendImpl - { - protected: - static bool Is8BitOperand() { return SrcOperandSize == 1; } - static void prefix16() { if( DestOperandSize == 2 ) iWrite( 0x66 ); } - static __forceinline void emit_base( bool SignExtend ) - { - prefix16(); - iWrite( 0x0f ); - iWrite( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) ); - } - - public: - MovExtendImpl() {} // For the love of GCC. - - static __emitinline void Emit( const x86Register& to, const x86Register& from, bool SignExtend ) - { - emit_base( SignExtend ); - ModRM( 3, from.Id, to.Id ); - } - - static __emitinline void Emit( const x86Register& to, const ModSibStrict& sibsrc, bool SignExtend ) - { - emit_base( SignExtend ); - EmitSibMagic( to.Id, sibsrc ); - } - }; - - // ------------------------------------------------------------------------ - template< bool SignExtend > - class MovExtendImplAll - { - protected: - typedef MovExtendImpl<4, 2> m_16to32; - typedef MovExtendImpl<4, 1> m_8to32; - - public: - __forceinline void operator()( const x86Register32& to, const x86Register16& from ) const { m_16to32::Emit( to, from, SignExtend ); } - __noinline void operator()( const x86Register32& to, const ModSibStrict<2>& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); } - - __forceinline void operator()( const x86Register32& to, const x86Register8& from ) const { m_8to32::Emit( to, from, SignExtend ); } - __noinline void operator()( const x86Register32& to, const ModSibStrict<1>& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); } - - MovExtendImplAll() {} // don't ask. - }; - - // ------------------------------------------------------------------------ - - extern const MovExtendImplAll iMOVSX; - extern const MovExtendImplAll iMOVZX; - + #include "ix86_impl_group1.h" + #include "ix86_impl_group2.h" + #include "ix86_impl_movs.h" // if the immediate is zero, we can replace the instruction, or ignore it // entirely, depending on the instruction being issued. That's what we do here. @@ -1021,6 +648,63 @@ namespace x86Emitter }*/ } + + // ------------------------------------------------------------------------ + + // ----- Group 1 Instruction Class ----- + + extern const Internal::Group1ImplAll iADD; + extern const Internal::Group1ImplAll iOR; + extern const Internal::Group1ImplAll iADC; + extern const Internal::Group1ImplAll iSBB; + extern const Internal::Group1ImplAll iAND; + extern const Internal::Group1ImplAll iSUB; + extern const Internal::Group1ImplAll iXOR; + extern const Internal::Group1ImplAll iCMP; + + // ----- Group 2 Instruction Class ----- + // Optimization Note: For Imm forms, we ignore the instruction if the shift count is + // zero. This is a safe optimization since any zero-value shift does not affect any + // flags. 
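(Aside, not part of the patch: a hedged usage sketch of the Group 1 / Group 2 functors declared just below, based only on the operator() overloads shown earlier in this header. The memory operand composes ptr32 and x86IndexReg as this file does elsewhere; the address expression itself is made up for illustration.)

    iADD( eax, ebx );                                               // reg, reg
    iAND( ecx, 0xffff );                                            // reg, imm32
    iSUB( ptr32[x86IndexReg(ebx) + x86IndexReg(esi)*4 + 8], eax );  // mem, reg
    iSHL( eax, 4 );                                                 // shift by imm8 (a count of 0 emits nothing)
    iSHR( edx, cl );                                                // shift by CL, the only legal shift register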
+ + extern const Internal::Group2ImplAll iROL; + extern const Internal::Group2ImplAll iROR; + extern const Internal::Group2ImplAll iRCL; + extern const Internal::Group2ImplAll iRCR; + extern const Internal::Group2ImplAll iSHL; + extern const Internal::Group2ImplAll iSHR; + extern const Internal::Group2ImplAll iSAR; + + extern const Internal::MovExtendImplAll iMOVSX; + extern const Internal::MovExtendImplAll iMOVZX; + + extern const Internal::CMovImplGeneric iCMOV; + + extern const Internal::CMovImplAll iCMOVA; + extern const Internal::CMovImplAll iCMOVAE; + extern const Internal::CMovImplAll iCMOVB; + extern const Internal::CMovImplAll iCMOVBE; + + extern const Internal::CMovImplAll iCMOVG; + extern const Internal::CMovImplAll iCMOVGE; + extern const Internal::CMovImplAll iCMOVL; + extern const Internal::CMovImplAll iCMOVLE; + + extern const Internal::CMovImplAll iCMOVZ; + extern const Internal::CMovImplAll iCMOVE; + extern const Internal::CMovImplAll iCMOVNZ; + extern const Internal::CMovImplAll iCMOVNE; + + extern const Internal::CMovImplAll iCMOVO; + extern const Internal::CMovImplAll iCMOVNO; + extern const Internal::CMovImplAll iCMOVC; + extern const Internal::CMovImplAll iCMOVNC; + + extern const Internal::CMovImplAll iCMOVS; + extern const Internal::CMovImplAll iCMOVNS; + extern const Internal::CMovImplAll iCMOVPE; + extern const Internal::CMovImplAll iCMOVPO; + } #include "ix86_inlines.inl" From 46585299704c91fd57230ec28f330b46d317dd21 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 16 Apr 2009 01:34:09 +0000 Subject: [PATCH 071/143] Emitter: * Added MUL/DIV/IMUL/IDIV to the emitter, renamed as UMUL/SMUL respectively (to remove ambiguity of the instruction behaviors). * Fixed a bug in the shift instruction emitter that would have caused it to emit the wrong instruction type (like a SHR instead of a SHL, for example). * Added type strictness to the shift instructions that take the CL register as a parameter. Passing anything other than CL will generate compile time errors now. * Fixed a syntax error in one of the legacy defines. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@986 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 + pcsx2/x86/ix86/ix86.cpp | 155 ++++++++++++++++++- pcsx2/x86/ix86/ix86_impl_group2.h | 22 ++- pcsx2/x86/ix86/ix86_instructions.h | 36 +++++ pcsx2/x86/ix86/ix86_legacy.cpp | 172 +++++++-------------- pcsx2/x86/ix86/ix86_types.h | 119 +++++++++----- 6 files changed, 340 insertions(+), 168 deletions(-) diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index f238a8c12e..d81664fa89 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2925,6 +2925,10 @@ RelativePath="..\..\x86\ix86\ix86_fpu.cpp" > + + diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index c1aed679ea..ec41c81e50 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -74,7 +74,7 @@ const x86IndexerTypeExplicit<1> ptr8; // ------------------------------------------------------------------------ template< int OperandSize > const iRegister iRegister::Empty; -const x86IndexReg x86IndexReg::Empty; +const x86IndexReg x86IndexReg::Empty; const iRegister32 eax( 0 ), ebx( 3 ), @@ -89,10 +89,12 @@ const iRegister16 bp( 5 ), sp( 4 ); const iRegister8 - al( 0 ), cl( 1 ), + al( 0 ), dl( 2 ), bl( 3 ), ah( 4 ), ch( 5 ), dh( 6 ), bh( 7 ); + +const iRegisterCL cl; namespace Internal { @@ -250,8 +252,11 @@ const Group2ImplAll iSHL; const Group2ImplAll iSHR; const Group2ImplAll iSAR; -const MovExtendImplAll iMOVSX; const MovExtendImplAll iMOVZX; +const MovExtendImplAll iMOVSX; + +const Internal::DwordShiftImplAll iSHLD; +const Internal::DwordShiftImplAll iSHRD; const CMovImplGeneric iCMOV; @@ -641,6 +646,150 @@ void iMOV( const iRegister8& to, u8 imm, bool preserve_flags ) MOV8::Emit( to, imm ); } +////////////////////////////////////////////////////////////////////////////////////////// +// DIV/MUL/IDIV/IMUL instructions (Implemented!) + +// F6 is r8, F7 is r32. +// MUL is 4, DIV is 6. + +enum MulDivType +{ + MDT_Mul = 4, + MDT_iMul = 5, + MDT_Div = 6, + MDT_iDiv = 7 +}; + +// ------------------------------------------------------------------------ +// EAX form emitter for Mul/Div/iMUL/iDIV +// +template< int OperandSize > +static __forceinline void EmitMulDiv_OneRegForm( MulDivType InstType, const iRegister& from ) +{ + if( OperandSize == 2 ) iWrite( 0x66 ); + iWrite( (OperandSize == 1) ? 0xf6 : 0xf7 ); + ModRM( ModRm_Direct, InstType, from.Id ); +} + +static __forceinline void EmitMulDiv_OneRegForm( MulDivType InstType, const ModSibSized& sibsrc ) +{ + if( sibsrc.OperandSize == 2 ) iWrite( 0x66 ); + iWrite( (sibsrc.OperandSize == 1) ? 0xf6 : 0xf7 ); + EmitSibMagic( InstType, sibsrc ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// All ioMul forms are valid for 16 and 32 bit register operands only! 
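(Aside, not part of the patch: what the new one-register and multi-operand forms boil down to at a call site. The register semantics in the comments are standard x86 MUL/IMUL/IDIV behaviour, not something this diff states explicitly, so treat them as an assumption.)

    iUMUL( ecx );            // unsigned: EDX:EAX = EAX * ECX
    iSDIV( ebx );            // signed:   EAX = EDX:EAX / EBX, remainder in EDX
    iSMUL( eax, ecx );       // two-operand IMUL: EAX = EAX * ECX (low 32 bits)
    iSMUL( edi, esi, 12 );   // imm form; small immediates use the short 0x6b encoding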
+ +template< typename ImmType > +class iMulImpl +{ +public: + static const uint OperandSize = sizeof(ImmType); + +protected: + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + +public: + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, const iRegister& from ) + { + prefix16(); + write16( 0xaf0f ); + ModRM( ModRm_Direct, to.Id, from.Id ); + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, const void* src ) + { + prefix16(); + write16( 0xaf0f ); + iWriteDisp( to.Id, src ); + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, const ModSibBase& src ) + { + prefix16(); + write16( 0xaf0f ); + EmitSibMagic( to.Id, src ); + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, const iRegister& from, ImmType imm ) + { + prefix16(); + write16( is_s8( imm ) ? 0x6b : 0x69 ); + ModRM( ModRm_Direct, to.Id, from.Id ); + if( is_s8( imm ) ) + write8( imm ); + else + iWrite( imm ); + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, const void* src, ImmType imm ) + { + prefix16(); + write16( is_s8( imm ) ? 0x6b : 0x69 ); + iWriteDisp( to.Id, src ); + if( is_s8( imm ) ) + write8( imm ); + else + iWrite( imm ); + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, const ModSibBase& src, ImmType imm ) + { + prefix16(); + write16( is_s8( imm ) ? 0x6b : 0x69 ); + EmitSibMagic( to.Id, src ); + if( is_s8( imm ) ) + write8( imm ); + else + iWrite( imm ); + } +}; + +namespace Internal +{ + typedef iMulImpl iMUL32; + typedef iMulImpl iMUL16; +} + +__forceinline void iUMUL( const iRegister32& from ) { EmitMulDiv_OneRegForm( MDT_Mul, from ); } +__forceinline void iUMUL( const iRegister16& from ) { EmitMulDiv_OneRegForm( MDT_Mul, from ); } +__forceinline void iUMUL( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_Mul, from ); } +__noinline void iUMUL( const ModSibSized& from ) { EmitMulDiv_OneRegForm( MDT_Mul, from ); } + +__forceinline void iUDIV( const iRegister32& from ) { EmitMulDiv_OneRegForm( MDT_Div, from ); } +__forceinline void iUDIV( const iRegister16& from ) { EmitMulDiv_OneRegForm( MDT_Div, from ); } +__forceinline void iUDIV( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_Div, from ); } +__noinline void iUDIV( const ModSibSized& from ) { EmitMulDiv_OneRegForm( MDT_Div, from ); } + +__forceinline void iSDIV( const iRegister32& from ) { EmitMulDiv_OneRegForm( MDT_iDiv, from ); } +__forceinline void iSDIV( const iRegister16& from ) { EmitMulDiv_OneRegForm( MDT_iDiv, from ); } +__forceinline void iSDIV( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_iDiv, from ); } +__noinline void iSDIV( const ModSibSized& from ) { EmitMulDiv_OneRegForm( MDT_iDiv, from ); } + +__forceinline void iSMUL( const iRegister32& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } +__forceinline void iSMUL( const iRegister32& to, const iRegister32& from ) { iMUL32::Emit( to, from ); } +__forceinline void iSMUL( const iRegister32& to, const void* src ) { iMUL32::Emit( to, src ); } +__forceinline void iSMUL( const iRegister32& to, const iRegister32& from, s32 imm ) { iMUL32::Emit( to, from, imm ); 
} +__noinline void iSMUL( const iRegister32& to, const ModSibBase& src ) { iMUL32::Emit( to, src ); } +__noinline void iSMUL( const iRegister32& to, const ModSibBase& from, s32 imm ) { iMUL32::Emit( to, from, imm ); } + +__forceinline void iSMUL( const iRegister16& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } +__forceinline void iSMUL( const iRegister16& to, const iRegister16& from ) { iMUL16::Emit( to, from ); } +__forceinline void iSMUL( const iRegister16& to, const void* src ) { iMUL16::Emit( to, src ); } +__forceinline void iSMUL( const iRegister16& to, const iRegister16& from, s16 imm ) { iMUL16::Emit( to, from, imm ); } +__noinline void iSMUL( const iRegister16& to, const ModSibBase& src ) { iMUL16::Emit( to, src ); } +__noinline void iSMUL( const iRegister16& to, const ModSibBase& from, s16 imm ) { iMUL16::Emit( to, from, imm ); } + +__forceinline void iSMUL( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } +__noinline void iSMUL( const ModSibSized& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } + ////////////////////////////////////////////////////////////////////////////////////////// // Push / Pop Emitters diff --git a/pcsx2/x86/ix86/ix86_impl_group2.h b/pcsx2/x86/ix86/ix86_impl_group2.h index 23cb8c78a3..dd551713d1 100644 --- a/pcsx2/x86/ix86/ix86_impl_group2.h +++ b/pcsx2/x86/ix86/ix86_impl_group2.h @@ -52,10 +52,8 @@ protected: static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } public: - static __emitinline void Emit( const iRegister& to, const iRegister8& from ) + static __emitinline void Emit( const iRegister& to ) { - jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?) - prefix16(); iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); ModRM( 3, InstType, to.Id ); @@ -80,13 +78,11 @@ public: } } - static __emitinline void Emit( const ModSibStrict& sibdest, const iRegister8& from ) + static __emitinline void Emit( const ModSibStrict& sibdest ) { - jASSUME( from == cl ); // cl is the only valid shift register. (turn this into a compile time check?) - prefix16(); iWrite( Is8BitOperand() ? 
0xd2 : 0xd3 ); - EmitSibMagic( from.Id, sibdest ); + EmitSibMagic( InstType, sibdest ); } static __emitinline void Emit( const ModSibStrict& sibdest, u8 imm ) @@ -130,20 +126,20 @@ protected: public: // ---------- 32 Bit Interface ----------- - __forceinline void operator()( const iRegister32& to, const iRegister8& from ) const { m_32::Emit( to, from ); } - __noinline void operator()( const ModSibStrict<4>& sibdest, const iRegister8& from ) const { m_32::Emit( sibdest, from ); } + __forceinline void operator()( const iRegister32& to, __unused const iRegisterCL& from ) const{ m_32::Emit( to ); } + __noinline void operator()( const ModSibStrict<4>& sibdest, __unused const iRegisterCL& from ) const{ m_32::Emit( sibdest ); } __noinline void operator()( const ModSibStrict<4>& sibdest, u8 imm ) const { m_32::Emit( sibdest, imm ); } void operator()( const iRegister32& to, u8 imm ) const { m_32::Emit( to, imm ); } // ---------- 16 Bit Interface ----------- - __forceinline void operator()( const iRegister16& to, const iRegister8& from ) const { m_16::Emit( to, from ); } - __noinline void operator()( const ModSibStrict<2>& sibdest, const iRegister8& from ) const { m_16::Emit( sibdest, from ); } + __forceinline void operator()( const iRegister16& to, __unused const iRegisterCL& from ) const{ m_16::Emit( to ); } + __noinline void operator()( const ModSibStrict<2>& sibdest, __unused const iRegisterCL& from ) const{ m_16::Emit( sibdest ); } __noinline void operator()( const ModSibStrict<2>& sibdest, u8 imm ) const { m_16::Emit( sibdest, imm ); } void operator()( const iRegister16& to, u8 imm ) const { m_16::Emit( to, imm ); } // ---------- 8 Bit Interface ----------- - __forceinline void operator()( const iRegister8& to, const iRegister8& from ) const { m_8::Emit( to, from ); } - __noinline void operator()( const ModSibStrict<1>& sibdest, const iRegister8& from ) const { m_8::Emit( sibdest, from ); } + __forceinline void operator()( const iRegister8& to, __unused const iRegisterCL& from ) const{ m_8::Emit( to ); } + __noinline void operator()( const ModSibStrict<1>& sibdest, __unused const iRegisterCL& from ) const{ m_8::Emit( sibdest ); } __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } void operator()( const iRegister8& to, u8 imm ) const { m_8::Emit( to, imm ); } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 3b8b885a11..533c2dbd57 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -82,6 +82,42 @@ namespace x86Emitter // NOP 1-byte __forceinline void iNOP() { write8(0x90); } + + ////////////////////////////////////////////////////////////////////////////////////////// + // MUL / DIV instructions + + extern void iUMUL( const iRegister32& from ); + extern void iUMUL( const iRegister16& from ); + extern void iUMUL( const iRegister8& from ); + extern void iUMUL( const ModSibSized& from ); + + extern void iUDIV( const iRegister32& from ); + extern void iUDIV( const iRegister16& from ); + extern void iUDIV( const iRegister8& from ); + extern void iUDIV( const ModSibSized& from ); + + extern void iSDIV( const iRegister32& from ); + extern void iSDIV( const iRegister16& from ); + extern void iSDIV( const iRegister8& from ); + extern void iSDIV( const ModSibSized& from ); + + extern void iSMUL( const iRegister32& from ); + extern void iSMUL( const iRegister32& to, const iRegister32& from ); + extern void iSMUL( const iRegister32& to, const void* src ); + extern void 
iSMUL( const iRegister32& to, const iRegister32& from, s32 imm ); + extern void iSMUL( const iRegister32& to, const ModSibBase& src ); + extern void iSMUL( const iRegister32& to, const ModSibBase& src, s32 imm ); + + extern void iSMUL( const iRegister16& from ); + extern void iSMUL( const iRegister16& to, const iRegister16& from ); + extern void iSMUL( const iRegister16& to, const void* src ); + extern void iSMUL( const iRegister16& to, const iRegister16& from, s16 imm ); + extern void iSMUL( const iRegister16& to, const ModSibBase& src ); + extern void iSMUL( const iRegister16& to, const ModSibBase& src, s16 imm ); + + extern void iSMUL( const iRegister8& from ); + extern void iSMUL( const ModSibSized& from ); + ////////////////////////////////////////////////////////////////////////////////////////// // MOV instructions! diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index d0fd220455..3f12a7f818 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -117,9 +117,9 @@ DEFINE_OPCODE_LEGACY( MOV ) // ------------------------------------------------------------------------ #define DEFINE_LEGACY_MOVEXTEND( form, destbits, srcbits ) \ - emitterT void MOV##form##destbits##R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form##( iRegister##destbits( to ), iRegister##srcbits( from ) ); } \ - emitterT void MOV##form##destbits##Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form##( iRegister##destbits( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \ - emitterT void MOV##form##destbits##M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form##( iRegister##destbits( to ), ptr##srcbits[from] ); } + emitterT void MOV##form##destbits##R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form( iRegister##destbits( to ), iRegister##srcbits( from ) ); } \ + emitterT void MOV##form##destbits##Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form( iRegister##destbits( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \ + emitterT void MOV##form##destbits##M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form( iRegister##destbits( to ), ptr##srcbits[from] ); } DEFINE_LEGACY_MOVEXTEND( SX, 32, 16 ) DEFINE_LEGACY_MOVEXTEND( ZX, 32, 16 ) @@ -164,7 +164,53 @@ emitterT void CMOVE32RtoR( x86IntRegType to, x86IntRegType from ) iCMOVE( iRegister32(to), iRegister32(from) ); } +// shld imm8 to r32 +emitterT void SHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) +{ + iSHLD( iRegister32(to), iRegister32(from), shift ); +} +// shrd imm8 to r32 +emitterT void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) +{ + iSHRD( iRegister32(to), iRegister32(from), shift ); +} + +emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) +{ + iLEA( iRegister32( to ), ptr[x86IndexReg(from)+offset] ); +} + +emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) +{ + iLEA( iRegister32( to ), ptr[x86IndexReg(from0)+x86IndexReg(from1)] ); +} + +// Don't inline recursive functions +emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale) +{ + iLEA( iRegister32( to ), ptr[x86IndexReg(from)*(1< iRegister16; typedef iRegister<1> iRegister8; - extern const iRegister32 eax; - extern const iRegister32 ebx; - extern const iRegister32 ecx; - extern const iRegister32 edx; - extern const iRegister32 esi; - extern const iRegister32 edi; - extern const iRegister32 ebp; - extern const iRegister32 esp; + class 
iRegisterCL : public iRegister8 + { + public: + iRegisterCL(): iRegister8( 1 ) {} + }; - extern const iRegister16 ax; - extern const iRegister16 bx; - extern const iRegister16 cx; - extern const iRegister16 dx; - extern const iRegister16 si; - extern const iRegister16 di; - extern const iRegister16 bp; - extern const iRegister16 sp; + extern const iRegister32 + eax, ebx, ecx, edx, + esi, edi, ebp, esp; - extern const iRegister8 al; - extern const iRegister8 cl; - extern const iRegister8 dl; - extern const iRegister8 bl; - extern const iRegister8 ah; - extern const iRegister8 ch; - extern const iRegister8 dh; - extern const iRegister8 bh; + extern const iRegister16 + ax, bx, cx, dx, + si, di, bp, sp; + + extern const iRegister8 + al, dl, bl, + ah, ch, dh, bh; + + extern const iRegisterCL cl; // I'm special! ////////////////////////////////////////////////////////////////////////////////////////// // Use 32 bit registers as out index register (for ModSib memory address calculations) @@ -346,6 +340,13 @@ namespace x86Emitter __forceinline iAddressInfo operator-( s32 imm ) const { return iAddressInfo( *this ).Add( -imm ); } }; + enum OperandSizeType + { + OpSize_8 = 1, + OpSize_16 = 2, + OpSize_32 = 4, + }; + ////////////////////////////////////////////////////////////////////////////////////////// // ModSib - Internal low-level representation of the ModRM/SIB information. // @@ -387,26 +388,64 @@ namespace x86Emitter }; ////////////////////////////////////////////////////////////////////////////////////////// - // Strictly-typed version of ModSibBase, which is used to apply operand size information - // to ImmToMem operations. // - template< int OperandSize > - class ModSibStrict : public ModSibBase + class ModSibSized : public ModSibBase { public: - __forceinline explicit ModSibStrict( const iAddressInfo& src ) : ModSibBase( src ) {} - __forceinline explicit ModSibStrict( s32 disp ) : ModSibBase( disp ) {} - __forceinline ModSibStrict( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) : - ModSibBase( base, index, scale, displacement ) {} + int OperandSize; - __forceinline ModSibStrict& Add( s32 imm ) + ModSibSized( int opsize, const iAddressInfo& src ) : + ModSibBase( src ), + OperandSize( opsize ) + { + jASSUME( OperandSize == 1 || OperandSize == 2 || OperandSize == 4 ); + } + + ModSibSized( int opsize, s32 disp ) : + ModSibBase( disp ), + OperandSize( opsize ) + { + jASSUME( OperandSize == 1 || OperandSize == 2 || OperandSize == 4 ); + } + + ModSibSized( int opsize, x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) : + ModSibBase( base, index, scale, displacement ), + OperandSize( opsize ) + { + jASSUME( OperandSize == 1 || OperandSize == 2 || OperandSize == 4 ); + } + + __forceinline ModSibSized& Add( s32 imm ) { Displacement += imm; return *this; } - __forceinline ModSibStrict operator+( const s32 imm ) const { return ModSibStrict( *this ).Add( imm ); } - __forceinline ModSibStrict operator-( const s32 imm ) const { return ModSibStrict( *this ).Add( -imm ); } + __forceinline ModSibSized operator+( const s32 imm ) const { return ModSibSized( *this ).Add( imm ); } + __forceinline ModSibSized operator-( const s32 imm ) const { return ModSibSized( *this ).Add( -imm ); } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // Strictly-typed version of ModSibBase, which is used to apply operand size information + // to ImmToMem operations. 
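(Aside, not part of the patch: the practical difference between the two classes. ModSibSized carries the operand size as a runtime field, which is what lets the one-register MUL/DIV forms above accept any sized memory operand through a single overload; ModSibStrict keeps the size as a template parameter for the imm-to-mem forms. A hedged sketch, assuming the ptr8/ptr16 indexers defined in this header:)

    iUMUL( ptr16[x86IndexReg(ecx) + 4] );   // ModSibStrict<2> binds as ModSibSized, OperandSize == 2
    iUMUL( ptr8 [x86IndexReg(ecx) + 4] );   // same overload; the 0xf6 opcode is selected at run time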
+ // + template< int OpSize > + class ModSibStrict : public ModSibSized + { + public: + __forceinline explicit ModSibStrict( const iAddressInfo& src ) : ModSibSized( OpSize, src ) {} + __forceinline explicit ModSibStrict( s32 disp ) : ModSibSized( OpSize, disp ) {} + __forceinline ModSibStrict( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) : + ModSibSized( OpSize, base, index, scale, displacement ) {} + + __forceinline ModSibStrict& Add( s32 imm ) + { + Displacement += imm; + return *this; + } + + __forceinline ModSibStrict operator+( const s32 imm ) const { return ModSibStrict( *this ).Add( imm ); } + __forceinline ModSibStrict operator-( const s32 imm ) const { return ModSibStrict( *this ).Add( -imm ); } }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -605,7 +644,8 @@ namespace x86Emitter #include "ix86_impl_group1.h" #include "ix86_impl_group2.h" - #include "ix86_impl_movs.h" + #include "ix86_impl_movs.h" // cmov and movsx/zx + #include "ix86_impl_dwshift.h" // dowubleword shifts! // if the immediate is zero, we can replace the instruction, or ignore it // entirely, depending on the instruction being issued. That's what we do here. @@ -675,8 +715,11 @@ namespace x86Emitter extern const Internal::Group2ImplAll iSHR; extern const Internal::Group2ImplAll iSAR; + extern const Internal::MovExtendImplAll iMOVZX; extern const Internal::MovExtendImplAll iMOVSX; - extern const Internal::MovExtendImplAll iMOVZX; + + extern const Internal::DwordShiftImplAll iSHLD; + extern const Internal::DwordShiftImplAll iSHRD; extern const Internal::CMovImplGeneric iCMOV; From 986b4fdf939e9463ef08cb9b0c930de4622283ad Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 16 Apr 2009 01:39:38 +0000 Subject: [PATCH 072/143] Forgot to add a new file from the last commit... >_< [ix86_impl_dwshift.h] git-svn-id: http://pcsx2.googlecode.com/svn/trunk@987 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/ix86_impl_dwshift.h | 124 +++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 pcsx2/x86/ix86/ix86_impl_dwshift.h diff --git a/pcsx2/x86/ix86/ix86_impl_dwshift.h b/pcsx2/x86/ix86/ix86_impl_dwshift.h new file mode 100644 index 0000000000..db7fe4ca3f --- /dev/null +++ b/pcsx2/x86/ix86/ix86_impl_dwshift.h @@ -0,0 +1,124 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +// Header: ix86_impl_dwshift.h -- covers SHLD and SHRD. +// Note: This header is meant to be included from within the x86Emitter::Internal namespace. + + +// ------------------------------------------------------------------- +// Optimization Note: Imm shifts by 0 are ignore (no code generated). This is a safe optimization +// because shifts by 0 do *not* affect flags status. 
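(Aside, not part of the patch: a hedged sketch of the iSHLD/iSHRD call sites this header enables, matching the operator() forms of DwordShiftImplAll below. The bit-twiddling comment describes standard SHLD/SHRD semantics and is an assumption, not something stated in this file.)

    iSHLD( eax, edx, 8 );    // eax = (eax << 8) | (edx >> 24); an imm count of 0 emits nothing
    iSHRD( eax, edx, cl );   // variable count comes from CL; the iRegisterCL parameter makes any other register a compile-time error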
+ +template< typename ImmType, bool isShiftRight > +class DwordShiftImpl +{ +public: + static const uint OperandSize = sizeof(ImmType); + + DwordShiftImpl() {} // because GCC doesn't like static classes + +protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void basesibform( bool isCL ) + { + prefix16(); + write8( 0x0f ); + write8( (isCL ? 0xa5 : 0xa4) | (isShiftRight ? 0x8 : 0) ); + } + +public: + static __emitinline void Emit( const iRegister& to, const iRegister& from ) + { + prefix16(); + write16( 0xa50f | (isShiftRight ? 0x800 : 0) ); + ModRM( 3, from.Id, to.Id ); + } + + static __emitinline void Emit( const iRegister& to, const iRegister& from, u8 imm ) + { + if( imm == 0 ) return; + prefix16(); + write16( 0xa40f | (isShiftRight ? 0x800 : 0) ); + ModRM( 3, from.Id, to.Id ); + write8( imm ); + } + + static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from, __unused const iRegisterCL& clreg ) + { + basesibform(); + EmitSibMagic( from.Id, sibdest ); + } + + static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from, u8 imm ) + { + basesibform(); + EmitSibMagic( from.Id, sibdest ); + write8( imm ); + } + + // dest data type is inferred from the 'from' register, so we can do void* resolution :) + static __emitinline void Emit( void* dest, const iRegister& from, __unused const iRegisterCL& clreg ) + { + basesibform(); + iWriteDisp( from.Id, dest ); + } + + // dest data type is inferred from the 'from' register, so we can do void* resolution :) + static __emitinline void Emit( void* dest, const iRegister& from, u8 imm ) + { + basesibform(); + iWriteDisp( from.Id, dest ); + write8( imm ); + } +}; + + +// ------------------------------------------------------------------- +// +template< bool isShiftRight > +class DwordShiftImplAll +{ +protected: + typedef DwordShiftImpl m_32; + typedef DwordShiftImpl m_16; + + // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) + +public: + // ---------- 32 Bit Interface ----------- + __forceinline void operator()( const iRegister32& to, const iRegister32& from, __unused const iRegisterCL& clreg ) const { m_32::Emit( to, from ); } + __forceinline void operator()( void* dest, const iRegister32& from, __unused const iRegisterCL& clreg ) const { m_32::Emit( dest, from ); } + __noinline void operator()( const ModSibBase& sibdest, const iRegister32& from, __unused const iRegisterCL& clreg ) const { m_32::Emit( sibdest, from ); } + __forceinline void operator()( const iRegister32& to, const iRegister32& from, u8 imm ) const { m_32::Emit( to, from, imm ); } + __forceinline void operator()( void* dest, const iRegister32& from, u8 imm ) const { m_32::Emit( dest, from, imm ); } + __noinline void operator()( const ModSibBase& sibdest, const iRegister32& from, u8 imm ) const { m_32::Emit( sibdest, from ); } + + // ---------- 16 Bit Interface ----------- + __forceinline void operator()( const iRegister16& to, const iRegister16& from, __unused const iRegisterCL& clreg ) const { m_16::Emit( to, from ); } + __forceinline void operator()( void* dest, const iRegister16& from, __unused const iRegisterCL& clreg ) const { m_16::Emit( dest, from ); } + __noinline void operator()( const ModSibBase& sibdest, const iRegister16& from, __unused const iRegisterCL& clreg ) const { m_16::Emit( sibdest, from ); } + __forceinline void operator()( const iRegister16& to, const iRegister16& from, u8 imm ) 
const { m_16::Emit( to, from, imm ); } + __forceinline void operator()( void* dest, const iRegister16& from, u8 imm ) const { m_16::Emit( dest, from, imm ); } + __noinline void operator()( const ModSibBase& sibdest, const iRegister16& from, u8 imm ) const { m_16::Emit( sibdest, from ); } + + DwordShiftImplAll() {} // Why does GCC need these? +}; + From ac2f5713fc2d241cd1f144f03e7ab08c9f141257 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 16 Apr 2009 14:45:13 +0000 Subject: [PATCH 073/143] Emitter: fixed a bug in MOVSX/ZX's reg->reg form [resolves Issue 159 - missing geometry in DQ8], and moved some files around. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@988 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 40 ++++++++----- .../dwshift.h} | 6 +- .../group1.h} | 17 ++---- .../group2.h} | 7 +-- pcsx2/x86/ix86/implement/incdec.h | 22 +++++++ .../{ix86_impl_movs.h => implement/movs.h} | 5 +- pcsx2/x86/ix86/ix86.cpp | 31 ++++++---- pcsx2/x86/ix86/ix86_legacy.cpp | 49 +++++++-------- pcsx2/x86/ix86/ix86_types.h | 59 +++++-------------- 9 files changed, 120 insertions(+), 116 deletions(-) rename pcsx2/x86/ix86/{ix86_impl_dwshift.h => implement/dwshift.h} (97%) rename pcsx2/x86/ix86/{ix86_impl_group1.h => implement/group1.h} (92%) rename pcsx2/x86/ix86/{ix86_impl_group2.h => implement/group2.h} (97%) create mode 100644 pcsx2/x86/ix86/implement/incdec.h rename pcsx2/x86/ix86/{ix86_impl_movs.h => implement/movs.h} (98%) diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index d81664fa89..7f961b7061 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2925,22 +2925,6 @@ RelativePath="..\..\x86\ix86\ix86_fpu.cpp" > - - - - - - - - @@ -2993,6 +2977,30 @@ RelativePath="..\..\x86\ix86\ix86_types.h" > + + + + + + + + + + + + & to, const iRegister& from, u8 imm ) @@ -57,7 +57,7 @@ public: if( imm == 0 ) return; prefix16(); write16( 0xa40f | (isShiftRight ? 0x800 : 0) ); - ModRM( 3, from.Id, to.Id ); + ModRM_Direct( from.Id, to.Id ); write8( imm ); } diff --git a/pcsx2/x86/ix86/ix86_impl_group1.h b/pcsx2/x86/ix86/implement/group1.h similarity index 92% rename from pcsx2/x86/ix86/ix86_impl_group1.h rename to pcsx2/x86/ix86/implement/group1.h index b948700b9b..c9b313cb85 100644 --- a/pcsx2/x86/ix86/ix86_impl_group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -19,7 +19,6 @@ #pragma once // Note: This header is meant to be included from within the x86Emitter::Internal namespace. - // Instructions implemented in this header are as follows -->> enum G1Type @@ -52,7 +51,7 @@ public: { prefix16(); iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); - ModRM( 3, from.Id, to.Id ); + ModRM_Direct( from.Id, to.Id ); } static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from ) @@ -89,7 +88,7 @@ public: if( !Is8BitOperand() && is_s8( imm ) ) { iWrite( 0x83 ); - ModRM( 3, InstType, to.Id ); + ModRM_Direct( InstType, to.Id ); iWrite( imm ); } else @@ -99,7 +98,7 @@ public: else { iWrite( Is8BitOperand() ? 
0x80 : 0x81 ); - ModRM( 3, InstType, to.Id ); + ModRM_Direct( InstType, to.Id ); } iWrite( imm ); } @@ -148,11 +147,7 @@ public: __noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const{ m_32::Emit( to, sibsrc ); } __noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); } - void operator()( const iRegister32& to, u32 imm, bool needs_flags=false ) const - { - //if( needs_flags || (imm != 0) || !_optimize_imm0() ) - m_32::Emit( to, imm ); - } + void operator()( const iRegister32& to, u32 imm ) const { m_32::Emit( to, imm ); } // ---------- 16 Bit Interface ----------- __forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( to, from ); } @@ -162,7 +157,7 @@ public: __noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const{ m_16::Emit( to, sibsrc ); } __noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); } - void operator()( const iRegister16& to, u16 imm, bool needs_flags=false ) const { m_16::Emit( to, imm ); } + void operator()( const iRegister16& to, u16 imm ) const { m_16::Emit( to, imm ); } // ---------- 8 Bit Interface ----------- __forceinline void operator()( const iRegister8& to, const iRegister8& from ) const { m_8::Emit( to, from ); } @@ -172,7 +167,7 @@ public: __noinline void operator()( const iRegister8& to, const ModSibBase& sibsrc ) const{ m_8::Emit( to, sibsrc ); } __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } - void operator()( const iRegister8& to, u8 imm, bool needs_flags=false ) const { m_8::Emit( to, imm ); } + void operator()( const iRegister8& to, u8 imm ) const { m_8::Emit( to, imm ); } Group1ImplAll() {} // Why does GCC need these? }; diff --git a/pcsx2/x86/ix86/ix86_impl_group2.h b/pcsx2/x86/ix86/implement/group2.h similarity index 97% rename from pcsx2/x86/ix86/ix86_impl_group2.h rename to pcsx2/x86/ix86/implement/group2.h index dd551713d1..481e3b37d8 100644 --- a/pcsx2/x86/ix86/ix86_impl_group2.h +++ b/pcsx2/x86/ix86/implement/group2.h @@ -19,7 +19,6 @@ #pragma once // Note: This header is meant to be included from within the x86Emitter::Internal namespace. - // Instructions implemented in this header are as follows -->> enum G2Type @@ -56,7 +55,7 @@ public: { prefix16(); iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); - ModRM( 3, InstType, to.Id ); + ModRM_Direct( InstType, to.Id ); } static __emitinline void Emit( const iRegister& to, u8 imm ) @@ -68,12 +67,12 @@ public: { // special encoding of 1's iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); - ModRM( 3, InstType, to.Id ); + ModRM_Direct( InstType, to.Id ); } else { iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); - ModRM( 3, InstType, to.Id ); + ModRM_Direct( InstType, to.Id ); iWrite( imm ); } } diff --git a/pcsx2/x86/ix86/implement/incdec.h b/pcsx2/x86/ix86/implement/incdec.h new file mode 100644 index 0000000000..3a3f9690eb --- /dev/null +++ b/pcsx2/x86/ix86/implement/incdec.h @@ -0,0 +1,22 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +// Implementations found here: Increment and Decrement Instructions! +// Note: This header is meant to be included from within the x86Emitter::Internal namespace. diff --git a/pcsx2/x86/ix86/ix86_impl_movs.h b/pcsx2/x86/ix86/implement/movs.h similarity index 98% rename from pcsx2/x86/ix86/ix86_impl_movs.h rename to pcsx2/x86/ix86/implement/movs.h index b9b67460b9..2ba18be506 100644 --- a/pcsx2/x86/ix86/ix86_impl_movs.h +++ b/pcsx2/x86/ix86/implement/movs.h @@ -44,8 +44,9 @@ public: static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const iRegister& from ) { + if( to == from ) return; emit_base( cc ); - ModRM( 3, to.Id, from.Id ); + ModRM( ModRm_Direct, to.Id, from.Id ); } static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const void* src ) @@ -123,7 +124,7 @@ public: static __emitinline void Emit( const iRegister& to, const iRegister& from, bool SignExtend ) { emit_base( SignExtend ); - ModRM( 3, from.Id, to.Id ); + ModRM_Direct( to.Id, from.Id ); } static __emitinline void Emit( const iRegister& to, const ModSibStrict& sibsrc, bool SignExtend ) diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index ec41c81e50..7a1d4ec89a 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -128,6 +128,11 @@ namespace Internal //x86Ptr++; } + __forceinline void ModRM_Direct( uint reg, uint rm ) + { + ModRM( Mod_Direct, reg, rm ); + } + __forceinline void SibSB( u32 ss, u32 index, u32 base ) { iWrite( (ss << 6) | (index << 3) | base ); @@ -679,7 +684,7 @@ static __forceinline void EmitMulDiv_OneRegForm( MulDivType InstType, const ModS } ////////////////////////////////////////////////////////////////////////////////////////// -// All ioMul forms are valid for 16 and 32 bit register operands only! +// All iMul forms are valid for 16 and 32 bit register operands only! 
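// Illustrative aside (sketch only, not from the original patch; eax/ecx are
// assumed register constants): the restriction mirrors the x86 encodings
// themselves -- the two-operand (0F AF /r) and three-operand (6B /r ib,
// 69 /r iz) forms of IMUL exist only for 16/32-bit registers, so call sites
// look like:
//
//    iSMUL( eax, ecx );       // imul eax, ecx
//    iSMUL( eax, ecx, 9 );    // imul eax, ecx, 9
//
// while the only 8-bit multiply is the one-operand "imul r/m8" (F6 /5) form.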
template< typename ImmType > class iMulImpl @@ -752,12 +757,6 @@ public: } }; -namespace Internal -{ - typedef iMulImpl iMUL32; - typedef iMulImpl iMUL16; -} - __forceinline void iUMUL( const iRegister32& from ) { EmitMulDiv_OneRegForm( MDT_Mul, from ); } __forceinline void iUMUL( const iRegister16& from ) { EmitMulDiv_OneRegForm( MDT_Mul, from ); } __forceinline void iUMUL( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_Mul, from ); } @@ -773,22 +772,32 @@ __forceinline void iSDIV( const iRegister16& from ) { EmitMulDiv_OneRegForm( MD __forceinline void iSDIV( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_iDiv, from ); } __noinline void iSDIV( const ModSibSized& from ) { EmitMulDiv_OneRegForm( MDT_iDiv, from ); } -__forceinline void iSMUL( const iRegister32& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } +__forceinline void iSMUL( const iRegister32& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } +__forceinline void iSMUL( const iRegister16& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } +__forceinline void iSMUL( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } +__noinline void iSMUL( const ModSibSized& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } + +// ------------------------------------------------------------------------ +// iMUL's special forms (unique to iMUL alone) + +namespace Internal +{ + typedef iMulImpl iMUL32; + typedef iMulImpl iMUL16; +} + __forceinline void iSMUL( const iRegister32& to, const iRegister32& from ) { iMUL32::Emit( to, from ); } __forceinline void iSMUL( const iRegister32& to, const void* src ) { iMUL32::Emit( to, src ); } __forceinline void iSMUL( const iRegister32& to, const iRegister32& from, s32 imm ) { iMUL32::Emit( to, from, imm ); } __noinline void iSMUL( const iRegister32& to, const ModSibBase& src ) { iMUL32::Emit( to, src ); } __noinline void iSMUL( const iRegister32& to, const ModSibBase& from, s32 imm ) { iMUL32::Emit( to, from, imm ); } -__forceinline void iSMUL( const iRegister16& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } __forceinline void iSMUL( const iRegister16& to, const iRegister16& from ) { iMUL16::Emit( to, from ); } __forceinline void iSMUL( const iRegister16& to, const void* src ) { iMUL16::Emit( to, src ); } __forceinline void iSMUL( const iRegister16& to, const iRegister16& from, s16 imm ) { iMUL16::Emit( to, from, imm ); } __noinline void iSMUL( const iRegister16& to, const ModSibBase& src ) { iMUL16::Emit( to, src ); } __noinline void iSMUL( const iRegister16& to, const ModSibBase& from, s16 imm ) { iMUL16::Emit( to, from, imm ); } -__forceinline void iSMUL( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } -__noinline void iSMUL( const ModSibSized& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 3f12a7f818..2fc12afd89 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -176,6 +176,31 @@ emitterT void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) iSHRD( iRegister32(to), iRegister32(from), shift ); } +/* mul eax by r32 to edx:eax */ +emitterT void MUL32R( x86IntRegType from ) { iUMUL( iRegister32(from) ); } +/* imul eax by r32 to edx:eax */ +emitterT void IMUL32R( x86IntRegType from ) { iSMUL( iRegister32(from) ); } +/* mul eax by m32 to edx:eax */ +emitterT void MUL32M( u32 from ) { iUMUL( ptr32[from] ); } +/* imul eax by m32 
to edx:eax */ +emitterT void IMUL32M( u32 from ) { iSMUL( ptr32[from] ); } + +/* imul r32 by r32 to r32 */ +emitterT void IMUL32RtoR( x86IntRegType to, x86IntRegType from ) +{ + iSMUL( iRegister32(to), iRegister32(from) ); +} + +/* div eax by r32 to edx:eax */ +emitterT void DIV32R( x86IntRegType from ) { iUDIV( iRegister32(from) ); } +/* idiv eax by r32 to edx:eax */ +emitterT void IDIV32R( x86IntRegType from ) { iSDIV( iRegister32(from) ); } +/* div eax by m32 to edx:eax */ +emitterT void DIV32M( u32 from ) { iUDIV( ptr32[from] ); } +/* idiv eax by m32 to edx:eax */ +emitterT void IDIV32M( u32 from ) { iSDIV( ptr32[from] ); } + + emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) { iLEA( iRegister32( to ), ptr[x86IndexReg(from)+offset] ); @@ -413,30 +438,6 @@ emitterT void DEC16M( u32 to ) write32( MEMADDR(to, 4) ); } -/* mul eax by r32 to edx:eax */ -emitterT void MUL32R( x86IntRegType from ) { iUMUL( iRegister32(from) ); } -/* imul eax by r32 to edx:eax */ -emitterT void IMUL32R( x86IntRegType from ) { iSMUL( iRegister32(from) ); } -/* mul eax by m32 to edx:eax */ -emitterT void MUL32M( u32 from ) { iUMUL( ptr32[from] ); } -/* imul eax by m32 to edx:eax */ -emitterT void IMUL32M( u32 from ) { iSMUL( ptr32[from] ); } - -/* imul r32 by r32 to r32 */ -emitterT void IMUL32RtoR( x86IntRegType to, x86IntRegType from ) -{ - iSMUL( iRegister32(to), iRegister32(from) ); -} - -/* div eax by r32 to edx:eax */ -emitterT void DIV32R( x86IntRegType from ) { iUDIV( iRegister32(from) ); } -/* idiv eax by r32 to edx:eax */ -emitterT void IDIV32R( x86IntRegType from ) { iSDIV( iRegister32(from) ); } -/* div eax by m32 to edx:eax */ -emitterT void DIV32M( u32 from ) { iUDIV( ptr32[from] ); } -/* idiv eax by m32 to edx:eax */ -emitterT void IDIV32M( u32 from ) { iSDIV( ptr32[from] ); } - //////////////////////////////////// // logical instructions / //////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index e2f3c16a1e..af473c5c8a 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -147,6 +147,15 @@ namespace x86Emitter # define __noinline #endif + // ModRM 'mod' field enumeration. Provided mostly for reference: + enum ModRm_ModField + { + Mod_NoDisp = 0, // effective address operation with no displacement, in the form of [reg] (or uses special Disp32-only encoding in the case of [ebp] form) + Mod_Disp8, // effective address operation with 8 bit displacement, in the form of [reg+disp8] + Mod_Disp32, // effective address operation with 32 bit displacement, in the form of [reg+disp32], + Mod_Direct, // direct reg/reg operation + }; + static const int ModRm_Direct = 3; // when used as the first parameter, specifies direct register operation (no mem) static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) @@ -636,57 +645,17 @@ namespace x86Emitter namespace Internal { extern void ModRM( uint mod, uint reg, uint rm ); + extern void ModRM_Direct( uint reg, uint rm ); extern void SibSB( u32 ss, u32 index, u32 base ); extern void iWriteDisp( int regfield, s32 displacement ); extern void iWriteDisp( int regfield, const void* address ); extern void EmitSibMagic( uint regfield, const ModSibBase& info ); - #include "ix86_impl_group1.h" - #include "ix86_impl_group2.h" - #include "ix86_impl_movs.h" // cmov and movsx/zx - #include "ix86_impl_dwshift.h" // dowubleword shifts! 
- - // if the immediate is zero, we can replace the instruction, or ignore it - // entirely, depending on the instruction being issued. That's what we do here. - // (returns FALSE if no optimization is performed) - // [TODO] : Work-in-progress! - //template< G1Type InstType, typename RegType > - //static __forceinline void _optimize_imm0( RegType to ); - - /*template< G1Type InstType, typename RegType > - static __forceinline void _optimize_imm0( const RegType& to ) - { - switch( InstType ) - { - // ADD, SUB, and OR can be ignored if the imm is zero.. - case G1Type_ADD: - case G1Type_SUB: - case G1Type_OR: - return true; - - // ADC and SBB can never be ignored (could have carry bits) - // XOR behavior is distinct as well [or is it the same as NEG or NOT?] - case G1Type_ADC: - case G1Type_SBB: - case G1Type_XOR: - return false; - - // replace AND with XOR (or SUB works too.. whatever!) - case G1Type_AND: - iXOR( to, to ); - return true; - - // replace CMP with OR reg,reg: - case G1Type_CMP: - iOR( to, to ); - return true; - - jNO_DEFAULT - } - return false; - }*/ - + #include "implement/group1.h" + #include "implement/group2.h" + #include "implement/movs.h" // cmov and movsx/zx + #include "implement/dwshift.h" // dowubleword shifts! } // ------------------------------------------------------------------------ From dfd433993f7c200efb8684c930167e3ceb7c586d Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 16 Apr 2009 22:33:18 +0000 Subject: [PATCH 074/143] Minor bugfix for unpack mode 2 Fixed split videos in Gradius V Fixed Spyro hanging problem in Issue 112 Put in a hacky fix for FFX videos into IPU to compensate the spyro fix (which is actually correct). Implementing unpack overflow protection (Guitar Hero 3 & Toni Hawks Project 8) Writing XGKick to a temp buffer before sending to the GS (part of the GH3 / THP8 fix) Note! THP8 and GH3 will STILL crash with any VUrecs on and MTGS on, these must all be OFF. Also use GSDX in software mode with the NLoop hack on for now. Slow i know, but it works :P hopefully we can fix the rec side of it soon. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@989 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Gif.cpp | 34 ++++++++++++++---------- pcsx2/IPU/IPU.cpp | 10 +++++++ pcsx2/VUops.cpp | 33 +++++++++++++++-------- pcsx2/Vif.h | 8 +++--- pcsx2/VifDma.cpp | 68 ++++++++++++++++++++++++++++++++++++++++++----- 5 files changed, 117 insertions(+), 36 deletions(-) diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index dd6056effa..59ef287d07 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -28,6 +28,7 @@ using std::min; +#define gifsplit 128 enum gifstate_t { GIF_STATE_EMPTY = 0, @@ -71,7 +72,7 @@ __forceinline void gsInterrupt() { /*if (!(vif1Regs->mskpath3 && (vif1ch->chcr & 0x100)) || (psHu32(GIF_MODE) & 0x1)) CPU_INT( 2, 64 );*/ #endif - if (gspath3done == 0) return; + if(gspath3done == 0 || gif->qwc > 0) return; } gspath3done = 0; @@ -83,6 +84,7 @@ __forceinline void gsInterrupt() { psHu32(GIF_STAT)&= ~0xE00; // OPH=0 | APATH=0 psHu32(GIF_STAT)&= ~0x1F000000; // QFC=0 hwDmacIrq(DMAC_GIF); + GIF_LOG("GIF DMA end"); } @@ -125,7 +127,7 @@ static void WRITERING_DMA(u32 *pMem, u32 qwc) int _GIFchain() { #ifdef GSPATH3FIX - u32 qwc = ((psHu32(GIF_MODE) & 0x4) && (vif1Regs->mskpath3)) ? min(8, (int)gif->qwc) : gif->qwc; + u32 qwc = ((psHu32(GIF_MODE) & 0x4) && (vif1Regs->mskpath3)) ? 
min(8, (int)gif->qwc) : min( gifsplit, (int)gif->qwc ); #else u32 qwc = gif->qwc; #endif @@ -161,7 +163,7 @@ static __forceinline void dmaGIFend() if ((psHu32(GIF_MODE) & 0x4) && gif->qwc != 0) CPU_INT(2, min( 8, (int)gif->qwc ) /** BIAS*/); else - CPU_INT(2, gif->qwc /** BIAS*/); + CPU_INT(2, min( gifsplit, (int)gif->qwc ) /** BIAS*/); } // These could probably be consolidated into one function, @@ -172,7 +174,7 @@ static __forceinline void GIFdmaEnd() if (psHu32(GIF_MODE) & 0x4) CPU_INT(2, min( 8, (int)gif->qwc ) /** BIAS*/); else - CPU_INT(2, gif->qwc /** BIAS*/); + CPU_INT(2, min( gifsplit, (int)gif->qwc ) /** BIAS*/); } void GIFdma() @@ -187,7 +189,7 @@ void GIFdma() return; } - GIF_LOG("dmaGIFstart chcr = %lx, madr = %lx, qwc = %lx\n tadr = %lx, asr0 = %lx, asr1 = %lx", gif->chcr, gif->madr, gif->qwc, gif->tadr, gif->asr0, gif->asr1); + #ifndef GSPATH3FIX if ( !(psHu32(GIF_MODE) & 0x4) ) { @@ -266,14 +268,17 @@ void GIFdma() if (((gif->qwc == 0) && (gif->chcr & 0xc) == 0)) gspath3done = 1; - else + else if(gif->qwc > 0) + { GIFdmaEnd(); return; } - else { + } + if ((gif->chcr & 0xc) == 0x4 && gspath3done == 0) + { // Chain Mode - while ((gspath3done == 0) && (gif->qwc == 0)) { //Loop if the transfers aren't intermittent + //while ((gspath3done == 0) && (gif->qwc == 0)) { //Loop if the transfers aren't intermittent ptag = (u32*)dmaGetAddr(gif->tadr); //Set memory pointer to TADR if (ptag == NULL) { //Is ptag empty? psHu32(DMAC_STAT)|= DMAC_STAT_BEIS; //If yes, set BEIS (BUSERR) in DMAC_STAT register @@ -311,16 +316,16 @@ void GIFdma() GIF_LOG("dmaIrq Set"); gspath3done = 1; } - } + //} } prevcycles = 0; if (!(vif1Regs->mskpath3 || (psHu32(GIF_MODE) & 0x1))) { - if (gspath3done == 0) + if (gspath3done == 0 || gif->qwc > 0) { - if ((psHu32(GIF_MODE) & 0x4) && gif->qwc != 0) + if (gif->qwc != 0) { - CPU_INT(2, min( 8, (int)gif->qwc )/** BIAS*/); + GIFdmaEnd(); } else { @@ -340,7 +345,7 @@ void GIFdma() void dmaGIF() { //We used to addd wait time for the buffer to fill here, fixing some timing problems in path 3 masking //It takes the time of 24 QW for the BUS to become ready - The Punisher, And1 Streetball - + GIF_LOG("dmaGIFstart chcr = %lx, madr = %lx, qwc = %lx\n tadr = %lx, asr0 = %lx, asr1 = %lx", gif->chcr, gif->madr, gif->qwc, gif->tadr, gif->asr0, gif->asr1); if ((psHu32(DMAC_CTRL) & 0xC) == 0xC ) { // GIF MFIFO Console::WriteLn("GIF MFIFO"); gifMFIFOInterrupt(); @@ -359,7 +364,8 @@ void dmaGIF() { gif->qwc = (u16)ptag[0]; //QWC set to lower 16bits of the tag gif->chcr = ( gif->chcr & 0xFFFF ) | ( (*ptag) & 0xFFFF0000 ); //Transfer upper part of tag to CHCR bits 31-15 - dmaGIFend(); + //gspath3done = hwDmacSrcChainWithStack(gif, (ptag[0] >> 28) & 0x7); + GIFdmaEnd(); gif->qwc = 0; return; } diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index a1b7626df5..04152bfd10 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -1370,6 +1370,9 @@ int FIFOto_write(u32* pMem, int size) } \ } +#define gif ((DMACh*)&PS2MEM_HW[0xA000]) +extern void gsInterrupt(); + int IPU1dma() { u32 *ptag, *pMem; @@ -1383,6 +1386,13 @@ int IPU1dma() assert(!(g_nDMATransfer & IPU_DMA_TIE1)); + //We need to make sure GIF has flushed before sending IPU data, it seems to REALLY screw FFX videos + while(gif->chcr & 0x100) + { + GIF_LOG("Flushing gif chcr %x tadr %x madr %x qwc %x", gif->chcr, gif->tadr, gif->madr, gif->qwc); + gsInterrupt(); + } + // in kh, qwc == 0 when dma_actv1 is set if ((g_nDMATransfer & IPU_DMA_ACTV1) && ipu1dma->qwc > 0) { diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 
299803d2d6..bd8b81b0b7 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -1585,7 +1585,7 @@ void _vuLQ(VURegs * VU) { if (_Ft_ == 0) return; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Fs_].SS[0]) * 16; + addr = ((imm + VU->VI[_Fs_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) VU->VF[_Ft_].UL[0] = ptr[0]; @@ -1601,7 +1601,7 @@ void _vuLQD( VURegs * VU ) { if (_Fs_ != 0) VU->VI[_Fs_].US[0]--; if (_Ft_ == 0) return; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16) & (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) VU->VF[_Ft_].UL[0] = ptr[0]; if (_Y) VU->VF[_Ft_].UL[1] = ptr[1]; @@ -1614,7 +1614,7 @@ void _vuLQI(VURegs * VU) { u32 addr; u32 *ptr; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) VU->VF[_Ft_].UL[0] = ptr[0]; if (_Y) VU->VF[_Ft_].UL[1] = ptr[1]; @@ -1631,7 +1631,7 @@ void _vuSQ(VURegs * VU) { u32 *ptr; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Ft_].SS[0]) * 16; + addr = ((imm + VU->VI[_Ft_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) ptr[0] = VU->VF[_Fs_].UL[0]; if (_Y) ptr[1] = VU->VF[_Fs_].UL[1]; @@ -1644,7 +1644,7 @@ void _vuSQD(VURegs * VU) { u32 *ptr; if(_Ft_ != 0) VU->VI[_Ft_].US[0]--; - addr = VU->VI[_Ft_].US[0] * 16; + addr = (VU->VI[_Ft_].US[0] * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) ptr[0] = VU->VF[_Fs_].UL[0]; if (_Y) ptr[1] = VU->VF[_Fs_].UL[1]; @@ -1656,7 +1656,7 @@ void _vuSQI(VURegs * VU) { u32 addr; u32 *ptr; - addr = VU->VI[_Ft_].US[0] * 16; + addr = (VU->VI[_Ft_].US[0] * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u32*)GET_VU_MEM(VU, addr); if (_X) ptr[0] = VU->VF[_Fs_].UL[0]; if (_Y) ptr[1] = VU->VF[_Fs_].UL[1]; @@ -1673,7 +1673,7 @@ void _vuILW(VURegs * VU) { if (_Ft_ == 0) return; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Fs_].SS[0]) * 16; + addr = ((imm + VU->VI[_Fs_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) VU->VI[_Ft_].US[0] = ptr[0]; if (_Y) VU->VI[_Ft_].US[0] = ptr[2]; @@ -1687,7 +1687,7 @@ void _vuISW(VURegs * VU) { u16 *ptr; imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - addr = (imm + VU->VI[_Fs_].SS[0]) * 16; + addr = ((imm + VU->VI[_Fs_].SS[0]) * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) { ptr[0] = VU->VI[_Ft_].US[0]; ptr[1] = 0; } if (_Y) { ptr[2] = VU->VI[_Ft_].US[0]; ptr[3] = 0; } @@ -1700,7 +1700,7 @@ void _vuILWR(VURegs * VU) { u16 *ptr; if (_Ft_ == 0) return; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16)& (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) VU->VI[_Ft_].US[0] = ptr[0]; if (_Y) VU->VI[_Ft_].US[0] = ptr[2]; @@ -1712,7 +1712,7 @@ void _vuISWR(VURegs * VU) { u32 addr; u16 *ptr; - addr = VU->VI[_Fs_].US[0] * 16; + addr = (VU->VI[_Fs_].US[0] * 16) & (VU == &VU1 ? 0x3fff : 0xfff); ptr = (u16*)GET_VU_MEM(VU, addr); if (_X) { ptr[0] = VU->VI[_Ft_].US[0]; ptr[1] = 0; } if (_Y) { ptr[2] = VU->VI[_Ft_].US[0]; ptr[3] = 0; } @@ -2045,9 +2045,20 @@ void _vuXITOP(VURegs * VU) { void _vuXGKICK(VURegs * VU) { + u32* ptr = (u32*)GET_VU_MEM(VU, (VU->VI[_Fs_].US[0]*16) & (VU == &VU1 ? 
0x3fff : 0xfff)); + int temp = 0x4000 - ((VU->VI[_Fs_].US[0]*16) & 0x3fff); + u32 tempmem[0x8000]; + // flush all pipelines first (in the right order) _vuFlushAll(VU); - GSGIFTRANSFER1((u32*)VU->Mem, (VU->VI[_Fs_].US[0]*16) & 0x3fff); + + //Gonna be slow but reshuffles the memory so overflows wont occur + memset(tempmem, 0, sizeof(tempmem)); + memcpy(tempmem, ptr, temp); + ptr = (u32*)GET_VU_MEM(VU, 0); + memcpy(&tempmem[temp], ptr, ((VU->VI[_Fs_].US[0]*16) & 0x3fff)); + GSGIFTRANSFER1((u32*)&tempmem[0], 0); + //} else GSGIFTRANSFER1((u32*)VU->Mem, (VU->VI[_Fs_].US[0]*16) & 0x3fff); } void _vuXTOP(VURegs * VU) { diff --git a/pcsx2/Vif.h b/pcsx2/Vif.h index 64335c4dde..2f84369b5a 100644 --- a/pcsx2/Vif.h +++ b/pcsx2/Vif.h @@ -92,16 +92,16 @@ static __forceinline u32 setVifRowRegs(u32 reg, u32 data) switch (reg) { case 0: - vifRegs->r0 = data; + vifRegs->r0 += data; break; case 1: - vifRegs->r1 = data; + vifRegs->r1 += data; break; case 2: - vifRegs->r2 = data; + vifRegs->r2 += data; break; case 3: - vifRegs->r3 = data; + vifRegs->r3 += data; break; jNO_DEFAULT; } diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 6e594881fe..5b57c430f7 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -80,10 +80,10 @@ struct VIFUnpackFuncTable UNPACKFUNCTYPE funcU; UNPACKFUNCTYPE funcS; - int bsize; // currently unused - int dsize; // byte size of one channel - int gsize; // size of data in bytes used for each write cycle - int qsize; // used for unpack parts, num of vectors that + u32 bsize; // currently unused + u32 dsize; // byte size of one channel + u32 gsize; // size of data in bytes used for each write cycle + u32 qsize; // used for unpack parts, num of vectors that // will be decompressed from data for 1 cycle }; @@ -333,7 +333,7 @@ static void ProcessMemSkip(int size, unsigned int unpackType, const unsigned int } -static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +static int VIFalign(u32 *data, vifCode *v, unsigned int size, const unsigned int VIFdmanum) { u32 *dest; u32 unpackType; @@ -485,7 +485,7 @@ static int VIFalign(u32 *data, vifCode *v, int size, const unsigned int VIFdmanu } -static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdmanum) +static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned int VIFdmanum) { u32 *dest; u32 unpackType; @@ -493,6 +493,7 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma const VIFUnpackFuncTable *ft; VURegs * VU; u8 *cdata = (u8*)data; + u32 tempsize = 0; #ifdef _DEBUG u32 memsize = VIFdmanum ? 0x4000 : 0x1000; @@ -554,6 +555,18 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma } #endif + tempsize = (vif->tag.addr + (size / (ft->gsize * vifRegs->cycle.wl)) * + ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) + ((size / ft->gsize) * 16); + + //Sanity Check (memory overflow) + if(tempsize > (u32)(VIFdmanum ? 0x4000 : 0x1000)) + { + + // DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, tempsize, VIFdmanum ? 
0x4000 : 0x1000); + tempsize = size; + size = 0; + } else tempsize = 0; + if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write { @@ -658,7 +671,48 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma } } - else if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have + else if(tempsize) + { + int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4; + size = 0; + + + while ((tempsize >= ft->gsize) && (vifRegs->num > 0)) + { + //VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, vif->tag.addr); + func(dest, (u32*)cdata, ft->qsize); + cdata += ft->gsize; + tempsize -= ft->gsize; + + vifRegs->num--; + ++vif->cl; + if (vif->cl == vifRegs->cycle.wl) + { + dest += incdest; + v->addr = (v->addr + (incdest * 4)) & (VIFdmanum ? 0x3fff : 0xfff); + if(v->addr <= (u32)(VIFdmanum ? 0x3000 : 0x500)) dest = (u32*)(VU->Mem + v->addr); + vif->cl = 0; + } + else + { + dest += 4; + v->addr = (v->addr + 16) & (VIFdmanum ? 0x3fff : 0xfff); + if(v->addr <= (u32)(VIFdmanum ? 0x3000 : 0x500)) dest = (u32*)(VU->Mem + v->addr); + } + } + + if(vifRegs->mode == 2) + { + //Update the reg rows for SSE + vifRow[0] = vifRegs->r0; + vifRow[1] = vifRegs->r1; + vifRow[2] = vifRegs->r2; + vifRow[3] = vifRegs->r3; + } + if(tempsize > 0) size = tempsize; + + } + if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have { //VIF_LOG("warning, end with size = %d", size); From 1279fe2c2188b69dddabb69444ea91f0879b91bf Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 16 Apr 2009 22:38:55 +0000 Subject: [PATCH 075/143] Emitter: Implemented INC/DEC/NEG/NOT instructions. Plus: many code cleanups using a better form of template parameter inference. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@990 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 12 +- pcsx2/x86/ix86/implement/dwshift.h | 29 +-- pcsx2/x86/ix86/implement/group1.h | 77 ++---- pcsx2/x86/ix86/implement/group2.h | 47 ++-- pcsx2/x86/ix86/implement/group3.h | 66 +++++ pcsx2/x86/ix86/implement/incdec.h | 51 ++++ pcsx2/x86/ix86/implement/movs.h | 197 ++++++++++++-- pcsx2/x86/ix86/ix86.cpp | 285 +++------------------ pcsx2/x86/ix86/ix86_instructions.h | 61 +---- pcsx2/x86/ix86/ix86_legacy.cpp | 174 +++---------- pcsx2/x86/ix86/ix86_types.h | 136 +++++----- 11 files changed, 497 insertions(+), 638 deletions(-) create mode 100644 pcsx2/x86/ix86/implement/group3.h diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 7f961b7061..51852e0cdc 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2977,10 +2977,6 @@ RelativePath="..\..\x86\ix86\ix86_types.h" > - - @@ -2996,10 +2992,18 @@ RelativePath="..\..\x86\ix86\implement\group2.h" > + + + + -class DwordShiftImpl +class DwordShiftImpl : public ImplementationHelper< ImmType > { -public: - static const uint OperandSize = sizeof(ImmType); - - DwordShiftImpl() {} // because GCC doesn't like static classes - protected: - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } static void basesibform( bool isCL ) { prefix16(); @@ -44,15 +37,17 @@ protected: write8( (isCL ? 0xa5 : 0xa4) | (isShiftRight ? 
0x8 : 0) ); } -public: - static __emitinline void Emit( const iRegister& to, const iRegister& from ) +public: + DwordShiftImpl() {} // because GCC doesn't like static classes + + static __emitinline void Emit( const iRegister& to, const iRegister& from ) { prefix16(); write16( 0xa50f | (isShiftRight ? 0x800 : 0) ); ModRM_Direct( from.Id, to.Id ); } - static __emitinline void Emit( const iRegister& to, const iRegister& from, u8 imm ) + static __emitinline void Emit( const iRegister& to, const iRegister& from, u8 imm ) { if( imm == 0 ) return; prefix16(); @@ -61,13 +56,13 @@ public: write8( imm ); } - static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from, __unused const iRegisterCL& clreg ) + static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from, __unused const iRegisterCL& clreg ) { basesibform(); EmitSibMagic( from.Id, sibdest ); } - static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from, u8 imm ) + static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from, u8 imm ) { basesibform(); EmitSibMagic( from.Id, sibdest ); @@ -75,14 +70,14 @@ public: } // dest data type is inferred from the 'from' register, so we can do void* resolution :) - static __emitinline void Emit( void* dest, const iRegister& from, __unused const iRegisterCL& clreg ) + static __emitinline void Emit( void* dest, const iRegister& from, __unused const iRegisterCL& clreg ) { basesibform(); iWriteDisp( from.Id, dest ); } // dest data type is inferred from the 'from' register, so we can do void* resolution :) - static __emitinline void Emit( void* dest, const iRegister& from, u8 imm ) + static __emitinline void Emit( void* dest, const iRegister& from, u8 imm ) { basesibform(); iWriteDisp( from.Id, dest ); @@ -92,6 +87,8 @@ public: // ------------------------------------------------------------------- +// I use explicit method declarations here instead of templates, in order to provide +// *only* 32 and 16 bit register operand forms (8 bit registers are not valid in SHLD/SHRD). // template< bool isShiftRight > class DwordShiftImplAll @@ -100,8 +97,6 @@ protected: typedef DwordShiftImpl m_32; typedef DwordShiftImpl m_16; - // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) - public: // ---------- 32 Bit Interface ----------- __forceinline void operator()( const iRegister32& to, const iRegister32& from, __unused const iRegisterCL& clreg ) const { m_32::Emit( to, from ); } diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index c9b313cb85..2837960f73 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -34,55 +34,48 @@ enum G1Type }; // ------------------------------------------------------------------- -template< typename ImmType, G1Type InstType > -class Group1Impl +template< G1Type InstType, typename ImmType > +class Group1Impl : public ImplementationHelper< ImmType > { public: - static const uint OperandSize = sizeof(ImmType); - Group1Impl() {} // because GCC doesn't like static classes -protected: - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } - -public: - static __emitinline void Emit( const iRegister& to, const iRegister& from ) + static __emitinline void Emit( const iRegister& to, const iRegister& from ) { prefix16(); iWrite( (Is8BitOperand() ? 
0 : 1) | (InstType<<3) ); ModRM_Direct( from.Id, to.Id ); } - static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from ) + static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from ) { prefix16(); iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); EmitSibMagic( from.Id, sibdest ); } - static __emitinline void Emit( const iRegister& to, const ModSibBase& sibsrc ) + static __emitinline void Emit( const iRegister& to, const ModSibBase& sibsrc ) { prefix16(); iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); EmitSibMagic( to.Id, sibsrc ); } - static __emitinline void Emit( void* dest, const iRegister& from ) + static __emitinline void Emit( void* dest, const iRegister& from ) { prefix16(); iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); iWriteDisp( from.Id, dest ); } - static __emitinline void Emit( const iRegister& to, const void* src ) + static __emitinline void Emit( const iRegister& to, const void* src ) { prefix16(); iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); iWriteDisp( to.Id, src ); } - static __emitinline void Emit( const iRegister& to, ImmType imm ) + static __emitinline void Emit( const iRegister& to, int imm ) { prefix16(); if( !Is8BitOperand() && is_s8( imm ) ) @@ -104,7 +97,7 @@ public: } } - static __emitinline void Emit( const ModSibStrict& sibdest, ImmType imm ) + static __emitinline void Emit( const ModSibStrict& sibdest, int imm ) { if( Is8BitOperand() ) { @@ -131,43 +124,27 @@ public: template< G1Type InstType > class Group1ImplAll { -protected: - typedef Group1Impl m_32; - typedef Group1Impl m_16; - typedef Group1Impl m_8; - - // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) - public: - // ---------- 32 Bit Interface ----------- - __forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( to, from ); } - __forceinline void operator()( const iRegister32& to, const void* src ) const { m_32::Emit( to, src ); } - __forceinline void operator()( void* dest, const iRegister32& from ) const { m_32::Emit( dest, from ); } - __noinline void operator()( const ModSibBase& sibdest, const iRegister32& from ) const { m_32::Emit( sibdest, from ); } - __noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const{ m_32::Emit( to, sibsrc ); } - __noinline void operator()( const ModSibStrict<4>& sibdest, u32 imm ) const { m_32::Emit( sibdest, imm ); } + template< typename T > + __forceinline void operator()( const iRegister& to, const iRegister& from ) const { Group1Impl::Emit( to, from ); } + template< typename T > + __forceinline void operator()( const iRegister& to, const void* src ) const { Group1Impl::Emit( to, src ); } + template< typename T > + __forceinline void operator()( void* dest, const iRegister& from ) const { Group1Impl::Emit( dest, from ); } + template< typename T > + __noinline void operator()( const ModSibBase& sibdest, const iRegister& from ) const { Group1Impl::Emit( sibdest, from ); } + template< typename T > + __noinline void operator()( const iRegister& to, const ModSibBase& sibsrc ) const { Group1Impl::Emit( to, sibsrc ); } - void operator()( const iRegister32& to, u32 imm ) const { m_32::Emit( to, imm ); } + // Note on Imm forms : use int as the source operand since it's "reasonably inert" from a compiler + // perspective. (using uint tends to make the compiler try and fail to match signed immediates with + // one of the other overloads). 
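// Editorial sketch (hypothetical call, not from the original patch) of the
// failure mode described above: with an unsigned immediate parameter, a call
// such as
//
//    iADD( someReg16, -4 );   // int literal would need an int -> u16 conversion
//
// drags a signed/unsigned conversion into overload resolution alongside the
// other templated operator() forms, and the overload the caller meant can be
// rejected.  Declaring the immediate as plain 'int' keeps the literal's
// natural type out of that fight.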
+ + template< typename T > + __noinline void operator()( const ModSibStrict& sibdest, int imm ) const { Group1Impl::Emit( sibdest, imm ); } + template< typename T > + void operator()( const iRegister& to, int imm ) const { Group1Impl::Emit( to, imm ); } - // ---------- 16 Bit Interface ----------- - __forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( to, from ); } - __forceinline void operator()( const iRegister16& to, const void* src ) const { m_16::Emit( to, src ); } - __forceinline void operator()( void* dest, const iRegister16& from ) const { m_16::Emit( dest, from ); } - __noinline void operator()( const ModSibBase& sibdest, const iRegister16& from ) const { m_16::Emit( sibdest, from ); } - __noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const{ m_16::Emit( to, sibsrc ); } - __noinline void operator()( const ModSibStrict<2>& sibdest, u16 imm ) const { m_16::Emit( sibdest, imm ); } - - void operator()( const iRegister16& to, u16 imm ) const { m_16::Emit( to, imm ); } - - // ---------- 8 Bit Interface ----------- - __forceinline void operator()( const iRegister8& to, const iRegister8& from ) const { m_8::Emit( to, from ); } - __forceinline void operator()( const iRegister8& to, const void* src ) const { m_8::Emit( to, src ); } - __forceinline void operator()( void* dest, const iRegister8& from ) const { m_8::Emit( dest, from ); } - __noinline void operator()( const ModSibBase& sibdest, const iRegister8& from ) const { m_8::Emit( sibdest, from ); } - __noinline void operator()( const iRegister8& to, const ModSibBase& sibsrc ) const{ m_8::Emit( to, sibsrc ); } - __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } - - void operator()( const iRegister8& to, u8 imm ) const { m_8::Emit( to, imm ); } Group1ImplAll() {} // Why does GCC need these? }; diff --git a/pcsx2/x86/ix86/implement/group2.h b/pcsx2/x86/ix86/implement/group2.h index 481e3b37d8..74979f8c91 100644 --- a/pcsx2/x86/ix86/implement/group2.h +++ b/pcsx2/x86/ix86/implement/group2.h @@ -38,27 +38,20 @@ enum G2Type // Optimization Note: For Imm forms, we ignore the instruction if the shift count is zero. // This is a safe optimization since any zero-value shift does not affect any flags. // -template< typename ImmType, G2Type InstType > -class Group2Impl +template< G2Type InstType, typename ImmType > +class Group2Impl : public ImplementationHelper< ImmType > { public: - static const uint OperandSize = sizeof(ImmType); - Group2Impl() {} // For the love of GCC. -protected: - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } - -public: - static __emitinline void Emit( const iRegister& to ) + static __emitinline void Emit( const iRegister& to ) { prefix16(); iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); ModRM_Direct( InstType, to.Id ); } - static __emitinline void Emit( const iRegister& to, u8 imm ) + static __emitinline void Emit( const iRegister& to, u8 imm ) { if( imm == 0 ) return; @@ -77,14 +70,14 @@ public: } } - static __emitinline void Emit( const ModSibStrict& sibdest ) + static __emitinline void Emit( const ModSibStrict& sibdest ) { prefix16(); iWrite( Is8BitOperand() ? 
0xd2 : 0xd3 ); EmitSibMagic( InstType, sibdest ); } - static __emitinline void Emit( const ModSibStrict& sibdest, u8 imm ) + static __emitinline void Emit( const ModSibStrict& sibdest, u8 imm ) { if( imm == 0 ) return; @@ -109,11 +102,6 @@ public: template< G2Type InstType > class Group2ImplAll { -protected: - typedef Group2Impl m_32; - typedef Group2Impl m_16; - typedef Group2Impl m_8; - // Inlining Notes: // I've set up the inlining to be as practical and intelligent as possible, which means // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to @@ -125,22 +113,17 @@ protected: public: // ---------- 32 Bit Interface ----------- - __forceinline void operator()( const iRegister32& to, __unused const iRegisterCL& from ) const{ m_32::Emit( to ); } - __noinline void operator()( const ModSibStrict<4>& sibdest, __unused const iRegisterCL& from ) const{ m_32::Emit( sibdest ); } - __noinline void operator()( const ModSibStrict<4>& sibdest, u8 imm ) const { m_32::Emit( sibdest, imm ); } - void operator()( const iRegister32& to, u8 imm ) const { m_32::Emit( to, imm ); } + template< typename T > __forceinline void operator()( const iRegister& to, __unused const iRegisterCL& from ) const + { Group2Impl::Emit( to ); } - // ---------- 16 Bit Interface ----------- - __forceinline void operator()( const iRegister16& to, __unused const iRegisterCL& from ) const{ m_16::Emit( to ); } - __noinline void operator()( const ModSibStrict<2>& sibdest, __unused const iRegisterCL& from ) const{ m_16::Emit( sibdest ); } - __noinline void operator()( const ModSibStrict<2>& sibdest, u8 imm ) const { m_16::Emit( sibdest, imm ); } - void operator()( const iRegister16& to, u8 imm ) const { m_16::Emit( to, imm ); } + template< typename T > __noinline void operator()( const ModSibStrict& sibdest, __unused const iRegisterCL& from ) const + { Group2Impl::Emit( sibdest ); } - // ---------- 8 Bit Interface ----------- - __forceinline void operator()( const iRegister8& to, __unused const iRegisterCL& from ) const{ m_8::Emit( to ); } - __noinline void operator()( const ModSibStrict<1>& sibdest, __unused const iRegisterCL& from ) const{ m_8::Emit( sibdest ); } - __noinline void operator()( const ModSibStrict<1>& sibdest, u8 imm ) const { m_8::Emit( sibdest, imm ); } - void operator()( const iRegister8& to, u8 imm ) const { m_8::Emit( to, imm ); } + template< typename T > __noinline void operator()( const ModSibStrict& sibdest, u8 imm ) const + { Group2Impl::Emit( sibdest, imm ); } + + template< typename T > void operator()( const iRegister& to, u8 imm ) const + { Group2Impl::Emit( to, imm ); } Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds. }; diff --git a/pcsx2/x86/ix86/implement/group3.h b/pcsx2/x86/ix86/implement/group3.h new file mode 100644 index 0000000000..9cf60b2256 --- /dev/null +++ b/pcsx2/x86/ix86/implement/group3.h @@ -0,0 +1,66 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +// Note: This header is meant to be included from within the x86Emitter::Internal namespace. +// Instructions implemented in this header are as follows -->> + +enum G3Type +{ + G3Type_NOT = 2, + G3Type_NEG = 3, + G3Type_MUL = 4, + G3Type_iMUL = 5, // partial implementation, iMul has additional forms in ix86.cpp + G3Type_DIV = 6, + G3Type_iDIV = 7 +}; + +template< typename ImmType > +class Group3Impl : public ImplementationHelper +{ +public: + Group3Impl() {} // For the love of GCC. + + static __emitinline void Emit( G3Type InstType, const iRegister& from ) + { + prefix16(); + iWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); + ModRM_Direct( InstType, from.Id ); + } + + static __emitinline void Emit( G3Type InstType, const ModSibStrict& sibsrc ) + { + prefix16(); + iWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); + EmitSibMagic( InstType, sibsrc ); + } +}; + +// ------------------------------------------------------------------- +// +template< G3Type InstType > +class Group3ImplAll +{ +public: + template< typename T > + __forceinline void operator()( const iRegister& from ) const { Group3Impl::Emit( InstType, from ); } + + template< typename T > + __noinline void operator()( const ModSibStrict& from ) const { Group3Impl::Emit( InstType, from ); } +}; \ No newline at end of file diff --git a/pcsx2/x86/ix86/implement/incdec.h b/pcsx2/x86/ix86/implement/incdec.h index 3a3f9690eb..0cf5a08c62 100644 --- a/pcsx2/x86/ix86/implement/incdec.h +++ b/pcsx2/x86/ix86/implement/incdec.h @@ -20,3 +20,54 @@ // Implementations found here: Increment and Decrement Instructions! // Note: This header is meant to be included from within the x86Emitter::Internal namespace. + +template< typename ImmType > +class IncDecImpl : public ImplementationHelper +{ +public: + IncDecImpl() {} // For the love of GCC. + + static __emitinline void Emit( bool isDec, const iRegister& to ) + { + // There is no valid 8-bit form of direct register inc/dec, so fall + // back on Mod/RM format instead: + if( Is8BitOperand() ) + { + write8( 0xfe ); + ModRM_Direct( isDec ? 1 : 0, to.Id ); + } + else + { + prefix16(); + write8( (isDec ? 0x48 : 0x40) | to.Id ); + } + } + + static __emitinline void Emit( bool isDec, const ModSibStrict& dest ) + { + write8( Is8BitOperand() ? 0xfe : 0xff ); + EmitSibMagic( isDec ? 1: 0, dest ); + } +}; + +// ------------------------------------------------------------------------ +template< bool isDec > +class IncDecImplAll +{ +protected: + typedef IncDecImpl m_32; + typedef IncDecImpl m_16; + typedef IncDecImpl m_8; + +public: + __forceinline void operator()( const iRegister32& to ) const { m_32::Emit( isDec, to ); } + __noinline void operator()( const ModSibStrict& sibdest ) const { m_32::Emit( isDec, sibdest ); } + + __forceinline void operator()( const iRegister16& to ) const { m_16::Emit( isDec, to ); } + __noinline void operator()( const ModSibStrict& sibdest ) const { m_16::Emit( isDec, sibdest ); } + + __forceinline void operator()( const iRegister8& to ) const { m_8::Emit( isDec, to ); } + __noinline void operator()( const ModSibStrict& sibdest ) const { m_8::Emit( isDec, sibdest ); } + + IncDecImplAll() {} // don't ask. 
+}; diff --git a/pcsx2/x86/ix86/implement/movs.h b/pcsx2/x86/ix86/implement/movs.h index 2ba18be506..2899659930 100644 --- a/pcsx2/x86/ix86/implement/movs.h +++ b/pcsx2/x86/ix86/implement/movs.h @@ -21,11 +21,164 @@ // Header: ix86_impl_movs.h -- covers cmov and movsx/movzx. // Note: This header is meant to be included from within the x86Emitter::Internal namespace. +////////////////////////////////////////////////////////////////////////////////////////// +// MOV instruction Implementation + +template< typename ImmType > +class MovImpl : ImplementationHelper< ImmType > +{ +public: + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, const iRegister& from ) + { + if( to == from ) return; // ignore redundant MOVs. + + prefix16(); + iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + ModRM( 3, from.Id, to.Id ); + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( const ModSibBase& dest, const iRegister& from ) + { + prefix16(); + + // mov eax has a special from when writing directly to a DISP32 address + // (sans any register index/base registers). + + if( from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty() ) + { + iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); + iWrite( dest.Displacement ); + } + else + { + iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + EmitSibMagic( from.Id, dest ); + } + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, const ModSibBase& src ) + { + prefix16(); + + // mov eax has a special from when reading directly from a DISP32 address + // (sans any register index/base registers). + + if( to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty() ) + { + iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); + iWrite( src.Displacement ); + } + else + { + iWrite( Is8BitOperand() ? 0x8a : 0x8b ); + EmitSibMagic( to.Id, src ); + } + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( void* dest, const iRegister& from ) + { + prefix16(); + + // mov eax has a special from when writing directly to a DISP32 address + + if( from.IsAccumulator() ) + { + iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); + iWrite( (s32)dest ); + } + else + { + iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + iWriteDisp( from.Id, dest ); + } + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, const void* src ) + { + prefix16(); + + // mov eax has a special from when reading directly from a DISP32 address + + if( to.IsAccumulator() ) + { + iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); + iWrite( (s32)src ); + } + else + { + iWrite( Is8BitOperand() ? 0x8a : 0x8b ); + iWriteDisp( to.Id, src ); + } + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( const iRegister& to, ImmType imm ) + { + // Note: MOV does not have (reg16/32,imm8) forms. + + prefix16(); + iWrite( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); + iWrite( imm ); + } + + // ------------------------------------------------------------------------ + static __forceinline void Emit( ModSibStrict dest, ImmType imm ) + { + prefix16(); + iWrite( Is8BitOperand() ? 
0xc6 : 0xc7 ); + EmitSibMagic( 0, dest ); + iWrite( imm ); + } +}; + +// Inlining Notes: +// I've set up the inlining to be as practical and intelligent as possible, which means +// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to +// virtually no code. In the case of (Reg, Imm) forms, the inlinign is up to the dis- +// cretion of the compiler. +// + +class MovImplAll +{ +public: + template< typename T> + __forceinline void operator()( const iRegister& to, const iRegister& from ) const { MovImpl::Emit( to, from ); } + template< typename T> + __forceinline void operator()( const iRegister& to, const void* src ) const { MovImpl::Emit( to, src ); } + template< typename T> + __forceinline void operator()( void* dest, const iRegister& from ) const { MovImpl::Emit( dest, from ); } + template< typename T> + __noinline void operator()( const ModSibBase& sibdest, const iRegister& from ) const { MovImpl::Emit( sibdest, from ); } + template< typename T> + __noinline void operator()( const iRegister& to, const ModSibBase& sibsrc ) const { MovImpl::Emit( to, sibsrc ); } + + template< typename T> + __noinline void operator()( const ModSibStrict& sibdest, int imm ) const { MovImpl::Emit( sibdest, imm ); } + + // preserve_flags - set to true to disable optimizations which could alter the state of + // the flags (namely replacing mov reg,0 with xor). + + template< typename T > + __emitinline void operator()( const iRegister& to, int imm, bool preserve_flags=false ) const + { + if( !preserve_flags && (imm == 0) ) + iXOR( to, to ); + else + MovImpl::Emit( to, imm ); + } +}; + + ////////////////////////////////////////////////////////////////////////////////////////// // CMOV !! [in all of it's disappointing lack-of glory] // -template< int OperandSize > -class CMovImpl +template< typename ImmType > +class CMovImpl : public ImplementationHelper< ImmType > { protected: static bool Is8BitOperand() { return OperandSize == 1; } @@ -42,20 +195,20 @@ protected: public: CMovImpl() {} - static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const iRegister& from ) + static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const iRegister& from ) { if( to == from ) return; emit_base( cc ); - ModRM( ModRm_Direct, to.Id, from.Id ); + ModRM_Direct( to.Id, from.Id ); } - static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const void* src ) + static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const void* src ) { emit_base( cc ); iWriteDisp( to.Id, src ); } - static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const ModSibBase& sibsrc ) + static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const ModSibBase& sibsrc ) { emit_base( cc ); EmitSibMagic( to.Id, sibsrc ); @@ -64,11 +217,14 @@ public: }; // ------------------------------------------------------------------------ +// I use explicit method declarations here instead of templates, in order to provide +// *only* 32 and 16 bit register operand forms (8 bit registers are not valid in CMOV). 
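// Illustrative aside (sketch only; the Jcc_* constants and register names are
// assumed for the example): CMOVcc encodes as 0F 40+cc /r and the ISA defines
// no 8-bit variant, hence only these shapes are exposed:
//
//    iCMOV( Jcc_Equal, eax, ecx );   // cmove  eax, ecx
//    iCMOV( Jcc_Below, ax,  dx  );   // cmovb  ax, dx   (with a 0x66 prefix)
//
// An 8-bit overload would have nothing to encode, so it is deliberately absent.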
+// class CMovImplGeneric { protected: - typedef CMovImpl<4> m_32; - typedef CMovImpl<2> m_16; + typedef CMovImpl m_32; + typedef CMovImpl m_16; public: __forceinline void operator()( JccComparisonType ccType, const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); } @@ -87,8 +243,8 @@ template< JccComparisonType ccType > class CMovImplAll { protected: - typedef CMovImpl<4> m_32; - typedef CMovImpl<2> m_16; + typedef CMovImpl m_32; + typedef CMovImpl m_16; public: __forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); } @@ -105,10 +261,13 @@ public: ////////////////////////////////////////////////////////////////////////////////////////// // Mov with sign/zero extension implementations (movsx / movzx) // -template< int DestOperandSize, int SrcOperandSize > +template< typename DestImmType, typename SrcImmType > class MovExtendImpl { protected: + static const uint DestOperandSize = sizeof( DestImmType ); + static const uint SrcOperandSize = sizeof( SrcImmType ); + static bool Is8BitOperand() { return SrcOperandSize == 1; } static void prefix16() { if( DestOperandSize == 2 ) iWrite( 0x66 ); } static __forceinline void emit_base( bool SignExtend ) @@ -121,13 +280,13 @@ protected: public: MovExtendImpl() {} // For the love of GCC. - static __emitinline void Emit( const iRegister& to, const iRegister& from, bool SignExtend ) + static __emitinline void Emit( const iRegister& to, const iRegister& from, bool SignExtend ) { emit_base( SignExtend ); ModRM_Direct( to.Id, from.Id ); } - static __emitinline void Emit( const iRegister& to, const ModSibStrict& sibsrc, bool SignExtend ) + static __emitinline void Emit( const iRegister& to, const ModSibStrict& sibsrc, bool SignExtend ) { emit_base( SignExtend ); EmitSibMagic( to.Id, sibsrc ); @@ -139,19 +298,19 @@ template< bool SignExtend > class MovExtendImplAll { protected: - typedef MovExtendImpl<4, 2> m_16to32; - typedef MovExtendImpl<4, 1> m_8to32; - typedef MovExtendImpl<2, 1> m_8to16; + typedef MovExtendImpl m_16to32; + typedef MovExtendImpl m_8to32; + typedef MovExtendImpl m_8to16; public: __forceinline void operator()( const iRegister32& to, const iRegister16& from ) const { m_16to32::Emit( to, from, SignExtend ); } - __noinline void operator()( const iRegister32& to, const ModSibStrict<2>& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); } + __noinline void operator()( const iRegister32& to, const ModSibStrict& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); } __forceinline void operator()( const iRegister32& to, const iRegister8& from ) const { m_8to32::Emit( to, from, SignExtend ); } - __noinline void operator()( const iRegister32& to, const ModSibStrict<1>& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); } + __noinline void operator()( const iRegister32& to, const ModSibStrict& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); } __forceinline void operator()( const iRegister16& to, const iRegister8& from ) const { m_8to16::Emit( to, from, SignExtend ); } - __noinline void operator()( const iRegister16& to, const ModSibStrict<1>& sibsrc ) const { m_8to16::Emit( to, sibsrc, SignExtend ); } + __noinline void operator()( const iRegister16& to, const ModSibStrict& sibsrc ) const { m_8to16::Emit( to, sibsrc, SignExtend ); } MovExtendImplAll() {} // don't ask. 
}; diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 7a1d4ec89a..11020fa647 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -67,13 +67,13 @@ __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT }; namespace x86Emitter { const x86IndexerType ptr; -const x86IndexerTypeExplicit<4> ptr32; -const x86IndexerTypeExplicit<2> ptr16; -const x86IndexerTypeExplicit<1> ptr8; +const x86IndexerTypeExplicit ptr32; +const x86IndexerTypeExplicit ptr16; +const x86IndexerTypeExplicit ptr8; // ------------------------------------------------------------------------ -template< int OperandSize > const iRegister iRegister::Empty; +template< typename OperandType > const iRegister iRegister::Empty; const x86IndexReg x86IndexReg::Empty; const iRegister32 @@ -240,6 +240,8 @@ namespace Internal using namespace Internal; +const MovImplAll iMOV; + const Group1ImplAll iADD; const Group1ImplAll iOR; const Group1ImplAll iADC; @@ -257,6 +259,15 @@ const Group2ImplAll iSHL; const Group2ImplAll iSHR; const Group2ImplAll iSAR; +const Group3ImplAll iNOT; +const Group3ImplAll iNEG; +const Group3ImplAll iUMUL; +const Group3ImplAll iUDIV; +const Group3ImplAll iSDIV; + +const IncDecImplAll iINC; +const IncDecImplAll iDEC; + const MovExtendImplAll iMOVZX; const MovExtendImplAll iMOVSX; @@ -336,9 +347,11 @@ __emitinline void iAdvancePtr( uint bytes ) // preserve_flags - set to ture to disable use of SHL on [Index*Base] forms // of LEA, which alters flags states. // -template< typename ToReg > -static void EmitLeaMagic( ToReg to, const ModSibBase& src, bool preserve_flags ) +template< typename OperandType > +static void EmitLeaMagic( iRegister to, const ModSibBase& src, bool preserve_flags ) { + typedef iRegister ToReg; + int displacement_size = (src.Displacement == 0) ? 0 : ( ( src.IsByteSizeDisp() ) ? 1 : 2 ); @@ -465,226 +478,7 @@ __emitinline void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_fla } ////////////////////////////////////////////////////////////////////////////////////////// -// MOV instruction Implementation - -template< typename ImmType > -class MovImpl -{ -public: - static const uint OperandSize = sizeof(ImmType); - -protected: - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } - -public: - // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const iRegister& from ) - { - if( to == from ) return; // ignore redundant MOVs. - - prefix16(); - iWrite( Is8BitOperand() ? 0x88 : 0x89 ); - ModRM( 3, from.Id, to.Id ); - } - - // ------------------------------------------------------------------------ - static __forceinline void Emit( const ModSibBase& dest, const iRegister& from ) - { - prefix16(); - - // mov eax has a special from when writing directly to a DISP32 address - // (sans any register index/base registers). - - if( from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty() ) - { - iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); - iWrite( dest.Displacement ); - } - else - { - iWrite( Is8BitOperand() ? 0x88 : 0x89 ); - EmitSibMagic( from.Id, dest ); - } - } - - // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const ModSibBase& src ) - { - prefix16(); - - // mov eax has a special from when reading directly from a DISP32 address - // (sans any register index/base registers). 
- - if( to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty() ) - { - iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); - iWrite( src.Displacement ); - } - else - { - iWrite( Is8BitOperand() ? 0x8a : 0x8b ); - EmitSibMagic( to.Id, src ); - } - } - - // ------------------------------------------------------------------------ - static __forceinline void Emit( void* dest, const iRegister& from ) - { - prefix16(); - - // mov eax has a special from when writing directly to a DISP32 address - - if( from.IsAccumulator() ) - { - iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); - iWrite( (s32)dest ); - } - else - { - iWrite( Is8BitOperand() ? 0x88 : 0x89 ); - iWriteDisp( from.Id, dest ); - } - } - - // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const void* src ) - { - prefix16(); - - // mov eax has a special from when reading directly from a DISP32 address - - if( to.IsAccumulator() ) - { - iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); - iWrite( (s32)src ); - } - else - { - iWrite( Is8BitOperand() ? 0x8a : 0x8b ); - iWriteDisp( to.Id, src ); - } - } - - // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, ImmType imm ) - { - // Note: MOV does not have (reg16/32,imm8) forms. - - prefix16(); - iWrite( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); - iWrite( imm ); - } - - // ------------------------------------------------------------------------ - static __forceinline void Emit( ModSibStrict dest, ImmType imm ) - { - prefix16(); - iWrite( Is8BitOperand() ? 0xc6 : 0xc7 ); - EmitSibMagic( 0, dest ); - iWrite( imm ); - } -}; - -namespace Internal -{ - typedef MovImpl MOV32; - typedef MovImpl MOV16; - typedef MovImpl MOV8; -} - -// Inlining Notes: -// I've set up the inlining to be as practical and intelligent as possible, which means -// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to -// virtually no code. In the case of (Reg, Imm) forms, the inlinign is up to the dis- -// cretion of the compiler. -// - -// TODO : Turn this into a macro after it's been debugged and accuracy-approved! 
:D - -// ---------- 32 Bit Interface ----------- -__forceinline void iMOV( const iRegister32& to, const iRegister32& from ) { MOV32::Emit( to, from ); } -__forceinline void iMOV( const iRegister32& to, const void* src ) { MOV32::Emit( to, ptr32[src] ); } -__forceinline void iMOV( void* dest, const iRegister32& from ) { MOV32::Emit( ptr32[dest], from ); } -__noinline void iMOV( const ModSibBase& sibdest, const iRegister32& from ) { MOV32::Emit( sibdest, from ); } -__noinline void iMOV( const iRegister32& to, const ModSibBase& sibsrc ) { MOV32::Emit( to, sibsrc ); } -__noinline void iMOV( const ModSibStrict<4>& sibdest,u32 imm ) { MOV32::Emit( sibdest, imm ); } - -void iMOV( const iRegister32& to, u32 imm, bool preserve_flags ) -{ - if( !preserve_flags && (imm == 0) ) - iXOR( to, to ); - else - MOV32::Emit( to, imm ); -} - - -// ---------- 16 Bit Interface ----------- -__forceinline void iMOV( const iRegister16& to, const iRegister16& from ) { MOV16::Emit( to, from ); } -__forceinline void iMOV( const iRegister16& to, const void* src ) { MOV16::Emit( to, ptr16[src] ); } -__forceinline void iMOV( void* dest, const iRegister16& from ) { MOV16::Emit( ptr16[dest], from ); } -__noinline void iMOV( const ModSibBase& sibdest, const iRegister16& from ) { MOV16::Emit( sibdest, from ); } -__noinline void iMOV( const iRegister16& to, const ModSibBase& sibsrc ) { MOV16::Emit( to, sibsrc ); } -__noinline void iMOV( const ModSibStrict<2>& sibdest,u16 imm ) { MOV16::Emit( sibdest, imm ); } - -void iMOV( const iRegister16& to, u16 imm, bool preserve_flags ) -{ - if( !preserve_flags && (imm == 0) ) - iXOR( to, to ); - else - MOV16::Emit( to, imm ); -} - -// ---------- 8 Bit Interface ----------- -__forceinline void iMOV( const iRegister8& to, const iRegister8& from ) { MOV8::Emit( to, from ); } -__forceinline void iMOV( const iRegister8& to, const void* src ) { MOV8::Emit( to, ptr8[src] ); } -__forceinline void iMOV( void* dest, const iRegister8& from ) { MOV8::Emit( ptr8[dest], from ); } -__noinline void iMOV( const ModSibBase& sibdest, const iRegister8& from ) { MOV8::Emit( sibdest, from ); } -__noinline void iMOV( const iRegister8& to, const ModSibBase& sibsrc ) { MOV8::Emit( to, sibsrc ); } -__noinline void iMOV( const ModSibStrict<1>& sibdest,u8 imm ) { MOV8::Emit( sibdest, imm ); } - -void iMOV( const iRegister8& to, u8 imm, bool preserve_flags ) -{ - if( !preserve_flags && (imm == 0) ) - iXOR( to, to ); - else - MOV8::Emit( to, imm ); -} - -////////////////////////////////////////////////////////////////////////////////////////// -// DIV/MUL/IDIV/IMUL instructions (Implemented!) - -// F6 is r8, F7 is r32. -// MUL is 4, DIV is 6. - -enum MulDivType -{ - MDT_Mul = 4, - MDT_iMul = 5, - MDT_Div = 6, - MDT_iDiv = 7 -}; - -// ------------------------------------------------------------------------ -// EAX form emitter for Mul/Div/iMUL/iDIV -// -template< int OperandSize > -static __forceinline void EmitMulDiv_OneRegForm( MulDivType InstType, const iRegister& from ) -{ - if( OperandSize == 2 ) iWrite( 0x66 ); - iWrite( (OperandSize == 1) ? 0xf6 : 0xf7 ); - ModRM( ModRm_Direct, InstType, from.Id ); -} - -static __forceinline void EmitMulDiv_OneRegForm( MulDivType InstType, const ModSibSized& sibsrc ) -{ - if( sibsrc.OperandSize == 2 ) iWrite( 0x66 ); - iWrite( (sibsrc.OperandSize == 1) ? 0xf6 : 0xf7 ); - EmitSibMagic( InstType, sibsrc ); -} - -////////////////////////////////////////////////////////////////////////////////////////// -// All iMul forms are valid for 16 and 32 bit register operands only! 
+// The following iMul-specific forms are valid for 16 and 32 bit register operands only! template< typename ImmType > class iMulImpl @@ -697,15 +491,15 @@ protected: public: // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const iRegister& from ) + static __emitinline void Emit( const iRegister& to, const iRegister& from ) { prefix16(); write16( 0xaf0f ); - ModRM( ModRm_Direct, to.Id, from.Id ); + ModRM_Direct( to.Id, from.Id ); } // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const void* src ) + static __emitinline void Emit( const iRegister& to, const void* src ) { prefix16(); write16( 0xaf0f ); @@ -713,7 +507,7 @@ public: } // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const ModSibBase& src ) + static __emitinline void Emit( const iRegister& to, const ModSibBase& src ) { prefix16(); write16( 0xaf0f ); @@ -721,11 +515,11 @@ public: } // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const iRegister& from, ImmType imm ) + static __emitinline void Emit( const iRegister& to, const iRegister& from, ImmType imm ) { prefix16(); write16( is_s8( imm ) ? 0x6b : 0x69 ); - ModRM( ModRm_Direct, to.Id, from.Id ); + ModRM_Direct( to.Id, from.Id ); if( is_s8( imm ) ) write8( imm ); else @@ -733,7 +527,7 @@ public: } // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const void* src, ImmType imm ) + static __emitinline void Emit( const iRegister& to, const void* src, ImmType imm ) { prefix16(); write16( is_s8( imm ) ? 0x6b : 0x69 ); @@ -745,7 +539,7 @@ public: } // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const ModSibBase& src, ImmType imm ) + static __emitinline void Emit( const iRegister& to, const ModSibBase& src, ImmType imm ) { prefix16(); write16( is_s8( imm ) ? 
0x6b : 0x69 ); @@ -757,28 +551,9 @@ public: } }; -__forceinline void iUMUL( const iRegister32& from ) { EmitMulDiv_OneRegForm( MDT_Mul, from ); } -__forceinline void iUMUL( const iRegister16& from ) { EmitMulDiv_OneRegForm( MDT_Mul, from ); } -__forceinline void iUMUL( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_Mul, from ); } -__noinline void iUMUL( const ModSibSized& from ) { EmitMulDiv_OneRegForm( MDT_Mul, from ); } - -__forceinline void iUDIV( const iRegister32& from ) { EmitMulDiv_OneRegForm( MDT_Div, from ); } -__forceinline void iUDIV( const iRegister16& from ) { EmitMulDiv_OneRegForm( MDT_Div, from ); } -__forceinline void iUDIV( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_Div, from ); } -__noinline void iUDIV( const ModSibSized& from ) { EmitMulDiv_OneRegForm( MDT_Div, from ); } - -__forceinline void iSDIV( const iRegister32& from ) { EmitMulDiv_OneRegForm( MDT_iDiv, from ); } -__forceinline void iSDIV( const iRegister16& from ) { EmitMulDiv_OneRegForm( MDT_iDiv, from ); } -__forceinline void iSDIV( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_iDiv, from ); } -__noinline void iSDIV( const ModSibSized& from ) { EmitMulDiv_OneRegForm( MDT_iDiv, from ); } - -__forceinline void iSMUL( const iRegister32& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } -__forceinline void iSMUL( const iRegister16& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } -__forceinline void iSMUL( const iRegister8& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } -__noinline void iSMUL( const ModSibSized& from ) { EmitMulDiv_OneRegForm( MDT_iMul, from ); } - // ------------------------------------------------------------------------ -// iMUL's special forms (unique to iMUL alone) +// iMUL's special forms (unique to iMUL alone), and valid for 32/16 bit operands only, +// thus noi templates are used. 
namespace Internal { diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 533c2dbd57..d625b6a7c5 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -37,7 +37,6 @@ namespace x86Emitter { extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward=false ); - // ----- Lea Instructions (Load Effective Address) ----- // Note: alternate (void*) forms of these instructions are not provided since those // forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs @@ -86,74 +85,22 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // MUL / DIV instructions - extern void iUMUL( const iRegister32& from ); - extern void iUMUL( const iRegister16& from ); - extern void iUMUL( const iRegister8& from ); - extern void iUMUL( const ModSibSized& from ); - - extern void iUDIV( const iRegister32& from ); - extern void iUDIV( const iRegister16& from ); - extern void iUDIV( const iRegister8& from ); - extern void iUDIV( const ModSibSized& from ); - - extern void iSDIV( const iRegister32& from ); - extern void iSDIV( const iRegister16& from ); - extern void iSDIV( const iRegister8& from ); - extern void iSDIV( const ModSibSized& from ); - - extern void iSMUL( const iRegister32& from ); extern void iSMUL( const iRegister32& to, const iRegister32& from ); extern void iSMUL( const iRegister32& to, const void* src ); extern void iSMUL( const iRegister32& to, const iRegister32& from, s32 imm ); extern void iSMUL( const iRegister32& to, const ModSibBase& src ); extern void iSMUL( const iRegister32& to, const ModSibBase& src, s32 imm ); - extern void iSMUL( const iRegister16& from ); extern void iSMUL( const iRegister16& to, const iRegister16& from ); extern void iSMUL( const iRegister16& to, const void* src ); extern void iSMUL( const iRegister16& to, const iRegister16& from, s16 imm ); extern void iSMUL( const iRegister16& to, const ModSibBase& src ); extern void iSMUL( const iRegister16& to, const ModSibBase& src, s16 imm ); - extern void iSMUL( const iRegister8& from ); - extern void iSMUL( const ModSibSized& from ); - - - ////////////////////////////////////////////////////////////////////////////////////////// - // MOV instructions! - // ---------- 32 Bit Interface ----------- - extern void iMOV( const iRegister32& to, const iRegister32& from ); - extern void iMOV( const ModSibBase& sibdest, const iRegister32& from ); - extern void iMOV( const iRegister32& to, const ModSibBase& sibsrc ); - extern void iMOV( const iRegister32& to, const void* src ); - extern void iMOV( void* dest, const iRegister32& from ); - - // preserve_flags - set to true to disable optimizations which could alter the state of - // the flags (namely replacing mov reg,0 with xor). - extern void iMOV( const iRegister32& to, u32 imm, bool preserve_flags=false ); - extern void iMOV( const ModSibStrict<4>& sibdest, u32 imm ); - - // ---------- 16 Bit Interface ----------- - extern void iMOV( const iRegister16& to, const iRegister16& from ); - extern void iMOV( const ModSibBase& sibdest, const iRegister16& from ); - extern void iMOV( const iRegister16& to, const ModSibBase& sibsrc ); - extern void iMOV( const iRegister16& to, const void* src ); - extern void iMOV( void* dest, const iRegister16& from ); - - // preserve_flags - set to true to disable optimizations which could alter the state of - // the flags (namely replacing mov reg,0 with xor). 
- extern void iMOV( const iRegister16& to, u16 imm, bool preserve_flags=false ); - extern void iMOV( const ModSibStrict<2>& sibdest, u16 imm ); - - // ---------- 8 Bit Interface ----------- - extern void iMOV( const iRegister8& to, const iRegister8& from ); - extern void iMOV( const ModSibBase& sibdest, const iRegister8& from ); - extern void iMOV( const iRegister8& to, const ModSibBase& sibsrc ); - extern void iMOV( const iRegister8& to, const void* src ); - extern void iMOV( void* dest, const iRegister8& from ); - - extern void iMOV( const iRegister8& to, u8 imm, bool preserve_flags=false ); - extern void iMOV( const ModSibStrict<1>& sibdest, u8 imm ); + template< typename T > + __forceinline void iSMUL( const iRegister& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); } + template< typename T > + __noinline void iSMUL( const ModSibStrict& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); } ////////////////////////////////////////////////////////////////////////////////////////// // JMP / Jcc Instructions! diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 2fc12afd89..79f442a190 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -34,55 +34,60 @@ using namespace x86Emitter; -template< int OperandSize > -static __forceinline iRegister _reghlp( x86IntRegType src ) +template< typename ImmType > +static __forceinline iRegister _reghlp( x86IntRegType src ) { - return iRegister( src ); + return iRegister( src ); } static __forceinline ModSibBase _mrmhlp( x86IntRegType src ) { - return ptr[_reghlp<4>(src)]; + return ptr[_reghlp(src)]; } -template< int OperandSize > -static __forceinline ModSibStrict _mhlp( x86IntRegType src ) +template< typename ImmType > +static __forceinline ModSibStrict _mhlp( x86IntRegType src ) { - return ModSibStrict( x86IndexReg::Empty, x86IndexReg(src) ); + return ModSibStrict( x86IndexReg::Empty, x86IndexReg(src) ); } -template< int OperandSize > -static __forceinline ModSibStrict _mhlp2( x86IntRegType src1, x86IntRegType src2 ) +template< typename ImmType > +static __forceinline ModSibStrict _mhlp2( x86IntRegType src1, x86IntRegType src2 ) { - return ModSibStrict( x86IndexReg(src2), x86IndexReg(src1) ); + return ModSibStrict( x86IndexReg(src2), x86IndexReg(src1) ); } ////////////////////////////////////////////////////////////////////////////////////////// // #define DEFINE_LEGACY_HELPER( cod, bits ) \ - emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { i##cod( _reghlp(to), _reghlp(from) ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { i##cod( _reghlp(to), imm ); } \ - emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { i##cod( _reghlp(to), (void*)from ); } \ - emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { i##cod( (void*)to, _reghlp(from) ); } \ + emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { i##cod( _reghlp(to), _reghlp(from) ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { i##cod( _reghlp(to), imm ); } \ + emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { i##cod( _reghlp(to), (void*)from ); } \ + emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { i##cod( (void*)to, _reghlp(from) ); } \ emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { i##cod( ptr##bits[to], imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { i##cod( _mhlp(to) + offset, imm ); } \ - emitterT void 
cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _reghlp(to), _mhlp(from) + offset ); } \ - emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _mhlp(to) + offset, _reghlp(from) ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { i##cod( _mhlp(to) + offset, imm ); } \ + emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _reghlp(to), _mhlp(from) + offset ); } \ + emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _mhlp(to) + offset, _reghlp(from) ); } \ emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ - { i##cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ + { i##cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ emitterT void cod##bits##RmStoR( x86IntRegType to, x86IntRegType from1, x86IntRegType from2, int offset ) \ - { i##cod( _reghlp(to), _mhlp2(from1,from2) + offset ); } + { i##cod( _reghlp(to), _mhlp2(from1,from2) + offset ); } #define DEFINE_LEGACY_SHIFT_HELPER( cod, bits ) \ - emitterT void cod##bits##CLtoR( x86IntRegType to ) { i##cod( _reghlp(to), cl ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { i##cod( _reghlp(to), imm ); } \ + emitterT void cod##bits##CLtoR( x86IntRegType to ) { i##cod( _reghlp(to), cl ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { i##cod( _reghlp(to), imm ); } \ emitterT void cod##bits##CLtoM( uptr to ) { i##cod( ptr##bits[to], cl ); } \ emitterT void cod##bits##ItoM( uptr to, u8 imm ) { i##cod( ptr##bits[to], imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { i##cod( _mhlp(to) + offset, imm ); } \ - emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { i##cod( _mhlp(to) + offset, cl ); } + emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { i##cod( _mhlp(to) + offset, imm ); } \ + emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { i##cod( _mhlp(to) + offset, cl ); } +#define DEFINE_LEGACY_ONEREG_HELPER( cod, bits ) \ + emitterT void cod##bits##R( x86IntRegType to ) { i##cod( _reghlp(to) ); } \ + emitterT void cod##bits##M( uptr to ) { i##cod( ptr##bits[to] ); } \ + emitterT void cod##bits##Rm( x86IntRegType to, uptr offset ) { i##cod( _mhlp(to) + offset ); } + //emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ -// { cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ +// { cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ #define DEFINE_OPCODE_LEGACY( cod ) \ DEFINE_LEGACY_HELPER( cod, 32 ) \ @@ -94,6 +99,11 @@ static __forceinline ModSibStrict _mhlp2( x86IntRegType src1, x86In DEFINE_LEGACY_SHIFT_HELPER( cod, 16 ) \ DEFINE_LEGACY_SHIFT_HELPER( cod, 8 ) +#define DEFINE_OPCODE_ONEREG_LEGACY( cod ) \ + DEFINE_LEGACY_ONEREG_HELPER( cod, 32 ) \ + DEFINE_LEGACY_ONEREG_HELPER( cod, 16 ) \ + DEFINE_LEGACY_ONEREG_HELPER( cod, 8 ) + ////////////////////////////////////////////////////////////////////////////////////////// // DEFINE_OPCODE_LEGACY( ADD ) @@ -115,6 +125,12 @@ DEFINE_OPCODE_SHIFT_LEGACY( SAR ) DEFINE_OPCODE_LEGACY( MOV ) +DEFINE_OPCODE_ONEREG_LEGACY( INC ) +DEFINE_OPCODE_ONEREG_LEGACY( DEC ) +DEFINE_OPCODE_ONEREG_LEGACY( NOT ) +DEFINE_OPCODE_ONEREG_LEGACY( NEG ) + + // ------------------------------------------------------------------------ #define DEFINE_LEGACY_MOVEXTEND( form, destbits, srcbits ) \ emitterT void 
MOV##form##destbits##R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form( iRegister##destbits( to ), iRegister##srcbits( from ) ); } \ @@ -150,7 +166,7 @@ emitterT void MOV8RmSOffsettoR( x86IntRegType to, x86IntRegType from1, s32 from2 emitterT void AND32I8toR( x86IntRegType to, s8 from ) { - iAND( _reghlp<4>(to), from ); + iAND( _reghlp(to), from ); } emitterT void AND32I8toM( uptr to, s8 from ) @@ -374,114 +390,6 @@ emitterT void STC( void ) { iSTC(); } emitterT void CLC( void ) { iCLC(); } emitterT void NOP( void ) { iNOP(); } -//////////////////////////////////// -// arithmetic instructions / -//////////////////////////////////// - -/* inc r32 */ -emitterT void INC32R( x86IntRegType to ) -{ - write8( 0x40 + to ); -} - -/* inc m32 */ -emitterT void INC32M( u32 to ) -{ - write8( 0xFF ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* inc r16 */ -emitterT void INC16R( x86IntRegType to ) -{ - write8( 0x66 ); - write8( 0x40 + to ); -} - -/* inc m16 */ -emitterT void INC16M( u32 to ) -{ - write8( 0x66 ); - write8( 0xFF ); - ModRM( 0, 0, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* dec r32 */ -emitterT void DEC32R( x86IntRegType to ) -{ - write8( 0x48 + to ); -} - -/* dec m32 */ -emitterT void DEC32M( u32 to ) -{ - write8( 0xFF ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -/* dec r16 */ -emitterT void DEC16R( x86IntRegType to ) -{ - write8( 0x66 ); - write8( 0x48 + to ); -} - -/* dec m16 */ -emitterT void DEC16M( u32 to ) -{ - write8( 0x66 ); - write8( 0xFF ); - ModRM( 0, 1, DISP32 ); - write32( MEMADDR(to, 4) ); -} - -//////////////////////////////////// -// logical instructions / -//////////////////////////////////// - -/* not r32 */ -emitterT void NOT32R( x86IntRegType from ) -{ - RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 2, from ); -} - -// not m32 -emitterT void NOT32M( u32 from ) -{ - write8( 0xF7 ); - ModRM( 0, 2, DISP32 ); - write32( MEMADDR(from, 4)); -} - -/* neg r32 */ -emitterT void NEG32R( x86IntRegType from ) -{ - RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 3, from ); -} - -emitterT void NEG32M( u32 from ) -{ - write8( 0xF7 ); - ModRM( 0, 3, DISP32 ); - write32( MEMADDR(from, 4)); -} - -/* neg r16 */ -emitterT void NEG16R( x86IntRegType from ) -{ - write8( 0x66 ); - RexB(0,from); - write8( 0xF7 ); - ModRM( 3, 3, from ); -} - //////////////////////////////////// // jump instructions / //////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index af473c5c8a..c7331c732e 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -156,7 +156,6 @@ namespace x86Emitter Mod_Direct, // direct reg/reg operation }; - static const int ModRm_Direct = 3; // when used as the first parameter, specifies direct register operation (no mem) static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) @@ -197,15 +196,16 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // - template< int OperandSize > + template< typename OperandType > class iRegister { public: + static const uint OperandSize = sizeof( OperandType ); static const iRegister Empty; // defined as an empty/unused value (-1) int Id; - iRegister( const iRegister& src ) : Id( src.Id ) {} + iRegister( const iRegister& src ) : Id( src.Id ) {} iRegister(): Id( -1 ) {} explicit iRegister( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } @@ 
-214,17 +214,17 @@ namespace x86Emitter // Returns true if the register is a valid accumulator: Eax, Ax, Al. bool IsAccumulator() const { return Id == 0; } - bool operator==( const iRegister& src ) const + bool operator==( const iRegister& src ) const { return (Id == src.Id); } - bool operator!=( const iRegister& src ) const + bool operator!=( const iRegister& src ) const { return (Id != src.Id); } - iRegister& operator=( const iRegister& src ) + iRegister& operator=( const iRegister& src ) { Id = src.Id; return *this; @@ -239,9 +239,9 @@ namespace x86Emitter // all about the the templated code in haphazard fashion. Yay.. >_< // - typedef iRegister<4> iRegister32; - typedef iRegister<2> iRegister16; - typedef iRegister<1> iRegister8; + typedef iRegister iRegister32; + typedef iRegister iRegister16; + typedef iRegister iRegister8; class iRegisterCL : public iRegister8 { @@ -396,65 +396,29 @@ namespace x86Emitter __forceinline void Reduce(); }; - ////////////////////////////////////////////////////////////////////////////////////////// - // - class ModSibSized : public ModSibBase - { - public: - int OperandSize; - - ModSibSized( int opsize, const iAddressInfo& src ) : - ModSibBase( src ), - OperandSize( opsize ) - { - jASSUME( OperandSize == 1 || OperandSize == 2 || OperandSize == 4 ); - } - - ModSibSized( int opsize, s32 disp ) : - ModSibBase( disp ), - OperandSize( opsize ) - { - jASSUME( OperandSize == 1 || OperandSize == 2 || OperandSize == 4 ); - } - - ModSibSized( int opsize, x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) : - ModSibBase( base, index, scale, displacement ), - OperandSize( opsize ) - { - jASSUME( OperandSize == 1 || OperandSize == 2 || OperandSize == 4 ); - } - - __forceinline ModSibSized& Add( s32 imm ) - { - Displacement += imm; - return *this; - } - - __forceinline ModSibSized operator+( const s32 imm ) const { return ModSibSized( *this ).Add( imm ); } - __forceinline ModSibSized operator-( const s32 imm ) const { return ModSibSized( *this ).Add( -imm ); } - }; - ////////////////////////////////////////////////////////////////////////////////////////// // Strictly-typed version of ModSibBase, which is used to apply operand size information // to ImmToMem operations. 
// - template< int OpSize > - class ModSibStrict : public ModSibSized + template< typename OperandType > + class ModSibStrict : public ModSibBase { public: - __forceinline explicit ModSibStrict( const iAddressInfo& src ) : ModSibSized( OpSize, src ) {} - __forceinline explicit ModSibStrict( s32 disp ) : ModSibSized( OpSize, disp ) {} + static const uint OperandSize = sizeof( OperandType ); + + __forceinline explicit ModSibStrict( const iAddressInfo& src ) : ModSibBase( src ) {} + __forceinline explicit ModSibStrict( s32 disp ) : ModSibBase( disp ) {} __forceinline ModSibStrict( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) : - ModSibSized( OpSize, base, index, scale, displacement ) {} + ModSibBase( base, index, scale, displacement ) {} - __forceinline ModSibStrict& Add( s32 imm ) + __forceinline ModSibStrict& Add( s32 imm ) { Displacement += imm; return *this; } - __forceinline ModSibStrict operator+( const s32 imm ) const { return ModSibStrict( *this ).Add( imm ); } - __forceinline ModSibStrict operator-( const s32 imm ) const { return ModSibStrict( *this ).Add( -imm ); } + __forceinline ModSibStrict operator+( const s32 imm ) const { return ModSibStrict( *this ).Add( imm ); } + __forceinline ModSibStrict operator-( const s32 imm ) const { return ModSibStrict( *this ).Add( -imm ); } }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -493,40 +457,42 @@ namespace x86Emitter // Explicit version of ptr[], in the form of ptr32[], ptr16[], etc. which allows // specification of the operand size for ImmToMem operations. // - template< int OperandSize > + template< typename OperandType > struct x86IndexerTypeExplicit { + static const uint OperandSize = sizeof( OperandType ); + // passthrough instruction, allows ModSib to pass silently through ptr translation // without doing anything and without compiler error. - const ModSibStrict& operator[]( const ModSibStrict& src ) const { return src; } + const ModSibStrict& operator[]( const ModSibStrict& src ) const { return src; } - __forceinline ModSibStrict operator[]( x86IndexReg src ) const + __forceinline ModSibStrict operator[]( x86IndexReg src ) const { - return ModSibStrict( src, x86IndexReg::Empty ); + return ModSibStrict( src, x86IndexReg::Empty ); } - __forceinline ModSibStrict operator[]( const iAddressInfo& src ) const + __forceinline ModSibStrict operator[]( const iAddressInfo& src ) const { - return ModSibStrict( src ); + return ModSibStrict( src ); } - __forceinline ModSibStrict operator[]( uptr src ) const + __forceinline ModSibStrict operator[]( uptr src ) const { - return ModSibStrict( src ); + return ModSibStrict( src ); } - __forceinline ModSibStrict operator[]( const void* src ) const + __forceinline ModSibStrict operator[]( const void* src ) const { - return ModSibStrict( (uptr)src ); + return ModSibStrict( (uptr)src ); } x86IndexerTypeExplicit() {} // GCC initialization dummy }; extern const x86IndexerType ptr; - extern const x86IndexerTypeExplicit<4> ptr32; - extern const x86IndexerTypeExplicit<2> ptr16; - extern const x86IndexerTypeExplicit<1> ptr8; + extern const x86IndexerTypeExplicit ptr32; + extern const x86IndexerTypeExplicit ptr16; + extern const x86IndexerTypeExplicit ptr8; ////////////////////////////////////////////////////////////////////////////////////////// // JccComparisonType - enumerated possibilities for inspired code branching! 
@@ -652,14 +618,29 @@ namespace x86Emitter extern void EmitSibMagic( uint regfield, const ModSibBase& info ); + // ------------------------------------------------------------------------ + template< typename ImmType > + class ImplementationHelper + { + public: + static const uint OperandSize = sizeof(ImmType); + + protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + }; + + // ------------------------------------------------------------------------ #include "implement/group1.h" #include "implement/group2.h" + #include "implement/group3.h" #include "implement/movs.h" // cmov and movsx/zx - #include "implement/dwshift.h" // dowubleword shifts! + #include "implement/dwshift.h" // doubleword shifts! + #include "implement/incdec.h" } - // ------------------------------------------------------------------------ - + ////////////////////////////////////////////////////////////////////////////////////////// + // // ----- Group 1 Instruction Class ----- extern const Internal::Group1ImplAll iADD; @@ -676,6 +657,8 @@ namespace x86Emitter // zero. This is a safe optimization since any zero-value shift does not affect any // flags. + extern const Internal::MovImplAll iMOV; + extern const Internal::Group2ImplAll iROL; extern const Internal::Group2ImplAll iROR; extern const Internal::Group2ImplAll iRCL; @@ -684,6 +667,17 @@ namespace x86Emitter extern const Internal::Group2ImplAll iSHR; extern const Internal::Group2ImplAll iSAR; + // ----- Group 3 Instruction Class ----- + + extern const Internal::Group3ImplAll iNOT; + extern const Internal::Group3ImplAll iNEG; + extern const Internal::Group3ImplAll iUMUL; + extern const Internal::Group3ImplAll iUDIV; + extern const Internal::Group3ImplAll iSDIV; + + extern const Internal::IncDecImplAll iINC; + extern const Internal::IncDecImplAll iDEC; + extern const Internal::MovExtendImplAll iMOVZX; extern const Internal::MovExtendImplAll iMOVSX; From 7308ae1b715f1e38bd3ae47b7b2d6c2ec18c3d32 Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 16 Apr 2009 23:28:18 +0000 Subject: [PATCH 076/143] Fixed Issue 157 removed redundant code arcum pointed out I'd committed git-svn-id: http://pcsx2.googlecode.com/svn/trunk@991 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/IPU/IPU.cpp | 1 - pcsx2/VifDma.cpp | 52 ++++++++++++++++++----------------------------- 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index 04152bfd10..16f0cfc994 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -1370,7 +1370,6 @@ int FIFOto_write(u32* pMem, int size) } \ } -#define gif ((DMACh*)&PS2MEM_HW[0xA000]) extern void gsInterrupt(); int IPU1dma() diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 5b57c430f7..f0141a82a0 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -543,30 +543,6 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i memsize = size; #endif - -#ifdef VIFUNPACKDEBUG - - if((vif->tag.addr + (size / (VIFfuncTable[ vif->cmd & 0xf ].gsize * vifRegs->cycle.wl)) * - ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) > (u32)(VIFdmanum ? 0x4000 : 0x1000)) - { - //Sanity Check (memory overflow) - DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, vif->tag.addr, VIFdmanum ? 
0x4000 : 0x1000); - - } -#endif - - tempsize = (vif->tag.addr + (size / (ft->gsize * vifRegs->cycle.wl)) * - ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) + ((size / ft->gsize) * 16); - - //Sanity Check (memory overflow) - if(tempsize > (u32)(VIFdmanum ? 0x4000 : 0x1000)) - { - - // DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000); - tempsize = size; - size = 0; - } else tempsize = 0; - if (vifRegs->cycle.cl >= vifRegs->cycle.wl) // skipping write { @@ -574,6 +550,18 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i static int s_count = 0; #endif + tempsize = (vif->tag.addr + (size / (ft->gsize * vifRegs->cycle.wl)) * + ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) + ((size / ft->gsize) * 16); + + //Sanity Check (memory overflow) + if(tempsize > (u32)(VIFdmanum ? 0x4000 : 0x1000)) + { + + DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000); + tempsize = size; + size = 0; + } else tempsize = 0; + if (size >= ft->gsize) { @@ -735,8 +723,8 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i else /* filling write */ { - if((u32)(size / ft->gsize) < vifRegs->num && vifRegs->cycle.cl != 0) - DevCon::Notice("Filling write warning! Size < packet size and CL != 0"); + if((u32)(((size / ft->gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num && vifRegs->cycle.cl != 0) + DevCon::Notice("Filling write warning! %x < %x and CL = %x WL = %x", params (size / ft->gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl); VIFUNPACK_LOG("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType); while (vifRegs->num > 0) @@ -748,6 +736,11 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i if (vif->cl < vifRegs->cycle.cl) /* unpack one qword */ { + if(size < ft->gsize) + { + VIF_LOG("Out of Filling write data"); + break; + } func(dest, (u32*)cdata, ft->qsize); cdata += ft->gsize; size -= ft->gsize; @@ -756,12 +749,7 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i if (vif->cl == vifRegs->cycle.wl) { vif->cl = 0; - } - if(size < ft->gsize) - { - VIF_LOG("Out of Filling write data"); - break; - } + } } else { From f4d57faf779c1141cbfb1e55c9dd30f300bdbbf8 Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 16 Apr 2009 23:33:05 +0000 Subject: [PATCH 077/143] Look out, the monkey is back git-svn-id: http://pcsx2.googlecode.com/svn/trunk@992 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index f0141a82a0..adddb9a322 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -557,7 +557,7 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i if(tempsize > (u32)(VIFdmanum ? 0x4000 : 0x1000)) { - DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000); + //DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000); tempsize = size; size = 0; } else tempsize = 0; From ecbef93c6b4817e7bc76c18665ad7041a615a752 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Thu, 16 Apr 2009 23:57:04 +0000 Subject: [PATCH 078/143] Here we go again. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@993 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/Makefile.am | 8 +++++--- pcsx2/x86/ix86/implement/group3.h | 2 ++ pcsx2/x86/ix86/implement/movs.h | 11 +++++++---- pcsx2/x86/ix86/ix86_types.h | 4 ++-- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pcsx2/x86/ix86/Makefile.am b/pcsx2/x86/ix86/Makefile.am index 6943135b1b..caecac9851 100644 --- a/pcsx2/x86/ix86/Makefile.am +++ b/pcsx2/x86/ix86/Makefile.am @@ -1,6 +1,8 @@ INCLUDES = -I@srcdir@/.. -I@srcdir@/../../ -I@srcdir@/../../../common/include -I@srcdir@/../../../3rdparty noinst_LIBRARIES = libix86.a -libix86_a_SOURCES = libix86_a_SOURCES = ix86_mmx.cpp ix86_tools.cpp ix86.cpp ix86_3dnow.cpp \ -ix86_fpu.cpp ix86_legacy.cpp ix86_sse.cpp ix86_cpudetect.cpp ix86_jmp.cpp \ - ix86_internal.h ix86.h ix86_macros.h ix86_sse_helpers.h ix86_types.h +libix86_a_SOURCES = \ +ix86.cpp ix86_cpudetect.cpp ix86_fpu.cpp ix86_jmp.cpp ix86_mmx.cpp ix86_tools.cpp ix86_3dnow.cpp \ +ix86_legacy.cpp ix86_sse.cpp \ +ix86_internal.h ix86_legacy_instructions.h ix86_macros.h ix86_sse_helpers.h ix86.h ix86_legacy_internal.h \ +ix86_instructions.h ix86_legacy_types.h ix86_types.h \ No newline at end of file diff --git a/pcsx2/x86/ix86/implement/group3.h b/pcsx2/x86/ix86/implement/group3.h index 9cf60b2256..362b0186b8 100644 --- a/pcsx2/x86/ix86/implement/group3.h +++ b/pcsx2/x86/ix86/implement/group3.h @@ -63,4 +63,6 @@ public: template< typename T > __noinline void operator()( const ModSibStrict& from ) const { Group3Impl::Emit( InstType, from ); } + + Group3ImplAll() {} }; \ No newline at end of file diff --git a/pcsx2/x86/ix86/implement/movs.h b/pcsx2/x86/ix86/implement/movs.h index 2899659930..1e079bc87d 100644 --- a/pcsx2/x86/ix86/implement/movs.h +++ b/pcsx2/x86/ix86/implement/movs.h @@ -139,7 +139,7 @@ public: // Inlining Notes: // I've set up the inlining to be as practical and intelligent as possible, which means // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to -// virtually no code. In the case of (Reg, Imm) forms, the inlinign is up to the dis- +// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis- // cretion of the compiler. // @@ -171,6 +171,8 @@ public: else MovImpl::Emit( to, imm ); } + + MovImplAll() {} // Satisfy GCC's whims. }; @@ -181,7 +183,7 @@ template< typename ImmType > class CMovImpl : public ImplementationHelper< ImmType > { protected: - static bool Is8BitOperand() { return OperandSize == 1; } + static bool Is8BitOperand() {return OperandSize == 1; } static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } static __forceinline void emit_base( JccComparisonType cc ) @@ -193,8 +195,8 @@ protected: } public: - CMovImpl() {} - + static const uint OperandSize = sizeof(ImmType); + static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const iRegister& from ) { if( to == from ) return; @@ -213,6 +215,7 @@ public: emit_base( cc ); EmitSibMagic( to.Id, sibsrc ); } + CMovImpl() {} }; diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index c7331c732e..4a8ebfe272 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -144,7 +144,7 @@ namespace x86Emitter #ifdef _MSC_VER # define __noinline __declspec(noinline) #else -# define __noinline +# define __noinline __attribute__((noinline)) #endif // ModRM 'mod' field enumeration. Provided mostly for reference: @@ -658,7 +658,7 @@ namespace x86Emitter // flags. 
extern const Internal::MovImplAll iMOV; - + extern const Internal::Group2ImplAll iROL; extern const Internal::Group2ImplAll iROR; extern const Internal::Group2ImplAll iRCL; From 4704ac9436c02c798cef7953aa786a4abe8cc85c Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 17 Apr 2009 00:35:55 +0000 Subject: [PATCH 079/143] Reduce compiler warnings to a more reasonable level in Linux. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@994 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/dwshift.h | 6 ++--- pcsx2/x86/ix86/implement/group1.h | 32 +++++++++++++------------- pcsx2/x86/ix86/implement/group2.h | 20 ++++++++--------- pcsx2/x86/ix86/implement/group3.h | 8 +++---- pcsx2/x86/ix86/implement/incdec.h | 6 ++--- pcsx2/x86/ix86/implement/movs.h | 36 +++++++++++++++--------------- 6 files changed, 54 insertions(+), 54 deletions(-) diff --git a/pcsx2/x86/ix86/implement/dwshift.h b/pcsx2/x86/ix86/implement/dwshift.h index 66a1b83088..2ad6e82903 100644 --- a/pcsx2/x86/ix86/implement/dwshift.h +++ b/pcsx2/x86/ix86/implement/dwshift.h @@ -32,7 +32,7 @@ class DwordShiftImpl : public ImplementationHelper< ImmType > protected: static void basesibform( bool isCL ) { - prefix16(); + ImplementationHelper::prefix16(); write8( 0x0f ); write8( (isCL ? 0xa5 : 0xa4) | (isShiftRight ? 0x8 : 0) ); } @@ -42,7 +42,7 @@ public: static __emitinline void Emit( const iRegister& to, const iRegister& from ) { - prefix16(); + ImplementationHelper::prefix16(); write16( 0xa50f | (isShiftRight ? 0x800 : 0) ); ModRM_Direct( from.Id, to.Id ); } @@ -50,7 +50,7 @@ public: static __emitinline void Emit( const iRegister& to, const iRegister& from, u8 imm ) { if( imm == 0 ) return; - prefix16(); + ImplementationHelper::prefix16(); write16( 0xa40f | (isShiftRight ? 0x800 : 0) ); ModRM_Direct( from.Id, to.Id ); write8( imm ); diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index 2837960f73..13215ebfb1 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -42,43 +42,43 @@ public: static __emitinline void Emit( const iRegister& to, const iRegister& from ) { - prefix16(); - iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + ImplementationHelper::prefix16(); + iWrite( (ImplementationHelper::Is8BitOperand() ? 0 : 1) | (InstType<<3) ); ModRM_Direct( from.Id, to.Id ); } static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from ) { - prefix16(); - iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + ImplementationHelper::prefix16(); + iWrite( (ImplementationHelper::Is8BitOperand() ? 0 : 1) | (InstType<<3) ); EmitSibMagic( from.Id, sibdest ); } static __emitinline void Emit( const iRegister& to, const ModSibBase& sibsrc ) { - prefix16(); - iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); + ImplementationHelper::prefix16(); + iWrite( (ImplementationHelper::Is8BitOperand() ? 2 : 3) | (InstType<<3) ); EmitSibMagic( to.Id, sibsrc ); } static __emitinline void Emit( void* dest, const iRegister& from ) { - prefix16(); - iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + ImplementationHelper::prefix16(); + iWrite( (ImplementationHelper::Is8BitOperand() ? 0 : 1) | (InstType<<3) ); iWriteDisp( from.Id, dest ); } static __emitinline void Emit( const iRegister& to, const void* src ) { - prefix16(); - iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); + ImplementationHelper::prefix16(); + iWrite( (ImplementationHelper::Is8BitOperand() ? 
2 : 3) | (InstType<<3) ); iWriteDisp( to.Id, src ); } static __emitinline void Emit( const iRegister& to, int imm ) { - prefix16(); - if( !Is8BitOperand() && is_s8( imm ) ) + ImplementationHelper::prefix16(); + if( !ImplementationHelper::Is8BitOperand() && is_s8( imm ) ) { iWrite( 0x83 ); ModRM_Direct( InstType, to.Id ); @@ -87,10 +87,10 @@ public: else { if( to.IsAccumulator() ) - iWrite( (Is8BitOperand() ? 4 : 5) | (InstType<<3) ); + iWrite( (ImplementationHelper::Is8BitOperand() ? 4 : 5) | (InstType<<3) ); else { - iWrite( Is8BitOperand() ? 0x80 : 0x81 ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0x80 : 0x81 ); ModRM_Direct( InstType, to.Id ); } iWrite( imm ); @@ -99,7 +99,7 @@ public: static __emitinline void Emit( const ModSibStrict& sibdest, int imm ) { - if( Is8BitOperand() ) + if( ImplementationHelper::Is8BitOperand() ) { iWrite( 0x80 ); EmitSibMagic( InstType, sibdest ); @@ -107,7 +107,7 @@ public: } else { - prefix16(); + ImplementationHelper::prefix16(); iWrite( is_s8( imm ) ? 0x83 : 0x81 ); EmitSibMagic( InstType, sibdest ); if( is_s8( imm ) ) diff --git a/pcsx2/x86/ix86/implement/group2.h b/pcsx2/x86/ix86/implement/group2.h index 74979f8c91..f4153378d9 100644 --- a/pcsx2/x86/ix86/implement/group2.h +++ b/pcsx2/x86/ix86/implement/group2.h @@ -46,8 +46,8 @@ public: static __emitinline void Emit( const iRegister& to ) { - prefix16(); - iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); + ImplementationHelper::prefix16(); + iWrite( ImplementationHelper::Is8BitOperand() ? 0xd2 : 0xd3 ); ModRM_Direct( InstType, to.Id ); } @@ -55,16 +55,16 @@ public: { if( imm == 0 ) return; - prefix16(); + ImplementationHelper::prefix16(); if( imm == 1 ) { // special encoding of 1's - iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0xd0 : 0xd1 ); ModRM_Direct( InstType, to.Id ); } else { - iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0xc0 : 0xc1 ); ModRM_Direct( InstType, to.Id ); iWrite( imm ); } @@ -72,8 +72,8 @@ public: static __emitinline void Emit( const ModSibStrict& sibdest ) { - prefix16(); - iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); + ImplementationHelper::prefix16(); + iWrite( ImplementationHelper::Is8BitOperand() ? 0xd2 : 0xd3 ); EmitSibMagic( InstType, sibdest ); } @@ -81,16 +81,16 @@ public: { if( imm == 0 ) return; - prefix16(); + ImplementationHelper::prefix16(); if( imm == 1 ) { // special encoding of 1's - iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0xd0 : 0xd1 ); EmitSibMagic( InstType, sibdest ); } else { - iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0xc0 : 0xc1 ); EmitSibMagic( InstType, sibdest ); iWrite( imm ); } diff --git a/pcsx2/x86/ix86/implement/group3.h b/pcsx2/x86/ix86/implement/group3.h index 362b0186b8..41b4cdc625 100644 --- a/pcsx2/x86/ix86/implement/group3.h +++ b/pcsx2/x86/ix86/implement/group3.h @@ -39,15 +39,15 @@ public: static __emitinline void Emit( G3Type InstType, const iRegister& from ) { - prefix16(); - iWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); + ImplementationHelper::prefix16(); + iWrite(ImplementationHelper::Is8BitOperand() ? 0xf6 : 0xf7 ); ModRM_Direct( InstType, from.Id ); } static __emitinline void Emit( G3Type InstType, const ModSibStrict& sibsrc ) { - prefix16(); - iWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); + ImplementationHelper::prefix16(); + iWrite( ImplementationHelper::Is8BitOperand() ? 
0xf6 : 0xf7 ); EmitSibMagic( InstType, sibsrc ); } }; diff --git a/pcsx2/x86/ix86/implement/incdec.h b/pcsx2/x86/ix86/implement/incdec.h index 0cf5a08c62..1570aa3768 100644 --- a/pcsx2/x86/ix86/implement/incdec.h +++ b/pcsx2/x86/ix86/implement/incdec.h @@ -31,21 +31,21 @@ public: { // There is no valid 8-bit form of direct register inc/dec, so fall // back on Mod/RM format instead: - if( Is8BitOperand() ) + if (ImplementationHelper::Is8BitOperand() ) { write8( 0xfe ); ModRM_Direct( isDec ? 1 : 0, to.Id ); } else { - prefix16(); + ImplementationHelper::prefix16(); write8( (isDec ? 0x48 : 0x40) | to.Id ); } } static __emitinline void Emit( bool isDec, const ModSibStrict& dest ) { - write8( Is8BitOperand() ? 0xfe : 0xff ); + write8( ImplementationHelper::Is8BitOperand() ? 0xfe : 0xff ); EmitSibMagic( isDec ? 1: 0, dest ); } }; diff --git a/pcsx2/x86/ix86/implement/movs.h b/pcsx2/x86/ix86/implement/movs.h index 1e079bc87d..7ac0dee7bf 100644 --- a/pcsx2/x86/ix86/implement/movs.h +++ b/pcsx2/x86/ix86/implement/movs.h @@ -33,27 +33,27 @@ public: { if( to == from ) return; // ignore redundant MOVs. - prefix16(); - iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + ImplementationHelper::prefix16(); + iWrite( ImplementationHelper::Is8BitOperand() ? 0x88 : 0x89 ); ModRM( 3, from.Id, to.Id ); } // ------------------------------------------------------------------------ static __forceinline void Emit( const ModSibBase& dest, const iRegister& from ) { - prefix16(); + ImplementationHelper::prefix16(); // mov eax has a special from when writing directly to a DISP32 address // (sans any register index/base registers). if( from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty() ) { - iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0xa2 : 0xa3 ); iWrite( dest.Displacement ); } else { - iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0x88 : 0x89 ); EmitSibMagic( from.Id, dest ); } } @@ -61,19 +61,19 @@ public: // ------------------------------------------------------------------------ static __forceinline void Emit( const iRegister& to, const ModSibBase& src ) { - prefix16(); + ImplementationHelper::prefix16(); // mov eax has a special from when reading directly from a DISP32 address // (sans any register index/base registers). if( to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty() ) { - iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0xa0 : 0xa1 ); iWrite( src.Displacement ); } else { - iWrite( Is8BitOperand() ? 0x8a : 0x8b ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0x8a : 0x8b ); EmitSibMagic( to.Id, src ); } } @@ -81,18 +81,18 @@ public: // ------------------------------------------------------------------------ static __forceinline void Emit( void* dest, const iRegister& from ) { - prefix16(); + ImplementationHelper::prefix16(); // mov eax has a special from when writing directly to a DISP32 address if( from.IsAccumulator() ) { - iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0xa2 : 0xa3 ); iWrite( (s32)dest ); } else { - iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + iWrite( ImplementationHelper::Is8BitOperand() ? 
0x88 : 0x89 ); iWriteDisp( from.Id, dest ); } } @@ -100,18 +100,18 @@ public: // ------------------------------------------------------------------------ static __forceinline void Emit( const iRegister& to, const void* src ) { - prefix16(); + ImplementationHelper::prefix16(); // mov eax has a special from when reading directly from a DISP32 address if( to.IsAccumulator() ) { - iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0xa0 : 0xa1 ); iWrite( (s32)src ); } else { - iWrite( Is8BitOperand() ? 0x8a : 0x8b ); + iWrite( ImplementationHelper::Is8BitOperand() ? 0x8a : 0x8b ); iWriteDisp( to.Id, src ); } } @@ -121,16 +121,16 @@ public: { // Note: MOV does not have (reg16/32,imm8) forms. - prefix16(); - iWrite( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); + ImplementationHelper::prefix16(); + iWrite( (ImplementationHelper::Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); iWrite( imm ); } // ------------------------------------------------------------------------ static __forceinline void Emit( ModSibStrict dest, ImmType imm ) { - prefix16(); - iWrite( Is8BitOperand() ? 0xc6 : 0xc7 ); + ImplementationHelper::prefix16(); + iWrite( ImplementationHelper::Is8BitOperand() ? 0xc6 : 0xc7 ); EmitSibMagic( 0, dest ); iWrite( imm ); } From 170acd854aec50622a67bde225c2cf65604aee0c Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 17 Apr 2009 00:54:53 +0000 Subject: [PATCH 080/143] Added a check to make sure the unpack REALLY overflows, sometimes it can be dead on the limit or count the skip, so it doesn't need to break to slower code. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@995 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index adddb9a322..7522158b60 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -558,8 +558,17 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i { //DevCon::Notice("VIF%x Unpack ending %x > %x", params VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000); - tempsize = size; - size = 0; + if(vifRegs->cycle.cl == 1 && ((u32)(VIFdmanum ? 0x4000 : 0x1000) + ((vifRegs->cycle.cl - vifRegs->cycle.wl) * 16)) == tempsize + || tempsize == (u32)(VIFdmanum ? 0x4000 : 0x1000)) + { + //Its a red herring! so ignore it! SSE unpacks will be much quicker + tempsize = 0; + } + else + { + tempsize = size; + size = 0; + } } else tempsize = 0; From 0bf914833f023fc53dcebd1487a4448b86adbebd Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 17 Apr 2009 00:58:20 +0000 Subject: [PATCH 081/143] Bring the new speed hack to Linux. (I just quickly hacked it in, so I may make it look nicer later.) 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@996 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Linux/HacksDlg.cpp | 2 ++ pcsx2/Linux/interface.c | 22 +++++++++++++++ pcsx2/Linux/pcsx2.glade | 61 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+) diff --git a/pcsx2/Linux/HacksDlg.cpp b/pcsx2/Linux/HacksDlg.cpp index 1f4a29936f..4bed23966a 100644 --- a/pcsx2/Linux/HacksDlg.cpp +++ b/pcsx2/Linux/HacksDlg.cpp @@ -81,6 +81,7 @@ void on_Speed_Hacks(GtkMenuItem *menuitem, gpointer user_data) set_checked(SpeedHacksDlg, "check_intc_sync_hack", CHECK_INTC_STAT_HACK); set_checked(SpeedHacksDlg, "check_ESC_hack", CHECK_ESCAPE_HACK); + gtk_range_set_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "VUCycleHackScale")), Config.VUCycleHack); gtk_widget_show_all(SpeedHacksDlg); gtk_widget_set_sensitive(MainWindow, FALSE); gtk_main(); @@ -104,6 +105,7 @@ void on_Speed_Hack_OK(GtkButton *button, gpointer user_data) Config.Hacks |= is_checked(SpeedHacksDlg, "check_intc_sync_hack") << 5; Config.Hacks |= is_checked(SpeedHacksDlg, "check_ESC_hack") << 10; + Config.VUCycleHack = gtk_range_get_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "VUCycleHackScale"))); SaveConfig(); gtk_widget_destroy(SpeedHacksDlg); diff --git a/pcsx2/Linux/interface.c b/pcsx2/Linux/interface.c index f62270e3d4..843e490537 100644 --- a/pcsx2/Linux/interface.c +++ b/pcsx2/Linux/interface.c @@ -637,6 +637,9 @@ create_SpeedHacksDlg (void) GtkWidget *label97; GtkWidget *check_intc_sync_hack; GtkWidget *label101; + GtkWidget *vbox71; + GtkWidget *VUCycleHackScale; + GtkWidget *label108; GtkWidget *frame36; GtkWidget *alignment31; GtkWidget *check_ESC_hack; @@ -796,6 +799,22 @@ create_SpeedHacksDlg (void) gtk_box_pack_start (GTK_BOX (vbox60), label101, FALSE, FALSE, 0); gtk_label_set_line_wrap (GTK_LABEL (label101), TRUE); + vbox71 = gtk_vbox_new (FALSE, 0); + gtk_widget_set_name (vbox71, "vbox71"); + gtk_widget_show (vbox71); + gtk_box_pack_start (GTK_BOX (vbox60), vbox71, TRUE, TRUE, 0); + + VUCycleHackScale = gtk_hscale_new (GTK_ADJUSTMENT (gtk_adjustment_new (0, 0, 4, 1, 0, 0))); + gtk_widget_set_name (VUCycleHackScale, "VUCycleHackScale"); + gtk_widget_show (VUCycleHackScale); + gtk_box_pack_start (GTK_BOX (vbox71), VUCycleHackScale, TRUE, TRUE, 0); + gtk_scale_set_digits (GTK_SCALE (VUCycleHackScale), 0); + + label108 = gtk_label_new (_("Speedup for 3D games.\n0: No speedup.\n1: Slight speedup for 3D geometry, should work with most games.\n2: Moderate speedup for 3D geometry, should work with most games with minor problems.\n3: Large speedup for 3D geometry, may break many games and make others skip frames.\n4: Very large speedup for 3D geometry, will break games in interesting ways.")); + gtk_widget_set_name (label108, "label108"); + gtk_widget_show (label108); + gtk_box_pack_start (GTK_BOX (vbox71), label108, FALSE, FALSE, 0); + frame36 = gtk_frame_new (NULL); gtk_widget_set_name (frame36, "frame36"); gtk_widget_show (frame36); @@ -869,6 +888,9 @@ create_SpeedHacksDlg (void) GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label97, "label97"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, check_intc_sync_hack, "check_intc_sync_hack"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label101, "label101"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, vbox71, "vbox71"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, VUCycleHackScale, "VUCycleHackScale"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label108, "label108"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, frame36, "frame36"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, alignment31, "alignment31"); GLADE_HOOKUP_OBJECT 
(SpeedHacksDlg, check_ESC_hack, "check_ESC_hack"); diff --git a/pcsx2/Linux/pcsx2.glade b/pcsx2/Linux/pcsx2.glade index 4e2513352c..b7e95d4d91 100644 --- a/pcsx2/Linux/pcsx2.glade +++ b/pcsx2/Linux/pcsx2.glade @@ -1688,6 +1688,67 @@ Known to work well with a couple games, namely Shadow of the Colossus (but break False + + + + True + False + 0 + + + + True + True + True + GTK_POS_TOP + 0 + GTK_UPDATE_CONTINUOUS + False + 0 0 4 1 0 0 + + + 0 + True + True + + + + + + True + Speedup for 3D games. +0: No speedup. +1: Slight speedup for 3D geometry, should work with most games. +2: Moderate speedup for 3D geometry, should work with most games with minor problems. +3: Large speedup for 3D geometry, may break many games and make others skip frames. +4: Very large speedup for 3D geometry, will break games in interesting ways. + False + False + GTK_JUSTIFY_LEFT + False + False + 0.5 + 0.5 + 0 + 0 + PANGO_ELLIPSIZE_NONE + -1 + False + 0 + + + 0 + False + False + + + + + 0 + True + True + + 0 From 8865ee398bb0ecce5cad4af11598ebcce4e11eb9 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Fri, 17 Apr 2009 01:06:27 +0000 Subject: [PATCH 082/143] GSdx: reworked the gs transfer function a bit, and removed the nloop hack, which does not seem to be necessary anymore. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@997 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GS.cpp | 20 ++-- plugins/GSdx/GSCrc.cpp | 202 ++++++++++++++++---------------- plugins/GSdx/GSCrc.h | 1 - plugins/GSdx/GSDevice9.cpp | 5 +- plugins/GSdx/GSRenderer.h | 12 +- plugins/GSdx/GSRendererHW.h | 4 +- plugins/GSdx/GSRendererHW10.cpp | 4 +- plugins/GSdx/GSRendererHW10.h | 2 +- plugins/GSdx/GSRendererHW9.cpp | 4 +- plugins/GSdx/GSRendererHW9.h | 2 +- plugins/GSdx/GSRendererNull.h | 4 +- plugins/GSdx/GSRendererSW.h | 4 +- plugins/GSdx/GSSettingsDlg.cpp | 8 -- plugins/GSdx/GSSettingsDlg.h | 2 - plugins/GSdx/GSState.cpp | 81 ++++++------- plugins/GSdx/GSState.h | 4 +- plugins/GSdx/GSdx.rc | 22 ++-- 17 files changed, 173 insertions(+), 208 deletions(-) diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index 0da874ce54..3dd274f709 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -106,10 +106,6 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer) GSclose(); - // TODO - - int nloophack = AfxGetApp()->GetProfileInt(_T("Settings"), _T("nloophack"), 2); - GSRendererSettings rs; rs.m_interlace = AfxGetApp()->GetProfileInt(_T("Settings"), _T("interlace"), 0); @@ -125,14 +121,14 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer) switch(renderer) { default: - case 0: s_gs = new GSRendererHW9(s_basemem, !!mt, s_irq, nloophack, rs); break; - case 1: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, nloophack, rs, threads); break; - case 2: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, nloophack, rs); break; - case 3: s_gs = new GSRendererHW10(s_basemem, !!mt, s_irq, nloophack, rs); break; - case 4: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, nloophack, rs, threads); break; - case 5: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, nloophack, rs); break; - case 6: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, nloophack, rs, threads); break; - case 7: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, nloophack, rs); break; + case 0: s_gs = new GSRendererHW9(s_basemem, !!mt, s_irq, rs); break; + case 1: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, rs, threads); break; + case 2: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, rs); break; + case 3: s_gs = new GSRendererHW10(s_basemem, !!mt, 
s_irq, rs); break; + case 4: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, rs, threads); break; + case 5: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, rs); break; + case 6: s_gs = new GSRendererSW(s_basemem, !!mt, s_irq, rs, threads); break; + case 7: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, rs); break; } s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED); diff --git a/plugins/GSdx/GSCrc.cpp b/plugins/GSdx/GSCrc.cpp index ae03ccbefb..e6432260e6 100644 --- a/plugins/GSdx/GSCrc.cpp +++ b/plugins/GSdx/GSCrc.cpp @@ -24,107 +24,107 @@ CRC::Game CRC::m_games[] = { - {0x00000000, None, Unknown, false}, - {0x2113EA2E, MetalSlug6, Unknown, false}, - {0x42E05BAF, TomoyoAfter, JP, false}, - {0x7800DC84, Clannad, JP, false}, - {0xa39517ab, FFX, EU, true}, - {0xa39517ae, FFX, FR, true}, - {0x941bb7d9, FFX, DE, true}, - {0xa39517a9, FFX, IT, true}, - {0x941bb7de, FFX, ES, true}, - {0xb4414ea1, FFX, RU, true}, - {0xee97db5b, FFX, RU, true}, - {0xaec495cc, FFX, RU, true}, - {0xbb3d833a, FFX, US, true}, - {0x6a4efe60, FFX, JP, true}, - {0x3866ca7e, FFX, ASIA, true}, // int. - {0x658597e2, FFX, JP, true}, // int. - {0x9aac5309, FFX2, EU, true}, - {0x9aac530c, FFX2, FR, true}, - {0x9aac530a, FFX2, FR, true}, // ? - {0x9aac530d, FFX2, DE, true}, - {0x9aac530b, FFX2, IT, true}, - {0x48fe0c71, FFX2, US, true}, - {0xe1fd9a2d, FFX2, JP, true}, // int. - {0x78da0252, FFXII, EU, false}, - {0xc1274668, FFXII, EU, false}, - {0xdc2a467e, FFXII, EU, false}, - {0xca284668, FFXII, EU, false}, - {0x280AD120, FFXII, JP, false}, - {0x8BE3D7B2, ShadowHearts, Unknown, false}, - {0xDEFA4763, ShadowHearts, US, false}, - {0x21068223, Okami, US, false}, - {0x891f223f, Okami, FR, false}, - {0xC5DEFEA0, Okami, JP, false}, - {0x053D2239, MetalGearSolid3, US, false}, - {0x086273D2, MetalGearSolid3, FR, false}, - {0x26A6E286, MetalGearSolid3, EU, false}, - {0xAA31B5BF, MetalGearSolid3, Unknown, false}, - {0x9F185CE1, MetalGearSolid3, Unknown, false}, - {0x98D4BC93, MetalGearSolid3, EU, false}, - {0x86BC3040, MetalGearSolid3, US, false}, //Subsistance disc 1 - {0x0481AD8A, MetalGearSolid3, JP, false}, - {0x79ED26AD, MetalGearSolid3, EU, false}, - {0x5E31EA42, MetalGearSolid3, EU, false}, - {0x278722BF, DBZBT2, US, false}, - {0xFE961D28, DBZBT2, US, false}, - {0x0393B6BE, DBZBT2, EU, false}, - {0xE2F289ED, DBZBT2, JP, false}, // Sparking Neo! 
- {0x35AA84D1, DBZBT2, Unknown, false}, - {0x428113C2, DBZBT3, US, false}, - {0xA422BB13, DBZBT3, EU, false}, - {0x983c53d2, DBZBT3, Unknown, false}, - {0x72B3802A, SFEX3, US, false}, - {0x71521863, SFEX3, US, false}, - {0x28703748, Bully, US, false}, - {0xC78A495D, BullyCC, US, false}, - {0xC19A374E, SoTC, US, false}, - {0x7D8F539A, SoTC, EU, false}, - {0x3122B508, OnePieceGrandAdventure, US, false}, - {0x6F8545DB, ICO, US, false}, - {0xB01A4C95, ICO, JP, false}, - {0x5C991F4E, ICO, Unknown, false}, - {0xAEAD1CA3, GT4, JP, false}, - {0x44A61C8F, GT4, Unknown, false}, - {0x0086E35B, GT4, Unknown, false}, - {0x77E61C8A, GT4, Unknown, false}, - {0xC164550A, WildArms5, JPUNDUB, false}, - {0xC1640D2C, WildArms5, US, false}, - {0x0FCF8FE4, WildArms5, EU, false}, - {0x2294D322, WildArms5, JP, false}, - {0x565B6170, WildArms5, JP, false}, - {0x8B029334, Manhunt2, Unknown, false}, - {0x09F49E37, CrashBandicootWoC, Unknown, false}, - {0x013E349D, ResidentEvil4, US, false}, - {0x6BA2F6B9, ResidentEvil4, Unknown, false}, - {0x60FA8C69, ResidentEvil4, JP, false}, - {0x72E1E60E, Spartan, Unknown, false}, - {0x5ED8FB53, AceCombat4, JP, false}, - {0x1B9B7563, AceCombat4, Unknown, false}, - {0xEC432B24, Drakengard2, Unknown, false}, - {0xFC46EA61, Tekken5, JP, false}, - {0x1F88EE37, Tekken5, Unknown, false}, - {0x652050D2, Tekken5, Unknown, false}, - {0x9E98B8AE, IkkiTousen, JP, false}, - {0xD6385328, GodOfWar, US, false}, - {0xFB0E6D72, GodOfWar, EU, false}, - {0xEB001875, GodOfWar, EU, false}, - {0xA61A4C6D, GodOfWar, Unknown, false}, - {0xE23D532B, GodOfWar, Unknown, false}, - {0x2F123FD8, GodOfWar2, RU, false}, - {0x2F123FD8, GodOfWar2, US, false}, - {0x44A8A22A, GodOfWar2, EU, false}, - {0x5D482F18, JackieChanAdv, Unknown, false}, - {0xf0a6d880, HarvestMoon, US, true}, - {0x75c01a04, NamcoXCapcom, US, false}, - {0xBF6F101F, GiTS, US, false}, - {0xA5768F53, GiTS, JP, false}, - {0x6BF11378, Onimusha3, US, false}, - {0xF442260C, MajokkoALaMode2, JP, false}, - {0x14FE77F7, TalesOfAbyss, US, false}, - {0x045D77E9, TalesOfAbyss, US, false}, // undub - {0xAA5EC3A3, TalesOfAbyss, JP, false}, + {0x00000000, None, Unknown}, + {0x2113EA2E, MetalSlug6, Unknown}, + {0x42E05BAF, TomoyoAfter, JP}, + {0x7800DC84, Clannad, JP}, + {0xa39517ab, FFX, EU}, + {0xa39517ae, FFX, FR}, + {0x941bb7d9, FFX, DE}, + {0xa39517a9, FFX, IT}, + {0x941bb7de, FFX, ES}, + {0xb4414ea1, FFX, RU}, + {0xee97db5b, FFX, RU}, + {0xaec495cc, FFX, RU}, + {0xbb3d833a, FFX, US}, + {0x6a4efe60, FFX, JP}, + {0x3866ca7e, FFX, ASIA}, // int. + {0x658597e2, FFX, JP}, // int. + {0x9aac5309, FFX2, EU}, + {0x9aac530c, FFX2, FR}, + {0x9aac530a, FFX2, FR}, // ? + {0x9aac530d, FFX2, DE}, + {0x9aac530b, FFX2, IT}, + {0x48fe0c71, FFX2, US}, + {0xe1fd9a2d, FFX2, JP}, // int. 
+ {0x78da0252, FFXII, EU}, + {0xc1274668, FFXII, EU}, + {0xdc2a467e, FFXII, EU}, + {0xca284668, FFXII, EU}, + {0x280AD120, FFXII, JP}, + {0x8BE3D7B2, ShadowHearts, Unknown}, + {0xDEFA4763, ShadowHearts, US}, + {0x21068223, Okami, US}, + {0x891f223f, Okami, FR}, + {0xC5DEFEA0, Okami, JP}, + {0x053D2239, MetalGearSolid3, US}, + {0x086273D2, MetalGearSolid3, FR}, + {0x26A6E286, MetalGearSolid3, EU}, + {0xAA31B5BF, MetalGearSolid3, Unknown}, + {0x9F185CE1, MetalGearSolid3, Unknown}, + {0x98D4BC93, MetalGearSolid3, EU}, + {0x86BC3040, MetalGearSolid3, US}, //Subsistance disc 1 + {0x0481AD8A, MetalGearSolid3, JP}, + {0x79ED26AD, MetalGearSolid3, EU}, + {0x5E31EA42, MetalGearSolid3, EU}, + {0x278722BF, DBZBT2, US}, + {0xFE961D28, DBZBT2, US}, + {0x0393B6BE, DBZBT2, EU}, + {0xE2F289ED, DBZBT2, JP}, // Sparking Neo! + {0x35AA84D1, DBZBT2, Unknown}, + {0x428113C2, DBZBT3, US}, + {0xA422BB13, DBZBT3, EU}, + {0x983c53d2, DBZBT3, Unknown}, + {0x72B3802A, SFEX3, US}, + {0x71521863, SFEX3, US}, + {0x28703748, Bully, US}, + {0xC78A495D, BullyCC, US}, + {0xC19A374E, SoTC, US}, + {0x7D8F539A, SoTC, EU}, + {0x3122B508, OnePieceGrandAdventure, US}, + {0x6F8545DB, ICO, US}, + {0xB01A4C95, ICO, JP}, + {0x5C991F4E, ICO, Unknown}, + {0xAEAD1CA3, GT4, JP}, + {0x44A61C8F, GT4, Unknown}, + {0x0086E35B, GT4, Unknown}, + {0x77E61C8A, GT4, Unknown}, + {0xC164550A, WildArms5, JPUNDUB}, + {0xC1640D2C, WildArms5, US}, + {0x0FCF8FE4, WildArms5, EU}, + {0x2294D322, WildArms5, JP}, + {0x565B6170, WildArms5, JP}, + {0x8B029334, Manhunt2, Unknown}, + {0x09F49E37, CrashBandicootWoC, Unknown}, + {0x013E349D, ResidentEvil4, US}, + {0x6BA2F6B9, ResidentEvil4, Unknown}, + {0x60FA8C69, ResidentEvil4, JP}, + {0x72E1E60E, Spartan, Unknown}, + {0x5ED8FB53, AceCombat4, JP}, + {0x1B9B7563, AceCombat4, Unknown}, + {0xEC432B24, Drakengard2, Unknown}, + {0xFC46EA61, Tekken5, JP}, + {0x1F88EE37, Tekken5, Unknown}, + {0x652050D2, Tekken5, Unknown}, + {0x9E98B8AE, IkkiTousen, JP}, + {0xD6385328, GodOfWar, US}, + {0xFB0E6D72, GodOfWar, EU}, + {0xEB001875, GodOfWar, EU}, + {0xA61A4C6D, GodOfWar, Unknown}, + {0xE23D532B, GodOfWar, Unknown}, + {0x2F123FD8, GodOfWar2, RU}, + {0x2F123FD8, GodOfWar2, US}, + {0x44A8A22A, GodOfWar2, EU}, + {0x5D482F18, JackieChanAdv, Unknown}, + {0xf0a6d880, HarvestMoon, US}, + {0x75c01a04, NamcoXCapcom, US}, + {0xBF6F101F, GiTS, US}, + {0xA5768F53, GiTS, JP}, + {0x6BF11378, Onimusha3, US}, + {0xF442260C, MajokkoALaMode2, JP}, + {0x14FE77F7, TalesOfAbyss, US}, + {0x045D77E9, TalesOfAbyss, US}, // undub + {0xAA5EC3A3, TalesOfAbyss, JP}, }; CAtlMap CRC::m_map; diff --git a/plugins/GSdx/GSCrc.h b/plugins/GSdx/GSCrc.h index 89efaf71d8..12f41bd93f 100644 --- a/plugins/GSdx/GSCrc.h +++ b/plugins/GSdx/GSCrc.h @@ -86,7 +86,6 @@ public: DWORD crc; Title title; Region region; - bool nloophack; }; private: diff --git a/plugins/GSdx/GSDevice9.cpp b/plugins/GSdx/GSDevice9.cpp index f8414a36e0..641e34a6c0 100644 --- a/plugins/GSdx/GSDevice9.cpp +++ b/plugins/GSdx/GSDevice9.cpp @@ -266,10 +266,7 @@ bool GSDevice9::Reset(int w, int h, bool fs) m_pp.PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT; } - if(!!AfxGetApp()->GetProfileInt(_T("Settings"), _T("tvout"), FALSE)) - { - m_pp.Flags |= D3DPRESENTFLAG_VIDEO; - } + m_pp.Flags |= D3DPRESENTFLAG_VIDEO; // enables tv-out (but I don't think anyone would still use a regular tv...) 
int mw = AfxGetApp()->GetProfileInt(_T("Settings"), _T("ModeWidth"), 0); int mh = AfxGetApp()->GetProfileInt(_T("Settings"), _T("ModeHeight"), 0); diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index ad56de020f..56bd999fb7 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -95,8 +95,8 @@ public: GSWnd m_wnd; public: - GSRendererBase(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs) - : GSState(base, mt, irq, nloophack) + GSRendererBase(BYTE* base, bool mt, void (*irq)(), const GSRendererSettings& rs) + : GSState(base, mt, irq) , m_osd(true) { m_interlace = rs.m_interlace; @@ -412,8 +412,8 @@ public: GSCapture m_capture; public: - GSRenderer(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs, bool psrr) - : GSRendererBase(base, mt, irq, nloophack, rs) + GSRenderer(BYTE* base, bool mt, void (*irq)(), const GSRendererSettings& rs, bool psrr) + : GSRendererBase(base, mt, irq, rs) , m_psrr(psrr) { s_n = 0; @@ -687,8 +687,8 @@ protected: virtual void Draw() = 0; public: - GSRendererT(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs, bool psrr = true) - : GSRenderer(base, mt, irq, nloophack, rs, psrr) + GSRendererT(BYTE* base, bool mt, void (*irq)(), const GSRendererSettings& rs, bool psrr = true) + : GSRenderer(base, mt, irq, rs, psrr) , m_count(0) , m_maxcount(0) , m_vertices(NULL) diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index c5aa8e1f88..31c5673c9b 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -631,8 +631,8 @@ protected: } public: - GSRendererHW(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs, bool psrr) - : GSRendererT(base, mt, irq, nloophack, rs, psrr) + GSRendererHW(BYTE* base, bool mt, void (*irq)(), const GSRendererSettings& rs, bool psrr) + : GSRendererT(base, mt, irq, rs, psrr) , m_width(1024) , m_height(1024) , m_skip(0) diff --git a/plugins/GSdx/GSRendererHW10.cpp b/plugins/GSdx/GSRendererHW10.cpp index 28447fbf44..3fd9ccb9d7 100644 --- a/plugins/GSdx/GSRendererHW10.cpp +++ b/plugins/GSdx/GSRendererHW10.cpp @@ -24,8 +24,8 @@ #include "GSCrc.h" #include "resource.h" -GSRendererHW10::GSRendererHW10(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs) - : GSRendererHW(base, mt, irq, nloophack, rs, true) +GSRendererHW10::GSRendererHW10(BYTE* base, bool mt, void (*irq)(), const GSRendererSettings& rs) + : GSRendererHW(base, mt, irq, rs, true) { InitVertexKick(); } diff --git a/plugins/GSdx/GSRendererHW10.h b/plugins/GSdx/GSRendererHW10.h index b1b069ee22..a9985dfc0f 100644 --- a/plugins/GSdx/GSRendererHW10.h +++ b/plugins/GSdx/GSRendererHW10.h @@ -48,7 +48,7 @@ protected: void SetupDATE(Texture& rt, Texture& ds); public: - GSRendererHW10(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs); + GSRendererHW10(BYTE* base, bool mt, void (*irq)(), const GSRendererSettings& rs); bool Create(LPCTSTR title); diff --git a/plugins/GSdx/GSRendererHW9.cpp b/plugins/GSdx/GSRendererHW9.cpp index 009d924f81..4459094f1d 100644 --- a/plugins/GSdx/GSRendererHW9.cpp +++ b/plugins/GSdx/GSRendererHW9.cpp @@ -24,8 +24,8 @@ #include "GSCrc.h" #include "resource.h" -GSRendererHW9::GSRendererHW9(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs) - : GSRendererHW(base, mt, irq, nloophack, rs, false) +GSRendererHW9::GSRendererHW9(BYTE* base, bool mt, void (*irq)(), const GSRendererSettings& rs) + : 
GSRendererHW(base, mt, irq, rs, false) { m_fba.enabled = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("fba"), TRUE); m_logz = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("logz"), FALSE); diff --git a/plugins/GSdx/GSRendererHW9.h b/plugins/GSdx/GSRendererHW9.h index bc9c7cb030..c91a0cb42d 100644 --- a/plugins/GSdx/GSRendererHW9.h +++ b/plugins/GSdx/GSRendererHW9.h @@ -57,7 +57,7 @@ protected: void UpdateFBA(Texture& rt); public: - GSRendererHW9(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs); + GSRendererHW9(BYTE* base, bool mt, void (*irq)(), const GSRendererSettings& rs); bool Create(LPCTSTR title); diff --git a/plugins/GSdx/GSRendererNull.h b/plugins/GSdx/GSRendererNull.h index 7af3baaa88..bb9152443e 100644 --- a/plugins/GSdx/GSRendererNull.h +++ b/plugins/GSdx/GSRendererNull.h @@ -37,8 +37,8 @@ protected: } public: - GSRendererNull(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs) - : GSRendererT(base, mt, irq, nloophack, rs) + GSRendererNull(BYTE* base, bool mt, void (*irq)(), const GSRendererSettings& rs) + : GSRendererT(base, mt, irq, rs) { InitVertexKick >(); } diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index 0d47499b06..178800736d 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -717,8 +717,8 @@ protected: } public: - GSRendererSW(BYTE* base, bool mt, void (*irq)(), int nloophack, const GSRendererSettings& rs, int threads) - : GSRendererT(base, mt, irq, nloophack, rs) + GSRendererSW(BYTE* base, bool mt, void (*irq)(), const GSRendererSettings& rs, int threads) + : GSRendererT(base, mt, irq, rs) { m_rl.Create(this, threads); diff --git a/plugins/GSdx/GSSettingsDlg.cpp b/plugins/GSdx/GSSettingsDlg.cpp index 1e1ed74446..333744a560 100644 --- a/plugins/GSdx/GSSettingsDlg.cpp +++ b/plugins/GSdx/GSSettingsDlg.cpp @@ -68,9 +68,7 @@ GSSetting GSSettingsDlg::g_aspectratio[] = IMPLEMENT_DYNAMIC(GSSettingsDlg, CDialog) GSSettingsDlg::GSSettingsDlg(CWnd* pParent /*=NULL*/) : CDialog(GSSettingsDlg::IDD, pParent) - , m_tvout(FALSE) , m_filter(1) - , m_nloophack(2) , m_nativeres(FALSE) , m_vsync(FALSE) , m_logz(FALSE) @@ -104,9 +102,7 @@ void GSSettingsDlg::DoDataExchange(CDataExchange* pDX) DDX_Control(pDX, IDC_COMBO4, m_psversion); DDX_Control(pDX, IDC_COMBO2, m_interlace); DDX_Control(pDX, IDC_COMBO5, m_aspectratio); - DDX_Check(pDX, IDC_CHECK3, m_tvout); DDX_Check(pDX, IDC_CHECK4, m_filter); - DDX_Check(pDX, IDC_CHECK6, m_nloophack); DDX_Control(pDX, IDC_SPIN1, m_resx); DDX_Control(pDX, IDC_SPIN2, m_resy); DDX_Control(pDX, IDC_SPIN3, m_swthreads); @@ -219,8 +215,6 @@ BOOL GSSettingsDlg::OnInitDialog() // m_filter = pApp->GetProfileInt(_T("Settings"), _T("filter"), 1); - m_tvout = pApp->GetProfileInt(_T("Settings"), _T("tvout"), FALSE); - m_nloophack = pApp->GetProfileInt(_T("Settings"), _T("nloophack"), 2); m_vsync = !!pApp->GetProfileInt(_T("Settings"), _T("vsync"), FALSE); m_logz = !!pApp->GetProfileInt(_T("Settings"), _T("logz"), FALSE); m_fba = !!pApp->GetProfileInt(_T("Settings"), _T("fba"), TRUE); @@ -284,8 +278,6 @@ void GSSettingsDlg::OnOK() } pApp->WriteProfileInt(_T("Settings"), _T("filter"), m_filter); - pApp->WriteProfileInt(_T("Settings"), _T("tvout"), m_tvout); - pApp->WriteProfileInt(_T("Settings"), _T("nloophack"), m_nloophack); pApp->WriteProfileInt(_T("Settings"), _T("vsync"), m_vsync); pApp->WriteProfileInt(_T("Settings"), _T("logz"), m_logz); pApp->WriteProfileInt(_T("Settings"), _T("fba"), m_fba); diff --git a/plugins/GSdx/GSSettingsDlg.h 
b/plugins/GSdx/GSSettingsDlg.h index b21137cd66..e2b68c41bf 100644 --- a/plugins/GSdx/GSSettingsDlg.h +++ b/plugins/GSdx/GSSettingsDlg.h @@ -47,9 +47,7 @@ public: CComboBox m_psversion; CComboBox m_interlace; CComboBox m_aspectratio; - BOOL m_tvout; int m_filter; - int m_nloophack; CSpinButtonCtrl m_resx; CSpinButtonCtrl m_resy; CSpinButtonCtrl m_swthreads; diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index bf147881b0..a474fd741f 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -22,11 +22,9 @@ #include "stdafx.h" #include "GSState.h" -GSState::GSState(BYTE* base, bool mt, void (*irq)(), int nloophack) +GSState::GSState(BYTE* base, bool mt, void (*irq)()) : m_mt(mt) , m_irq(irq) - , m_nloophack_org(nloophack) - , m_nloophack(nloophack == 1) , m_crc(0) , m_options(0) , m_path3hack(0) @@ -1241,8 +1239,6 @@ template void GSState::Transfer(BYTE* mem, UINT32 size) while(size > 0) { - bool eop = false; - if(path.tag.NLOOP == 0) { path.SetTag(mem); @@ -1250,41 +1246,29 @@ template void GSState::Transfer(BYTE* mem, UINT32 size) mem += sizeof(GIFTag); size--; - m_q = 1.0f; - if(index == 2 && path.tag.EOP) { m_path3hack = 1; } - if(path.tag.PRE) + if(path.tag.NLOOP > 0) // eeuser 7.2.2. GIFtag: "... when NLOOP is 0, the GIF does not output anything, and values other than the EOP field are disregarded." { - ASSERT(path.tag.FLG != GIF_FLG_IMAGE); // kingdom hearts, ffxii, tales of abyss, berserk + m_q = 1.0f; - if((path.tag.FLG & 2) == 0) + if(path.tag.PRE) { - GIFReg r; - r.i64 = path.tag.PRIM; - (this->*m_fpGIFRegHandlers[GIF_A_D_REG_PRIM])(&r); - } - } + ASSERT(path.tag.FLG != GIF_FLG_IMAGE); // kingdom hearts, ffxii, tales of abyss, berserk - if(path.tag.EOP) - { - eop = true; - } - else if(path.tag.NLOOP == 0) - { - if(index == 0 && m_nloophack) - { - continue; + if((path.tag.FLG & 2) == 0) + { + GIFReg r; + r.i64 = path.tag.PRIM; + (this->*m_fpGIFRegHandlers[GIF_A_D_REG_PRIM])(&r); + } } - - eop = true; } } - - if(path.tag.NLOOP > 0) + else { switch(path.tag.FLG) { @@ -1400,21 +1384,12 @@ template void GSState::Transfer(BYTE* mem, UINT32 size) } } - if(eop && ((int)size <= 0 || index == 0)) + if(index == 0) { - break; - } - } - - // FIXME: dq8, pcsx2 error probably - - if(index == 0) - { - if(!path.tag.EOP && path.tag.NLOOP > 0) - { - path.tag.NLOOP = 0; - - TRACE(_T("path1 hack\n")); + if(path.tag.EOP && path.tag.NLOOP == 0) + { + break; + } } } @@ -1422,6 +1397,23 @@ template void GSState::Transfer(BYTE* mem, UINT32 size) { m_dump.Transfer(index, start, mem - start); } + + if(index == 0) + { + if(size == 0 && path.tag.NLOOP > 0) + { + if(m_mt) + { + // TODO + + path.tag.NLOOP = 0; + } + else + { + Transfer<0>(mem - 0x4000, 0x4000 / 16); + } + } + } } template static void WriteState(BYTE*& dst, T* src, size_t len = sizeof(T)) @@ -1617,11 +1609,6 @@ void GSState::SetGameCRC(DWORD crc, int options) m_crc = crc; m_options = options; m_game = CRC::Lookup(crc); - - if(m_nloophack_org == 2) - { - m_nloophack = m_game.nloophack; - } } void GSState::SetFrameSkip(int frameskip) diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index 1e1915c0cc..3ffc1d9d46 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -112,7 +112,6 @@ class GSState : public GSAlignedClass<16> bool m_mt; void (*m_irq)(); bool m_path3hack; - int m_nloophack_org; int m_x, m_y; int m_bytes; @@ -200,7 +199,6 @@ public: DWORD m_vprim; GSPerfMon m_perfmon; - bool m_nloophack; DWORD m_crc; int m_options; int m_frameskip; @@ -208,7 +206,7 @@ public: GSDump m_dump; public: 
- GSState(BYTE* base, bool mt, void (*irq)(), int nloophack); + GSState(BYTE* base, bool mt, void (*irq)()); virtual ~GSState(); void ResetHandlers(); diff --git a/plugins/GSdx/GSdx.rc b/plugins/GSdx/GSdx.rc index ea714def56..58ba4fc312 100644 --- a/plugins/GSdx/GSdx.rc +++ b/plugins/GSdx/GSdx.rc @@ -82,7 +82,7 @@ IDB_LOGO10 BITMAP "res\\logo10.bmp" // Dialog // -IDD_CONFIG DIALOGEX 0, 0, 189, 256 +IDD_CONFIG DIALOGEX 0, 0, 189, 248 STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU CAPTION "Settings..." FONT 8, "MS Shell Dlg", 400, 0, 0x1 @@ -108,17 +108,15 @@ BEGIN LTEXT "SW rend. threads:",IDC_STATIC,7,149,60,8 EDITTEXT IDC_EDIT3,71,147,35,13,ES_AUTOHSCROLL | ES_NUMBER CONTROL "",IDC_SPIN3,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,150,11,14 - CONTROL "NLOOP hack",IDC_CHECK6,"Button",BS_AUTO3STATE | WS_TABSTOP,7,167,55,10 - CONTROL "Enable tv-out",IDC_CHECK3,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,167,57,10 - CONTROL "Texture filtering",IDC_CHECK4,"Button",BS_AUTO3STATE | WS_TABSTOP,7,180,67,10 - CONTROL "Logarithmic Z",IDC_CHECK5,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,180,58,10 - CONTROL "Wait vsync",IDC_CHECK2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,193,51,10 - CONTROL "Alpha correction (FBA)",IDC_CHECK7,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,193,102,10 + CONTROL "Texture filtering",IDC_CHECK4,"Button",BS_AUTO3STATE | WS_TABSTOP,7,167,67,10 + CONTROL "Logarithmic Z",IDC_CHECK5,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,168,58,10 + CONTROL "Wait vsync",IDC_CHECK2,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,180,51,10 + CONTROL "Alpha correction (FBA)",IDC_CHECK7,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,80,181,102,10 CONTROL "Edge anti-aliasing (AA1, sw-mode only)",IDC_CHECK8, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,206,141,10 - CONTROL "Enable output merger blur effect",IDC_CHECK9,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,219,121,10 - DEFPUSHBUTTON "OK",IDOK,43,235,50,14 - PUSHBUTTON "Cancel",IDCANCEL,96,235,50,14 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,194,141,10 + CONTROL "Enable output merger blur effect",IDC_CHECK9,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,7,208,121,10 + DEFPUSHBUTTON "OK",IDOK,43,227,50,14 + PUSHBUTTON "Cancel",IDCANCEL,96,227,50,14 END IDD_CAPTURE DIALOGEX 0, 0, 279, 71 @@ -182,7 +180,7 @@ BEGIN VERTGUIDE, 80 VERTGUIDE, 182 TOPMARGIN, 7 - BOTTOMMARGIN, 249 + BOTTOMMARGIN, 241 END IDD_CAPTURE, DIALOG From 986683323e70bec4fc0ac4f0d02b4730a70a3dba Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Fri, 17 Apr 2009 01:54:35 +0000 Subject: [PATCH 083/143] Extremely insignificant optimization applied to recADD/ADDI instructions (omg it might save a cpu cycle per minutes or something!) Also: Reverted the addition of the ImplementationHelper<> class, since it failed miserably under GCC. 
-_- git-svn-id: http://pcsx2.googlecode.com/svn/trunk@998 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86-32/iR5900Arit.cpp | 15 ++++-- pcsx2/x86/ix86-32/iR5900AritImm.cpp | 14 ++++-- pcsx2/x86/ix86/implement/dwshift.h | 13 ++++-- pcsx2/x86/ix86/implement/group1.h | 40 +++++++++------- pcsx2/x86/ix86/implement/group2.h | 28 +++++++----- pcsx2/x86/ix86/implement/group3.h | 16 +++++-- pcsx2/x86/ix86/implement/incdec.h | 14 ++++-- pcsx2/x86/ix86/implement/movs.h | 71 ++++++++++++++++------------- pcsx2/x86/ix86/ix86.cpp | 6 +-- pcsx2/x86/ix86/ix86_types.h | 12 ----- 10 files changed, 135 insertions(+), 94 deletions(-) diff --git a/pcsx2/x86/ix86-32/iR5900Arit.cpp b/pcsx2/x86/ix86-32/iR5900Arit.cpp index ec344dc1fb..2106347b60 100644 --- a/pcsx2/x86/ix86-32/iR5900Arit.cpp +++ b/pcsx2/x86/ix86-32/iR5900Arit.cpp @@ -116,9 +116,18 @@ void recADD_constv(int info, int creg, int vreg) } else { if( _Rd_ == vreg ) { - ADD32ItoM((int)&cpuRegs.GPR.r[_Rd_].UL[ 0 ], g_cpuConstRegs[creg].UL[0]); - if( EEINST_ISLIVE1(_Rd_) ) _signExtendSFtoM( (int)&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]); - else EEINST_RESETHASLIVE1(_Rd_); + if( EEINST_ISLIVE1(_Rd_) ) + { + // must perform the ADD unconditionally, to maintain flags status: + ADD32ItoM((int)&cpuRegs.GPR.r[_Rd_].UL[ 0 ], g_cpuConstRegs[creg].UL[0]); + _signExtendSFtoM( (int)&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ]); + } + else + { + if( g_cpuConstRegs[creg].UL[0] ) + ADD32ItoM((int)&cpuRegs.GPR.r[_Rd_].UL[ 0 ], g_cpuConstRegs[creg].UL[0]); + EEINST_RESETHASLIVE1(_Rd_); + } } else { MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ vreg ].UL[ 0 ] ); diff --git a/pcsx2/x86/ix86-32/iR5900AritImm.cpp b/pcsx2/x86/ix86-32/iR5900AritImm.cpp index 4d805768e3..7ae89bfe0a 100644 --- a/pcsx2/x86/ix86-32/iR5900AritImm.cpp +++ b/pcsx2/x86/ix86-32/iR5900AritImm.cpp @@ -111,9 +111,17 @@ void recADDI_(int info) } else { if ( _Rt_ == _Rs_ ) { - ADD32ItoM((int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], _Imm_); - if ( EEINST_ISLIVE1(_Rt_) ) _signExtendSFtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]); - else EEINST_RESETHASLIVE1(_Rt_); + if ( EEINST_ISLIVE1(_Rt_) ) + { + // must perform the ADD unconditionally, to maintain flags status: + ADD32ItoM((int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], _Imm_); + _signExtendSFtoM( (int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ]); + } + else + { + if( _Imm_ ) ADD32ItoM((int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], _Imm_); + EEINST_RESETHASLIVE1(_Rt_); + } } else { MOV32MtoR( EAX, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); diff --git a/pcsx2/x86/ix86/implement/dwshift.h b/pcsx2/x86/ix86/implement/dwshift.h index 2ad6e82903..fc32e81d60 100644 --- a/pcsx2/x86/ix86/implement/dwshift.h +++ b/pcsx2/x86/ix86/implement/dwshift.h @@ -27,12 +27,17 @@ // because shifts by 0 do *not* affect flags status. template< typename ImmType, bool isShiftRight > -class DwordShiftImpl : public ImplementationHelper< ImmType > +class DwordShiftImpl { protected: + static const uint OperandSize = sizeof(ImmType); + + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void basesibform( bool isCL ) { - ImplementationHelper::prefix16(); + prefix16(); write8( 0x0f ); write8( (isCL ? 0xa5 : 0xa4) | (isShiftRight ? 0x8 : 0) ); } @@ -42,7 +47,7 @@ public: static __emitinline void Emit( const iRegister& to, const iRegister& from ) { - ImplementationHelper::prefix16(); + prefix16(); write16( 0xa50f | (isShiftRight ? 
0x800 : 0) ); ModRM_Direct( from.Id, to.Id ); } @@ -50,7 +55,7 @@ public: static __emitinline void Emit( const iRegister& to, const iRegister& from, u8 imm ) { if( imm == 0 ) return; - ImplementationHelper::prefix16(); + prefix16(); write16( 0xa40f | (isShiftRight ? 0x800 : 0) ); ModRM_Direct( from.Id, to.Id ); write8( imm ); diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index 13215ebfb1..5d338c4368 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -35,50 +35,56 @@ enum G1Type // ------------------------------------------------------------------- template< G1Type InstType, typename ImmType > -class Group1Impl : public ImplementationHelper< ImmType > +class Group1Impl { +protected: + static const uint OperandSize = sizeof(ImmType); + + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + public: Group1Impl() {} // because GCC doesn't like static classes static __emitinline void Emit( const iRegister& to, const iRegister& from ) { - ImplementationHelper::prefix16(); - iWrite( (ImplementationHelper::Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + prefix16(); + iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); ModRM_Direct( from.Id, to.Id ); } static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from ) { - ImplementationHelper::prefix16(); - iWrite( (ImplementationHelper::Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + prefix16(); + iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); EmitSibMagic( from.Id, sibdest ); } static __emitinline void Emit( const iRegister& to, const ModSibBase& sibsrc ) { - ImplementationHelper::prefix16(); - iWrite( (ImplementationHelper::Is8BitOperand() ? 2 : 3) | (InstType<<3) ); + prefix16(); + iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); EmitSibMagic( to.Id, sibsrc ); } static __emitinline void Emit( void* dest, const iRegister& from ) { - ImplementationHelper::prefix16(); - iWrite( (ImplementationHelper::Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + prefix16(); + iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); iWriteDisp( from.Id, dest ); } static __emitinline void Emit( const iRegister& to, const void* src ) { - ImplementationHelper::prefix16(); - iWrite( (ImplementationHelper::Is8BitOperand() ? 2 : 3) | (InstType<<3) ); + prefix16(); + iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); iWriteDisp( to.Id, src ); } static __emitinline void Emit( const iRegister& to, int imm ) { - ImplementationHelper::prefix16(); - if( !ImplementationHelper::Is8BitOperand() && is_s8( imm ) ) + prefix16(); + if( !Is8BitOperand() && is_s8( imm ) ) { iWrite( 0x83 ); ModRM_Direct( InstType, to.Id ); @@ -87,10 +93,10 @@ public: else { if( to.IsAccumulator() ) - iWrite( (ImplementationHelper::Is8BitOperand() ? 4 : 5) | (InstType<<3) ); + iWrite( (Is8BitOperand() ? 4 : 5) | (InstType<<3) ); else { - iWrite( ImplementationHelper::Is8BitOperand() ? 0x80 : 0x81 ); + iWrite( Is8BitOperand() ? 0x80 : 0x81 ); ModRM_Direct( InstType, to.Id ); } iWrite( imm ); @@ -99,7 +105,7 @@ public: static __emitinline void Emit( const ModSibStrict& sibdest, int imm ) { - if( ImplementationHelper::Is8BitOperand() ) + if( Is8BitOperand() ) { iWrite( 0x80 ); EmitSibMagic( InstType, sibdest ); @@ -107,7 +113,7 @@ public: } else { - ImplementationHelper::prefix16(); + prefix16(); iWrite( is_s8( imm ) ? 
0x83 : 0x81 ); EmitSibMagic( InstType, sibdest ); if( is_s8( imm ) ) diff --git a/pcsx2/x86/ix86/implement/group2.h b/pcsx2/x86/ix86/implement/group2.h index f4153378d9..099aec32ee 100644 --- a/pcsx2/x86/ix86/implement/group2.h +++ b/pcsx2/x86/ix86/implement/group2.h @@ -39,15 +39,21 @@ enum G2Type // This is a safe optimization since any zero-value shift does not affect any flags. // template< G2Type InstType, typename ImmType > -class Group2Impl : public ImplementationHelper< ImmType > +class Group2Impl { +protected: + static const uint OperandSize = sizeof(ImmType); + + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + public: Group2Impl() {} // For the love of GCC. static __emitinline void Emit( const iRegister& to ) { - ImplementationHelper::prefix16(); - iWrite( ImplementationHelper::Is8BitOperand() ? 0xd2 : 0xd3 ); + prefix16(); + iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); ModRM_Direct( InstType, to.Id ); } @@ -55,16 +61,16 @@ public: { if( imm == 0 ) return; - ImplementationHelper::prefix16(); + prefix16(); if( imm == 1 ) { // special encoding of 1's - iWrite( ImplementationHelper::Is8BitOperand() ? 0xd0 : 0xd1 ); + iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); ModRM_Direct( InstType, to.Id ); } else { - iWrite( ImplementationHelper::Is8BitOperand() ? 0xc0 : 0xc1 ); + iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); ModRM_Direct( InstType, to.Id ); iWrite( imm ); } @@ -72,8 +78,8 @@ public: static __emitinline void Emit( const ModSibStrict& sibdest ) { - ImplementationHelper::prefix16(); - iWrite( ImplementationHelper::Is8BitOperand() ? 0xd2 : 0xd3 ); + prefix16(); + iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); EmitSibMagic( InstType, sibdest ); } @@ -81,16 +87,16 @@ public: { if( imm == 0 ) return; - ImplementationHelper::prefix16(); + prefix16(); if( imm == 1 ) { // special encoding of 1's - iWrite( ImplementationHelper::Is8BitOperand() ? 0xd0 : 0xd1 ); + iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); EmitSibMagic( InstType, sibdest ); } else { - iWrite( ImplementationHelper::Is8BitOperand() ? 0xc0 : 0xc1 ); + iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); EmitSibMagic( InstType, sibdest ); iWrite( imm ); } diff --git a/pcsx2/x86/ix86/implement/group3.h b/pcsx2/x86/ix86/implement/group3.h index 41b4cdc625..05543cdd05 100644 --- a/pcsx2/x86/ix86/implement/group3.h +++ b/pcsx2/x86/ix86/implement/group3.h @@ -32,22 +32,28 @@ enum G3Type }; template< typename ImmType > -class Group3Impl : public ImplementationHelper +class Group3Impl { +protected: + static const uint OperandSize = sizeof(ImmType); + + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + public: Group3Impl() {} // For the love of GCC. static __emitinline void Emit( G3Type InstType, const iRegister& from ) { - ImplementationHelper::prefix16(); - iWrite(ImplementationHelper::Is8BitOperand() ? 0xf6 : 0xf7 ); + prefix16(); + iWrite(Is8BitOperand() ? 0xf6 : 0xf7 ); ModRM_Direct( InstType, from.Id ); } static __emitinline void Emit( G3Type InstType, const ModSibStrict& sibsrc ) { - ImplementationHelper::prefix16(); - iWrite( ImplementationHelper::Is8BitOperand() ? 0xf6 : 0xf7 ); + prefix16(); + iWrite( Is8BitOperand() ? 
0xf6 : 0xf7 ); EmitSibMagic( InstType, sibsrc ); } }; diff --git a/pcsx2/x86/ix86/implement/incdec.h b/pcsx2/x86/ix86/implement/incdec.h index 1570aa3768..f7c1e6b3ad 100644 --- a/pcsx2/x86/ix86/implement/incdec.h +++ b/pcsx2/x86/ix86/implement/incdec.h @@ -22,8 +22,14 @@ // Note: This header is meant to be included from within the x86Emitter::Internal namespace. template< typename ImmType > -class IncDecImpl : public ImplementationHelper +class IncDecImpl { +protected: + static const uint OperandSize = sizeof(ImmType); + + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + public: IncDecImpl() {} // For the love of GCC. @@ -31,21 +37,21 @@ public: { // There is no valid 8-bit form of direct register inc/dec, so fall // back on Mod/RM format instead: - if (ImplementationHelper::Is8BitOperand() ) + if (Is8BitOperand() ) { write8( 0xfe ); ModRM_Direct( isDec ? 1 : 0, to.Id ); } else { - ImplementationHelper::prefix16(); + prefix16(); write8( (isDec ? 0x48 : 0x40) | to.Id ); } } static __emitinline void Emit( bool isDec, const ModSibStrict& dest ) { - write8( ImplementationHelper::Is8BitOperand() ? 0xfe : 0xff ); + write8( Is8BitOperand() ? 0xfe : 0xff ); EmitSibMagic( isDec ? 1: 0, dest ); } }; diff --git a/pcsx2/x86/ix86/implement/movs.h b/pcsx2/x86/ix86/implement/movs.h index 7ac0dee7bf..6b14e58702 100644 --- a/pcsx2/x86/ix86/implement/movs.h +++ b/pcsx2/x86/ix86/implement/movs.h @@ -25,112 +25,121 @@ // MOV instruction Implementation template< typename ImmType > -class MovImpl : ImplementationHelper< ImmType > +class MovImpl { public: + static const uint OperandSize = sizeof(ImmType); + +protected: + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + +public: + MovImpl() {} + // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const iRegister& from ) + static __emitinline void Emit( const iRegister& to, const iRegister& from ) { if( to == from ) return; // ignore redundant MOVs. - ImplementationHelper::prefix16(); - iWrite( ImplementationHelper::Is8BitOperand() ? 0x88 : 0x89 ); + prefix16(); + iWrite( Is8BitOperand() ? 0x88 : 0x89 ); ModRM( 3, from.Id, to.Id ); } // ------------------------------------------------------------------------ - static __forceinline void Emit( const ModSibBase& dest, const iRegister& from ) + static __emitinline void Emit( const ModSibBase& dest, const iRegister& from ) { - ImplementationHelper::prefix16(); + prefix16(); // mov eax has a special from when writing directly to a DISP32 address // (sans any register index/base registers). if( from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty() ) { - iWrite( ImplementationHelper::Is8BitOperand() ? 0xa2 : 0xa3 ); + iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); iWrite( dest.Displacement ); } else { - iWrite( ImplementationHelper::Is8BitOperand() ? 0x88 : 0x89 ); + iWrite( Is8BitOperand() ? 0x88 : 0x89 ); EmitSibMagic( from.Id, dest ); } } // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const ModSibBase& src ) + static __emitinline void Emit( const iRegister& to, const ModSibBase& src ) { - ImplementationHelper::prefix16(); + prefix16(); // mov eax has a special from when reading directly from a DISP32 address // (sans any register index/base registers). 
if( to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty() ) { - iWrite( ImplementationHelper::Is8BitOperand() ? 0xa0 : 0xa1 ); + iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); iWrite( src.Displacement ); } else { - iWrite( ImplementationHelper::Is8BitOperand() ? 0x8a : 0x8b ); + iWrite( Is8BitOperand() ? 0x8a : 0x8b ); EmitSibMagic( to.Id, src ); } } // ------------------------------------------------------------------------ - static __forceinline void Emit( void* dest, const iRegister& from ) + static __emitinline void Emit( void* dest, const iRegister& from ) { - ImplementationHelper::prefix16(); + prefix16(); // mov eax has a special from when writing directly to a DISP32 address if( from.IsAccumulator() ) { - iWrite( ImplementationHelper::Is8BitOperand() ? 0xa2 : 0xa3 ); + iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); iWrite( (s32)dest ); } else { - iWrite( ImplementationHelper::Is8BitOperand() ? 0x88 : 0x89 ); + iWrite( Is8BitOperand() ? 0x88 : 0x89 ); iWriteDisp( from.Id, dest ); } } // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, const void* src ) + static __emitinline void Emit( const iRegister& to, const void* src ) { - ImplementationHelper::prefix16(); + prefix16(); // mov eax has a special from when reading directly from a DISP32 address if( to.IsAccumulator() ) { - iWrite( ImplementationHelper::Is8BitOperand() ? 0xa0 : 0xa1 ); + iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); iWrite( (s32)src ); } else { - iWrite( ImplementationHelper::Is8BitOperand() ? 0x8a : 0x8b ); + iWrite( Is8BitOperand() ? 0x8a : 0x8b ); iWriteDisp( to.Id, src ); } } // ------------------------------------------------------------------------ - static __forceinline void Emit( const iRegister& to, ImmType imm ) + static __emitinline void Emit( const iRegister& to, ImmType imm ) { // Note: MOV does not have (reg16/32,imm8) forms. - ImplementationHelper::prefix16(); - iWrite( (ImplementationHelper::Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); + prefix16(); + iWrite( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); iWrite( imm ); } // ------------------------------------------------------------------------ - static __forceinline void Emit( ModSibStrict dest, ImmType imm ) + static __emitinline void Emit( ModSibStrict dest, ImmType imm ) { - ImplementationHelper::prefix16(); - iWrite( ImplementationHelper::Is8BitOperand() ? 0xc6 : 0xc7 ); + prefix16(); + iWrite( Is8BitOperand() ? 0xc6 : 0xc7 ); EmitSibMagic( 0, dest ); iWrite( imm ); } @@ -180,9 +189,11 @@ public: // CMOV !! 
[in all of it's disappointing lack-of glory] // template< typename ImmType > -class CMovImpl : public ImplementationHelper< ImmType > +class CMovImpl { protected: + static const uint OperandSize = sizeof(ImmType); + static bool Is8BitOperand() {return OperandSize == 1; } static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } @@ -195,8 +206,8 @@ protected: } public: - static const uint OperandSize = sizeof(ImmType); - + CMovImpl() {} + static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const iRegister& from ) { if( to == from ) return; @@ -215,8 +226,6 @@ public: emit_base( cc ); EmitSibMagic( to.Id, sibsrc ); } - CMovImpl() {} - }; // ------------------------------------------------------------------------ diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 11020fa647..d9d9d0ecda 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -427,7 +427,7 @@ static void EmitLeaMagic( iRegister to, const ModSibBase& src, bool { // ESP is not encodable as an index (ix86 ignores it), thus: iMOV( to, ToReg( src.Base.Id ) ); // will do the trick! - iADD( to, src.Displacement ); + if( src.Displacement ) iADD( to, src.Displacement ); return; } else if( src.Displacement == 0 ) @@ -483,10 +483,8 @@ __emitinline void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_fla template< typename ImmType > class iMulImpl { -public: - static const uint OperandSize = sizeof(ImmType); - protected: + static const uint OperandSize = sizeof(ImmType); static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } public: diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 4a8ebfe272..d85c14c99f 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -618,18 +618,6 @@ namespace x86Emitter extern void EmitSibMagic( uint regfield, const ModSibBase& info ); - // ------------------------------------------------------------------------ - template< typename ImmType > - class ImplementationHelper - { - public: - static const uint OperandSize = sizeof(ImmType); - - protected: - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } - }; - // ------------------------------------------------------------------------ #include "implement/group1.h" #include "implement/group2.h" From 984ca67042c7b4a4de500039554b6e959f0b430a Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Fri, 17 Apr 2009 12:39:22 +0000 Subject: [PATCH 084/143] -Scaled back the EE load/store cycle count to the theoretical minimum of 1 cycle. (Fixes Digital Devil Saga PAL fmv) -Added a safety to the VU cycle stealing hack, so it doesn't go berserk :p Note: Due to the changed cycle count a lot of games will get "slower". Especially FMV will be affected. This is unfortunate, but correct emulation comes first. You can however enable the ee sync speedhacks, as they're more stable now as well. This can bring back the lost speed. 
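The safety mentioned above is just a cap: the cycle stealing hack now credits the EE with at most 3000 VU cycles each time a SuperVU program finishes, before scaling by the hack level, so a single very long VU run can no longer inflate cpuRegs.cycle without bound. A minimal sketch of the clamp (illustrative only; the committed change in iVUzerorec.cpp below is the authoritative form, s_TotalVUCycles and Config.VUCycleHack are the existing variables it operates on, and StolenEECycles is a made-up name for this example):

    #include <algorithm>   // std::min

    // Never steal more than 3000 VU cycles at once; scale the rest by the hack level (0..4).
    static unsigned int StolenEECycles(unsigned int totalVUCycles, unsigned int hackLevel)
    {
        const unsigned int maxStolen = 3000;
        return std::min(totalVUCycles, maxStolen) * hackLevel;
    }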
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@999 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/R5900OpcodeTables.cpp | 19 ++++++++----------- pcsx2/x86/iVUzerorec.cpp | 6 +++++- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pcsx2/R5900OpcodeTables.cpp b/pcsx2/R5900OpcodeTables.cpp index 4c52c052d6..431a6460cf 100644 --- a/pcsx2/R5900OpcodeTables.cpp +++ b/pcsx2/R5900OpcodeTables.cpp @@ -108,11 +108,8 @@ namespace R5900 static const int FPU_Mult = 12; - static const int Store = 28; - static const int Load = 22; - - static const int StoreFast = 14; - static const int LoadFast = 12; + static const int Store = 8; + static const int Load = 8; } using namespace Cycles; @@ -263,28 +260,28 @@ namespace R5900 MakeOpcode( LB, Load ); MakeOpcode( LH, Load ); MakeOpcode( LWL, Load ); - MakeOpcode( LW, LoadFast ); + MakeOpcode( LW, Load ); MakeOpcode( LBU, Load ); MakeOpcode( LHU, Load ); MakeOpcode( LWR, Load ); MakeOpcode( LWU, Load ); MakeOpcode( LWC1, Load ); MakeOpcode( LQC2, Load ); - MakeOpcode( LD, LoadFast ); + MakeOpcode( LD, Load ); // Stores! MakeOpcode( SQ, Store ); - MakeOpcode( SB, Store );//slow - MakeOpcode( SH, Store );//slow + MakeOpcode( SB, Store ); + MakeOpcode( SH, Store ); MakeOpcode( SWL, Store ); - MakeOpcode( SW, StoreFast ); + MakeOpcode( SW, Store ); MakeOpcode( SDL, Store ); MakeOpcode( SDR, Store ); MakeOpcode( SWR, Store ); MakeOpcode( SWC1, Store ); MakeOpcode( SQC2, Store ); - MakeOpcode( SD, StoreFast ); + MakeOpcode( SD, Store ); // Multimedia Instructions! diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index b17b0da717..cfea2f0556 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -2294,7 +2294,11 @@ void SuperVUCleanupProgram(u32 startpc, int vuindex) VU = vuindex ? &VU1 : &VU0; VU->cycle += s_TotalVUCycles; - cpuRegs.cycle += s_TotalVUCycles * Config.VUCycleHack; + + //VU cycle stealing hack, 3000 cycle maximum so it doesn't get out of hand + if (s_TotalVUCycles < 3000) cpuRegs.cycle += s_TotalVUCycles * Config.VUCycleHack; + else cpuRegs.cycle += 3000 * Config.VUCycleHack; + if( (int)s_writeQ > 0 ) VU->VI[REG_Q] = VU->q; if( (int)s_writeP > 0 ) { assert(VU == &VU1); From 62604d359a4cb068a3a1f81a83f2a464310261f1 Mon Sep 17 00:00:00 2001 From: refraction Date: Fri, 17 Apr 2009 18:02:49 +0000 Subject: [PATCH 085/143] Commented out my bodge to XGKick on the VU interpreter, Gabest has now solved this issue in his latest gsdx build. Happy 1000th commit :) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1000 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VUops.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index bd8b81b0b7..c0341158b7 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -2046,19 +2046,19 @@ void _vuXITOP(VURegs * VU) { void _vuXGKICK(VURegs * VU) { u32* ptr = (u32*)GET_VU_MEM(VU, (VU->VI[_Fs_].US[0]*16) & (VU == &VU1 ? 
0x3fff : 0xfff)); - int temp = 0x4000 - ((VU->VI[_Fs_].US[0]*16) & 0x3fff); - u32 tempmem[0x8000]; +// int temp = 0x4000 - ((VU->VI[_Fs_].US[0]*16) & 0x3fff); +// u32 tempmem[0x8000]; // flush all pipelines first (in the right order) _vuFlushAll(VU); //Gonna be slow but reshuffles the memory so overflows wont occur - memset(tempmem, 0, sizeof(tempmem)); + /* memset(tempmem, 0, sizeof(tempmem)); memcpy(tempmem, ptr, temp); ptr = (u32*)GET_VU_MEM(VU, 0); memcpy(&tempmem[temp], ptr, ((VU->VI[_Fs_].US[0]*16) & 0x3fff)); GSGIFTRANSFER1((u32*)&tempmem[0], 0); - //} else GSGIFTRANSFER1((u32*)VU->Mem, (VU->VI[_Fs_].US[0]*16) & 0x3fff); + } else*/ GSGIFTRANSFER1((u32*)VU->Mem, (VU->VI[_Fs_].US[0]*16) & 0x3fff); } void _vuXTOP(VURegs * VU) { From 029a94ce3fd3791c53204913ee016373dc133ced Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Fri, 17 Apr 2009 18:47:04 +0000 Subject: [PATCH 086/143] Emitter: Added BTS/SETcc/TEST instructions. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1001 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 8 + pcsx2/x86/ix86/implement/bittest.h | 117 ++++++++++ pcsx2/x86/ix86/implement/movs.h | 77 +++++-- pcsx2/x86/ix86/implement/test.h | 83 +++++++ pcsx2/x86/ix86/ix86.cpp | 44 +++- pcsx2/x86/ix86/ix86_instructions.h | 1 - pcsx2/x86/ix86/ix86_legacy.cpp | 246 +++++---------------- pcsx2/x86/ix86/ix86_types.h | 38 ++++ 8 files changed, 411 insertions(+), 203 deletions(-) create mode 100644 pcsx2/x86/ix86/implement/bittest.h create mode 100644 pcsx2/x86/ix86/implement/test.h diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 51852e0cdc..6df72ec1ea 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2980,6 +2980,10 @@ + + @@ -3004,6 +3008,10 @@ RelativePath="..\..\x86\ix86\implement\movs.h" > + + +class Group8Impl +{ +protected: + static const uint OperandSize = sizeof(ImmType); + + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + +public: + Group8Impl() {} // For the love of GCC. 
+ + static __emitinline void Emit( const iRegister& bitbase, const iRegister& bitoffset ) + { + prefix16(); + iWrite( 0x0f ); + iWrite( 0xa3 | (InstType << 2) ); + ModRM_Direct( bitoffset.Id, bitbase.Id ); + } + + static __emitinline void Emit( void* bitbase, const iRegister& bitoffset ) + { + prefix16(); + iWrite( 0x0f ); + iWrite( 0xa3 | (InstType << 2) ); + iWriteDisp( bitoffset.Id, bitbase.Id ); + } + + static __emitinline void Emit( const ModSibBase& bitbase, const iRegister& bitoffset ) + { + prefix16(); + iWrite( 0x0f ); + iWrite( 0xa3 | (InstType << 2) ); + EmitSibMagic( bitoffset.Id, bitbase ); + } + + static __emitinline void Emit( const iRegister& bitbase, u8 immoffset ) + { + prefix16(); + iWrite( 0xba0f ); + ModRM_Direct( InstType, bitbase.Id ); + iWrite( immoffset ); + } + + static __emitinline void Emit( const ModSibStrict& bitbase, u8 immoffset ) + { + prefix16(); + iWrite( 0xba0f ); + EmitSibMagic( InstType, bitbase ); + iWrite( immoffset ); + } +}; + +// ------------------------------------------------------------------- +// +template< G8Type InstType > +class Group8ImplAll +{ +protected: + typedef Group8Impl m_32; + typedef Group8Impl m_16; + +public: + __forceinline void operator()( const iRegister32& bitbase, const iRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } + __forceinline void operator()( const iRegister16& bitbase, const iRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } + __forceinline void operator()( void* bitbase, const iRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } + __forceinline void operator()( void* bitbase, const iRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } + __noinline void operator()( const ModSibBase& bitbase, const iRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } + __noinline void operator()( const ModSibBase& bitbase, const iRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } + + // Note on Imm forms : use int as the source operand since it's "reasonably inert" from a compiler + // perspective. (using uint tends to make the compiler try and fail to match signed immediates with + // one of the other overloads). + + __noinline void operator()( const ModSibStrict& bitbase, u8 immoffset ) const { m_32::Emit( bitbase, immoffset ); } + __noinline void operator()( const ModSibStrict& bitbase, u8 immoffset ) const { m_16::Emit( bitbase, immoffset ); } + void operator()( const iRegister& bitbase, u8 immoffset ) const { m_32::Emit( bitbase, immoffset ); } + void operator()( const iRegister& bitbase, u8 immoffset ) const { m_16::Emit( bitbase, immoffset ); } + + Group8ImplAll() {} +}; diff --git a/pcsx2/x86/ix86/implement/movs.h b/pcsx2/x86/ix86/implement/movs.h index 6b14e58702..80ff2a4cdd 100644 --- a/pcsx2/x86/ix86/implement/movs.h +++ b/pcsx2/x86/ix86/implement/movs.h @@ -27,10 +27,8 @@ template< typename ImmType > class MovImpl { -public: - static const uint OperandSize = sizeof(ImmType); - protected: + static const uint OperandSize = sizeof(ImmType); static bool Is8BitOperand() { return OperandSize == 1; } static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } @@ -44,7 +42,7 @@ public: prefix16(); iWrite( Is8BitOperand() ? 0x88 : 0x89 ); - ModRM( 3, from.Id, to.Id ); + ModRM_Direct( from.Id, to.Id ); } // ------------------------------------------------------------------------ @@ -187,14 +185,16 @@ public: ////////////////////////////////////////////////////////////////////////////////////////// // CMOV !! 
[in all of it's disappointing lack-of glory] +// Caution! This instruction can look exciting and cool, until you realize that it cannot +// load immediate values into registers. -_- // -template< typename ImmType > -class CMovImpl +template< typename ImmType, int InstBaseVal > +class CMovSetImpl { protected: static const uint OperandSize = sizeof(ImmType); - static bool Is8BitOperand() {return OperandSize == 1; } + static bool Is8BitOperand() { return OperandSize == 1; } static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } static __forceinline void emit_base( JccComparisonType cc ) @@ -202,11 +202,11 @@ protected: jASSUME( cc >= 0 && cc <= 0x0f ); prefix16(); write8( 0x0f ); - write8( 0x40 | cc ); + write8( InstBaseVal | cc ); } public: - CMovImpl() {} + CMovSetImpl() {} static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const iRegister& from ) { @@ -226,6 +226,27 @@ public: emit_base( cc ); EmitSibMagic( to.Id, sibsrc ); } + + // This form is provided for SETcc only (not available in CMOV) + static __emitinline void EmitSet( JccComparisonType cc, const iRegister& to ) + { + emit_base( cc ); + ModRM_Direct( 0, to.Id ); + } + + // This form is provided for SETcc only (not available in CMOV) + static __emitinline void EmitSet( JccComparisonType cc, const void* src ) + { + emit_base( cc ); + iWriteDisp( 0, src ); + } + + // This form is provided for SETcc only (not available in CMOV) + static __emitinline void EmitSet( JccComparisonType cc, const ModSibStrict& sibsrc ) + { + emit_base( cc ); + EmitSibMagic( 0, sibsrc ); + } }; // ------------------------------------------------------------------------ @@ -235,8 +256,8 @@ public: class CMovImplGeneric { protected: - typedef CMovImpl m_32; - typedef CMovImpl m_16; + typedef CMovSetImpl m_32; // 0x40 is the cmov base instruction id + typedef CMovSetImpl m_16; // 0x40 is the cmov base instruction id public: __forceinline void operator()( JccComparisonType ccType, const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); } @@ -255,8 +276,8 @@ template< JccComparisonType ccType > class CMovImplAll { protected: - typedef CMovImpl m_32; - typedef CMovImpl m_16; + typedef CMovSetImpl m_32; + typedef CMovSetImpl m_16; public: __forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); } @@ -270,6 +291,36 @@ public: CMovImplAll() {} // don't ask. }; +// ------------------------------------------------------------------------ +class SetImplGeneric +{ +protected: + typedef CMovSetImpl Impl; // 0x90 is the SETcc base instruction id + +public: + __forceinline void operator()( JccComparisonType cc, const iRegister8& to ) const { Impl::EmitSet( cc, to ); } + __forceinline void operator()( JccComparisonType cc, void* dest ) const { Impl::EmitSet( cc, dest ); } + __noinline void operator()( JccComparisonType cc, const ModSibStrict& dest ) const { Impl::EmitSet( cc, dest ); } + + SetImplGeneric() {} // if you do, ask GCC. 
+}; + +// ------------------------------------------------------------------------ +template< JccComparisonType ccType > +class SetImplAll +{ +protected: + typedef CMovSetImpl Impl; // 0x90 is the SETcc base instruction id + +public: + __forceinline void operator()( const iRegister8& to ) const { Impl::EmitSet( ccType, to ); } + __forceinline void operator()( void* dest ) const { Impl::EmitSet( ccType, dest ); } + __noinline void operator()( const ModSibStrict& dest ) const { Impl::EmitSet( ccType, dest ); } + + SetImplAll() {} // if you do, ask GCC. +}; + + ////////////////////////////////////////////////////////////////////////////////////////// // Mov with sign/zero extension implementations (movsx / movzx) // diff --git a/pcsx2/x86/ix86/implement/test.h b/pcsx2/x86/ix86/implement/test.h new file mode 100644 index 0000000000..c7b2fa58b2 --- /dev/null +++ b/pcsx2/x86/ix86/implement/test.h @@ -0,0 +1,83 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////// +// MOV instruction Implementation + +template< typename ImmType > +class TestImpl +{ +protected: + static const uint OperandSize = sizeof(ImmType); + static bool Is8BitOperand() { return OperandSize == 1; } + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + +public: + TestImpl() {} + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const iRegister& from ) + { + prefix16(); + iWrite( Is8BitOperand() ? 0x84 : 0x85 ); + ModRM_Direct( from.Id, to.Id ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, ImmType imm ) + { + prefix16(); + + if( to.IsAccumulator() ) + iWrite( Is8BitOperand() ? 0xa8 : 0xa9 ); + else + { + iWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); + ModRM_Direct( 0, to.Id ); + } + iWrite( imm ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( ModSibStrict dest, ImmType imm ) + { + prefix16(); + iWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); + EmitSibMagic( 0, dest ); + iWrite( imm ); + } +}; + +// ------------------------------------------------------------------- +// +class TestImplAll +{ +public: + template< typename T > + __forceinline void operator()( const iRegister& to, const iRegister& from ) const { TestImpl::Emit( to, from ); } + + template< typename T > + __noinline void operator()( const ModSibStrict& sibdest, T imm ) const { TestImpl::Emit( sibdest, imm ); } + template< typename T > + void operator()( const iRegister& to, T imm ) const { TestImpl::Emit( to, imm ); } + + TestImplAll() {} // Why does GCC need these? 
+}; + diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index d9d9d0ecda..004ac9f4f9 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -241,6 +241,7 @@ namespace Internal using namespace Internal; const MovImplAll iMOV; +const TestImplAll iTEST; const Group1ImplAll iADD; const Group1ImplAll iOR; @@ -271,9 +272,15 @@ const IncDecImplAll iDEC; const MovExtendImplAll iMOVZX; const MovExtendImplAll iMOVSX; -const Internal::DwordShiftImplAll iSHLD; -const Internal::DwordShiftImplAll iSHRD; +const DwordShiftImplAll iSHLD; +const DwordShiftImplAll iSHRD; +const Group8ImplAll iBT; +const Group8ImplAll iBTR; +const Group8ImplAll iBTS; +const Group8ImplAll iBTC; + +// ------------------------------------------------------------------------ const CMovImplGeneric iCMOV; const CMovImplAll iCMOVA; @@ -301,6 +308,35 @@ const CMovImplAll iCMOVNS; const CMovImplAll iCMOVPE; const CMovImplAll iCMOVPO; +// ------------------------------------------------------------------------ +const SetImplGeneric iSET; + +const SetImplAll iSETA; +const SetImplAll iSETAE; +const SetImplAll iSETB; +const SetImplAll iSETBE; + +const SetImplAll iSETG; +const SetImplAll iSETGE; +const SetImplAll iSETL; +const SetImplAll iSETLE; + +const SetImplAll iSETZ; +const SetImplAll iSETE; +const SetImplAll iSETNZ; +const SetImplAll iSETNE; + +const SetImplAll iSETO; +const SetImplAll iSETNO; +const SetImplAll iSETC; +const SetImplAll iSETNC; + +const SetImplAll iSETS; +const SetImplAll iSETNS; +const SetImplAll iSETPE; +const SetImplAll iSETPO; + + // ------------------------------------------------------------------------ // Assigns the current emitter buffer target address. // This is provided instead of using x86Ptr directly, since we may in the future find @@ -582,13 +618,13 @@ __noinline void iSMUL( const iRegister16& to, const ModSibBase& from, s16 imm ) __emitinline void iPOP( const ModSibBase& from ) { iWrite( 0x8f ); - Internal::EmitSibMagic( 0, from ); + EmitSibMagic( 0, from ); } __emitinline void iPUSH( const ModSibBase& from ) { iWrite( 0xff ); - Internal::EmitSibMagic( 6, from ); + EmitSibMagic( 6, from ); } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index d625b6a7c5..9a59620ce6 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -119,7 +119,6 @@ namespace x86Emitter typedef iForwardJump iForwardJump8; typedef iForwardJump iForwardJump32; - DEFINE_FORWARD_JUMP( JA, Jcc_Above ); DEFINE_FORWARD_JUMP( JB, Jcc_Below ); diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 79f442a190..0ecac70f31 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -145,6 +145,20 @@ DEFINE_LEGACY_MOVEXTEND( ZX, 32, 8 ) DEFINE_LEGACY_MOVEXTEND( SX, 16, 8 ) DEFINE_LEGACY_MOVEXTEND( ZX, 16, 8 ) +emitterT void TEST32ItoR( x86IntRegType to, u32 from ) { iTEST( iRegister32(to), from ); } +emitterT void TEST32ItoM( uptr to, u32 from ) { iTEST( ptr32[to], from ); } +emitterT void TEST32RtoR( x86IntRegType to, x86IntRegType from ) { iTEST( iRegister32(to), iRegister32(from) ); } +emitterT void TEST32ItoRm( x86IntRegType to, u32 from ) { iTEST( ptr32[x86IndexReg(to)], from ); } + +emitterT void TEST16ItoR( x86IntRegType to, u16 from ) { iTEST( iRegister16(to), from ); } +emitterT void TEST16ItoM( uptr to, u16 from ) { iTEST( ptr16[to], from ); } +emitterT void TEST16RtoR( x86IntRegType to, x86IntRegType from ) { iTEST( iRegister16(to), iRegister16(from) ); } +emitterT void 
TEST16ItoRm( x86IntRegType to, u16 from ) { iTEST( ptr16[x86IndexReg(to)], from ); } + +emitterT void TEST8ItoR( x86IntRegType to, u8 from ) { iTEST( iRegister8(to), from ); } +emitterT void TEST8ItoM( uptr to, u8 from ) { iTEST( ptr8[to], from ); } +emitterT void TEST8RtoR( x86IntRegType to, x86IntRegType from ) { iTEST( iRegister8(to), iRegister8(from) ); } +emitterT void TEST8ItoRm( x86IntRegType to, u8 from ) { iTEST( ptr8[x86IndexReg(to)], from ); } // mov r32 to [r32<(to), from ); @@ -251,6 +263,53 @@ emitterT void LEA16RStoR(x86IntRegType to, x86IntRegType from, u32 scale) iLEA( iRegister16( to ), ptr[x86IndexReg(from)*(1< iROL; extern const Internal::Group2ImplAll iROR; @@ -672,6 +675,12 @@ namespace x86Emitter extern const Internal::DwordShiftImplAll iSHLD; extern const Internal::DwordShiftImplAll iSHRD; + extern const Internal::Group8ImplAll iBT; + extern const Internal::Group8ImplAll iBTR; + extern const Internal::Group8ImplAll iBTS; + extern const Internal::Group8ImplAll iBTC; + + // ------------------------------------------------------------------------ extern const Internal::CMovImplGeneric iCMOV; extern const Internal::CMovImplAll iCMOVA; @@ -698,6 +707,35 @@ namespace x86Emitter extern const Internal::CMovImplAll iCMOVNS; extern const Internal::CMovImplAll iCMOVPE; extern const Internal::CMovImplAll iCMOVPO; + + // ------------------------------------------------------------------------ + extern const Internal::SetImplGeneric iSET; + + extern const Internal::SetImplAll iSETA; + extern const Internal::SetImplAll iSETAE; + extern const Internal::SetImplAll iSETB; + extern const Internal::SetImplAll iSETBE; + + extern const Internal::SetImplAll iSETG; + extern const Internal::SetImplAll iSETGE; + extern const Internal::SetImplAll iSETL; + extern const Internal::SetImplAll iSETLE; + + extern const Internal::SetImplAll iSETZ; + extern const Internal::SetImplAll iSETE; + extern const Internal::SetImplAll iSETNZ; + extern const Internal::SetImplAll iSETNE; + + extern const Internal::SetImplAll iSETO; + extern const Internal::SetImplAll iSETNO; + extern const Internal::SetImplAll iSETC; + extern const Internal::SetImplAll iSETNC; + + extern const Internal::SetImplAll iSETS; + extern const Internal::SetImplAll iSETNS; + extern const Internal::SetImplAll iSETPE; + extern const Internal::SetImplAll iSETPO; + } From 14f306a80086b867914064a6e69a8a5731b934f5 Mon Sep 17 00:00:00 2001 From: Nneeve Date: Fri, 17 Apr 2009 19:35:22 +0000 Subject: [PATCH 087/143] Fixes a mistake that I made in r927. 
Fixes regression issue 158 git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1002 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVUzerorec.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index cfea2f0556..ad0b628013 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -2044,7 +2044,8 @@ void VuBaseBlock::AssignVFRegs() else if( itinst->vfacc[i] >= 0 ) lastwrite = itinst->vfacc[i]; // always alloc at least 1 temp reg - int free0 = (i||regs->VFwrite||regs->VFread0||regs->VFread1||(regs->VIwrite&(1<VFwrite||regs->VFread0||regs->VFread1||(regs->VIwrite&(1<VIread&(1<vfwrite[1] >= 0 && (itinst->vfread0[0]==itinst->vfwrite[1]||itinst->vfread1[0]==itinst->vfwrite[1]) ) { @@ -2060,9 +2061,9 @@ void VuBaseBlock::AssignVFRegs() _freeXMMreg(free1); _freeXMMreg(free2); } - else if( regs->VIwrite & (1<VIwrite & (1<VIread & (1<VIwrite & (1< Date: Sat, 18 Apr 2009 04:28:24 +0000 Subject: [PATCH 088/143] microVU: mostly block-linking/execution stuff... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1003 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.cpp | 10 +++--- pcsx2/x86/microVU.h | 66 +++++++++++------------------------ pcsx2/x86/microVU_Alloc.h | 10 +++++- pcsx2/x86/microVU_Analyze.inl | 16 ++++----- pcsx2/x86/microVU_Compile.inl | 63 +++++++++++++++++++++------------ pcsx2/x86/microVU_Execute.inl | 37 +++++++------------- pcsx2/x86/microVU_Misc.h | 12 ++++--- pcsx2/x86/microVU_Tables.inl | 2 -- 8 files changed, 105 insertions(+), 111 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index d54d8367ab..af8af9002b 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -63,9 +63,9 @@ microVUt(void) mVUreset() { // Dynarec Cache mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); - mVU->ptr = mVU->cache; - + // Setup Entrance/Exit Points + x86SetPtr(mVU->cache); mVUdispatcherA(); mVUdispatcherB(); @@ -105,6 +105,7 @@ microVUt(void) mVUclose() { microVUt(void) mVUclear(u32 addr, u32 size) { microVU* mVU = mVUx; + memset(&mVU->prog.lpState, 0, sizeof(mVU->prog.lpState)); mVU->prog.cleared = 1; // Next execution searches/creates a new microprogram // Note: It might be better to copy old recompiled blocks to the new microprogram rec data // however, if games primarily do big writes, its probably not worth it. @@ -157,6 +158,7 @@ __forceinline int mVUsearchProg(microVU* mVU) { if (mVU->prog.cleared) { // If cleared, we need to search for new program for (int i = 0; i <= mVU->prog.total; i++) { //if (i == mVU->prog.cur) continue; // We can skip the current program. 
(ToDo: Verify that games don't clear, and send the same microprogram :/) + //if (mVU->prog.prog[i]) // ToDo: Implement Cycles if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) { if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); } mVU->prog.cur = i; @@ -172,7 +174,7 @@ __forceinline int mVUsearchProg(microVU* mVU) { mVU->prog.prog[mVU->prog.cur].used++; return 1; // If !cleared, then we're still on the same program as last-time ;) } - +/* // Block Invalidation __forceinline void mVUinvalidateBlock(microVU* mVU, u32 addr, u32 size) { @@ -192,7 +194,7 @@ __forceinline void mVUinvalidateBlock(microVU* mVU, u32 addr, u32 size) { mVU->prog.prog[mVU->prog.cur].block[i]->clearFast(); } } - +*/ //------------------------------------------------------------------ // Wrapper Functions - Called by other parts of the Emu //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 90096299de..ad54f1eeb1 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -25,67 +25,40 @@ #include "microVU_Alloc.h" #include "microVU_Misc.h" -struct microBlock { - microRegInfo pState; // Detailed State of Pipeline - u32 pipelineState; // | FDiv x 4 | EFU x 6 | Needs pState Info? x 1 | // Simple State of Pipeline - u8* x86ptrStart; // Start of code - u8* x86ptrEnd; // End of code (first byte outside of block) - u8* x86ptrBranch; // - u32 size; // Number of 64bit VU Instructions in Block -}; - #define mMaxBlocks 32 // Max Blocks With Different Pipeline States (For n = 1, 2, 4, 8, 16, etc...) class microBlockManager { private: static const int MaxBlocks = mMaxBlocks - 1; - u32 startPC; - u32 endPC; int listSize; // Total Items - 1 - int callerSize; // Total Callers - 1 microBlock blockList[mMaxBlocks]; - microBlock callersList[mMaxBlocks]; // Foreign Blocks that call Local Blocks public: - microBlockManager() { init(); } - ~microBlockManager() { close(); } - void init() { - listSize = -1; - callerSize = -1; - //ZeroMemory(&blockList, sizeof(blockList)); // Can be Omitted? - //ZeroMemory(&blockList, sizeof(callersList)); // Can be Omitted? - } - void reset() { init(); }; - void close() {}; // Can be Omitted? 
- /*void add(u32 pipelineState, u8* x86ptrStart) { - if (!search(pipelineState)) { + microBlockManager() { reset(); } + ~microBlockManager() {} + void reset() { listSize = -1; }; + microBlock* add(microBlock* pBlock) { + microBlock* thisBlock = search(&pBlock->pState); + if (!thisBlock) { listSize++; listSize &= MaxBlocks; - blockList[listSize].pipelineState = pipelineState; - blockList[listSize].x86ptrStart = x86ptrStart; + memcpy_fast(&blockList[listSize], pBlock, sizeof(microBlock)); + thisBlock = &blockList[listSize]; } - }*/ - microBlock* search(/*u32 pipelineState,*/ microRegInfo* pState) { - /*if (pipelineState & 1) { // Needs Detailed Search (Exact Match of Pipeline State) - for (int i = 0; i < listSize; i++) { + return thisBlock; + } + microBlock* search(microRegInfo* pState) { + if (listSize < 0) return NULL; + if (blockList[0].pState.needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) + for (int i = 0; i <= listSize; i++) { if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo))) return &blockList[i]; } } else { // Can do Simple Search (Only Matches the Important Pipeline Stuff) - for (int i = 0; i < listSize; i++) { - if (blockList[i].pipelineState == pipelineState) return &blockList[i]; + for (int i = 0; i <= listSize; i++) { + if ((blockList[i].pState.q == pState->q) && (blockList[i].pState.p == pState->p)) { return &blockList[i]; } } - }*/ - return NULL; - } - void clearFast() { - listSize = -1; - for ( ; callerSize >= 0; callerSize--) { - //callerList[callerSize]. // ToDo: Implement Branch Link Removal Code } - } - int clear() { - if (listSize >= 0) { clearFast(); return 1; } - else return 0; + return NULL; } }; @@ -109,7 +82,7 @@ struct microProgManager { int total; // Total Number of valid MicroPrograms minus 1 int cleared; // Micro Program is Indeterminate so must be searched for (and if no matches are found then recompile a new one) int finished; // Completed MicroProgram by E-bit Termination - u32 lastPipelineState; // Pipeline state from where it left off (useful for continuing execution) + microRegInfo lpState; // Pipeline state from where program left off (useful for continuing execution) }; struct microVU { @@ -124,7 +97,6 @@ struct microVU { u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to) u8* startFunct; // Ptr Function to the Start code for recompiled programs u8* exitFunct; // Ptr Function to the Exit code for recompiled programs - u8* ptr; // Pointer to next place to write recompiled code to u32 code; // Contains the current Instruction u32 iReg; // iReg (only used in recompilation, not execution) u32 clipFlag[4]; // 4 instances of clip flag (used in execution) @@ -167,6 +139,8 @@ typedef void (*mVUrecCall)(u32, u32) __attribute__((__fastcall)); // Not sure if #include "microVU_Misc.inl" #include "microVU_Analyze.inl" #include "microVU_Alloc.inl" +#include "microVU_Upper.inl" +#include "microVU_Lower.inl" #include "microVU_Tables.inl" #include "microVU_Compile.inl" #include "microVU_Execute.inl" diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index 8a827eea3f..49b0fda39e 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -18,6 +18,7 @@ #pragma once + union regInfo { u32 reg; struct { @@ -49,9 +50,16 @@ struct microTempRegInfo { u8 xgkick; // Holds the cycle info for XGkick }; +struct microBlock { + microRegInfo pState; // Detailed State of Pipeline + u8* x86ptrStart; // Start of code + //u8* x86ptrEnd; // End of code (first byte outside of block) + 
//u32 size; // Number of 64bit VU Instructions in Block +}; + template struct microAllocInfo { - microRegInfo regs; // Pipeline info + microBlock block; // Block/Pipeline info microTempRegInfo regsTemp; // Temp Pipeline info (used so that new pipeline info isn't conflicting between upper and lower instructions in the same cycle) u8 branch; // 0 = No Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR u32 cycles; // Cycles for current block diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 94f75d0f2f..9381c317bf 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -247,7 +247,8 @@ microVUt(void) mVUanalyzeSflag(int It) { if (!It) { mVUinfo |= _isNOP; } else { // Sets _isSflag at instruction that FSxxx opcode reads it's status flag from mVUinfo |= _swapOps; - if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); } + if (mVUcount < 4) { mVUregs.needExactMatch = 1; } + if (mVUcount >= 1) { incPC2(-2); mVUinfo |= _isSflag; incPC2(2); } // Note: _isSflag is used for status flag optimizations. // Do to stalls, it can only be set one instruction prior to the status flag read instruction // if we were guaranteed no-stalls were to happen, it could be set 4 instruction prior. @@ -274,14 +275,13 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) { if (!It) { mVUinfo |= _isNOP; } else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed) mVUinfo |= _swapOps; - if (mVUcount > 1) { - int curPC = iPC; - for (int i = mVUcount, j = 0; i > 1; i--, j++) { - incPC(-2); - if (doStatus) { mVUinfo |= _doMac; if (j >= 3) { break; } } - } - iPC = curPC; + if (mVUcount < 4) { mVUregs.needExactMatch = 1; } + int curPC = iPC; + for (int i = mVUcount, j = 0; i > 1; i--, j++) { + incPC(-2); + if (doStatus) { mVUinfo |= _doMac; if (j >= 3) { break; } } } + iPC = curPC; } analyzeVIreg1(Is); analyzeVIreg2(It, 1); diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 775a2aec5d..1e21e6ab3f 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -143,8 +143,9 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { #define getFlagReg1(x) ((x == 3) ? gprF3 : ((x == 2) ? gprF2 : ((x == 1) ? gprF1 : gprF0))) #define getFlagReg2(x) ((x == bStatus[3]) ? gprESP : ((x == bStatus[2]) ? gprR : ((x == bStatus[1]) ? 
gprT2 : gprT1))) -// Recompiles Code for Proper Flags on Block Linkings -microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) { +// Recompiles Code for Proper Flags and Q/P regs on Block Linkings +microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) { + microVU* mVU = mVUx; PUSH32R(gprR); // Backup gprR PUSH32R(gprESP); // Backup gprESP @@ -176,6 +177,9 @@ microVUt(void) mVUsetFlagsRec(int* bStatus, int* bMac) { POP32R(gprESP); // Restore gprESP POP32R(gprR); // Restore gprR + + // Shuffle P/Q regs since every block starts at instance #0 + if (mVU->p || mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, shufflePQ); } } microVUt(void) mVUincCycles(int x) { @@ -237,14 +241,14 @@ microVUt(void) mVUdivSet() { microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { microVU* mVU = mVUx; - u8* thisPtr = mVUcurProg.x86ptr; - iPC = startPC / 4; + u8* thisPtr = x86Ptr; // Searches for Existing Compiled Block (if found, then returns; else, compile) - microBlock* pblock = mVUblock[iPC/2]->search((microRegInfo*)pState); - if (pblock) { return pblock->x86ptrStart; } + microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState); + if (pBlock) { return pBlock->x86ptrStart; } // First Pass + iPC = startPC / 4; setCode(); mVUbranch = 0; mVUstartPC = iPC; @@ -252,6 +256,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage mVU->p = 0; // All blocks start at p index #0 mVU->q = 0; // All blocks start at q index #0 + memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info for (int branch = 0;; ) { startLoop(); mVUopU(); @@ -286,7 +291,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { if (!isBdelay) { incPC(1); } else { - u32* ajmp; + u32* ajmp = 0; switch (mVUbranch) { case 3: branchCase(JZ32); // IBEQ case 4: branchCase(JGE32); // IBGEZ @@ -295,29 +300,43 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { case 7: branchCase(JL32); // IBLTZ case 8: branchCase(JNZ32); // IBNEQ case 1: case 2: // B/BAL - // ToDo: search for block - // (remember about global variables and recursion!) - mVUsetFlagsRec(bStatus, bMac); - ajmp = JMP32((uptr)0); - break; + incPC(-2); // Go back to branch opcode (to get branch imm addr) + mVUsetupBranch(bStatus, bMac); + + // Check if branch-block has already been compiled + pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); + if (pBlock) { + ajmp = JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); + mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager + } + else { + pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add block + if (!vuIndex) mVUcompileVU0(branchAddr, (uptr)&pBlock->pState); + else mVUcompileVU1(branchAddr, (uptr)&pBlock->pState); + } + //incPC(+2); + return thisPtr; case 9: case 10: // JR/JALR - mVUsetFlagsRec(bStatus, bMac); + mVUsetupBranch(bStatus, bMac); PUSH32R(gprR); // Backup EDX - MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall) - AND32ItoR(gprT2, (vuIndex) ? 
0x3ff8 : 0xff8); - MOV32ItoR(gprR, (u32)&pblock->pState); // Get pState (EDX second argument for __fastcall) + //MOV32MtoR(gprT1, (uptr)&mVUcurProg.x86ptr); // Get last x86ptr for this program + //MOV32RtoM((uptr)&x86Ptr, gprT1); // Setup x86Ptr to write to correct address + MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall) + AND32ItoR(gprT2, (vuIndex)?0x3ff8:0xff8); // Ensure valid jump address + pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager + MOV32ItoR(gprR, (u32)&pBlock->pState); // Get pState (EDX second argument for __fastcall) - //ToDo: Add block to block manager and use its address instead of pblock! - - if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState) + if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState) else CALLFunc((uptr)mVUcompileVU1); - POP32R(gprR); // Restore + POP32R(gprR); // Restore EDX JMPR(gprT1); // Jump to rec-code address - break; + return thisPtr; } - //mVUcurProg.x86Ptr + + + return thisPtr; } } diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index c467464241..205fdf0047 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -26,8 +26,7 @@ microVUt(void) mVUdispatcherA() { static u32 PCSX2_ALIGNED16(vuMXCSR); microVU* mVU = mVUx; - x86SetPtr(mVU->ptr); - mVU->startFunct = mVU->ptr; + mVU->startFunct = x86Ptr; // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. if (!vuIndex) { CALLFunc((uptr)mVUexecuteVU0); } @@ -67,15 +66,13 @@ microVUt(void) mVUdispatcherA() { // Jump to Recompiled Code Block JMPR(EAX); - mVU->ptr = x86Ptr; } // Generates the code to exit from recompiled blocks microVUt(void) mVUdispatcherB() { static u32 PCSX2_ALIGNED16(eeMXCSR); microVU* mVU = mVUx; - x86SetPtr(mVU->ptr); - mVU->exitFunct = mVU->ptr; + mVU->exitFunct = x86Ptr; // __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left. 
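	// (Note, for illustration: mVUcleanUpVU0/1 take no arguments, so nothing
	//  has to be staged in ECX/EDX here; contrast with the JR/JALR path in
	//  mVUcompile, which loads gprT2 = startPC (ECX) and gprR = pState (EDX)
	//  before its CALLFunc.)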
if (!vuIndex) { CALLFunc((uptr)mVUcleanUpVU0); } @@ -111,8 +108,7 @@ microVUt(void) mVUdispatcherB() { EMMS(); RET(); - mVU->ptr = x86Ptr; - mVUcachCheck(mVU->cache, 512); + mVUcacheCheck(x86Ptr, mVU->cache, 512); } //------------------------------------------------------------------ @@ -121,23 +117,16 @@ microVUt(void) mVUdispatcherB() { // Executes for number of cycles microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { -/* - Pseudocode: (ToDo: implement # of cycles) - 1) Search for existing program - 2) If program not found, goto 5 - 3) Search for recompiled block - 4) If recompiled block found, goto 6 - 5) Recompile as much blocks as possible - 6) Return start execution address of block -*/ + microVU* mVU = mVUx; mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles); - if ( mVUsearchProg(mVU) ) { // Found Program - //microBlock* block = mVU->prog.prog[mVU->prog.cur].block[startPC]->search(mVU->prog.lastPipelineState); - //if (block) return block->x86ptrStart; // Found Block - } - // Recompile code - return NULL; + + // ToDo: Implement Cycles + mVUsearchProg(mVU); // Find and set correct program + + x86SetPtr(mVUcurProg.x86ptr); // Set x86ptr to where program left off + if (!vuIndex) return mVUcompileVU0(startPC, (uptr)&mVU->prog.lpState); + else return mVUcompileVU1(startPC, (uptr)&mVU->prog.lpState); } //------------------------------------------------------------------ @@ -146,8 +135,8 @@ microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { microVUt(void) mVUcleanUp() { microVU* mVU = mVUx; - mVU->ptr = mVUcurProg.x86ptr; - mVUcachCheck(mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start)); + mVUcurProg.x86ptr = x86Ptr; + mVUcacheCheck(x86Ptr, mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start)); } //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 31e6cefab4..7937d2990b 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -139,12 +139,13 @@ declareAllVariables #define microVUq(aType) template __forceinline aType #define mVUcurProg mVU->prog.prog[mVU->prog.cur] -#define mVUblock mVU->prog.prog[mVU->prog.cur].block +#define mVUblocks mVU->prog.prog[mVU->prog.cur].block #define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo #define mVUbranch mVUallocInfo.branch #define mVUcycles mVUallocInfo.cycles #define mVUcount mVUallocInfo.count -#define mVUregs mVUallocInfo.regs +#define mVUblock mVUallocInfo.block +#define mVUregs mVUallocInfo.block.pState #define mVUregsTemp mVUallocInfo.regsTemp #define iPC mVUallocInfo.curPC #define mVUinfo mVUallocInfo.info[iPC / 2] @@ -157,6 +158,9 @@ declareAllVariables #define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); } #define incCycles(x) { mVUincCycles(x); } #define bSaveAddr ((xPC + (2 * 8)) & ((vuIndex) ? 0x3ff8:0xff8)) +#define branchAddr ((xPC + (_Imm11_ * 8)) & ((vuIndex) ? 0x3ff8:0xff8)) +#define shufflePQ (((mVU->q) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04)) + #define _isNOP (1<<0) // Skip Lower Instruction #define _isBranch (1<<1) // Cur Instruction is a Branch @@ -235,7 +239,7 @@ declareAllVariables #define mVUdebug1() {} #endif -#define mVUcachCheck(start, limit) { \ - uptr diff = mVU->ptr - start; \ +#define mVUcacheCheck(ptr, start, limit) { \ + uptr diff = ptr - start; \ if (diff >= limit) { Console::Error("microVU Error: Program went over it's cache limit. 
Size = %x", params diff); } \ } diff --git a/pcsx2/x86/microVU_Tables.inl b/pcsx2/x86/microVU_Tables.inl index 22af9f2aaa..d79cd3c74a 100644 --- a/pcsx2/x86/microVU_Tables.inl +++ b/pcsx2/x86/microVU_Tables.inl @@ -17,8 +17,6 @@ */ #pragma once -#include "microVU_Upper.inl" -#include "microVU_Lower.inl" #ifdef PCSX2_MICROVU //------------------------------------------------------------------ From f51e6b7d913bbdb2e2cb2b9b5bbe12fdd3842dd8 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Sat, 18 Apr 2009 09:31:16 +0000 Subject: [PATCH 089/143] LilyPad: fixed a crash under remote desktop, GetRawInputDeviceInfo may return -1, the buffer size it wants is around a megabyte and gets filled with junk, just ignoring it seems fine. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1004 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/LilyPad/RawInput.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/LilyPad/RawInput.cpp b/plugins/LilyPad/RawInput.cpp index 0fbde4513f..5593ad9761 100644 --- a/plugins/LilyPad/RawInput.cpp +++ b/plugins/LilyPad/RawInput.cpp @@ -262,7 +262,7 @@ void EnumRawInputDevices() { if (list && pGetRawInputDeviceList(list, &count, sizeof(RAWINPUTDEVICELIST))) { for (UINT i=0; i 0 && nameLen >= 3) { wcscpy(productID, instanceID); wchar_t *temp = 0; From 94e2bd9409fefa78b542018907766db048258168 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Sat, 18 Apr 2009 09:34:25 +0000 Subject: [PATCH 090/143] GSdx: adapted to ref's changes to the image transfer, it was a bug in GSdx but not triggered by the old way, also the data overflow is now ignored, no idea what it may cause, look out for missing or fixed textures. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1005 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSDrawingEnvironment.h | 2 - plugins/GSdx/GSLocalMemory.cpp | 25 ++- plugins/GSdx/GSState.cpp | 259 ++++++++++++++-------------- plugins/GSdx/GSState.h | 19 +- 4 files changed, 149 insertions(+), 156 deletions(-) diff --git a/plugins/GSdx/GSDrawingEnvironment.h b/plugins/GSdx/GSDrawingEnvironment.h index 557deab56b..dfca1aa953 100644 --- a/plugins/GSdx/GSDrawingEnvironment.h +++ b/plugins/GSdx/GSDrawingEnvironment.h @@ -43,7 +43,6 @@ public: GIFRegTRXDIR TRXDIR; GIFRegTRXPOS TRXPOS; GIFRegTRXREG TRXREG; - GIFRegTRXREG TRXREG2; GSDrawingContext CTXT[2]; GSDrawingEnvironment() @@ -67,7 +66,6 @@ public: memset(&TRXDIR, 0, sizeof(TRXDIR)); memset(&TRXPOS, 0, sizeof(TRXPOS)); memset(&TRXREG, 0, sizeof(TRXREG)); - memset(&TRXREG2, 0, sizeof(TRXREG2)); CTXT[0].Reset(); CTXT[1].Reset(); diff --git a/plugins/GSdx/GSLocalMemory.cpp b/plugins/GSdx/GSLocalMemory.cpp index 27a26e46a2..b86c6ce8b8 100644 --- a/plugins/GSdx/GSLocalMemory.cpp +++ b/plugins/GSdx/GSLocalMemory.cpp @@ -896,7 +896,7 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, BYTE* src, int len, GIFRegBITBL if(TRXREG.RRW == 0) return; int l = (int)TRXPOS.DSAX; - int r = (int)TRXREG.RRW; + int r = l + (int)TRXREG.RRW; // finish the incomplete row first @@ -913,9 +913,7 @@ void GSLocalMemory::WriteImage(int& tx, int& ty, BYTE* src, int len, GIFRegBITBL int srcpitch = (r - l) * trbpp >> 3; int h = len / srcpitch; - // transfer width >= block width, and there is at least one full row - - if(ra - la >= bsx && h > 0) + if(ra - la >= bsx && h > 0) // "transfer width" >= "block width" && there is at least one full row { BYTE* s = &src[-l * trbpp >> 3]; @@ -1009,7 +1007,7 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, BYTE* src, int len, GIFRegBIT DWORD bp = BITBLTBUF.DBP; DWORD bw = 
BITBLTBUF.DBW; - int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 3; + int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW * 3; int th = len / srcpitch; bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); @@ -1035,6 +1033,7 @@ void GSLocalMemory::WriteImage24(int& tx, int& ty, BYTE* src, int len, GIFRegBIT ty = th; } } + void GSLocalMemory::WriteImage8H(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) { if(TRXREG.RRW == 0) return; @@ -1042,7 +1041,7 @@ void GSLocalMemory::WriteImage8H(int& tx, int& ty, BYTE* src, int len, GIFRegBIT DWORD bp = BITBLTBUF.DBP; DWORD bw = BITBLTBUF.DBW; - int tw = TRXREG.RRW, srcpitch = TRXREG.RRW - TRXPOS.DSAX; + int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW; int th = len / srcpitch; bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); @@ -1076,7 +1075,7 @@ void GSLocalMemory::WriteImage4HL(int& tx, int& ty, BYTE* src, int len, GIFRegBI DWORD bp = BITBLTBUF.DBP; DWORD bw = BITBLTBUF.DBW; - int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) / 2; + int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW / 2; int th = len / srcpitch; bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); @@ -1110,7 +1109,7 @@ void GSLocalMemory::WriteImage4HH(int& tx, int& ty, BYTE* src, int len, GIFRegBI DWORD bp = BITBLTBUF.DBP; DWORD bw = BITBLTBUF.DBW; - int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) / 2; + int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW / 2; int th = len / srcpitch; bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); @@ -1143,7 +1142,7 @@ void GSLocalMemory::WriteImage24Z(int& tx, int& ty, BYTE* src, int len, GIFRegBI DWORD bp = BITBLTBUF.DBP; DWORD bw = BITBLTBUF.DBW; - int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX) * 3; + int tw = TRXPOS.DSAX + TRXREG.RRW, srcpitch = TRXREG.RRW * 3; int th = len / srcpitch; bool aligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8); @@ -1173,8 +1172,6 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, BYTE* src, int len, GIFRegBITB { if(len <= 0) return; - // if(ty >= (int)TRXREG.RRH) {ASSERT(0); return;} - BYTE* pb = (BYTE*)src; WORD* pw = (WORD*)src; DWORD* pd = (DWORD*)src; @@ -1186,7 +1183,7 @@ void GSLocalMemory::WriteImageX(int& tx, int& ty, BYTE* src, int len, GIFRegBITB int x = tx; int y = ty; int sx = (int)TRXPOS.DSAX; - int ex = (int)TRXREG.RRW; + int ex = sx + (int)TRXREG.RRW; switch(BITBLTBUF.DPSM) { @@ -1351,8 +1348,6 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, BYTE* dst, int len, GIFRegBITBL { if(len <= 0) return; - // if(ty >= (int)TRXREG.RRH) {ASSERT(0); return;} - BYTE* pb = (BYTE*)dst; WORD* pw = (WORD*)dst; DWORD* pd = (DWORD*)dst; @@ -1364,7 +1359,7 @@ void GSLocalMemory::ReadImageX(int& tx, int& ty, BYTE* dst, int len, GIFRegBITBL int x = tx; int y = ty; int sx = (int)TRXPOS.SSAX; - int ex = (int)TRXREG.RRW; + int ex = sx + (int)TRXREG.RRW; switch(BITBLTBUF.SPSM) { diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index a474fd741f..11d681a337 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -52,7 +52,7 @@ GSState::GSState(BYTE* base, bool mt, void (*irq)()) m_sssize += sizeof(m_env.TRXDIR); m_sssize += sizeof(m_env.TRXPOS); m_sssize += sizeof(m_env.TRXREG); - m_sssize += sizeof(m_env.TRXREG2); + m_sssize += sizeof(m_env.TRXREG); // obsolete for(int i = 0; i < 2; i++) { @@ -77,8 +77,8 @@ GSState::GSState(BYTE* base, bool mt, void (*irq)()) m_sssize += sizeof(m_v.XYZ); m_sssize += sizeof(m_v.FOG); - 
m_sssize += sizeof(m_x); - m_sssize += sizeof(m_y); + m_sssize += sizeof(m_tr.x); + m_sssize += sizeof(m_tr.y); m_sssize += m_mem.m_vmsize; m_sssize += (sizeof(m_path[0].tag) + sizeof(m_path[0].nreg)) * 3; m_sssize += sizeof(m_q); @@ -93,11 +93,6 @@ GSState::GSState(BYTE* base, bool mt, void (*irq)()) // CSR->rREV = 0x20; m_env.PRMODECONT.AC = 1; - m_x = m_y = 0; - m_bytes = 0; - m_maxbytes = 1024 * 1024 * 4; - m_buff = (BYTE*)_aligned_malloc(m_maxbytes, 16); - Reset(); ResetHandlers(); @@ -105,7 +100,6 @@ GSState::GSState(BYTE* base, bool mt, void (*irq)()) GSState::~GSState() { - _aligned_free(m_buff); } void GSState::Reset() @@ -916,13 +910,12 @@ void GSState::GIFRegHandlerTRXPOS(GIFReg* r) void GSState::GIFRegHandlerTRXREG(GIFReg* r) { - if(!(m_env.TRXREG == (GSVector4i)r->TRXREG).alltrue() || !(m_env.TRXREG2 == (GSVector4i)r->TRXREG).alltrue()) + if(!(m_env.TRXREG == (GSVector4i)r->TRXREG).alltrue()) { FlushWrite(); } m_env.TRXREG = (GSVector4i)r->TRXREG; - m_env.TRXREG2 = (GSVector4i)r->TRXREG; } void GSState::GIFRegHandlerTRXDIR(GIFReg* r) @@ -934,16 +927,10 @@ void GSState::GIFRegHandlerTRXDIR(GIFReg* r) switch(m_env.TRXDIR.XDIR) { case 0: // host -> local - m_x = m_env.TRXPOS.DSAX; - m_y = m_env.TRXPOS.DSAY; - m_env.TRXREG.RRW = m_x + m_env.TRXREG2.RRW; - m_env.TRXREG.RRH = m_y + m_env.TRXREG2.RRH; + m_tr.Init(m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY); break; case 1: // local -> host - m_x = m_env.TRXPOS.SSAX; - m_y = m_env.TRXPOS.SSAY; - m_env.TRXREG.RRW = m_x + m_env.TRXREG2.RRW; - m_env.TRXREG.RRH = m_y + m_env.TRXREG2.RRH; + m_tr.Init(m_env.TRXPOS.SSAX, m_env.TRXPOS.SSAY); break; case 2: // local -> local Move(); @@ -997,113 +984,66 @@ void GSState::Flush() void GSState::FlushWrite() { - FlushWrite(m_buff, m_bytes); + int len = m_tr.end - m_tr.start; - m_bytes = 0; -} + if(len <= 0) return; -void GSState::FlushWrite(BYTE* mem, int len) -{ - if(len > 0) - { + int y = m_tr.y; + + GSLocalMemory::writeImage wi = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wi; + + (m_mem.*wi)(m_tr.x, m_tr.y, &m_tr.buff[m_tr.start], len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG); + + m_tr.start += len; + + m_perfmon.Put(GSPerfMon::Swizzle, len); + + CRect r; + + r.left = m_env.TRXPOS.DSAX; + r.top = y; + r.right = r.left + m_env.TRXREG.RRW; + r.bottom = min(r.top + m_env.TRXREG.RRH, m_tr.x == r.left ? 
m_tr.y : m_tr.y + 1); + + InvalidateVideoMem(m_env.BITBLTBUF, r); /* -CSize bs = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].bs; - -if((m_x & (bs.cx - 1)) || (m_env.TRXREG.RRW & (bs.cx - 1)) -|| (m_y & (bs.cy - 1)) || (m_env.TRXREG.RRH & (bs.cy - 1)) -|| m_x != m_env.TRXPOS.DSAX) -{ - printf("*** [%d]: %d %d, %d %d %d %d\n", m_env.BITBLTBUF.DPSM, m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, m_x, m_y, m_env.TRXREG.RRW, m_env.TRXREG.RRH); -} - -if((len % ((m_env.TRXREG.RRW - m_x) * GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].trbpp / 8)) != 0) -{ - printf("*** [%d]: %d %d\n", m_env.BITBLTBUF.DPSM, len, ((m_env.TRXREG.RRW - m_x) * GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].trbpp / 8)); -} + static int n = 0; + CString str; + str.Format(_T("c:\\temp1\\[%04d]_%05x_%d_%d_%d_%d_%d_%d.bmp"), + n++, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM, + r.left, r.top, r.right, r.bottom); + m_mem.SaveBMP(str, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, r.right, r.bottom); */ - int y = m_y; - - GSLocalMemory::writeImage wi = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wi; - - (m_mem.*wi)(m_x, m_y, mem, len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG); - - m_perfmon.Put(GSPerfMon::Swizzle, len); - - //ASSERT(m_env.TRXREG.RRH >= m_y - y); - - CRect r; - - r.left = m_env.TRXPOS.DSAX; - r.top = y; - r.right = m_env.TRXREG.RRW; - r.bottom = min(m_x == m_env.TRXPOS.DSAX ? m_y : m_y + 1, m_env.TRXREG.RRH); - - InvalidateVideoMem(m_env.BITBLTBUF, r); -/* - static int n = 0; - CString str; - str.Format(_T("c:\\temp1\\[%04d]_%05x_%d_%d_%d_%d_%d_%d.bmp"), - n++, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM, - r.left, r.top, r.right, r.bottom); - m_mem.SaveBMP(str, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, r.right, r.bottom); -*/ - } } // void GSState::Write(BYTE* mem, int len) { -/* - TRACE(_T("Write len=%d DBP=%05x DBW=%d DPSM=%d DSAX=%d DSAY=%d RRW=%d RRH=%d\n"), - len, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM, - (int)m_env.TRXPOS.DSAX, (int)m_env.TRXPOS.DSAY, - (int)m_env.TRXREG.RRW, (int)m_env.TRXREG.RRH); -*/ - if(len == 0) return; + int dx = m_env.TRXPOS.DSAX; + int dy = m_env.TRXPOS.DSAY; + int w = m_env.TRXREG.RRW; + int h = m_env.TRXREG.RRH; - if(m_y >= m_env.TRXREG.RRH) return; // TODO: handle overflow during writing data too (just chop len below somewhere) + // TRACE(_T("Write len=%d DBP=%05x DBW=%d DPSM=%d DSAX=%d DSAY=%d RRW=%d RRH=%d\n"), len, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM, dx, dy, w, h); - // TODO: hmmmm + if(!m_tr.Update(w, h, GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].trbpp, len)) + { + return; + } - if(PRIM->TME && (m_env.BITBLTBUF.DBP == m_context->TEX0.TBP0 || m_env.BITBLTBUF.DBP == m_context->TEX0.CBP)) + memcpy(&m_tr.buff[m_tr.end], mem, len); + + m_tr.end += len; + + if(PRIM->TME && (m_env.BITBLTBUF.DBP == m_context->TEX0.TBP0 || m_env.BITBLTBUF.DBP == m_context->TEX0.CBP)) // TODO: hmmmm { FlushPrim(); } - int bpp = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].trbpp; - - int pitch = (m_env.TRXREG.RRW - m_env.TRXPOS.DSAX) * bpp >> 3; - - if(pitch <= 0) {ASSERT(0); return;} - - int height = len / pitch; - - if(height > m_env.TRXREG.RRH - m_env.TRXPOS.DSAY) + if(m_tr.end >= m_tr.total) { - height = m_env.TRXREG.RRH - m_env.TRXPOS.DSAY; - - len = height * pitch; - } - - if(m_bytes > 0 || height < m_env.TRXREG.RRH - m_env.TRXPOS.DSAY) - { - ASSERT(len <= m_maxbytes); // more than 4mb into a 4mb local mem doesn't make 
sense - - len = min(m_maxbytes, len); - - if(m_bytes + len > m_maxbytes) - { - FlushWrite(); - } - - memcpy(&m_buff[m_bytes], mem, len); - - m_bytes += len; - } - else - { - FlushWrite(mem, len); + FlushWrite(); } m_mem.m_clut.Invalidate(); @@ -1111,25 +1051,26 @@ void GSState::Write(BYTE* mem, int len) void GSState::Read(BYTE* mem, int len) { - /* - TRACE(_T("Read len=%d SBP=%05x SBW=%d SPSM=%d SSAX=%d SSAY=%d RRW=%d RRH=%d\n"), - len, (int)m_env.BITBLTBUF.SBP, (int)m_env.BITBLTBUF.SBW, (int)m_env.BITBLTBUF.SPSM, - (int)m_env.TRXPOS.SSAX, (int)m_env.TRXPOS.SSAY, - (int)m_env.TRXREG.RRW, (int)m_env.TRXREG.RRH); - */ + if(len <= 0) return; - if(m_y >= (int)m_env.TRXREG.RRH) {ASSERT(0); return;} + int sx = m_env.TRXPOS.SSAX; + int sy = m_env.TRXPOS.SSAY; + int w = m_env.TRXREG.RRW; + int h = m_env.TRXREG.RRH; - if(m_x == m_env.TRXPOS.SSAX && m_y == m_env.TRXPOS.SSAY) + // TRACE(_T("Read len=%d SBP=%05x SBW=%d SPSM=%d SSAX=%d SSAY=%d RRW=%d RRH=%d\n"), len, (int)m_env.BITBLTBUF.SBP, (int)m_env.BITBLTBUF.SBW, (int)m_env.BITBLTBUF.SPSM, sx, sy, w, h); + + if(!m_tr.Update(w, h, GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].trbpp, len)) { - CRect r(m_env.TRXPOS.SSAX, m_env.TRXPOS.SSAY, m_env.TRXREG.RRW, m_env.TRXREG.RRH); - - InvalidateLocalMem(m_env.BITBLTBUF, r); + return; } - // TODO + if(m_tr.x == sx && m_tr.y == sy) + { + InvalidateLocalMem(m_env.BITBLTBUF, CRect(CPoint(sx, sy), CSize(w, h))); + } - m_mem.ReadImageX(m_x, m_y, mem, len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG); + m_mem.ReadImageX(m_tr.x, m_tr.y, mem, len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG); } void GSState::Move() @@ -1138,19 +1079,20 @@ void GSState::Move() // guitar hero copies the far end of the board to do a similar blend too int sx = m_env.TRXPOS.SSAX; - int dx = m_env.TRXPOS.DSAX; int sy = m_env.TRXPOS.SSAY; + int dx = m_env.TRXPOS.DSAX; int dy = m_env.TRXPOS.DSAY; int w = m_env.TRXREG.RRW; int h = m_env.TRXREG.RRH; - int xinc = 1; - int yinc = 1; InvalidateLocalMem(m_env.BITBLTBUF, CRect(CPoint(sx, sy), CSize(w, h))); InvalidateVideoMem(m_env.BITBLTBUF, CRect(CPoint(dx, dy), CSize(w, h))); - if(sx < dx) sx += w-1, dx += w-1, xinc = -1; - if(sy < dy) sy += h-1, dy += h-1, yinc = -1; + int xinc = 1; + int yinc = 1; + + if(sx < dx) {sx += w - 1; dx += w - 1; xinc = -1;} + if(sy < dy) {sy += h - 1; dy += h - 1; yinc = -1;} /* GSLocalMemory::readPixel rp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].rp; @@ -1166,7 +1108,7 @@ void GSState::Move() if(m_env.BITBLTBUF.SPSM == PSM_PSMCT32 && m_env.BITBLTBUF.DPSM == PSM_PSMCT32) { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w) + for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) { DWORD sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); int* soffset = spsm.rowOffset[sy & 7]; @@ -1182,7 +1124,7 @@ void GSState::Move() } else { - for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w) + for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc * w, dx -= xinc * w) { DWORD sbase = spsm.pa(0, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW); int* soffset = spsm.rowOffset[sy & 7]; @@ -1461,7 +1403,7 @@ int GSState::Freeze(GSFreezeData* fd, bool sizeonly) WriteState(data, &m_env.TRXDIR); WriteState(data, &m_env.TRXPOS); WriteState(data, &m_env.TRXREG); - WriteState(data, &m_env.TRXREG2); + WriteState(data, &m_env.TRXREG); // obsolete for(int i = 0; i < 2; i++) { @@ -1485,8 +1427,8 @@ int GSState::Freeze(GSFreezeData* fd, bool sizeonly) WriteState(data, &m_v.UV); 
WriteState(data, &m_v.XYZ); WriteState(data, &m_v.FOG); - WriteState(data, &m_x); - WriteState(data, &m_y); + WriteState(data, &m_tr.x); + WriteState(data, &m_tr.y); WriteState(data, m_mem.m_vm8, m_mem.m_vmsize); for(int i = 0; i < 3; i++) @@ -1542,7 +1484,7 @@ int GSState::Defrost(const GSFreezeData* fd) ReadState(&m_env.TRXDIR, data); ReadState(&m_env.TRXPOS, data); ReadState(&m_env.TRXREG, data); - ReadState(&m_env.TRXREG2, data); + ReadState(&m_env.TRXREG, data); // obsolete for(int i = 0; i < 2; i++) { @@ -1574,10 +1516,12 @@ int GSState::Defrost(const GSFreezeData* fd) ReadState(&m_v.UV, data); ReadState(&m_v.XYZ, data); ReadState(&m_v.FOG, data); - ReadState(&m_x, data); - ReadState(&m_y, data); + ReadState(&m_tr.x, data); + ReadState(&m_tr.y, data); ReadState(m_mem.m_vm8, data, m_mem.m_vmsize); + m_tr.total = 0; // TODO: restore transfer state + for(int i = 0; i < 3; i++) { ReadState(&m_path[i].tag, data); @@ -1670,6 +1614,53 @@ void GSState::SetFrameSkip(int frameskip) } } +// GSTransferBuffer + +GSState::GSTransferBuffer::GSTransferBuffer() +{ + x = y = 0; + start = end = total = 0; + buff = (BYTE*)_aligned_malloc(1024 * 1024 * 4, 16); +} + +GSState::GSTransferBuffer::~GSTransferBuffer() +{ + _aligned_free(buff); +} + +void GSState::GSTransferBuffer::Init(int tx, int ty) +{ + x = tx; + y = ty; + total = 0; +} + +bool GSState::GSTransferBuffer::Update(int tw, int th, int bpp, int& len) +{ + if(total == 0) + { + start = end = 0; + total = min((tw * bpp >> 3) * th, 1024 * 1024 * 4); + overflow = false; + } + + int remaining = total - end; + + if(len > remaining) + { + if(!overflow) + { + overflow = true; + + // printf("GS transfer overflow\n"); + } + + len = remaining; + } + + return len > 0; +} + // hacks struct GSFrameInfo diff --git a/plugins/GSdx/GSState.h b/plugins/GSdx/GSState.h index 3ffc1d9d46..5aeb4fe202 100644 --- a/plugins/GSdx/GSState.h +++ b/plugins/GSdx/GSState.h @@ -113,13 +113,22 @@ class GSState : public GSAlignedClass<16> void (*m_irq)(); bool m_path3hack; - int m_x, m_y; - int m_bytes; - int m_maxbytes; - BYTE* m_buff; + struct GSTransferBuffer + { + int x, y; + int start, end, total; + bool overflow; + BYTE* buff; + + GSTransferBuffer(); + virtual ~GSTransferBuffer(); + + void Init(int tx, int ty); + bool Update(int tw, int th, int bpp, int& len); + + } m_tr; void FlushWrite(); - void FlushWrite(BYTE* mem, int len); protected: bool IsBadFrame(int& skip); From 71e391d01053495a9bb6afee70a8a5b129efc975 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Sat, 18 Apr 2009 09:51:42 +0000 Subject: [PATCH 091/143] GSdx: Issue 149 (GoW2 crc) and removed the dx9 flag to enable tv-out, which might have been the cause for limited fps in recent revisions. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1006 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSCrc.cpp | 1 + plugins/GSdx/GSDevice9.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/GSdx/GSCrc.cpp b/plugins/GSdx/GSCrc.cpp index e6432260e6..584c9c4404 100644 --- a/plugins/GSdx/GSCrc.cpp +++ b/plugins/GSdx/GSCrc.cpp @@ -115,6 +115,7 @@ CRC::Game CRC::m_games[] = {0x2F123FD8, GodOfWar2, RU}, {0x2F123FD8, GodOfWar2, US}, {0x44A8A22A, GodOfWar2, EU}, + {0X4340C7C6, GodOfWar2, Unknown}, {0x5D482F18, JackieChanAdv, Unknown}, {0xf0a6d880, HarvestMoon, US}, {0x75c01a04, NamcoXCapcom, US}, diff --git a/plugins/GSdx/GSDevice9.cpp b/plugins/GSdx/GSDevice9.cpp index 641e34a6c0..03984fc848 100644 --- a/plugins/GSdx/GSDevice9.cpp +++ b/plugins/GSdx/GSDevice9.cpp @@ -266,7 +266,7 @@ bool GSDevice9::Reset(int w, int h, bool fs) m_pp.PresentationInterval = D3DPRESENT_INTERVAL_DEFAULT; } - m_pp.Flags |= D3DPRESENTFLAG_VIDEO; // enables tv-out (but I don't think anyone would still use a regular tv...) + // m_pp.Flags |= D3DPRESENTFLAG_VIDEO; // enables tv-out (but I don't think anyone would still use a regular tv...) int mw = AfxGetApp()->GetProfileInt(_T("Settings"), _T("ModeWidth"), 0); int mh = AfxGetApp()->GetProfileInt(_T("Settings"), _T("ModeHeight"), 0); From a4abcf120696a9b98984fb37e48fdd939e77dca2 Mon Sep 17 00:00:00 2001 From: refraction Date: Sat, 18 Apr 2009 15:02:41 +0000 Subject: [PATCH 092/143] Regression fix for GT4, crashes presumably due to the zero division on this check git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1007 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 7522158b60..f076d53f6c 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -732,7 +732,8 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i else /* filling write */ { - if((u32)(((size / ft->gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num && vifRegs->cycle.cl != 0) + if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P + if((u32)(((size / ft->gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num) DevCon::Notice("Filling write warning! %x < %x and CL = %x WL = %x", params (size / ft->gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl); VIFUNPACK_LOG("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType); From 07c22b357c2882ae804f9ab31d7695af57cf0f2e Mon Sep 17 00:00:00 2001 From: refraction Date: Sat, 18 Apr 2009 23:36:01 +0000 Subject: [PATCH 093/143] Resolved Issue 165 so the scratchpad syncs with the MFIFO drain (this was the cause of the Tekken issue). 
and some other misc tweaks for unpack errors n such git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1008 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Gif.cpp | 33 ++++++++++++++++++++------------- pcsx2/SPR.cpp | 2 ++ pcsx2/Vif.cpp | 13 +++++++++---- pcsx2/VifDma.cpp | 34 +++++++++++++++++++++++++++++----- 4 files changed, 60 insertions(+), 22 deletions(-) diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index 59ef287d07..fe57bcaed1 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -31,13 +31,14 @@ using std::min; #define gifsplit 128 enum gifstate_t { - GIF_STATE_EMPTY = 0, - GIF_STATE_STALL, - GIF_STATE_DONE + GIF_STATE_READY = 0, + GIF_STATE_STALL = 1, + GIF_STATE_DONE = 2, + GIF_STATE_EMPTY = 0x10 }; // A three-way toggle used to determine if the GIF is stalling (transferring) or done (finished). -static gifstate_t gifstate = GIF_STATE_EMPTY; +static int gifstate = GIF_STATE_READY; static u64 s_gstag = 0; // used for querying the last tag @@ -347,7 +348,7 @@ void dmaGIF() { //It takes the time of 24 QW for the BUS to become ready - The Punisher, And1 Streetball GIF_LOG("dmaGIFstart chcr = %lx, madr = %lx, qwc = %lx\n tadr = %lx, asr0 = %lx, asr1 = %lx", gif->chcr, gif->madr, gif->qwc, gif->tadr, gif->asr0, gif->asr1); if ((psHu32(DMAC_CTRL) & 0xC) == 0xC ) { // GIF MFIFO - Console::WriteLn("GIF MFIFO"); + //Console::WriteLn("GIF MFIFO"); gifMFIFOInterrupt(); return; } @@ -415,7 +416,7 @@ static __forceinline int mfifoGIFrbTransfer() { gifqwc -= mfifoqwc; gif->qwc -= mfifoqwc; gif->madr += mfifoqwc*16; - mfifocycles += (mfifoqwc) * 2; /* guessing */ + //mfifocycles += (mfifoqwc) * 2; /* guessing */ return 0; } @@ -456,8 +457,8 @@ void mfifoGIFtransfer(int qwc) { if(qwc > 0 ) { gifqwc += qwc; - if (!(gif->chcr & 0x100)) return; - if (gifstate == GIF_STATE_STALL) return; + if (gifstate != GIF_STATE_EMPTY) return; + gifstate &= ~GIF_STATE_EMPTY; } SPR_LOG("mfifoGIFtransfer %x madr %x, tadr %x", gif->chcr, gif->madr, gif->tadr); @@ -491,20 +492,20 @@ void mfifoGIFtransfer(int qwc) { case 1: // CNT - Transfer QWC following the tag. gif->madr = psHu32(DMAC_RBOR) + ((gif->tadr + 16) & psHu32(DMAC_RBSR)); //Set MADR to QW after Tag gif->tadr = psHu32(DMAC_RBOR) + ((gif->madr + (gif->qwc << 4)) & psHu32(DMAC_RBSR)); //Set TADR to QW following the data - gifstate = GIF_STATE_EMPTY; + gifstate = GIF_STATE_READY; break; case 2: // Next - Transfer QWC following tag. 
TADR = ADDR temp = gif->madr; //Temporarily Store ADDR gif->madr = psHu32(DMAC_RBOR) + ((gif->tadr + 16) & psHu32(DMAC_RBSR)); //Set MADR to QW following the tag gif->tadr = temp; //Copy temporarily stored ADDR to Tag - gifstate = GIF_STATE_EMPTY; + gifstate = GIF_STATE_READY; break; case 3: // Ref - Transfer QWC from ADDR field case 4: // Refs - Transfer QWC from ADDR field (Stall Control) gif->tadr = psHu32(DMAC_RBOR) + ((gif->tadr + 16) & psHu32(DMAC_RBSR)); //Set TADR to next tag - gifstate = GIF_STATE_EMPTY; + gifstate = GIF_STATE_READY; break; case 7: // End - Transfer QWC following the tag @@ -544,10 +545,16 @@ void gifMFIFOInterrupt() cpuRegs.interrupt &= ~(1 << 11); return ; } + if(spr0->chcr & 0x100) + { + spr0->chcr &= ~0x100; + hwDmacIrq(8); + } if(gifstate != GIF_STATE_STALL) { if(gifqwc <= 0) { //Console::WriteLn("Empty"); + gifstate |= GIF_STATE_EMPTY; psHu32(GIF_STAT)&= ~0xE00; // OPH=0 | APATH=0 hwDmacIrq(14); return; @@ -556,14 +563,14 @@ void gifMFIFOInterrupt() return; } #ifdef PCSX2_DEVBUILD - if(gifstate == GIF_STATE_EMPTY || gif->qwc > 0) { + if(gifstate == GIF_STATE_READY || gif->qwc > 0) { Console::Error("gifMFIFO Panic > Shouldn't go here!"); return; } #endif //if(gifqwc > 0) Console::WriteLn("GIF MFIFO ending with stuff in it %x", params gifqwc); if (!gifmfifoirq) gifqwc = 0; - gifstate = GIF_STATE_EMPTY; + gifstate = GIF_STATE_READY; gif->chcr &= ~0x100; hwDmacIrq(DMAC_GIF); GSCSRr &= ~0xC000; //Clear FIFO stuff diff --git a/pcsx2/SPR.cpp b/pcsx2/SPR.cpp index 71bfc6b948..5c5291fad0 100644 --- a/pcsx2/SPR.cpp +++ b/pcsx2/SPR.cpp @@ -227,6 +227,7 @@ void SPRFROMinterrupt() //Console::WriteLn("mfifoGIFtransfer %x madr %x, tadr %x", params gif->chcr, gif->madr, gif->tadr); mfifoGIFtransfer(mfifotransferred); mfifotransferred = 0; + return; } else if ((psHu32(DMAC_CTRL) & 0xC) == 0x8) // VIF1 MFIFO { @@ -235,6 +236,7 @@ void SPRFROMinterrupt() //Console::WriteLn("mfifoVIF1transfer %x madr %x, tadr %x", params vif1ch->chcr, vif1ch->madr, vif1ch->tadr); mfifoVIF1transfer(mfifotransferred); mfifotransferred = 0; + return; } } if (spr0finished == 0) return; diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index 56e399dbc6..a38fea9540 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -434,17 +434,18 @@ void mfifoVIF1transfer(int qwc) if (qwc > 0) { vifqwc += qwc; + SPR_LOG("Added %x qw to mfifo, total now %x - Vif CHCR %x Stalled %x done %x", qwc, vifqwc, vif1ch->chcr, vif1.vifstalled, vif1.done); if (vif1.inprogress & 0x10) { if (vif1ch->madr >= psHu32(DMAC_RBOR) && vif1ch->madr <= (psHu32(DMAC_RBOR) + psHu32(DMAC_RBSR))) - CPU_INT(10, min((int)vifqwc, (int)vif1ch->qwc) * BIAS); + CPU_INT(10, 0); else CPU_INT(10, vif1ch->qwc * BIAS); vif1Regs->stat |= 0x10000000; // FQC=16 } vif1.inprogress &= ~0x10; - SPR_LOG("Added %x qw to mfifo, total now %x - Vif CHCR %x Stalled %x done %x", qwc, vifqwc, vif1ch->chcr, vif1.vifstalled, vif1.done); + return; } @@ -531,7 +532,11 @@ void mfifoVIF1transfer(int qwc) void vifMFIFOInterrupt() { g_vifCycles = 0; - + if(spr0->chcr & 0x100) + { + spr0->chcr &= ~0x100; + hwDmacIrq(8); + } if (vif1.inprogress == 1) mfifo_VIF1chain(); if (vif1.irq && vif1.tag.size == 0) @@ -560,7 +565,7 @@ void vifMFIFOInterrupt() if (!(vif1.inprogress & 0x1)) mfifoVIF1transfer(0); if (vif1ch->madr >= psHu32(DMAC_RBOR) && vif1ch->madr <= (psHu32(DMAC_RBOR) + psHu32(DMAC_RBSR))) - CPU_INT(10, min((int)vifqwc, (int)vif1ch->qwc) * BIAS); + CPU_INT(10, 0); else CPU_INT(10, vif1ch->qwc * BIAS); diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index f076d53f6c..17d9310b21 
100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -645,7 +645,7 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i //VIF_LOG("warning, end with size = %d", size); /* unpack one qword */ - vif->tag.addr += (size / ft->dsize) * 4; + //vif->tag.addr += (size / ft->dsize) * 4; func(dest, (u32*)cdata, size / ft->dsize); size = 0; @@ -714,7 +714,7 @@ static void VIFunpack(u32 *data, vifCode *v, unsigned int size, const unsigned i //VIF_LOG("warning, end with size = %d", size); /* unpack one qword */ - vif->tag.addr += (size / ft->dsize) * 4; + //vif->tag.addr += (size / ft->dsize) * 4; func(dest, (u32*)cdata, size / ft->dsize); size = 0; @@ -1008,6 +1008,18 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK FreezeXMMRegs(1); if (vif0.vifpacketsize < vif0.tag.size) { + if(vif0Regs->offset != 0 || vif0.cl != 0) + { + ret = vif0.tag.size; + vif0.tag.size = VIFalign(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum); + ret = ret - vif0.tag.size; + data += ret; + if(vif0.vifpacketsize > 0) VIFunpack(data, &vif0.tag, vif0.vifpacketsize - ret, VIF0dmanum); + ProcessMemSkip((vif0.vifpacketsize - ret) << 2, (vif0.cmd & 0xf), VIF0dmanum); + vif0.tag.size -= (vif0.vifpacketsize - ret); + FreezeXMMRegs(0); + return vif0.vifpacketsize; + } /* size is less that the total size, transfer is 'in pieces' */ VIFunpack(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum); @@ -1835,8 +1847,20 @@ static int __fastcall Vif1TransUnpack(u32 *data) if (vif1.vifpacketsize < vif1.tag.size) { + int ret = vif1.tag.size; /* size is less that the total size, transfer is 'in pieces' */ + if(vif1Regs->offset != 0 || vif1.cl != 0) + { + vif1.tag.size = VIFalign(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum); + ret = ret - vif1.tag.size; + data += ret; + if((vif1.vifpacketsize - ret) > 0) VIFunpack(data, &vif1.tag, vif1.vifpacketsize - ret, VIF1dmanum); + ProcessMemSkip((vif1.vifpacketsize - ret) << 2, (vif1.cmd & 0xf), VIF1dmanum); + vif1.tag.size -= (vif1.vifpacketsize - ret); + FreezeXMMRegs(0); + return vif1.vifpacketsize; + } VIFunpack(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum); ProcessMemSkip(vif1.vifpacketsize << 2, (vif1.cmd & 0xf), VIF1dmanum); @@ -2390,7 +2414,7 @@ __forceinline void vif1Interrupt() if (vif1.inprogress) _VIF1chain(); - if ((!vif1.done) || (vif1.inprogress)) + if ((!vif1.done) || (vif1.inprogress & 0x1)) { if (!(psHu32(DMAC_CTRL) & 0x1)) @@ -2399,7 +2423,7 @@ __forceinline void vif1Interrupt() return; } - if (vif1.inprogress == 0) vif1SetupTransfer(); + if ((vif1.inprogress & 0x1) == 0) vif1SetupTransfer(); CPU_INT(1, vif1ch->qwc * BIAS); return; @@ -2436,7 +2460,7 @@ void dmaVIF1() if (((psHu32(DMAC_CTRL) & 0xC) == 0x8)) // VIF MFIFO { -// Console::WriteLn("VIFMFIFO\n"); + //Console::WriteLn("VIFMFIFO\n"); if (!(vif1ch->chcr & 0x4)) Console::WriteLn("MFIFO mode != Chain! 
%x", params vif1ch->chcr); vifMFIFOInterrupt(); return; From 924869f765a75140eb85a9dc505cf713cb18d29a Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sat, 18 Apr 2009 23:56:32 +0000 Subject: [PATCH 094/143] more microVU "stuff" if the commit log has "stuff" in it, it's vu work from cotton git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1009 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU.cpp | 23 +++-- pcsx2/x86/microVU.h | 3 +- pcsx2/x86/microVU_Alloc.h | 9 +- pcsx2/x86/microVU_Alloc.inl | 26 +++--- pcsx2/x86/microVU_Compile.inl | 154 ++++++++++++++++++++-------------- pcsx2/x86/microVU_Execute.inl | 48 ++++++----- pcsx2/x86/microVU_Lower.inl | 26 +++--- pcsx2/x86/microVU_Misc.inl | 14 ++-- pcsx2/x86/microVU_Upper.inl | 2 +- 9 files changed, 174 insertions(+), 131 deletions(-) diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index af8af9002b..469a2a7c86 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -43,6 +43,8 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { mVU->microSize = (vuIndex ? 0x4000 : 0x1000); mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4; mVU->cache = NULL; + memset(&mVU->prog, 0, sizeof(mVU->prog)); + mVUlog((vuIndex) ? "microVU1: init" : "microVU0: init"); mVUreset(); } @@ -53,17 +55,13 @@ microVUt(void) mVUreset() { microVU* mVU = mVUx; mVUclose(); // Close - // Create Block Managers - for (int i = 0; i <= mVU->prog.max; i++) { - for (u32 j = 0; j < (mVU->progSize / 2); j++) { - mVU->prog.prog[i].block[j] = new microBlockManager(); - } - } + mVUlog((vuIndex) ? "microVU1: reset" : "microVU0: reset"); // Dynarec Cache mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); if ( mVU->cache == NULL ) throw Exception::OutOfMemory(fmt_string( "microVU Error: Failed to allocate recompiler memory! (addr: 0x%x)", params (u32)mVU->cache)); - + memset(mVU->cache, 0xcc, mVU->cacheSize); + // Setup Entrance/Exit Points x86SetPtr(mVU->cache); mVUdispatcherA(); @@ -76,6 +74,13 @@ microVUt(void) mVUreset() { mVU->prog.cur = -1; mVU->prog.total = -1; + // Create Block Managers + for (int i = 0; i <= mVU->prog.max; i++) { + for (u32 j = 0; j < (mVU->progSize / 2); j++) { + mVU->prog.prog[i].block[j] = new microBlockManager(); + } + } + // Setup Dynarec Cache Limits for Each Program u8* z = (mVU->cache + 512); // Dispatcher Code is in first 512 bytes for (int i = 0; i <= mVU->prog.max; i++) { @@ -90,6 +95,7 @@ microVUt(void) mVUreset() { microVUt(void) mVUclose() { microVU* mVU = mVUx; + mVUlog((vuIndex) ? "microVU1: close" : "microVU0: close"); if ( mVU->cache ) { HostSys::Munmap( mVU->cache, mVU->cacheSize ); mVU->cache = NULL; } @@ -149,6 +155,7 @@ __forceinline int mVUfindLeastUsedProg(microVU* mVU) { } mVUclearProg(mVU, j); // Clear old data if overwriting old program mVUcacheProg(mVU, j); // Cache Micro Program + mVUlog("microVU: Program Cache got Full!"); return j; } } @@ -160,7 +167,7 @@ __forceinline int mVUsearchProg(microVU* mVU) { //if (i == mVU->prog.cur) continue; // We can skip the current program. 
(ToDo: Verify that games don't clear, and send the same microprogram :/) //if (mVU->prog.prog[i]) // ToDo: Implement Cycles if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) { - if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); } + //if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); } mVU->prog.cur = i; mVU->prog.cleared = 0; mVU->prog.prog[i].used++; diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index ad54f1eeb1..4cd7654866 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -89,7 +89,7 @@ struct microVU { u32 index; // VU Index (VU0 or VU1) u32 microSize; // VU Micro Memory Size u32 progSize; // VU Micro Program Size (microSize/4) - static const u32 cacheSize = 0x500000; // VU Cache Size + static const u32 cacheSize = 0x800000; // VU Cache Size microProgManager<0x4000> prog; // Micro Program Data @@ -105,6 +105,7 @@ struct microVU { u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump to (JALR/JR) u32 p; // Holds current P instance index u32 q; // Holds current Q instance index + u32 tempBackup; }; // microVU rec structs diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index 49b0fda39e..b1f1bbdc24 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -36,7 +36,7 @@ struct microRegInfo { u8 p; u8 r; u8 xgkick; - u8 needExactMatch; // This block needs an exact match of pipeline state + u8 needExactMatch; // If set, block needs an exact match of pipeline state }; struct microTempRegInfo { @@ -51,10 +51,9 @@ struct microTempRegInfo { }; struct microBlock { - microRegInfo pState; // Detailed State of Pipeline - u8* x86ptrStart; // Start of code - //u8* x86ptrEnd; // End of code (first byte outside of block) - //u32 size; // Number of 64bit VU Instructions in Block + microRegInfo pState; // Detailed State of Pipeline + microRegInfo pStateEnd; // Detailed State of Pipeline at End of Block (needed by JR/JALR opcodes) + u8* x86ptrStart; // Start of code }; template diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index c256d71753..c31522eb88 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -282,7 +282,6 @@ microVUt(void) mVUallocFMAC9a(int& Fd, int& ACC, int& Fs, int& Ft) { Ft = xmmFt; Fd = xmmT1; ACC = xmmT1; - SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); if (_Ft_ == _Fs_) { Ft = Fs; } @@ -296,6 +295,7 @@ microVUt(void) mVUallocFMAC9a(int& Fd, int& ACC, int& Fs, int& Ft) { else if (!_Ft_) { getZero4(Ft); } else { getReg4(Ft, _Ft_); } } + SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); } microVUt(void) mVUallocFMAC9b(int& Fd) { @@ -344,7 +344,6 @@ microVUt(void) mVUallocFMAC11a(int& Fd, int& ACC, int& Fs, int& Ft) { Ft = xmmFt; Fd = xmmT1; ACC = xmmT1; - SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); if ( (_Ft_ == _Fs_) && _bc_x) { Ft = Fs; } @@ -358,6 +357,7 @@ microVUt(void) mVUallocFMAC11a(int& Fd, int& ACC, int& Fs, int& Ft) { if (!_Ft_) { getZero3(Ft); } else { getReg3(Ft, _Ft_); } } + SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); } microVUt(void) mVUallocFMAC11b(int& Fd) { @@ -394,11 +394,11 @@ microVUt(void) mVUallocFMAC13a(int& Fd, int& ACC, int& Fs, int& Ft) { Ft = xmmFt; Fd = xmmT1; ACC = xmmT1; - SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } + SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); } microVUt(void) mVUallocFMAC13b(int& Fd) { @@ -415,8 +415,7 @@ 
microVUt(void) mVUallocFMAC14a(int& ACCw, int& ACCr, int& Fs, int& Ft) { Ft = xmmFt; ACCw = xmmACC; ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1; - SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); - + if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); if (_Ft_ == _Fs_) { Ft = Fs; } @@ -430,6 +429,7 @@ microVUt(void) mVUallocFMAC14a(int& ACCw, int& ACCr, int& Fs, int& Ft) { else if (!_Ft_) { getZero4(Ft); } else { getReg4(Ft, _Ft_); } } + SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); } microVUt(void) mVUallocFMAC14b(int& ACCw, int& ACCr) { @@ -448,8 +448,7 @@ microVUt(void) mVUallocFMAC15a(int& ACCw, int& ACCr, int& Fs, int& Ft) { Ft = xmmFt; ACCw = xmmACC; ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1; - SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); - + if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); if ((_Ft_ == _Fs_) && _bc_x) { Ft = Fs; } @@ -463,6 +462,7 @@ microVUt(void) mVUallocFMAC15a(int& ACCw, int& ACCr, int& Fs, int& Ft) { if (!_Ft_) { getZero3(Ft); } else { getReg3(Ft, _Ft_); } } + SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); } microVUt(void) mVUallocFMAC15b(int& ACCw, int& ACCr) { @@ -479,11 +479,11 @@ microVUt(void) mVUallocFMAC16a(int& ACCw, int& ACCr, int& Fs, int& Ft) { Ft = xmmFt; ACCw = xmmACC; ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? xmmACC : xmmT1; - SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); getIreg(Ft, 0); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } + SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); } microVUt(void) mVUallocFMAC16b(int& ACCw, int& ACCr) { @@ -542,8 +542,7 @@ microVUt(void) mVUallocFMAC19a(int& Fd, int& ACC, int& Fs, int& Ft) { Ft = xmmFt; Fd = xmmT1; ACC = xmmT1; - SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); - + if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } @@ -552,6 +551,7 @@ microVUt(void) mVUallocFMAC19a(int& Fd, int& ACC, int& Fs, int& Ft) { SSE2_PSHUFD_XMM_to_XMM(Fs, Fs, 0xC9); // WXZY SSE2_PSHUFD_XMM_to_XMM(Ft, Ft, 0xD2); // WYXZ + SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); } microVUt(void) mVUallocFMAC19b(int& Fd) { @@ -629,11 +629,11 @@ microVUt(void) mVUallocFMAC25a(int& Fd, int& ACC, int& Fs, int& Ft) { Ft = xmmFt; Fd = xmmT1; ACC = xmmT1; - SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); getQreg(Ft); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } + SSE_MOVAPS_XMM_to_XMM(ACC, xmmACC); } microVUt(void) mVUallocFMAC25b(int& Fd) { @@ -650,11 +650,11 @@ microVUt(void) mVUallocFMAC26a(int& ACCw, int& ACCr, int& Fs, int& Ft) { Ft = xmmFt; ACCw = xmmACC; ACCr = ((_X_Y_Z_W == 15) || (_X_Y_Z_W == 8)) ? 
xmmACC : xmmT1; - SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); getQreg(Ft); if (_X_Y_Z_W == 8) { getReg6(Fs, _Fs_); } else if (!_Fs_) { getZero4(Fs); } else { getReg4(Fs, _Fs_); } + SSE_MOVAPS_XMM_to_XMM(ACCr, xmmACC); } microVUt(void) mVUallocFMAC26b(int& ACCw, int& ACCr) { @@ -748,7 +748,7 @@ microVUt(void) mVUallocVIb(int GPRreg, int _reg_) { #define getReg5(reg, _reg_, _fxf_) { \ if (!_reg_) { \ if (_fxf_ < 3) { SSE_XORPS_XMM_to_XMM(reg, reg); } \ - else { mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], 3); } \ + else { mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], 1); } \ } \ else { \ mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _fxf_))); \ diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 1e21e6ab3f..b7699d1db2 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -23,19 +23,15 @@ // Helper Macros //------------------------------------------------------------------ -#define createBlock(blockEndPtr) { \ - block.pipelineState = pipelineState; \ - block.x86ptrStart = x86ptrStart; \ - block.x86ptrEnd = blockEndPtr; \ - /*block.x86ptrBranch;*/ \ - if (!(pipelineState & 1)) { \ - memcpy_fast(&block.pState, pState, sizeof(microRegInfo)); \ - } \ -} - -#define branchCase(JMPcc) \ - CMP16ItoM((uptr)mVU->branch, 0); \ - ajmp = JMPcc((uptr)0); \ +#define branchCase(JMPcc, nJMPcc) \ + mVUsetupBranch(bStatus, bMac); \ + mVUlog("mVUcompile branchCase"); \ + CMP16ItoM((uptr)&mVU->branch, 0); \ + incPC2(1); \ + pBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \ + incPC2(-1); \ + if (pBlock) { nJMPcc((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 6)); } \ + else { ajmp = JMPcc((uptr)0); } \ break #define flagSetMacro(xFlag, pFlag, xF, yF, zF) { \ @@ -134,7 +130,7 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { for (; (ii > 0 && i >= 0); i--, ii--) { xStatus = (xStatus-1) & 3; bStatus[i] = xStatus; } } if (doMac && (j >= 0)) { - for (; (jj > 0 && j >= 0); j--, jj--) { xMac = (xMac-1) & 3; bMac[i] = xMac; } + for (; (jj > 0 && j >= 0); j--, jj--) { xMac = (xMac-1) & 3; bMac[j] = xMac; } } incPC2(-2); } @@ -146,9 +142,10 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { // Recompiles Code for Proper Flags and Q/P regs on Block Linkings microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) { microVU* mVU = mVUx; + mVUlog("mVUsetupBranch"); PUSH32R(gprR); // Backup gprR - PUSH32R(gprESP); // Backup gprESP + MOV32RtoM((uptr)&mVU->tempBackup, gprESP); MOV32RtoR(gprT1, getFlagReg1(bStatus[0])); MOV32RtoR(gprT2, getFlagReg1(bStatus[1])); @@ -175,7 +172,7 @@ microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) { OR32RtoR(gprF2, getFlagReg2(bMac[2])); OR32RtoR(gprF3, getFlagReg2(bMac[3])); - POP32R(gprESP); // Restore gprESP + MOV32MtoR(gprESP, (uptr)&mVU->tempBackup); POP32R(gprR); // Restore gprR // Shuffle P/Q regs since every block starts at instance #0 @@ -210,7 +207,7 @@ microVUt(void) mVUincCycles(int x) { microVUt(void) mVUsetCycles() { microVU* mVU = mVUx; incCycles(mVUstall); - if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && !mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg + if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? 
_noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y); @@ -229,10 +226,14 @@ microVUt(void) mVUsetCycles() { microVUt(void) mVUdivSet() { microVU* mVU = mVUx; int flagReg1, flagReg2; - getFlagReg(flagReg1, fsInstance); - if (!doStatus) { getFlagReg(flagReg2, fpsInstance); MOV16RtoR(flagReg1, flagReg2); } - AND16ItoR(flagReg1, 0xfcf); - OR16MtoR (flagReg1, (uptr)&mVU->divFlag); + if (doDivFlag) { + getFlagReg(flagReg1, fsInstance); + if (!doStatus) { getFlagReg(flagReg2, fpsInstance); MOV16RtoR(flagReg1, flagReg2); } + MOV32RtoR(gprT1, flagReg1); + AND32ItoR(gprT1, 0xffff0fcf); + OR32MtoR (gprT1, (uptr)&mVU->divFlag); + MOV32RtoR(flagReg1, gprT1); + } } //------------------------------------------------------------------ @@ -243,13 +244,17 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { microVU* mVU = mVUx; u8* thisPtr = x86Ptr; + if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUlog("microVU: invalid startPC"); } + //mVUlog("mVUcompile Search"); + // Searches for Existing Compiled Block (if found, then returns; else, compile) microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState); if (pBlock) { return pBlock->x86ptrStart; } + + mVUlog("mVUcompile First Pass"); // First Pass iPC = startPC / 4; - setCode(); mVUbranch = 0; mVUstartPC = iPC; mVUcount = 0; @@ -257,13 +262,17 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { mVU->p = 0; // All blocks start at p index #0 mVU->q = 0; // All blocks start at q index #0 memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info + mVUblock.x86ptrStart = thisPtr; + pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager + for (int branch = 0;; ) { + incPC(1); startLoop(); mVUopU(); if (curI & _Ebit_) { branch = 1; } if (curI & _MDTbit_) { branch = 2; } - if (curI & _Ibit_) { incPC(1); mVUinfo |= _isNOP; } - else { incPC(1); mVUopL(); } + if (curI & _Ibit_) { mVUinfo |= _isNOP; } + else { incPC(-1); mVUopL(); incPC(1); } mVUsetCycles(); if (mVU->p) { mVUinfo |= _readP; } if (mVU->q) { mVUinfo |= _readQ; } @@ -276,86 +285,107 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { mVUcount++; } + mVUlog("mVUcompile mVUsetFlags"); + // Sets Up Flag instances int bStatus[4]; int bMac[4]; mVUsetFlags(bStatus, bMac); + mVUlog("mVUcompile Second Pass"); + + //write8(0xcc); + // Second Pass iPC = mVUstartPC; - setCode(); + mVUbranch = 0; + int test = 0; for (bool x = 1; x; ) { if (isEOB) { x = 0; } - if (isNOP) { doUpperOp(); if (curI & _Ibit_) { incPC(1); mVU->iReg = curI; } else { incPC(1); } } - else if (!swapOps) { doUpperOp(); incPC(1); mVUopL(); } - else { incPC(1); mVUopL(); incPC(-1); doUpperOp(); incPC(1); } + if (isNOP) { incPC(1); doUpperOp(); if (curI & _Ibit_) { incPC(-1); mVU->iReg = curI; incPC(-1); } } + else if (!swapOps) { incPC(1); doUpperOp(); incPC(-1); mVUopL(); incPC(1); } + else { mVUopL(); incPC(1); doUpperOp(); } + test++; + if (test > 0x3ff) { mVUlog("microVU: Possible infinite compiling loop!"); x = 0; test = 0; } + if (!isBdelay) { incPC(1); } else { u32* ajmp = 0; switch (mVUbranch) { - case 3: branchCase(JZ32); // IBEQ - case 4: branchCase(JGE32); // IBGEZ - case 5: branchCase(JG32); // IBGTZ - case 6: branchCase(JLE32); // IBLEQ - case 7: branchCase(JL32); // IBLTZ - case 8: branchCase(JNZ32); 
// IBNEQ + case 3: branchCase(JZ32, JNZ32); // IBEQ + case 4: branchCase(JGE32, JNGE32); // IBGEZ + case 5: branchCase(JG32, JNG32); // IBGTZ + case 6: branchCase(JLE32, JNLE32); // IBLEQ + case 7: branchCase(JL32, JNL32); // IBLTZ + case 8: branchCase(JNZ32, JZ32); // IBNEQ case 1: case 2: // B/BAL - incPC(-2); // Go back to branch opcode (to get branch imm addr) + + mVUlog("mVUcompile B/BAL"); + incPC(-3); // Go back to branch opcode (to get branch imm addr) mVUsetupBranch(bStatus, bMac); // Check if branch-block has already been compiled pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); - if (pBlock) { - ajmp = JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); - mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager - } - else { - pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add block - if (!vuIndex) mVUcompileVU0(branchAddr, (uptr)&pBlock->pState); - else mVUcompileVU1(branchAddr, (uptr)&pBlock->pState); - } - //incPC(+2); + if (pBlock) { JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); } + else if (!vuIndex) { mVUcompileVU0(branchAddr, (uptr)&mVUregs); } + else { mVUcompileVU1(branchAddr, (uptr)&mVUregs); } return thisPtr; case 9: case 10: // JR/JALR - + + mVUlog("mVUcompile JR/JALR"); + memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); mVUsetupBranch(bStatus, bMac); PUSH32R(gprR); // Backup EDX - //MOV32MtoR(gprT1, (uptr)&mVUcurProg.x86ptr); // Get last x86ptr for this program - //MOV32RtoM((uptr)&x86Ptr, gprT1); // Setup x86Ptr to write to correct address - MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall) - AND32ItoR(gprT2, (vuIndex)?0x3ff8:0xff8); // Ensure valid jump address - pBlock = mVUblocks[startPC/8]->add(&mVUblock); // Add this block to block manager - MOV32ItoR(gprR, (u32)&pBlock->pState); // Get pState (EDX second argument for __fastcall) + MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall) + AND32ItoR(gprT2, (vuIndex)?0x3ff8:0xff8); // Ensure valid jump address + MOV32ItoR(gprR, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall) - if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState) + if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState) else CALLFunc((uptr)mVUcompileVU1); POP32R(gprR); // Restore EDX JMPR(gprT1); // Jump to rec-code address return thisPtr; } - - - + // Conditional Branches + mVUlog("mVUcompile conditional branch"); + if (pBlock) { // Branch non-taken has already been compiled + incPC(-3); // Go back to branch opcode (to get branch imm addr) + // Check if branch-block has already been compiled + pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); + if (pBlock) { JMP32((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 5)); } + else if (!vuIndex) { mVUcompileVU0(branchAddr, (uptr)&mVUregs); } + else { mVUcompileVU1(branchAddr, (uptr)&mVUregs); } + } + else { + uptr jumpAddr; + incPC(1); // Get PC for branch not-taken + if (!vuIndex) mVUcompileVU0(xPC, (uptr)&mVUregs); + else mVUcompileVU1(xPC, (uptr)&mVUregs); + incPC(-4); // Go back to branch opcode (to get branch imm addr) + if (!vuIndex) jumpAddr = (uptr)mVUcompileVU0(branchAddr, (uptr)&mVUregs); + else jumpAddr = (uptr)mVUcompileVU1(branchAddr, (uptr)&mVUregs); + *ajmp = (jumpAddr - ((uptr)ajmp + 4)); + } return thisPtr; } } + mVUlog("mVUcompile ebit"); // Do E-bit end stuff here incCycles(55); // Ensures Valid P/Q instances mVUcycles -= 55; if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, 
xmmPQ, 0xe5); } - SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2); - SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); - - //MOV32ItoM((uptr)&mVU->p, mVU->p); - //MOV32ItoM((uptr)&mVU->q, mVU->q); + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ); AND32ItoM((uptr)µVU0.regs->VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif - MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC], xPC); + MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC].UL, xPC); JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5)); + + //ToDo: Save pipeline state? return thisPtr; } diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 205fdf0047..1fdf4c7929 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -22,6 +22,8 @@ // Dispatcher Functions //------------------------------------------------------------------ +void testFunction() { mVUlog("microVU: Entered Execution Mode"); } + // Generates the code for entering recompiled blocks microVUt(void) mVUdispatcherA() { static u32 PCSX2_ALIGNED16(vuMXCSR); @@ -43,9 +45,9 @@ microVUt(void) mVUdispatcherA() { SSE_LDMXCSR((uptr)&vuMXCSR); // Load Regs - MOV32MtoR(gprR, (uptr)&mVU->regs->VI[REG_R]); - MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG]); - MOV32MtoR(gprF1, (uptr)&mVU->regs->VI[REG_MAC_FLAG]); + MOV32MtoR(gprR, (uptr)&mVU->regs->VI[REG_R].UL); + MOV32MtoR(gprF0, (uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL); + MOV32MtoR(gprF1, (uptr)&mVU->regs->VI[REG_MAC_FLAG].UL); SHL32ItoR(gprF0, 16); AND32ItoR(gprF1, 0xffff); OR32RtoR (gprF0, gprF1); @@ -54,16 +56,21 @@ microVUt(void) mVUdispatcherA() { MOV32RtoR(gprF3, gprF0); for (int i = 0; i < 8; i++) { - MOVQMtoR(i, (uptr)&mVU->regs->VI[i+1]); + MOVQMtoR(i, (uptr)&mVU->regs->VI[i+1].UL); } - SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC); - SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)&mVU_maxvals[0]); - SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)&mVU_minvals[0]); - SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P]); - SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q]); + SSE_MOVAPS_M128_to_XMM(xmmACC, (uptr)&mVU->regs->ACC.UL[0]); + SSE_MOVAPS_M128_to_XMM(xmmMax, (uptr)mVU_maxvals); + SSE_MOVAPS_M128_to_XMM(xmmMin, (uptr)mVU_minvals); + SSE_MOVAPS_M128_to_XMM(xmmT1, (uptr)&mVU->regs->VI[REG_P].UL); + SSE_MOVAPS_M128_to_XMM(xmmPQ, (uptr)&mVU->regs->VI[REG_Q].UL); SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ + //PUSH32R(EAX); + //CALLFunc((uptr)testFunction); + //POP32R(EAX); + //write8(0xcc); + // Jump to Recompiled Code Block JMPR(EAX); } @@ -86,18 +93,18 @@ microVUt(void) mVUdispatcherB() { MOV32RtoR(gprT1, gprF0); // ToDo: Ensure Correct Flag instances AND32ItoR(gprT1, 0xffff); SHR32ItoR(gprF0, 16); - MOV32RtoM((uptr)&mVU->regs->VI[REG_R], gprR); - MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG], gprT1); - MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG], gprF0); + MOV32RtoM((uptr)&mVU->regs->VI[REG_R].UL, gprR); + MOV32RtoM((uptr)&mVU->regs->VI[REG_STATUS_FLAG].UL, gprT1); + MOV32RtoM((uptr)&mVU->regs->VI[REG_MAC_FLAG].UL, gprF0); for (int i = 0; i < 8; i++) { - MOVDMMXtoM((uptr)&mVU->regs->VI[i+1], i); + MOVDMMXtoM((uptr)&mVU->regs->VI[i+1].UL, i); } - SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC, xmmACC); - //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q], xmmPQ); // ToDo: Ensure Correct Q/P instances + 
SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC.UL[0], xmmACC); + //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ); // ToDo: Ensure Correct Q/P instances //SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0); // wzyx = PPPP - //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P], xmmPQ); + //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ); // Restore cpu state POP32R(EDI); @@ -105,6 +112,8 @@ microVUt(void) mVUdispatcherB() { POP32R(EBP); POP32R(EBX); + //write8(0xcc); + EMMS(); RET(); @@ -119,7 +128,7 @@ microVUt(void) mVUdispatcherB() { microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { microVU* mVU = mVUx; - mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles); + //mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles); // ToDo: Implement Cycles mVUsearchProg(mVU); // Find and set correct program @@ -135,6 +144,7 @@ microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { microVUt(void) mVUcleanUp() { microVU* mVU = mVUx; + //mVUlog("microVU: Program exited successfully!"); mVUcurProg.x86ptr = x86Ptr; mVUcacheCheck(x86Ptr, mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start)); } @@ -147,7 +157,7 @@ void __fastcall startVU0(u32 startPC, u32 cycles) { ((mVUrecCall)microVU0.start void __fastcall startVU1(u32 startPC, u32 cycles) { ((mVUrecCall)microVU1.startFunct)(startPC, cycles); } void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { return mVUexecute<0>(startPC, cycles); } void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { return mVUexecute<1>(startPC, cycles); } -void mVUcleanUpVU0() { mVUcleanUp<0>(); } -void mVUcleanUpVU1() { mVUcleanUp<1>(); } +void __fastcall mVUcleanUpVU0() { mVUcleanUp<0>(); } +void __fastcall mVUcleanUpVU1() { mVUcleanUp<1>(); } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 35827af0ed..ef63010029 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -415,7 +415,7 @@ microVUf(void) mVU_ESUM() { microVUf(void) mVU_FCAND() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) {mVUlog("clip broken");} else { mVUallocCFLAGa(gprT1, fvcInstance); AND32ItoR(gprT1, _Imm24_); @@ -427,7 +427,7 @@ microVUf(void) mVU_FCAND() { microVUf(void) mVU_FCEQ() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) {mVUlog("clip broken");} else { mVUallocCFLAGa(gprT1, fvcInstance); XOR32ItoR(gprT1, _Imm24_); @@ -439,7 +439,7 @@ microVUf(void) mVU_FCEQ() { microVUf(void) mVU_FCGET() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) {mVUlog("clip broken");} else { mVUallocCFLAGa(gprT1, fvcInstance); AND32ItoR(gprT1, 0xfff); @@ -449,7 +449,7 @@ microVUf(void) mVU_FCGET() { microVUf(void) mVU_FCOR() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) {mVUlog("clip broken");} else { mVUallocCFLAGa(gprT1, fvcInstance); OR32ItoR(gprT1, _Imm24_); @@ -461,7 +461,7 @@ microVUf(void) mVU_FCOR() { microVUf(void) mVU_FCSET() { microVU* mVU = mVUx; - if (!recPass) {} + if (!recPass) {mVUlog("clip broken");} else { MOV32ItoR(gprT1, _Imm24_); mVUallocCFLAGb(gprT1, fcInstance); @@ -1007,7 +1007,7 @@ microVUf(void) mVU_XTOP() { microVU* mVU = mVUx; if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); } else { - MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->top); + MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->top); mVUallocVIb(gprT1, _Ft_); } } @@ -1016,7 +1016,7 @@ microVUf(void) mVU_XITOP() { microVU* mVU = mVUx; if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } 
analyzeVIreg2(_Ft_, 1); } else { - MOVZX32M16toR( gprT1, (uptr)&mVU->regs->vifRegs->itop ); + MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->itop); mVUallocVIb(gprT1, _Ft_); } } @@ -1025,16 +1025,13 @@ microVUf(void) mVU_XITOP() { // XGkick //------------------------------------------------------------------ -microVUt(void) __fastcall mVU_XGKICK_(u32 addr) { - microVU* mVU = mVUx; - u32 *data = (u32*)(mVU->regs->Mem + (addr&0x3fff)); +void __fastcall mVU_XGKICK_(u32 addr) { + u32 *data = (u32*)(microVU1.regs->Mem + (addr&0x3fff)); u32 size = mtgsThread->PrepDataPacket( GIF_PATH_1, data, (0x4000-(addr&0x3fff)) >> 4); u8 *pDest = mtgsThread->GetDataPacketPtr(); - memcpy_aligned(pDest, mVU->regs->Mem + addr, size<<4); + memcpy_aligned(pDest, microVU1.regs->Mem + addr, size<<4); mtgsThread->SendDataPacket(); } -void __fastcall mVU_XGKICK0(u32 addr) { mVU_XGKICK_<0>(addr); } -void __fastcall mVU_XGKICK1(u32 addr) { mVU_XGKICK_<1>(addr); } microVUf(void) mVU_XGKICK() { microVU* mVU = mVUx; @@ -1042,8 +1039,7 @@ microVUf(void) mVU_XGKICK() { else { mVUallocVIa(gprT2, _Fs_); // gprT2 = ECX for __fastcall PUSH32R(gprR); // gprR = EDX is volatile so backup - if (!vuIndex) CALLFunc((uptr)mVU_XGKICK0); - else CALLFunc((uptr)mVU_XGKICK1); + CALLFunc((uptr)mVU_XGKICK_); POP32R(gprR); // Restore } } diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 26034fa598..13d0d1814a 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -169,7 +169,7 @@ microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) { break; // XYW case 14: SSE_MOVLPS_XMM_to_Rm(gprReg, reg, offset); SSE_MOVHLPS_XMM_to_XMM(reg, reg); - SSE_MOVSS_XMM_to_Rm(gprReg, xmmT1, offset+8); + SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+8); break; // XYZ case 8: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset); break; // X case 4: SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); break; // Y @@ -242,19 +242,19 @@ microVUx(void) mVUmergeRegs(int dest, int src, int xyzw) { // Transforms the Address in gprReg to valid VU0/VU1 Address microVUt(void) mVUaddrFix(int gprReg) { if ( vuIndex == 1 ) { - AND32ItoR(EAX, 0x3ff); // wrap around - SHL32ItoR(EAX, 4); + AND32ItoR(gprReg, 0x3ff); // wrap around + SHL32ItoR(gprReg, 4); } else { u8 *jmpA, *jmpB; - CMP32ItoR(EAX, 0x400); + CMP32ItoR(gprReg, 0x400); jmpA = JL8(0); // if addr >= 0x4000, reads VU1's VF regs and VI regs - AND32ItoR(EAX, 0x43f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs! + AND32ItoR(gprReg, 0x43f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs! 
jmpB = JMP8(0); x86SetJ8(jmpA); - AND32ItoR(EAX, 0xff); // if addr < 0x4000, wrap around + AND32ItoR(gprReg, 0xff); // if addr < 0x4000, wrap around x86SetJ8(jmpB); - SHL32ItoR(EAX, 4); // multiply by 16 (shift left by 4) + SHL32ItoR(gprReg, 4); // multiply by 16 (shift left by 4) } } diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index a8ae64ace7..ab084d824b 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -584,7 +584,7 @@ microVUf(void) mVU_ITOF12() { mVU_ITOFx((uptr)mVU_ITOF_12); } microVUf(void) mVU_ITOF15() { mVU_ITOFx((uptr)mVU_ITOF_15); } microVUf(void) mVU_CLIP() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFMAC4(_Fs_, _Ft_); } + if (!recPass) { mVUanalyzeFMAC4(_Fs_, _Ft_); mVUlog("clip broken"); } else { int Fs, Ft; mVUallocFMAC17a(Fs, Ft); From 6f99ec45b97f49f452cf5c5da9d5f5fdb1388224 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Sun, 19 Apr 2009 02:14:50 +0000 Subject: [PATCH 095/143] Emitter: Fixed a small bug in the ModSib emitter logic which caused some forms of ESP to encode incorrectly. Implemented CALL/BSF/BSR [x86], MOVQ, MOVD, and MOVSS [mmx/xmm]. Renamed ix86_mmx.cpp and i86_sse.cpp to ix86_legacy_mmx.cpp and i8x_legacy_sse.cpp. iMMI.cpp: Removed mucho dead pre-SSE2 code. (old code can be grabbed from a tagged revision, in case it's useful for future reference). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1011 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/PS2Etypes.h | 2 + pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 32 +- pcsx2/x86/iMMI.cpp | 1540 ++++------------- pcsx2/x86/iR5900.h | 13 - pcsx2/x86/ix86/Makefile.am | 4 +- pcsx2/x86/ix86/implement/bittest.h | 72 +- pcsx2/x86/ix86/implement/dwshift.h | 6 + pcsx2/x86/ix86/implement/group2.h | 14 +- pcsx2/x86/ix86/implement/incdec.h | 7 +- pcsx2/x86/ix86/implement/jmpcall.h | 85 + pcsx2/x86/ix86/implement/movs.h | 32 +- pcsx2/x86/ix86/implement/test.h | 2 +- pcsx2/x86/ix86/implement/xmm/movqss.h | 109 ++ pcsx2/x86/ix86/ix86.cpp | 120 +- pcsx2/x86/ix86/ix86_inlines.inl | 30 +- pcsx2/x86/ix86/ix86_instructions.h | 161 +- pcsx2/x86/ix86/ix86_jmp.cpp | 33 +- pcsx2/x86/ix86/ix86_legacy.cpp | 44 +- pcsx2/x86/ix86/ix86_legacy_instructions.h | 3 - .../{ix86_mmx.cpp => ix86_legacy_mmx.cpp} | 72 +- .../{ix86_sse.cpp => ix86_legacy_sse.cpp} | 59 +- pcsx2/x86/ix86/ix86_types.h | 277 ++- 22 files changed, 1090 insertions(+), 1627 deletions(-) create mode 100644 pcsx2/x86/ix86/implement/jmpcall.h create mode 100644 pcsx2/x86/ix86/implement/xmm/movqss.h rename pcsx2/x86/ix86/{ix86_mmx.cpp => ix86_legacy_mmx.cpp} (85%) rename pcsx2/x86/ix86/{ix86_sse.cpp => ix86_legacy_sse.cpp} (96%) diff --git a/common/include/PS2Etypes.h b/common/include/PS2Etypes.h index 6f652e8736..474eb50eff 100644 --- a/common/include/PS2Etypes.h +++ b/common/include/PS2Etypes.h @@ -86,6 +86,7 @@ typedef unsigned int uint; #define __naked __declspec(naked) #define __unused /*unused*/ +#define __noinline __declspec(noinline) #define CALLBACK __stdcall #else // _MSC_VER @@ -134,6 +135,7 @@ typedef union _LARGE_INTEGER #define __unused __attribute__((unused)) #define _inline __inline__ __attribute__((unused)) #define __forceinline __attribute__((always_inline,unused)) +#define __noinline __attribute__((noinline)) #define __naked // GCC lacks the naked specifier #define CALLBACK // CALLBACK is win32-specific mess diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 6df72ec1ea..cd443e42ad 100644 --- 
a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2953,18 +2953,18 @@ RelativePath="..\..\x86\ix86\ix86_legacy_internal.h" > + + + + - - - - @@ -3004,6 +3004,10 @@ RelativePath="..\..\x86\ix86\implement\incdec.h" > + + @@ -3012,12 +3016,16 @@ RelativePath="..\..\x86\ix86\implement\test.h" > + + + + - - diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index e031a7a6c6..aebb01ff67 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -182,7 +182,7 @@ void recPMFHL() { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_WRITED|XMMINFO_READLO|XMMINFO_READHI) + int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READLO|XMMINFO_READHI ); int t0reg; @@ -248,16 +248,14 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_WRITED|XMMINFO_READLO|XMMINFO_READHI) assert(0); } -CPU_SSE_XMMCACHE_END - - recCall( Interp::PMFHL, _Rd_ ); + _clearNeededXMMregs(); } void recPMTHL() { if ( _Sa_ != 0 ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI ); if ( cpucaps.hasStreamingSIMD4Extensions ) { SSE4_BLENDPS_XMM_to_XMM(EEREC_LO, EEREC_S, 0x5); @@ -271,9 +269,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRIT SSE_SHUFPS_XMM_to_XMM(EEREC_HI, EEREC_HI, 0x72); } -CPU_SSE_XMMCACHE_END - - recCall( Interp::PMTHL, 0 ); + _clearNeededXMMregs(); } // MMX helper routines @@ -322,28 +318,15 @@ void recPSRLH( void ) { if ( !_Rd_ ) return; - CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) - if( (_Sa_&0xf) == 0 ) { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - } - else { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - SSE2_PSRLW_I8_to_XMM(EEREC_D,_Sa_&0xf ); - } - CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PSRLWItoR( t0reg, _Sa_&0xf ); - PSRLWItoR( t1reg, _Sa_&0xf ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + if( (_Sa_&0xf) == 0 ) { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + } + else { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + SSE2_PSRLW_I8_to_XMM(EEREC_D,_Sa_&0xf ); + } + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -351,28 +334,15 @@ void recPSRLW( void ) { if( !_Rd_ ) return; - CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) - if( _Sa_ == 0 ) { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - } - else { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - SSE2_PSRLD_I8_to_XMM(EEREC_D,_Sa_ ); - } - CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PSRLDItoR( t0reg, _Sa_ ); - PSRLDItoR( t1reg, _Sa_ ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + if( _Sa_ == 0 ) { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + } + else { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + SSE2_PSRLD_I8_to_XMM(EEREC_D,_Sa_ ); + } + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -380,28 
+350,15 @@ void recPSRAH( void ) { if ( !_Rd_ ) return; - CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) - if( (_Sa_&0xf) == 0 ) { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - } - else { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - SSE2_PSRAW_I8_to_XMM(EEREC_D,_Sa_&0xf ); - } - CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PSRAWItoR( t0reg, _Sa_&0xf ); - PSRAWItoR( t1reg, _Sa_&0xf ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + if( (_Sa_&0xf) == 0 ) { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + } + else { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + SSE2_PSRAW_I8_to_XMM(EEREC_D,_Sa_&0xf ); + } + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -409,28 +366,15 @@ void recPSRAW( void ) { if ( !_Rd_ ) return; - CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) - if( _Sa_ == 0 ) { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - } - else { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - SSE2_PSRAD_I8_to_XMM(EEREC_D,_Sa_ ); - } - CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PSRADItoR( t0reg, _Sa_ ); - PSRADItoR( t1reg, _Sa_ ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + if( _Sa_ == 0 ) { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + } + else { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + SSE2_PSRAD_I8_to_XMM(EEREC_D,_Sa_ ); + } + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -438,28 +382,15 @@ void recPSLLH( void ) { if ( !_Rd_ ) return; - CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) - if( (_Sa_&0xf) == 0 ) { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - } - else { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - SSE2_PSLLW_I8_to_XMM(EEREC_D,_Sa_&0xf ); - } - CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PSLLWItoR( t0reg, _Sa_&0xf ); - PSLLWItoR( t1reg, _Sa_&0xf ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + if( (_Sa_&0xf) == 0 ) { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + } + else { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + SSE2_PSLLW_I8_to_XMM(EEREC_D,_Sa_&0xf ); + } + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -467,28 +398,15 @@ void recPSLLW( void ) { if ( !_Rd_ ) return; - CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) - if( _Sa_ == 0 ) { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - } - else { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - SSE2_PSLLD_I8_to_XMM(EEREC_D,_Sa_ ); - } - CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - 
PSLLDItoR( t0reg, _Sa_ ); - PSLLDItoR( t1reg, _Sa_ ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); + if( _Sa_ == 0 ) { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + } + else { + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + SSE2_PSLLD_I8_to_XMM(EEREC_D,_Sa_ ); + } + _clearNeededXMMregs(); } /* @@ -550,7 +468,7 @@ void recPMAXW() { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if ( cpucaps.hasStreamingSIMD4Extensions ) { if( EEREC_S == EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); else if( EEREC_D == EEREC_S ) SSE4_PMAXSD_XMM_to_XMM(EEREC_D, EEREC_T); @@ -593,9 +511,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) _freeXMMreg(t0reg); } } -CPU_SSE_XMMCACHE_END - - recCall( Interp::PMAXW, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -603,51 +519,39 @@ void recPPACW() { if ( ! _Rd_ ) return; -CPU_SSE_XMMCACHE_START(((_Rs_!=0)?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) - if( _Rs_ == 0 ) { + int info = eeRecompileCodeXMM( ((_Rs_!=0)?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); + + if( _Rs_ == 0 ) { + SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88); + SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 8); + } + else { + int t0reg = _allocTempXMMreg(XMMT_INT, -1); + if( EEREC_D == EEREC_T ) { + SSE2_PSHUFD_XMM_to_XMM(t0reg, EEREC_S, 0x88); SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88); - SSE2_PSRLDQ_I8_to_XMM(EEREC_D, 8); + SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, t0reg); + _freeXMMreg(t0reg); } else { - int t0reg = _allocTempXMMreg(XMMT_INT, -1); - if( EEREC_D == EEREC_T ) { - SSE2_PSHUFD_XMM_to_XMM(t0reg, EEREC_S, 0x88); - SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0x88); - SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, t0reg); - _freeXMMreg(t0reg); - } - else { - SSE2_PSHUFD_XMM_to_XMM(t0reg, EEREC_T, 0x88); - SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_S, 0x88); - SSE2_PUNPCKLQDQ_XMM_to_XMM(t0reg, EEREC_D); + SSE2_PSHUFD_XMM_to_XMM(t0reg, EEREC_T, 0x88); + SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_S, 0x88); + SSE2_PUNPCKLQDQ_XMM_to_XMM(t0reg, EEREC_D); - // swap mmx regs.. don't ask - xmmregs[t0reg] = xmmregs[EEREC_D]; - xmmregs[EEREC_D].inuse = 0; - } + // swap mmx regs.. 
don't ask + xmmregs[t0reg] = xmmregs[EEREC_D]; + xmmregs[EEREC_D].inuse = 0; } -CPU_SSE_XMMCACHE_END + } - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - //Done - Refraction - Crude but quicker than int - MOV32MtoR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[2]); //Copy this one cos it could get overwritten - - MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UL[2]); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[3], EAX); - MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UL[0]); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[2], EAX); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[1], ECX); //This is where we bring it back - MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[0], EAX); + _clearNeededXMMregs(); } void recPPACH( void ) { if (!_Rd_) return; -CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { SSE2_PSHUFLW_XMM_to_XMM(EEREC_D, EEREC_T, 0x88); SSE2_PSHUFHW_XMM_to_XMM(EEREC_D, EEREC_D, 0x88); @@ -667,28 +571,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) _freeXMMreg(t0reg); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - //Done - Refraction - Crude but quicker than int - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].US[6]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[7], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].US[6]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[3], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].US[2]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[5], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].US[2]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[1], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].US[4]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[6], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].US[4]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[2], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].US[0]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[4], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].US[0]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[0], EAX); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -696,7 +579,7 @@ void recPPACB() { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _hasFreeXMMreg() ) { int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -728,9 +611,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) SSE2_PACKUSWB_XMM_to_XMM(EEREC_D, t0reg); _freeXMMreg(t0reg); } -CPU_SSE_XMMCACHE_END - - recCall( Interp::PPACB, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -738,7 +619,7 @@ void recPEXT5() { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -766,9 +647,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) _freeXMMreg(t0reg); _freeXMMreg(t1reg); -CPU_SSE_XMMCACHE_END - - recCall( Interp::PEXT5, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -776,7 +655,7 @@ void recPPAC5() { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -806,9 +685,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) _freeXMMreg(t0reg); _freeXMMreg(t1reg); -CPU_SSE_XMMCACHE_END - - recCall( Interp::PPAC5, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -816,29 +693,14 @@ void recPMAXH( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PMAXSW_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) SSE2_PMAXSW_XMM_to_XMM(EEREC_D, EEREC_S); else { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PMAXSW_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - SSE_PMAXSW_MM_to_MM( t0reg, t1reg ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - SSE_PMAXSW_MM_to_MM( t2reg, t3reg); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t2reg); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -846,7 +708,7 @@ void recPCGTB( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D != EEREC_T ) { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PCMPGTB_XMM_to_XMM(EEREC_D, EEREC_T); @@ -858,24 +720,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) SSE2_PCMPGTB_XMM_to_XMM(EEREC_D, t0reg); _freeXMMreg(t0reg); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - PCMPGTBRtoR( t0reg, t1reg ); - - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PCMPGTBRtoR( t2reg, t3reg); - - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t2reg); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -883,7 +728,7 @@ void recPCGTH( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D != EEREC_T ) { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PCMPGTW_XMM_to_XMM(EEREC_D, EEREC_T); @@ -895,24 +740,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) SSE2_PCMPGTW_XMM_to_XMM(EEREC_D, t0reg); _freeXMMreg(t0reg); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - - PCMPGTWRtoR( t0reg, t1reg ); - PCMPGTWRtoR( t2reg, t3reg); - - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t2reg); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -921,7 +749,7 @@ void recPCGTW( void ) //TODO:optimize RS | RT== 0 if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D != EEREC_T ) { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PCMPGTD_XMM_to_XMM(EEREC_D, EEREC_T); @@ -933,24 +761,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) SSE2_PCMPGTD_XMM_to_XMM(EEREC_D, t0reg); _freeXMMreg(t0reg); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - - PCMPGTDRtoR( t0reg, t1reg ); - PCMPGTDRtoR( t2reg, t3reg); - - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t2reg); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -958,29 +769,14 @@ void recPADDSB( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PADDSB_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) SSE2_PADDSB_XMM_to_XMM(EEREC_D, EEREC_S); else { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PADDSB_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PADDSBRtoR( t0reg, t2reg); - PADDSBRtoR( t1reg, t3reg); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -988,29 +784,14 @@ void recPADDSH( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PADDSW_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) SSE2_PADDSW_XMM_to_XMM(EEREC_D, EEREC_S); else { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PADDSW_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PADDSWRtoR( t0reg, t2reg); - PADDSWRtoR( t1reg, t3reg); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1019,7 +800,7 @@ void recPADDSW( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t2reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1063,16 +844,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) _freeXMMreg(t0reg); _freeXMMreg(t1reg); _freeXMMreg(t2reg); -CPU_SSE_XMMCACHE_END - - if( _Rd_ ) _deleteEEreg(_Rd_, 0); - _deleteEEreg(_Rs_, 1); - _deleteEEreg(_Rt_, 1); - _flushConstRegs(); - - MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (uptr)&cpuRegs.pc, pc ); - CALLFunc( (uptr)R5900::Interpreter::OpcodeImpl::MMI::PADDSW ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1080,7 +852,7 @@ void recPSUBSB( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PSUBSB_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) { int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1093,22 +865,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PSUBSB_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PSUBSBRtoR( t0reg, t2reg); - PSUBSBRtoR( t1reg, t3reg); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1116,7 +873,7 @@ void recPSUBSH( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PSUBSW_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) { int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1129,22 +886,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PSUBSW_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PSUBSWRtoR( t0reg, t2reg); - PSUBSWRtoR( t1reg, t3reg); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1153,7 +895,7 @@ void recPSUBSW( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t2reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1200,16 +942,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) _freeXMMreg(t0reg); _freeXMMreg(t1reg); _freeXMMreg(t2reg); -CPU_SSE_XMMCACHE_END - - if( _Rd_ ) _deleteEEreg(_Rd_, 0); - _deleteEEreg(_Rs_, 1); - _deleteEEreg(_Rt_, 1); - _flushConstRegs(); - - MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (uptr)&cpuRegs.pc, pc ); - CALLFunc( (uptr)R5900::Interpreter::OpcodeImpl::MMI::PSUBSW ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1217,29 +950,14 @@ void recPADDB( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PADDB_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) SSE2_PADDB_XMM_to_XMM(EEREC_D, EEREC_S); else { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PADDB_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PADDBRtoR( t0reg, t2reg ); - PADDBRtoR( t1reg, t3reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1247,7 +965,7 @@ void recPADDH( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D); else SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); @@ -1263,22 +981,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMI SSE2_PADDW_XMM_to_XMM(EEREC_D, EEREC_T); } } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PADDWRtoR( t0reg, t2reg ); - PADDWRtoR( t1reg, t3reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1286,7 +989,7 @@ void recPADDW( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D); else SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); @@ -1302,22 +1005,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMI SSE2_PADDD_XMM_to_XMM(EEREC_D, EEREC_T); } } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PADDDRtoR( t0reg, t2reg ); - PADDDRtoR( t1reg, t3reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1325,7 +1013,7 @@ void recPSUBB( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PSUBB_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) { int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1338,22 +1026,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PSUBB_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PSUBBRtoR( t0reg, t2reg ); - PSUBBRtoR( t1reg, t3reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1361,7 +1034,7 @@ void recPSUBH( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PSUBW_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) { int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1374,22 +1047,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PSUBW_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PSUBWRtoR( t0reg, t2reg ); - PSUBWRtoR( t1reg, t3reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1397,7 +1055,7 @@ void recPSUBW( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PSUBD_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) { int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1410,22 +1068,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PSUBD_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PSUBDRtoR( t0reg, t2reg); - PSUBDRtoR( t1reg, t3reg); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1433,7 +1076,7 @@ void recPEXTLW( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, EEREC_T); SSE2_PSRLQ_I8_to_XMM(EEREC_D, 32); @@ -1452,27 +1095,14 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) SSE2_PUNPCKLDQ_XMM_to_XMM(EEREC_D, EEREC_S); } } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ] ); - MOV32MtoR( ECX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ] ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 3 ], EAX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 2 ], ECX ); - - MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ] ); - MOV32MtoR( ECX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ], EAX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ], ECX ); + _clearNeededXMMregs(); } void recPEXTLB( void ) { if (!_Rd_) return; -CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { SSE2_PUNPCKLBW_XMM_to_XMM(EEREC_D, EEREC_T); SSE2_PSRLW_I8_to_XMM(EEREC_D, 8); @@ -1491,53 +1121,14 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) SSE2_PUNPCKLBW_XMM_to_XMM(EEREC_D, EEREC_S); } } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - //Done - Refraction - Crude but quicker than int - //Console::WriteLn("PEXTLB"); - //Rs = cpuRegs.GPR.r[_Rs_]; Rt = cpuRegs.GPR.r[_Rt_]; - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[7]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[15], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[7]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[14], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[6]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[13], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[6]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[12], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[5]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[11], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[5]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[10], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[4]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[9], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[4]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[8], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[3]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[7], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[3]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[6], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[2]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[5], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[2]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[4], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[1]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[3], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[1]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[2], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[0]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[1], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[0]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[0], EAX); + _clearNeededXMMregs(); } void recPEXTLH( void ) { if (!_Rd_) return; -CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( 
(_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { SSE2_PUNPCKLWD_XMM_to_XMM(EEREC_D, EEREC_T); SSE2_PSRLD_I8_to_XMM(EEREC_D, 16); @@ -1556,28 +1147,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) SSE2_PUNPCKLWD_XMM_to_XMM(EEREC_D, EEREC_S); } } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - //Done - Refraction - Crude but quicker than int - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].US[3]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[7], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].US[3]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[6], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].US[2]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[5], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].US[2]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[4], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].US[1]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[3], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].US[1]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[2], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].US[0]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[1], EAX); - MOV16MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].US[0]); - MOV16RtoM((uptr)&cpuRegs.GPR.r[_Rd_].US[0], EAX); + _clearNeededXMMregs(); } #endif @@ -1621,7 +1191,7 @@ void recPABSW() //needs clamping { if( !_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); SSE2_PCMPEQD_XMM_to_XMM(t0reg, t0reg); SSE2_PSLLD_I8_to_XMM(t0reg, 31); @@ -1640,15 +1210,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) } SSE2_PXOR_XMM_to_XMM(EEREC_D, t0reg); //0x80000000 -> 0x7fffffff _freeXMMreg(t0reg); -CPU_SSE_XMMCACHE_END - - _deleteEEreg(_Rt_, 1); - _deleteEEreg(_Rd_, 0); - _flushConstRegs(); - - MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (uptr)&cpuRegs.pc, pc ); - CALLFunc( (uptr)R5900::Interpreter::OpcodeImpl::MMI::PABSW ); + _clearNeededXMMregs(); } @@ -1657,7 +1219,7 @@ void recPABSH() { if( !_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); SSE2_PCMPEQW_XMM_to_XMM(t0reg, t0reg); SSE2_PSLLW_I8_to_XMM(t0reg, 15); @@ -1676,15 +1238,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) } SSE2_PXOR_XMM_to_XMM(EEREC_D, t0reg); //0x8000 -> 0x7fff _freeXMMreg(t0reg); -CPU_SSE_XMMCACHE_END - - _deleteEEreg(_Rt_, 1); - _deleteEEreg(_Rd_, 0); - _flushConstRegs(); - - MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (uptr)&cpuRegs.pc, pc ); - CALLFunc( (uptr)R5900::Interpreter::OpcodeImpl::MMI::PABSW ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1692,7 +1246,7 @@ void recPMINW() { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if ( cpucaps.hasStreamingSIMD4Extensions ) { if( EEREC_S == EEREC_T ) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); else if( EEREC_D == EEREC_S ) SSE4_PMINSD_XMM_to_XMM(EEREC_D, EEREC_T); @@ -1735,9 +1289,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) _freeXMMreg(t0reg); } } -CPU_SSE_XMMCACHE_END - - recCall( Interp::PMINW, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1745,7 +1297,7 @@ void recPADSBH() { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); int t0reg; if( EEREC_S == EEREC_T ) { @@ -1776,9 +1328,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) _freeXMMreg(t0reg); } -CPU_SSE_XMMCACHE_END - - recCall( Interp::PADSBH, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1786,7 +1336,7 @@ void recPADDUW() { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rt_ == 0 ) { if( _Rs_ == 0 ) { @@ -1824,9 +1374,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WR _freeXMMreg(t0reg); _freeXMMreg(t1reg); } - -CPU_SSE_XMMCACHE_END - recCall( Interp::PADDUW, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1834,7 +1382,7 @@ void recPSUBUB() { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PSUBUSB_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) { int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1847,9 +1395,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PSUBUSB_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - recCall( Interp::PSUBUB, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1857,7 +1403,7 @@ void recPSUBUH() { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PSUBUSW_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) { int t0reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1870,9 +1416,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PSUBUSW_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - recCall( Interp::PSUBUH, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1880,7 +1424,7 @@ void recPSUBUW() { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -1918,9 +1462,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) _freeXMMreg(t0reg); _freeXMMreg(t1reg); -CPU_SSE_XMMCACHE_END - - recCall( Interp::PSUBUW, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1928,7 +1470,7 @@ void recPEXTUH() { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { SSE2_PUNPCKHWD_XMM_to_XMM(EEREC_D, EEREC_T); SSE2_PSRLD_I8_to_XMM(EEREC_D, 16); @@ -1947,9 +1489,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) SSE2_PUNPCKHWD_XMM_to_XMM(EEREC_D, EEREC_S); } } -CPU_SSE_XMMCACHE_END - - recCall( Interp::PEXTUH, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -1971,42 +1511,41 @@ void recQFSRV() if ( !_Rd_ ) return; //Console::WriteLn("recQFSRV()"); - CPU_SSE2_XMMCACHE_START( XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED ) + int info = eeRecompileCodeXMM( XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED ); - u32 *ajmp[16]; - int i, j; - int t0reg = _allocTempXMMreg(XMMT_INT, -1); + u32 *ajmp[16]; + int i, j; + int t0reg = _allocTempXMMreg(XMMT_INT, -1); - SSE2_MOVDQA_XMM_to_XMM(t0reg, EEREC_S); - SSE2_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); + SSE2_MOVDQA_XMM_to_XMM(t0reg, EEREC_S); + SSE2_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_T); - MOV32MtoR(EAX, (uptr)&cpuRegs.sa); - SHL32ItoR(EAX, 4); // Multiply SA bytes by 16 bytes (the amount of bytes in QFSRVhelper() macros) - AND32ItoR(EAX, 0xf0); // This can possibly be removed but keeping it incase theres garbage in SA (cottonvibes) - ADD32ItoR(EAX, (uptr)x86Ptr + 7); // ADD32 = 5 bytes, JMPR = 2 bytes - JMPR(EAX); // Jumps to a QFSRVhelper() case below (a total of 16 different cases) + MOV32MtoR(EAX, (uptr)&cpuRegs.sa); + SHL32ItoR(EAX, 4); // Multiply SA bytes by 16 bytes (the amount of bytes in QFSRVhelper() macros) + AND32ItoR(EAX, 0xf0); // This can possibly be removed but keeping it incase theres garbage in SA (cottonvibes) + ADD32ItoR(EAX, (uptr)x86Ptr + 7); // ADD32 = 5 bytes, JMPR = 2 bytes + JMPR(EAX); // Jumps to a QFSRVhelper() case below (a total of 16 different cases) + + // Case 0: + QFSRVhelper0(); + + // Cases 1 to 15: + for (i = 1, j = 15; i < 16; i++, j--) { + QFSRVhelper(i, j); + } + + // Set jump addresses for the JMP32's in QFSRVhelper() + for (i = 1; i < 16; i++) { + x86SetJ32(ajmp[i]); + } + + // Concatenate the regs after appropriate shifts have been made + SSE2_POR_XMM_to_XMM(EEREC_D, t0reg); - // Case 0: - QFSRVhelper0(); + x86SetJ32(ajmp[0]); // Case 0 jumps to here (to skip the POR) + _freeXMMreg(t0reg); - // Cases 1 to 15: - for (i = 1, j = 15; i < 16; i++, j--) { - QFSRVhelper(i, j); - } - - // Set jump addresses for the JMP32's in QFSRVhelper() - for (i = 1; i < 16; i++) { - x86SetJ32(ajmp[i]); - } - - // Concatenate the regs after appropriate shifts have been made - SSE2_POR_XMM_to_XMM(EEREC_D, t0reg); - - x86SetJ32(ajmp[0]); // Case 0 jumps to here (to skip the POR) - _freeXMMreg(t0reg); - - CPU_SSE_XMMCACHE_END - //recCall( Interp::QFSRV, _Rd_ ); + _clearNeededXMMregs(); } @@ -2014,7 +1553,8 @@ void recPEXTUB( void ) { if (!_Rd_) return; -CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); + if( _Rs_ == 0 ) { SSE2_PUNPCKHBW_XMM_to_XMM(EEREC_D, EEREC_T); SSE2_PSRLW_I8_to_XMM(EEREC_D, 8); @@ -2033,44 +1573,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) SSE2_PUNPCKHBW_XMM_to_XMM(EEREC_D, EEREC_S); } } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - //Done - Refraction - Crude but faster than int - MOV8MtoR(EAX, 
(uptr)&cpuRegs.GPR.r[_Rt_].UC[8]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[0], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[8]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[1], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[9]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[2], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[9]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[3], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[10]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[4], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[10]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[5], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[11]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[6], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[11]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[7], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[12]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[8], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[12]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[9], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[13]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[10], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[13]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[11], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[14]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[12], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[14]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[13], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UC[15]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[14], EAX); - MOV8MtoR(EAX, (uptr)&cpuRegs.GPR.r[_Rs_].UC[15]); - MOV8RtoM((uptr)&cpuRegs.GPR.r[_Rd_].UC[15], EAX); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2078,7 +1581,7 @@ void recPEXTUW( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED ); if( _Rs_ == 0 ) { SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_D, EEREC_T); SSE2_PSRLQ_I8_to_XMM(EEREC_D, 32); @@ -2097,20 +1600,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|XMMINFO_READT|XMMINFO_WRITED) SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_D, EEREC_S); } } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UL[ 2 ] ); - MOV32MtoR( ECX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 2 ] ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ], EAX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ], ECX ); - - MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UL[ 3 ] ); - MOV32MtoR( ECX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 3 ] ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 3 ], EAX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 2 ], ECX ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2118,29 +1608,14 @@ void recPMINH( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PMINSW_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) SSE2_PMINSW_XMM_to_XMM(EEREC_D, EEREC_S); else { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PMINSW_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - SSE_PMINSW_MM_to_MM( t0reg, t2reg ); - SSE_PMINSW_MM_to_MM( t1reg, t3reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2148,30 +1623,14 @@ void recPCEQB( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PCMPEQB_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) SSE2_PCMPEQB_XMM_to_XMM(EEREC_D, EEREC_S); else { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PCMPEQB_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PCMPEQBRtoR( t0reg, t2reg ); - PCMPEQBRtoR( t1reg, t3reg ); - - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2179,30 +1638,14 @@ void recPCEQH( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PCMPEQW_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) SSE2_PCMPEQW_XMM_to_XMM(EEREC_D, EEREC_S); else { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PCMPEQW_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PCMPEQWRtoR( t0reg, t2reg ); - PCMPEQWRtoR( t1reg, t3reg ); - - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2210,30 +1653,14 @@ void recPCEQW( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PCMPEQD_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) SSE2_PCMPEQD_XMM_to_XMM(EEREC_D, EEREC_S); else { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PCMPEQD_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PCMPEQDRtoR( t0reg, t2reg ); - PCMPEQDRtoR( t1reg, t3reg ); - - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2241,7 +1668,7 @@ void recPADDUB( void ) { if ( ! _Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rt_ ) { if( EEREC_D == EEREC_S ) SSE2_PADDUSB_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) SSE2_PADDUSB_XMM_to_XMM(EEREC_D, EEREC_S); @@ -2251,22 +1678,7 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED) } } else SSE2_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PADDUSBRtoR( t0reg, t2reg ); - PADDUSBRtoR( t1reg, t3reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2274,29 +1686,14 @@ void recPADDUH( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) SSE2_PADDUSW_XMM_to_XMM(EEREC_D, EEREC_T); else if( EEREC_D == EEREC_T ) SSE2_PADDUSW_XMM_to_XMM(EEREC_D, EEREC_S); else { SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSE2_PADDUSW_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PADDUSWRtoR( t0reg, t2reg ); - PADDUSWRtoR( t1reg, t3reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } #endif @@ -2342,7 +1739,8 @@ void recPMADDW() recCall( Interp::PMADDW, _Rd_ ); return; } -CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI) + + int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI ); SSE_SHUFPS_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88); SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]} if( _Rd_ ) { @@ -2377,7 +1775,7 @@ CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMM } SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO); SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI); -CPU_SSE_XMMCACHE_END + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2386,7 +1784,7 @@ void recPSLLVW() if ( ! _Rd_ ) return; EEINST_SETSIGNEXT(_Rd_); -CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) { SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D); @@ -2444,8 +1842,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WR _freeXMMreg(t0reg); _freeXMMreg(t1reg); } -CPU_SSE_XMMCACHE_END - recCall( Interp::PSLLVW, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2454,7 +1851,7 @@ void recPSRLVW() if ( ! 
_Rd_ ) return; EEINST_SETSIGNEXT(_Rd_); -CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) { SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D); @@ -2512,9 +1909,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WR _freeXMMreg(t0reg); _freeXMMreg(t1reg); } - -CPU_SSE_XMMCACHE_END - recCall( Interp::PSRLVW, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2527,7 +1922,7 @@ void recPMSUBW() recCall( Interp::PMSUBW, _Rd_ ); return; } -CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI) + int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI ); SSE_SHUFPS_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88); SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]} if( _Rd_ ) { @@ -2568,7 +1963,7 @@ CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMM } SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO); SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI); -CPU_SSE_XMMCACHE_END + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2581,7 +1976,7 @@ void recPMULTW() recCall( Interp::PMULTW, _Rd_ ); return; } -CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI) + int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI ); if( !_Rs_ || !_Rt_ ) { if( _Rd_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D); SSE2_PXOR_XMM_to_XMM(EEREC_LO, EEREC_LO); @@ -2613,7 +2008,7 @@ CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMM SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_LO, EEREC_LO); SSE4_PMOVSXDQ_XMM_to_XMM(EEREC_HI, EEREC_HI); } -CPU_SSE_XMMCACHE_END + _clearNeededXMMregs(); } //////////////////////////////////////////////////// void recPDIVW() @@ -2636,7 +2031,7 @@ PCSX2_ALIGNED16(int s_mask1[4]) = {~0, 0, ~0, 0}; //contains the upper multiplication result (before the addition with the lower multiplication result) void recPHMADH() { -CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI) + int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); SSEX_MOVDQA_XMM_to_XMM(t0reg, EEREC_S); @@ -2671,14 +2066,12 @@ CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMI SSE_SHUFPS_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xd8); _freeXMMreg(t0reg); -CPU_SSE_XMMCACHE_END - - recCall( Interp::PHMADH, _Rd_ ); + _clearNeededXMMregs(); } void recPMSUBH() { - CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI) + int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -2732,8 +2125,7 @@ void recPMSUBH() _freeXMMreg(t0reg); 
_freeXMMreg(t1reg); -CPU_SSE_XMMCACHE_END - recCall( Interp::PMSUBH, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2741,7 +2133,7 @@ CPU_SSE_XMMCACHE_END //it contains the NOT of the upper multiplication result (before the substraction of the lower multiplication result) void recPHMSBH() { -CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI) + int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITELO|XMMINFO_WRITEHI ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); SSE2_PCMPEQD_XMM_to_XMM(EEREC_LO, EEREC_LO); @@ -2768,9 +2160,7 @@ CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMI SSE_SHUFPS_XMM_to_XMM(EEREC_HI, EEREC_HI, 0xd8); _freeXMMreg(t0reg); -CPU_SSE_XMMCACHE_END - - recCall( Interp::PHMSBH, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2778,12 +2168,10 @@ void recPEXEH( void ) { if (!_Rd_) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); SSE2_PSHUFLW_XMM_to_XMM(EEREC_D, EEREC_T, 0xc6); SSE2_PSHUFHW_XMM_to_XMM(EEREC_D, EEREC_D, 0xc6); -CPU_SSE_XMMCACHE_END - - recCall( Interp::PEXEH, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2792,12 +2180,10 @@ void recPREVH( void ) if (!_Rd_) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); SSE2_PSHUFLW_XMM_to_XMM(EEREC_D, EEREC_T, 0x1B); SSE2_PSHUFHW_XMM_to_XMM(EEREC_D, EEREC_D, 0x1B); -CPU_SSE_XMMCACHE_END - - recCall( Interp::PREVH, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -2805,7 +2191,7 @@ void recPINTH( void ) { if (!_Rd_) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED ); if( EEREC_D == EEREC_S ) { int t0reg = _allocTempXMMreg(XMMT_INT, -1); SSE_MOVHLPS_XMM_to_XMM(t0reg, EEREC_S); @@ -2817,80 +2203,30 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED) SSE_MOVLHPS_XMM_to_XMM(EEREC_D, EEREC_T); SSE2_PUNPCKHWD_XMM_to_XMM(EEREC_D, EEREC_S); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - //Done - Refraction - MOV16MtoR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].US[4]); - MOV16MtoR( EBX, (uptr)&cpuRegs.GPR.r[_Rt_].US[1]); - MOV16MtoR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].US[2]); - MOV16MtoR( EDX, (uptr)&cpuRegs.GPR.r[_Rt_].US[0]); - - MOV16RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].US[1], EAX); - MOV16RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].US[2], EBX); - MOV16RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].US[4], ECX); - MOV16RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].US[0], EDX); - - MOV16MtoR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].US[5]); - MOV16MtoR( EBX, (uptr)&cpuRegs.GPR.r[_Rs_].US[6]); - MOV16MtoR( ECX, (uptr)&cpuRegs.GPR.r[_Rs_].US[7]); - MOV16MtoR( EDX, (uptr)&cpuRegs.GPR.r[_Rt_].US[3]); - - MOV16RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].US[3], EAX); - MOV16RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].US[5], EBX); - MOV16RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].US[7], ECX); - MOV16RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].US[6], EDX); + _clearNeededXMMregs(); } void recPEXEW( void ) { if (!_Rd_) return; -CPU_SSE_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0xc6); -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - 
_deleteEEreg(_Rd_, 0); - - MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[2]); - MOV32MtoR( EBX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[1]); - MOV32MtoR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]); - MOV32MtoR( EDX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[3]); - - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[0], EAX); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[1], EBX); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[2], ECX); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[3], EDX); + _clearNeededXMMregs(); } void recPROT3W( void ) { if (!_Rd_) return; -CPU_SSE_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0xc9); -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[1]); - MOV32MtoR( EBX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[2]); - MOV32MtoR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[0]); - MOV32MtoR( EDX, (uptr)&cpuRegs.GPR.r[_Rt_].UL[3]); - - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[0], EAX); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[1], EBX); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[2], ECX); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[3], EDX); + _clearNeededXMMregs(); } void recPMULTH( void ) { -CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_S); @@ -2920,102 +2256,16 @@ CPU_SSE2_XMMCACHE_START(XMMINFO_READS|XMMINFO_READT|(_Rd_?XMMINFO_WRITED:0)|XMMI SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_HI, t0reg); _freeXMMreg(t0reg); -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - _deleteEEreg(XMMGPR_LO, 0); - _deleteEEreg(XMMGPR_HI, 0); - _deleteGPRtoXMMreg(_Rs_, 1); - _deleteGPRtoXMMreg(_Rt_, 1); - - if(!_Rt_ || !_Rs_) { - MOV32ItoM( (uptr)&cpuRegs.LO.UL[0], 0); - MOV32ItoM( (uptr)&cpuRegs.LO.UL[1], 0); - MOV32ItoM( (uptr)&cpuRegs.LO.UL[2], 0); - MOV32ItoM( (uptr)&cpuRegs.LO.UL[3], 0); - MOV32ItoM( (uptr)&cpuRegs.HI.UL[0], 0); - MOV32ItoM( (uptr)&cpuRegs.HI.UL[1], 0); - MOV32ItoM( (uptr)&cpuRegs.HI.UL[2], 0); - MOV32ItoM( (uptr)&cpuRegs.HI.UL[3], 0); - - if( _Rd_ ) { - MOV32ItoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[0], 0); - MOV32ItoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[1], 0); - MOV32ItoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[2], 0); - MOV32ItoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[3], 0); - } - return; - } - - //Done - Refraction - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[0]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[0]); - IMUL32RtoR( EAX, ECX); - MOV32RtoM( (uptr)&cpuRegs.LO.UL[0], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[1]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[1]); - IMUL32RtoR( EAX, ECX); - MOV32RtoM( (uptr)&cpuRegs.LO.UL[1], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[2]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[2]); - IMUL32RtoR( EAX, ECX); - MOV32RtoM( (uptr)&cpuRegs.HI.UL[0], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[3]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[3]); - IMUL32RtoR( EAX, ECX); - MOV32RtoM( (uptr)&cpuRegs.HI.UL[1], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[4]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[4]); - IMUL32RtoR( EAX, ECX); - MOV32RtoM( (uptr)&cpuRegs.LO.UL[2], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[5]); - MOVSX32M16toR( ECX, 
(uptr)&cpuRegs.GPR.r[_Rt_].SS[5]); - IMUL32RtoR( EAX, ECX); - MOV32RtoM( (uptr)&cpuRegs.LO.UL[3], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[6]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[6]); - IMUL32RtoR( EAX, ECX); - MOV32RtoM( (uptr)&cpuRegs.HI.UL[2], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[7]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[7]); - IMUL32RtoR( EAX, ECX); - MOV32RtoM( (uptr)&cpuRegs.HI.UL[3], EAX); - - if (_Rd_) { - MOV32MtoR( EAX, (uptr)&cpuRegs.LO.UL[0]); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[0], EAX); - MOV32MtoR( EAX, (uptr)&cpuRegs.HI.UL[0]); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[1], EAX); - MOV32MtoR( EAX, (uptr)&cpuRegs.LO.UL[2]); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[2], EAX); - MOV32MtoR( EAX, (uptr)&cpuRegs.HI.UL[2]); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[3], EAX); - } + _clearNeededXMMregs(); } void recPMFHI( void ) { if ( ! _Rd_ ) return; -CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|XMMINFO_READHI) + int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READHI ); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_HI); -CPU_SSE_XMMCACHE_END - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.HI.UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.HI.UD[ 1 ] ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -3023,20 +2273,9 @@ void recPMFLO( void ) { if ( ! _Rd_ ) return; -CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|XMMINFO_READLO) + int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READLO ); SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_LO); -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.LO.UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.LO.UD[ 1 ] ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -3044,7 +2283,7 @@ void recPAND( void ) { if ( ! _Rd_ ) return; -CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT) + int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT ); if( EEREC_D == EEREC_T ) { SSEX_PAND_XMM_to_XMM(EEREC_D, EEREC_S); } @@ -3055,20 +2294,7 @@ CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSEX_PAND_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - PANDMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - PANDMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -3076,7 +2302,7 @@ void recPXOR( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT) + int info = eeRecompileCodeXMM( XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT ); if( EEREC_D == EEREC_T ) { SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_S); } @@ -3087,21 +2313,7 @@ CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT) SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_T); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - PXORMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - PXORMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -3109,39 +2321,28 @@ void recPCPYLD( void ) { if ( ! _Rd_ ) return; -CPU_SSE_XMMCACHE_START(XMMINFO_WRITED|(( _Rs_== 0) ? 0:XMMINFO_READS)|XMMINFO_READT) - if( _Rs_ == 0 ) { - SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_T); + int info = eeRecompileCodeXMM( XMMINFO_WRITED|(( _Rs_== 0) ? 0:XMMINFO_READS)|XMMINFO_READT ); + if( _Rs_ == 0 ) { + SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_T); + } + else { + if( EEREC_D == EEREC_T ) SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, EEREC_S); + else if( EEREC_S == EEREC_T ) SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_S, 0x44); + else if( EEREC_D == EEREC_S ) { + SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, EEREC_T); + SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_D, 0x4e); } else { - if( EEREC_D == EEREC_T ) SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, EEREC_S); - else if( EEREC_S == EEREC_T ) SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_S, 0x44); - else if( EEREC_D == EEREC_S ) { - SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, EEREC_T); - SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_D, 0x4e); - } - else { - SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_T); - SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, EEREC_S); - } + SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_T); + SSE2_PUNPCKLQDQ_XMM_to_XMM(EEREC_D, EEREC_S); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t1reg ); - SetMMXstate(); - ) + } + _clearNeededXMMregs(); } void recPMADDH( void ) { - CPU_SSE2_XMMCACHE_START((_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI) + int info = eeRecompileCodeXMM( (_Rd_?XMMINFO_WRITED:0)|XMMINFO_READS|XMMINFO_READT|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI ); int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -3195,69 +2396,7 @@ void recPMADDH( void ) _freeXMMreg(t0reg); _freeXMMreg(t1reg); -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - _deleteEEreg(XMMGPR_LO, 1); - _deleteEEreg(XMMGPR_HI, 1); - _deleteGPRtoXMMreg(_Rs_, 1); - _deleteGPRtoXMMreg(_Rt_, 1); - - if(_Rt_ && _Rs_){ - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[0]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[0]); - IMUL32RtoR( EAX, ECX); - ADD32RtoM( (uptr)&cpuRegs.LO.UL[0], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[1]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[1]); - IMUL32RtoR( EAX, ECX); - ADD32RtoM( 
(uptr)&cpuRegs.LO.UL[1], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[2]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[2]); - IMUL32RtoR( EAX, ECX); - ADD32RtoM( (uptr)&cpuRegs.HI.UL[0], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[3]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[3]); - IMUL32RtoR( EAX, ECX); - ADD32RtoM( (uptr)&cpuRegs.HI.UL[1], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[4]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[4]); - IMUL32RtoR( EAX, ECX); - ADD32RtoM( (uptr)&cpuRegs.LO.UL[2], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[5]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[5]); - IMUL32RtoR( EAX, ECX); - ADD32RtoM( (uptr)&cpuRegs.LO.UL[3], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[6]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[6]); - IMUL32RtoR( EAX, ECX); - ADD32RtoM( (uptr)&cpuRegs.HI.UL[2], EAX); - - MOVSX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[_Rs_].SS[7]); - MOVSX32M16toR( ECX, (uptr)&cpuRegs.GPR.r[_Rt_].SS[7]); - IMUL32RtoR( EAX, ECX); - ADD32RtoM( (uptr)&cpuRegs.HI.UL[3], EAX); - - } - - if (_Rd_) { - MOV32MtoR( EAX, (uptr)&cpuRegs.LO.UL[0]); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[0], EAX); - MOV32MtoR( EAX, (uptr)&cpuRegs.HI.UL[0]); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[1], EAX); - MOV32MtoR( EAX, (uptr)&cpuRegs.LO.UL[2]); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[2], EAX); - MOV32MtoR( EAX, (uptr)&cpuRegs.HI.UL[2]); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[_Rd_].UL[3], EAX); - } + _clearNeededXMMregs(); } #endif @@ -3291,7 +2430,7 @@ void recPSRAVW() if ( ! _Rd_ ) return; EEINST_SETSIGNEXT(_Rd_); -CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) { SSEX_PXOR_XMM_to_XMM(EEREC_D, EEREC_D); @@ -3350,13 +2489,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WR _freeXMMreg(t1reg); } -CPU_SSE_XMMCACHE_END - - MOV32ItoM( (uptr)&cpuRegs.code, (u32)cpuRegs.code ); - MOV32ItoM( (uptr)&cpuRegs.pc, (u32)pc ); - iFlushCall(FLUSH_EVERYTHING); - if( _Rd_ > 0 ) _deleteEEreg(_Rd_, 0); - CALLFunc( (uptr)R5900::Interpreter::OpcodeImpl::MMI::PSRAVW ); + _clearNeededXMMregs(); } @@ -3367,7 +2500,7 @@ void recPINTEH() { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WRITED ); int t0reg = -1; @@ -3410,9 +2543,7 @@ CPU_SSE2_XMMCACHE_START((_Rs_?XMMINFO_READS:0)|(_Rt_?XMMINFO_READT:0)|XMMINFO_WR } if( t0reg >= 0 ) _freeXMMreg(t0reg); -CPU_SSE_XMMCACHE_END - - recCall( Interp::PINTEH, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -3421,7 +2552,7 @@ void recPMULTUW() if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_); EEINST_SETSIGNEXT(_Rs_); EEINST_SETSIGNEXT(_Rt_); -CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI) + int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI ); if( !_Rs_ || !_Rt_ ) { if( _Rd_ ) SSE2_PXOR_XMM_to_XMM(EEREC_D, EEREC_D); SSE2_PXOR_XMM_to_XMM(EEREC_LO, EEREC_LO); @@ -3461,8 +2592,7 @@ CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMM _freeXMMreg(t0reg); } } -CPU_SSE_XMMCACHE_END - recCall( Interp::PMULTUW, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -3471,7 +2601,7 @@ void recPMADDUW() if( _Rd_ ) EEINST_SETSIGNEXT(_Rd_); EEINST_SETSIGNEXT(_Rs_); EEINST_SETSIGNEXT(_Rt_); -CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI) + int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI ); SSE_SHUFPS_XMM_to_XMM(EEREC_LO, EEREC_HI, 0x88); SSE2_PSHUFD_XMM_to_XMM(EEREC_LO, EEREC_LO, 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]} if( _Rd_ ) { @@ -3516,9 +2646,7 @@ CPU_SSE2_XMMCACHE_START((((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMM SSE2_PUNPCKHDQ_XMM_to_XMM(EEREC_HI, t0reg); _freeXMMreg(t0reg); } -CPU_SSE_XMMCACHE_END - - recCall( Interp::PMADDUW, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -3535,11 +2663,9 @@ void recPEXCW() { if (!_Rd_) return; -CPU_SSE_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_T, 0xd8); -CPU_SSE_XMMCACHE_END - -recCall( Interp::PEXCW, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -3547,12 +2673,10 @@ void recPEXCH( void ) { if (!_Rd_) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); SSE2_PSHUFLW_XMM_to_XMM(EEREC_D, EEREC_T, 0xd8); SSE2_PSHUFHW_XMM_to_XMM(EEREC_D, EEREC_D, 0xd8); -CPU_SSE_XMMCACHE_END - - recCall( Interp::PEXCH, _Rd_ ); + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -3560,7 +2684,7 @@ void recPNOR( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) { @@ -3605,110 +2729,60 @@ CPU_SSE2_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMI SSEX_PXOR_XMM_to_XMM( EEREC_D, t0reg ); _freeXMMreg(t0reg); } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP3( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - PORMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - PORMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - PCMPEQDRtoR( t2reg, t2reg ); - PXORRtoR( t0reg, t2reg ); - PXORRtoR( t1reg, t2reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// void recPMTHI( void ) { -CPU_SSE_XMMCACHE_START(XMMINFO_READS|XMMINFO_WRITEHI) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_WRITEHI ); SSEX_MOVDQA_XMM_to_XMM(EEREC_HI, EEREC_S); -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(XMMGPR_HI, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQRtoM( (uptr)&cpuRegs.HI.UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.HI.UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// void recPMTLO( void ) { -CPU_SSE_XMMCACHE_START(XMMINFO_READS|XMMINFO_WRITELO) + int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_WRITELO ); SSEX_MOVDQA_XMM_to_XMM(EEREC_LO, EEREC_S); -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(XMMGPR_LO, 0); - _deleteGPRtoXMMreg(_Rs_, 1); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQRtoM( (uptr)&cpuRegs.LO.UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.LO.UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// void recPCPYUD( void ) { - if ( ! _Rd_ ) return; + if ( ! _Rd_ ) return; -CPU_SSE_XMMCACHE_START(XMMINFO_READS|(( _Rt_ == 0) ? 0:XMMINFO_READT)|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READS|(( _Rt_ == 0) ? 
0:XMMINFO_READT)|XMMINFO_WRITED ); - if( _Rt_ == 0 ) { - if( EEREC_D == EEREC_S ) { - SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_D, EEREC_S); - SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_D); - } - else { - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_D); - } + if( _Rt_ == 0 ) { + if( EEREC_D == EEREC_S ) { + SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_D, EEREC_S); + SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_D); } else { - if( EEREC_D == EEREC_S ) SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_D, EEREC_T); - else if( EEREC_D == EEREC_T ) { - //TODO - SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_D, EEREC_S); - SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_D, 0x4e); + SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE2_MOVQ_XMM_to_XMM(EEREC_D, EEREC_D); + } + } + else { + if( EEREC_D == EEREC_S ) SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_D, EEREC_T); + else if( EEREC_D == EEREC_T ) { + //TODO + SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_D, EEREC_S); + SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_D, 0x4e); + } + else { + if( EEREC_S == EEREC_T ) { + SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_S, 0xee); } else { - if( EEREC_S == EEREC_T ) { - SSE2_PSHUFD_XMM_to_XMM(EEREC_D, EEREC_S, 0xee); - } - else { - SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); - SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_D, EEREC_T); - } + SSEX_MOVDQA_XMM_to_XMM(EEREC_D, EEREC_S); + SSE2_PUNPCKHQDQ_XMM_to_XMM(EEREC_D, EEREC_T); } } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + } + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -3716,7 +2790,7 @@ void recPOR( void ) { if ( ! _Rd_ ) return; -CPU_SSE_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( (_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMINFO_WRITED ); if( _Rs_ == 0 ) { if( _Rt_ == 0 ) { @@ -3745,23 +2819,7 @@ CPU_SSE_XMMCACHE_START((_Rs_!=0?XMMINFO_READS:0)|(_Rt_!=0?XMMINFO_READT:0)|XMMIN } } } -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - MMX_ALLOC_TEMP2( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - if ( _Rt_ != 0 ) - { - PORMtoR ( t0reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - PORMtoR ( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - } - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) + _clearNeededXMMregs(); } //////////////////////////////////////////////////// @@ -3769,28 +2827,10 @@ void recPCPYH( void ) { if ( ! 
_Rd_ ) return; -CPU_SSE2_XMMCACHE_START(XMMINFO_READT|XMMINFO_WRITED) + int info = eeRecompileCodeXMM( XMMINFO_READT|XMMINFO_WRITED ); SSE2_PSHUFLW_XMM_to_XMM(EEREC_D, EEREC_T, 0); SSE2_PSHUFHW_XMM_to_XMM(EEREC_D, EEREC_D, 0); -CPU_SSE_XMMCACHE_END - - _flushCachedRegs(); - _deleteEEreg(_Rd_, 0); - - //PUSH32R( EBX ); - MOVZX32M16toR( EAX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOV32RtoR( ECX, EAX ); - SHL32ItoR( ECX, 16 ); - OR32RtoR( EAX, ECX ); - MOVZX32M16toR( EDX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - MOV32RtoR( ECX, EDX ); - SHL32ItoR( ECX, 16 ); - OR32RtoR( EDX, ECX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 0 ], EAX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 1 ], EAX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 2 ], EDX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UL[ 3 ], EDX ); - //POP32R( EBX ); + _clearNeededXMMregs(); } #endif // else MMI3_RECOMPILE diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 5f0644c073..a14f1cee3a 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -249,19 +249,6 @@ void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode #define XMMINFO_READACC 0x200 #define XMMINFO_WRITEACC 0x400 -#define CPU_SSE_XMMCACHE_START(xmminfo) \ - { \ - int info = eeRecompileCodeXMM(xmminfo); \ - -#define CPU_SSE2_XMMCACHE_START(xmminfo) \ - { \ - int info = eeRecompileCodeXMM(xmminfo); \ - -#define CPU_SSE_XMMCACHE_END \ - _clearNeededXMMregs(); \ - return; \ - } \ - #define FPURECOMPILE_CONSTCODE(fn, xmminfo) \ void rec##fn(void) \ { \ diff --git a/pcsx2/x86/ix86/Makefile.am b/pcsx2/x86/ix86/Makefile.am index caecac9851..c09946d951 100644 --- a/pcsx2/x86/ix86/Makefile.am +++ b/pcsx2/x86/ix86/Makefile.am @@ -2,7 +2,7 @@ INCLUDES = -I@srcdir@/.. -I@srcdir@/../../ -I@srcdir@/../../../common/include noinst_LIBRARIES = libix86.a libix86_a_SOURCES = \ -ix86.cpp ix86_cpudetect.cpp ix86_fpu.cpp ix86_jmp.cpp ix86_mmx.cpp ix86_tools.cpp ix86_3dnow.cpp \ -ix86_legacy.cpp ix86_sse.cpp \ +ix86.cpp ix86_cpudetect.cpp ix86_fpu.cpp ix86_jmp.cpp ix86_legacy_mmx.cpp ix86_tools.cpp ix86_3dnow.cpp \ +ix86_legacy.cpp ix86_legacy_sse.cpp \ ix86_internal.h ix86_legacy_instructions.h ix86_macros.h ix86_sse_helpers.h ix86.h ix86_legacy_internal.h \ ix86_instructions.h ix86_legacy_types.h ix86_types.h \ No newline at end of file diff --git a/pcsx2/x86/ix86/implement/bittest.h b/pcsx2/x86/ix86/implement/bittest.h index 4f147a7b82..a84e3cce6b 100644 --- a/pcsx2/x86/ix86/implement/bittest.h +++ b/pcsx2/x86/ix86/implement/bittest.h @@ -18,7 +18,7 @@ #pragma once -// Implementations found here: BTS/BT/BTC/BTR! +// Implementations found here: BTS/BT/BTC/BTR plus BSF/BSR! // Note: This header is meant to be included from within the x86Emitter::Internal namespace. // These instructions are in the 'Group8' as per Intel's manual, but since they all have @@ -46,6 +46,7 @@ protected: public: Group8Impl() {} // For the love of GCC. 
+ // ------------------------------------------------------------------------ static __emitinline void Emit( const iRegister& bitbase, const iRegister& bitoffset ) { prefix16(); @@ -54,6 +55,7 @@ public: ModRM_Direct( bitoffset.Id, bitbase.Id ); } + // ------------------------------------------------------------------------ static __emitinline void Emit( void* bitbase, const iRegister& bitoffset ) { prefix16(); @@ -62,6 +64,7 @@ public: iWriteDisp( bitoffset.Id, bitbase.Id ); } + // ------------------------------------------------------------------------ static __emitinline void Emit( const ModSibBase& bitbase, const iRegister& bitoffset ) { prefix16(); @@ -70,6 +73,7 @@ public: EmitSibMagic( bitoffset.Id, bitbase ); } + // ------------------------------------------------------------------------ static __emitinline void Emit( const iRegister& bitbase, u8 immoffset ) { prefix16(); @@ -78,6 +82,7 @@ public: iWrite( immoffset ); } + // ------------------------------------------------------------------------ static __emitinline void Emit( const ModSibStrict& bitbase, u8 immoffset ) { prefix16(); @@ -115,3 +120,68 @@ public: Group8ImplAll() {} }; + + +////////////////////////////////////////////////////////////////////////////////////////// +// BSF / BSR -- 16/32 operands supported only. +// +template< bool isReverse, typename ImmType > +class BitScanImpl +{ +protected: + static const uint OperandSize = sizeof(ImmType); + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void emitbase() + { + prefix16(); + iWrite( 0x0f ); + iWrite( isReverse ? 0xbd : 0xbc ); + } + +public: + BitScanImpl() {} // For the love of GCC. + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const iRegister& from ) + { + emitbase(); + ModRM_Direct( to.Id, from.Id ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const void* src ) + { + emitbase(); + iWriteDisp( to.Id, src ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const ModSibBase& sibsrc ) + { + emitbase(); + EmitSibMagic( to.Id, sibsrc ); + } +}; + + +// ------------------------------------------------------------------- +// BSF/BSR -- 16 and 32 bit operand forms only! 
+// +template< bool isReverse > +class BitScanImplAll +{ +protected: + typedef BitScanImpl m_32; + typedef BitScanImpl m_16; + +public: + __forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( to, from ); } + __forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( to, from ); } + __forceinline void operator()( const iRegister32& to, const void* src ) const { m_32::Emit( to, src ); } + __forceinline void operator()( const iRegister16& to, const void* src ) const { m_16::Emit( to, src ); } + __noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); } + __noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); } + + BitScanImplAll() {} +}; + diff --git a/pcsx2/x86/ix86/implement/dwshift.h b/pcsx2/x86/ix86/implement/dwshift.h index fc32e81d60..7fb64f4b7a 100644 --- a/pcsx2/x86/ix86/implement/dwshift.h +++ b/pcsx2/x86/ix86/implement/dwshift.h @@ -45,6 +45,7 @@ protected: public: DwordShiftImpl() {} // because GCC doesn't like static classes + // ------------------------------------------------------------------------ static __emitinline void Emit( const iRegister& to, const iRegister& from ) { prefix16(); @@ -52,6 +53,7 @@ public: ModRM_Direct( from.Id, to.Id ); } + // ------------------------------------------------------------------------ static __emitinline void Emit( const iRegister& to, const iRegister& from, u8 imm ) { if( imm == 0 ) return; @@ -61,12 +63,14 @@ public: write8( imm ); } + // ------------------------------------------------------------------------ static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from, __unused const iRegisterCL& clreg ) { basesibform(); EmitSibMagic( from.Id, sibdest ); } + // ------------------------------------------------------------------------ static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from, u8 imm ) { basesibform(); @@ -74,6 +78,7 @@ public: write8( imm ); } + // ------------------------------------------------------------------------ // dest data type is inferred from the 'from' register, so we can do void* resolution :) static __emitinline void Emit( void* dest, const iRegister& from, __unused const iRegisterCL& clreg ) { @@ -81,6 +86,7 @@ public: iWriteDisp( from.Id, dest ); } + // ------------------------------------------------------------------------ // dest data type is inferred from the 'from' register, so we can do void* resolution :) static __emitinline void Emit( void* dest, const iRegister& from, u8 imm ) { diff --git a/pcsx2/x86/ix86/implement/group2.h b/pcsx2/x86/ix86/implement/group2.h index 099aec32ee..f5767c282e 100644 --- a/pcsx2/x86/ix86/implement/group2.h +++ b/pcsx2/x86/ix86/implement/group2.h @@ -50,6 +50,7 @@ protected: public: Group2Impl() {} // For the love of GCC. 
+ // ------------------------------------------------------------------------ static __emitinline void Emit( const iRegister& to ) { prefix16(); @@ -57,6 +58,7 @@ public: ModRM_Direct( InstType, to.Id ); } + // ------------------------------------------------------------------------ static __emitinline void Emit( const iRegister& to, u8 imm ) { if( imm == 0 ) return; @@ -76,6 +78,7 @@ public: } } + // ------------------------------------------------------------------------ static __emitinline void Emit( const ModSibStrict& sibdest ) { prefix16(); @@ -83,6 +86,7 @@ public: EmitSibMagic( InstType, sibdest ); } + // ------------------------------------------------------------------------ static __emitinline void Emit( const ModSibStrict& sibdest, u8 imm ) { if( imm == 0 ) return; @@ -108,17 +112,7 @@ public: template< G2Type InstType > class Group2ImplAll { - // Inlining Notes: - // I've set up the inlining to be as practical and intelligent as possible, which means - // forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to - // virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis- - // creation of the compiler. - // - - // (Note: I'm not going to macro this since it would likely clobber intellisense parameter resolution) - public: - // ---------- 32 Bit Interface ----------- template< typename T > __forceinline void operator()( const iRegister& to, __unused const iRegisterCL& from ) const { Group2Impl::Emit( to ); } diff --git a/pcsx2/x86/ix86/implement/incdec.h b/pcsx2/x86/ix86/implement/incdec.h index f7c1e6b3ad..4aacb81beb 100644 --- a/pcsx2/x86/ix86/implement/incdec.h +++ b/pcsx2/x86/ix86/implement/incdec.h @@ -51,8 +51,9 @@ public: static __emitinline void Emit( bool isDec, const ModSibStrict& dest ) { + prefix16(); write8( Is8BitOperand() ? 0xfe : 0xff ); - EmitSibMagic( isDec ? 1: 0, dest ); + EmitSibMagic( isDec ? 1 : 0, dest ); } }; @@ -67,10 +68,10 @@ protected: public: __forceinline void operator()( const iRegister32& to ) const { m_32::Emit( isDec, to ); } - __noinline void operator()( const ModSibStrict& sibdest ) const { m_32::Emit( isDec, sibdest ); } + __noinline void operator()( const ModSibStrict& sibdest ) const{ m_32::Emit( isDec, sibdest ); } __forceinline void operator()( const iRegister16& to ) const { m_16::Emit( isDec, to ); } - __noinline void operator()( const ModSibStrict& sibdest ) const { m_16::Emit( isDec, sibdest ); } + __noinline void operator()( const ModSibStrict& sibdest ) const{ m_16::Emit( isDec, sibdest ); } __forceinline void operator()( const iRegister8& to ) const { m_8::Emit( isDec, to ); } __noinline void operator()( const ModSibStrict& sibdest ) const { m_8::Emit( isDec, sibdest ); } diff --git a/pcsx2/x86/ix86/implement/jmpcall.h b/pcsx2/x86/ix86/implement/jmpcall.h new file mode 100644 index 0000000000..bf375432d5 --- /dev/null +++ b/pcsx2/x86/ix86/implement/jmpcall.h @@ -0,0 +1,85 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +// Implementations found here: CALL and JMP! (unconditional only) +// Note: This header is meant to be included from within the x86Emitter::Internal namespace. + +template< typename ImmType > +class JmpCallImpl +{ +protected: + static const uint OperandSize = sizeof(ImmType); + + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + +public: + JmpCallImpl() {} // For the love of GCC. + + static __emitinline void Emit( bool isJmp, const iRegister& absreg ) + { + prefix16(); + iWrite( 0xff ); + ModRM_Direct( isJmp ? 4 : 2, absreg.Id ); + } + + static __emitinline void Emit( bool isJmp, const ModSibStrict& src ) + { + prefix16(); + iWrite( 0xff ); + EmitSibMagic( isJmp ? 4 : 2, src ); + } +}; + +// ------------------------------------------------------------------------ +template< bool isJmp > +class JmpCallImplAll +{ +protected: + typedef JmpCallImpl m_32; + typedef JmpCallImpl m_16; + +public: + JmpCallImplAll() {} + + __forceinline void operator()( const iRegister32& absreg ) const { m_32::Emit( isJmp, absreg ); } + __forceinline void operator()( const ModSibStrict& src ) const { m_32::Emit( isJmp, src ); } + + __forceinline void operator()( const iRegister16& absreg ) const { m_16::Emit( isJmp, absreg ); } + __forceinline void operator()( const ModSibStrict& src ) const { m_16::Emit( isJmp, src ); } + + // Special form for calling functions. This form automatically resolves the + // correct displacement based on the size of the instruction being generated. + template< typename T > + __forceinline void operator()( const T* func ) const + { + if( isJmp ) + iJccKnownTarget( Jcc_Unconditional, (void*)func ); + else + { + // calls are relative to the instruction after this one, and length is + // always 5 bytes (16 bit calls are bad mojo, so no bother to do special logic). + + sptr dest = (sptr)func - ((sptr)iGetPtr() + 5); + iWrite( 0xe8 ); + iWrite( dest ); + } + } + +}; diff --git a/pcsx2/x86/ix86/implement/movs.h b/pcsx2/x86/ix86/implement/movs.h index 80ff2a4cdd..de469a7c10 100644 --- a/pcsx2/x86/ix86/implement/movs.h +++ b/pcsx2/x86/ix86/implement/movs.h @@ -18,7 +18,9 @@ #pragma once -// Header: ix86_impl_movs.h -- covers cmov and movsx/movzx. +// Header: ix86_impl_movs.h -- covers mov, cmov, movsx/movzx, and SETcc (which shares +// with cmov many similarities). + // Note: This header is meant to be included from within the x86Emitter::Internal namespace. ////////////////////////////////////////////////////////////////////////////////////////// @@ -143,28 +145,22 @@ public: } }; -// Inlining Notes: -// I've set up the inlining to be as practical and intelligent as possible, which means -// forcing inlining for (void*) forms of ModRM, which thanks to constprop reduce to -// virtually no code. In the case of (Reg, Imm) forms, the inlining is up to the dis- -// cretion of the compiler. 
-// - +// ------------------------------------------------------------------------ class MovImplAll { public: - template< typename T> + template< typename T > __forceinline void operator()( const iRegister& to, const iRegister& from ) const { MovImpl::Emit( to, from ); } - template< typename T> + template< typename T > __forceinline void operator()( const iRegister& to, const void* src ) const { MovImpl::Emit( to, src ); } - template< typename T> + template< typename T > __forceinline void operator()( void* dest, const iRegister& from ) const { MovImpl::Emit( dest, from ); } - template< typename T> + template< typename T > __noinline void operator()( const ModSibBase& sibdest, const iRegister& from ) const { MovImpl::Emit( sibdest, from ); } - template< typename T> + template< typename T > __noinline void operator()( const iRegister& to, const ModSibBase& sibsrc ) const { MovImpl::Emit( to, sibsrc ); } - template< typename T> + template< typename T > __noinline void operator()( const ModSibStrict& sibdest, int imm ) const { MovImpl::Emit( sibdest, imm ); } // preserve_flags - set to true to disable optimizations which could alter the state of @@ -184,9 +180,11 @@ public: ////////////////////////////////////////////////////////////////////////////////////////// -// CMOV !! [in all of it's disappointing lack-of glory] -// Caution! This instruction can look exciting and cool, until you realize that it cannot -// load immediate values into registers. -_- +// CMOV !! [in all of it's disappointing lack-of glory] .. and .. +// SETcc!! [more glory, less lack!] +// +// CMOV Disclaimer: Caution! This instruction can look exciting and cool, until you +// realize that it cannot load immediate values into registers. -_- // template< typename ImmType, int InstBaseVal > class CMovSetImpl diff --git a/pcsx2/x86/ix86/implement/test.h b/pcsx2/x86/ix86/implement/test.h index c7b2fa58b2..55ecdbcaf0 100644 --- a/pcsx2/x86/ix86/implement/test.h +++ b/pcsx2/x86/ix86/implement/test.h @@ -19,7 +19,7 @@ #pragma once ////////////////////////////////////////////////////////////////////////////////////////// -// MOV instruction Implementation +// TEST instruction Implementation template< typename ImmType > class TestImpl diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h new file mode 100644 index 0000000000..22855aa80f --- /dev/null +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -0,0 +1,109 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +// This helper function is used for instructions which enter XMM form when the 0x66 prefix +// is specified (indicating alternate operand type selection). 
+template< typename OperandType > +static __forceinline void preXMM( u8 opcode ) +{ + if( sizeof( OperandType ) == 16 ) + iWrite( 0x0f66 ); + else + iWrite( 0x0f ); + iWrite( opcode ); +} + +// prefix - 0 indicates MMX, anything assumes XMM. +static __forceinline void SimdPrefix( u8 opcode, u8 prefix=0 ) +{ + if( prefix != 0 ) + { + iWrite( 0x0f00 | prefix ); + iWrite( opcode ); + } + else + iWrite( (opcode<<8) | 0x0f ); +} + +template< u8 prefix, typename T, typename T2 > +static __forceinline void writeXMMop( const iRegister& to, const iRegister& from, u8 opcode ) +{ + SimdPrefix( opcode, prefix ); + ModRM_Direct( to.Id, from.Id ); +} + +template< u8 prefix, typename T > +static __noinline void writeXMMop( const iRegister& reg, const ModSibBase& sib, u8 opcode ) +{ + SimdPrefix( opcode, prefix ); + EmitSibMagic( reg.Id, sib ); +} + +template< u8 prefix, typename T > +static __forceinline void writeXMMop( const iRegister& reg, const void* data, u8 opcode ) +{ + SimdPrefix( opcode, prefix ); + iWriteDisp( reg.Id, data ); +} + +// ------------------------------------------------------------------------ +// MOVD has valid forms for MMX and XMM registers. +// +template< typename T > +static __forceinline void iMOVDZX( const iRegisterSIMD& to, const iRegister32& from ) +{ + preXMM( 0x6e ); + ModRM_Direct( to.Id, from.Id ); +} + +template< typename T> +static __forceinline void iMOVDZX( const iRegisterSIMD& to, const void* src ) +{ + preXMM( 0x6e ); + iWriteDisp( to.Id, src ); +} + +template< typename T> +static __forceinline void iMOVDZX( const iRegisterSIMD& to, const ModSibBase& src ) +{ + preXMM( 0x6e ); + EmitSibMagic( to.Id, src ); +} + +template< typename T> +static __emitinline void iMOVD( const iRegister32& to, const iRegisterSIMD& from ) +{ + preXMM( 0x7e ); + ModRM_Direct( from.Id, to.Id ); +} + +template< typename T> +static __forceinline void iMOVD( void* dest, const iRegisterSIMD& from ) +{ + preXMM( 0x7e ); + iWriteDisp( from.Id, dest ); +} + +template< typename T> +static __noinline void iMOVD( const ModSibBase& dest, const iRegisterSIMD& from ) +{ + preXMM( 0x7e ); + EmitSibMagic( from.Id, dest ); +} diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 004ac9f4f9..31f414d50c 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -66,10 +66,10 @@ __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT }; namespace x86Emitter { -const x86IndexerType ptr; -const x86IndexerTypeExplicit ptr32; -const x86IndexerTypeExplicit ptr16; -const x86IndexerTypeExplicit ptr8; +const iAddressIndexerBase ptr; +const iAddressIndexer ptr32; +const iAddressIndexer ptr16; +const iAddressIndexer ptr8; // ------------------------------------------------------------------------ @@ -280,6 +280,9 @@ const Group8ImplAll iBTR; const Group8ImplAll iBTS; const Group8ImplAll iBTC; +const BitScanImplAll iBSF; +const BitScanImplAll iBSR; + // ------------------------------------------------------------------------ const CMovImplGeneric iCMOV; @@ -607,8 +610,6 @@ __forceinline void iSMUL( const iRegister16& to, const iRegister16& from, s16 im __noinline void iSMUL( const iRegister16& to, const ModSibBase& src ) { iMUL16::Emit( to, src ); } __noinline void iSMUL( const iRegister16& to, const ModSibBase& from, s16 imm ) { iMUL16::Emit( to, from, imm ); } - - ////////////////////////////////////////////////////////////////////////////////////////// // Push / Pop Emitters // @@ -627,5 +628,112 @@ __emitinline void iPUSH( const ModSibBase& from ) EmitSibMagic( 6, from ); } 
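
// ------------------------------------------------------------------------
// Illustrative sketch (not from the original commit): how the preXMM helper above
// selects the MMX vs. XMM encoding of MOVD purely from the SIMD register type
// (a 0x66 prefix is emitted for 128-bit operands).  Assuming the helpers behave as
// written, the emitted bytes would be roughly as shown; the wrapper function is
// hypothetical and exists only for this example.
static void iMOVD_encoding_sketch()
{
    iMOVDZX( mm0, eax );    // 0F 6E C0      movd mm0, eax
    iMOVDZX( xmm0, eax );   // 66 0F 6E C0   movd xmm0, eax  (upper 96 bits cleared)
    iMOVD( eax, mm1 );      // 0F 7E C8      movd eax, mm1
}
// ------------------------------------------------------------------------
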
+////////////////////////////////////////////////////////////////////////////////////////// +// +__emitinline void iBSWAP( const iRegister32& to ) +{ + write8( 0x0F ); + write8( 0xC8 | to.Id ); +} + + +////////////////////////////////////////////////////////////////////////////////////////// +// MMX / XMM Instructions +// (these will get put in their own file later) + +__emitinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ) +{ + writeXMMop<0>( to, from, 0x6f ); +} + +__noinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ) +{ + writeXMMop<0>( to, src, 0x6f ); +} + +__emitinline void iMOVQ( const iRegisterMMX& to, const void* src ) +{ + writeXMMop<0>( to, src, 0x6f ); +} + +// Moves from XMM to XMM, with the *upper 64 bits* of the destination register +// being cleared to zero. +__emitinline void iMOVQZX( const iRegisterXMM& to, const iRegisterXMM& from ) +{ + writeXMMop<0xf3>( to, from, 0x7e ); +} + +// Moves from XMM to XMM, with the *upper 64 bits* of the destination register +// being cleared to zero. +__noinline void iMOVQZX( const iRegisterXMM& to, const ModSibBase& src ) +{ + writeXMMop<0xf3>( to, src, 0x7e ); +} + +// Moves from XMM to XMM, with the *upper 64 bits* of the destination register +// being cleared to zero. +__emitinline void iMOVQZX( const iRegisterXMM& to, const void* src ) +{ + writeXMMop<0xf3>( to, src, 0x7e ); +} + +__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ) +{ + writeXMMop<0>( from, dest, 0x7f ); +} + +__forceinline void iMOVQ( void* dest, const iRegisterMMX& from ) +{ + writeXMMop<0>( from, dest, 0x7f ); +} + +__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterXMM& from ) +{ + writeXMMop<0xf3>( from, dest, 0x7e ); +} + +__forceinline void iMOVQ( void* dest, const iRegisterXMM& from ) +{ + writeXMMop<0xf3>( from, dest, 0x7e ); +} + +__forceinline void iMOVQ( const iRegisterXMM& to, const iRegisterMMX& from ) +{ + writeXMMop<0xf3>( to, from, 0xd6 ); +} + +__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterXMM& from ) +{ + writeXMMop<0xf2>( to, from, 0xd6 ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// + +__forceinline void iMOVSS( const iRegisterXMM& to, const iRegisterXMM& from ) +{ + if( to != from ) + writeXMMop<0xf3>( to, from, 0x10 ); +} + +__forceinline void iMOVSSZX( const iRegisterXMM& to, const void* from ) +{ + writeXMMop<0xf3>( to, from, 0x10 ); +} + +__forceinline void iMOVSSZX( const iRegisterXMM& to, const ModSibBase& from ) +{ + writeXMMop<0xf3>( to, from, 0x10 ); +} + +__forceinline void iMOVSS( const void* to, const iRegisterXMM& from ) +{ + writeXMMop<0xf3>( from, to, 0x11 ); +} + +__forceinline void iMOVSS( const ModSibBase& to, const iRegisterXMM& from ) +{ + writeXMMop<0xf3>( from, to, 0x11 ); +} } diff --git a/pcsx2/x86/ix86/ix86_inlines.inl b/pcsx2/x86/ix86/ix86_inlines.inl index 4eed2390f5..ebe2341c21 100644 --- a/pcsx2/x86/ix86/ix86_inlines.inl +++ b/pcsx2/x86/ix86/ix86_inlines.inl @@ -121,15 +121,30 @@ namespace x86Emitter // fashion. __forceinline void ModSibBase::Reduce() { + if( Index.IsStackPointer() ) + { + // esp cannot be encoded as the index, so move it to the Base, if possible. + // note: intentionally leave index assigned to esp also (generates correct + // encoding later, since ESP cannot be encoded 'alone') + + jASSUME( Scale == 0 ); // esp can't have an index modifier! + jASSUME( Base.IsEmpty() ); // base must be empty or else! 
+ + Base = Index; + return; + } + // If no index reg, then load the base register into the index slot. if( Index.IsEmpty() ) { Index = Base; Scale = 0; - Base = x86IndexReg::Empty; + if( !Base.IsStackPointer() ) // prevent ESP from being encoded 'alone' + Base = x86IndexReg::Empty; return; } - + + // The Scale has a series of valid forms, all shown here: switch( Scale ) @@ -167,17 +182,6 @@ namespace x86Emitter Scale = 3; break; } - - if( Index.IsStackPointer() ) - { - // esp cannot be encoded as the index, so move it to the Base, if possible. - jASSUME( Scale == 0 ); - jASSUME( Base.IsEmpty() ); - - Base = Index; - // noe: leave index assigned to esp also (generates correct encoding later) - //Index = x86IndexReg::Empty; - } } ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 9a59620ce6..1cfef1dd8c 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -35,7 +35,127 @@ namespace x86Emitter { - extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward=false ); + // ------------------------------------------------------------------------ + // Group 1 Instruction Class + + extern const Internal::Group1ImplAll iADD; + extern const Internal::Group1ImplAll iOR; + extern const Internal::Group1ImplAll iADC; + extern const Internal::Group1ImplAll iSBB; + extern const Internal::Group1ImplAll iAND; + extern const Internal::Group1ImplAll iSUB; + extern const Internal::Group1ImplAll iXOR; + extern const Internal::Group1ImplAll iCMP; + + // ------------------------------------------------------------------------ + // Group 2 Instruction Class + // + // Optimization Note: For Imm forms, we ignore the instruction if the shift count is + // zero. This is a safe optimization since any zero-value shift does not affect any + // flags. 
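
// Illustrative aside (not from the original commit): the zero-count rule above means
// an immediate shift count of zero emits no bytes at all, assuming Group2Impl::Emit
// keeps the early return shown earlier in this diff and the usual D1/C1 encodings:
//
//     iSHL( eax, 0 );   // emits nothing - a zero shift alters neither eax nor any flag
//     iSHL( eax, 1 );   // emits D1 E0   - the short 'shl eax,1' form
//     iSHL( eax, 4 );   // emits C1 E0 04
//
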
+ + extern const Internal::MovImplAll iMOV; + extern const Internal::TestImplAll iTEST; + + extern const Internal::Group2ImplAll iROL; + extern const Internal::Group2ImplAll iROR; + extern const Internal::Group2ImplAll iRCL; + extern const Internal::Group2ImplAll iRCR; + extern const Internal::Group2ImplAll iSHL; + extern const Internal::Group2ImplAll iSHR; + extern const Internal::Group2ImplAll iSAR; + + // ------------------------------------------------------------------------ + // Group 3 Instruction Class + + extern const Internal::Group3ImplAll iNOT; + extern const Internal::Group3ImplAll iNEG; + extern const Internal::Group3ImplAll iUMUL; + extern const Internal::Group3ImplAll iUDIV; + extern const Internal::Group3ImplAll iSDIV; + + extern const Internal::IncDecImplAll iINC; + extern const Internal::IncDecImplAll iDEC; + + extern const Internal::MovExtendImplAll iMOVZX; + extern const Internal::MovExtendImplAll iMOVSX; + + extern const Internal::DwordShiftImplAll iSHLD; + extern const Internal::DwordShiftImplAll iSHRD; + + extern const Internal::Group8ImplAll iBT; + extern const Internal::Group8ImplAll iBTR; + extern const Internal::Group8ImplAll iBTS; + extern const Internal::Group8ImplAll iBTC; + + extern const Internal::JmpCallImplAll iJMP; + extern const Internal::JmpCallImplAll iCALL; + + extern const Internal::BitScanImplAll iBSF; + extern const Internal::BitScanImplAll iBSR; + + // ------------------------------------------------------------------------ + extern const Internal::CMovImplGeneric iCMOV; + + extern const Internal::CMovImplAll iCMOVA; + extern const Internal::CMovImplAll iCMOVAE; + extern const Internal::CMovImplAll iCMOVB; + extern const Internal::CMovImplAll iCMOVBE; + + extern const Internal::CMovImplAll iCMOVG; + extern const Internal::CMovImplAll iCMOVGE; + extern const Internal::CMovImplAll iCMOVL; + extern const Internal::CMovImplAll iCMOVLE; + + extern const Internal::CMovImplAll iCMOVZ; + extern const Internal::CMovImplAll iCMOVE; + extern const Internal::CMovImplAll iCMOVNZ; + extern const Internal::CMovImplAll iCMOVNE; + + extern const Internal::CMovImplAll iCMOVO; + extern const Internal::CMovImplAll iCMOVNO; + extern const Internal::CMovImplAll iCMOVC; + extern const Internal::CMovImplAll iCMOVNC; + + extern const Internal::CMovImplAll iCMOVS; + extern const Internal::CMovImplAll iCMOVNS; + extern const Internal::CMovImplAll iCMOVPE; + extern const Internal::CMovImplAll iCMOVPO; + + // ------------------------------------------------------------------------ + extern const Internal::SetImplGeneric iSET; + + extern const Internal::SetImplAll iSETA; + extern const Internal::SetImplAll iSETAE; + extern const Internal::SetImplAll iSETB; + extern const Internal::SetImplAll iSETBE; + + extern const Internal::SetImplAll iSETG; + extern const Internal::SetImplAll iSETGE; + extern const Internal::SetImplAll iSETL; + extern const Internal::SetImplAll iSETLE; + + extern const Internal::SetImplAll iSETZ; + extern const Internal::SetImplAll iSETE; + extern const Internal::SetImplAll iSETNZ; + extern const Internal::SetImplAll iSETNE; + + extern const Internal::SetImplAll iSETO; + extern const Internal::SetImplAll iSETNO; + extern const Internal::SetImplAll iSETC; + extern const Internal::SetImplAll iSETNC; + + extern const Internal::SetImplAll iSETS; + extern const Internal::SetImplAll iSETNS; + extern const Internal::SetImplAll iSETPE; + extern const Internal::SetImplAll iSETPO; + + 
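
// ------------------------------------------------------------------------
// Illustrative sketch (not from the original commit): typical call forms for the
// instruction objects declared above, using only overloads visible elsewhere in this
// diff.  The wrapper function is hypothetical, and the iSHL immediate form is assumed
// to forward to Group2Impl::Emit( reg, imm ).
static void iDeclaredOps_usage_sketch()
{
    iMOV( eax, ecx );       // mov   eax, ecx
    iSHL( eax, 4 );         // shl   eax, 4     (a count of 0 would emit nothing, per the note above)
    iINC( eax );            // inc   eax
    iBSWAP( eax );          // bswap eax
    iBSR( eax, ecx );       // bsr   eax, ecx   (iBSF/iBSR are added by this series)
    iCALL( (void*)&iDeclaredOps_usage_sketch );  // near-relative call, 5-byte form
}
// ------------------------------------------------------------------------
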
////////////////////////////////////////////////////////////////////////////////////////// + // Miscellaneous Instructions + // These are all defined inline or in ix86.cpp. + // + + extern void iBSWAP( const iRegister32& to ); // ----- Lea Instructions (Load Effective Address) ----- // Note: alternate (void*) forms of these instructions are not provided since those @@ -81,7 +201,7 @@ namespace x86Emitter // NOP 1-byte __forceinline void iNOP() { write8(0x90); } - + ////////////////////////////////////////////////////////////////////////////////////////// // MUL / DIV instructions @@ -105,6 +225,8 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // JMP / Jcc Instructions! + extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward=false ); + #define DEFINE_FORWARD_JUMP( label, cond ) \ template< typename OperandType > \ class iForward##label : public iForwardJump \ @@ -193,5 +315,40 @@ namespace x86Emitter typedef iForwardJPE iForwardJPE32; typedef iForwardJPO iForwardJPO8; typedef iForwardJPO iForwardJPO32; + + ////////////////////////////////////////////////////////////////////////////////////////// + // MMX Mov Instructions (MOVD, MOVQ, MOVSS). + // + // Notes: + // * Some of the functions have been renamed to more clearly reflect what they actually + // do. Namely we've affixed "ZX" to several MOVs that take a register as a destination + // since that's what they do (MOVD clears upper 32/96 bits, etc). + // + + using Internal::iMOVD; + using Internal::iMOVDZX; + + extern void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ); + extern void iMOVQ( const iRegisterMMX& to, const iRegisterXMM& from ); + extern void iMOVQ( const iRegisterXMM& to, const iRegisterMMX& from ); + + extern void iMOVQ( void* dest, const iRegisterXMM& from ); + extern void iMOVQ( const ModSibBase& dest, const iRegisterXMM& from ); + extern void iMOVQ( void* dest, const iRegisterMMX& from ); + extern void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ); + extern void iMOVQ( const iRegisterMMX& to, const void* src ); + extern void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ); + + extern void iMOVQZX( const iRegisterXMM& to, const void* src ); + extern void iMOVQZX( const iRegisterXMM& to, const ModSibBase& src ); + extern void iMOVQZX( const iRegisterXMM& to, const iRegisterXMM& from ); + + extern void iMOVSS( const iRegisterXMM& to, const iRegisterXMM& from ); + extern void iMOVSS( const void* to, const iRegisterXMM& from ); + extern void iMOVSS( const ModSibBase& to, const iRegisterXMM& from ); + + extern void iMOVSSZX( const iRegisterXMM& to, const void* from ); + extern void iMOVSSZX( const iRegisterXMM& to, const ModSibBase& from ); + } diff --git a/pcsx2/x86/ix86/ix86_jmp.cpp b/pcsx2/x86/ix86/ix86_jmp.cpp index 20fde0d093..9189cd21da 100644 --- a/pcsx2/x86/ix86/ix86_jmp.cpp +++ b/pcsx2/x86/ix86/ix86_jmp.cpp @@ -38,19 +38,20 @@ namespace x86Emitter { +using namespace Internal; + +const JmpCallImplAll iJMP; +const JmpCallImplAll iCALL; + // ------------------------------------------------------------------------ void iSmartJump::SetTarget() { - jASSUME( !m_written ); - if( m_written ) - throw Exception::InvalidOperation( "Attempted to set SmartJump label multiple times." 
); - - m_target = iGetPtr(); + u8* target = iGetPtr(); if( m_baseptr == NULL ) return; iSetPtr( m_baseptr ); u8* const saveme = m_baseptr + GetMaxInstructionSize(); - iJccKnownTarget( m_cc, m_target, true ); + iJccKnownTarget( m_cc, target, true ); // Copy recompiled data inward if the jump instruction didn't fill the // alloted buffer (means that we optimized things to a j8!) @@ -59,17 +60,19 @@ void iSmartJump::SetTarget() if( spacer != 0 ) { u8* destpos = iGetPtr(); - const int copylen = (sptr)m_target - (sptr)saveme; + const int copylen = (sptr)target - (sptr)saveme; memcpy_fast( destpos, saveme, copylen ); - iSetPtr( m_target - spacer ); + iSetPtr( target - spacer ); } - - m_written = true; } -////////////////////////////////////////////////////////////////////////////////////////// -// +iSmartJump::~iSmartJump() +{ + SetTarget(); + m_baseptr = NULL; // just in case (sometimes helps in debugging too) +} + // ------------------------------------------------------------------------ // Writes a jump at the current x86Ptr, which targets a pre-established target address. @@ -78,6 +81,7 @@ void iSmartJump::SetTarget() // slideForward - used internally by iSmartJump to indicate that the jump target is going // to slide forward in the event of an 8 bit displacement. // +// Using this __emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ) { // Calculate the potential j8 displacement first, assuming an instruction length of 2: @@ -110,9 +114,4 @@ __emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, b } } -__emitinline void iJcc( JccComparisonType comparison, void* target ) -{ - iJccKnownTarget( comparison, target ); -} - } \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 0ecac70f31..d227d47ef2 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -445,18 +445,6 @@ emitterT void NOP( void ) { iNOP(); } // jump instructions / //////////////////////////////////// -emitterT u8* JMP( uptr to ) { - uptr jump = ( x86Ptr - (u8*)to ) - 1; - - if ( jump > 0x7f ) { - assert( to <= 0xffffffff ); - return (u8*)JMP32( to ); - } - else { - return (u8*)JMP8( to ); - } -} - /* jmp rel8 */ emitterT u8* JMP8( u8 to ) { @@ -477,17 +465,13 @@ emitterT u32* JMP32( uptr to ) /* jmp r32/r64 */ emitterT void JMPR( x86IntRegType to ) { - RexB(0, to); - write8( 0xFF ); - ModRM( 3, 4, to ); + iJMP( iRegister32(to) ); } // jmp m32 emitterT void JMP32M( uptr to ) { - write8( 0xFF ); - ModRM( 0, 4, DISP32 ); - write32( MEMADDR(to, 4)); + iJMP( ptr32[to] ); } /* jp rel8 */ @@ -736,41 +720,27 @@ emitterT u32* JNO32( u32 to ) /* call func */ emitterT void CALLFunc( uptr func ) { - func -= ( (uptr)x86Ptr + 5 ); - assert( (sptr)func <= 0x7fffffff && (sptr)func >= -0x7fffffff ); - CALL32(func); -} - -/* call rel32 */ -emitterT void CALL32( u32 to ) -{ - write8( 0xE8 ); - write32( to ); + iCALL( (void*)func ); } /* call r32 */ emitterT void CALL32R( x86IntRegType to ) { - write8( 0xFF ); - ModRM( 3, 2, to ); + iCALL( iRegister32( to ) ); } /* call m32 */ emitterT void CALL32M( u32 to ) { - write8( 0xFF ); - ModRM( 0, 2, DISP32 ); - write32( MEMADDR(to, 4) ); + iCALL( ptr32[to] ); } emitterT void BSRRtoR(x86IntRegType to, x86IntRegType from) { - write16( 0xBD0F ); - ModRM( 3, from, to ); + iBSR( iRegister32(to), iRegister32(from) ); } emitterT void BSWAP32R( x86IntRegType to ) { - write8( 0x0F ); - write8( 0xC8 + to ); + iBSWAP( iRegister32(to) ); } diff --git 
a/pcsx2/x86/ix86/ix86_legacy_instructions.h b/pcsx2/x86/ix86/ix86_legacy_instructions.h index cc4ca90661..e58e7f8d7a 100644 --- a/pcsx2/x86/ix86/ix86_legacy_instructions.h +++ b/pcsx2/x86/ix86/ix86_legacy_instructions.h @@ -576,8 +576,6 @@ extern u32* JS32( u32 to ); // call func extern void CALLFunc( uptr func); // based on CALL32 -// call rel32 -extern void CALL32( u32 to ); // call r32 extern void CALL32R( x86IntRegType to ); // call m32 @@ -923,7 +921,6 @@ extern void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ); extern void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ); extern void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ); extern void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ); -extern void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8 extern void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ); extern void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset=0 ); extern void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset=0 ); diff --git a/pcsx2/x86/ix86/ix86_mmx.cpp b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp similarity index 85% rename from pcsx2/x86/ix86/ix86_mmx.cpp rename to pcsx2/x86/ix86/ix86_legacy_mmx.cpp index 74abe3e5df..d1b46906f1 100644 --- a/pcsx2/x86/ix86/ix86_mmx.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp @@ -25,20 +25,24 @@ // note: r64 = mm //------------------------------------------------------------------ +using namespace x86Emitter; + /* movq m64 to r64 */ emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { - write16( 0x6F0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + iMOVQ( iRegisterMMX(to), (void*)from ); + //write16( 0x6F0F ); + //ModRM( 0, to, DISP32 ); + //write32( MEMADDR(from, 4) ); } /* movq r64 to m64 */ emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { - write16( 0x7F0F ); - ModRM( 0, from, DISP32 ); - write32(MEMADDR(to, 4)); + iMOVQ( (void*)to, iRegisterMMX(from) ); + //write16( 0x7F0F ); + //ModRM( 0, from, DISP32 ); + //write32(MEMADDR(to, 4)); } /* pand r64 to r64 */ @@ -470,69 +474,71 @@ emitterT void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -emitterT void MOVQ64ItoR( x86MMXRegType reg, u64 i ) -{ - MOVQMtoR( reg, ( uptr )(x86Ptr) + 2 + 7 ); - JMP8( 8 ); - write64( i ); -} - emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { - write16( 0x6F0F ); - ModRM( 3, to, from ); + iMOVQ( iRegisterMMX(to), iRegisterMMX(from) ); + //write16( 0x6F0F ); + //ModRM( 3, to, from ); } emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { - write16( 0x6F0F ); - WriteRmOffsetFrom( to, from, offset ); + iMOVQ( iRegisterMMX(to), ptr[x86IndexReg(from)+offset] ); + //write16( 0x6F0F ); + //WriteRmOffsetFrom( to, from, offset ); } emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { - write16( 0x7F0F ); - WriteRmOffsetFrom( from, to, offset ); + iMOVQ( ptr[x86IndexReg(to)+offset], iRegisterMMX(from) ); + //write16( 0x7F0F ); + //WriteRmOffsetFrom( from, to, offset ); } /* movd m32 to r64 */ emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { - write16( 0x6E0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + iMOVDZX( iRegisterMMX(to), (void*)from ); + //write16( 0x6E0F ); + //ModRM( 0, to, DISP32 ); + //write32( MEMADDR(from, 4) ); } /* movd r64 to m32 */ emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) { - write16( 0x7E0F ); - ModRM( 0, from, DISP32 ); - write32( MEMADDR(to, 4) ); + iMOVD( (void*)to, iRegisterMMX(from) ); + 
//write16( 0x7E0F ); + //ModRM( 0, from, DISP32 ); + //write32( MEMADDR(to, 4) ); } emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) { - write16( 0x6E0F ); - ModRM( 3, to, from ); + iMOVDZX( iRegisterMMX(to), iRegister32(from) ); + //write16( 0x6E0F ); + //ModRM( 3, to, from ); } emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) { - write16( 0x6E0F ); - WriteRmOffsetFrom( to, from, offset ); + iMOVDZX( iRegisterMMX(to), ptr[x86IndexReg(from)+offset] ); + //write16( 0x6E0F ); + //WriteRmOffsetFrom( to, from, offset ); } emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { - write16( 0x7E0F ); - ModRM( 3, from, to ); + iMOVD( iRegister32(to), iRegisterMMX(from) ); + //write16( 0x7E0F ); + //ModRM( 3, from, to ); } emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { - write16( 0x7E0F ); - WriteRmOffsetFrom( from, to, offset ); + iMOVD( ptr[x86IndexReg(to)+offset], iRegisterMMX(from) ); + //write16( 0x7E0F ); + //WriteRmOffsetFrom( from, to, offset ); } // untested diff --git a/pcsx2/x86/ix86/ix86_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp similarity index 96% rename from pcsx2/x86/ix86/ix86_sse.cpp rename to pcsx2/x86/ix86/ix86_legacy_sse.cpp index da3c215f35..f33c4a6d39 100644 --- a/pcsx2/x86/ix86/ix86_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -20,6 +20,8 @@ #include "ix86_legacy_internal.h" #include "ix86_sse_helpers.h" +using namespace x86Emitter; + ////////////////////////////////////////////////////////////////////////////////////////// // AlwaysUseMovaps [const] // @@ -303,55 +305,39 @@ emitterT void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S emitterT void SSE2_MOVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x100f, 0); } emitterT void SSE2_MOVSD_XMM_to_M64( uptr to, x86SSERegType from ) { SSE_SD_RtoM( 0x110f, 0); } -emitterT void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) -{ - write8(0xf3); SSEMtoR( 0x7e0f, 0); -} - -emitterT void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - write8(0xf3); SSERtoR( 0x7e0f); -} - -emitterT void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ) -{ - SSERtoM66(0xd60f); -} - -emitterT void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) -{ - write8(0xf2); - SSERtoR( 0xd60f); -} -emitterT void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) -{ - write8(0xf3); - SSERtoR( 0xd60f); -} +emitterT void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) { iMOVQZX( iRegisterXMM(to), (void*)from ); } +emitterT void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iMOVQZX( iRegisterXMM(to), iRegisterXMM(from) ); } +emitterT void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ) { iMOVQ( (void*)to, iRegisterXMM(from) ); } +emitterT void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) { iMOVQ( iRegisterMMX(to), iRegisterXMM(from) ); } +emitterT void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) { iMOVQ( iRegisterXMM(to), iRegisterMMX(from) ); } //**********************************************************************************/ //MOVSS: Move Scalar Single-Precision FP value * //********************************************************************************** -emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); } +emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { iMOVSSZX( iRegisterXMM(to), (void*)from ); } +emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType 
from ) { iMOVSS( (void*)to, iRegisterXMM(from) ); } +emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iMOVSS( iRegisterXMM(to), iRegisterXMM(from) ); } +emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { iMOVSSZX( iRegisterXMM(to), ptr[x86IndexReg(from)+offset] ); } +emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { iMOVSS( ptr[x86IndexReg(to)+offset], iRegisterXMM(from) ); } + +/*emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); } emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); } - emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } } - emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { write8(0xf3); - RexRB(0, to, from); - write16( 0x100f ); - WriteRmOffsetFrom(to, from, offset); + RexRB(0, to, from); + write16( 0x100f ); + WriteRmOffsetFrom(to, from, offset); } emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { write8(0xf3); - RexRB(0, from, to); - write16(0x110f); - WriteRmOffsetFrom(from, to, offset); -} + RexRB(0, from, to); + write16(0x110f); + WriteRmOffsetFrom(from, to, offset); +}*/ emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); } //**********************************************************************************/ @@ -1405,8 +1391,7 @@ emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) ////////////////////////////////////////////////////////////////////////////////////////// // SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) -// This header should always be included *after* ix86.h. - +// // Added AlwaysUseMovaps check to the relevant functions here, which helps reduce the // overhead of dynarec instructions that use these, even thought the same check would // have been done redundantly by the emitter function. diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 40f85ad587..7ecede76c1 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -129,22 +129,22 @@ namespace x86Emitter // // This is configured to inline emitter functions appropriately for release builds, and // disables some of the more aggressive inlines for dev builds (which can be helpful when -// debugging). +// debugging). Additionally, I've set up the inlining to be as practical and intelligent +// as possible with regard to constant propagation. Namely this involves forcing inlining +// for (void*) forms of ModRM, which (thanks to constprop) reduce to virtually no code, and +// force-disabling inlining on complicated SibSB forms [since MSVC would sometimes inline +// despite being a generally bad idea]. // -// Note: I use __forceinline directly for most single-line class members, when needed. -// There's no point in using __emitline in these cases since the debugger can't trace into -// single-line functions anyway. +// In the case of (Reg, Imm) forms, the inlining is up to the discreation of the compiler. +// +// Note: I *intentionally* use __forceinline directly for most single-line class members, +// when needed. There's no point in using __emitline in these cases since the debugger +// can't trace into single-line functions anyway. 
// #ifdef PCSX2_DEVBUILD # define __emitinline #else # define __emitinline __forceinline -#endif - -#ifdef _MSC_VER -# define __noinline __declspec(noinline) -#else -# define __noinline __attribute__((noinline)) #endif // ModRM 'mod' field enumeration. Provided mostly for reference: @@ -195,6 +195,8 @@ namespace x86Emitter } ////////////////////////////////////////////////////////////////////////////////////////// + // iRegister + // Unless templating some fancy stuff, use the friendly iRegister32/16/8 typedefs instead. // template< typename OperandType > class iRegister @@ -213,6 +215,9 @@ namespace x86Emitter // Returns true if the register is a valid accumulator: Eax, Ax, Al. bool IsAccumulator() const { return Id == 0; } + + // returns true if the register is a valid MMX or XMM register. + bool IsSIMD() const { return OperandSize == 8 || OperandSize == 16; } bool operator==( const iRegister& src ) const { @@ -230,6 +235,28 @@ namespace x86Emitter return *this; } }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + template< typename OperandType > + class iRegisterSIMD : public iRegister + { + public: + static const iRegisterSIMD Empty; // defined as an empty/unused value (-1) + + public: + iRegisterSIMD(): iRegister() {} + iRegisterSIMD( const iRegisterSIMD& src ) : iRegister( src.Id ) {} + iRegisterSIMD( const iRegister& src ) : iRegister( src ) {} + explicit iRegisterSIMD( int regId ) : iRegister( regId ) {} + + iRegisterSIMD& operator=( const iRegisterSIMD& src ) + { + Id = src.Id; + return *this; + } + }; + // ------------------------------------------------------------------------ // Note: GCC parses templates ahead of time apparently as a 'favor' to the programmer, which @@ -239,9 +266,11 @@ namespace x86Emitter // all about the the templated code in haphazard fashion. Yay.. >_< // - typedef iRegister iRegister32; - typedef iRegister iRegister16; - typedef iRegister iRegister8; + typedef iRegisterSIMD iRegisterXMM; + typedef iRegisterSIMD iRegisterMMX; + typedef iRegister iRegister32; + typedef iRegister iRegister16; + typedef iRegister iRegister8; class iRegisterCL : public iRegister8 { @@ -249,6 +278,14 @@ namespace x86Emitter iRegisterCL(): iRegister8( 1 ) {} }; + extern const iRegisterXMM + xmm0, xmm1, xmm2, xmm3, + xmm4, xmm5, xmm6, xmm7; + + extern const iRegisterMMX + mm0, mm1, mm2, mm3, + mm4, mm5, mm6, mm7; + extern const iRegister32 eax, ebx, ecx, edx, esi, edi, ebp, esp; @@ -266,6 +303,7 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // Use 32 bit registers as out index register (for ModSib memory address calculations) // Only x86IndexReg provides operators for constructing iAddressInfo types. + // class x86IndexReg : public iRegister32 { public: @@ -313,9 +351,9 @@ namespace x86Emitter { } - __forceinline explicit iAddressInfo( const x86IndexReg& base, int displacement=0 ) : - Base( base ), - Index(), + __forceinline explicit iAddressInfo( const x86IndexReg& index, int displacement=0 ) : + Base(), + Index( index ), Factor(0), Displacement( displacement ) { @@ -349,13 +387,6 @@ namespace x86Emitter __forceinline iAddressInfo operator-( s32 imm ) const { return iAddressInfo( *this ).Add( -imm ); } }; - enum OperandSizeType - { - OpSize_8 = 1, - OpSize_16 = 2, - OpSize_32 = 4, - }; - ////////////////////////////////////////////////////////////////////////////////////////// // ModSib - Internal low-level representation of the ModRM/SIB information. 
// @@ -422,9 +453,9 @@ namespace x86Emitter }; ////////////////////////////////////////////////////////////////////////////////////////// - // x86IndexerType - This is a static class which provisions our ptr[] syntax. + // iAddressIndexerBase - This is a static class which provisions our ptr[] syntax. // - struct x86IndexerType + struct iAddressIndexerBase { // passthrough instruction, allows ModSib to pass silently through ptr translation // without doing anything and without compiler error. @@ -450,7 +481,7 @@ namespace x86Emitter return ModSibBase( (uptr)src ); } - x86IndexerType() {} // applease the GCC gods + iAddressIndexerBase() {} // appease the GCC gods }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -458,7 +489,7 @@ namespace x86Emitter // specification of the operand size for ImmToMem operations. // template< typename OperandType > - struct x86IndexerTypeExplicit + struct iAddressIndexer { static const uint OperandSize = sizeof( OperandType ); @@ -486,13 +517,15 @@ namespace x86Emitter return ModSibStrict( (uptr)src ); } - x86IndexerTypeExplicit() {} // GCC initialization dummy + iAddressIndexer() {} // GCC initialization dummy }; - extern const x86IndexerType ptr; - extern const x86IndexerTypeExplicit ptr32; - extern const x86IndexerTypeExplicit ptr16; - extern const x86IndexerTypeExplicit ptr8; + // ptr[] - use this form for instructions which can resolve the address operand size from + // the other register operand sizes. + extern const iAddressIndexerBase ptr; + extern const iAddressIndexer ptr32; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms + extern const iAddressIndexer ptr16; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms + extern const iAddressIndexer ptr8; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms ////////////////////////////////////////////////////////////////////////////////////////// // JccComparisonType - enumerated possibilities for inspired code branching! @@ -533,64 +566,67 @@ namespace x86Emitter // as per the measured displacement distance. If the displacement is a valid s8, then // a j8 is inserted, else a j32. // - // Performance Analysis: j8's use 4 less byes per opcode, and thus can provide - // minor speed benefits in the form of L1/L2 cache clutter. They're also notably faster - // on P4's, and mildly faster on AMDs. (Core2's and i7's don't care) + // Note: This class is inherently unsafe, and so it's recommended to use iForwardJump8/32 + // whenever it is known that the jump destination is (or is not) short. Only use + // iSmartJump in cases where it's unknown what jump encoding will be ideal. // - class iSmartJump + // Important: Use this tool with caution! iSmartJump cannot be used in cases where jump + // targets overlap, since the writeback of the second target will alter the position of + // the first target (which breaks the relative addressing). To assist in avoiding such + // errors, iSmartJump works based on C++ block scope, where the destruction of the + // iSmartJump object (invoked by a '}') signals the target of the jump. Example: + // + // { + // iCMP( EAX, ECX ); + // iSmartJump jumpTo( Jcc_Above ); + // [... conditional code ...] + // } // smartjump targets this spot. + // + // No code inside the scope can attempt to jump outside the scoped block (unless the jump + // uses an immediate addressing method, such as Register or Mod/RM forms of JMP/CALL). 
+ // Multiple SmartJumps can be safely nested inside scopes, as long as they are properly + // scoped themselves. + // + // Performance Analysis: j8's use 4 less byes per opcode, and thus can provide minor + // speed benefits in the form of L1/L2 cache clutter, on any CPU. They're also notably + // faster on P4's, and mildly faster on AMDs. (Core2's and i7's don't care) + // + class iSmartJump : public NoncopyableObject { protected: - u8* m_target; // x86Ptr target address of this label u8* m_baseptr; // base address of the instruction (passed to the instruction emitter) JccComparisonType m_cc; // comparison type of the instruction - bool m_written; // set true when the jump is written (at which point the object becomes invalid) public: - const int GetMaxInstructionSize() const { jASSUME( m_cc != Jcc_Unknown ); return ( m_cc == Jcc_Unconditional ) ? 5 : 6; } - // Creates a backward jump label which will be passed into a Jxx instruction (or few!) - // later on, and the current x86Ptr is recorded as the target [thus making the class - // creation point the jump target]. - iSmartJump() - { - m_target = iGetPtr(); - m_baseptr = NULL; - m_cc = Jcc_Unknown; - m_written = false; - } + JccComparisonType GetCondition() const { return m_cc; } + virtual ~iSmartJump(); + // ------------------------------------------------------------------------ // ccType - Comparison type to be written back to the jump instruction position. // iSmartJump( JccComparisonType ccType ) { jASSUME( ccType != Jcc_Unknown ); - m_target = NULL; m_baseptr = iGetPtr(); m_cc = ccType; - m_written = false; iAdvancePtr( GetMaxInstructionSize() ); } - - JccComparisonType GetCondition() const - { - return m_cc; - } - - u8* GetTarget() const - { - return m_target; - } - + + protected: void SetTarget(); }; ////////////////////////////////////////////////////////////////////////////////////////// - // + // iForwardJump + // Primary use of this class is through the various iForwardJA8/iForwardJLE32/etc. helpers + // defined later in this header. :) + // template< typename OperandType > class iForwardJump { @@ -601,8 +637,13 @@ namespace x86Emitter // relative to this address. s8* const BasePtr; - public: + // The jump instruction is emitted at the point of object construction. The conditional + // type must be valid (Jcc_Unknown generates an assertion). iForwardJump( JccComparisonType cctype = Jcc_Unconditional ); + + // Sets the jump target by writing back the current x86Ptr to the jump instruction. + // This method can be called multiple times, re-writing the jump instruction's target + // in each case. (the the last call is the one that takes effect). void SetTarget() const; }; @@ -627,116 +668,12 @@ namespace x86Emitter #include "implement/incdec.h" #include "implement/bittest.h" #include "implement/test.h" + #include "implement/jmpcall.h" + #include "implement/xmm/movqss.h" } ////////////////////////////////////////////////////////////////////////////////////////// // - // ----- Group 1 Instruction Class ----- - - extern const Internal::Group1ImplAll iADD; - extern const Internal::Group1ImplAll iOR; - extern const Internal::Group1ImplAll iADC; - extern const Internal::Group1ImplAll iSBB; - extern const Internal::Group1ImplAll iAND; - extern const Internal::Group1ImplAll iSUB; - extern const Internal::Group1ImplAll iXOR; - extern const Internal::Group1ImplAll iCMP; - - // ----- Group 2 Instruction Class ----- - // Optimization Note: For Imm forms, we ignore the instruction if the shift count is - // zero. 
This is a safe optimization since any zero-value shift does not affect any - // flags. - - extern const Internal::MovImplAll iMOV; - extern const Internal::TestImplAll iTEST; - - extern const Internal::Group2ImplAll iROL; - extern const Internal::Group2ImplAll iROR; - extern const Internal::Group2ImplAll iRCL; - extern const Internal::Group2ImplAll iRCR; - extern const Internal::Group2ImplAll iSHL; - extern const Internal::Group2ImplAll iSHR; - extern const Internal::Group2ImplAll iSAR; - - // ----- Group 3 Instruction Class ----- - - extern const Internal::Group3ImplAll iNOT; - extern const Internal::Group3ImplAll iNEG; - extern const Internal::Group3ImplAll iUMUL; - extern const Internal::Group3ImplAll iUDIV; - extern const Internal::Group3ImplAll iSDIV; - - extern const Internal::IncDecImplAll iINC; - extern const Internal::IncDecImplAll iDEC; - - extern const Internal::MovExtendImplAll iMOVZX; - extern const Internal::MovExtendImplAll iMOVSX; - - extern const Internal::DwordShiftImplAll iSHLD; - extern const Internal::DwordShiftImplAll iSHRD; - - extern const Internal::Group8ImplAll iBT; - extern const Internal::Group8ImplAll iBTR; - extern const Internal::Group8ImplAll iBTS; - extern const Internal::Group8ImplAll iBTC; - - // ------------------------------------------------------------------------ - extern const Internal::CMovImplGeneric iCMOV; - - extern const Internal::CMovImplAll iCMOVA; - extern const Internal::CMovImplAll iCMOVAE; - extern const Internal::CMovImplAll iCMOVB; - extern const Internal::CMovImplAll iCMOVBE; - - extern const Internal::CMovImplAll iCMOVG; - extern const Internal::CMovImplAll iCMOVGE; - extern const Internal::CMovImplAll iCMOVL; - extern const Internal::CMovImplAll iCMOVLE; - - extern const Internal::CMovImplAll iCMOVZ; - extern const Internal::CMovImplAll iCMOVE; - extern const Internal::CMovImplAll iCMOVNZ; - extern const Internal::CMovImplAll iCMOVNE; - - extern const Internal::CMovImplAll iCMOVO; - extern const Internal::CMovImplAll iCMOVNO; - extern const Internal::CMovImplAll iCMOVC; - extern const Internal::CMovImplAll iCMOVNC; - - extern const Internal::CMovImplAll iCMOVS; - extern const Internal::CMovImplAll iCMOVNS; - extern const Internal::CMovImplAll iCMOVPE; - extern const Internal::CMovImplAll iCMOVPO; - - // ------------------------------------------------------------------------ - extern const Internal::SetImplGeneric iSET; - - extern const Internal::SetImplAll iSETA; - extern const Internal::SetImplAll iSETAE; - extern const Internal::SetImplAll iSETB; - extern const Internal::SetImplAll iSETBE; - - extern const Internal::SetImplAll iSETG; - extern const Internal::SetImplAll iSETGE; - extern const Internal::SetImplAll iSETL; - extern const Internal::SetImplAll iSETLE; - - extern const Internal::SetImplAll iSETZ; - extern const Internal::SetImplAll iSETE; - extern const Internal::SetImplAll iSETNZ; - extern const Internal::SetImplAll iSETNE; - - extern const Internal::SetImplAll iSETO; - extern const Internal::SetImplAll iSETNO; - extern const Internal::SetImplAll iSETC; - extern const Internal::SetImplAll iSETNC; - - extern const Internal::SetImplAll iSETS; - extern const Internal::SetImplAll iSETNS; - extern const Internal::SetImplAll iSETPE; - extern const Internal::SetImplAll iSETPO; - - } #include "ix86_inlines.inl" From a2a1d58b4ba4bd45746e95c964098b7e73fea09b Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sun, 19 Apr 2009 02:54:56 +0000 Subject: [PATCH 096/143] backup git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1012 
96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Analyze.inl | 21 ++++++++++++++++- pcsx2/x86/microVU_Compile.inl | 26 ++++++++++----------- pcsx2/x86/microVU_Execute.inl | 4 ++-- pcsx2/x86/microVU_Lower.inl | 34 ++++++++++++++++++++++----- pcsx2/x86/microVU_Upper.inl | 44 +++++++++++++++++------------------ 5 files changed, 84 insertions(+), 45 deletions(-) diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 9381c317bf..4581d937fd 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -51,6 +51,7 @@ microVUt(void) mVUanalyzeFMAC1(int Fd, int Fs, int Ft) { microVU* mVU = mVUx; + mVUlog("microVU: FMAC1 Opcode"); mVUinfo |= _doStatus; analyzeReg1(Fs); analyzeReg1(Ft); @@ -63,6 +64,7 @@ microVUt(void) mVUanalyzeFMAC1(int Fd, int Fs, int Ft) { microVUt(void) mVUanalyzeFMAC2(int Fs, int Ft) { microVU* mVU = mVUx; + mVUlog("microVU: FMAC2 Opcode"); analyzeReg1(Fs); analyzeReg2(Ft); } @@ -82,6 +84,7 @@ microVUt(void) mVUanalyzeFMAC2(int Fs, int Ft) { microVUt(void) mVUanalyzeFMAC3(int Fd, int Fs, int Ft) { microVU* mVU = mVUx; + mVUlog("microVU: FMAC3 Opcode"); mVUinfo |= _doStatus; analyzeReg1(Fs); analyzeReg3(Ft); @@ -98,6 +101,7 @@ microVUt(void) mVUanalyzeFMAC3(int Fd, int Fs, int Ft) { microVUt(void) mVUanalyzeFMAC4(int Fs, int Ft) { microVU* mVU = mVUx; + mVUlog("microVU: FMAC4 Opcode"); analyzeReg1(Fs); analyzeReg4(Ft); } @@ -111,6 +115,7 @@ microVUt(void) mVUanalyzeFMAC4(int Fs, int Ft) { microVUt(void) mVUanalyzeIALU1(int Id, int Is, int It) { microVU* mVU = mVUx; + //mVUlog("microVU: IALU1 Opcode"); if (!Id) { mVUinfo |= _isNOP; } analyzeVIreg1(Is); analyzeVIreg1(It); @@ -119,6 +124,7 @@ microVUt(void) mVUanalyzeIALU1(int Id, int Is, int It) { microVUt(void) mVUanalyzeIALU2(int Is, int It) { microVU* mVU = mVUx; + //mVUlog("microVU: IALU2 Opcode"); if (!It) { mVUinfo |= _isNOP; } analyzeVIreg1(Is); analyzeVIreg2(It, 1); @@ -140,6 +146,7 @@ microVUt(void) mVUanalyzeIALU2(int Is, int It) { microVUt(void) mVUanalyzeMR32(int Fs, int Ft) { microVU* mVU = mVUx; + mVUlog("microVU: MR32 Opcode"); if (!Ft) { mVUinfo |= _isNOP; } analyzeReg6(Fs); analyzeReg2(Ft); @@ -165,6 +172,7 @@ microVUt(void) mVUanalyzeMR32(int Fs, int Ft) { microVUt(void) mVUanalyzeFDIV(int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { microVU* mVU = mVUx; + mVUlog("microVU: DIV Opcode"); analyzeReg5(Fs, Fsf); analyzeReg5(Ft, Ftf); analyzeQreg(xCycles); @@ -176,12 +184,14 @@ microVUt(void) mVUanalyzeFDIV(int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { microVUt(void) mVUanalyzeEFU1(int Fs, int Fsf, u8 xCycles) { microVU* mVU = mVUx; + mVUlog("microVU: EFU Opcode"); analyzeReg5(Fs, Fsf); analyzePreg(xCycles); } microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { microVU* mVU = mVUx; + mVUlog("microVU: EFU Opcode"); analyzeReg1(Fs); analyzePreg(xCycles); } @@ -191,7 +201,8 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { //------------------------------------------------------------------ microVUt(void) mVUanalyzeMFP(int Ft) { - microVU* mVU = mVUx; // ToDo: Needs special info for P reg? + microVU* mVU = mVUx; + mVUlog("microVU: MFP Opcode"); if (!Ft) { mVUinfo |= _isNOP; } analyzeReg2(Ft); } @@ -202,6 +213,7 @@ microVUt(void) mVUanalyzeMFP(int Ft) { microVUt(void) mVUanalyzeLQ(int Ft, int Is, bool writeIs) { microVU* mVU = mVUx; + //mVUlog("microVU: LQ Opcode"); analyzeVIreg1(Is); analyzeReg2(Ft); if (!Ft) { mVUinfo |= (writeIs && Is) ? 
_noWriteVF : _isNOP; } @@ -214,6 +226,7 @@ microVUt(void) mVUanalyzeLQ(int Ft, int Is, bool writeIs) { microVUt(void) mVUanalyzeSQ(int Fs, int It, bool writeIt) { microVU* mVU = mVUx; + mVUlog("microVU: SQ Opcode"); analyzeReg1(Fs); analyzeVIreg1(It); if (writeIt) { analyzeVIreg2(It, 1); } @@ -227,12 +240,14 @@ microVUt(void) mVUanalyzeSQ(int Fs, int It, bool writeIt) { microVUt(void) mVUanalyzeR1(int Fs, int Fsf) { microVU* mVU = mVUx; + mVUlog("microVU: R-reg Opcode"); analyzeReg5(Fs, Fsf); analyzeRreg(); } microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { microVU* mVU = mVUx; + mVUlog("microVU: R-reg Opcode"); if (!Ft) { mVUinfo |= ((canBeNOP) ? _isNOP : _noWriteVF); } analyzeReg2(Ft); analyzeRreg(); @@ -244,6 +259,7 @@ microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { microVUt(void) mVUanalyzeSflag(int It) { microVU* mVU = mVUx; + mVUlog("microVU: Sflag Opcode"); if (!It) { mVUinfo |= _isNOP; } else { // Sets _isSflag at instruction that FSxxx opcode reads it's status flag from mVUinfo |= _swapOps; @@ -259,6 +275,7 @@ microVUt(void) mVUanalyzeSflag(int It) { microVUt(void) mVUanalyzeFSSET() { microVU* mVU = mVUx; mVUinfo |= _isFSSET; + mVUlog("microVU: FSSET Opcode"); // mVUinfo &= ~_doStatus; // Note: I'm not entirely sure if the non-sticky flags // should be taken from the current upper instruction @@ -272,6 +289,7 @@ microVUt(void) mVUanalyzeFSSET() { microVUt(void) mVUanalyzeMflag(int Is, int It) { microVU* mVU = mVUx; + mVUlog("microVU: Mflag Opcode"); if (!It) { mVUinfo |= _isNOP; } else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed) mVUinfo |= _swapOps; @@ -296,6 +314,7 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) { microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) { microVU* mVU = mVUx; + mVUlog("microVU: XGkick Opcode"); analyzeVIreg1(Fs); analyzeXGkick1(); analyzeXGkick2(xCycles); diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index b7699d1db2..ad468e62f7 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -207,9 +207,9 @@ microVUt(void) mVUincCycles(int x) { microVUt(void) mVUsetCycles() { microVU* mVU = mVUx; incCycles(mVUstall); - if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg - mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? _noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP - mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector + if (mVUregsTemp.VFreg[0] == mVUregsTemp.VFreg[1] && mVUregsTemp.VFreg[0]) { // If upper Op && lower Op write to same VF reg + mVUinfo |= (mVUregsTemp.r || mVUregsTemp.VI) ? 
_noWriteVF : _isNOP; // If lower Op doesn't modify anything else, then make it a NOP + mVUregsTemp.VF[1].x = aMax(mVUregsTemp.VF[0].x, mVUregsTemp.VF[1].x); // Use max cycles from each vector mVUregsTemp.VF[1].y = aMax(mVUregsTemp.VF[0].y, mVUregsTemp.VF[1].y); mVUregsTemp.VF[1].z = aMax(mVUregsTemp.VF[0].z, mVUregsTemp.VF[1].z); mVUregsTemp.VF[1].w = aMax(mVUregsTemp.VF[0].w, mVUregsTemp.VF[1].w); @@ -251,14 +251,14 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState); if (pBlock) { return pBlock->x86ptrStart; } - mVUlog("mVUcompile First Pass"); + //mVUlog("mVUcompile First Pass"); // First Pass iPC = startPC / 4; mVUbranch = 0; mVUstartPC = iPC; mVUcount = 0; - mVUcycles = 1; // Skips "M" phase, and starts counting cycles at "T" stage + mVUcycles = 0; // Skips "M" phase, and starts counting cycles at "T" stage mVU->p = 0; // All blocks start at p index #0 mVU->q = 0; // All blocks start at q index #0 memcpy_fast(&mVUregs, (microRegInfo*)pState, sizeof(microRegInfo)); // Loads up Pipeline State Info @@ -267,6 +267,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { for (int branch = 0;; ) { incPC(1); + incCycles(1); startLoop(); mVUopU(); if (curI & _Ebit_) { branch = 1; } @@ -281,32 +282,28 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { else if (branch == 1) { branch = 2; } if (mVUbranch) { branch = 3; mVUbranch = 0; mVUinfo |= _isBranch; } incPC(1); - incCycles(1); mVUcount++; } - mVUlog("mVUcompile mVUsetFlags"); + //mVUlog("mVUcompile mVUsetFlags"); // Sets Up Flag instances int bStatus[4]; int bMac[4]; mVUsetFlags(bStatus, bMac); - mVUlog("mVUcompile Second Pass"); + //mVUlog("mVUcompile Second Pass"); //write8(0xcc); // Second Pass iPC = mVUstartPC; mVUbranch = 0; - int test = 0; - for (bool x = 1; x; ) { - if (isEOB) { x = 0; } + int x; + for (x = 0; x < (vuIndex ? 
(0x3fff/8) : (0xfff/8)); x++) { + if (isEOB) { x = 0xffff; } if (isNOP) { incPC(1); doUpperOp(); if (curI & _Ibit_) { incPC(-1); mVU->iReg = curI; incPC(-1); } } else if (!swapOps) { incPC(1); doUpperOp(); incPC(-1); mVUopL(); incPC(1); } else { mVUopL(); incPC(1); doUpperOp(); } - - test++; - if (test > 0x3ff) { mVUlog("microVU: Possible infinite compiling loop!"); x = 0; test = 0; } if (!isBdelay) { incPC(1); } else { @@ -371,6 +368,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { } } mVUlog("mVUcompile ebit"); + if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { mVUlog("microVU: Possible infinite compiling loop!"); } // Do E-bit end stuff here incCycles(55); // Ensures Valid P/Q instances diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 1fdf4c7929..45998cc3e7 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -157,7 +157,7 @@ void __fastcall startVU0(u32 startPC, u32 cycles) { ((mVUrecCall)microVU0.start void __fastcall startVU1(u32 startPC, u32 cycles) { ((mVUrecCall)microVU1.startFunct)(startPC, cycles); } void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles) { return mVUexecute<0>(startPC, cycles); } void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles) { return mVUexecute<1>(startPC, cycles); } -void __fastcall mVUcleanUpVU0() { mVUcleanUp<0>(); } -void __fastcall mVUcleanUpVU1() { mVUcleanUp<1>(); } +void __fastcall mVUcleanUpVU0() { mVUcleanUp<0>(); } +void __fastcall mVUcleanUpVU1() { mVUcleanUp<1>(); } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index ef63010029..7c00a6b7a6 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -563,6 +563,7 @@ microVUf(void) mVU_IADD() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { + mVUlog("IADD"); mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { mVUallocVIa(gprT2, _Ft_); @@ -577,6 +578,7 @@ microVUf(void) mVU_IADDI() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { + mVUlog("IADDI"); mVUallocVIa(gprT1, _Fs_); ADD16ItoR(gprT1, _Imm5_); mVUallocVIb(gprT1, _Ft_); @@ -587,8 +589,9 @@ microVUf(void) mVU_IADDIU() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { + mVUlog("IADDIU"); mVUallocVIa(gprT1, _Fs_); - ADD16ItoR(gprT1, _Imm12_); + ADD16ItoR(gprT1, _Imm15_); mVUallocVIb(gprT1, _Ft_); } } @@ -597,6 +600,7 @@ microVUf(void) mVU_IAND() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { + mVUlog("IAND"); mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { mVUallocVIa(gprT2, _Ft_); @@ -610,6 +614,7 @@ microVUf(void) mVU_IOR() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { + mVUlog("IOR"); mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { mVUallocVIa(gprT2, _Ft_); @@ -623,6 +628,7 @@ microVUf(void) mVU_ISUB() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } else { + mVUlog("ISUB"); if (_Ft_ != _Fs_) { mVUallocVIa(gprT1, _Fs_); mVUallocVIa(gprT2, _Ft_); @@ -640,8 +646,9 @@ microVUf(void) mVU_ISUBIU() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } else { + mVUlog("ISUBIU"); mVUallocVIa(gprT1, _Fs_); - SUB16ItoR(gprT1, _Imm12_); + SUB16ItoR(gprT1, _Imm15_); mVUallocVIb(gprT1, _Ft_); } } @@ -654,6 +661,7 @@ microVUf(void) mVU_MFIR() { microVU* mVU = mVUx; if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeReg2(_Ft_); } else { + mVUlog("MFIR"); mVUallocVIa(gprT1, _Fs_); 
MOVSX32R16toR(gprT1, gprT1); SSE2_MOVD_R_to_XMM(xmmT1, gprT1); @@ -666,6 +674,7 @@ microVUf(void) mVU_MFP() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeMFP(_Ft_); } else { + mVUlog("MFP"); getPreg(xmmFt); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } @@ -675,6 +684,7 @@ microVUf(void) mVU_MOVE() { microVU* mVU = mVUx; if (!recPass) { if (!_Ft_ || (_Ft_ == _Fs_)) { mVUinfo |= _isNOP; } analyzeReg1(_Fs_); analyzeReg2(_Ft_); } else { + mVUlog("MOVE"); mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } @@ -684,6 +694,7 @@ microVUf(void) mVU_MR32() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeMR32(_Fs_, _Ft_); } else { + mVUlog("MR32"); mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], (_X_Y_Z_W == 8) ? 4 : 15); if (_X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x39); } mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); @@ -694,6 +705,7 @@ microVUf(void) mVU_MTIR() { microVU* mVU = mVUx; if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeReg5(_Fs_, _Fsf_); analyzeVIreg2(_Ft_, 1); } else { + mVUlog("MTIR"); MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); mVUallocVIb(gprT1, _Ft_); } @@ -707,6 +719,7 @@ microVUf(void) mVU_ILW() { microVU* mVU = mVUx; if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } else { + mVUlog("ILW"); if (!_Fs_) { MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS ); mVUallocVIb(gprT1, _Ft_); @@ -726,6 +739,7 @@ microVUf(void) mVU_ILWR() { microVU* mVU = mVUx; if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } else { + mVUlog("ILWR"); if (!_Fs_) { MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS); mVUallocVIb(gprT1, _Ft_); @@ -748,6 +762,7 @@ microVUf(void) mVU_ISW() { microVU* mVU = mVUx; if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } else { + mVUlog("ISW"); if (!_Fs_) { int imm = getVUmem(_Imm11_); mVUallocVIa(gprT1, _Ft_); @@ -773,6 +788,7 @@ microVUf(void) mVU_ISWR() { microVU* mVU = mVUx; if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } else { + mVUlog("ISWR"); if (!_Fs_) { mVUallocVIa(gprT1, _Ft_); if (_X) MOV32RtoM((uptr)mVU->regs->Mem, gprT1); @@ -801,10 +817,12 @@ microVUf(void) mVU_LQ() { if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 0); } else { if (!_Fs_) { + mVUlog("LQ1"); mVUloadReg(xmmFt, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } else { + mVUlog("LQ2"); mVUallocVIa(gprT1, _Fs_); ADD32ItoR(gprT1, _Imm11_); mVUaddrFix(gprT1); @@ -819,13 +837,15 @@ microVUf(void) mVU_LQD() { if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 1); } else { if (!_Fs_ && !noWriteVF) { + mVUlog("LQD1"); mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } else { + mVUlog("LQD2"); mVUallocVIa(gprT1, _Fs_); SUB16ItoR(gprT1, 1); - mVUallocVIb(gprT1, _Fs_); // ToDo: Backup to memory check. + mVUallocVIb(gprT1, _Fs_); if (!noWriteVF) { mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); @@ -840,11 +860,13 @@ microVUf(void) mVU_LQI() { if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 1); } else { if (!_Fs_ && !noWriteVF) { + mVUlog("LQI1"); mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } else { - mVUallocVIa((_Ft_) ? gprT1 : gprT2, _Fs_); + mVUlog("LQI2"); + mVUallocVIa((!noWriteVF) ? 
gprT1 : gprT2, _Fs_); if (!noWriteVF) { MOV32RtoR(gprT2, gprT1); mVUaddrFix(gprT1); @@ -852,7 +874,7 @@ microVUf(void) mVU_LQI() { mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); } ADD16ItoR(gprT2, 1); - mVUallocVIb(gprT2, _Fs_); // ToDo: Backup to memory check. + mVUallocVIb(gprT2, _Fs_); } } } @@ -890,7 +912,7 @@ microVUf(void) mVU_SQD() { else { mVUallocVIa(gprT1, _Ft_); SUB16ItoR(gprT1, 1); - mVUallocVIb(gprT1, _Ft_); // ToDo: Backup to memory check. + mVUallocVIb(gprT1, _Ft_); mVUaddrFix(gprT1); getReg7(xmmFs, _Fs_); mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index ab084d824b..94582273f9 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -87,7 +87,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC1a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 1); \ + mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 1); \ mVUallocFMAC1b(Fd); \ } \ } @@ -100,7 +100,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC3a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 1); \ + mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 1); \ mVUallocFMAC3b(Fd); \ } \ } @@ -113,7 +113,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC4a(ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fs, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC4b(ACC, Fs); \ } \ } @@ -126,7 +126,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC5a(ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fs, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC5b(ACC, Fs); \ } \ } @@ -139,7 +139,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC6a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 1); \ + mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 1); \ mVUallocFMAC6b(Fd); \ } \ } @@ -152,7 +152,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC7a(ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fs, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC7b(ACC, Fs); \ } \ } @@ -171,7 +171,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(Fs, ACC); \ } \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC8b(Fd); \ } \ } @@ -190,7 +190,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \ } \ - mVUupdateFlags(Fd, Fs, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fd, Fs, xmmT2, _X_Y_Z_W, 0); \ 
mVUallocFMAC9b(Fd); \ } \ } @@ -209,7 +209,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(Fs, ACC); \ } \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC10b(Fd); \ } \ } @@ -228,7 +228,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \ } \ - mVUupdateFlags(Fd, Fs, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fd, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC11b(Fd); \ } \ } @@ -247,7 +247,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(Fs, ACC); \ } \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC12b(Fd); \ } \ } @@ -266,7 +266,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \ } \ - mVUupdateFlags(Fd, Fs, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fd, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC13b(Fd); \ } \ } @@ -285,7 +285,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(ACCr, Fs); \ } \ - mVUupdateFlags(ACCr, Fs, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(ACCr, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC14b(ACCw, ACCr); \ } \ } @@ -304,7 +304,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(ACCr, Fs); \ } \ - mVUupdateFlags(ACCr, Fs, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(ACCr, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC15b(ACCw, ACCr); \ } \ } @@ -323,7 +323,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(ACCr, Fs); \ } \ - mVUupdateFlags(ACCr, Fs, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(ACCr, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC16b(ACCw, ACCr); \ } \ } @@ -335,7 +335,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX int ACC, Fs, Ft; \ mVUallocFMAC18a(ACC, Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fs, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC18b(ACC, Fs); \ } \ } @@ -348,7 +348,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC19a(Fd, ACC, Fs, Ft); \ SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \ - mVUupdateFlags(Fd, Fs, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fd, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC19b(Fd); \ } \ } @@ -361,7 +361,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC22a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 1); \ + mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 1); \ mVUallocFMAC22b(Fd); \ } \ } @@ -374,7 +374,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUallocFMAC23a(ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ - mVUupdateFlags(Fs, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fs, xmmT1, xmmT2, _X_Y_Z_W, 0); 
\ mVUallocFMAC23b(ACC, Fs); \ } \ } @@ -393,7 +393,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(Fs, ACC); \ } \ - mVUupdateFlags(Fd, xmmT1, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC24b(Fd); \ } \ } @@ -412,7 +412,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(ACC, Fs); \ } \ - mVUupdateFlags(Fd, Fs, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(Fd, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC25b(Fd); \ } \ } @@ -431,7 +431,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MULPS_XMM_to_XMM(Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(ACCr, Fs); \ } \ - mVUupdateFlags(ACCr, Fs, Ft, _X_Y_Z_W, 0); \ + mVUupdateFlags(ACCr, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC26b(ACCw, ACCr); \ } \ } From 1d2ae02c97137763a57a60809fe3c660c15c7075 Mon Sep 17 00:00:00 2001 From: mattmenke Date: Sun, 19 Apr 2009 03:33:32 +0000 Subject: [PATCH 097/143] LilyPad: More minor fixes to failure detection in raw input initialization code. Gabest already fixed the big one. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1013 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/LilyPad/LilyPad.cpp | 13 ++++- plugins/LilyPad/RawInput.cpp | 106 +++++++++++++++++++---------------- 2 files changed, 68 insertions(+), 51 deletions(-) diff --git a/plugins/LilyPad/LilyPad.cpp b/plugins/LilyPad/LilyPad.cpp index 067ed73fb2..e366764e9a 100644 --- a/plugins/LilyPad/LilyPad.cpp +++ b/plugins/LilyPad/LilyPad.cpp @@ -105,12 +105,14 @@ struct Stick { int vert; }; +// Sum of states of all controls for a pad (Not including toggles). struct ButtonSum { int buttons[12]; Stick sticks[3]; }; -// Freeze data, for a single pad. +// Freeze data, for a single pad. Basically has all pad state that +// a PS2 can set. struct PadFreezeData { // Digital / Analog / DS2 Native u8 mode; @@ -129,9 +131,14 @@ struct PadFreezeData { class Pad : public PadFreezeData { public: - ButtonSum sum, lockedSum; + // Current pad state. + ButtonSum sum; + // State of locked buttons. Already included by sum, used + // as initial value of sum. + ButtonSum lockedSum; - int lockedState; + // Flags for which controls (buttons or axes) are locked, if any. + DWORD lockedState; // Last vibration value. Only used so as not to call vibration // functions when old and new values are both 0. 
diff --git a/plugins/LilyPad/RawInput.cpp b/plugins/LilyPad/RawInput.cpp index 5593ad9761..8137ea2974 100644 --- a/plugins/LilyPad/RawInput.cpp +++ b/plugins/LilyPad/RawInput.cpp @@ -249,64 +249,74 @@ int InitializeRawInput() { } void EnumRawInputDevices() { - UINT count = 0; - if (InitializeRawInput() && pGetRawInputDeviceList(0, &count, sizeof(RAWINPUTDEVICELIST)) != (UINT)-1) { + int count = 0; + if (InitializeRawInput() && pGetRawInputDeviceList(0, (unsigned int*)&count, sizeof(RAWINPUTDEVICELIST)) && count > 0) { wchar_t *instanceID = (wchar_t *) malloc(41000*sizeof(wchar_t)); wchar_t *keyName = instanceID + 11000; wchar_t *displayName = keyName + 10000; wchar_t *productID = displayName + 10000; + + RAWINPUTDEVICELIST *list = (RAWINPUTDEVICELIST*) malloc(sizeof(RAWINPUTDEVICELIST) * count); int keyboardCount = 1; int mouseCount = 1; - if (count) { - RAWINPUTDEVICELIST *list = (RAWINPUTDEVICELIST*) malloc(sizeof(RAWINPUTDEVICELIST) * count); - if (list && pGetRawInputDeviceList(list, &count, sizeof(RAWINPUTDEVICELIST))) { - for (UINT i=0; i 0 && - nameLen >= 3) { - wcscpy(productID, instanceID); - wchar_t *temp = 0; - for (int j=0; j<3; j++) { - wchar_t *s = wcschr(productID, '#'); - if (!s) break; - *s = '\\'; - if (j==2) { - *s = 0; - } - if (j==1) temp = s; - } - wsprintfW(keyName, L"SYSTEM\\CurrentControlSet\\Enum%s", productID+3); - if (temp) *temp = 0; - displayName[0] = 0; - HKEY hKey; - if (ERROR_SUCCESS == RegOpenKeyExW(HKEY_LOCAL_MACHINE, keyName, 0, KEY_QUERY_VALUE, &hKey)) { - DWORD type; - DWORD len = 10000 * sizeof(wchar_t); - if (ERROR_SUCCESS == RegQueryValueExW(hKey, L"DeviceDesc", 0, &type, (BYTE*)displayName, &len) && - len && type == REG_SZ) { - wchar_t *temp2 = wcsrchr(displayName, ';'); - if (!temp2) temp2 = displayName; - else temp2++; - // Could do without this, but more effort than it's worth. - wcscpy(keyName, temp2); - } - RegCloseKey(hKey); - } - if (list[i].dwType == RIM_TYPEKEYBOARD) { - if (!displayName[0]) wsprintfW(displayName, L"Raw Keyboard %i", keyboardCount++); - else wsprintfW(displayName, L"Raw KB: %s", keyName); - dm->AddDevice(new RawInputKeyboard(list[i].hDevice, displayName, instanceID)); - } - else if (list[i].dwType == RIM_TYPEMOUSE) { - if (!displayName[0]) wsprintfW(displayName, L"Raw Mouse %i", mouseCount++); - else wsprintfW(displayName, L"Raw MS: %s", keyName); - dm->AddDevice(new RawInputMouse(list[i].hDevice, displayName, instanceID, productID)); - } + count = pGetRawInputDeviceList(list, (unsigned int*)&count, sizeof(RAWINPUTDEVICELIST)); + + // Not necessary, but reminder that count is -1 on failure. + if (count > 0) { + for (int i=0; i= 4) { + // nameLen includes terminating null. + nameLen--; + + // Strip out GUID parts of instanceID to make it a generic product id, + // and reformat it to point to registry entry containing device description. 
+ wcscpy(productID, instanceID); + wchar_t *temp = 0; + for (int j=0; j<3; j++) { + wchar_t *s = wcschr(productID, '#'); + if (!s) break; + *s = '\\'; + if (j==2) { + *s = 0; + } + if (j==1) temp = s; + } + + wsprintfW(keyName, L"SYSTEM\\CurrentControlSet\\Enum%s", productID+3); + if (temp) *temp = 0; + displayName[0] = 0; + HKEY hKey; + if (ERROR_SUCCESS == RegOpenKeyExW(HKEY_LOCAL_MACHINE, keyName, 0, KEY_QUERY_VALUE, &hKey)) { + DWORD type; + DWORD len = 10000 * sizeof(wchar_t); + if (ERROR_SUCCESS == RegQueryValueExW(hKey, L"DeviceDesc", 0, &type, (BYTE*)displayName, &len) && + len && type == REG_SZ) { + wchar_t *temp2 = wcsrchr(displayName, ';'); + if (!temp2) temp2 = displayName; + else temp2++; + // Could do without this, but more effort than it's worth. + wcscpy(keyName, temp2); + } + RegCloseKey(hKey); + } + if (list[i].dwType == RIM_TYPEKEYBOARD) { + if (!displayName[0]) wsprintfW(displayName, L"Raw Keyboard %i", keyboardCount++); + else wsprintfW(displayName, L"Raw KB: %s", keyName); + dm->AddDevice(new RawInputKeyboard(list[i].hDevice, displayName, instanceID)); + } + else if (list[i].dwType == RIM_TYPEMOUSE) { + if (!displayName[0]) wsprintfW(displayName, L"Raw Mouse %i", mouseCount++); + else wsprintfW(displayName, L"Raw MS: %s", keyName); + dm->AddDevice(new RawInputMouse(list[i].hDevice, displayName, instanceID, productID)); } } - free(list); } } + free(list); free(instanceID); dm->AddDevice(new RawInputKeyboard(0, L"Simulated Keyboard")); dm->AddDevice(new RawInputMouse(0, L"Simulated Mouse")); From 5fb6505e9d7ec121e088ee4e3beb3159260ec5a5 Mon Sep 17 00:00:00 2001 From: mattmenke Date: Sun, 19 Apr 2009 03:50:04 +0000 Subject: [PATCH 098/143] LilyPad: Oops. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1014 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/LilyPad/RawInput.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/LilyPad/RawInput.cpp b/plugins/LilyPad/RawInput.cpp index 8137ea2974..040594cc84 100644 --- a/plugins/LilyPad/RawInput.cpp +++ b/plugins/LilyPad/RawInput.cpp @@ -250,7 +250,7 @@ int InitializeRawInput() { void EnumRawInputDevices() { int count = 0; - if (InitializeRawInput() && pGetRawInputDeviceList(0, (unsigned int*)&count, sizeof(RAWINPUTDEVICELIST)) && count > 0) { + if (InitializeRawInput() && pGetRawInputDeviceList(0, (unsigned int*)&count, sizeof(RAWINPUTDEVICELIST)) != (UINT)-1 && count > 0) { wchar_t *instanceID = (wchar_t *) malloc(41000*sizeof(wchar_t)); wchar_t *keyName = instanceID + 11000; wchar_t *displayName = keyName + 10000; From c19de231f6ed98121745d38aa79e09592ff68e60 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Sun, 19 Apr 2009 05:24:20 +0000 Subject: [PATCH 099/143] Emitter: Implemented MOVAPS/MOVDQA, and renamed some vars/classes. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1015 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVUmicroLower.cpp | 2 +- pcsx2/x86/ix86-32/recVTLB.cpp | 4 +- pcsx2/x86/ix86/implement/xmm/movqss.h | 117 ++++++++------- pcsx2/x86/ix86/ix86.cpp | 202 ++++++++++++++++---------- pcsx2/x86/ix86/ix86_inlines.inl | 88 +---------- pcsx2/x86/ix86/ix86_instructions.h | 75 ++++++++-- pcsx2/x86/ix86/ix86_legacy.cpp | 30 ++-- pcsx2/x86/ix86/ix86_legacy_mmx.cpp | 8 +- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 44 +++--- pcsx2/x86/ix86/ix86_types.h | 66 +++++---- 10 files changed, 344 insertions(+), 292 deletions(-) diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index a54cefe86a..0a7a73117a 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -802,7 +802,7 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) // (this is one of my test cases for the new emitter --air) using namespace x86Emitter; - x86IndexReg thisreg( x86reg ); + iAddressReg thisreg( x86reg ); if ( _X ) iMOV(ptr32[thisreg+offset], 0x00000000); if ( _Y ) iMOV(ptr32[thisreg+offset+4], 0x00000000); diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 99ac820221..a518b04388 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -34,8 +34,8 @@ void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm ) { // (this is one of my test cases for the new emitter --air) - x86IndexReg src( srcRm ); - x86IndexReg dest( destRm ); + iAddressReg src( srcRm ); + iAddressReg dest( destRm ); iMOV( eax, ptr[src] ); iMOV( ptr[dest], eax ); diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index 22855aa80f..7fe6d23a29 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -18,22 +18,13 @@ #pragma once -// This helper function is used for instructions which enter XMM form when the 0x66 prefix -// is specified (indicating alternate operand type selection). -template< typename OperandType > -static __forceinline void preXMM( u8 opcode ) -{ - if( sizeof( OperandType ) == 16 ) - iWrite( 0x0f66 ); - else - iWrite( 0x0f ); - iWrite( opcode ); -} +////////////////////////////////////////////////////////////////////////////////////////// +// MMX / SSE Helper Functions! -// prefix - 0 indicates MMX, anything assumes XMM. 
-static __forceinline void SimdPrefix( u8 opcode, u8 prefix=0 ) +template< typename T > +__emitinline void SimdPrefix( u8 opcode, u8 prefix ) { - if( prefix != 0 ) + if( sizeof( T ) == 16 && prefix != 0 ) { iWrite( 0x0f00 | prefix ); iWrite( opcode ); @@ -43,67 +34,81 @@ static __forceinline void SimdPrefix( u8 opcode, u8 prefix=0 ) } template< u8 prefix, typename T, typename T2 > -static __forceinline void writeXMMop( const iRegister& to, const iRegister& from, u8 opcode ) +__emitinline void writeXMMop( u8 opcode, const iRegister& to, const iRegister& from ) { - SimdPrefix( opcode, prefix ); + SimdPrefix( opcode, prefix ); ModRM_Direct( to.Id, from.Id ); } template< u8 prefix, typename T > -static __noinline void writeXMMop( const iRegister& reg, const ModSibBase& sib, u8 opcode ) +void writeXMMop( u8 opcode, const iRegister& reg, const ModSibBase& sib ) { - SimdPrefix( opcode, prefix ); + SimdPrefix( opcode, prefix ); EmitSibMagic( reg.Id, sib ); } template< u8 prefix, typename T > -static __forceinline void writeXMMop( const iRegister& reg, const void* data, u8 opcode ) +__emitinline void writeXMMop( u8 opcode, const iRegister& reg, const void* data ) { - SimdPrefix( opcode, prefix ); + SimdPrefix( opcode, prefix ); iWriteDisp( reg.Id, data ); } -// ------------------------------------------------------------------------ -// MOVD has valid forms for MMX and XMM registers. +////////////////////////////////////////////////////////////////////////////////////////// // -template< typename T > -static __forceinline void iMOVDZX( const iRegisterSIMD& to, const iRegister32& from ) +template< u8 Prefix, typename OperandType > +class MovapsImpl { - preXMM( 0x6e ); - ModRM_Direct( to.Id, from.Id ); -} +public: + // ------------------------------------------------------------------------ + static __emitinline void Emit( u8 opcode, const iRegisterSIMD& to, const iRegisterSIMD from ) + { + if( to != from ) + writeXMMop( opcode, to, from ); + } -template< typename T> -static __forceinline void iMOVDZX( const iRegisterSIMD& to, const void* src ) -{ - preXMM( 0x6e ); - iWriteDisp( to.Id, src ); -} + // ------------------------------------------------------------------------ + static __emitinline void Emit( u8 opcode, const iRegisterSIMD& to, const void* from ) + { + writeXMMop( opcode, to, from ); + } -template< typename T> -static __forceinline void iMOVDZX( const iRegisterSIMD& to, const ModSibBase& src ) -{ - preXMM( 0x6e ); - EmitSibMagic( to.Id, src ); -} + // ------------------------------------------------------------------------ + static __emitinline void Emit( u8 opcode, const iRegisterSIMD& to, const ModSibBase& from ) + { + writeXMMop( opcode, to, from ); + } -template< typename T> -static __emitinline void iMOVD( const iRegister32& to, const iRegisterSIMD& from ) -{ - preXMM( 0x7e ); - ModRM_Direct( from.Id, to.Id ); -} + // ------------------------------------------------------------------------ + // Generally a Movaps/dqa instruction form only. + // Most SSE/MMX instructions don't have this form. + static __emitinline void Emit( u8 opcode, const void* to, const iRegisterSIMD& from ) + { + writeXMMop( opcode, from, to ); + } -template< typename T> -static __forceinline void iMOVD( void* dest, const iRegisterSIMD& from ) -{ - preXMM( 0x7e ); - iWriteDisp( from.Id, dest ); -} + // ------------------------------------------------------------------------ + // Generally a Movaps/dqa instruction form only. + // Most SSE/MMX instructions don't have this form. 
+ static __emitinline void Emit( u8 opcode, const ModSibBase& to, const iRegisterSIMD& from ) + { + writeXMMop( opcode, from, to ); + } -template< typename T> -static __noinline void iMOVD( const ModSibBase& dest, const iRegisterSIMD& from ) +}; + +// ------------------------------------------------------------------------ +template< u8 Prefix, u8 Opcode, u8 OpcodeAlt > +class MovapsImplAll { - preXMM( 0x7e ); - EmitSibMagic( from.Id, dest ); -} +protected: + typedef MovapsImpl m_128; + +public: + __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const { m_128::Emit( Opcode, to, from ); } + __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { m_128::Emit( Opcode, to, from ); } + __forceinline void operator()( const void* to, const iRegisterSSE& from ) const { m_128::Emit( OpcodeAlt, to, from ); } + __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { m_128::Emit( Opcode, to, from ); } + __noinline void operator()( const ModSibBase& to, const iRegisterSSE& from ) const { m_128::Emit( OpcodeAlt, to, from ); } +}; + diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 31f414d50c..aa85482572 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -67,6 +67,8 @@ __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT }; namespace x86Emitter { const iAddressIndexerBase ptr; +const iAddressIndexer ptr128; +const iAddressIndexer ptr64; const iAddressIndexer ptr32; const iAddressIndexer ptr16; const iAddressIndexer ptr8; @@ -74,7 +76,19 @@ const iAddressIndexer ptr8; // ------------------------------------------------------------------------ template< typename OperandType > const iRegister iRegister::Empty; -const x86IndexReg x86IndexReg::Empty; +const iAddressReg iAddressReg::Empty; + +const iRegisterSSE + xmm0( 0 ), xmm1( 1 ), + xmm2( 2 ), xmm3( 3 ), + xmm4( 4 ), xmm5( 5 ), + xmm6( 6 ), xmm7( 7 ); + +const iRegisterMMX + mm0( 0 ), mm1( 1 ), + mm2( 2 ), mm3( 3 ), + mm4( 4 ), mm5( 5 ), + mm6( 6 ), mm7( 7 ); const iRegister32 eax( 0 ), ebx( 3 ), @@ -378,6 +392,86 @@ __emitinline void iAdvancePtr( uint bytes ) x86Ptr += bytes; } +// ------------------------------------------------------------------------ +// Generates a 'reduced' ModSib form, which has valid Base, Index, and Scale values. +// Necessary because by default ModSib compounds registers into Index when possible. +// +// If the ModSib is in illegal form ([Base + Index*5] for example) then an assertion +// followed by an InvalidParameter Exception will be tossed around in haphazard +// fashion. +// +// Optimization Note: Currently VC does a piss poor job of inlining this, even though +// constant propagation *should* resove it to little or no code (VC's constprop fails +// on C++ class initializers). There is a work around [using array initializers instead] +// but it's too much trouble for code that isn't performance critical anyway. +// And, with luck, maybe VC10 will optimize it better and make it a non-issue. :D +// +void ModSibBase::Reduce() +{ + if( Index.IsStackPointer() ) + { + // esp cannot be encoded as the index, so move it to the Base, if possible. + // note: intentionally leave index assigned to esp also (generates correct + // encoding later, since ESP cannot be encoded 'alone') + + jASSUME( Scale == 0 ); // esp can't have an index modifier! + jASSUME( Base.IsEmpty() ); // base must be empty or else! 
+ + Base = Index; + return; + } + + // If no index reg, then load the base register into the index slot. + if( Index.IsEmpty() ) + { + Index = Base; + Scale = 0; + if( !Base.IsStackPointer() ) // prevent ESP from being encoded 'alone' + Base = iAddressReg::Empty; + return; + } + + + // The Scale has a series of valid forms, all shown here: + + switch( Scale ) + { + case 0: break; + case 1: Scale = 0; break; + case 2: Scale = 1; break; + + case 3: // becomes [reg*2+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 1; + break; + + case 4: Scale = 2; break; + + case 5: // becomes [reg*4+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 2; + break; + + case 6: // invalid! + assert( false ); + break; + + case 7: // so invalid! + assert( false ); + break; + + case 8: Scale = 3; break; + case 9: // becomes [reg*8+reg] + jASSUME( Base.IsEmpty() ); + Base = Index; + Scale = 3; + break; + } +} + + // ------------------------------------------------------------------------ // Internal implementation of EmitSibMagic which has been custom tailored // to optimize special forms of the Lea instructions accordingly, such @@ -641,99 +735,57 @@ __emitinline void iBSWAP( const iRegister32& to ) // MMX / XMM Instructions // (these will get put in their own file later) -__emitinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ) -{ - writeXMMop<0>( to, from, 0x6f ); -} +const MovapsImplAll< 0, 0x28, 0x29 > iMOVAPS; +const MovapsImplAll< 0, 0x10, 0x11 > iMOVUPS; +const MovapsImplAll< 0x66, 0x28, 0x29 > iMOVAPD; +const MovapsImplAll< 0x66, 0x10, 0x11 > iMOVUPD; -__noinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ) -{ - writeXMMop<0>( to, src, 0x6f ); -} - -__emitinline void iMOVQ( const iRegisterMMX& to, const void* src ) -{ - writeXMMop<0>( to, src, 0x6f ); -} +const MovapsImplAll< 0x66, 0x6f, 0x7f > iMOVDQA; +const MovapsImplAll< 0xf3, 0x6f, 0x7f > iMOVDQU; // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. -__emitinline void iMOVQZX( const iRegisterXMM& to, const iRegisterXMM& from ) -{ - writeXMMop<0xf3>( to, from, 0x7e ); -} +__emitinline void iMOVQZX( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop<0xf3>( 0x7e, to, from ); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. -__noinline void iMOVQZX( const iRegisterXMM& to, const ModSibBase& src ) -{ - writeXMMop<0xf3>( to, src, 0x7e ); -} +__noinline void iMOVQZX( const iRegisterSSE& to, const ModSibBase& src ) { writeXMMop<0xf3>( 0x7e, to, src ); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. 
-__emitinline void iMOVQZX( const iRegisterXMM& to, const void* src ) -{ - writeXMMop<0xf3>( to, src, 0x7e ); -} +__emitinline void iMOVQZX( const iRegisterSSE& to, const void* src ) { writeXMMop<0xf3>( 0x7e, to, src ); } -__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ) +__emitinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ) { if( to != from ) writeXMMop<0>( 0x6f, to, from ); } +__noinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ) { writeXMMop<0>( 0x6f, to, src ); } +__emitinline void iMOVQ( const iRegisterMMX& to, const void* src ) { writeXMMop<0>( 0x6f, to, src ); } +__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ) { writeXMMop<0>( 0x7f, from, dest ); } +__forceinline void iMOVQ( void* dest, const iRegisterMMX& from ) { writeXMMop<0>( 0x7f, from, dest ); } +__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterSSE& from ) { writeXMMop<0xf3>( 0x7e, from, dest ); } +__forceinline void iMOVQ( void* dest, const iRegisterSSE& from ) { writeXMMop<0xf3>( 0x7e, from, dest ); } +__forceinline void iMOVQ( const iRegisterSSE& to, const iRegisterMMX& from ) { writeXMMop<0xf3>( 0xd6, to, from ); } +__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterSSE& from ) { - writeXMMop<0>( from, dest, 0x7f ); -} + // Manual implementation of this form of MOVQ, since its parameters are unique in a way + // that breaks the template inference of writeXMMop(); -__forceinline void iMOVQ( void* dest, const iRegisterMMX& from ) -{ - writeXMMop<0>( from, dest, 0x7f ); -} - -__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterXMM& from ) -{ - writeXMMop<0xf3>( from, dest, 0x7e ); -} - -__forceinline void iMOVQ( void* dest, const iRegisterXMM& from ) -{ - writeXMMop<0xf3>( from, dest, 0x7e ); -} - -__forceinline void iMOVQ( const iRegisterXMM& to, const iRegisterMMX& from ) -{ - writeXMMop<0xf3>( to, from, 0xd6 ); -} - -__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterXMM& from ) -{ - writeXMMop<0xf2>( to, from, 0xd6 ); + SimdPrefix( 0xd6, 0xf2 ); + ModRM_Direct( to.Id, from.Id ); } ////////////////////////////////////////////////////////////////////////////////////////// // -__forceinline void iMOVSS( const iRegisterXMM& to, const iRegisterXMM& from ) -{ - if( to != from ) - writeXMMop<0xf3>( to, from, 0x10 ); -} +#define IMPLEMENT_iMOVS( ssd, prefix ) \ + __forceinline void iMOV##ssd( const iRegisterSSE& to, const iRegisterSSE& from ) { if( to != from ) writeXMMop( 0x10, to, from ); } \ + __forceinline void iMOV##ssd##ZX( const iRegisterSSE& to, const void* from ) { writeXMMop( 0x10, to, from ); } \ + __forceinline void iMOV##ssd##ZX( const iRegisterSSE& to, const ModSibBase& from ) { writeXMMop( 0x10, to, from ); } \ + __forceinline void iMOV##ssd( const void* to, const iRegisterSSE& from ) { writeXMMop( 0x11, from, to ); } \ + __forceinline void iMOV##ssd( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( 0x11, from, to ); } -__forceinline void iMOVSSZX( const iRegisterXMM& to, const void* from ) -{ - writeXMMop<0xf3>( to, from, 0x10 ); -} +IMPLEMENT_iMOVS( SS, 0xf3 ) +IMPLEMENT_iMOVS( SD, 0xf2 ) -__forceinline void iMOVSSZX( const iRegisterXMM& to, const ModSibBase& from ) -{ - writeXMMop<0xf3>( to, from, 0x10 ); -} - -__forceinline void iMOVSS( const void* to, const iRegisterXMM& from ) -{ - writeXMMop<0xf3>( from, to, 0x11 ); -} - -__forceinline void iMOVSS( const ModSibBase& to, const iRegisterXMM& from ) -{ - writeXMMop<0xf3>( from, to, 0x11 ); -} 
+////////////////////////////////////////////////////////////////////////////////////////// +// } diff --git a/pcsx2/x86/ix86/ix86_inlines.inl b/pcsx2/x86/ix86/ix86_inlines.inl index ebe2341c21..c9af4a8e04 100644 --- a/pcsx2/x86/ix86/ix86_inlines.inl +++ b/pcsx2/x86/ix86/ix86_inlines.inl @@ -53,27 +53,27 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // x86Register Method Implementations // - __forceinline iAddressInfo x86IndexReg::operator+( const x86IndexReg& right ) const + __forceinline iAddressInfo iAddressReg::operator+( const iAddressReg& right ) const { return iAddressInfo( *this, right ); } - __forceinline iAddressInfo x86IndexReg::operator+( const iAddressInfo& right ) const + __forceinline iAddressInfo iAddressReg::operator+( const iAddressInfo& right ) const { return right + *this; } - __forceinline iAddressInfo x86IndexReg::operator+( s32 right ) const + __forceinline iAddressInfo iAddressReg::operator+( s32 right ) const { return iAddressInfo( *this, right ); } - __forceinline iAddressInfo x86IndexReg::operator*( u32 right ) const + __forceinline iAddressInfo iAddressReg::operator*( u32 right ) const { return iAddressInfo( Empty, *this, right ); } - __forceinline iAddressInfo x86IndexReg::operator<<( u32 shift ) const + __forceinline iAddressInfo iAddressReg::operator<<( u32 shift ) const { return iAddressInfo( Empty, *this, 1< + __emitinline void iMOVDZX( const iRegisterSIMD& to, const iRegister32& from ) + { + Internal::writeXMMop<0x66>( 0x6e, to, from ); + } + template< typename T > + __emitinline void iMOVDZX( const iRegisterSIMD& to, const void* src ) + { + Internal::writeXMMop<0x66>( 0x6e, to, src ); + } + + template< typename T > + void iMOVDZX( const iRegisterSIMD& to, const ModSibBase& src ) + { + Internal::writeXMMop<0x66>( 0x6e, to, src ); + } + + template< typename T > + __emitinline void iMOVD( const iRegister32& to, const iRegisterSIMD& from ) + { + Internal::writeXMMop<0x66>( 0x7e, from, to ); + } + + template< typename T > + __emitinline void iMOVD( void* dest, const iRegisterSIMD& from ) + { + Internal::writeXMMop<0x66>( 0x7e, from, dest ); + } + + template< typename T > + void iMOVD( const ModSibBase& dest, const iRegisterSIMD& from ) + { + Internal::writeXMMop<0x66>( 0x7e, from, dest ); + } + + // ------------------------------------------------------------------------ + + + + // ------------------------------------------------------------------------ + extern void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ); - extern void iMOVQ( const iRegisterMMX& to, const iRegisterXMM& from ); - extern void iMOVQ( const iRegisterXMM& to, const iRegisterMMX& from ); + extern void iMOVQ( const iRegisterMMX& to, const iRegisterSSE& from ); + extern void iMOVQ( const iRegisterSSE& to, const iRegisterMMX& from ); - extern void iMOVQ( void* dest, const iRegisterXMM& from ); - extern void iMOVQ( const ModSibBase& dest, const iRegisterXMM& from ); + extern void iMOVQ( void* dest, const iRegisterSSE& from ); + extern void iMOVQ( const ModSibBase& dest, const iRegisterSSE& from ); extern void iMOVQ( void* dest, const iRegisterMMX& from ); extern void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ); extern void iMOVQ( const iRegisterMMX& to, const void* src ); extern void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ); - extern void iMOVQZX( const iRegisterXMM& to, const void* src ); - extern void iMOVQZX( const iRegisterXMM& to, const ModSibBase& src ); - extern void iMOVQZX( const 
iRegisterXMM& to, const iRegisterXMM& from ); + extern void iMOVQZX( const iRegisterSSE& to, const void* src ); + extern void iMOVQZX( const iRegisterSSE& to, const ModSibBase& src ); + extern void iMOVQZX( const iRegisterSSE& to, const iRegisterSSE& from ); - extern void iMOVSS( const iRegisterXMM& to, const iRegisterXMM& from ); - extern void iMOVSS( const void* to, const iRegisterXMM& from ); - extern void iMOVSS( const ModSibBase& to, const iRegisterXMM& from ); + extern void iMOVSS( const iRegisterSSE& to, const iRegisterSSE& from ); + extern void iMOVSS( const void* to, const iRegisterSSE& from ); + extern void iMOVSS( const ModSibBase& to, const iRegisterSSE& from ); + extern void iMOVSD( const iRegisterSSE& to, const iRegisterSSE& from ); + extern void iMOVSD( const void* to, const iRegisterSSE& from ); + extern void iMOVSD( const ModSibBase& to, const iRegisterSSE& from ); - extern void iMOVSSZX( const iRegisterXMM& to, const void* from ); - extern void iMOVSSZX( const iRegisterXMM& to, const ModSibBase& from ); + extern void iMOVSSZX( const iRegisterSSE& to, const void* from ); + extern void iMOVSSZX( const iRegisterSSE& to, const ModSibBase& from ); + extern void iMOVSDZX( const iRegisterSSE& to, const void* from ); + extern void iMOVSDZX( const iRegisterSSE& to, const ModSibBase& from ); } diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index d227d47ef2..e3daec9b59 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -48,13 +48,13 @@ static __forceinline ModSibBase _mrmhlp( x86IntRegType src ) template< typename ImmType > static __forceinline ModSibStrict _mhlp( x86IntRegType src ) { - return ModSibStrict( x86IndexReg::Empty, x86IndexReg(src) ); + return ModSibStrict( iAddressReg::Empty, iAddressReg(src) ); } template< typename ImmType > static __forceinline ModSibStrict _mhlp2( x86IntRegType src1, x86IntRegType src2 ) { - return ModSibStrict( x86IndexReg(src2), x86IndexReg(src1) ); + return ModSibStrict( iAddressReg(src2), iAddressReg(src1) ); } ////////////////////////////////////////////////////////////////////////////////////////// @@ -134,7 +134,7 @@ DEFINE_OPCODE_ONEREG_LEGACY( NEG ) // ------------------------------------------------------------------------ #define DEFINE_LEGACY_MOVEXTEND( form, destbits, srcbits ) \ emitterT void MOV##form##destbits##R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form( iRegister##destbits( to ), iRegister##srcbits( from ) ); } \ - emitterT void MOV##form##destbits##Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form( iRegister##destbits( to ), ptr##srcbits[x86IndexReg( from ) + offset] ); } \ + emitterT void MOV##form##destbits##Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form( iRegister##destbits( to ), ptr##srcbits[iAddressReg( from ) + offset] ); } \ emitterT void MOV##form##destbits##M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form( iRegister##destbits( to ), ptr##srcbits[from] ); } DEFINE_LEGACY_MOVEXTEND( SX, 32, 16 ) @@ -148,32 +148,32 @@ DEFINE_LEGACY_MOVEXTEND( ZX, 16, 8 ) emitterT void TEST32ItoR( x86IntRegType to, u32 from ) { iTEST( iRegister32(to), from ); } emitterT void TEST32ItoM( uptr to, u32 from ) { iTEST( ptr32[to], from ); } emitterT void TEST32RtoR( x86IntRegType to, x86IntRegType from ) { iTEST( iRegister32(to), iRegister32(from) ); } -emitterT void TEST32ItoRm( x86IntRegType to, u32 from ) { iTEST( ptr32[x86IndexReg(to)], from ); } +emitterT void TEST32ItoRm( 
x86IntRegType to, u32 from ) { iTEST( ptr32[iAddressReg(to)], from ); } emitterT void TEST16ItoR( x86IntRegType to, u16 from ) { iTEST( iRegister16(to), from ); } emitterT void TEST16ItoM( uptr to, u16 from ) { iTEST( ptr16[to], from ); } emitterT void TEST16RtoR( x86IntRegType to, x86IntRegType from ) { iTEST( iRegister16(to), iRegister16(from) ); } -emitterT void TEST16ItoRm( x86IntRegType to, u16 from ) { iTEST( ptr16[x86IndexReg(to)], from ); } +emitterT void TEST16ItoRm( x86IntRegType to, u16 from ) { iTEST( ptr16[iAddressReg(to)], from ); } emitterT void TEST8ItoR( x86IntRegType to, u8 from ) { iTEST( iRegister8(to), from ); } emitterT void TEST8ItoM( uptr to, u8 from ) { iTEST( ptr8[to], from ); } emitterT void TEST8RtoR( x86IntRegType to, x86IntRegType from ) { iTEST( iRegister8(to), iRegister8(from) ); } -emitterT void TEST8ItoRm( x86IntRegType to, u8 from ) { iTEST( ptr8[x86IndexReg(to)], from ); } +emitterT void TEST8ItoRm( x86IntRegType to, u8 from ) { iTEST( ptr8[iAddressReg(to)], from ); } // mov r32 to [r32<_< // - typedef iRegisterSIMD iRegisterXMM; + typedef iRegisterSIMD iRegisterSSE; typedef iRegisterSIMD iRegisterMMX; typedef iRegister iRegister32; typedef iRegister iRegister16; @@ -278,7 +278,7 @@ namespace x86Emitter iRegisterCL(): iRegister8( 1 ) {} }; - extern const iRegisterXMM + extern const iRegisterSSE xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; @@ -302,30 +302,30 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // Use 32 bit registers as out index register (for ModSib memory address calculations) - // Only x86IndexReg provides operators for constructing iAddressInfo types. + // Only iAddressReg provides operators for constructing iAddressInfo types. // - class x86IndexReg : public iRegister32 + class iAddressReg : public iRegister32 { public: - static const x86IndexReg Empty; // defined as an empty/unused value (-1) + static const iAddressReg Empty; // defined as an empty/unused value (-1) public: - x86IndexReg(): iRegister32() {} - x86IndexReg( const x86IndexReg& src ) : iRegister32( src.Id ) {} - x86IndexReg( const iRegister32& src ) : iRegister32( src ) {} - explicit x86IndexReg( int regId ) : iRegister32( regId ) {} + iAddressReg(): iRegister32() {} + iAddressReg( const iAddressReg& src ) : iRegister32( src.Id ) {} + iAddressReg( const iRegister32& src ) : iRegister32( src ) {} + explicit iAddressReg( int regId ) : iRegister32( regId ) {} // Returns true if the register is the stack pointer: ESP. bool IsStackPointer() const { return Id == 4; } - iAddressInfo operator+( const x86IndexReg& right ) const; + iAddressInfo operator+( const iAddressReg& right ) const; iAddressInfo operator+( const iAddressInfo& right ) const; iAddressInfo operator+( s32 right ) const; iAddressInfo operator*( u32 factor ) const; iAddressInfo operator<<( u32 shift ) const; - x86IndexReg& operator=( const iRegister32& src ) + iAddressReg& operator=( const iRegister32& src ) { Id = src.Id; return *this; @@ -337,13 +337,13 @@ namespace x86Emitter class iAddressInfo { public: - x86IndexReg Base; // base register (no scale) - x86IndexReg Index; // index reg gets multiplied by the scale + iAddressReg Base; // base register (no scale) + iAddressReg Index; // index reg gets multiplied by the scale int Factor; // scale applied to the index register, in factor form (not a shift!) 
s32 Displacement; // address displacement public: - __forceinline iAddressInfo( const x86IndexReg& base, const x86IndexReg& index, int factor=1, s32 displacement=0 ) : + __forceinline iAddressInfo( const iAddressReg& base, const iAddressReg& index, int factor=1, s32 displacement=0 ) : Base( base ), Index( index ), Factor( factor ), @@ -351,7 +351,7 @@ namespace x86Emitter { } - __forceinline explicit iAddressInfo( const x86IndexReg& index, int displacement=0 ) : + __forceinline explicit iAddressInfo( const iAddressReg& index, int displacement=0 ) : Base(), Index( index ), Factor(0), @@ -367,7 +367,7 @@ namespace x86Emitter { } - static iAddressInfo FromIndexReg( const x86IndexReg& index, int scale=0, s32 displacement=0 ); + static iAddressInfo FromIndexReg( const iAddressReg& index, int scale=0, s32 displacement=0 ); public: bool IsByteSizeDisp() const { return is_s8( Displacement ); } @@ -378,10 +378,10 @@ namespace x86Emitter return *this; } - __forceinline iAddressInfo& Add( const x86IndexReg& src ); + __forceinline iAddressInfo& Add( const iAddressReg& src ); __forceinline iAddressInfo& Add( const iAddressInfo& src ); - __forceinline iAddressInfo operator+( const x86IndexReg& right ) const { return iAddressInfo( *this ).Add( right ); } + __forceinline iAddressInfo operator+( const iAddressReg& right ) const { return iAddressInfo( *this ).Add( right ); } __forceinline iAddressInfo operator+( const iAddressInfo& right ) const { return iAddressInfo( *this ).Add( right ); } __forceinline iAddressInfo operator+( s32 imm ) const { return iAddressInfo( *this ).Add( imm ); } __forceinline iAddressInfo operator-( s32 imm ) const { return iAddressInfo( *this ).Add( -imm ); } @@ -402,15 +402,15 @@ namespace x86Emitter class ModSibBase { public: - x86IndexReg Base; // base register (no scale) - x86IndexReg Index; // index reg gets multiplied by the scale + iAddressReg Base; // base register (no scale) + iAddressReg Index; // index reg gets multiplied by the scale uint Scale; // scale applied to the index register, in scale/shift form s32 Displacement; // offset applied to the Base/Index registers. public: explicit ModSibBase( const iAddressInfo& src ); explicit ModSibBase( s32 disp ); - ModSibBase( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ); + ModSibBase( iAddressReg base, iAddressReg index, int scale=0, s32 displacement=0 ); bool IsByteSizeDisp() const { return is_s8( Displacement ); } @@ -424,7 +424,7 @@ namespace x86Emitter __forceinline ModSibBase operator-( const s32 imm ) const { return ModSibBase( *this ).Add( -imm ); } protected: - __forceinline void Reduce(); + void Reduce(); }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -439,7 +439,7 @@ namespace x86Emitter __forceinline explicit ModSibStrict( const iAddressInfo& src ) : ModSibBase( src ) {} __forceinline explicit ModSibStrict( s32 disp ) : ModSibBase( disp ) {} - __forceinline ModSibStrict( x86IndexReg base, x86IndexReg index, int scale=0, s32 displacement=0 ) : + __forceinline ModSibStrict( iAddressReg base, iAddressReg index, int scale=0, s32 displacement=0 ) : ModSibBase( base, index, scale, displacement ) {} __forceinline ModSibStrict& Add( s32 imm ) @@ -461,9 +461,9 @@ namespace x86Emitter // without doing anything and without compiler error. 
const ModSibBase& operator[]( const ModSibBase& src ) const { return src; } - __forceinline ModSibBase operator[]( x86IndexReg src ) const + __forceinline ModSibBase operator[]( iAddressReg src ) const { - return ModSibBase( src, x86IndexReg::Empty ); + return ModSibBase( src, iAddressReg::Empty ); } __forceinline ModSibBase operator[]( const iAddressInfo& src ) const @@ -497,9 +497,9 @@ namespace x86Emitter // without doing anything and without compiler error. const ModSibStrict& operator[]( const ModSibStrict& src ) const { return src; } - __forceinline ModSibStrict operator[]( x86IndexReg src ) const + __forceinline ModSibStrict operator[]( iAddressReg src ) const { - return ModSibStrict( src, x86IndexReg::Empty ); + return ModSibStrict( src, iAddressReg::Empty ); } __forceinline ModSibStrict operator[]( const iAddressInfo& src ) const @@ -523,6 +523,8 @@ namespace x86Emitter // ptr[] - use this form for instructions which can resolve the address operand size from // the other register operand sizes. extern const iAddressIndexerBase ptr; + extern const iAddressIndexer ptr128; + extern const iAddressIndexer ptr64; extern const iAddressIndexer ptr32; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms extern const iAddressIndexer ptr16; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms extern const iAddressIndexer ptr8; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms @@ -674,6 +676,16 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // + + extern const Internal::MovapsImplAll< 0, 0x28, 0x29 > iMOVAPS; + extern const Internal::MovapsImplAll< 0, 0x10, 0x11 > iMOVUPS; + + extern const Internal::MovapsImplAll< 0x66, 0x28, 0x29 > iMOVAPD; + extern const Internal::MovapsImplAll< 0x66, 0x10, 0x11 > iMOVUPD; + + extern const Internal::MovapsImplAll< 0x66, 0x6f, 0x7f > iMOVDQA; + extern const Internal::MovapsImplAll< 0xf3, 0x6f, 0x7f > iMOVDQU; + } #include "ix86_inlines.inl" From f32f705d1d0228a05c8cbf55c09ec9d4ac492811 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sun, 19 Apr 2009 05:42:33 +0000 Subject: [PATCH 100/143] Quick hack to fix Linux compiling. Delete some old code. Minor tweaks that I hadn't gotten around to committing. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1016 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Hw.cpp | 627 -------------------------- pcsx2/IopDma.cpp | 3 +- pcsx2/Vif.cpp | 4 +- pcsx2/VifDma.cpp | 1 - pcsx2/x86/ix86/Makefile.am | 5 +- pcsx2/x86/ix86/implement/bittest.h | 3 +- pcsx2/x86/ix86/implement/xmm/movqss.h | 2 + pcsx2/x86/ix86/ix86_types.h | 2 +- 8 files changed, 12 insertions(+), 635 deletions(-) diff --git a/pcsx2/Hw.cpp b/pcsx2/Hw.cpp index 159c121328..591de7138e 100644 --- a/pcsx2/Hw.cpp +++ b/pcsx2/Hw.cpp @@ -251,630 +251,3 @@ bool hwDmacSrcChain(DMACh *dma, int id) { return false; } - -// Original hwRead/Write32 functions .. left in for now, for troubleshooting purposes. -#if 0 -mem32_t __fastcall hwRead32(u32 mem) -{ - // *Performance Warning* This function is called -A-LOT. Be weary when making changes. It - // could impact FPS significantly. - - // Optimization Note: - // Shortcut for the INTC_STAT register, which is checked *very* frequently as part of the EE's - // vsynch timers. INTC_STAT has the disadvantage of being in the 0x1000f000 case, which has - // a lot of additional registers in it, and combined with it's call frequency is a bad thing. 
- - if(mem == INTC_STAT) - { - // This one is checked alot, so leave it commented out unless you love 600 meg logfiles. - //HW_LOG("DMAC_STAT Read 32bit %x\n", psHu32(0xe010)); - return psHu32(INTC_STAT); - } - - const u16 masked_mem = mem & 0xffff; - - // We optimize the hw register reads by breaking them into manageable 4k chunks (for a total of - // 16 cases spanning the 64k PS2 hw register memory map). It helps also that the EE is, for - // the most part, designed so that various classes of registers are sectioned off into these - // 4k segments. - - // Notes: Breaks from the switch statement will return a standard hw memory read. - // Special case handling of reads should use "return" directly. - - switch( masked_mem>>12 ) // switch out as according to the 4k page of the access. - { - // Counters Registers - // This code uses some optimized trickery to produce more compact output. - // See below for the "reference" block to get a better idea what this code does. :) - - case 0x0: // counters 0 and 1 - case 0x1: // counters 2 and 3 - { - const uint cntidx = masked_mem >> 11; // neat trick to scale the counter HW address into 0-3 range. - switch( (masked_mem>>4) & 0xf ) - { - case 0x0: return (u16)rcntRcount(cntidx); - case 0x1: return (u16)counters[cntidx].modeval; - case 0x2: return (u16)counters[cntidx].target; - case 0x3: return (u16)counters[cntidx].hold; - } - } - -#if 0 // Counters Reference Block (original case setup) - // 0x10000000 - 0x10000030 - case RCNT0_COUNT: return (u16)rcntRcount(0); - case RCNT0_MODE: return (u16)counters[0].modeval; - case RCNT0_TARGET: return (u16)counters[0].target; - case RCNT0_HOLD: return (u16)counters[0].hold; - - // 0x10000800 - 0x10000830 - case RCNT1_COUNT: return (u16)rcntRcount(1); - case RCNT1_MODE: return (u16)counters[1].modeval; - case RCNT1_TARGET: return (u16)counters[1].target; - case RCNT1_HOLD: return (u16)counters[1].hold; - - // 0x10001000 - 0x10001020 - case RCNT2_COUNT: return (u16)rcntRcount(2); - case RCNT2_MODE: return (u16)counters[2].modeval; - case RCNT2_TARGET: return (u16)counters[2].target; - - // 0x10001800 - 0x10001820 - case RCNT3_COUNT: return (u16)rcntRcount(3); - case RCNT3_MODE: return (u16)counters[3].modeval; - case RCNT3_TARGET: return (u16)counters[3].target; -#endif - - break; - - case 0x2: return ipuRead32( mem ); - - case 0xf: - switch( (masked_mem >> 4) & 0xff ) - { - case 0x01: - HW_LOG("INTC_MASK Read32, value=0x%x", psHu32(INTC_MASK)); - break; - - case 0x13: // 0x1000f130 - case 0x26: // 0x1000f260 SBUS? - case 0x41: // 0x1000f410 - case 0x43: // MCH_RICM - return 0; - - case 0x24: // 0x1000f240: SBUS - return psHu32(0xf240) | 0xF0000102; - - case 0x44: // 0x1000f440: MCH_DRD - - if( !((psHu32(0xf430) >> 6) & 0xF) ) - { - switch ((psHu32(0xf430)>>16) & 0xFFF) - { - //MCH_RICM: x:4|SA:12|x:5|SDEV:1|SOP:4|SBC:1|SDEV:5 - - case 0x21://INIT - if(rdram_sdevid < rdram_devices) - { - rdram_sdevid++; - return 0x1F; - } - return 0; - - case 0x23://CNFGA - return 0x0D0D; //PVER=3 | MVER=16 | DBL=1 | REFBIT=5 - - case 0x24://CNFGB - //0x0110 for PSX SVER=0 | CORG=8(5x9x7) | SPT=1 | DEVTYP=0 | BYTE=0 - return 0x0090; //SVER=0 | CORG=4(5x9x6) | SPT=1 | DEVTYP=0 | BYTE=0 - - case 0x40://DEVID - return psHu32(0xf430) & 0x1F; // =SDEV - } - } - return 0; - } - break; - - /////////////////////////////////////////////////////// - // Most of the following case handlers are for developer builds only (logging). - // It'll all optimize to ziltch in public release builds. 
- - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0a: - { - const char* regName = "Unknown"; - - switch (mem) - { - case D2_CHCR: regName = "DMA2_CHCR"; break; - case D2_MADR: regName = "DMA2_MADR"; break; - case D2_QWC: regName = "DMA2_QWC"; break; - case D2_TADR: regName = "DMA2_TADDR"; break; - case D2_ASR0: regName = "DMA2_ASR0"; break; - case D2_ASR1: regName = "DMA2_ASR1"; break; - case D2_SADR: regName = "DMA2_SADDR"; break; - } - - HW_LOG( "Hardware Read32 at 0x%x (%s), value=0x%x", mem, regName, psHu32(mem) ); - } - break; - - case 0x0b: - if( mem == D4_CHCR ) - HW_LOG("Hardware Read32 at 0x%x (IPU1:DMA4_CHCR), value=0x%x", mem, psHu32(mem)); - break; - - case 0x0c: - case 0x0d: - case 0x0e: - if( mem == DMAC_STAT ) - HW_LOG("DMAC_STAT Read32, value=0x%x", psHu32(DMAC_STAT)); - break; - - jNO_DEFAULT; - } - - // Optimization note: We masked 'mem' earlier, so it's safe to access PS2MEM_HW directly. - // (checked disasm, and MSVC 2008 fails to optimize it on its own) - - //return psHu32(mem); - return *((u32*)&PS2MEM_HW[masked_mem]); -} - - -__forceinline void __fastcall hwWrite32(u32 mem, u32 value) -{ - // Would ((mem >= IPU_CMD) && (mem <= IPU_TOP)) be better? -arcum42 - if ((mem >= IPU_CMD) && (mem < GIF_CTRL)) { //IPU regs - ipuWrite32(mem,value); - return; - } - if ((mem>=0x10003800) && (mem<0x10003c00)) { - vif0Write32(mem, value); - return; - } - if ((mem>=0x10003c00) && (mem<0x10004000)) { - vif1Write32(mem, value); - return; - } - - switch (mem) { - case RCNT0_COUNT: rcntWcount(0, value); break; - case RCNT0_MODE: rcntWmode(0, value); break; - case RCNT0_TARGET: rcntWtarget(0, value); break; - case RCNT0_TARGET: rcntWhold(0, value); break; - - case RCNT1_COUNT: rcntWcount(1, value); break; - case RCNT1_MODE: rcntWmode(1, value); break; - case RCNT1_TARGET: rcntWtarget(1, value); break; - case RCNT1_HOLD: rcntWhold(1, value); break; - - case RCNT2_COUNT: rcntWcount(2, value); break; - case RCNT2_MODE: rcntWmode(2, value); break; - case RCNT2_TARGET: rcntWtarget(2, value); break; - - case RCNT3_COUNT: rcntWcount(3, value); break; - case RCNT3_MODE: rcntWmode(3, value); break; - case RCNT3_TARGET: rcntWtarget(3, value); break; - - case GIF_CTRL: - //Console::WriteLn("GIF_CTRL write %x", params value); - psHu32(mem) = value & 0x8; - - if (value & 0x1) - gsGIFReset(); - else if( value & 8 ) - psHu32(GIF_STAT) |= 8; - else - psHu32(GIF_STAT) &= ~8; - - return; - - case GIF_MODE: - // need to set GIF_MODE (hamster ball) - psHu32(GIF_MODE) = value; - - if (value & 0x1) - psHu32(GIF_STAT)|= 0x1; - else - psHu32(GIF_STAT)&= ~0x1; - - if (value & 0x4) - psHu32(GIF_STAT)|= 0x4; - else - psHu32(GIF_STAT)&= ~0x4; - - break; - - case GIF_STAT: // stat is readonly - Console::WriteLn("Gifstat write value = %x", params value); - return; - - case D0_CHCR: // dma0 - vif0 - DMA_LOG("VIF0dma %lx", value); - DmaExec(dmaVIF0, mem, value); - break; - - case D1_CHCR: // dma1 - vif1 - chcr - DMA_LOG("VIF1dma CHCR %lx", value); - DmaExec(dmaVIF1, mem, value); - break; - -#ifdef PCSX2_DEVBUILD - case D1_MADR: // dma1 - vif1 - madr - HW_LOG("VIF1dma Madr %lx", value); - psHu32(mem) = value;//dma1 madr - break; - - case D1_QWC: // dma1 - vif1 - qwc - HW_LOG("VIF1dma QWC %lx", value); - psHu32(mem) = value;//dma1 qwc - break; - - case D1_TADR: // dma1 - vif1 - tadr - HW_LOG("VIF1dma TADR %lx", value); - psHu32(mem) = value;//dma1 tadr - break; - - case D1_ASR0: // dma1 - vif1 - asr0 - HW_LOG("VIF1dma ASR0 %lx", value); - psHu32(mem) = value;//dma1 asr0 
- break; - - case D1_ASR1: // dma1 - vif1 - asr1 - HW_LOG("VIF1dma ASR1 %lx", value); - psHu32(mem) = value;//dma1 asr1 - break; - - case D1_SADR: // dma1 - vif1 - sadr - HW_LOG("VIF1dma SADR %lx", value); - psHu32(mem) = value;//dma1 sadr - break; -#endif - - case D2_CHCR: // dma2 - gif - DMA_LOG("0x%8.8x hwWrite32: GSdma %lx", cpuRegs.cycle, value); - DmaExec(dmaGIF, mem, value); - break; - -#ifdef PCSX2_DEVBUILD - case D2_MADR: - psHu32(mem) = value;//dma2 madr - HW_LOG("Hardware write DMA2_MADR 32bit at %x with value %x",mem,value); - break; - - case D2_QWC: - psHu32(mem) = value;//dma2 qwc - HW_LOG("Hardware write DMA2_QWC 32bit at %x with value %x",mem,value); - break; - - case D2_TADR: - psHu32(mem) = value;//dma2 taddr - HW_LOG("Hardware write DMA2_TADDR 32bit at %x with value %x",mem,value); - break; - - case D2_ASR0: - psHu32(mem) = value;//dma2 asr0 - HW_LOG("Hardware write DMA2_ASR0 32bit at %x with value %x",mem,value); - break; - - case D2_ASR1: - psHu32(mem) = value;//dma2 asr1 - HW_LOG("Hardware write DMA2_ASR1 32bit at %x with value %x",mem,value); - break; - - case D2_SADR: - psHu32(mem) = value;//dma2 saddr - HW_LOG("Hardware write DMA2_SADDR 32bit at %x with value %x",mem,value); - break; -#endif - - case D3_CHCR: // dma3 - fromIPU - DMA_LOG("IPU0dma %lx", value); - DmaExec(dmaIPU0, mem, value); - break; - -#ifdef PCSX2_DEVBUILD - case D3_MADR: - psHu32(mem) = value;//dma2 madr - HW_LOG("Hardware write IPU0DMA_MADR 32bit at %x with value %x",mem,value); - break; - - case D3_QWC: - psHu32(mem) = value;//dma2 madr - HW_LOG("Hardware write IPU0DMA_QWC 32bit at %x with value %x",mem,value); - break; - - case D3_TADR: - psHu32(mem) = value;//dma2 tadr - HW_LOG("Hardware write IPU0DMA_TADR 32bit at %x with value %x",mem,value); - break; - - case D3_SADR: - psHu32(mem) = value;//dma2 saddr - HW_LOG("Hardware write IPU0DMA_SADDR 32bit at %x with value %x",mem,value); - break; -#endif - - case D4_CHCR: // dma4 - toIPU - DMA_LOG("IPU1dma %lx", value); - DmaExec(dmaIPU1, mem, value); - break; - -#ifdef PCSX2_DEVBUILD - case D4_MADR: - psHu32(mem) = value;//dma2 madr - HW_LOG("Hardware write IPU1DMA_MADR 32bit at %x with value %x",mem,value); - break; - - case D4_QWC: - psHu32(mem) = value;//dma2 madr - HW_LOG("Hardware write IPU1DMA_QWC 32bit at %x with value %x",mem,value); - break; - - case D4_TADR: - psHu32(mem) = value;//dma2 tadr - HW_LOG("Hardware write IPU1DMA_TADR 32bit at %x with value %x",mem,value); - break; - - case D4_SADR: - psHu32(mem) = value;//dma2 saddr - HW_LOG("Hardware write IPU1DMA_SADDR 32bit at %x with value %x",mem,value); - break; -#endif - case D5_CHCR: // dma5 - sif0 - DMA_LOG("SIF0dma %lx", value); - DmaExec(dmaSIF0, mem, value); - break; - - case D6_CHCR: // dma6 - sif1 - DMA_LOG("SIF1dma %lx", value); - DmaExec(dmaSIF1, mem, value); - break; - -#ifdef PCSX2_DEVBUILD - case D6_QWC: // dma6 - sif1 - qwc - HW_LOG("SIF1dma QWC = %lx", value); - psHu32(mem) = value; - break; - - case 0x1000c430: // dma6 - sif1 - tadr - HW_LOG("SIF1dma TADR = %lx", value); - psHu32(mem) = value; - break; -#endif - case D7_CHCR: // dma7 - sif2 - DMA_LOG("SIF2dma %lx", value); - DmaExec(dmaSIF2, mem, value); - break; - - case D8_CHCR: // dma8 - fromSPR - DMA_LOG("fromSPRdma %lx", value); - DmaExec(dmaSPR0, mem, value); - break; - - case 0x1000d400: // dma9 - toSPR - DMA_LOG("toSPRdma %lx", value); - DmaExec(dmaSPR1, mem, value); - break; - - case DMAC_CTRL: // DMAC_CTRL - HW_LOG("DMAC_CTRL Write 32bit %x", value); - psHu32(0xe000) = value; - break; - - case DMAC_STAT: // 
DMAC_STAT - HW_LOG("DMAC_STAT Write 32bit %x", value); - psHu16(0xe010)&= ~(value & 0xffff); // clear on 1 - psHu16(0xe012) ^= (u16)(value >> 16); - - cpuTestDMACInts(); - break; - - case INTC_STAT: // INTC_STAT - HW_LOG("INTC_STAT Write 32bit %x", value); - psHu32(0xf000)&=~value; - break; - - case INTC_MASK: // INTC_MASK - HW_LOG("INTC_MASK Write 32bit %x", value); - psHu32(0xf010) ^= (u16)value; - cpuTestINTCInts(); - break; - - case 0x1000f430://MCH_RICM: x:4|SA:12|x:5|SDEV:1|SOP:4|SBC:1|SDEV:5 - if ((((value >> 16) & 0xFFF) == 0x21) && (((value >> 6) & 0xF) == 1) && (((psHu32(0xf440) >> 7) & 1) == 0))//INIT & SRP=0 - rdram_sdevid = 0; // if SIO repeater is cleared, reset sdevid - psHu32(mem) = value & ~0x80000000; //kill the busy bit - break; - - case 0x1000f440://MCH_DRD: - psHu32(mem) = value; - break; - - case DMAC_ENABLEW: // DMAC_ENABLEW - HW_LOG("DMAC_ENABLEW Write 32bit %lx", value); - psHu32(0xf590) = value; - psHu32(0xf520) = value; - return; - - case 0x1000f200: - psHu32(mem) = value; - break; - - case SBUS_F220: - psHu32(mem) |= value; - break; - - case SBUS_SMFLG: - psHu32(mem) &= ~value; - break; - - case SBUS_F240: - if(!(value & 0x100)) - psHu32(mem) &= ~0x100; - else - psHu32(mem) |= 0x100; - break; - - case 0x1000f260: - psHu32(mem) = 0; - break; - - case 0x1000f130: - case 0x1000f410: - HW_LOG("Unknown Hardware write 32 at %x with value %x (%x)", mem, value, cpuRegs.CP0.n.Status.val); - break; - - default: - psHu32(mem) = value; - HW_LOG("Unknown Hardware write 32 at %x with value %x (%x)", mem, value, cpuRegs.CP0.n.Status.val); - break; - } -} - -#endif - -/* -__forceinline void hwWrite64(u32 mem, u64 value) -{ - u32 val32; - int i; - - if ((mem>=0x10002000) && (mem<=0x10002030)) { - ipuWrite64(mem, value); - return; - } - - if ((mem>=0x10003800) && (mem<0x10003c00)) { - vif0Write32(mem, value); return; - } - if ((mem>=0x10003c00) && (mem<0x10004000)) { - vif1Write32(mem, value); return; - } - - switch (mem) { - case GIF_CTRL: - DevCon::Status("GIF_CTRL write 64", params value); - psHu32(mem) = value & 0x8; - if(value & 0x1) { - gsGIFReset(); - //gsReset(); - } - else { - if( value & 8 ) psHu32(GIF_STAT) |= 8; - else psHu32(GIF_STAT) &= ~8; - } - - return; - - case GIF_MODE: -#ifdef GSPATH3FIX - Console::Status("GIFMODE64 %x", params value); -#endif - psHu64(GIF_MODE) = value; - if (value & 0x1) psHu32(GIF_STAT)|= 0x1; - else psHu32(GIF_STAT)&= ~0x1; - if (value & 0x4) psHu32(GIF_STAT)|= 0x4; - else psHu32(GIF_STAT)&= ~0x4; - break; - - case GIF_STAT: // stat is readonly - return; - - case 0x1000a000: // dma2 - gif - DMA_LOG("0x%8.8x hwWrite64: GSdma %lx", cpuRegs.cycle, value); - DmaExec(dmaGIF, mem, value); - break; - - case 0x1000e000: // DMAC_CTRL - HW_LOG("DMAC_CTRL Write 64bit %x", value); - psHu64(mem) = value; - break; - - case 0x1000e010: // DMAC_STAT - HW_LOG("DMAC_STAT Write 64bit %x", value); - val32 = (u32)value; - psHu16(0xe010)&= ~(val32 & 0xffff); // clear on 1 - val32 = val32 >> 16; - for (i=0; i<16; i++) { // reverse on 1 - if (val32 & (1<= 0x10004000 && mem < 0x10008000) { - WriteFIFO(mem, value); return; - } - - switch (mem) { - case 0x1000f590: // DMAC_ENABLEW - psHu32(0xf590) = *(u32*)value; - psHu32(0xf520) = *(u32*)value; - break; - case 0x1000f130: - case 0x1000f410: - case 0x1000f430: - break; - - default: - - psHu64(mem ) = value[0]; - psHu64(mem+8) = value[1]; - - HW_LOG("Unknown Hardware write 128 at %x with value %x_%x (status=%x)", mem, value[1], value[0], cpuRegs.CP0.n.Status.val); - break; - } -} -*/ \ No newline at end of file diff 
--git a/pcsx2/IopDma.cpp b/pcsx2/IopDma.cpp index 7798779a92..15aa276411 100644 --- a/pcsx2/IopDma.cpp +++ b/pcsx2/IopDma.cpp @@ -28,6 +28,7 @@ using namespace R3000A; // Should be a bool, and will be next time I break savestate. --arcum42 int iopsifbusy[2] = { 0, 0 }; +extern int eesifbusy[2]; static void __fastcall psxDmaGeneric(u32 madr, u32 bcr, u32 chcr, u32 spuCore, _SPU2writeDMA4Mem spu2WriteFunc, _SPU2readDMA4Mem spu2ReadFunc) { @@ -130,7 +131,7 @@ int psxDma7Interrupt() return 1; } -extern int eesifbusy[2]; + void psxDma9(u32 madr, u32 bcr, u32 chcr) { SIF_LOG("IOP: dmaSIF0 chcr = %lx, madr = %lx, bcr = %lx, tadr = %lx", chcr, madr, bcr, HW_DMA9_TADR); diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index a38fea9540..ccc5068b7a 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -26,9 +26,10 @@ #include "VifDma.h" VIFregisters *vifRegs; -u32* vifRow = NULL, *vifCol = NULL; +u32* vifRow = NULL; u32* vifMaskRegs = NULL; vifStruct *vif; +u16 vifqwc = 0; PCSX2_ALIGNED16(u32 g_vifRow0[4]); PCSX2_ALIGNED16(u32 g_vifCol0[4]); @@ -36,7 +37,6 @@ PCSX2_ALIGNED16(u32 g_vifRow1[4]); PCSX2_ALIGNED16(u32 g_vifCol1[4]); extern int g_vifCycles; -u16 vifqwc = 0; bool mfifodmairq = false; enum UnpackOffset diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 17d9310b21..3708bd8f14 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -36,7 +36,6 @@ extern "C" extern VIFregisters *vifRegs; extern u32* vifMaskRegs; extern u32* vifRow; - extern u32* vifCol; } PCSX2_ALIGNED16_EXTERN(u32 g_vifRow0[4]); diff --git a/pcsx2/x86/ix86/Makefile.am b/pcsx2/x86/ix86/Makefile.am index c09946d951..c146c7c4dc 100644 --- a/pcsx2/x86/ix86/Makefile.am +++ b/pcsx2/x86/ix86/Makefile.am @@ -1,8 +1,9 @@ -INCLUDES = -I@srcdir@/.. -I@srcdir@/../../ -I@srcdir@/../../../common/include -I@srcdir@/../../../3rdparty +INCLUDES = -I@srcdir@/.. 
-I@srcdir@/../../ -I@srcdir@/../../../common/include -I@srcdir@/../../../3rdparty -I/implement -I/implement/xmm noinst_LIBRARIES = libix86.a libix86_a_SOURCES = \ ix86.cpp ix86_cpudetect.cpp ix86_fpu.cpp ix86_jmp.cpp ix86_legacy_mmx.cpp ix86_tools.cpp ix86_3dnow.cpp \ ix86_legacy.cpp ix86_legacy_sse.cpp \ ix86_internal.h ix86_legacy_instructions.h ix86_macros.h ix86_sse_helpers.h ix86.h ix86_legacy_internal.h \ -ix86_instructions.h ix86_legacy_types.h ix86_types.h \ No newline at end of file +ix86_instructions.h ix86_legacy_types.h ix86_types.h \ +bittest.h dwshift.h group1.h group2.h group3.h incdec.h jmpcall.h movs.h test.h movqss.h \ No newline at end of file diff --git a/pcsx2/x86/ix86/implement/bittest.h b/pcsx2/x86/ix86/implement/bittest.h index a84e3cce6b..b4dc648b9c 100644 --- a/pcsx2/x86/ix86/implement/bittest.h +++ b/pcsx2/x86/ix86/implement/bittest.h @@ -56,6 +56,7 @@ public: } // ------------------------------------------------------------------------ +#ifndef __LINUX__ static __emitinline void Emit( void* bitbase, const iRegister& bitoffset ) { prefix16(); @@ -63,7 +64,7 @@ public: iWrite( 0xa3 | (InstType << 2) ); iWriteDisp( bitoffset.Id, bitbase.Id ); } - +#endif // ------------------------------------------------------------------------ static __emitinline void Emit( const ModSibBase& bitbase, const iRegister& bitoffset ) { diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index 7fe6d23a29..f2c29c187f 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -110,5 +110,7 @@ public: __forceinline void operator()( const void* to, const iRegisterSSE& from ) const { m_128::Emit( OpcodeAlt, to, from ); } __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { m_128::Emit( Opcode, to, from ); } __noinline void operator()( const ModSibBase& to, const iRegisterSSE& from ) const { m_128::Emit( OpcodeAlt, to, from ); } + + MovapsImplAll() {} //GCC. 
}; diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 0e0502552c..2bed0871d6 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -252,7 +252,7 @@ namespace x86Emitter iRegisterSIMD& operator=( const iRegisterSIMD& src ) { - Id = src.Id; + iRegister::Id = src.Id; return *this; } }; From 76e8b6586aeb51015751ab7079c113093a2c7afb Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sun, 19 Apr 2009 05:49:16 +0000 Subject: [PATCH 101/143] microVU: fixed various typos git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1017 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Alloc.inl | 8 +- pcsx2/x86/microVU_Analyze.inl | 11 +-- pcsx2/x86/microVU_Compile.inl | 3 +- pcsx2/x86/microVU_Lower.inl | 11 ++- pcsx2/x86/microVU_Misc.h | 2 +- pcsx2/x86/microVU_Tables.inl | 5 +- pcsx2/x86/microVU_Upper.inl | 181 +++++++++++++++++----------------- 7 files changed, 114 insertions(+), 107 deletions(-) diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index c31522eb88..1591091b3b 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -89,7 +89,7 @@ microVUt(void) mVUallocFMAC2b(int& Ft) { #define getReg3(reg, _reg_) { \ mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], (1 << (3 - _bc_))); \ if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, (1 << (3 - _bc_))); \ - mVUunpack_xyzw(reg, reg, _bc_); \ + mVUunpack_xyzw(reg, reg, 0); \ } #define getZero3SS(reg) { \ @@ -100,7 +100,7 @@ microVUt(void) mVUallocFMAC2b(int& Ft) { #define getZero3(reg) { \ if (_bc_w) { \ mVUloadReg(reg, (uptr)&mVU->regs->VF[0].UL[0], 1); \ - mVUunpack_xyzw(reg, reg, _bc_); \ + mVUunpack_xyzw(reg, reg, 0); \ } \ else { SSE_XORPS_XMM_to_XMM(reg, reg); } \ } @@ -112,7 +112,7 @@ microVUt(void) mVUallocFMAC3a(int& Fd, int& Fs, int& Ft) { Fd = xmmFs; if (_XYZW_SS) { getReg6(Fs, _Fs_); - if ( (_Ft_ == _Fs_) && ((_X && _bc_x) || (_Y && _bc_y) || (_Z && _bc_w) || (_W && _bc_w)) ) { + if ( (_Ft_ == _Fs_) && ((_X && _bc_x) || (_Y && _bc_y) || (_Z && _bc_z) || (_W && _bc_w)) ) { Ft = Fs; } else if (!_Ft_) { getZero3SS(Ft); } @@ -497,7 +497,7 @@ microVUt(void) mVUallocFMAC16b(int& ACCw, int& ACCr) { #define getReg9(reg, _reg_) { \ mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], 1); \ if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 1); \ - mVUunpack_xyzw(reg, reg, 3); \ + mVUunpack_xyzw(reg, reg, 0); \ } microVUt(void) mVUallocFMAC17a(int& Fs, int& Ft) { diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 4581d937fd..506144262b 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -51,7 +51,7 @@ microVUt(void) mVUanalyzeFMAC1(int Fd, int Fs, int Ft) { microVU* mVU = mVUx; - mVUlog("microVU: FMAC1 Opcode"); + //mVUlog("microVU: FMAC1 Opcode"); mVUinfo |= _doStatus; analyzeReg1(Fs); analyzeReg1(Ft); @@ -64,7 +64,7 @@ microVUt(void) mVUanalyzeFMAC1(int Fd, int Fs, int Ft) { microVUt(void) mVUanalyzeFMAC2(int Fs, int Ft) { microVU* mVU = mVUx; - mVUlog("microVU: FMAC2 Opcode"); + //mVUlog("microVU: FMAC2 Opcode"); analyzeReg1(Fs); analyzeReg2(Ft); } @@ -84,7 +84,7 @@ microVUt(void) mVUanalyzeFMAC2(int Fs, int Ft) { microVUt(void) mVUanalyzeFMAC3(int Fd, int Fs, int Ft) { microVU* mVU = mVUx; - mVUlog("microVU: FMAC3 Opcode"); + //mVUlog("microVU: FMAC3 Opcode"); mVUinfo |= _doStatus; analyzeReg1(Fs); analyzeReg3(Ft); @@ -101,7 +101,6 @@ microVUt(void) mVUanalyzeFMAC3(int Fd, int Fs, int Ft) { microVUt(void) mVUanalyzeFMAC4(int Fs, int Ft) { microVU* mVU = mVUx; - mVUlog("microVU: FMAC4 Opcode"); 
analyzeReg1(Fs); analyzeReg4(Ft); } @@ -226,7 +225,6 @@ microVUt(void) mVUanalyzeLQ(int Ft, int Is, bool writeIs) { microVUt(void) mVUanalyzeSQ(int Fs, int It, bool writeIt) { microVU* mVU = mVUx; - mVUlog("microVU: SQ Opcode"); analyzeReg1(Fs); analyzeVIreg1(It); if (writeIt) { analyzeVIreg2(It, 1); } @@ -296,7 +294,7 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) { if (mVUcount < 4) { mVUregs.needExactMatch = 1; } int curPC = iPC; for (int i = mVUcount, j = 0; i > 1; i--, j++) { - incPC(-2); + incPC2(-2); if (doStatus) { mVUinfo |= _doMac; if (j >= 3) { break; } } } iPC = curPC; @@ -314,7 +312,6 @@ microVUt(void) mVUanalyzeMflag(int Is, int It) { microVUt(void) mVUanalyzeXGkick(int Fs, int xCycles) { microVU* mVU = mVUx; - mVUlog("microVU: XGkick Opcode"); analyzeVIreg1(Fs); analyzeXGkick1(); analyzeXGkick2(xCycles); diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index ad468e62f7..15b3a145be 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -245,6 +245,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { u8* thisPtr = x86Ptr; if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUlog("microVU: invalid startPC"); } + //startPC &= (vuIndex ? 0x3ff8 : 0xff8); //mVUlog("mVUcompile Search"); // Searches for Existing Compiled Block (if found, then returns; else, compile) @@ -301,7 +302,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { int x; for (x = 0; x < (vuIndex ? (0x3fff/8) : (0xfff/8)); x++) { if (isEOB) { x = 0xffff; } - if (isNOP) { incPC(1); doUpperOp(); if (curI & _Ibit_) { incPC(-1); mVU->iReg = curI; incPC(-1); } } + if (isNOP) { incPC(1); doUpperOp(); if (curI & _Ibit_) { incPC(-1); mVU->iReg = curI; incPC(1); } } else if (!swapOps) { incPC(1); doUpperOp(); incPC(-1); mVUopL(); incPC(1); } else { mVUopL(); incPC(1); doUpperOp(); } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 7c00a6b7a6..c70ce5f6aa 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -721,7 +721,7 @@ microVUf(void) mVU_ILW() { else { mVUlog("ILW"); if (!_Fs_) { - MOVZX32M16toR( gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS ); + MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS); mVUallocVIb(gprT1, _Ft_); } else { @@ -887,6 +887,7 @@ microVUf(void) mVU_SQ() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 0); } else { + mVUlog("SQ"); if (!_Ft_) { getReg7(xmmFs, _Fs_); mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W); @@ -905,6 +906,7 @@ microVUf(void) mVU_SQD() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 1); } else { + mVUlog("SQD"); if (!_Ft_) { getReg7(xmmFs, _Fs_); mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W); @@ -924,6 +926,7 @@ microVUf(void) mVU_SQI() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 1); } else { + mVUlog("SQI"); if (!_Ft_) { getReg7(xmmFs, _Fs_); mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W); @@ -948,6 +951,7 @@ microVUf(void) mVU_RINIT() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeR1(_Fs_, _Fsf_); } else { + mVUlog("RINIT"); if (_Fs_ || (_Fsf_ == 3)) { getReg8(gprR, _Fs_, _Fsf_); AND32ItoR(gprR, 0x007fffff); @@ -970,13 +974,14 @@ microVUt(void) mVU_RGET_() { microVUf(void) mVU_RGET() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeR2(_Ft_, 1); } - else { mVU_RGET_(); } + else { mVUlog("RGET"); mVU_RGET_(); } } microVUf(void) mVU_RNEXT() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeR2(_Ft_, 0); } else { + 
mVUlog("RNEXT"); // algorithm from www.project-fao.org MOV32RtoR(gprT1, gprR); SHR32ItoR(gprT1, 4); @@ -999,6 +1004,7 @@ microVUf(void) mVU_RXOR() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeR1(_Fs_, _Fsf_); } else { + mVUlog("RXOR"); if (_Fs_ || (_Fsf_ == 3)) { getReg8(gprT1, _Fs_, _Fsf_); AND32ItoR(gprT1, 0x7fffff); @@ -1059,6 +1065,7 @@ microVUf(void) mVU_XGKICK() { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeXGkick(_Fs_, 4); } else { + mVUlog("XGkick"); mVUallocVIa(gprT2, _Fs_); // gprT2 = ECX for __fastcall PUSH32R(gprR); // gprR = EDX is volatile so backup CALLFunc((uptr)mVU_XGKICK_); diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 7937d2990b..2f0ea1c8ad 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -226,7 +226,7 @@ declareAllVariables #define mmVI(_VIreg_) (_VIreg_ - 1) #ifdef mVUdebug -#define mVUlog Console::Notice +#define mVUlog Console::Status #define mVUdebug1() { \ if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \ diff --git a/pcsx2/x86/microVU_Tables.inl b/pcsx2/x86/microVU_Tables.inl index d79cd3c74a..c6e76ab38e 100644 --- a/pcsx2/x86/microVU_Tables.inl +++ b/pcsx2/x86/microVU_Tables.inl @@ -747,8 +747,11 @@ microVUf(void) mVULowerOP_T3_00() { doTableStuff(mVULowerOP_T3_00_OPCODE, ((mVUg microVUf(void) mVULowerOP_T3_01() { doTableStuff(mVULowerOP_T3_01_OPCODE, ((mVUgetCode >> 6) & 0x1f)); } microVUf(void) mVULowerOP_T3_10() { doTableStuff(mVULowerOP_T3_10_OPCODE, ((mVUgetCode >> 6) & 0x1f)); } microVUf(void) mVULowerOP_T3_11() { doTableStuff(mVULowerOP_T3_11_OPCODE, ((mVUgetCode >> 6) & 0x1f)); } -microVUf(void) mVUunknown() { SysPrintf("mVUunknown<%d,%d> : Unknown Micro VU opcode called\n", vuIndex, recPass); } microVUf(void) mVUopU() { doTableStuff(mVU_UPPER_OPCODE, (mVUgetCode & 0x3f)); } // Gets Upper Opcode microVUf(void) mVUopL() { doTableStuff(mVULOWER_OPCODE, (mVUgetCode >> 25)); } // Gets Lower Opcode +microVUf(void) mVUunknown() { + //if (recPass) return; + SysPrintf("mVUunknown<%d,%d> : Unknown Micro VU opcode called (%x)\n", vuIndex, recPass, mVUgetCode); +} #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index 94582273f9..dcbc757a13 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -52,7 +52,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX SSE_MOVMSKPS_XMM_to_R32(mReg, regT2); // Move the sign bits of the t1reg - AND16ItoR(mReg, AND_XYZW ); // Grab "Is Signed" bits from the previous calculation + AND16ItoR(mReg, AND_XYZW); // Grab "Is Signed" bits from the previous calculation pjmp = JZ8(0); // Skip if none are if (doMac) SHL16ItoR(mReg, 4 + ADD_XYZW); if (doStatus) OR16ItoR(sReg, 0x82); // SS, S flags @@ -61,7 +61,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX //-------------------------Check for Zero flags------------------------------ - AND16ItoR(gprT2, AND_XYZW ); // Grab "Is Zero" bits from the previous calculation + AND16ItoR(gprT2, AND_XYZW); // Grab "Is Zero" bits from the previous calculation pjmp = JZ8(0); // Skip if none are if (doMac) { SHIFT_XYZW(gprT2); OR32RtoR(mReg, gprT2); } if (doStatus) { OR16ItoR(sReg, 0x41); } // ZS, Z flags @@ -450,100 +450,97 @@ microVUf(void) mVU_ABS() { mVUallocFMAC2b(Ft); } } -microVUf(void) mVU_ADD() { mVU_FMAC1(ADD); } -microVUf(void) mVU_ADDi() { mVU_FMAC6(ADD); } -microVUf(void) mVU_ADDq() { mVU_FMAC22(ADD); } -microVUf(void) mVU_ADDx() { 
mVU_FMAC3(ADD); } -microVUf(void) mVU_ADDy() { mVU_FMAC3(ADD); } -microVUf(void) mVU_ADDz() { mVU_FMAC3(ADD); } -microVUf(void) mVU_ADDw() { mVU_FMAC3(ADD); } -microVUf(void) mVU_ADDA() { mVU_FMAC4(ADD); } -microVUf(void) mVU_ADDAi() { mVU_FMAC7(ADD); } -microVUf(void) mVU_ADDAq() { mVU_FMAC23(ADD); } -microVUf(void) mVU_ADDAx() { mVU_FMAC5(ADD); } -microVUf(void) mVU_ADDAy() { mVU_FMAC5(ADD); } -microVUf(void) mVU_ADDAz() { mVU_FMAC5(ADD); } -microVUf(void) mVU_ADDAw() { mVU_FMAC5(ADD); } -microVUf(void) mVU_SUB() { mVU_FMAC1(SUB); } -microVUf(void) mVU_SUBi() { mVU_FMAC6(SUB); } -microVUf(void) mVU_SUBq() { mVU_FMAC22(SUB); } -microVUf(void) mVU_SUBx() { mVU_FMAC3(SUB); } -microVUf(void) mVU_SUBy() { mVU_FMAC3(SUB); } -microVUf(void) mVU_SUBz() { mVU_FMAC3(SUB); } -microVUf(void) mVU_SUBw() { mVU_FMAC3(SUB); } -microVUf(void) mVU_SUBA() { mVU_FMAC4(SUB); } -microVUf(void) mVU_SUBAi() { mVU_FMAC7(SUB); } -microVUf(void) mVU_SUBAq() { mVU_FMAC23(SUB); } -microVUf(void) mVU_SUBAx() { mVU_FMAC5(SUB); } -microVUf(void) mVU_SUBAy() { mVU_FMAC5(SUB); } -microVUf(void) mVU_SUBAz() { mVU_FMAC5(SUB); } -microVUf(void) mVU_SUBAw() { mVU_FMAC5(SUB); } -microVUf(void) mVU_MUL() { mVU_FMAC1(MUL); } -microVUf(void) mVU_MULi() { mVU_FMAC6(MUL); } -microVUf(void) mVU_MULq() { mVU_FMAC22(MUL); } -microVUf(void) mVU_MULx() { mVU_FMAC3(MUL); } -microVUf(void) mVU_MULy() { mVU_FMAC3(MUL); } -microVUf(void) mVU_MULz() { mVU_FMAC3(MUL); } -microVUf(void) mVU_MULw() { mVU_FMAC3(MUL); } -microVUf(void) mVU_MULA() { mVU_FMAC4(MUL); } -microVUf(void) mVU_MULAi() { mVU_FMAC7(MUL); } -microVUf(void) mVU_MULAq() { mVU_FMAC23(MUL); } -microVUf(void) mVU_MULAx() { mVU_FMAC5(MUL); } -microVUf(void) mVU_MULAy() { mVU_FMAC5(MUL); } -microVUf(void) mVU_MULAz() { mVU_FMAC5(MUL); } -microVUf(void) mVU_MULAw() { mVU_FMAC5(MUL); } -microVUf(void) mVU_MADD() { mVU_FMAC8(ADD); } -microVUf(void) mVU_MADDi() { mVU_FMAC12(ADD); } -microVUf(void) mVU_MADDq() { mVU_FMAC24(ADD); } -microVUf(void) mVU_MADDx() { mVU_FMAC10(ADD); } -microVUf(void) mVU_MADDy() { mVU_FMAC10(ADD); } -microVUf(void) mVU_MADDz() { mVU_FMAC10(ADD); } -microVUf(void) mVU_MADDw() { mVU_FMAC10(ADD); } -microVUf(void) mVU_MADDA() { mVU_FMAC14(ADD); } -microVUf(void) mVU_MADDAi() { mVU_FMAC16(ADD); } -microVUf(void) mVU_MADDAq() { mVU_FMAC26(ADD); } -microVUf(void) mVU_MADDAx() { mVU_FMAC15(ADD); } -microVUf(void) mVU_MADDAy() { mVU_FMAC15(ADD); } -microVUf(void) mVU_MADDAz() { mVU_FMAC15(ADD); } -microVUf(void) mVU_MADDAw() { mVU_FMAC15(ADD); } -microVUf(void) mVU_MSUB() { mVU_FMAC9(SUB); } -microVUf(void) mVU_MSUBi() { mVU_FMAC13(SUB); } -microVUf(void) mVU_MSUBq() { mVU_FMAC25(SUB); } -microVUf(void) mVU_MSUBx() { mVU_FMAC11(SUB); } -microVUf(void) mVU_MSUBy() { mVU_FMAC11(SUB); } -microVUf(void) mVU_MSUBz() { mVU_FMAC11(SUB); } -microVUf(void) mVU_MSUBw() { mVU_FMAC11(SUB); } -microVUf(void) mVU_MSUBA() { mVU_FMAC14(SUB); } -microVUf(void) mVU_MSUBAi() { mVU_FMAC16(SUB); } -microVUf(void) mVU_MSUBAq() { mVU_FMAC26(SUB); } -microVUf(void) mVU_MSUBAx() { mVU_FMAC15(SUB); } -microVUf(void) mVU_MSUBAy() { mVU_FMAC15(SUB); } -microVUf(void) mVU_MSUBAz() { mVU_FMAC15(SUB); } -microVUf(void) mVU_MSUBAw() { mVU_FMAC15(SUB); } -microVUf(void) mVU_MAX() { mVU_FMAC1(MAX); } -microVUf(void) mVU_MAXi() { mVU_FMAC6(MAX); } -microVUf(void) mVU_MAXx() { mVU_FMAC3(MAX); } -microVUf(void) mVU_MAXy() { mVU_FMAC3(MAX); } -microVUf(void) mVU_MAXz() { mVU_FMAC3(MAX); } -microVUf(void) mVU_MAXw() { mVU_FMAC3(MAX); } -microVUf(void) mVU_MINI() { mVU_FMAC1(MIN); } -microVUf(void) 
mVU_MINIi() { mVU_FMAC6(MIN); } -microVUf(void) mVU_MINIx() { mVU_FMAC3(MIN); } -microVUf(void) mVU_MINIy() { mVU_FMAC3(MIN); } -microVUf(void) mVU_MINIz() { mVU_FMAC3(MIN); } -microVUf(void) mVU_MINIw() { mVU_FMAC3(MIN); } -microVUf(void) mVU_OPMULA() { mVU_FMAC18(MUL); } -microVUf(void) mVU_OPMSUB() { mVU_FMAC19(SUB); } -microVUf(void) mVU_NOP() { - microVU* mVU = mVUx; - if (!recPass) {} - else {} -} +microVUf(void) mVU_ADD() { mVU_FMAC1(ADD); mVUlog("ADD"); } +microVUf(void) mVU_ADDi() { mVU_FMAC6(ADD); mVUlog("ADDi"); } +microVUf(void) mVU_ADDq() { mVU_FMAC22(ADD); mVUlog("ADDq"); } +microVUf(void) mVU_ADDx() { mVU_FMAC3(ADD); mVUlog("ADDx"); } +microVUf(void) mVU_ADDy() { mVU_FMAC3(ADD); mVUlog("ADDy"); } +microVUf(void) mVU_ADDz() { mVU_FMAC3(ADD); mVUlog("ADDz"); } +microVUf(void) mVU_ADDw() { mVU_FMAC3(ADD); mVUlog("ADDw"); } +microVUf(void) mVU_ADDA() { mVU_FMAC4(ADD); mVUlog("ADDA"); } +microVUf(void) mVU_ADDAi() { mVU_FMAC7(ADD); mVUlog("ADDAi"); } +microVUf(void) mVU_ADDAq() { mVU_FMAC23(ADD); mVUlog("ADDAq"); } +microVUf(void) mVU_ADDAx() { mVU_FMAC5(ADD); mVUlog("ADDAx"); } +microVUf(void) mVU_ADDAy() { mVU_FMAC5(ADD); mVUlog("ADDAy"); } +microVUf(void) mVU_ADDAz() { mVU_FMAC5(ADD); mVUlog("ADDAz"); } +microVUf(void) mVU_ADDAw() { mVU_FMAC5(ADD); mVUlog("ADDAw"); } +microVUf(void) mVU_SUB() { mVU_FMAC1(SUB); mVUlog("SUB"); } +microVUf(void) mVU_SUBi() { mVU_FMAC6(SUB); mVUlog("SUBi"); } +microVUf(void) mVU_SUBq() { mVU_FMAC22(SUB); mVUlog("SUBq"); } +microVUf(void) mVU_SUBx() { mVU_FMAC3(SUB); mVUlog("SUBx"); } +microVUf(void) mVU_SUBy() { mVU_FMAC3(SUB); mVUlog("SUBy"); } +microVUf(void) mVU_SUBz() { mVU_FMAC3(SUB); mVUlog("SUBz"); } +microVUf(void) mVU_SUBw() { mVU_FMAC3(SUB); mVUlog("SUBw"); } +microVUf(void) mVU_SUBA() { mVU_FMAC4(SUB); mVUlog("SUBA"); } +microVUf(void) mVU_SUBAi() { mVU_FMAC7(SUB); mVUlog("SUBAi"); } +microVUf(void) mVU_SUBAq() { mVU_FMAC23(SUB); mVUlog("SUBAq"); } +microVUf(void) mVU_SUBAx() { mVU_FMAC5(SUB); mVUlog("SUBAx"); } +microVUf(void) mVU_SUBAy() { mVU_FMAC5(SUB); mVUlog("SUBAy"); } +microVUf(void) mVU_SUBAz() { mVU_FMAC5(SUB); mVUlog("SUBAz"); } +microVUf(void) mVU_SUBAw() { mVU_FMAC5(SUB); mVUlog("SUBAw"); } +microVUf(void) mVU_MUL() { mVU_FMAC1(MUL); mVUlog("MUL"); } +microVUf(void) mVU_MULi() { mVU_FMAC6(MUL); mVUlog("MULi"); } +microVUf(void) mVU_MULq() { mVU_FMAC22(MUL); mVUlog("MULq"); } +microVUf(void) mVU_MULx() { mVU_FMAC3(MUL); mVUlog("MULx"); } +microVUf(void) mVU_MULy() { mVU_FMAC3(MUL); mVUlog("MULy"); } +microVUf(void) mVU_MULz() { mVU_FMAC3(MUL); mVUlog("MULz"); } +microVUf(void) mVU_MULw() { mVU_FMAC3(MUL); mVUlog("MULw"); } +microVUf(void) mVU_MULA() { mVU_FMAC4(MUL); mVUlog("MULA"); } +microVUf(void) mVU_MULAi() { mVU_FMAC7(MUL); mVUlog("MULAi"); } +microVUf(void) mVU_MULAq() { mVU_FMAC23(MUL); mVUlog("MULAq"); } +microVUf(void) mVU_MULAx() { mVU_FMAC5(MUL); mVUlog("MULAx"); } +microVUf(void) mVU_MULAy() { mVU_FMAC5(MUL); mVUlog("MULAy"); } +microVUf(void) mVU_MULAz() { mVU_FMAC5(MUL); mVUlog("MULAz"); } +microVUf(void) mVU_MULAw() { mVU_FMAC5(MUL); mVUlog("MULAw"); } +microVUf(void) mVU_MADD() { mVU_FMAC8(ADD); mVUlog("MADD"); } +microVUf(void) mVU_MADDi() { mVU_FMAC12(ADD); mVUlog("MADDi"); } +microVUf(void) mVU_MADDq() { mVU_FMAC24(ADD); mVUlog("MADDq"); } +microVUf(void) mVU_MADDx() { mVU_FMAC10(ADD); mVUlog("MADDx"); } +microVUf(void) mVU_MADDy() { mVU_FMAC10(ADD); mVUlog("MADDy"); } +microVUf(void) mVU_MADDz() { mVU_FMAC10(ADD); mVUlog("MADDz"); } +microVUf(void) mVU_MADDw() { mVU_FMAC10(ADD); mVUlog("MADDw"); } 
+microVUf(void) mVU_MADDA() { mVU_FMAC14(ADD); mVUlog("MADDA"); } +microVUf(void) mVU_MADDAi() { mVU_FMAC16(ADD); mVUlog("MADDAi"); } +microVUf(void) mVU_MADDAq() { mVU_FMAC26(ADD); mVUlog("MADDAq"); } +microVUf(void) mVU_MADDAx() { mVU_FMAC15(ADD); mVUlog("MADDAx"); } +microVUf(void) mVU_MADDAy() { mVU_FMAC15(ADD); mVUlog("MADDAy"); } +microVUf(void) mVU_MADDAz() { mVU_FMAC15(ADD); mVUlog("MADDAz"); } +microVUf(void) mVU_MADDAw() { mVU_FMAC15(ADD); mVUlog("MADDAw"); } +microVUf(void) mVU_MSUB() { mVU_FMAC9(SUB); mVUlog("MSUB"); } +microVUf(void) mVU_MSUBi() { mVU_FMAC13(SUB); mVUlog("MSUBi"); } +microVUf(void) mVU_MSUBq() { mVU_FMAC25(SUB); mVUlog("MSUBq"); } +microVUf(void) mVU_MSUBx() { mVU_FMAC11(SUB); mVUlog("MSUBx"); } +microVUf(void) mVU_MSUBy() { mVU_FMAC11(SUB); mVUlog("MSUBy"); } +microVUf(void) mVU_MSUBz() { mVU_FMAC11(SUB); mVUlog("MSUBz"); } +microVUf(void) mVU_MSUBw() { mVU_FMAC11(SUB); mVUlog("MSUBw"); } +microVUf(void) mVU_MSUBA() { mVU_FMAC14(SUB); mVUlog("MSUBA"); } +microVUf(void) mVU_MSUBAi() { mVU_FMAC16(SUB); mVUlog("MSUBAi"); } +microVUf(void) mVU_MSUBAq() { mVU_FMAC26(SUB); mVUlog("MSUBAq"); } +microVUf(void) mVU_MSUBAx() { mVU_FMAC15(SUB); mVUlog("MSUBAx"); } +microVUf(void) mVU_MSUBAy() { mVU_FMAC15(SUB); mVUlog("MSUBAy"); } +microVUf(void) mVU_MSUBAz() { mVU_FMAC15(SUB); mVUlog("MSUBAz"); } +microVUf(void) mVU_MSUBAw() { mVU_FMAC15(SUB); mVUlog("MSUBAw"); } +microVUf(void) mVU_MAX() { mVU_FMAC1(MAX); mVUlog("MAX"); } +microVUf(void) mVU_MAXi() { mVU_FMAC6(MAX); mVUlog("MAXi"); } +microVUf(void) mVU_MAXx() { mVU_FMAC3(MAX); mVUlog("MAXq"); } +microVUf(void) mVU_MAXy() { mVU_FMAC3(MAX); mVUlog("MAXy"); } +microVUf(void) mVU_MAXz() { mVU_FMAC3(MAX); mVUlog("MAXz"); } +microVUf(void) mVU_MAXw() { mVU_FMAC3(MAX); mVUlog("MAXw"); } +microVUf(void) mVU_MINI() { mVU_FMAC1(MIN); mVUlog("MINI"); } +microVUf(void) mVU_MINIi() { mVU_FMAC6(MIN); mVUlog("MINIi"); } +microVUf(void) mVU_MINIx() { mVU_FMAC3(MIN); mVUlog("MINIx"); } +microVUf(void) mVU_MINIy() { mVU_FMAC3(MIN); mVUlog("MINIy"); } +microVUf(void) mVU_MINIz() { mVU_FMAC3(MIN); mVUlog("MINIz"); } +microVUf(void) mVU_MINIw() { mVU_FMAC3(MIN); mVUlog("MINIw"); } +microVUf(void) mVU_OPMULA() { mVU_FMAC18(MUL); mVUlog("OPMULA"); } +microVUf(void) mVU_OPMSUB() { mVU_FMAC19(SUB); mVUlog("OPMSUB"); } +microVUf(void) mVU_NOP() { /*mVUlog("NOP");*/ } microVUq(void) mVU_FTOIx(uptr addr) { microVU* mVU = mVUx; if (!recPass) { mVUanalyzeFMAC2(_Fs_, _Ft_); } else { int Fs, Ft; + mVUlog("FTOIx"); mVUallocFMAC2a(Fs, Ft); // Note: For help understanding this algorithm see recVUMI_FTOI_Saturate() @@ -569,6 +566,7 @@ microVUq(void) mVU_ITOFx(uptr addr) { if (!recPass) { mVUanalyzeFMAC2(_Fs_, _Ft_); } else { int Fs, Ft; + mVUlog("ITOFx"); mVUallocFMAC2a(Fs, Ft); SSE2_CVTDQ2PS_XMM_to_XMM(Ft, Fs); @@ -587,6 +585,7 @@ microVUf(void) mVU_CLIP() { if (!recPass) { mVUanalyzeFMAC4(_Fs_, _Ft_); mVUlog("clip broken"); } else { int Fs, Ft; + mVUlog("CLIP"); mVUallocFMAC17a(Fs, Ft); mVUallocCFLAGa(gprT1, fpcInstance); SHL32ItoR(gprT1, 6); From ecfe0b50113dfe4c4f8578436df2b7d764751bdc Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sun, 19 Apr 2009 07:42:36 +0000 Subject: [PATCH 102/143] Change some magic numbers to enums and defines and rearrange things a little... 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1018 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/IopDma.cpp | 68 +++++----- pcsx2/IopHw.cpp | 320 ++++++++++++++++++++++------------------------- pcsx2/IopHw.h | 47 +++++++ 3 files changed, 236 insertions(+), 199 deletions(-) diff --git a/pcsx2/IopDma.cpp b/pcsx2/IopDma.cpp index 15aa276411..697b0acb9e 100644 --- a/pcsx2/IopDma.cpp +++ b/pcsx2/IopDma.cpp @@ -75,6 +75,14 @@ static void __fastcall psxDmaGeneric(u32 madr, u32 bcr, u32 chcr, u32 spuCore, _ } } +void psxDma2(u32 madr, u32 bcr, u32 chcr) // GPU +{ + HW_DMA2_CHCR &= ~0x01000000; + psxDmaInterrupt(2); +} + +/* psxDma3 is in CdRom.cpp */ + void psxDma4(u32 madr, u32 bcr, u32 chcr) // SPU2's Core 0 { psxDmaGeneric(madr, bcr, chcr, 0, SPU2writeDMA4Mem, SPU2readDMA4Mem); @@ -88,12 +96,6 @@ int psxDma4Interrupt() return 1; } -void psxDma2(u32 madr, u32 bcr, u32 chcr) // GPU -{ - HW_DMA2_CHCR &= ~0x01000000; - psxDmaInterrupt(2); -} - void psxDma6(u32 madr, u32 bcr, u32 chcr) { u32 *mem = (u32 *)iopPhysMem(madr); @@ -132,6 +134,31 @@ int psxDma7Interrupt() } +void psxDma8(u32 madr, u32 bcr, u32 chcr) +{ + + const int size = (bcr >> 16) * (bcr & 0xFFFF) * 8; + + switch (chcr & 0x01000201) + { + case 0x01000201: //cpu to dev9 transfer + PSXDMA_LOG("*** DMA 8 - DEV9 mem2dev9 *** %lx addr = %lx size = %lx", chcr, madr, bcr); + DEV9writeDMA8Mem((u32*)iopPhysMem(madr), size); + break; + + case 0x01000200: //dev9 to cpu transfer + PSXDMA_LOG("*** DMA 8 - DEV9 dev9mem *** %lx addr = %lx size = %lx", chcr, madr, bcr); + DEV9readDMA8Mem((u32*)iopPhysMem(madr), size); + break; + + default: + PSXDMA_LOG("*** DMA 8 - DEV9 unknown *** %lx addr = %lx size = %lx", chcr, madr, bcr); + break; + } + HW_DMA8_CHCR &= ~0x01000000; + psxDmaInterrupt2(1); +} + void psxDma9(u32 madr, u32 bcr, u32 chcr) { SIF_LOG("IOP: dmaSIF0 chcr = %lx, madr = %lx, bcr = %lx, tadr = %lx", chcr, madr, bcr, HW_DMA9_TADR); @@ -165,32 +192,9 @@ void psxDma10(u32 madr, u32 bcr, u32 chcr) } } -void psxDma8(u32 madr, u32 bcr, u32 chcr) -{ +/* psxDma11 & psxDma 12 are in IopSio2,cpp, along with the appropriate interrupt functions. 
*/ - const int size = (bcr >> 16) * (bcr & 0xFFFF) * 8; - - switch (chcr & 0x01000201) - { - case 0x01000201: //cpu to dev9 transfer - PSXDMA_LOG("*** DMA 8 - DEV9 mem2dev9 *** %lx addr = %lx size = %lx", chcr, madr, bcr); - DEV9writeDMA8Mem((u32*)iopPhysMem(madr), size); - break; - - case 0x01000200: //dev9 to cpu transfer - PSXDMA_LOG("*** DMA 8 - DEV9 dev9mem *** %lx addr = %lx size = %lx", chcr, madr, bcr); - DEV9readDMA8Mem((u32*)iopPhysMem(madr), size); - break; - - default: - PSXDMA_LOG("*** DMA 8 - DEV9 unknown *** %lx addr = %lx size = %lx", chcr, madr, bcr); - break; - } - HW_DMA8_CHCR &= ~0x01000000; - psxDmaInterrupt2(1); -} - -void dev9Interrupt() +void dev9Interrupt() { if ((dev9Handler != NULL) && (dev9Handler() != 1)) return; @@ -203,7 +207,7 @@ void dev9Irq(int cycles) PSX_INT(IopEvt_DEV9, cycles); } -void usbInterrupt() +void usbInterrupt() { if (usbHandler != NULL && (usbHandler() != 1)) return; diff --git a/pcsx2/IopHw.cpp b/pcsx2/IopHw.cpp index 57344bbc99..8a486855d2 100644 --- a/pcsx2/IopHw.cpp +++ b/pcsx2/IopHw.cpp @@ -41,7 +41,7 @@ void psxHwReset() { u8 psxHwRead8(u32 add) { u8 hard; - if (add >= 0x1f801600 && add < 0x1f801700) { + if (add >= HW_USB_START && add < HW_USB_END) { return USBread8(add); } @@ -53,24 +53,24 @@ u8 psxHwRead8(u32 add) { return DEV9read8(add); #ifdef PCSX2_DEVBUILD - case 0x1f801100: - case 0x1f801104: - case 0x1f801108: - case 0x1f801110: - case 0x1f801114: - case 0x1f801118: - case 0x1f801120: - case 0x1f801124: - case 0x1f801128: - case 0x1f801480: - case 0x1f801484: - case 0x1f801488: - case 0x1f801490: - case 0x1f801494: - case 0x1f801498: - case 0x1f8014a0: - case 0x1f8014a4: - case 0x1f8014a8: + case IOP_T0_COUNT: + case IOP_T0_MODE: + case IOP_T0_TARGET: + case IOP_T1_COUNT: + case IOP_T1_MODE: + case IOP_T1_TARGET: + case IOP_T2_COUNT: + case IOP_T2_MODE: + case IOP_T2_TARGET: + case IOP_T3_COUNT: + case IOP_T3_MODE: + case IOP_T3_TARGET: + case IOP_T4_COUNT: + case IOP_T4_MODE: + case IOP_T4_TARGET: + case IOP_T5_COUNT: + case IOP_T5_MODE: + case IOP_T5_TARGET: DevCon::Notice( "IOP Counter Read8 from addr0x%x = 0x%x", params add, psxHu8(add) ); return psxHu8(add); #endif @@ -102,7 +102,7 @@ u8 psxHwRead8(u32 add) { u16 psxHwRead16(u32 add) { u16 hard; - if (add >= 0x1f801600 && add < 0x1f801700) { + if (add >= HW_USB_START && add < HW_USB_END) { return USBread16(add); } @@ -136,50 +136,50 @@ u16 psxHwRead16(u32 add) { return hard; //Serial port stuff not support now ;P - // case 0x1f801050: hard = serial_read16(); break; - // case 0x1f801054: hard = serial_status_read(); break; - // case 0x1f80105a: hard = serial_control_read(); break; - // case 0x1f80105e: hard = serial_baud_read(); break; +// case 0x1f801050: hard = serial_read16(); break; +// case 0x1f801054: hard = serial_status_read(); break; +// case 0x1f80105a: hard = serial_control_read(); break; +// case 0x1f80105e: hard = serial_baud_read(); break; - case 0x1f801100: + case IOP_T0_COUNT: hard = (u16)psxRcntRcount16(0); PSXCNT_LOG("T0 count read16: %x", hard); return hard; - case 0x1f801104: + case IOP_T0_MODE: hard = psxCounters[0].mode; - psxCounters[0].mode &= ~0x1800; + psxCounters[0].mode &= ~0x1800; psxCounters[0].mode |= 0x400; PSXCNT_LOG("T0 mode read16: %x", hard); return hard; - case 0x1f801108: + case IOP_T0_TARGET: hard = psxCounters[0].target; PSXCNT_LOG("T0 target read16: %x", hard); return hard; - case 0x1f801110: + case IOP_T1_COUNT: hard = (u16)psxRcntRcount16(1); PSXCNT_LOG("T1 count read16: %x", hard); return hard; - case 0x1f801114: + case 
IOP_T1_MODE: hard = psxCounters[1].mode; psxCounters[1].mode &= ~0x1800; psxCounters[1].mode |= 0x400; PSXCNT_LOG("T1 mode read16: %x", hard); return hard; - case 0x1f801118: + case IOP_T1_TARGET: hard = psxCounters[1].target; PSXCNT_LOG("T1 target read16: %x", hard); return hard; - case 0x1f801120: + case IOP_T2_COUNT: hard = (u16)psxRcntRcount16(2); PSXCNT_LOG("T2 count read16: %x", hard); return hard; - case 0x1f801124: + case IOP_T2_MODE: hard = psxCounters[2].mode; psxCounters[2].mode &= ~0x1800; psxCounters[2].mode |= 0x400; PSXCNT_LOG("T2 mode read16: %x", hard); return hard; - case 0x1f801128: + case IOP_T2_TARGET: hard = psxCounters[2].target; PSXCNT_LOG("T2 target read16: %x", hard); return hard; @@ -187,45 +187,45 @@ u16 psxHwRead16(u32 add) { case 0x1f80146e: // DEV9_R_REV return DEV9read16(add); - case 0x1f801480: + case IOP_T3_COUNT: hard = (u16)psxRcntRcount32(3); PSXCNT_LOG("T3 count read16: %lx", hard); return hard; - case 0x1f801484: + case IOP_T3_MODE: hard = psxCounters[3].mode; psxCounters[3].mode &= ~0x1800; psxCounters[3].mode |= 0x400; PSXCNT_LOG("T3 mode read16: %lx", hard); return hard; - case 0x1f801488: + case IOP_T3_TARGET: hard = psxCounters[3].target; PSXCNT_LOG("T3 target read16: %lx", hard); return hard; - case 0x1f801490: + case IOP_T4_COUNT: hard = (u16)psxRcntRcount32(4); PSXCNT_LOG("T4 count read16: %lx", hard); return hard; - case 0x1f801494: + case IOP_T4_MODE: hard = psxCounters[4].mode; psxCounters[4].mode &= ~0x1800; psxCounters[4].mode |= 0x400; PSXCNT_LOG("T4 mode read16: %lx", hard); return hard; - case 0x1f801498: + case IOP_T4_TARGET: hard = psxCounters[4].target; PSXCNT_LOG("T4 target read16: %lx", hard); return hard; - case 0x1f8014a0: + case IOP_T5_COUNT: hard = (u16)psxRcntRcount32(5); PSXCNT_LOG("T5 count read16: %lx", hard); return hard; - case 0x1f8014a4: + case IOP_T5_MODE: hard = psxCounters[5].mode; psxCounters[5].mode &= ~0x1800; psxCounters[5].mode |= 0x400; PSXCNT_LOG("T5 mode read16: %lx", hard); return hard; - case 0x1f8014a8: + case IOP_T5_TARGET: hard = psxCounters[5].target; PSXCNT_LOG("T5 target read16: %lx", hard); return hard; @@ -238,11 +238,11 @@ u16 psxHwRead16(u32 add) { hard = psxHu16(0x1506); PSXHW_LOG("DMA7 BCR_count 16bit read %lx", hard); return hard; - //case 0x1f802030: hard = //int_2000???? - //case 0x1f802040: hard =//dip switches...?? +// case 0x1f802030: hard = //int_2000???? +// case 0x1f802040: hard =//dip switches...?? default: - if (add>=0x1f801c00 && add<0x1f801e00) { + if (add>=HW_SPU2_START && add= 0x1f801600 && add < 0x1f801700) { + if (add >= HW_USB_START && add < HW_USB_END) { return USBread32(add); } - if (add >= 0x1f808400 && add <= 0x1f808550) {//the size is a complete guess.. + if (add >= HW_FW_START && add <= HW_FW_END) {//the size is a complete guess.. 
return FWread32(add); } @@ -275,7 +275,7 @@ u32 psxHwRead32(u32 add) { PAD_LOG("sio read32 ;ret = %lx", hard); return hard; - // case 0x1f801050: hard = serial_read32(); break;//serial port +// case 0x1f801050: hard = serial_read32(); break;//serial port case 0x1f801060: PSXHW_LOG("RAM size read %lx", psxHu32(0x1060)); return psxHu32(0x1060); @@ -289,18 +289,17 @@ u32 psxHwRead32(u32 add) { psxHu32(0x1078) = 0; return hard; -/* case 0x1f801810: +// case 0x1f801810: // hard = GPU_readData(); - PSXHW_LOG("GPU DATA 32bit read %lx", hard); - return hard;*/ -/* case 0x1f801814: - hard = GPU_readStatus(); - PSXHW_LOG("GPU STATUS 32bit read %lx", hard); - return hard; -*/ -/* case 0x1f801820: hard = mdecRead0(); break; - case 0x1f801824: hard = mdecRead1(); break; -*/ +// PSXHW_LOG("GPU DATA 32bit read %lx", hard); +// return hard; +// case 0x1f801814: +// hard = GPU_readStatus(); +// PSXHW_LOG("GPU STATUS 32bit read %lx", hard); +// return hard; +// +// case 0x1f801820: hard = mdecRead0(); break; +// case 0x1f801824: hard = mdecRead1(); break; case 0x1f8010a0: PSXHW_LOG("DMA2 MADR 32bit read %lx", psxHu32(0x10a0)); @@ -352,7 +351,7 @@ u32 psxHwRead32(u32 add) { PSXHW_LOG("DMA ICR 32bit read %lx", HW_DMA_ICR); return HW_DMA_ICR; -//SSBus registers + //SSBus registers case 0x1f801000: hard = psxHu32(0x1000); PSXHW_LOG("SSBUS 32bit read %lx", hard); @@ -432,79 +431,78 @@ u32 psxHwRead32(u32 add) { case 0x1f8010c8: PSXHW_LOG("DMA4 CHCR 32bit read %lx", HW_DMA4_CHCR); - return HW_DMA4_CHCR; // DMA4 chcr (SPU DMA) + return HW_DMA4_CHCR; // DMA4 chcr (SPU DMA) // time for rootcounters :) - case 0x1f801100: + case IOP_T0_COUNT: hard = (u16)psxRcntRcount16(0); PSXCNT_LOG("T0 count read32: %lx", hard); return hard; - case 0x1f801104: + case IOP_T0_MODE: hard = (u16)psxCounters[0].mode; PSXCNT_LOG("T0 mode read32: %lx", hard); return hard; - case 0x1f801108: + case IOP_T0_TARGET: hard = psxCounters[0].target; PSXCNT_LOG("T0 target read32: %lx", hard); return hard; - case 0x1f801110: + case IOP_T1_COUNT: hard = (u16)psxRcntRcount16(1); PSXCNT_LOG("T1 count read32: %lx", hard); return hard; - case 0x1f801114: + case IOP_T1_MODE: hard = (u16)psxCounters[1].mode; PSXCNT_LOG("T1 mode read32: %lx", hard); return hard; - case 0x1f801118: + case IOP_T1_TARGET: hard = psxCounters[1].target; PSXCNT_LOG("T1 target read32: %lx", hard); return hard; - case 0x1f801120: + case IOP_T2_COUNT: hard = (u16)psxRcntRcount16(2); PSXCNT_LOG("T2 count read32: %lx", hard); return hard; - case 0x1f801124: + case IOP_T2_MODE: hard = (u16)psxCounters[2].mode; PSXCNT_LOG("T2 mode read32: %lx", hard); return hard; - case 0x1f801128: + case IOP_T2_TARGET: hard = psxCounters[2].target; PSXCNT_LOG("T2 target read32: %lx", hard); return hard; - - case 0x1f801480: + case IOP_T3_COUNT: hard = (u32)psxRcntRcount32(3); PSXCNT_LOG("T3 count read32: %lx", hard); return hard; - case 0x1f801484: + case IOP_T3_MODE: hard = (u16)psxCounters[3].mode; PSXCNT_LOG("T3 mode read32: %lx", hard); return hard; - case 0x1f801488: + case IOP_T3_TARGET: hard = psxCounters[3].target; PSXCNT_LOG("T3 target read32: %lx", hard); return hard; - case 0x1f801490: + case IOP_T4_COUNT: hard = (u32)psxRcntRcount32(4); PSXCNT_LOG("T4 count read32: %lx", hard); return hard; - case 0x1f801494: + case IOP_T4_MODE: hard = (u16)psxCounters[4].mode; PSXCNT_LOG("T4 mode read32: %lx", hard); return hard; - case 0x1f801498: + case IOP_T4_TARGET: hard = psxCounters[4].target; PSXCNT_LOG("T4 target read32: %lx", hard); return hard; - case 0x1f8014a0: + case IOP_T5_COUNT: hard = 
(u32)psxRcntRcount32(5); PSXCNT_LOG("T5 count read32: %lx", hard); return hard; - case 0x1f8014a4: + case IOP_T5_MODE: hard = (u16)psxCounters[5].mode; PSXCNT_LOG("T5 mode read32: %lx", hard); return hard; - case 0x1f8014a8: + case IOP_T5_TARGET: hard = psxCounters[5].target; PSXCNT_LOG("T5 target read32: %lx", hard); return hard; @@ -619,18 +617,11 @@ u32 psxHwRead32(u32 add) { return hard; } -int g_pbufi; +// A buffer that stores messages until it gets a /n or the number of chars (g_pbufi) is more then 1023. s8 g_pbuf[1024]; - -#define DmaExec(n) { \ - if (HW_DMA##n##_CHCR & 0x01000000 && \ - HW_DMA_PCR & (8 << (n * 4))) { \ - psxDma##n(HW_DMA##n##_MADR, HW_DMA##n##_BCR, HW_DMA##n##_CHCR); \ - } \ -} - +int g_pbufi; void psxHwWrite8(u32 add, u8 value) { - if (add >= 0x1f801600 && add < 0x1f801700) { + if (add >= HW_USB_START && add < HW_USB_END) { USBwrite8(add, value); return; } if((add & 0xf) == 0xa) @@ -638,28 +629,28 @@ void psxHwWrite8(u32 add, u8 value) { switch (add) { case 0x1f801040: - sioWrite8(value); - break; - // case 0x1f801050: serial_write8(value); break;//serial port + sioWrite8(value); + break; +// case 0x1f801050: serial_write8(value); break;//serial port - case 0x1f801100: - case 0x1f801104: - case 0x1f801108: - case 0x1f801110: - case 0x1f801114: - case 0x1f801118: - case 0x1f801120: - case 0x1f801124: - case 0x1f801128: - case 0x1f801480: - case 0x1f801484: - case 0x1f801488: - case 0x1f801490: - case 0x1f801494: - case 0x1f801498: - case 0x1f8014a0: - case 0x1f8014a4: - case 0x1f8014a8: + case IOP_T0_COUNT: + case IOP_T0_MODE: + case IOP_T0_TARGET: + case IOP_T1_COUNT: + case IOP_T1_MODE: + case IOP_T1_TARGET: + case IOP_T2_COUNT: + case IOP_T2_MODE: + case IOP_T2_TARGET: + case IOP_T3_COUNT: + case IOP_T3_MODE: + case IOP_T3_TARGET: + case IOP_T4_COUNT: + case IOP_T4_MODE: + case IOP_T4_TARGET: + case IOP_T5_COUNT: + case IOP_T5_MODE: + case IOP_T5_TARGET: DevCon::Notice( "IOP Counter Write8 to addr 0x%x = 0x%x", params add, value ); psxHu8(add) = value; return; @@ -676,17 +667,19 @@ void psxHwWrite8(u32 add, u8 value) { case 0x1f80380c: if (value == '\r') break; - if (value == '\n' || g_pbufi >= 1023) { - g_pbuf[g_pbufi++] = 0; g_pbufi = 0; + if (value == '\n' || g_pbufi >= 1023) { // A line break, or the buffer is about to overflow. 
+ g_pbuf[g_pbufi++] = 0; + g_pbufi = 0; DevCon::WriteLn( Color_Cyan, g_pbuf ); } else g_pbuf[g_pbufi++] = value; - psxHu8(add) = value; + psxHu8(add) = value; return; case 0x1F808260: PSXHW_LOG("SIO2 write8 DATAIN <- %08X", value); - sio2_serialIn(value);return;//serial data feed/fifo + sio2_serialIn(value); + return;//serial data feed/fifo default: psxHu8(add) = value; @@ -698,7 +691,7 @@ void psxHwWrite8(u32 add, u8 value) { } void psxHwWrite16(u32 add, u16 value) { - if (add >= 0x1f801600 && add < 0x1f801700) { + if (add >= HW_USB_START && add < HW_USB_END) { USBwrite16(add, value); return; } @@ -727,10 +720,10 @@ void psxHwWrite16(u32 add, u16 value) { return; //serial port ;P - // case 0x1f801050: serial_write16(value); break; - // case 0x1f80105a: serial_control_write(value);break; - // case 0x1f80105e: serial_baud_write(value); break; - // case 0x1f801054: serial_status_write(value); break; +// case 0x1f801050: serial_write16(value); break; +// case 0x1f80105a: serial_control_write(value);break; +// case 0x1f80105e: serial_baud_write(value); break; +// case 0x1f801054: serial_status_write(value); break; case 0x1f801070: PSXHW_LOG("IREG 16bit write %x", value); @@ -760,33 +753,33 @@ void psxHwWrite16(u32 add, u16 value) { PSXHW_LOG("DMA4 BCR_count 16bit write %lx", value); psxHu16(0x10c6) = value; return; // DMA4 bcr_count - case 0x1f801100: + case IOP_T0_COUNT: PSXCNT_LOG("COUNTER 0 COUNT 16bit write %x", value); psxRcntWcount16(0, value); return; - case 0x1f801104: + case IOP_T0_MODE: PSXCNT_LOG("COUNTER 0 MODE 16bit write %x", value); psxRcnt0Wmode(value); return; - case 0x1f801108: + case IOP_T0_TARGET: PSXCNT_LOG("COUNTER 0 TARGET 16bit write %x", value); psxRcntWtarget16(0, value); return; - case 0x1f801110: + case IOP_T1_COUNT: PSXCNT_LOG("COUNTER 1 COUNT 16bit write %x", value); psxRcntWcount16(1, value); return; - case 0x1f801114: + case IOP_T1_MODE: PSXCNT_LOG("COUNTER 1 MODE 16bit write %x", value); psxRcnt1Wmode(value); return; - case 0x1f801118: + case IOP_T1_TARGET: PSXCNT_LOG("COUNTER 1 TARGET 16bit write %x", value); psxRcntWtarget16(1, value); return; - case 0x1f801120: + case IOP_T2_COUNT: PSXCNT_LOG("COUNTER 2 COUNT 16bit write %x", value); psxRcntWcount16(2, value); return; - case 0x1f801124: + case IOP_T2_MODE: PSXCNT_LOG("COUNTER 2 MODE 16bit write %x", value); psxRcnt2Wmode(value); return; - case 0x1f801128: + case IOP_T2_TARGET: PSXCNT_LOG("COUNTER 2 TARGET 16bit write %x", value); psxRcntWtarget16(2, value); return; @@ -795,33 +788,33 @@ void psxHwWrite16(u32 add, u16 value) { psxHu16(0x1450) = value/* & (~0x8)*/; return; - case 0x1f801480: + case IOP_T3_COUNT: PSXCNT_LOG("COUNTER 3 COUNT 16bit write %lx", value); psxRcntWcount32(3, value); return; - case 0x1f801484: + case IOP_T3_MODE: PSXCNT_LOG("COUNTER 3 MODE 16bit write %lx", value); psxRcnt3Wmode(value); return; - case 0x1f801488: + case IOP_T3_TARGET: PSXCNT_LOG("COUNTER 3 TARGET 16bit write %lx", value); psxRcntWtarget32(3, value); return; - case 0x1f801490: + case IOP_T4_COUNT: PSXCNT_LOG("COUNTER 4 COUNT 16bit write %lx", value); psxRcntWcount32(4, value); return; - case 0x1f801494: + case IOP_T4_MODE: PSXCNT_LOG("COUNTER 4 MODE 16bit write %lx", value); psxRcnt4Wmode(value); return; - case 0x1f801498: + case IOP_T4_TARGET: PSXCNT_LOG("COUNTER 4 TARGET 16bit write %lx", value); psxRcntWtarget32(4, value); return; - case 0x1f8014a0: + case IOP_T5_COUNT: PSXCNT_LOG("COUNTER 5 COUNT 16bit write %lx", value); psxRcntWcount32(5, value); return; - case 0x1f8014a4: + case IOP_T5_MODE: PSXCNT_LOG("COUNTER 5 
MODE 16bit write %lx", value); psxRcnt5Wmode(value); return; - case 0x1f8014a8: + case IOP_T5_TARGET: PSXCNT_LOG("COUNTER 5 TARGET 16bit write %lx", value); psxRcntWtarget32(5, value); return; @@ -834,7 +827,7 @@ void psxHwWrite16(u32 add, u16 value) { PSXHW_LOG("DMA7 BCR_count 16bit write %lx", value); return; default: - if (add>=0x1f801c00 && add<0x1f801e00) { + if (add>=HW_SPU2_START && add= 0x1f801600 && add < 0x1f801700) { + if (add >= HW_USB_START && add < HW_USB_END) { USBwrite32(add, value); return; } - if (add >= 0x1f808400 && add <= 0x1f808550) { + if (add >= HW_FW_START && add <= HW_FW_END) { FWwrite32(add, value); return; } switch (add) { @@ -1179,63 +1165,63 @@ void psxHwWrite32(u32 add, u32 value) { case 0x1f801824: mdecWrite1(value); break; */ - case 0x1f801100: + case IOP_T0_COUNT: PSXCNT_LOG("COUNTER 0 COUNT 32bit write %lx", value); psxRcntWcount16(0, value ); return; - case 0x1f801104: + case IOP_T0_MODE: PSXCNT_LOG("COUNTER 0 MODE 32bit write %lx", value); psxRcnt0Wmode(value); return; - case 0x1f801108: + case IOP_T0_TARGET: PSXCNT_LOG("COUNTER 0 TARGET 32bit write %lx", value); psxRcntWtarget16(0, value ); return; - case 0x1f801110: + case IOP_T1_COUNT: PSXCNT_LOG("COUNTER 1 COUNT 32bit write %lx", value); psxRcntWcount16(1, value ); return; - case 0x1f801114: + case IOP_T1_MODE: PSXCNT_LOG("COUNTER 1 MODE 32bit write %lx", value); psxRcnt1Wmode(value); return; - case 0x1f801118: + case IOP_T1_TARGET: PSXCNT_LOG("COUNTER 1 TARGET 32bit write %lx", value); psxRcntWtarget16(1, value ); return; - case 0x1f801120: + case IOP_T2_COUNT: PSXCNT_LOG("COUNTER 2 COUNT 32bit write %lx", value); psxRcntWcount16(2, value ); return; - case 0x1f801124: + case IOP_T2_MODE: PSXCNT_LOG("COUNTER 2 MODE 32bit write %lx", value); psxRcnt2Wmode(value); return; - case 0x1f801128: + case IOP_T2_TARGET: PSXCNT_LOG("COUNTER 2 TARGET 32bit write %lx", value); psxRcntWtarget16(2, value); return; - case 0x1f801480: + case IOP_T3_COUNT: PSXCNT_LOG("COUNTER 3 COUNT 32bit write %lx", value); psxRcntWcount32(3, value); return; - case 0x1f801484: + case IOP_T3_MODE: PSXCNT_LOG("COUNTER 3 MODE 32bit write %lx", value); psxRcnt3Wmode(value); return; - case 0x1f801488: + case IOP_T3_TARGET: PSXCNT_LOG("COUNTER 3 TARGET 32bit write %lx", value); psxRcntWtarget32(3, value); return; - case 0x1f801490: + case IOP_T4_COUNT: PSXCNT_LOG("COUNTER 4 COUNT 32bit write %lx", value); psxRcntWcount32(4, value); return; - case 0x1f801494: + case IOP_T4_MODE: PSXCNT_LOG("COUNTER 4 MODE 32bit write %lx", value); psxRcnt4Wmode(value); return; - case 0x1f801498: + case IOP_T4_TARGET: PSXCNT_LOG("COUNTER 4 TARGET 32bit write %lx", value); psxRcntWtarget32(4, value); return; - case 0x1f8014a0: + case IOP_T5_COUNT: PSXCNT_LOG("COUNTER 5 COUNT 32bit write %lx", value); psxRcntWcount32(5, value); return; - case 0x1f8014a4: + case IOP_T5_MODE: PSXCNT_LOG("COUNTER 5 MODE 32bit write %lx", value); psxRcnt5Wmode(value); return; - case 0x1f8014a8: + case IOP_T5_TARGET: PSXCNT_LOG("COUNTER 5 TARGET 32bit write %lx", value); psxRcntWtarget32(5, value); return; diff --git a/pcsx2/IopHw.h b/pcsx2/IopHw.h index 1b7a5400f6..6fdabcc287 100644 --- a/pcsx2/IopHw.h +++ b/pcsx2/IopHw.h @@ -22,6 +22,53 @@ #include "R3000A.h" #include "IopMem.h" +#define HW_USB_START 0x1f801600 +#define HW_USB_END 0x1f801700 +#define HW_FW_START 0x1f808400 +#define HW_FW_END 0x1f808550 +#define HW_SPU2_START 0x1f801c00 +#define HW_SPU2_END 0x1f801e00 + +/* Registers for the IOP Counters */ +enum IOPCountRegs +{ + IOP_T0_COUNT = 0x1f801100, + IOP_T1_COUNT = 
0x1f801110, + IOP_T2_COUNT = 0x1f801120, + IOP_T3_COUNT = 0x1f801480, + IOP_T4_COUNT = 0x1f801490, + IOP_T5_COUNT = 0x1f8014a0, + + IOP_T0_MODE = 0x1f801104, + IOP_T1_MODE = 0x1f801114, + IOP_T2_MODE = 0x1f801124, + IOP_T3_MODE = 0x1f801484, + IOP_T4_MODE = 0x1f801494, + IOP_T5_MODE = 0x1f8014a4, + + IOP_T0_TARGET= 0x1f801108, + IOP_T1_TARGET = 0x1f801118, + IOP_T2_TARGET = 0x1f801128, + IOP_T3_TARGET = 0x1f801488, + IOP_T4_TARGET = 0x1f801498, + IOP_T5_TARGET = 0x1f8014a8 +}; + +// fixme: I'm sure there's a better way to do this. --arcum42 +#define DmaExec(n) { \ + if (HW_DMA##n##_CHCR & 0x01000000 && \ + HW_DMA_PCR & (8 << (n * 4))) { \ + psxDma##n(HW_DMA##n##_MADR, HW_DMA##n##_BCR, HW_DMA##n##_CHCR); \ + } \ +} + +#define DmaExec2(n) { \ + if (HW_DMA##n##_CHCR & 0x01000000 && \ + HW_DMA_PCR2 & (8 << ((n-7) * 4))) { \ + psxDma##n(HW_DMA##n##_MADR, HW_DMA##n##_BCR, HW_DMA##n##_CHCR); \ + } \ +} + #define HW_DMA0_MADR (psxHu32(0x1080)) // MDEC in DMA #define HW_DMA0_BCR (psxHu32(0x1084)) #define HW_DMA0_CHCR (psxHu32(0x1088)) From bb7dc87819ad18fa2af157e4619347c5c2d1037d Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Sun, 19 Apr 2009 13:55:09 +0000 Subject: [PATCH 103/143] Silly bug made each EE branch count as 0 cycles. Enjoy some "free" speedups :p git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1019 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86-32/iR5900-32.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 36a42666a5..6085791b9b 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -1289,8 +1289,8 @@ void recompileNextInstruction(int delayslot) return; } } - opcode.recompile(); s_nBlockCycles += opcode.cycles; + opcode.recompile(); } if( !delayslot ) { From d91eb6d1c86eaec656e91a6af84248e5331d1ebb Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Sun, 19 Apr 2009 16:34:29 +0000 Subject: [PATCH 104/143] Emitter: Fixed a GCC compilation error; Implemented MOVNT/MOVLH/MOVHL/PMOVMSKB, and tied in all old emitter references to MOVAPS/MOVDQA to the new versions. 
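In practice the tie-in means the legacy wrappers now just forward into the new type-safe forms. A rough sketch of the call-site translation (register numbers and the offsets are arbitrary examples; the wrapper and helper names are the ones touched by this diff):

    // old style: opcode bytes written by hand
    //   SSE_MOVAPS_XMM_to_XMM( to, from );             // emits 0F 28 /r
    // new style: operands carry their type, reg==reg moves become no-ops
    iMOVAPS( iRegisterSSE(1), iRegisterSSE(2) );         // xmm1 <- xmm2
    iMOVAPS( iRegisterSSE(1), ptr[iAddressReg(2)+8] );   // xmm1 <- [edx+8]
    iMOVNTPS( ptr[iAddressReg(0)+16], iRegisterSSE(3) ); // non-temporal store, [eax+16] <- xmm3
    iPMOVMSKB( iRegister32(0), iRegisterSSE(1) );        // eax <- per-byte sign-bit mask of xmm1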
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1020 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/bittest.h | 5 +- pcsx2/x86/ix86/implement/xmm/movqss.h | 106 ++++----- pcsx2/x86/ix86/ix86.cpp | 79 +++++-- pcsx2/x86/ix86/ix86_instructions.h | 47 +++- pcsx2/x86/ix86/ix86_legacy_instructions.h | 10 +- pcsx2/x86/ix86/ix86_legacy_mmx.cpp | 111 ++------- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 272 +++------------------- pcsx2/x86/ix86/ix86_types.h | 25 +- 8 files changed, 236 insertions(+), 419 deletions(-) diff --git a/pcsx2/x86/ix86/implement/bittest.h b/pcsx2/x86/ix86/implement/bittest.h index b4dc648b9c..dd3d8fcc73 100644 --- a/pcsx2/x86/ix86/implement/bittest.h +++ b/pcsx2/x86/ix86/implement/bittest.h @@ -56,15 +56,14 @@ public: } // ------------------------------------------------------------------------ -#ifndef __LINUX__ static __emitinline void Emit( void* bitbase, const iRegister& bitoffset ) { prefix16(); iWrite( 0x0f ); iWrite( 0xa3 | (InstType << 2) ); - iWriteDisp( bitoffset.Id, bitbase.Id ); + iWriteDisp( bitoffset.Id, bitbase ); } -#endif + // ------------------------------------------------------------------------ static __emitinline void Emit( const ModSibBase& bitbase, const iRegister& bitoffset ) { diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index f2c29c187f..0a955b053b 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -22,7 +22,7 @@ // MMX / SSE Helper Functions! template< typename T > -__emitinline void SimdPrefix( u8 opcode, u8 prefix ) +__emitinline void SimdPrefix( u8 opcode, u8 prefix=0 ) { if( sizeof( T ) == 16 && prefix != 0 ) { @@ -33,84 +33,84 @@ __emitinline void SimdPrefix( u8 opcode, u8 prefix ) iWrite( (opcode<<8) | 0x0f ); } -template< u8 prefix, typename T, typename T2 > -__emitinline void writeXMMop( u8 opcode, const iRegister& to, const iRegister& from ) +// ------------------------------------------------------------------------ +// xmm emitter helpers for xmm instruction with prefixes. +// These functions also support deducing the use of the prefix from the template parameters, +// since most xmm instructions use a prefix and most mmx instructions do not. (some mov +// instructions violate this "guideline.") +// +template< typename T, typename T2 > +__emitinline void writeXMMop( u8 prefix, u8 opcode, const iRegister& to, const iRegister& from ) { SimdPrefix( opcode, prefix ); ModRM_Direct( to.Id, from.Id ); } -template< u8 prefix, typename T > -void writeXMMop( u8 opcode, const iRegister& reg, const ModSibBase& sib ) +template< typename T > +void writeXMMop( u8 prefix, u8 opcode, const iRegister& reg, const ModSibBase& sib ) { SimdPrefix( opcode, prefix ); EmitSibMagic( reg.Id, sib ); } -template< u8 prefix, typename T > -__emitinline void writeXMMop( u8 opcode, const iRegister& reg, const void* data ) +template< typename T > +__emitinline void writeXMMop( u8 prefix, u8 opcode, const iRegister& reg, const void* data ) { SimdPrefix( opcode, prefix ); iWriteDisp( reg.Id, data ); } +// ------------------------------------------------------------------------ +// xmm emitter helpers for xmm instructions *without* prefixes. +// These are normally used for special instructions that have MMX forms only (non-SSE), however +// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies. 
+// +template< typename T, typename T2 > +__emitinline void writeXMMop( u8 opcode, const iRegister& to, const iRegister& from ) +{ + SimdPrefix( opcode ); + ModRM_Direct( to.Id, from.Id ); +} + +template< typename T > +void writeXMMop( u8 opcode, const iRegister& reg, const ModSibBase& sib ) +{ + SimdPrefix( opcode ); + EmitSibMagic( reg.Id, sib ); +} + +template< typename T > +__emitinline void writeXMMop( u8 opcode, const iRegister& reg, const void* data ) +{ + SimdPrefix( opcode ); + iWriteDisp( reg.Id, data ); +} + ////////////////////////////////////////////////////////////////////////////////////////// // -template< u8 Prefix, typename OperandType > -class MovapsImpl +// Moves to/from high/low portions of an xmm register. +// These instructions cannot be used in reg/reg form. +template< u8 Prefix, u8 Opcode > +class MovhlImplAll { public: - // ------------------------------------------------------------------------ - static __emitinline void Emit( u8 opcode, const iRegisterSIMD& to, const iRegisterSIMD from ) - { - if( to != from ) - writeXMMop( opcode, to, from ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( u8 opcode, const iRegisterSIMD& to, const void* from ) - { - writeXMMop( opcode, to, from ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( u8 opcode, const iRegisterSIMD& to, const ModSibBase& from ) - { - writeXMMop( opcode, to, from ); - } - - // ------------------------------------------------------------------------ - // Generally a Movaps/dqa instruction form only. - // Most SSE/MMX instructions don't have this form. - static __emitinline void Emit( u8 opcode, const void* to, const iRegisterSIMD& from ) - { - writeXMMop( opcode, from, to ); - } - - // ------------------------------------------------------------------------ - // Generally a Movaps/dqa instruction form only. - // Most SSE/MMX instructions don't have this form. - static __emitinline void Emit( u8 opcode, const ModSibBase& to, const iRegisterSIMD& from ) - { - writeXMMop( opcode, from, to ); - } + __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const void* to, const iRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __noinline void operator()( const ModSibBase& to, const iRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + MovhlImplAll() {} //GCC. 
}; -// ------------------------------------------------------------------------ template< u8 Prefix, u8 Opcode, u8 OpcodeAlt > class MovapsImplAll { -protected: - typedef MovapsImpl m_128; - public: - __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const { m_128::Emit( Opcode, to, from ); } - __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { m_128::Emit( Opcode, to, from ); } - __forceinline void operator()( const void* to, const iRegisterSSE& from ) const { m_128::Emit( OpcodeAlt, to, from ); } - __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { m_128::Emit( Opcode, to, from ); } - __noinline void operator()( const ModSibBase& to, const iRegisterSSE& from ) const { m_128::Emit( OpcodeAlt, to, from ); } + __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const void* to, const iRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } + __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __noinline void operator()( const ModSibBase& to, const iRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } MovapsImplAll() {} //GCC. }; - diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index aa85482572..2557ba8f30 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -740,29 +740,40 @@ const MovapsImplAll< 0, 0x10, 0x11 > iMOVUPS; const MovapsImplAll< 0x66, 0x28, 0x29 > iMOVAPD; const MovapsImplAll< 0x66, 0x10, 0x11 > iMOVUPD; +#ifdef ALWAYS_USE_MOVAPS const MovapsImplAll< 0x66, 0x6f, 0x7f > iMOVDQA; const MovapsImplAll< 0xf3, 0x6f, 0x7f > iMOVDQU; +#else +const MovapsImplAll< 0, 0x28, 0x29 > iMOVDQA; +const MovapsImplAll< 0, 0x10, 0x11 > iMOVDQU; +#endif + +const MovhlImplAll< 0, 0x16 > iMOVHPS; +const MovhlImplAll< 0, 0x12 > iMOVLPS; +const MovhlImplAll< 0x66, 0x16 > iMOVHPD; +const MovhlImplAll< 0x66, 0x12 > iMOVLPD; + // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. -__emitinline void iMOVQZX( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop<0xf3>( 0x7e, to, from ); } +__forceinline void iMOVQZX( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, to, from ); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. -__noinline void iMOVQZX( const iRegisterSSE& to, const ModSibBase& src ) { writeXMMop<0xf3>( 0x7e, to, src ); } +__noinline void iMOVQZX( const iRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. 
-__emitinline void iMOVQZX( const iRegisterSSE& to, const void* src ) { writeXMMop<0xf3>( 0x7e, to, src ); } +__forceinline void iMOVQZX( const iRegisterSSE& to, const void* src ) { writeXMMop( 0xf3, 0x7e, to, src ); } -__emitinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ) { if( to != from ) writeXMMop<0>( 0x6f, to, from ); } -__noinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ) { writeXMMop<0>( 0x6f, to, src ); } -__emitinline void iMOVQ( const iRegisterMMX& to, const void* src ) { writeXMMop<0>( 0x6f, to, src ); } -__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ) { writeXMMop<0>( 0x7f, from, dest ); } -__forceinline void iMOVQ( void* dest, const iRegisterMMX& from ) { writeXMMop<0>( 0x7f, from, dest ); } -__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterSSE& from ) { writeXMMop<0xf3>( 0x7e, from, dest ); } -__forceinline void iMOVQ( void* dest, const iRegisterSSE& from ) { writeXMMop<0xf3>( 0x7e, from, dest ); } -__forceinline void iMOVQ( const iRegisterSSE& to, const iRegisterMMX& from ) { writeXMMop<0xf3>( 0xd6, to, from ); } +__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); } +__noinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); } +__forceinline void iMOVQ( const iRegisterMMX& to, const void* src ) { writeXMMop( 0x6f, to, src ); } +__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } +__forceinline void iMOVQ( void* dest, const iRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } +__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, from, dest ); } +__forceinline void iMOVQ( void* dest, const iRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, from, dest ); } +__forceinline void iMOVQ( const iRegisterSSE& to, const iRegisterMMX& from ) { writeXMMop( 0xf3, 0xd6, to, from ); } __forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterSSE& from ) { // Manual implementation of this form of MOVQ, since its parameters are unique in a way @@ -776,16 +787,52 @@ __forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterSSE& from ) // #define IMPLEMENT_iMOVS( ssd, prefix ) \ - __forceinline void iMOV##ssd( const iRegisterSSE& to, const iRegisterSSE& from ) { if( to != from ) writeXMMop( 0x10, to, from ); } \ - __forceinline void iMOV##ssd##ZX( const iRegisterSSE& to, const void* from ) { writeXMMop( 0x10, to, from ); } \ - __forceinline void iMOV##ssd##ZX( const iRegisterSSE& to, const ModSibBase& from ) { writeXMMop( 0x10, to, from ); } \ - __forceinline void iMOV##ssd( const void* to, const iRegisterSSE& from ) { writeXMMop( 0x11, from, to ); } \ - __forceinline void iMOV##ssd( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( 0x11, from, to ); } + __forceinline void iMOV##ssd( const iRegisterSSE& to, const iRegisterSSE& from ) { if( to != from ) writeXMMop( prefix, 0x10, to, from ); } \ + __forceinline void iMOV##ssd##ZX( const iRegisterSSE& to, const void* from ) { writeXMMop( prefix, 0x10, to, from ); } \ + __forceinline void iMOV##ssd##ZX( const iRegisterSSE& to, const ModSibBase& from ) { writeXMMop( prefix, 0x10, to, from ); } \ + __forceinline void iMOV##ssd( const void* to, const iRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } \ + __forceinline void iMOV##ssd( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( prefix, 
0x11, from, to ); } IMPLEMENT_iMOVS( SS, 0xf3 ) IMPLEMENT_iMOVS( SD, 0xf2 ) ////////////////////////////////////////////////////////////////////////////////////////// +// Non-temporal movs only support a register as a target (ie, load form only, no stores) // +__forceinline void iMOVNTDQA( const iRegisterSSE& to, const void* from ) +{ + iWrite( 0x2A380f66 ); + iWriteDisp( to.Id, from ); +} + +__noinline void iMOVNTDQA( const iRegisterSSE& to, const ModSibBase& from ) +{ + iWrite( 0x2A380f66 ); + EmitSibMagic( to.Id, from ); +} + +__forceinline void iMOVNTDQ( void* to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } +__noinline void iMOVNTDQA( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } + +__forceinline void iMOVNTPD( void* to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } +__noinline void iMOVNTPD( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } +__forceinline void iMOVNTPS( void* to, const iRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } +__noinline void iMOVNTPS( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } + +__forceinline void iMOVNTQ( void* to, const iRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } +__noinline void iMOVNTQ( const ModSibBase& to, const iRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } + +////////////////////////////////////////////////////////////////////////////////////////// +// Mov Low to High / High to Low +// +// These instructions come in xmmreg,xmmreg forms only! +// + +__forceinline void iMOVLHPS( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop( 0x16, to, from ); } +__forceinline void iMOVHLPS( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop( 0x12, to, from ); } +__forceinline void iMOVLHPD( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0x16, to, from ); } +__forceinline void iMOVHLPD( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0x12, to, from ); } + + } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 64fee9d85b..5b776567ad 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -331,42 +331,60 @@ namespace x86Emitter template< typename T > __emitinline void iMOVDZX( const iRegisterSIMD& to, const iRegister32& from ) { - Internal::writeXMMop<0x66>( 0x6e, to, from ); + Internal::writeXMMop( 0x66, 0x6e, to, from ); } template< typename T > __emitinline void iMOVDZX( const iRegisterSIMD& to, const void* src ) { - Internal::writeXMMop<0x66>( 0x6e, to, src ); + Internal::writeXMMop( 0x66, 0x6e, to, src ); } template< typename T > void iMOVDZX( const iRegisterSIMD& to, const ModSibBase& src ) { - Internal::writeXMMop<0x66>( 0x6e, to, src ); + Internal::writeXMMop( 0x66, 0x6e, to, src ); } template< typename T > __emitinline void iMOVD( const iRegister32& to, const iRegisterSIMD& from ) { - Internal::writeXMMop<0x66>( 0x7e, from, to ); + Internal::writeXMMop( 0x66, 0x7e, from, to ); } template< typename T > __emitinline void iMOVD( void* dest, const iRegisterSIMD& from ) { - Internal::writeXMMop<0x66>( 0x7e, from, dest ); + Internal::writeXMMop( 0x66, 0x7e, from, dest ); } template< typename T > void iMOVD( const ModSibBase& dest, const iRegisterSIMD& from ) { - Internal::writeXMMop<0x66>( 0x7e, from, dest ); + Internal::writeXMMop( 0x66, 0x7e, from, dest ); } // 
------------------------------------------------------------------------ + // iMASKMOV: + // Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2. + // The default memory location is specified by DS:EDI. The most significant bit in each byte + // of the mask operand determines whether the corresponding byte in the source operand is + // written to the corresponding byte location in memory. + template< typename T > + static __forceinline void iMASKMOV( const iRegisterSIMD& to, const iRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xf7, to, from ); } + + // iPMOVMSKB: + // Creates a mask made up of the most significant bit of each byte of the source + // operand and stores the result in the low byte or word of the destination operand. + // Upper bits of the destination are cleared to zero. + // + // When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on + // 128-bit (SSE) source, the byte mask is 16-bits. + // + template< typename T > + static __forceinline void iPMOVMSKB( const iRegister32& to, const iRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); } // ------------------------------------------------------------------------ @@ -397,5 +415,22 @@ namespace x86Emitter extern void iMOVSDZX( const iRegisterSSE& to, const void* from ); extern void iMOVSDZX( const iRegisterSSE& to, const ModSibBase& from ); + extern void iMOVNTDQA( const iRegisterSSE& to, const void* from ); + extern void iMOVNTDQA( const iRegisterSSE& to, const ModSibBase& from ); + extern void iMOVNTDQ( void* to, const iRegisterSSE& from ); + extern void iMOVNTDQA( const ModSibBase& to, const iRegisterSSE& from ); + + extern void iMOVNTPD( void* to, const iRegisterSSE& from ); + extern void iMOVNTPD( const ModSibBase& to, const iRegisterSSE& from ); + extern void iMOVNTPS( void* to, const iRegisterSSE& from ); + extern void iMOVNTPS( const ModSibBase& to, const iRegisterSSE& from ); + extern void iMOVNTQ( void* to, const iRegisterMMX& from ); + extern void iMOVNTQ( const ModSibBase& to, const iRegisterMMX& from ); + + extern void iMOVLHPS( const iRegisterSSE& to, const iRegisterSSE& from ); + extern void iMOVHLPS( const iRegisterSSE& to, const iRegisterSSE& from ); + extern void iMOVLHPD( const iRegisterSSE& to, const iRegisterSSE& from ); + extern void iMOVHLPD( const iRegisterSSE& to, const iRegisterSSE& from ); + } diff --git a/pcsx2/x86/ix86/ix86_legacy_instructions.h b/pcsx2/x86/ix86/ix86_legacy_instructions.h index e58e7f8d7a..f36522852f 100644 --- a/pcsx2/x86/ix86/ix86_legacy_instructions.h +++ b/pcsx2/x86/ix86/ix86_legacy_instructions.h @@ -977,19 +977,13 @@ extern void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offs extern void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); extern void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ); extern void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from ); extern void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); -extern void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); -extern void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); extern void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset=0 ); extern void SSE_MOVAPSRmtoR( x86SSERegType to, 
x86IntRegType from, int offset=0 ); -extern void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); -extern void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale=0 ); -extern void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ); -extern void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ); +extern void SSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from ); +extern void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from ); extern void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset=0 ); extern void SSE_MOVUPSRtoRm( x86SSERegType to, x86IntRegType from, int offset=0 ); diff --git a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp index 40e2170bf2..cf06a37777 100644 --- a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp @@ -27,23 +27,27 @@ using namespace x86Emitter; -/* movq m64 to r64 */ -emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) +emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { iMOVQ( iRegisterMMX(to), (void*)from ); } +emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { iMOVQ( (void*)to, iRegisterMMX(from) ); } +emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { iMOVQ( iRegisterMMX(to), iRegisterMMX(from) ); } +emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { iMOVQ( iRegisterMMX(to), ptr[iAddressReg(from)+offset] ); } +emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { iMOVQ( ptr[iAddressReg(to)+offset], iRegisterMMX(from) ); } + +emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { iMOVDZX( iRegisterMMX(to), (void*)from ); } +emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) { iMOVD( (void*)to, iRegisterMMX(from) ); } +emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) { iMOVDZX( iRegisterMMX(to), iRegister32(from) ); } +emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) { iMOVDZX( iRegisterMMX(to), ptr[iAddressReg(from)+offset] ); } +emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { iMOVD( iRegister32(to), iRegisterMMX(from) ); } +emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { iMOVD( ptr[iAddressReg(to)+offset], iRegisterMMX(from) ); } + +emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { - iMOVQ( iRegisterMMX(to), (void*)from ); - //write16( 0x6F0F ); - //ModRM( 0, to, DISP32 ); - //write32( MEMADDR(from, 4) ); + iPMOVMSKB( iRegister32(to), iRegisterMMX(from) ); } -/* movq r64 to m64 */ -emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) -{ - iMOVQ( (void*)to, iRegisterMMX(from) ); - //write16( 0x7F0F ); - //ModRM( 0, from, DISP32 ); - //write32(MEMADDR(to, 4)); -} + + + /* pand r64 to r64 */ emitterT void PANDRtoR( x86MMXRegType to, x86MMXRegType from ) @@ -474,73 +478,6 @@ emitterT void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - iMOVQ( iRegisterMMX(to), iRegisterMMX(from) ); - //write16( 0x6F0F ); - //ModRM( 3, to, from ); -} - -emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) -{ - iMOVQ( iRegisterMMX(to), ptr[iAddressReg(from)+offset] ); - //write16( 0x6F0F ); - //WriteRmOffsetFrom( to, from, offset ); -} - -emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) -{ - iMOVQ( ptr[iAddressReg(to)+offset], iRegisterMMX(from) ); - //write16( 0x7F0F ); - 
//WriteRmOffsetFrom( from, to, offset ); -} - -/* movd m32 to r64 */ -emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) -{ - iMOVDZX( iRegisterMMX(to), (void*)from ); - //write16( 0x6E0F ); - //ModRM( 0, to, DISP32 ); - //write32( MEMADDR(from, 4) ); -} - -/* movd r64 to m32 */ -emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) -{ - iMOVD( (void*)to, iRegisterMMX(from) ); - //write16( 0x7E0F ); - //ModRM( 0, from, DISP32 ); - //write32( MEMADDR(to, 4) ); -} - -emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) -{ - iMOVDZX( iRegisterMMX(to), iRegister32(from) ); - //write16( 0x6E0F ); - //ModRM( 3, to, from ); -} - -emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) -{ - iMOVDZX( iRegisterMMX(to), ptr[iAddressReg(from)+offset] ); - //write16( 0x6E0F ); - //WriteRmOffsetFrom( to, from, offset ); -} - -emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) -{ - iMOVD( iRegister32(to), iRegisterMMX(from) ); - //write16( 0x7E0F ); - //ModRM( 3, from, to ); -} - -emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) -{ - iMOVD( ptr[iAddressReg(to)+offset], iRegisterMMX(from) ); - //write16( 0x7E0F ); - //WriteRmOffsetFrom( from, to, offset ); -} - // untested emitterT void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from) { @@ -554,12 +491,6 @@ emitterT void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from) ModRM( 3, to, from ); } -emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) -{ - write16( 0xD70F ); - ModRM( 3, to, from ); -} - emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3); @@ -583,8 +514,4 @@ emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) write8(imm8); } -emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) -{ - write16(0xf70f); - ModRM( 3, to, from ); -} +emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { iMASKMOV( iRegisterMMX(to), iRegisterMMX(from) ); } diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index b604919e22..365b168902 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -22,17 +22,6 @@ using namespace x86Emitter; -////////////////////////////////////////////////////////////////////////////////////////// -// AlwaysUseMovaps [const] -// -// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions -// do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache -// and some marginal speed gains as a result. (it's possible someday in the future the per- -// formance of the two instructions could change, so this constant is provided to restore MOVDQA -// use easily at a later time, if needed). 
-// -static const bool AlwaysUseMovaps = true; - //------------------------------------------------------------------ // SSE instructions @@ -147,164 +136,28 @@ static const bool AlwaysUseMovaps = true; SSE_SD_RtoR( 0xc20f ), \ write8( op ) -/* movups [r32][r32*scale] to xmm1 */ -emitterT void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) -{ - RexRXB(0, to, from2, from); - write16( 0x100f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); -} +#define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \ + emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { iMOV##mod( iRegisterSSE(to), (void*)from ); } \ + emitterT void sse##_MOV##mod##_XMM_to_M128( uptr to, x86SSERegType from ) { iMOV##mod( (void*)to, iRegisterSSE(from) ); } \ + emitterT void sse##_MOV##mod##RmtoR( x86SSERegType to, x86IntRegType from, int offset ) { iMOV##mod( iRegisterSSE(to), ptr[iAddressReg(from)+offset] ); } \ + emitterT void sse##_MOV##mod##RtoRm( x86IntRegType to, x86SSERegType from, int offset ) { iMOV##mod( ptr[iAddressReg(to)+offset], iRegisterSSE(from) ); } \ + emitterT void sse##_MOV##mod##RmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) \ + { iMOV##mod( iRegisterSSE(to), ptr[iAddressReg(from)+iAddressReg(from2)] ); } \ + emitterT void sse##_MOV##mod##RtoRmS( x86IntRegType to, x86SSERegType from, x86IntRegType from2, int scale ) \ + { iMOV##mod( ptr[iAddressReg(to)+iAddressReg(from2)], iRegisterSSE(from) ); } -/* movups xmm1 to [r32][r32*scale] */ -emitterT void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) -{ - RexRXB(1, to, from2, from); - write16( 0x110f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); -} - -/* movups [r32] to r32 */ -emitterT void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, to, from); - write16( 0x100f ); - ModRM( 0, to, from ); -} - -/* movups r32 to [r32] */ -emitterT void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write16( 0x110f ); - ModRM( 0, from, to ); -} - -/* movlps [r32] to r32 */ -emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) -{ - RexRB(1, to, from); - write16( 0x120f ); - ModRM( 0, to, from ); -} - -emitterT void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write16( 0x120f ); - WriteRmOffsetFrom(to, from, offset); -} - -/* movaps r32 to [r32] */ -emitterT void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) -{ - RexRB(0, from, to); - write16( 0x130f ); - ModRM( 0, from, to ); -} - -emitterT void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, from, to); - write16( 0x130f ); - WriteRmOffsetFrom(from, to, offset); -} - -/* movaps [r32][r32*scale] to xmm1 */ -emitterT void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) -{ - assert( from != EBP ); - RexRXB(0, to, from2, from); - write16( 0x280f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); -} - -/* movaps xmm1 to [r32][r32*scale] */ -emitterT void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) -{ - assert( from != EBP ); - RexRXB(0, to, from2, from); - write16( 0x290f ); - ModRM( 0, to, 0x4 ); - SibSB( scale, from2, from ); -} - -// movaps [r32+offset] to r32 -emitterT void SSE_MOVAPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write16( 0x280f ); - WriteRmOffsetFrom(to, from, offset); 
-} - -// movaps r32 to [r32+offset] -emitterT void SSE_MOVAPSRtoRm( x86IntRegType to, x86SSERegType from, int offset ) -{ - RexRB(0, from, to); - write16( 0x290f ); - WriteRmOffsetFrom(from, to, offset); -} - -// movdqa [r32+offset] to r32 -emitterT void SSE2_MOVDQARmtoR( x86SSERegType to, x86IntRegType from, int offset ) -{ - if( AlwaysUseMovaps ) - SSE_MOVAPSRmtoR( to, from, offset ); - else - { - write8(0x66); - RexRB(0, to, from); - write16( 0x6f0f ); - WriteRmOffsetFrom(to, from, offset); - } -} - -// movdqa r32 to [r32+offset] -emitterT void SSE2_MOVDQARtoRm( x86IntRegType to, x86SSERegType from, int offset ) -{ - if( AlwaysUseMovaps ) - SSE_MOVAPSRtoRm( to, from, offset ); - else - { - write8(0x66); - RexRB(0, from, to); - write16( 0x7f0f ); - WriteRmOffsetFrom(from, to, offset); - } -} - -// movups [r32+offset] to r32 -emitterT void SSE_MOVUPSRmtoR( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write16( 0x100f ); - WriteRmOffsetFrom(to, from, offset); -} - -// movups r32 to [r32+offset] -emitterT void SSE_MOVUPSRtoRm( x86IntRegType to, x86SSERegType from, int offset ) -{ - RexRB(0, from, to); - write16( 0x110f ); - WriteRmOffsetFrom(from, to, offset); -} +DEFINE_LEGACY_MOV_OPCODE( UPS, SSE ) +DEFINE_LEGACY_MOV_OPCODE( APS, SSE ) +DEFINE_LEGACY_MOV_OPCODE( LPS, SSE ) +DEFINE_LEGACY_MOV_OPCODE( HPS, SSE ) +DEFINE_LEGACY_MOV_OPCODE( DQA, SSE2 ) +DEFINE_LEGACY_MOV_OPCODE( DQU, SSE2 ) //**********************************************************************************/ //MOVAPS: Move aligned Packed Single Precision FP values * //********************************************************************************** -//emitterT void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x280f, 0 ); } -//emitterT void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x290f, 0 ); } -//emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSERtoR( 0x280f ); } } -//emitterT void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); } -//emitterT void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x110f, 0 ); } -emitterT void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ) { iMOVAPS( iRegisterSSE(to), (void*)from ); } -emitterT void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ) { iMOVAPS( (void*)to, iRegisterSSE(from) ); } emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iMOVAPS( iRegisterSSE(to), iRegisterSSE(from) ); } -emitterT void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { iMOVUPS( iRegisterSSE(to), (void*)from ); } -emitterT void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { iMOVUPS( (void*)to, iRegisterSSE(from) ); } emitterT void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) { iMOVQZX( iRegisterSSE(to), (void*)from ); } emitterT void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iMOVQZX( iRegisterSSE(to), iRegisterSSE(from) ); } @@ -328,78 +181,41 @@ emitterT void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { emitterT void SSE2_MOVSD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { iMOVSDZX( iRegisterSSE(to), ptr[iAddressReg(from)+offset] ); } emitterT void SSE2_MOVSD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { iMOVSD( ptr[iAddressReg(to)+offset], iRegisterSSE(from) ); } -/*emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x100f, 0 ); } -emitterT void SSE_MOVSS_XMM_to_M32( u32 
to, x86SSERegType from ) { SSE_SS_RtoM( 0x110f, 0 ); } -emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { if (to != from) { SSE_SS_RtoR( 0x100f ); } } -emitterT void SSE_MOVSS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) -{ - write8(0xf3); - RexRB(0, to, from); - write16( 0x100f ); - WriteRmOffsetFrom(to, from, offset); -} - -emitterT void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) -{ - write8(0xf3); - RexRB(0, from, to); - write16(0x110f); - WriteRmOffsetFrom(from, to, offset); -}*/ - -emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xf70f ); } +emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iMASKMOV( iRegisterSSE(to), iRegisterSSE(from) ); } //**********************************************************************************/ //MOVLPS: Move low Packed Single-Precision FP * //********************************************************************************** -emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x120f, 0 ); } -emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x130f, 0 ); } - -emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write16( 0x120f ); - WriteRmOffsetFrom(to, from, offset); -} - -emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) -{ - RexRB(0, from, to); - write16(0x130f); - WriteRmOffsetFrom(from, to, offset); -} +emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { iMOVLPS( iRegisterSSE(to), (void*)from ); } +emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { iMOVLPS( (void*)to, iRegisterSSE(from) ); } +emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { iMOVLPS( iRegisterSSE(to), ptr[iAddressReg(from)+offset] ); } +emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { iMOVLPS( ptr[iAddressReg(to)+offset], iRegisterSSE(from) ); } ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MOVHPS: Move High Packed Single-Precision FP * //********************************************************************************** -emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x160f, 0 ); } -emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSERtoM( 0x170f, 0 ); } - -emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) -{ - RexRB(0, to, from); - write16( 0x160f ); - WriteRmOffsetFrom(to, from, offset); -} - -emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) -{ - RexRB(0, from, to); - write16(0x170f); - WriteRmOffsetFrom(from, to, offset); -} +emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { iMOVHPS( iRegisterSSE(to), (void*)from ); } +emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { iMOVHPS( (void*)to, iRegisterSSE(from) ); } +emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { iMOVHPS( iRegisterSSE(to), ptr[iAddressReg(from)+offset] ); } +emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { iMOVHPS( ptr[iAddressReg(to)+offset], iRegisterSSE(from) ); } 
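For readers unfamiliar with these ops, the data movement that MOVLPS/MOVHPS/MOVLHPS/MOVHLPS perform is easiest to see with the equivalent SSE intrinsics. This is a standalone illustration using plain <xmmintrin.h> code, not emitter code:

#include <xmmintrin.h>

// An __m128 holds four floats: [f0 f1 | f2 f3]  (low 64 bits | high 64 bits).
void movlh_demo( const float* mem, float* out )
{
	__m128 a = _mm_setzero_ps();
	a = _mm_loadl_pi( a, (const __m64*)mem );       // MOVLPS load:  mem -> low qword, high preserved
	a = _mm_loadh_pi( a, (const __m64*)(mem + 2) ); // MOVHPS load:  mem -> high qword, low preserved
	__m128 b = _mm_movelh_ps( a, a );               // MOVLHPS:      low qword of src -> high qword of dst
	__m128 c = _mm_movehl_ps( a, a );               // MOVHLPS:      high qword of src -> low qword of dst
	_mm_storel_pi( (__m64*)out, b );                // MOVLPS store: low qword -> mem
	_mm_storeh_pi( (__m64*)(out + 2), c );          // MOVHPS store: high qword -> mem
}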
///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MOVLHPS: Moved packed Single-Precision FP low to high * //********************************************************************************** -emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x160f ); } +emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iMOVLHPS( iRegisterSSE(to), iRegisterSSE(from) ); } ////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MOVHLPS: Moved packed Single-Precision FP High to Low * //********************************************************************************** -emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x120f ); } +emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iMOVHLPS( iRegisterSSE(to), iRegisterSSE(from) ); } + + +emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { iPMOVMSKB( iRegister32(to), iRegisterSSE(from) ); } + + + /////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ @@ -938,30 +754,8 @@ emitterT void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SS emitterT void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEF0F ); } /////////////////////////////////////////////////////////////////////////////////////// -emitterT void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from) { if( AlwaysUseMovaps ) SSE_MOVAPS_M128_to_XMM( to, from ); else SSEMtoR66(0x6F0F); } -emitterT void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ) { if( AlwaysUseMovaps ) SSE_MOVAPS_XMM_to_M128( to, from ); else SSERtoM66(0x7F0F); } emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if( AlwaysUseMovaps ) SSE_MOVAPS_XMM_to_XMM( to, from ); else if( to != from ) SSERtoR66(0x6F0F); } -emitterT void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from) -{ - if( AlwaysUseMovaps ) - SSE_MOVUPS_M128_to_XMM( to, from ); - else - { - write8(0xF3); - SSEMtoR(0x6F0F, 0); - } -} -emitterT void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from) -{ - if( AlwaysUseMovaps ) - SSE_MOVUPS_XMM_to_M128( to, from ); - else - { - write8(0xF3); - SSERtoM(0x7F0F, 0); - } -} // shift right logical @@ -1153,8 +947,6 @@ emitterT void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR6 emitterT void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); } emitterT void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); } -emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0xD70F); } - emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); } emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); } diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 2bed0871d6..1ef7f33e0a 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -675,16 +675,39 @@ namespace x86Emitter } ////////////////////////////////////////////////////////////////////////////////////////// + // ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const] // + // This 
tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions + // do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache + // and some marginal speed gains as a result. (it's possible someday in the future the per- + // formance of the two instructions could change, so this constant is provided to restore MOVDQA + // use easily at a later time, if needed). + #define ALWAYS_USE_MOVAPS - extern const Internal::MovapsImplAll< 0, 0x28, 0x29 > iMOVAPS; + #ifdef ALWAYS_USE_MOVAPS + static const bool AlwaysUseMovaps = true; + #else + static const bool AlwaysUseMovaps = false; + #endif + + extern const Internal::MovapsImplAll< 0, 0x28, 0x29 > iMOVAPS; extern const Internal::MovapsImplAll< 0, 0x10, 0x11 > iMOVUPS; extern const Internal::MovapsImplAll< 0x66, 0x28, 0x29 > iMOVAPD; extern const Internal::MovapsImplAll< 0x66, 0x10, 0x11 > iMOVUPD; + #ifdef ALWAYS_USE_MOVAPS extern const Internal::MovapsImplAll< 0x66, 0x6f, 0x7f > iMOVDQA; extern const Internal::MovapsImplAll< 0xf3, 0x6f, 0x7f > iMOVDQU; + #else + extern const Internal::MovapsImplAll< 0, 0x28, 0x29 > iMOVDQA; + extern const Internal::MovapsImplAll< 0, 0x10, 0x11 > iMOVDQU; + #endif + + extern const Internal::MovhlImplAll< 0, 0x16 > iMOVHPS; + extern const Internal::MovhlImplAll< 0, 0x12 > iMOVLPS; + extern const Internal::MovhlImplAll< 0x66, 0x16 > iMOVHPD; + extern const Internal::MovhlImplAll< 0x66, 0x12 > iMOVLPD; } From 1bc6795200a39ce96117729601a1df4108ae6551 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Sun, 19 Apr 2009 20:14:53 +0000 Subject: [PATCH 105/143] Emitter :Implemented Packed logical operations (PAND/POR/ANDxx/ORxx/etc), ADDxx/SUBxx/MULxx/DIVxx, and iRCPPS/iRCPSS. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1021 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVUzerorec.cpp | 2 - pcsx2/x86/ix86/implement/xmm/movqss.h | 45 +++++++- pcsx2/x86/ix86/ix86.cpp | 40 +++++++ pcsx2/x86/ix86/ix86_legacy_mmx.cpp | 74 ++---------- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 160 ++++---------------------- pcsx2/x86/ix86/ix86_types.h | 63 ++++++++-- 6 files changed, 169 insertions(+), 215 deletions(-) diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index ad0b628013..de6ad9b1af 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -3859,7 +3859,6 @@ void recVUMI_JR( VURegs* vuu, s32 info ) { int fsreg = _allocX86reg(-1, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Fs_, MODE_READ); LEA32RStoR(EAX, fsreg, 3); - CWDE(); if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 ) MOV32RtoM(SuperVUGetVIAddr(REG_TPC, 0), EAX); @@ -3876,7 +3875,6 @@ void recVUMI_JALR( VURegs* vuu, s32 info ) int fsreg = _allocX86reg(-1, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Fs_, MODE_READ); LEA32RStoR(EAX, fsreg, 3); - CWDE(); // necessary, charlie and chocolate factory gives bad addrs, but graphics are ok if ( _Ft_ ) { _deleteX86reg(X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Ft_, 2); diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index 0a955b053b..f90500e674 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -87,9 +87,9 @@ __emitinline void writeXMMop( u8 opcode, const iRegister& reg, const void* da } ////////////////////////////////////////////////////////////////////////////////////////// -// // Moves to/from high/low portions of an xmm register. // These instructions cannot be used in reg/reg form. 
+//
 template< u8 Prefix, u8 Opcode >
 class MovhlImplAll
 {
@@ -114,3 +114,46 @@ public:
 	MovapsImplAll() {} //GCC.
 };
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// PLogicImplAll - Implements logic forms for MMX/SSE instructions, and can be used for
+// a few other instructions too (anything which comes in simdreg,simdreg/ModRM forms).
+//
+template< u8 Opcode >
+class PLogicImplAll
+{
+public:
+	template< typename T >
+	__forceinline void operator()( const iRegisterSIMD<T>& to, const iRegisterSIMD<T>& from ) const
+	{
+		writeXMMop( 0x66, Opcode, to, from );
+	}
+	template< typename T >
+	__forceinline void operator()( const iRegisterSIMD<T>& to, const void* from ) const
+	{
+		writeXMMop( 0x66, Opcode, to, from );
+	}
+	template< typename T >
+	__noinline void operator()( const iRegisterSIMD<T>& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); }
+
+	PLogicImplAll() {} //GCWho?
+};
+
+// ------------------------------------------------------------------------
+// For implementing SSE-only logic operations, like ANDPS/ANDPD
+template< u8 Prefix, u8 Opcode >
+class PLogicImplSSE
+{
+public:
+	__forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const
+	{
+		writeXMMop( Prefix, Opcode, to, from );
+	}
+	__forceinline void operator()( const iRegisterSSE& to, const void* from ) const
+	{
+		writeXMMop( Prefix, Opcode, to, from );
+	}
+	__noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); }
+
+	PLogicImplSSE() {} //GCWho?
+};
diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp
index 2557ba8f30..86ad0f234d 100644
--- a/pcsx2/x86/ix86/ix86.cpp
+++ b/pcsx2/x86/ix86/ix86.cpp
@@ -753,6 +753,46 @@ const MovhlImplAll< 0, 0x12 > iMOVLPS;
 const MovhlImplAll< 0x66, 0x16 > iMOVHPD;
 const MovhlImplAll< 0x66, 0x12 > iMOVLPD;
 
+const PLogicImplAll<0xdb> iPAND;
+const PLogicImplAll<0xdf> iPANDN;
+const PLogicImplAll<0xeb> iPOR;
+const PLogicImplAll<0xef> iPXOR;
+
+const PLogicImplSSE<0x00,0x54> iANDPS;
+const PLogicImplSSE<0x66,0x54> iANDPD;
+const PLogicImplSSE<0x00,0x55> iANDNPS;
+const PLogicImplSSE<0x66,0x55> iANDNPD;
+const PLogicImplSSE<0x00,0x56> iORPS;
+const PLogicImplSSE<0x66,0x56> iORPD;
+const PLogicImplSSE<0x00,0x57> iXORPS;
+const PLogicImplSSE<0x66,0x57> iXORPD;
+
+const PLogicImplSSE<0x00,0x5c> iSUBPS;
+const PLogicImplSSE<0x66,0x5c> iSUBPD;
+const PLogicImplSSE<0xf3,0x5c> iSUBSS;
+const PLogicImplSSE<0xf2,0x5c> iSUBSD;
+
+const PLogicImplSSE<0x00,0x58> iADDPS;
+const PLogicImplSSE<0x66,0x58> iADDPD;
+const PLogicImplSSE<0xf3,0x58> iADDSS;
+const PLogicImplSSE<0xf2,0x58> iADDSD;
+
+const PLogicImplSSE<0x00,0x59> iMULPS;
+const PLogicImplSSE<0x66,0x59> iMULPD;
+const PLogicImplSSE<0xf3,0x59> iMULSS;
+const PLogicImplSSE<0xf2,0x59> iMULSD;
+
+const PLogicImplSSE<0x00,0x5e> iDIVPS;
+const PLogicImplSSE<0x66,0x5e> iDIVPD;
+const PLogicImplSSE<0xf3,0x5e> iDIVSS;
+const PLogicImplSSE<0xf2,0x5e> iDIVSD;
+
+// Compute Reciprocal Packed Single-Precision Floating-Point Values
+const PLogicImplSSE<0,0x53> iRCPPS;
+
+// Compute Reciprocal of Scalar Single-Precision Floating-Point Value
+const PLogicImplSSE<0xf3,0x53> iRCPSS;
+
 // Moves from XMM to XMM, with the *upper 64 bits* of the destination register
 // being cleared to zero.
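Taken together, PLogicImplAll / PLogicImplSSE reduce each packed operation to a single const instance plus an opcode byte. A minimal sketch of how the instances declared above are used, and how another opcode of the same shape could be slotted in (iPCMPEQD here is a hypothetical example, not part of this change):

// Call sites pass typed operands; the template writes the 0x66 prefix for
// XMM operands and omits it for MMX operands automatically.
void logicExample()
{
	iPXOR( iRegisterSSE(0), iRegisterSSE(0) );            // xmm0 ^= xmm0  -> 66 0F EF C0
	iPAND( iRegisterMMX(1), iRegisterMMX(2) );            // mm1 &= mm2    -> 0F DB CA (no 66 prefix)
	iADDSS( iRegisterSSE(3), ptr[iAddressReg(1)+0x10] );  // low float of xmm3 += float at [ecx+0x10]  (F3 0F 58)
	iRCPPS( iRegisterSSE(4), iRegisterSSE(5) );           // packed reciprocal  (0F 53)
}

// A further instruction would just be one more instance (hypothetical, not in this commit):
const PLogicImplAll<0x76> iPCMPEQD;   // 0F 76 for MMX operands, 66 0F 76 for SSE operands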
diff --git a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp index cf06a37777..8633cc92f2 100644 --- a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp @@ -40,42 +40,20 @@ emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { iMOVD( iRegister32(to), iRegisterMMX(from) ); } emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { iMOVD( ptr[iAddressReg(to)+offset], iRegisterMMX(from) ); } -emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) -{ - iPMOVMSKB( iRegister32(to), iRegisterMMX(from) ); -} +emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { iPMOVMSKB( iRegister32(to), iRegisterMMX(from) ); } +#define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \ + emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) { iP##mod( iRegisterMMX(to), iRegisterMMX(from) ); } \ + emitterT void P##mod##MtoR( x86MMXRegType to, uptr from ) { iP##mod( iRegisterMMX(to), (void*)from ); } \ + emitterT void SSE2_P##mod##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iP##mod( iRegisterSSE(to), iRegisterSSE(from) ); } \ + emitterT void SSE2_P##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { iP##mod( iRegisterSSE(to), (void*)from ); } +DEFINE_LEGACY_LOGIC_OPCODE( AND ) +DEFINE_LEGACY_LOGIC_OPCODE( ANDN ) +DEFINE_LEGACY_LOGIC_OPCODE( OR ) +DEFINE_LEGACY_LOGIC_OPCODE( XOR ) - -/* pand r64 to r64 */ -emitterT void PANDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xDB0F ); - ModRM( 3, to, from ); -} - -emitterT void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xDF0F ); - ModRM( 3, to, from ); -} - -/* por r64 to r64 */ -emitterT void PORRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xEB0F ); - ModRM( 3, to, from ); -} - -/* pxor r64 to r64 */ -emitterT void PXORRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xEF0F ); - ModRM( 3, to, from ); -} - /* psllq r64 to r64 */ emitterT void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ) { @@ -420,38 +398,6 @@ emitterT void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ) ModRM( 3, to, from ); } -/* por m64 to r64 */ -emitterT void PORMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xEB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* pxor m64 to r64 */ -emitterT void PXORMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xEF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* pand m64 to r64 */ -emitterT void PANDMtoR( x86MMXRegType to, uptr from ) -{ - //u64 rip = (u64)x86Ptr + 7; - write16( 0xDB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void PANDNMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xDF0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - emitterT void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ) { write16( 0x6A0F ); diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index 365b168902..50818e75f4 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -215,108 +215,34 @@ emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { iPMOVMSKB( iRegister32(to), iRegisterSSE(from) ); } +#define DEFINE_LEGACY_PSD_OPCODE( mod ) \ + emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { i##mod##PS( 
iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##PS( iRegisterSSE(to), iRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##PD_M128_to_XMM( x86SSERegType to, uptr from ) { i##mod##PD( iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##PD( iRegisterSSE(to), iRegisterSSE(from) ); } +#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \ + DEFINE_LEGACY_PSD_OPCODE( mod ) \ + emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { i##mod##SS( iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##SS( iRegisterSSE(to), iRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##SD_M32_to_XMM( x86SSERegType to, uptr from ) { i##mod##SD( iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##SD( iRegisterSSE(to), iRegisterSSE(from) ); } -/////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//ANDPS: Logical Bit-wise AND for Single FP * -//********************************************************************************** -emitterT void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); } -emitterT void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); } +DEFINE_LEGACY_PSD_OPCODE( AND ) +DEFINE_LEGACY_PSD_OPCODE( ANDN ) +DEFINE_LEGACY_PSD_OPCODE( OR ) +DEFINE_LEGACY_PSD_OPCODE( XOR ) -emitterT void SSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x540f ); } -emitterT void SSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x540f ); } +DEFINE_LEGACY_PSSD_OPCODE( SUB ) +DEFINE_LEGACY_PSSD_OPCODE( ADD ) +DEFINE_LEGACY_PSSD_OPCODE( MUL ) +DEFINE_LEGACY_PSSD_OPCODE( DIV ) -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//ANDNPS : Logical Bit-wise AND NOT of Single-precision FP values * -//********************************************************************************** -emitterT void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); } -emitterT void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); } +emitterT void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iRCPPS( iRegisterSSE(to), iRegisterSSE(from) ); } +emitterT void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { iRCPPS( iRegisterSSE(to), (void*)from ); } -emitterT void SSE2_ANDNPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x550f ); } -emitterT void SSE2_ANDNPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x550f ); } - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//RCPPS : Packed Single-Precision FP Reciprocal * -//********************************************************************************** -emitterT void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x530f ); } -emitterT void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x530f, 0 ); } - -emitterT void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { 
SSE_SS_RtoR(0x530f); } -emitterT void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR(0x530f, 0); } - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//ORPS : Bit-wise Logical OR of Single-Precision FP Data * -//********************************************************************************** -emitterT void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); } -emitterT void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); } - -emitterT void SSE2_ORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x560f ); } -emitterT void SSE2_ORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x560f ); } - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//XORPS : Bitwise Logical XOR of Single-Precision FP Values * -//********************************************************************************** -emitterT void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); } -emitterT void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); } - -emitterT void SSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x570f ); } -emitterT void SSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x570f ); } - -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//ADDPS : ADD Packed Single-Precision FP Values * -//********************************************************************************** -emitterT void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x580f, 0 ); } -emitterT void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x580f ); } - -//////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//ADDSS : ADD Scalar Single-Precision FP Values * -//********************************************************************************** -emitterT void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); } -emitterT void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); } - -emitterT void SSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x580f, 0 ); } -emitterT void SSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x580f ); } - -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SUBPS: Packed Single-Precision FP Subtract * -//********************************************************************************** -emitterT void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5c0f, 0 ); } -emitterT void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5c0f ); } - -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SUBSS : Scalar Single-Precision FP Subtract * 
-//********************************************************************************** -emitterT void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); } -emitterT void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); } - -emitterT void SSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5c0f, 0 ); } -emitterT void SSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5c0f ); } - -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MULPS : Packed Single-Precision FP Multiply * -//********************************************************************************** -emitterT void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x590f, 0 ); } -emitterT void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x590f ); } - -//////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MULSS : Scalar Single-Precision FP Multiply * -//********************************************************************************** -emitterT void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); } -emitterT void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); } - -emitterT void SSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x590f, 0 ); } -emitterT void SSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x590f ); } +emitterT void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { iRCPSS( iRegisterSSE(to), iRegisterSSE(from) ); } +emitterT void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { iRCPSS( iRegisterSSE(to), (void*)from ); } //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ @@ -610,23 +536,6 @@ emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x150f, 0); } emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); } -//////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//DIVPS : Packed Single-Precision FP Divide * -//********************************************************************************** -emitterT void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5e0F, 0 ); } -emitterT void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5e0F ); } - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//DIVSS : Scalar Single-Precision FP Divide * -//********************************************************************************** -emitterT void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5e0F, 0 ); } -emitterT void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5e0F ); } - -emitterT void SSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5e0F, 0 ); } -emitterT 
void SSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5e0F ); } - ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //STMXCSR : Store Streaming SIMD Extension Control/Status * @@ -731,30 +640,9 @@ emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int off WriteRmOffsetFrom(from, to, offset); } -//////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//POR : SSE Bitwise OR * -//********************************************************************************** -emitterT void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEB0F ); } -emitterT void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEB0F ); } - -// logical and to &= from -emitterT void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDB0F ); } -emitterT void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDB0F ); } - -// to = (~to) & from -emitterT void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDF0F ); } -emitterT void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDF0F ); } - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PXOR : SSE Bitwise XOR * -//********************************************************************************** -emitterT void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEF0F ); } -emitterT void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEF0F ); } /////////////////////////////////////////////////////////////////////////////////////// -emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { if( AlwaysUseMovaps ) SSE_MOVAPS_XMM_to_XMM( to, from ); else if( to != from ) SSERtoR66(0x6F0F); } +emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { iMOVDQA( iRegisterSSE(to), iRegisterSSE(from) ); } // shift right logical diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 1ef7f33e0a..d70ff1b1ea 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -690,25 +690,64 @@ namespace x86Emitter static const bool AlwaysUseMovaps = false; #endif - extern const Internal::MovapsImplAll< 0, 0x28, 0x29 > iMOVAPS; - extern const Internal::MovapsImplAll< 0, 0x10, 0x11 > iMOVUPS; + extern const Internal::MovapsImplAll<0, 0x28, 0x29> iMOVAPS; + extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVUPS; - extern const Internal::MovapsImplAll< 0x66, 0x28, 0x29 > iMOVAPD; - extern const Internal::MovapsImplAll< 0x66, 0x10, 0x11 > iMOVUPD; + extern const Internal::MovapsImplAll<0x66, 0x28, 0x29> iMOVAPD; + extern const Internal::MovapsImplAll<0x66, 0x10, 0x11> iMOVUPD; #ifdef ALWAYS_USE_MOVAPS - extern const Internal::MovapsImplAll< 0x66, 0x6f, 0x7f > iMOVDQA; - extern const Internal::MovapsImplAll< 0xf3, 0x6f, 0x7f > iMOVDQU; + extern const Internal::MovapsImplAll<0x66, 0x6f, 0x7f> iMOVDQA; + extern const Internal::MovapsImplAll<0xf3, 0x6f, 0x7f> iMOVDQU; #else - extern const Internal::MovapsImplAll< 0, 0x28, 0x29 > iMOVDQA; - extern const Internal::MovapsImplAll< 0, 0x10, 0x11 > iMOVDQU; + extern const 
Internal::MovapsImplAll<0, 0x28, 0x29> iMOVDQA; + extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVDQU; #endif - extern const Internal::MovhlImplAll< 0, 0x16 > iMOVHPS; - extern const Internal::MovhlImplAll< 0, 0x12 > iMOVLPS; - extern const Internal::MovhlImplAll< 0x66, 0x16 > iMOVHPD; - extern const Internal::MovhlImplAll< 0x66, 0x12 > iMOVLPD; + extern const Internal::MovhlImplAll<0, 0x16> iMOVHPS; + extern const Internal::MovhlImplAll<0, 0x12> iMOVLPS; + extern const Internal::MovhlImplAll<0x66, 0x16> iMOVHPD; + extern const Internal::MovhlImplAll<0x66, 0x12> iMOVLPD; + extern const Internal::PLogicImplAll<0xdb> iPAND; + extern const Internal::PLogicImplAll<0xdf> iPANDN; + extern const Internal::PLogicImplAll<0xeb> iPOR; + extern const Internal::PLogicImplAll<0xef> iPXOR; + + extern const Internal::PLogicImplSSE<0x00,0x54> iANDPS; + extern const Internal::PLogicImplSSE<0x66,0x54> iANDPD; + extern const Internal::PLogicImplSSE<0x00,0x55> iANDNPS; + extern const Internal::PLogicImplSSE<0x66,0x55> iANDNPD; + extern const Internal::PLogicImplSSE<0x00,0x56> iORPS; + extern const Internal::PLogicImplSSE<0x66,0x56> iORPD; + extern const Internal::PLogicImplSSE<0x00,0x57> iXORPS; + extern const Internal::PLogicImplSSE<0x66,0x57> iXORPD; + + extern const Internal::PLogicImplSSE<0x00,0x5c> iSUBPS; + extern const Internal::PLogicImplSSE<0x66,0x5c> iSUBPD; + extern const Internal::PLogicImplSSE<0xf3,0x5c> iSUBSS; + extern const Internal::PLogicImplSSE<0xf2,0x5c> iSUBSD; + + extern const Internal::PLogicImplSSE<0x00,0x58> iADDPS; + extern const Internal::PLogicImplSSE<0x66,0x58> iADDPD; + extern const Internal::PLogicImplSSE<0xf3,0x58> iADDSS; + extern const Internal::PLogicImplSSE<0xf2,0x58> iADDSD; + + extern const Internal::PLogicImplSSE<0x00,0x59> iMULPS; + extern const Internal::PLogicImplSSE<0x66,0x59> iMULPD; + extern const Internal::PLogicImplSSE<0xf3,0x59> iMULSS; + extern const Internal::PLogicImplSSE<0xf2,0x59> iMULSD; + + extern const Internal::PLogicImplSSE<0x00,0x5e> iDIVPS; + extern const Internal::PLogicImplSSE<0x66,0x5e> iDIVPD; + extern const Internal::PLogicImplSSE<0xf3,0x5e> iDIVSS; + extern const Internal::PLogicImplSSE<0xf2,0x5e> iDIVSD; + + + + extern const Internal::PLogicImplSSE<0,0x53> iRCPPS; + extern const Internal::PLogicImplSSE<0xf3,0x53> iRCPSS; + } #include "ix86_inlines.inl" From a2d305b9abc4bc36714de0de97f54c2abc597bdb Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sun, 19 Apr 2009 20:34:32 +0000 Subject: [PATCH 106/143] The things Visual C++ lets you get away with... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1022 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/xmm/movqss.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index f90500e674..4f0fdd1614 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -134,7 +134,7 @@ public: writeXMMop( 0x66, Opcode, to, from ); } template< typename T > - __noinline void operator()( const iRegisterSIMD&, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } + __noinline void operator()( const iRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } PLogicImplAll() {} //GCWho? 
}; @@ -153,7 +153,7 @@ public: { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const iRegisterSSE&, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } PLogicImplSSE() {} //GCWho? }; From b7ea57a5d91e8bbf8ccfaf8a14c6446081a01ee4 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sun, 19 Apr 2009 21:22:47 +0000 Subject: [PATCH 107/143] microVU: -implemented the ability to run VU's for x amount of cycles, instead of running till the microprogram is completed (some games can get stuck in infinite loops, so this is needed) -fixed some errors... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1023 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVU0micro.cpp | 2 +- pcsx2/x86/iVU1micro.cpp | 4 +-- pcsx2/x86/microVU.cpp | 1 - pcsx2/x86/microVU.h | 6 ++-- pcsx2/x86/microVU_Alloc.inl | 16 +++++----- pcsx2/x86/microVU_Compile.inl | 59 ++++++++++++++++++++--------------- pcsx2/x86/microVU_Execute.inl | 5 +-- pcsx2/x86/microVU_Lower.inl | 26 +++++++-------- pcsx2/x86/microVU_Misc.inl | 31 +++++++++++++++--- 9 files changed, 91 insertions(+), 59 deletions(-) diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index 55af7a5c54..bbc9b64cc9 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -84,7 +84,7 @@ namespace VU0micro FreezeXMMRegs(1); FreezeMMXRegs(1); - runVUrec(VU0.VI[REG_TPC].UL & 0xfff, 0xffffffff, 0); + runVUrec(VU0.VI[REG_TPC].UL, 0x20000, 0); FreezeXMMRegs(0); FreezeMMXRegs(0); } diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index 7debdcfa8e..93c43b36af 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -142,8 +142,8 @@ namespace VU1micro assert( (VU1.VI[REG_TPC].UL&7) == 0 ); FreezeXMMRegs(1); - FreezeMMXRegs(0); - runVUrec(VU1.VI[REG_TPC].UL & 0x3fff, 0xffffffff, 1); + FreezeMMXRegs(1); + runVUrec(VU1.VI[REG_TPC].UL, 20000, 1); FreezeXMMRegs(0); FreezeMMXRegs(0); } diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 469a2a7c86..9e362768af 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -165,7 +165,6 @@ __forceinline int mVUsearchProg(microVU* mVU) { if (mVU->prog.cleared) { // If cleared, we need to search for new program for (int i = 0; i <= mVU->prog.total; i++) { //if (i == mVU->prog.cur) continue; // We can skip the current program. 
(ToDo: Verify that games don't clear, and send the same microprogram :/) - //if (mVU->prog.prog[i]) // ToDo: Implement Cycles if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) { //if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); } mVU->prog.cur = i; diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 4cd7654866..c163025bb1 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -17,7 +17,7 @@ */ #pragma once -#define mVUdebug // Prints Extra Info to Console +//#define mVUdebug // Prints Extra Info to Console #include "Common.h" #include "VU.h" #include "GS.h" @@ -105,7 +105,9 @@ struct microVU { u32 branch; // Holds branch compare result (IBxx) OR Holds address to Jump to (JALR/JR) u32 p; // Holds current P instance index u32 q; // Holds current Q instance index - u32 tempBackup; + u32 espBackup; // Temp Backup for ESP + u32 totalCycles; + u32 cycles; }; // microVU rec structs diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 1591091b3b..72c4a56f24 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -56,7 +56,7 @@ microVUt(void) mVUallocFMAC1b(int& Fd) { microVU* mVU = mVUx; if (!_Fd_) return; if (CHECK_VU_OVERFLOW) mVUclamp1(Fd, xmmT1, _X_Y_Z_W); - mVUsaveReg(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W); + mVUsaveReg(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W, 1); } //------------------------------------------------------------------ @@ -74,7 +74,7 @@ microVUt(void) mVUallocFMAC2b(int& Ft) { microVU* mVU = mVUx; if (!_Ft_) { SysPrintf("microVU: If a game does this, its retarded...\n"); return; } //if (CHECK_VU_OVERFLOW) mVUclamp1(Ft, xmmT1, _X_Y_Z_W); - mVUsaveReg(Ft, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + mVUsaveReg(Ft, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } //------------------------------------------------------------------ @@ -201,10 +201,10 @@ microVUt(void) mVUallocFMAC5b(int& ACC, int& Fs) { // FMAC6 - Normal FMAC Opcodes (I Reg) //------------------------------------------------------------------ -#define getIreg(reg, modXYZW) { \ - MOV32ItoR(gprT1, mVU->iReg); \ - SSE2_MOVD_R_to_XMM(reg, gprT1); \ - if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 8); \ +#define getIreg(reg, modXYZW) { \ + MOV32MtoR(gprT1, (uptr)&mVU->regs->VI[REG_I].UL); \ + SSE2_MOVD_R_to_XMM(reg, gprT1); \ + if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 8); \ if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } \ } @@ -269,7 +269,7 @@ microVUt(void) mVUallocFMAC8b(int& Fd) { microVU* mVU = mVUx; if (!_Fd_) return; if (CHECK_VU_OVERFLOW) mVUclamp1(Fd, xmmT1, _xyzw_ACC); - mVUsaveReg(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W); + mVUsaveReg(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W, 0); } //------------------------------------------------------------------ @@ -302,7 +302,7 @@ microVUt(void) mVUallocFMAC9b(int& Fd) { microVU* mVU = mVUx; if (!_Fd_) return; if (CHECK_VU_OVERFLOW) mVUclamp1(Fd, xmmFt, _xyzw_ACC); - mVUsaveReg(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W); + mVUsaveReg(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W, 0); } //------------------------------------------------------------------ diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 15b3a145be..a7d67983ef 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -145,7 +145,7 @@ microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) { mVUlog("mVUsetupBranch"); PUSH32R(gprR); // Backup gprR - 
MOV32RtoM((uptr)&mVU->tempBackup, gprESP); + MOV32RtoM((uptr)&mVU->espBackup, gprESP); MOV32RtoR(gprT1, getFlagReg1(bStatus[0])); MOV32RtoR(gprT2, getFlagReg1(bStatus[1])); @@ -172,7 +172,7 @@ microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) { OR32RtoR(gprF2, getFlagReg2(bMac[2])); OR32RtoR(gprF3, getFlagReg2(bMac[3])); - MOV32MtoR(gprESP, (uptr)&mVU->tempBackup); + MOV32MtoR(gprESP, (uptr)&mVU->espBackup); POP32R(gprR); // Restore gprR // Shuffle P/Q regs since every block starts at instance #0 @@ -236,6 +236,30 @@ microVUt(void) mVUdivSet() { } } +microVUt(void) mVUendProgram() { + microVU* mVU = mVUx; + incCycles(55); // Ensures Valid P/Q instances + mVUcycles -= 55; + if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); } + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2); + SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ); + + AND32ItoM((uptr)µVU0.regs->VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag + AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif + MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC].UL, xPC); + JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5)); +} + +microVUt(void) mVUtestCycles() { + microVU* mVU = mVUx; + iPC = mVUstartPC; + CMP32ItoM((uptr)&mVU->cycles, 0); + u8* jmp8 = JG8(0); + mVUendProgram(); + x86SetJ8(jmp8); + SUB32ItoM((uptr)&mVU->cycles, mVUcycles); +} //------------------------------------------------------------------ // Recompiler //------------------------------------------------------------------ @@ -245,17 +269,15 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { u8* thisPtr = x86Ptr; if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUlog("microVU: invalid startPC"); } - //startPC &= (vuIndex ? 0x3ff8 : 0xff8); - //mVUlog("mVUcompile Search"); + startPC &= (vuIndex ? 0x3ff8 : 0xff8); // Searches for Existing Compiled Block (if found, then returns; else, compile) microBlock* pBlock = mVUblocks[startPC/8]->search((microRegInfo*)pState); if (pBlock) { return pBlock->x86ptrStart; } - //mVUlog("mVUcompile First Pass"); - // First Pass iPC = startPC / 4; + setCode(); mVUbranch = 0; mVUstartPC = iPC; mVUcount = 0; @@ -286,23 +308,19 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { mVUcount++; } - //mVUlog("mVUcompile mVUsetFlags"); - // Sets Up Flag instances int bStatus[4]; int bMac[4]; mVUsetFlags(bStatus, bMac); - - //mVUlog("mVUcompile Second Pass"); - - //write8(0xcc); + mVUtestCycles(); // Second Pass iPC = mVUstartPC; + setCode(); mVUbranch = 0; int x; for (x = 0; x < (vuIndex ? 
(0x3fff/8) : (0xfff/8)); x++) { if (isEOB) { x = 0xffff; } - if (isNOP) { incPC(1); doUpperOp(); if (curI & _Ibit_) { incPC(-1); mVU->iReg = curI; incPC(1); } } + if (isNOP) { incPC(1); doUpperOp(); if (curI & _Ibit_) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } } else if (!swapOps) { incPC(1); doUpperOp(); incPC(-1); mVUopL(); incPC(1); } else { mVUopL(); incPC(1); doUpperOp(); } @@ -336,7 +354,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { PUSH32R(gprR); // Backup EDX MOV32MtoR(gprT2, (uptr)&mVU->branch); // Get startPC (ECX first argument for __fastcall) - AND32ItoR(gprT2, (vuIndex)?0x3ff8:0xff8); // Ensure valid jump address + //AND32ItoR(gprT2, (vuIndex)?0x3ff8:0xff8); // Ensure valid jump address MOV32ItoR(gprR, (u32)&pBlock->pStateEnd); // Get pState (EDX second argument for __fastcall) if (!vuIndex) CALLFunc((uptr)mVUcompileVU0); //(u32 startPC, uptr pState) @@ -372,18 +390,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { mVUlog("microVU: Possible infinite compiling loop!"); } // Do E-bit end stuff here - incCycles(55); // Ensures Valid P/Q instances - mVUcycles -= 55; - if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); } - SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ); - SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 3 : 2); - SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ); - - AND32ItoM((uptr)µVU0.regs->VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag - AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif - MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC].UL, xPC); - JMP32((uptr)mVU->exitFunct - ((uptr)x86Ptr + 5)); - + mVUendProgram(); //ToDo: Save pipeline state? 
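// --------------------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): a stand-alone C++ model of the cycle-budget
// pattern this commit introduces via mVUtestCycles()/mVUendProgram(). The recompiled block
// first checks the remaining budget (CMP cycles,0 / JG), bails out through the end-program
// stub when it is spent, and otherwise subtracts the block's cost (SUB cycles, mVUcycles).
// Names and types here (ToyVU, runBlock, blockCost) are simplified stand-ins, not the
// emitter's real code; they only model the control flow the emitted x86 implements.
//
// #include <cstdio>
//
// struct ToyVU {
//     int cycles;        // remaining budget for this execute call (mVU->cycles)
//     int totalCycles;   // budget originally granted (mVU->totalCycles)
//     unsigned pc;       // program counter (REG_TPC)
// };
//
// bool runBlock(ToyVU& vu, int blockCost) {
//     if (vu.cycles <= 0) {          // mVUtestCycles: budget spent -> mVUendProgram()
//         printf("budget spent at pc=0x%x, returning control to caller\n", vu.pc);
//         return false;              // caller stores TPC and resumes on the next kick
//     }
//     vu.cycles -= blockCost;        // SUB32ItoM(&mVU->cycles, mVUcycles)
//     vu.pc += 8;                    // pretend the block advanced one instruction pair
//     return true;
// }
//
// int main() {
//     ToyVU vu{ 0x200, 0x200, 0 };
//     while (runBlock(vu, 55)) {}    // an "infinite" microprogram now terminates
//     return 0;
// }
// --------------------------------------------------------------------------------------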
return thisPtr; } diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 45998cc3e7..6714813aec 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -130,8 +130,9 @@ microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { microVU* mVU = mVUx; //mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles); - // ToDo: Implement Cycles mVUsearchProg(mVU); // Find and set correct program + mVU->cycles = cycles; + mVU->totalCycles = cycles; x86SetPtr(mVUcurProg.x86ptr); // Set x86ptr to where program left off if (!vuIndex) return mVUcompileVU0(startPC, (uptr)&mVU->prog.lpState); @@ -144,7 +145,7 @@ microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { microVUt(void) mVUcleanUp() { microVU* mVU = mVUx; - //mVUlog("microVU: Program exited successfully!"); + mVUlog("microVU: Program exited successfully!"); mVUcurProg.x86ptr = x86Ptr; mVUcacheCheck(x86Ptr, mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start)); } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index c70ce5f6aa..8626ec175f 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -666,7 +666,7 @@ microVUf(void) mVU_MFIR() { MOVSX32R16toR(gprT1, gprT1); SSE2_MOVD_R_to_XMM(xmmT1, gprT1); if (!_XYZW_SS) { mVUunpack_xyzw(xmmT1, xmmT1, 0); } - mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } } @@ -676,7 +676,7 @@ microVUf(void) mVU_MFP() { else { mVUlog("MFP"); getPreg(xmmFt); - mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } } @@ -686,7 +686,7 @@ microVUf(void) mVU_MOVE() { else { mVUlog("MOVE"); mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); - mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } } @@ -697,7 +697,7 @@ microVUf(void) mVU_MR32() { mVUlog("MR32"); mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], (_X_Y_Z_W == 8) ? 
4 : 15); if (_X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x39); } - mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 0); } } @@ -819,7 +819,7 @@ microVUf(void) mVU_LQ() { if (!_Fs_) { mVUlog("LQ1"); mVUloadReg(xmmFt, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W); - mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } else { mVUlog("LQ2"); @@ -827,7 +827,7 @@ microVUf(void) mVU_LQ() { ADD32ItoR(gprT1, _Imm11_); mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); - mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } } } @@ -839,7 +839,7 @@ microVUf(void) mVU_LQD() { if (!_Fs_ && !noWriteVF) { mVUlog("LQD1"); mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); - mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } else { mVUlog("LQD2"); @@ -849,7 +849,7 @@ microVUf(void) mVU_LQD() { if (!noWriteVF) { mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); - mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } } } @@ -862,7 +862,7 @@ microVUf(void) mVU_LQI() { if (!_Fs_ && !noWriteVF) { mVUlog("LQI1"); mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); - mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } else { mVUlog("LQI2"); @@ -871,7 +871,7 @@ microVUf(void) mVU_LQI() { MOV32RtoR(gprT2, gprT1); mVUaddrFix(gprT1); mVUloadReg2(xmmFt, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); - mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W); + mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } ADD16ItoR(gprT2, 1); mVUallocVIb(gprT2, _Fs_); @@ -890,7 +890,7 @@ microVUf(void) mVU_SQ() { mVUlog("SQ"); if (!_Ft_) { getReg7(xmmFs, _Fs_); - mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W); + mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W, 1); } else { mVUallocVIa(gprT1, _Ft_); @@ -909,7 +909,7 @@ microVUf(void) mVU_SQD() { mVUlog("SQD"); if (!_Ft_) { getReg7(xmmFs, _Fs_); - mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W); + mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1); } else { mVUallocVIa(gprT1, _Ft_); @@ -929,7 +929,7 @@ microVUf(void) mVU_SQI() { mVUlog("SQI"); if (!_Ft_) { getReg7(xmmFs, _Fs_); - mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W); + mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1); } else { mVUallocVIa(gprT1, _Ft_); diff --git a/pcsx2/x86/microVU_Misc.inl b/pcsx2/x86/microVU_Misc.inl index 13d0d1814a..895507cc4e 100644 --- a/pcsx2/x86/microVU_Misc.inl +++ b/pcsx2/x86/microVU_Misc.inl @@ -94,7 +94,16 @@ microVUx(void) mVUloadReg2(int reg, int gprReg, uptr offset, int xyzw) { } // Modifies the Source Reg! 
-microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) { +microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw, bool modXYZW) { + /*SSE_MOVAPS_M128_to_XMM(xmmT2, offset); + if (modXYZW && (xyzw == 8 || xyzw == 4 || xyzw == 2 || xyzw == 1)) { + mVUunpack_xyzw(reg, reg, 0); + } + mVUmergeRegs(xmmT2, reg, xyzw); + + SSE_MOVAPS_XMM_to_M128(offset, xmmT2); + return;*/ + switch ( xyzw ) { case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY SSE_MOVSS_XMM_to_M32(offset+4, reg); @@ -127,10 +136,16 @@ microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) { SSE_MOVHLPS_XMM_to_XMM(reg, reg); SSE_MOVSS_XMM_to_M32(offset+8, reg); break; // XYZ + case 4: if (!modXYZW) mVUunpack_xyzw(reg, reg, 1); + SSE_MOVSS_XMM_to_M32(offset+4, reg); + break; // Y + case 2: if (!modXYZW) mVUunpack_xyzw(reg, reg, 2); + SSE_MOVSS_XMM_to_M32(offset+8, reg); + break; // Z + case 1: if (!modXYZW) mVUunpack_xyzw(reg, reg, 3); + SSE_MOVSS_XMM_to_M32(offset+12, reg); + break; // W case 8: SSE_MOVSS_XMM_to_M32(offset, reg); break; // X - case 4: SSE_MOVSS_XMM_to_M32(offset+4, reg); break; // Y - case 2: SSE_MOVSS_XMM_to_M32(offset+8, reg); break; // Z - case 1: SSE_MOVSS_XMM_to_M32(offset+12, reg); break; // W case 12: SSE_MOVLPS_XMM_to_M64(offset, reg); break; // XY case 3: SSE_MOVHPS_XMM_to_M64(offset+8, reg); break; // ZW default: SSE_MOVAPS_XMM_to_M128(offset, reg); break; // XYZW @@ -139,6 +154,14 @@ microVUx(void) mVUsaveReg(int reg, uptr offset, int xyzw) { // Modifies the Source Reg! microVUx(void) mVUsaveReg2(int reg, int gprReg, u32 offset, int xyzw) { + /*SSE_MOVAPSRmtoR(xmmT2, gprReg, offset); + if (xyzw == 8 || xyzw == 4 || xyzw == 2 || xyzw == 1) { + mVUunpack_xyzw(reg, reg, 0); + } + mVUmergeRegs(xmmT2, reg, xyzw); + SSE_MOVAPSRtoRm(gprReg, xmmT2, offset); + return;*/ + switch ( xyzw ) { case 5: SSE2_PSHUFD_XMM_to_XMM(reg, reg, 0xe1); //WZXY SSE_MOVSS_XMM_to_Rm(gprReg, reg, offset+4); From 457b78e70ec2486bb119d384f18c5af9e37267e1 Mon Sep 17 00:00:00 2001 From: refraction Date: Sun, 19 Apr 2009 22:00:11 +0000 Subject: [PATCH 108/143] Put a different fix for bad J/JALR jumps (Charlie and the Chocolate Factory) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1024 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVUzerorec.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index de6ad9b1af..59102f303e 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -3859,7 +3859,13 @@ void recVUMI_JR( VURegs* vuu, s32 info ) { int fsreg = _allocX86reg(-1, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Fs_, MODE_READ); LEA32RStoR(EAX, fsreg, 3); - + + //Mask the address to something valid + if(vuu == &VU0) + AND32ItoR(EAX, 0xfff); + else + AND32ItoR(EAX, 0x3fff); + if( (s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 ) MOV32RtoM(SuperVUGetVIAddr(REG_TPC, 0), EAX); if( !(s_pCurBlock->type & BLOCKTYPE_HASEOP) ) { @@ -3876,6 +3882,12 @@ void recVUMI_JALR( VURegs* vuu, s32 info ) int fsreg = _allocX86reg(-1, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Fs_, MODE_READ); LEA32RStoR(EAX, fsreg, 3); + //Mask the address to something valid + if(vuu == &VU0) + AND32ItoR(EAX, 0xfff); + else + AND32ItoR(EAX, 0x3fff); + if ( _Ft_ ) { _deleteX86reg(X86TYPE_VI|(s_vu?X86TYPE_VU1:0), _Ft_, 2); MOV16ItoM( SuperVUGetVIAddr(_Ft_, 0), (pc+8)>>3 ); From 256d32037c8d1e71fd467397df8b0c5773cf0c0c Mon Sep 17 00:00:00 2001 From: mattmenke Date: Sun, 19 Apr 2009 23:04:21 +0000 Subject: [PATCH 109/143] LilyPad: Fixed a potential threading issue when read 
input in gs thread is disabled. Odds of the issue being a problem, while perhaps significantly greater than 1/, weren't too high. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1025 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/LilyPad/Config.cpp | 8 +++---- plugins/LilyPad/InputManager.cpp | 13 ++++++----- plugins/LilyPad/KeyboardHook.cpp | 4 ++-- plugins/LilyPad/LilyPad.cpp | 25 +++++++++++++++++++--- plugins/LilyPad/RawInput.cpp | 28 ++++++++++++------------ plugins/LilyPad/WindowsMessaging.cpp | 16 ++++++-------- plugins/LilyPad/WndProcEater.cpp | 32 +++++++++++++++++++++------- plugins/LilyPad/WndProcEater.h | 4 +++- 8 files changed, 83 insertions(+), 47 deletions(-) diff --git a/plugins/LilyPad/Config.cpp b/plugins/LilyPad/Config.cpp index 2e2050a573..9a504a45d5 100644 --- a/plugins/LilyPad/Config.cpp +++ b/plugins/LilyPad/Config.cpp @@ -840,9 +840,7 @@ int LoadSettings(int force, wchar_t *file) { } if (config.debug) { - HANDLE hFile = CreateFileA("logs\\padLog.txt", GENERIC_WRITE, FILE_SHARE_READ, 0, CREATE_ALWAYS, 0, 0); - if (hFile != INVALID_HANDLE_VALUE) CloseHandle(hFile); - else CreateDirectory(L"logs", 0); + CreateDirectory(L"logs", 0); } @@ -1472,7 +1470,7 @@ INT_PTR CALLBACK DialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, LPARAM l if (GetBinding(port, slot, selIndex, dev, b, ffb)) { selected = 0xFF; InitInfo info = {0, hWndProp, hWnd, GetDlgItem(hWnd, cmd)}; - EatWndProc(info.hWndButton, DoNothingWndProc); + EatWndProc(info.hWndButton, DoNothingWndProc, 0); for (int i=0; inumDevices; i++) { if (dm->devices[i] != dev) { dm->DisableDevice(i); @@ -1507,7 +1505,7 @@ INT_PTR CALLBACK DialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, LPARAM l } InitInfo info = {selected==0x7F, hWndProp, hWnd, GetDlgItem(hWnd, cmd)}; - EatWndProc(info.hWndButton, DoNothingWndProc); + EatWndProc(info.hWndButton, DoNothingWndProc, 0); int w = timeGetTime(); dm->Update(&info); dm->PostRead(); diff --git a/plugins/LilyPad/InputManager.cpp b/plugins/LilyPad/InputManager.cpp index 2be1429dca..829ed06f38 100644 --- a/plugins/LilyPad/InputManager.cpp +++ b/plugins/LilyPad/InputManager.cpp @@ -56,9 +56,7 @@ Device::Device(DeviceAPI api, DeviceType d, const wchar_t *displayName, const wc void Device::FreeState() { if (virtualControlState) free(virtualControlState); virtualControlState = 0; - if (oldVirtualControlState) free(oldVirtualControlState); oldVirtualControlState = 0; - if (physicalControlState) free(physicalControlState); physicalControlState = 0; } @@ -127,9 +125,9 @@ void Device::AddFFAxis(const wchar_t *displayName, int id) { void Device::AllocState() { FreeState(); - virtualControlState = (int*) calloc(numVirtualControls, sizeof(int)); - oldVirtualControlState = (int*) calloc(numVirtualControls, sizeof(int)); - physicalControlState = (int*) calloc(numPhysicalControls, sizeof(int)); + virtualControlState = (int*) calloc(numVirtualControls + numVirtualControls + numPhysicalControls, sizeof(int)); + oldVirtualControlState = virtualControlState + numVirtualControls; + physicalControlState = oldVirtualControlState + numVirtualControls; } void Device::FlipState() { @@ -213,6 +211,9 @@ VirtualControl *Device::GetVirtualControl(unsigned int uid) { } VirtualControl *Device::AddVirtualControl(unsigned int uid, int physicalControlIndex) { + // Not really necessary, as always call AllocState when activated, but doesn't hurt. 
+ FreeState(); + if (numVirtualControls % 16 == 0) { virtualControls = (VirtualControl*) realloc(virtualControls, sizeof(VirtualControl)*(numVirtualControls+16)); } @@ -226,7 +227,9 @@ VirtualControl *Device::AddVirtualControl(unsigned int uid, int physicalControlI } PhysicalControl *Device::AddPhysicalControl(ControlType type, unsigned short id, unsigned short vkey, const wchar_t *name) { + // Not really necessary, as always call AllocState when activated, but doesn't hurt. FreeState(); + if (numPhysicalControls % 16 == 0) { physicalControls = (PhysicalControl*) realloc(physicalControls, sizeof(PhysicalControl)*(numPhysicalControls+16)); } diff --git a/plugins/LilyPad/KeyboardHook.cpp b/plugins/LilyPad/KeyboardHook.cpp index 0ac88f0814..d3c45d9897 100644 --- a/plugins/LilyPad/KeyboardHook.cpp +++ b/plugins/LilyPad/KeyboardHook.cpp @@ -43,9 +43,9 @@ public: InitInfo *info = (InitInfo*) d; binding = info->bindingIgnore; if (info->hWndButton) - EatWndProc(info->hWndButton, StartHooksWndProc); + EatWndProc(info->hWndButton, StartHooksWndProc, EATPROC_NO_UPDATE_WHILE_UPDATING_DEVICES); else - EatWndProc(info->hWnd, StartHooksWndProc); + EatWndProc(info->hWnd, StartHooksWndProc, EATPROC_NO_UPDATE_WHILE_UPDATING_DEVICES); InitState(); ikhd = this; active = 1; diff --git a/plugins/LilyPad/LilyPad.cpp b/plugins/LilyPad/LilyPad.cpp index e366764e9a..7d06d093ba 100644 --- a/plugins/LilyPad/LilyPad.cpp +++ b/plugins/LilyPad/LilyPad.cpp @@ -63,6 +63,17 @@ int IsWindowMaximized (HWND hWnd) { return 0; } +void DEBUG_TEXT_OUT(const char *text) { + if (config.debug) { + HANDLE hFile = CreateFileA("logs\\padLog.txt", FILE_APPEND_DATA, FILE_SHARE_READ, 0, OPEN_ALWAYS, 0, 0); + if (hFile != INVALID_HANDLE_VALUE) { + DWORD junk; + WriteFile(hFile, text, strlen(text), &junk, 0); + CloseHandle(hFile);; + } + } +} + void DEBUG_NEW_SET() { if (config.debug && bufSize>1) { HANDLE hFile = CreateFileA("logs\\padLog.txt", FILE_APPEND_DATA, FILE_SHARE_READ, 0, OPEN_ALWAYS, 0, 0); @@ -600,6 +611,7 @@ char* CALLBACK PS2EgetLibName(void) { //} void CALLBACK PADshutdown() { + DEBUG_TEXT_OUT("LilyPad shutdown.\n\n"); for (int i=0; i<8; i++) pads[i&1][i>>1].initialized = 0; portInitialized[0] = portInitialized[1] = 0; @@ -664,9 +676,10 @@ s32 CALLBACK PADinit(u32 flags) { } int port = (flags & 3); if (port == 3) { - if (PADinit(1)) return -1; + if (PADinit(1) == -1) return -1; return PADinit(2); } + #ifdef _DEBUG int tmpFlag = _CrtSetDbgFlag( _CRTDBG_REPORT_FLAG ); tmpFlag |= _CRTDBG_LEAK_CHECK_DF; @@ -686,6 +699,8 @@ s32 CALLBACK PADinit(u32 flags) { ClearKeyQueue(); // Just in case, when resuming emulation. ReleaseModifierKeys(); + + DEBUG_TEXT_OUT("LilyPad initialized\n\n"); return 0; } @@ -806,6 +821,7 @@ DWORD WINAPI MaximizeWindowThreadProc(void *lpParameter) { s32 CALLBACK PADopen(void *pDsp) { if (openCount++) return 0; + DEBUG_TEXT_OUT("LilyPad opened\n\n"); // Not really needed, shouldn't do anything. if (LoadSettings()) return -1; @@ -825,12 +841,12 @@ s32 CALLBACK PADopen(void *pDsp) { hWnd = GetParent (hWnd); // Implements most hacks, as well as enabling/disabling mouse // capture when focus changes. 
- if (!EatWndProc(hWnd, HackWndProc)) { + if (!EatWndProc(hWnd, HackWndProc, 0)) { openCount = 0; return -1; } if (config.forceHide) { - EatWndProc(hWnd, HideCursorProc); + EatWndProc(hWnd, HideCursorProc, 0); } } @@ -864,6 +880,8 @@ s32 CALLBACK PADopen(void *pDsp) { void CALLBACK PADclose() { if (openCount && !--openCount) { + DEBUG_TEXT_OUT("LilyPad closed\n\n"); + deviceUpdateQueued = 0; dm->ReleaseInput(); ReleaseEatenProc(); hWnd = 0; @@ -1011,6 +1029,7 @@ u8 CALLBACK PADpoll(u8 value) { break; // QUERY_DS2_ANALOG_MODE case 0x41: + // Right? Wrong? No clue. if (pad->mode == MODE_DIGITAL) { queryMaskMode[1] = queryMaskMode[2] = queryMaskMode[3] = 0; queryMaskMode[6] = 0x00; diff --git a/plugins/LilyPad/RawInput.cpp b/plugins/LilyPad/RawInput.cpp index 040594cc84..e48ec8b081 100644 --- a/plugins/LilyPad/RawInput.cpp +++ b/plugins/LilyPad/RawInput.cpp @@ -83,7 +83,7 @@ public: } active = 1; if (!rawKeyboardActivatedCount++) { - if (!rawMouseActivatedCount && !EatWndProc(hWnd, RawInputWndProc)) { + if (!rawMouseActivatedCount && !EatWndProc(hWnd, RawInputWndProc, EATPROC_NO_UPDATE_WHILE_UPDATING_DEVICES)) { Deactivate(); return 0; } @@ -134,7 +134,7 @@ public: // lines. if (!rawMouseActivatedCount++) { GetMouseCapture(hWnd); - if (!rawKeyboardActivatedCount && !EatWndProc(hWnd, RawInputWndProc)) { + if (!rawKeyboardActivatedCount && !EatWndProc(hWnd, RawInputWndProc, EATPROC_NO_UPDATE_WHILE_UPDATING_DEVICES)) { Deactivate(); return 0; } @@ -234,15 +234,14 @@ int InitializeRawInput() { if (RawInputFailed) return 0; if (!pGetRawInputDeviceList) { HMODULE user32 = LoadLibrary(L"user32.dll"); - if (user32) { - if (!(pRegisterRawInputDevices = (_RegisterRawInputDevices) GetProcAddress(user32, "RegisterRawInputDevices")) || - !(pGetRawInputDeviceInfo = (_GetRawInputDeviceInfo) GetProcAddress(user32, "GetRawInputDeviceInfoW")) || - !(pGetRawInputData = (_GetRawInputData) GetProcAddress(user32, "GetRawInputData")) || - !(pGetRawInputDeviceList = (_GetRawInputDeviceList) GetProcAddress(user32, "GetRawInputDeviceList"))) { - FreeLibrary(user32); - RawInputFailed = 1; - return 0; - } + if (!user32 || + !(pRegisterRawInputDevices = (_RegisterRawInputDevices) GetProcAddress(user32, "RegisterRawInputDevices")) || + !(pGetRawInputDeviceInfo = (_GetRawInputDeviceInfo) GetProcAddress(user32, "GetRawInputDeviceInfoW")) || + !(pGetRawInputData = (_GetRawInputData) GetProcAddress(user32, "GetRawInputData")) || + !(pGetRawInputDeviceList = (_GetRawInputDeviceList) GetProcAddress(user32, "GetRawInputDeviceList"))) { + FreeLibrary(user32); + RawInputFailed = 1; + return 0; } } return 1; @@ -288,7 +287,7 @@ void EnumRawInputDevices() { wsprintfW(keyName, L"SYSTEM\\CurrentControlSet\\Enum%s", productID+3); if (temp) *temp = 0; - displayName[0] = 0; + int haveDescription = 0; HKEY hKey; if (ERROR_SUCCESS == RegOpenKeyExW(HKEY_LOCAL_MACHINE, keyName, 0, KEY_QUERY_VALUE, &hKey)) { DWORD type; @@ -300,16 +299,17 @@ void EnumRawInputDevices() { else temp2++; // Could do without this, but more effort than it's worth. 
wcscpy(keyName, temp2); + haveDescription = 1; } RegCloseKey(hKey); } if (list[i].dwType == RIM_TYPEKEYBOARD) { - if (!displayName[0]) wsprintfW(displayName, L"Raw Keyboard %i", keyboardCount++); + if (!haveDescription) wsprintfW(displayName, L"Raw Keyboard %i", keyboardCount++); else wsprintfW(displayName, L"Raw KB: %s", keyName); dm->AddDevice(new RawInputKeyboard(list[i].hDevice, displayName, instanceID)); } else if (list[i].dwType == RIM_TYPEMOUSE) { - if (!displayName[0]) wsprintfW(displayName, L"Raw Mouse %i", mouseCount++); + if (!haveDescription) wsprintfW(displayName, L"Raw Mouse %i", mouseCount++); else wsprintfW(displayName, L"Raw MS: %s", keyName); dm->AddDevice(new RawInputMouse(list[i].hDevice, displayName, instanceID, productID)); } diff --git a/plugins/LilyPad/WindowsMessaging.cpp b/plugins/LilyPad/WindowsMessaging.cpp index c85783e430..edfe7f1a95 100644 --- a/plugins/LilyPad/WindowsMessaging.cpp +++ b/plugins/LilyPad/WindowsMessaging.cpp @@ -28,9 +28,8 @@ public: HWND hWnd = info->hWnd; if (info->hWndButton) { hWnd = info->hWndButton; - // hWndDlg = info->hWnd; } - if (!wmm && !EatWndProc(hWnd, WindowsMessagingWndProc)) { + if (!wmm && !EatWndProc(hWnd, WindowsMessagingWndProc, EATPROC_NO_UPDATE_WHILE_UPDATING_DEVICES)) { Deactivate(); return 0; } @@ -43,14 +42,13 @@ public: } void Deactivate() { - FreeState(); if (active) { if (!wmm) ReleaseExtraProc(WindowsMessagingWndProc); - active = 0; wmk = 0; + active = 0; + FreeState(); } - // hWndDlg = 0; } @@ -75,7 +73,7 @@ public: hWnd = info->hWndButton; } - if (!wmk && !EatWndProc(hWnd, WindowsMessagingWndProc)) { + if (!wmk && !EatWndProc(hWnd, WindowsMessagingWndProc, EATPROC_NO_UPDATE_WHILE_UPDATING_DEVICES)) { Deactivate(); return 0; } @@ -90,13 +88,13 @@ public: } void Deactivate() { - FreeState(); if (active) { - ReleaseMouseCapture(); if (!wmk) ReleaseExtraProc(WindowsMessagingWndProc); - active = 0; + ReleaseMouseCapture(); wmm = 0; + active = 0; + FreeState(); } } }; diff --git a/plugins/LilyPad/WndProcEater.cpp b/plugins/LilyPad/WndProcEater.cpp index 7d4627ba03..27d27cbdeb 100644 --- a/plugins/LilyPad/WndProcEater.cpp +++ b/plugins/LilyPad/WndProcEater.cpp @@ -2,7 +2,13 @@ static HWND hWndEaten = 0; static WNDPROC eatenWndProc = 0; -static ExtraWndProc* extraProcs = 0; + +struct ExtraWndProcInfo { + ExtraWndProc proc; + DWORD flags; +}; + +static ExtraWndProcInfo* extraProcs = 0; static int numExtraProcs = 0; void ReleaseExtraProc(ExtraWndProc proc) { @@ -13,7 +19,7 @@ void ReleaseExtraProc(ExtraWndProc proc) { if (hMutex) WaitForSingleObject(hMutex, 100); for (int i=0; i res) res = res2; } } + if (res != NO_WND_PROC) { if (out == WM_DESTROY) { ReleaseEatenProc(); @@ -68,7 +82,7 @@ LRESULT CALLBACK OverrideWndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lPa } -int EatWndProc(HWND hWnd, ExtraWndProc proc) { +int EatWndProc(HWND hWnd, ExtraWndProc proc, DWORD flags) { // Probably isn't needed, but just in case... // Creating and destroying the mutex adds some inefficiency, // but this function is only called on emulation start and on focus/unfocus. 
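// --------------------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): a minimal stand-alone model of the
// ExtraWndProcInfo change above. Each subclassing proc is now stored together with a
// flags word, and EATPROC_NO_UPDATE_WHILE_UPDATING_DEVICES lets a proc opt out of being
// run while a device update is pending — the threading hazard the commit message
// describes. The dispatch rule shown here is a presumed simplification (no Win32 types,
// hypothetical names eatProc/dispatch), not LilyPad's actual OverrideWndProc logic.
//
// #include <cstdio>
// #include <vector>
//
// constexpr unsigned EATPROC_NO_UPDATE_WHILE_UPDATING_DEVICES = 1;
// using ExtraProc = void(*)(const char* msg);
//
// struct ExtraProcInfo {          // mirrors the ExtraWndProcInfo struct in the patch
//     ExtraProc proc;
//     unsigned  flags;
// };
//
// static std::vector<ExtraProcInfo> procs;
// static bool deviceUpdateQueued = false;   // set from the input side in this model
//
// void eatProc(ExtraProc p, unsigned flags) { procs.push_back({p, flags}); }
//
// void dispatch(const char* msg) {
//     for (const auto& pi : procs) {
//         // presumed rule: flagged procs are skipped while an update is queued
//         if ((pi.flags & EATPROC_NO_UPDATE_WHILE_UPDATING_DEVICES) && deviceUpdateQueued)
//             continue;
//         pi.proc(msg);
//     }
// }
//
// int main() {
//     eatProc([](const char* m) { printf("hack proc saw %s\n", m); }, 0);
//     eatProc([](const char* m) { printf("input proc saw %s\n", m); },
//             EATPROC_NO_UPDATE_WHILE_UPDATING_DEVICES);
//     deviceUpdateQueued = true;
//     dispatch("WM_KEYDOWN");   // only the unflagged proc runs
//     return 0;
// }
// --------------------------------------------------------------------------------------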
@@ -83,8 +97,10 @@ int EatWndProc(HWND hWnd, ExtraWndProc proc) { hWndEaten = hWnd; } if (hWndEaten == hWnd) { - extraProcs = (ExtraWndProc*) realloc(extraProcs, sizeof(ExtraWndProc)*(numExtraProcs+1)); - extraProcs[numExtraProcs++] = proc; + extraProcs = (ExtraWndProcInfo*) realloc(extraProcs, sizeof(ExtraWndProcInfo)*(numExtraProcs+1)); + extraProcs[numExtraProcs].proc = proc; + extraProcs[numExtraProcs].flags = flags; + numExtraProcs++; } if (hMutex) { diff --git a/plugins/LilyPad/WndProcEater.h b/plugins/LilyPad/WndProcEater.h index c5a128e3fe..b85af1fd20 100644 --- a/plugins/LilyPad/WndProcEater.h +++ b/plugins/LilyPad/WndProcEater.h @@ -1,5 +1,7 @@ #include "Global.h" +#define EATPROC_NO_UPDATE_WHILE_UPDATING_DEVICES 1 + /* Need this to let window be subclassed multiple times but still clean up nicely. */ enum ExtraWndProcResult { @@ -11,7 +13,7 @@ enum ExtraWndProcResult { }; typedef ExtraWndProcResult (*ExtraWndProc)(HWND hWnd, UINT uMsg, WPARAM wParam, LPARAM lParam, LRESULT *out); -int EatWndProc(HWND hWnd, ExtraWndProc proc); +int EatWndProc(HWND hWnd, ExtraWndProc proc, DWORD flags); void ReleaseExtraProc(ExtraWndProc proc); void ReleaseEatenProc(); From 28c81a2ff1e3e0693eec02ccd422dbfe40200b7a Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Mon, 20 Apr 2009 00:06:51 +0000 Subject: [PATCH 110/143] Bugfix for the new emitter: ([xmm/m128],xmm) form of MOVQ was generating the wrong opcode (introduced in r1011). [other changes are a work in progress for more instruction additions] git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1026 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/group1.h | 61 +++++++--- pcsx2/x86/ix86/implement/group2.h | 2 +- pcsx2/x86/ix86/implement/group3.h | 117 ++++++++++++++++++- pcsx2/x86/ix86/implement/xmm/movqss.h | 63 +++++++--- pcsx2/x86/ix86/ix86.cpp | 158 ++++---------------------- pcsx2/x86/ix86/ix86_instructions.h | 79 ++++++++++--- pcsx2/x86/ix86/ix86_legacy.cpp | 10 +- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 20 ++-- pcsx2/x86/ix86/ix86_types.h | 61 +--------- 9 files changed, 308 insertions(+), 263 deletions(-) diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index 5d338c4368..8089593cc3 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -34,7 +34,7 @@ enum G1Type }; // ------------------------------------------------------------------- -template< G1Type InstType, typename ImmType > +template< typename ImmType > class Group1Impl { protected: @@ -46,42 +46,42 @@ protected: public: Group1Impl() {} // because GCC doesn't like static classes - static __emitinline void Emit( const iRegister& to, const iRegister& from ) + static __emitinline void Emit( G1Type InstType, const iRegister& to, const iRegister& from ) { prefix16(); iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); ModRM_Direct( from.Id, to.Id ); } - static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from ) + static __emitinline void Emit( G1Type InstType, const ModSibBase& sibdest, const iRegister& from ) { prefix16(); iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); EmitSibMagic( from.Id, sibdest ); } - static __emitinline void Emit( const iRegister& to, const ModSibBase& sibsrc ) + static __emitinline void Emit( G1Type InstType, const iRegister& to, const ModSibBase& sibsrc ) { prefix16(); iWrite( (Is8BitOperand() ? 
2 : 3) | (InstType<<3) ); EmitSibMagic( to.Id, sibsrc ); } - static __emitinline void Emit( void* dest, const iRegister& from ) + static __emitinline void Emit( G1Type InstType, void* dest, const iRegister& from ) { prefix16(); iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); iWriteDisp( from.Id, dest ); } - static __emitinline void Emit( const iRegister& to, const void* src ) + static __emitinline void Emit( G1Type InstType, const iRegister& to, const void* src ) { prefix16(); iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); iWriteDisp( to.Id, src ); } - static __emitinline void Emit( const iRegister& to, int imm ) + static __emitinline void Emit( G1Type InstType, const iRegister& to, int imm ) { prefix16(); if( !Is8BitOperand() && is_s8( imm ) ) @@ -103,7 +103,7 @@ public: } } - static __emitinline void Emit( const ModSibStrict& sibdest, int imm ) + static __emitinline void Emit( G1Type InstType, const ModSibStrict& sibdest, int imm ) { if( Is8BitOperand() ) { @@ -132,26 +132,55 @@ class Group1ImplAll { public: template< typename T > - __forceinline void operator()( const iRegister& to, const iRegister& from ) const { Group1Impl::Emit( to, from ); } + __forceinline void operator()( const iRegister& to, const iRegister& from ) const { Group1Impl::Emit( InstType, to, from ); } template< typename T > - __forceinline void operator()( const iRegister& to, const void* src ) const { Group1Impl::Emit( to, src ); } + __forceinline void operator()( const iRegister& to, const void* src ) const { Group1Impl::Emit( InstType, to, src ); } template< typename T > - __forceinline void operator()( void* dest, const iRegister& from ) const { Group1Impl::Emit( dest, from ); } + __forceinline void operator()( void* dest, const iRegister& from ) const { Group1Impl::Emit( InstType, dest, from ); } template< typename T > - __noinline void operator()( const ModSibBase& sibdest, const iRegister& from ) const { Group1Impl::Emit( sibdest, from ); } + __noinline void operator()( const ModSibBase& sibdest, const iRegister& from ) const { Group1Impl::Emit( InstType, sibdest, from ); } template< typename T > - __noinline void operator()( const iRegister& to, const ModSibBase& sibsrc ) const { Group1Impl::Emit( to, sibsrc ); } + __noinline void operator()( const iRegister& to, const ModSibBase& sibsrc ) const { Group1Impl::Emit( InstType, to, sibsrc ); } // Note on Imm forms : use int as the source operand since it's "reasonably inert" from a compiler // perspective. (using uint tends to make the compiler try and fail to match signed immediates with // one of the other overloads). template< typename T > - __noinline void operator()( const ModSibStrict& sibdest, int imm ) const { Group1Impl::Emit( sibdest, imm ); } + __noinline void operator()( const ModSibStrict& sibdest, int imm ) const { Group1Impl::Emit( InstType, sibdest, imm ); } template< typename T > - void operator()( const iRegister& to, int imm ) const { Group1Impl::Emit( to, imm ); } - + __forceinline void operator()( const iRegister& to, int imm ) const { Group1Impl::Emit( InstType, to, imm ); } Group1ImplAll() {} // Why does GCC need these? 
}; +template< G1Type InstType, u8 OpcodeSSE > +class G1LogicImpl : public Group1ImplAll +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> PS; + const SSELogicImpl<0x66,OpcodeSSE> PD; + + G1LogicImpl() {} +}; + +template< G1Type InstType, u8 OpcodeSSE > +class G1ArithmeticImpl : public G1LogicImpl +{ +public: + const SSELogicImpl<0xf3,OpcodeSSE> SS; + const SSELogicImpl<0xf2,OpcodeSSE> SD; + + G1ArithmeticImpl() {} +}; + + +template< u8 OpcodeSSE > +class SSEAndNotImpl +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> PS; + const SSELogicImpl<0x66,OpcodeSSE> PD; + + SSEAndNotImpl() {} +}; \ No newline at end of file diff --git a/pcsx2/x86/ix86/implement/group2.h b/pcsx2/x86/ix86/implement/group2.h index f5767c282e..a694263008 100644 --- a/pcsx2/x86/ix86/implement/group2.h +++ b/pcsx2/x86/ix86/implement/group2.h @@ -122,7 +122,7 @@ public: template< typename T > __noinline void operator()( const ModSibStrict& sibdest, u8 imm ) const { Group2Impl::Emit( sibdest, imm ); } - template< typename T > void operator()( const iRegister& to, u8 imm ) const + template< typename T > __forceinline void operator()( const iRegister& to, u8 imm ) const { Group2Impl::Emit( to, imm ); } Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds. diff --git a/pcsx2/x86/ix86/implement/group3.h b/pcsx2/x86/ix86/implement/group3.h index 05543cdd05..a614e4b09d 100644 --- a/pcsx2/x86/ix86/implement/group3.h +++ b/pcsx2/x86/ix86/implement/group3.h @@ -71,4 +71,119 @@ public: __noinline void operator()( const ModSibStrict& from ) const { Group3Impl::Emit( InstType, from ); } Group3ImplAll() {} -}; \ No newline at end of file +}; + + +template< G3Type InstType, u8 OpcodeSSE > +class G3Impl_PlusSSE : public Group3ImplAll +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> PS; + const SSELogicImpl<0x66,OpcodeSSE> PD; + const SSELogicImpl<0xf3,OpcodeSSE> SS; + const SSELogicImpl<0xf2,OpcodeSSE> SD; + + G3Impl_PlusSSE() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// The following iMul-specific forms are valid for 16 and 32 bit register operands only! + +template< typename ImmType > +class iMulImpl +{ +protected: + static const uint OperandSize = sizeof(ImmType); + static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + +public: + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const iRegister& from ) + { + prefix16(); + write16( 0xaf0f ); + ModRM_Direct( to.Id, from.Id ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const void* src ) + { + prefix16(); + write16( 0xaf0f ); + iWriteDisp( to.Id, src ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const ModSibBase& src ) + { + prefix16(); + write16( 0xaf0f ); + EmitSibMagic( to.Id, src ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const iRegister& from, ImmType imm ) + { + prefix16(); + write16( is_s8( imm ) ? 
0x6b : 0x69 ); + ModRM_Direct( to.Id, from.Id ); + if( is_s8( imm ) ) + write8( imm ); + else + iWrite( imm ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const void* src, ImmType imm ) + { + prefix16(); + write16( is_s8( imm ) ? 0x6b : 0x69 ); + iWriteDisp( to.Id, src ); + if( is_s8( imm ) ) + write8( imm ); + else + iWrite( imm ); + } + + // ------------------------------------------------------------------------ + static __emitinline void Emit( const iRegister& to, const ModSibBase& src, ImmType imm ) + { + prefix16(); + write16( is_s8( imm ) ? 0x6b : 0x69 ); + EmitSibMagic( to.Id, src ); + if( is_s8( imm ) ) + write8( imm ); + else + iWrite( imm ); + } +}; + + +class iMul_PlusSSE : public G3Impl_PlusSSE +{ +protected: + typedef iMulImpl iMUL32; + typedef iMulImpl iMUL16; + +public: + + template< typename T > + __forceinline void operator()( const iRegister& from ) const { Group3Impl::Emit( G3Type_iMUL, from ); } + + template< typename T > + __noinline void operator()( const ModSibStrict& from ) const { Group3Impl::Emit( G3Type_iMUL, from ); } + + __forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { iMUL32::Emit( to, from ); } + __forceinline void operator()( const iRegister32& to, const void* src ) const { iMUL32::Emit( to, src ); } + __forceinline void operator()( const iRegister32& to, const iRegister32& from, s32 imm ) const{ iMUL32::Emit( to, from, imm ); } + __noinline void operator()( const iRegister32& to, const ModSibBase& src ) const { iMUL32::Emit( to, src ); } + __noinline void operator()( const iRegister32& to, const ModSibBase& from, s32 imm ) const { iMUL32::Emit( to, from, imm ); } + + __forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { iMUL16::Emit( to, from ); } + __forceinline void operator()( const iRegister16& to, const void* src ) const { iMUL16::Emit( to, src ); } + __forceinline void operator()( const iRegister16& to, const iRegister16& from, s16 imm ) const{ iMUL16::Emit( to, from, imm ); } + __noinline void operator()( const iRegister16& to, const ModSibBase& src ) const { iMUL16::Emit( to, src ); } + __noinline void operator()( const iRegister16& to, const ModSibBase& from, s16 imm ) const { iMUL16::Emit( to, from, imm ); } + + iMul_PlusSSE() {} +}; diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index 4f0fdd1614..41f2b3071a 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -124,15 +124,9 @@ class PLogicImplAll { public: template< typename T > - __forceinline void operator()( const iRegisterSIMD& to, const iRegisterSIMD& from ) const - { - writeXMMop( 0x66, Opcode, to, from ); - } + __forceinline void operator()( const iRegisterSIMD& to, const iRegisterSIMD& from ) const { writeXMMop( 0x66, Opcode, to, from ); } template< typename T > - __forceinline void operator()( const iRegisterSIMD& to, const void* from ) const - { - writeXMMop( 0x66, Opcode, to, from ); - } + __forceinline void operator()( const iRegisterSIMD& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); } template< typename T > __noinline void operator()( const iRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } @@ -141,19 +135,52 @@ public: // ------------------------------------------------------------------------ // For implementing SSE-only logic operations, like ANDPS/ANDPD +// 
template< u8 Prefix, u8 Opcode > -class PLogicImplSSE +class SSELogicImpl { public: - __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const - { - writeXMMop( Prefix, Opcode, to, from ); - } - __forceinline void operator()( const iRegisterSSE& to, const void* from ) const - { - writeXMMop( Prefix, Opcode, to, from ); - } + __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - PLogicImplSSE() {} //GCWho? + SSELogicImpl() {} //GCWho? +}; + + +// ------------------------------------------------------------------------ +// For implementing SSE-only comparison operations, like CMPEQPS. +// +enum SSE2_ComparisonType +{ + SSE2_Equal = 0, + SSE2_Less, + SSE2_LessOrEqual, + SSE2_Unordered, + SSE2_NotEqual, + SSE2_NotLess, + SSE2_NotLessOrEqual, + SSE2_Ordered +}; + +template< u8 Prefix > +class SSECompareImplGeneric +{ +public: + __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( cmptype ); } + __forceinline void operator()( const iRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( cmptype ); } + __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( cmptype ); } + + SSECompareImplGeneric() {} //GCWhat? +}; + +template< u8 Prefix, u8 Opcode, SSE2_ComparisonType CType > +class SSECompareImpl +{ +public: + __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( CType ); } + __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( CType ); } + __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( CType ); } + + SSECompareImpl() {} //GCWhat? }; diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 86ad0f234d..362d446037 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -257,13 +257,15 @@ using namespace Internal; const MovImplAll iMOV; const TestImplAll iTEST; -const Group1ImplAll iADD; -const Group1ImplAll iOR; +const G1LogicImpl iAND; +const G1LogicImpl iOR; +const G1LogicImpl iXOR; + +const G1ArithmeticImpl iADD; +const G1ArithmeticImpl iSUB; + const Group1ImplAll iADC; const Group1ImplAll iSBB; -const Group1ImplAll iAND; -const Group1ImplAll iSUB; -const Group1ImplAll iXOR; const Group1ImplAll iCMP; const Group2ImplAll iROL; @@ -278,7 +280,8 @@ const Group3ImplAll iNOT; const Group3ImplAll iNEG; const Group3ImplAll iUMUL; const Group3ImplAll iUDIV; -const Group3ImplAll iSDIV; +const G3Impl_PlusSSE iDIV; +const iMul_PlusSSE iMUL; const IncDecImplAll iINC; const IncDecImplAll iDEC; @@ -610,99 +613,6 @@ __emitinline void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_fla EmitLeaMagic( to, src, preserve_flags ); } -////////////////////////////////////////////////////////////////////////////////////////// -// The following iMul-specific forms are valid for 16 and 32 bit register operands only! 
- -template< typename ImmType > -class iMulImpl -{ -protected: - static const uint OperandSize = sizeof(ImmType); - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } - -public: - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const iRegister& from ) - { - prefix16(); - write16( 0xaf0f ); - ModRM_Direct( to.Id, from.Id ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const void* src ) - { - prefix16(); - write16( 0xaf0f ); - iWriteDisp( to.Id, src ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const ModSibBase& src ) - { - prefix16(); - write16( 0xaf0f ); - EmitSibMagic( to.Id, src ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const iRegister& from, ImmType imm ) - { - prefix16(); - write16( is_s8( imm ) ? 0x6b : 0x69 ); - ModRM_Direct( to.Id, from.Id ); - if( is_s8( imm ) ) - write8( imm ); - else - iWrite( imm ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const void* src, ImmType imm ) - { - prefix16(); - write16( is_s8( imm ) ? 0x6b : 0x69 ); - iWriteDisp( to.Id, src ); - if( is_s8( imm ) ) - write8( imm ); - else - iWrite( imm ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const ModSibBase& src, ImmType imm ) - { - prefix16(); - write16( is_s8( imm ) ? 0x6b : 0x69 ); - EmitSibMagic( to.Id, src ); - if( is_s8( imm ) ) - write8( imm ); - else - iWrite( imm ); - } -}; - -// ------------------------------------------------------------------------ -// iMUL's special forms (unique to iMUL alone), and valid for 32/16 bit operands only, -// thus noi templates are used. 
- -namespace Internal -{ - typedef iMulImpl iMUL32; - typedef iMulImpl iMUL16; -} - -__forceinline void iSMUL( const iRegister32& to, const iRegister32& from ) { iMUL32::Emit( to, from ); } -__forceinline void iSMUL( const iRegister32& to, const void* src ) { iMUL32::Emit( to, src ); } -__forceinline void iSMUL( const iRegister32& to, const iRegister32& from, s32 imm ) { iMUL32::Emit( to, from, imm ); } -__noinline void iSMUL( const iRegister32& to, const ModSibBase& src ) { iMUL32::Emit( to, src ); } -__noinline void iSMUL( const iRegister32& to, const ModSibBase& from, s32 imm ) { iMUL32::Emit( to, from, imm ); } - -__forceinline void iSMUL( const iRegister16& to, const iRegister16& from ) { iMUL16::Emit( to, from ); } -__forceinline void iSMUL( const iRegister16& to, const void* src ) { iMUL16::Emit( to, src ); } -__forceinline void iSMUL( const iRegister16& to, const iRegister16& from, s16 imm ) { iMUL16::Emit( to, from, imm ); } -__noinline void iSMUL( const iRegister16& to, const ModSibBase& src ) { iMUL16::Emit( to, src ); } -__noinline void iSMUL( const iRegister16& to, const ModSibBase& from, s16 imm ) { iMUL16::Emit( to, from, imm ); } ////////////////////////////////////////////////////////////////////////////////////////// // Push / Pop Emitters @@ -758,40 +668,13 @@ const PLogicImplAll<0xdf> iPANDN; const PLogicImplAll<0xeb> iPOR; const PLogicImplAll<0xef> iPXOR; -const PLogicImplSSE<0x00,0x54> iANDPS; -const PLogicImplSSE<0x66,0x54> iANDPD; -const PLogicImplSSE<0x00,0x55> iANDNPS; -const PLogicImplSSE<0x66,0x55> iANDNPD; -const PLogicImplSSE<0x00,0x56> iORPS; -const PLogicImplSSE<0x66,0x56> iORPD; -const PLogicImplSSE<0x00,0x57> iXORPS; -const PLogicImplSSE<0x66,0x57> iXORPD; - -const PLogicImplSSE<0x00,0x5c> iSUBPS; -const PLogicImplSSE<0x66,0x5c> iSUBPD; -const PLogicImplSSE<0xf3,0x5c> iSUBSS; -const PLogicImplSSE<0xf2,0x5c> iSUBSD; - -const PLogicImplSSE<0x00,0x58> iADDPS; -const PLogicImplSSE<0x66,0x58> iADDPD; -const PLogicImplSSE<0xf3,0x58> iADDSS; -const PLogicImplSSE<0xf2,0x58> iADDSD; - -const PLogicImplSSE<0x00,0x59> iMULPS; -const PLogicImplSSE<0x66,0x59> iMULPD; -const PLogicImplSSE<0xf3,0x59> iMULSS; -const PLogicImplSSE<0xf2,0x59> iMULSD; - -const PLogicImplSSE<0x00,0x5e> iDIVPS; -const PLogicImplSSE<0x66,0x5e> iDIVPD; -const PLogicImplSSE<0xf3,0x5e> iDIVSS; -const PLogicImplSSE<0xf2,0x5e> iDIVSD; +const SSEAndNotImpl<0x55> iANDN; // Compute Reciprocal Packed Single-Precision Floating-Point Values -const PLogicImplSSE<0,0x53> iRCPPS; +const SSELogicImpl<0,0x53> iRCPPS; // Compute Reciprocal of Scalar Single-Precision Floating-Point Value -const PLogicImplSSE<0xf3,0x53> iRCPSS; +const SSELogicImpl<0xf3,0x53> iRCPSS; // Moves from XMM to XMM, with the *upper 64 bits* of the destination register @@ -800,20 +683,27 @@ __forceinline void iMOVQZX( const iRegisterSSE& to, const iRegisterSSE& from ) { // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. -__noinline void iMOVQZX( const iRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); } +__forceinline void iMOVQZX( const iRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. 
__forceinline void iMOVQZX( const iRegisterSSE& to, const void* src ) { writeXMMop( 0xf3, 0x7e, to, src ); } -__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); } -__noinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); } +// Moves lower quad of XMM to ptr64 (no bits are cleared) +__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } +// Moves lower quad of XMM to ptr64 (no bits are cleared) +__forceinline void iMOVQ( void* dest, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } + +__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); } +__forceinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); } __forceinline void iMOVQ( const iRegisterMMX& to, const void* src ) { writeXMMop( 0x6f, to, src ); } __forceinline void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } __forceinline void iMOVQ( void* dest, const iRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } -__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, from, dest ); } -__forceinline void iMOVQ( void* dest, const iRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, from, dest ); } + +// This form of iMOVQ is Intel's adeptly named 'MOVQ2DQ' __forceinline void iMOVQ( const iRegisterSSE& to, const iRegisterMMX& from ) { writeXMMop( 0xf3, 0xd6, to, from ); } + +// This form of iMOVQ is Intel's adeptly named 'MOVDQ2Q' __forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterSSE& from ) { // Manual implementation of this form of MOVQ, since its parameters are unique in a way diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 5b776567ad..2b68790756 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -38,13 +38,15 @@ namespace x86Emitter // ------------------------------------------------------------------------ // Group 1 Instruction Class - extern const Internal::Group1ImplAll iADD; - extern const Internal::Group1ImplAll iOR; + extern const Internal::G1LogicImpl iAND; + extern const Internal::G1LogicImpl iOR; + extern const Internal::G1LogicImpl iXOR; + extern const Internal::SSEAndNotImpl<0x55> iANDN; + + extern const Internal::G1ArithmeticImpl iADD; + extern const Internal::G1ArithmeticImpl iSUB; extern const Internal::Group1ImplAll iADC; extern const Internal::Group1ImplAll iSBB; - extern const Internal::Group1ImplAll iAND; - extern const Internal::Group1ImplAll iSUB; - extern const Internal::Group1ImplAll iXOR; extern const Internal::Group1ImplAll iCMP; // ------------------------------------------------------------------------ @@ -72,7 +74,8 @@ namespace x86Emitter extern const Internal::Group3ImplAll iNEG; extern const Internal::Group3ImplAll iUMUL; extern const Internal::Group3ImplAll iUDIV; - extern const Internal::Group3ImplAll iSDIV; + extern const Internal::G3Impl_PlusSSE iDIV; + extern const Internal::iMul_PlusSSE iMUL; extern const Internal::IncDecImplAll iINC; extern const Internal::IncDecImplAll iDEC; @@ -205,22 +208,22 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // MUL / DIV instructions - extern void iSMUL( const iRegister32& to, const iRegister32& from ); 
- extern void iSMUL( const iRegister32& to, const void* src ); - extern void iSMUL( const iRegister32& to, const iRegister32& from, s32 imm ); - extern void iSMUL( const iRegister32& to, const ModSibBase& src ); - extern void iSMUL( const iRegister32& to, const ModSibBase& src, s32 imm ); + /*extern void iMUL( const iRegister32& to, const iRegister32& from ); + extern void iMUL( const iRegister32& to, const void* src ); + extern void iMUL( const iRegister32& to, const iRegister32& from, s32 imm ); + extern void iMUL( const iRegister32& to, const ModSibBase& src ); + extern void iMUL( const iRegister32& to, const ModSibBase& src, s32 imm ); - extern void iSMUL( const iRegister16& to, const iRegister16& from ); - extern void iSMUL( const iRegister16& to, const void* src ); - extern void iSMUL( const iRegister16& to, const iRegister16& from, s16 imm ); - extern void iSMUL( const iRegister16& to, const ModSibBase& src ); - extern void iSMUL( const iRegister16& to, const ModSibBase& src, s16 imm ); + extern void iMUL( const iRegister16& to, const iRegister16& from ); + extern void iMUL( const iRegister16& to, const void* src ); + extern void iMUL( const iRegister16& to, const iRegister16& from, s16 imm ); + extern void iMUL( const iRegister16& to, const ModSibBase& src ); + extern void iMUL( const iRegister16& to, const ModSibBase& src, s16 imm ); template< typename T > - __forceinline void iSMUL( const iRegister& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); } + __forceinline void iMUL( const iRegister& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); } template< typename T > - __noinline void iSMUL( const ModSibStrict& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); } + __noinline void iMUL( const ModSibStrict& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); }*/ ////////////////////////////////////////////////////////////////////////////////////////// // JMP / Jcc Instructions! 
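A minimal usage sketch of the combined integer/SSE objects introduced above, assuming
the usual register aliases (eax, ecx, xmm0, xmm1) provided by the emitter; the byte
comments are the encodings these calls should produce:

    iMUL( eax, ecx );         // 0f af c1              imul eax, ecx
    iMUL( eax, ecx, 4 );      // 6b c1 04              imul eax, ecx, 4    (imm fits in s8)
    iMUL( eax, ecx, 300 );    // 69 c1 2c 01 00 00     imul eax, ecx, 300  (full imm32)
    iMUL.SS( xmm0, xmm1 );    // f3 0f 59 c1           mulss xmm0, xmm1
    iDIV.PS( xmm0, xmm1 );    // 0f 5e c1              divps xmm0, xmm1
    iADD.PD( xmm0, xmm1 );    // 66 0f 58 c1           addpd xmm0, xmm1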
@@ -431,6 +434,46 @@ namespace x86Emitter extern void iMOVHLPS( const iRegisterSSE& to, const iRegisterSSE& from ); extern void iMOVLHPD( const iRegisterSSE& to, const iRegisterSSE& from ); extern void iMOVHLPD( const iRegisterSSE& to, const iRegisterSSE& from ); + + ////////////////////////////////////////////////////////////////////////////////////////// + // + + extern const Internal::MovapsImplAll<0, 0x28, 0x29> iMOVAPS; + extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVUPS; + + extern const Internal::MovapsImplAll<0x66, 0x28, 0x29> iMOVAPD; + extern const Internal::MovapsImplAll<0x66, 0x10, 0x11> iMOVUPD; + +#ifdef ALWAYS_USE_MOVAPS + extern const Internal::MovapsImplAll<0x66, 0x6f, 0x7f> iMOVDQA; + extern const Internal::MovapsImplAll<0xf3, 0x6f, 0x7f> iMOVDQU; +#else + extern const Internal::MovapsImplAll<0, 0x28, 0x29> iMOVDQA; + extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVDQU; +#endif + + extern const Internal::MovhlImplAll<0, 0x16> iMOVHPS; + extern const Internal::MovhlImplAll<0, 0x12> iMOVLPS; + extern const Internal::MovhlImplAll<0x66, 0x16> iMOVHPD; + extern const Internal::MovhlImplAll<0x66, 0x12> iMOVLPD; + + extern const Internal::PLogicImplAll<0xdb> iPAND; + extern const Internal::PLogicImplAll<0xdf> iPANDN; + extern const Internal::PLogicImplAll<0xeb> iPOR; + extern const Internal::PLogicImplAll<0xef> iPXOR; + + extern const Internal::SSELogicImpl<0,0x53> iRCPPS; + extern const Internal::SSELogicImpl<0xf3,0x53> iRCPSS; + + extern const Internal::SSECompareImplGeneric<0x00> iCMPPS; + extern const Internal::SSECompareImplGeneric<0x66> iCMPPD; + extern const Internal::SSECompareImplGeneric<0xf3> iCMPSS; + extern const Internal::SSECompareImplGeneric<0xf2> iCMPSD; + + extern const Internal::SSECompareImplGeneric<0x00> iCMPPS; + extern const Internal::SSECompareImplGeneric<0x66> iCMPPD; + extern const Internal::SSECompareImplGeneric<0xf3> iCMPSS; + extern const Internal::SSECompareImplGeneric<0xf2> iCMPSD; } diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index e3daec9b59..a484579f88 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -207,26 +207,26 @@ emitterT void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) /* mul eax by r32 to edx:eax */ emitterT void MUL32R( x86IntRegType from ) { iUMUL( iRegister32(from) ); } /* imul eax by r32 to edx:eax */ -emitterT void IMUL32R( x86IntRegType from ) { iSMUL( iRegister32(from) ); } +emitterT void IMUL32R( x86IntRegType from ) { iMUL( iRegister32(from) ); } /* mul eax by m32 to edx:eax */ emitterT void MUL32M( u32 from ) { iUMUL( ptr32[from] ); } /* imul eax by m32 to edx:eax */ -emitterT void IMUL32M( u32 from ) { iSMUL( ptr32[from] ); } +emitterT void IMUL32M( u32 from ) { iMUL( ptr32[from] ); } /* imul r32 by r32 to r32 */ emitterT void IMUL32RtoR( x86IntRegType to, x86IntRegType from ) { - iSMUL( iRegister32(to), iRegister32(from) ); + iMUL( iRegister32(to), iRegister32(from) ); } /* div eax by r32 to edx:eax */ emitterT void DIV32R( x86IntRegType from ) { iUDIV( iRegister32(from) ); } /* idiv eax by r32 to edx:eax */ -emitterT void IDIV32R( x86IntRegType from ) { iSDIV( iRegister32(from) ); } +emitterT void IDIV32R( x86IntRegType from ) { iDIV( iRegister32(from) ); } /* div eax by m32 to edx:eax */ emitterT void DIV32M( u32 from ) { iUDIV( ptr32[from] ); } /* idiv eax by m32 to edx:eax */ -emitterT void IDIV32M( u32 from ) { iSDIV( ptr32[from] ); } +emitterT void IDIV32M( u32 from ) { iDIV( ptr32[from] ); } emitterT void 
LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index 50818e75f4..72190a2f67 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -216,17 +216,17 @@ emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { i #define DEFINE_LEGACY_PSD_OPCODE( mod ) \ - emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { i##mod##PS( iRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##PS( iRegisterSSE(to), iRegisterSSE(from) ); } \ - emitterT void SSE2_##mod##PD_M128_to_XMM( x86SSERegType to, uptr from ) { i##mod##PD( iRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_##mod##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##PD( iRegisterSSE(to), iRegisterSSE(from) ); } + emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { i##mod.PS( iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod.PS( iRegisterSSE(to), iRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##PD_M128_to_XMM( x86SSERegType to, uptr from ) { i##mod.PD( iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod.PD( iRegisterSSE(to), iRegisterSSE(from) ); } #define DEFINE_LEGACY_PSSD_OPCODE( mod ) \ DEFINE_LEGACY_PSD_OPCODE( mod ) \ - emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { i##mod##SS( iRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##SS( iRegisterSSE(to), iRegisterSSE(from) ); } \ - emitterT void SSE2_##mod##SD_M32_to_XMM( x86SSERegType to, uptr from ) { i##mod##SD( iRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod##SD( iRegisterSSE(to), iRegisterSSE(from) ); } + emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { i##mod.SS( iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod.SS( iRegisterSSE(to), iRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##SD_M32_to_XMM( x86SSERegType to, uptr from ) { i##mod.SD( iRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { i##mod.SD( iRegisterSSE(to), iRegisterSSE(from) ); } DEFINE_LEGACY_PSD_OPCODE( AND ) DEFINE_LEGACY_PSD_OPCODE( ANDN ) @@ -246,7 +246,7 @@ emitterT void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { iRCPSS( iR //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ -//Packed Single-Precission FP compare (CMPccPS) * +//Packed Single-Precision FP compare (CMPccPS) * //********************************************************************************** //missing SSE_CMPPS_I8_to_XMM // SSE_CMPPS_M32_to_XMM @@ -270,7 +270,7 @@ emitterT void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ -//Scalar Single-Precission FP compare (CMPccSS) * +//Scalar Single-Precision FP compare (CMPccSS) * 
//********************************************************************************** //missing SSE_CMPSS_I8_to_XMM // SSE_CMPSS_M32_to_XMM diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index d70ff1b1ea..095fc1a6fc 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -662,6 +662,7 @@ namespace x86Emitter extern void EmitSibMagic( uint regfield, const ModSibBase& info ); // ------------------------------------------------------------------------ + #include "implement/xmm/movqss.h" #include "implement/group1.h" #include "implement/group2.h" #include "implement/group3.h" @@ -671,7 +672,6 @@ namespace x86Emitter #include "implement/bittest.h" #include "implement/test.h" #include "implement/jmpcall.h" - #include "implement/xmm/movqss.h" } ////////////////////////////////////////////////////////////////////////////////////////// @@ -689,65 +689,6 @@ namespace x86Emitter #else static const bool AlwaysUseMovaps = false; #endif - - extern const Internal::MovapsImplAll<0, 0x28, 0x29> iMOVAPS; - extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVUPS; - - extern const Internal::MovapsImplAll<0x66, 0x28, 0x29> iMOVAPD; - extern const Internal::MovapsImplAll<0x66, 0x10, 0x11> iMOVUPD; - - #ifdef ALWAYS_USE_MOVAPS - extern const Internal::MovapsImplAll<0x66, 0x6f, 0x7f> iMOVDQA; - extern const Internal::MovapsImplAll<0xf3, 0x6f, 0x7f> iMOVDQU; - #else - extern const Internal::MovapsImplAll<0, 0x28, 0x29> iMOVDQA; - extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVDQU; - #endif - - extern const Internal::MovhlImplAll<0, 0x16> iMOVHPS; - extern const Internal::MovhlImplAll<0, 0x12> iMOVLPS; - extern const Internal::MovhlImplAll<0x66, 0x16> iMOVHPD; - extern const Internal::MovhlImplAll<0x66, 0x12> iMOVLPD; - - extern const Internal::PLogicImplAll<0xdb> iPAND; - extern const Internal::PLogicImplAll<0xdf> iPANDN; - extern const Internal::PLogicImplAll<0xeb> iPOR; - extern const Internal::PLogicImplAll<0xef> iPXOR; - - extern const Internal::PLogicImplSSE<0x00,0x54> iANDPS; - extern const Internal::PLogicImplSSE<0x66,0x54> iANDPD; - extern const Internal::PLogicImplSSE<0x00,0x55> iANDNPS; - extern const Internal::PLogicImplSSE<0x66,0x55> iANDNPD; - extern const Internal::PLogicImplSSE<0x00,0x56> iORPS; - extern const Internal::PLogicImplSSE<0x66,0x56> iORPD; - extern const Internal::PLogicImplSSE<0x00,0x57> iXORPS; - extern const Internal::PLogicImplSSE<0x66,0x57> iXORPD; - - extern const Internal::PLogicImplSSE<0x00,0x5c> iSUBPS; - extern const Internal::PLogicImplSSE<0x66,0x5c> iSUBPD; - extern const Internal::PLogicImplSSE<0xf3,0x5c> iSUBSS; - extern const Internal::PLogicImplSSE<0xf2,0x5c> iSUBSD; - - extern const Internal::PLogicImplSSE<0x00,0x58> iADDPS; - extern const Internal::PLogicImplSSE<0x66,0x58> iADDPD; - extern const Internal::PLogicImplSSE<0xf3,0x58> iADDSS; - extern const Internal::PLogicImplSSE<0xf2,0x58> iADDSD; - - extern const Internal::PLogicImplSSE<0x00,0x59> iMULPS; - extern const Internal::PLogicImplSSE<0x66,0x59> iMULPD; - extern const Internal::PLogicImplSSE<0xf3,0x59> iMULSS; - extern const Internal::PLogicImplSSE<0xf2,0x59> iMULSD; - - extern const Internal::PLogicImplSSE<0x00,0x5e> iDIVPS; - extern const Internal::PLogicImplSSE<0x66,0x5e> iDIVPD; - extern const Internal::PLogicImplSSE<0xf3,0x5e> iDIVSS; - extern const Internal::PLogicImplSSE<0xf2,0x5e> iDIVSD; - - - - extern const Internal::PLogicImplSSE<0,0x53> iRCPPS; - extern const Internal::PLogicImplSSE<0xf3,0x53> iRCPSS; - } #include "ix86_inlines.inl" From 
adf6dfdcd43289bb84bc3568a70a0721c1558134 Mon Sep 17 00:00:00 2001 From: refraction Date: Mon, 20 Apr 2009 00:11:34 +0000 Subject: [PATCH 111/143] Fixed Tekken 5 regression causing black screens and missing text git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1027 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/VifDma.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index 3708bd8f14..7452378f4f 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -1010,7 +1010,7 @@ static int __fastcall Vif0TransUnpack(u32 *data) // UNPACK if(vif0Regs->offset != 0 || vif0.cl != 0) { ret = vif0.tag.size; - vif0.tag.size = VIFalign(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum); + vif0.tag.size -= vif0.vifpacketsize - VIFalign(data, &vif0.tag, vif0.vifpacketsize, VIF0dmanum); ret = ret - vif0.tag.size; data += ret; if(vif0.vifpacketsize > 0) VIFunpack(data, &vif0.tag, vif0.vifpacketsize - ret, VIF0dmanum); @@ -1851,7 +1851,7 @@ static int __fastcall Vif1TransUnpack(u32 *data) 'in pieces' */ if(vif1Regs->offset != 0 || vif1.cl != 0) { - vif1.tag.size = VIFalign(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum); + vif1.tag.size -= vif1.vifpacketsize - VIFalign(data, &vif1.tag, vif1.vifpacketsize, VIF1dmanum); ret = ret - vif1.tag.size; data += ret; if((vif1.vifpacketsize - ret) > 0) VIFunpack(data, &vif1.tag, vif1.vifpacketsize - ret, VIF1dmanum); From b21b81df9f9f033c2841a1beedf7ee126aa1e049 Mon Sep 17 00:00:00 2001 From: refraction Date: Mon, 20 Apr 2009 00:31:03 +0000 Subject: [PATCH 112/143] Resolved Issue 168 with FFXII crashing with DMA error due to cancelling a scratchpad transfer before it happened, how silly of me :p git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1028 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Gif.cpp | 2 +- pcsx2/Vif.cpp | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index fe57bcaed1..e1cf32efae 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -545,7 +545,7 @@ void gifMFIFOInterrupt() cpuRegs.interrupt &= ~(1 << 11); return ; } - if(spr0->chcr & 0x100) + if((spr0->chcr & 0x100) && spr0->qwc == 0) { spr0->chcr &= ~0x100; hwDmacIrq(8); diff --git a/pcsx2/Vif.cpp b/pcsx2/Vif.cpp index ccc5068b7a..0da6e70175 100644 --- a/pcsx2/Vif.cpp +++ b/pcsx2/Vif.cpp @@ -532,11 +532,13 @@ void mfifoVIF1transfer(int qwc) void vifMFIFOInterrupt() { g_vifCycles = 0; - if(spr0->chcr & 0x100) + + if((spr0->chcr & 0x100) && spr0->qwc == 0) { spr0->chcr &= ~0x100; hwDmacIrq(8); } + if (vif1.inprogress == 1) mfifo_VIF1chain(); if (vif1.irq && vif1.tag.size == 0) @@ -569,6 +571,7 @@ void vifMFIFOInterrupt() else CPU_INT(10, vif1ch->qwc * BIAS); + return; } else if (vifqwc <= 0) From 3ee59f3f4e8eb6e011cef06633dca1c344ee40eb Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Mon, 20 Apr 2009 01:40:13 +0000 Subject: [PATCH 113/143] More work on cycles. FPU is slower than assumed earlier. Also added COP0 DIE bit handling, that disables the EE's dual issue capabilities. 
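In rough terms, the recompiler change below scales each opcode's cycle cost by the
COP0 Config DIE bit (bit 18): with dual issue enabled the cost is unchanged, with it
cleared the cost doubles. A minimal sketch of the arithmetic, using the names from
the patch:

    u32 die = (cpuRegs.CP0.n.Config >> 18) & 1;     // 1 = dual issue enabled
    s_nBlockCycles += opcode.cycles * (2 - die);    // die == 0 doubles the cost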
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1029 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/R5900OpcodeTables.cpp | 8 ++++---- pcsx2/x86/ix86-32/iR5900-32.cpp | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pcsx2/R5900OpcodeTables.cpp b/pcsx2/R5900OpcodeTables.cpp index 431a6460cf..69077098bb 100644 --- a/pcsx2/R5900OpcodeTables.cpp +++ b/pcsx2/R5900OpcodeTables.cpp @@ -106,7 +106,7 @@ namespace R5900 static const int MMI_Div = 22*8; static const int MMI_Default = 14; - static const int FPU_Mult = 12; + static const int FPU_Mult = 4*8; static const int Store = 8; static const int Load = 8; @@ -432,9 +432,9 @@ namespace R5900 MakeOpcode1( MIN_S, CopDefault ); MakeOpcode1( MUL_S, FPU_Mult ); - MakeOpcode1( DIV_S, 3*8 ); - MakeOpcode1( SQRT_S, 3*8 ); - MakeOpcode1( RSQRT_S, 4*8 ); + MakeOpcode1( DIV_S, 6*8 ); + MakeOpcode1( SQRT_S, 6*8 ); + MakeOpcode1( RSQRT_S, 8*8 ); MakeOpcode1( MULA_S, FPU_Mult ); MakeOpcode1( MADD_S, FPU_Mult ); MakeOpcode1( MSUB_S, FPU_Mult ); diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 6085791b9b..d500f25c75 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -1289,7 +1289,8 @@ void recompileNextInstruction(int delayslot) return; } } - s_nBlockCycles += opcode.cycles; + //If thh COP0 DIE bit is disabled, double the cycles. Happens rarely. + s_nBlockCycles += opcode.cycles * (2 - ((cpuRegs.CP0.n.Config >> 18) & 0x1)); opcode.recompile(); } From 18c4765d31001e40d07932423a33c243dcfe2a84 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Mon, 20 Apr 2009 03:10:05 +0000 Subject: [PATCH 114/143] Emitter: Changed a lot of 'i's into 'x's, because... sudonim says he likes xMOV better than iMOV as an emitter prefix. I'm wondering if I should go ahead and change it. I tend to favor logic, but everyone else just thinks it looks like iMac and iPod I just don't want to have to change it more than once. well 'x' is like the algebraic variable, which can be anything so it does kindoff make sense cuz like you have xSOMETHING, where SOMETHING is all the different emitter functions ... I'm sold. 
:p git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1030 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVUmicroLower.cpp | 10 +- pcsx2/x86/ix86-32/recVTLB.cpp | 32 +- pcsx2/x86/ix86/implement/bittest.h | 74 ++--- pcsx2/x86/ix86/implement/dwshift.h | 42 +-- pcsx2/x86/ix86/implement/group1.h | 106 +++--- pcsx2/x86/ix86/implement/group2.h | 28 +- pcsx2/x86/ix86/implement/group3.h | 70 ++-- pcsx2/x86/ix86/implement/incdec.h | 12 +- pcsx2/x86/ix86/implement/jmpcall.h | 16 +- pcsx2/x86/ix86/implement/movs.h | 130 ++++---- pcsx2/x86/ix86/implement/test.h | 22 +- pcsx2/x86/ix86/implement/xmm/movqss.h | 130 ++++---- pcsx2/x86/ix86/ix86.cpp | 394 +++++++++++----------- pcsx2/x86/ix86/ix86_inlines.inl | 42 +-- pcsx2/x86/ix86/ix86_instructions.h | 458 ++++++++++++-------------- pcsx2/x86/ix86/ix86_jmp.cpp | 22 +- pcsx2/x86/ix86/ix86_legacy.cpp | 192 +++++------ pcsx2/x86/ix86/ix86_legacy_mmx.cpp | 34 +- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 172 ++++------ pcsx2/x86/ix86/ix86_types.h | 229 +++++++------ 20 files changed, 1105 insertions(+), 1110 deletions(-) diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/iVUmicroLower.cpp index 0a7a73117a..f98ed4549c 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/iVUmicroLower.cpp @@ -802,12 +802,12 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) // (this is one of my test cases for the new emitter --air) using namespace x86Emitter; - iAddressReg thisreg( x86reg ); + xAddressReg thisreg( x86reg ); - if ( _X ) iMOV(ptr32[thisreg+offset], 0x00000000); - if ( _Y ) iMOV(ptr32[thisreg+offset+4], 0x00000000); - if ( _Z ) iMOV(ptr32[thisreg+offset+8], 0x00000000); - if ( _W ) iMOV(ptr32[thisreg+offset+12], 0x3f800000); + if ( _X ) xMOV(ptr32[thisreg+offset], 0x00000000); + if ( _Y ) xMOV(ptr32[thisreg+offset+4], 0x00000000); + if ( _Z ) xMOV(ptr32[thisreg+offset+8], 0x00000000); + if ( _W ) xMOV(ptr32[thisreg+offset+12], 0x3f800000); } return; } diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index a518b04388..1f1960eb5d 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -34,20 +34,20 @@ void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm ) { // (this is one of my test cases for the new emitter --air) - iAddressReg src( srcRm ); - iAddressReg dest( destRm ); + xAddressReg src( srcRm ); + xAddressReg dest( destRm ); - iMOV( eax, ptr[src] ); - iMOV( ptr[dest], eax ); + xMOV( eax, ptr[src] ); + xMOV( ptr[dest], eax ); - iMOV( eax, ptr[src+4] ); - iMOV( ptr[dest+4], eax ); + xMOV( eax, ptr[src+4] ); + xMOV( ptr[dest+4], eax ); - iMOV( eax, ptr[src+8] ); - iMOV( ptr[dest+8], eax ); + xMOV( eax, ptr[src+8] ); + xMOV( ptr[dest+8], eax ); - iMOV( eax, ptr[src+12] ); - iMOV( ptr[dest+12], eax ); + xMOV( eax, ptr[src+12] ); + xMOV( ptr[dest+12], eax ); } /* @@ -200,10 +200,10 @@ void vtlb_DynGenRead64(u32 bits) SHR32ItoR(EAX,VTLB_PAGE_BITS); MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); - iForwardJS8 _fullread; + xForwardJS8 _fullread; _vtlb_DynGen_DirectRead( bits, false ); - iForwardJump8 cont; + xForwardJump8 cont; _fullread.SetTarget(); @@ -223,10 +223,10 @@ void vtlb_DynGenRead32(u32 bits, bool sign) SHR32ItoR(EAX,VTLB_PAGE_BITS); MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); - iForwardJS8 _fullread; + xForwardJS8 _fullread; _vtlb_DynGen_DirectRead( bits, sign ); - iForwardJump8 cont; + xForwardJump8 cont; _fullread.SetTarget(); _vtlb_DynGen_IndirectRead( bits ); @@ -478,10 +478,10 @@ void vtlb_DynGenWrite(u32 sz) 
SHR32ItoR(EAX,VTLB_PAGE_BITS); MOV32RmSOffsettoR(EAX,EAX,(int)vtlbdata.vmap,2); ADD32RtoR(ECX,EAX); - iForwardJS8 _full; + xForwardJS8 _full; _vtlb_DynGen_DirectWrite( sz ); - iForwardJump8 cont; + xForwardJump8 cont; _full.SetTarget(); _vtlb_DynGen_IndirectWrite( sz ); diff --git a/pcsx2/x86/ix86/implement/bittest.h b/pcsx2/x86/ix86/implement/bittest.h index dd3d8fcc73..5ad091667d 100644 --- a/pcsx2/x86/ix86/implement/bittest.h +++ b/pcsx2/x86/ix86/implement/bittest.h @@ -41,54 +41,54 @@ class Group8Impl protected: static const uint OperandSize = sizeof(ImmType); - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } public: Group8Impl() {} // For the love of GCC. // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& bitbase, const iRegister& bitoffset ) + static __emitinline void Emit( const xRegister& bitbase, const xRegister& bitoffset ) { prefix16(); - iWrite( 0x0f ); - iWrite( 0xa3 | (InstType << 2) ); + xWrite( 0x0f ); + xWrite( 0xa3 | (InstType << 2) ); ModRM_Direct( bitoffset.Id, bitbase.Id ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( void* bitbase, const iRegister& bitoffset ) + static __emitinline void Emit( void* bitbase, const xRegister& bitoffset ) { prefix16(); - iWrite( 0x0f ); - iWrite( 0xa3 | (InstType << 2) ); - iWriteDisp( bitoffset.Id, bitbase ); + xWrite( 0x0f ); + xWrite( 0xa3 | (InstType << 2) ); + xWriteDisp( bitoffset.Id, bitbase ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibBase& bitbase, const iRegister& bitoffset ) + static __emitinline void Emit( const ModSibBase& bitbase, const xRegister& bitoffset ) { prefix16(); - iWrite( 0x0f ); - iWrite( 0xa3 | (InstType << 2) ); + xWrite( 0x0f ); + xWrite( 0xa3 | (InstType << 2) ); EmitSibMagic( bitoffset.Id, bitbase ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& bitbase, u8 immoffset ) + static __emitinline void Emit( const xRegister& bitbase, u8 immoffset ) { prefix16(); - iWrite( 0xba0f ); + xWrite( 0xba0f ); ModRM_Direct( InstType, bitbase.Id ); - iWrite( immoffset ); + xWrite( immoffset ); } // ------------------------------------------------------------------------ static __emitinline void Emit( const ModSibStrict& bitbase, u8 immoffset ) { prefix16(); - iWrite( 0xba0f ); + xWrite( 0xba0f ); EmitSibMagic( InstType, bitbase ); - iWrite( immoffset ); + xWrite( immoffset ); } }; @@ -102,12 +102,12 @@ protected: typedef Group8Impl m_16; public: - __forceinline void operator()( const iRegister32& bitbase, const iRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - __forceinline void operator()( const iRegister16& bitbase, const iRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - __forceinline void operator()( void* bitbase, const iRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - __forceinline void operator()( void* bitbase, const iRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - __noinline void operator()( const ModSibBase& bitbase, const iRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - __noinline void operator()( const ModSibBase& bitbase, const iRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } + __forceinline void 
operator()( const xRegister32& bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } + __forceinline void operator()( const xRegister16& bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } + __forceinline void operator()( void* bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } + __forceinline void operator()( void* bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } + __noinline void operator()( const ModSibBase& bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } + __noinline void operator()( const ModSibBase& bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } // Note on Imm forms : use int as the source operand since it's "reasonably inert" from a compiler // perspective. (using uint tends to make the compiler try and fail to match signed immediates with @@ -115,8 +115,8 @@ public: __noinline void operator()( const ModSibStrict& bitbase, u8 immoffset ) const { m_32::Emit( bitbase, immoffset ); } __noinline void operator()( const ModSibStrict& bitbase, u8 immoffset ) const { m_16::Emit( bitbase, immoffset ); } - void operator()( const iRegister& bitbase, u8 immoffset ) const { m_32::Emit( bitbase, immoffset ); } - void operator()( const iRegister& bitbase, u8 immoffset ) const { m_16::Emit( bitbase, immoffset ); } + void operator()( const xRegister& bitbase, u8 immoffset ) const { m_32::Emit( bitbase, immoffset ); } + void operator()( const xRegister& bitbase, u8 immoffset ) const { m_16::Emit( bitbase, immoffset ); } Group8ImplAll() {} }; @@ -130,33 +130,33 @@ class BitScanImpl { protected: static const uint OperandSize = sizeof(ImmType); - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } static void emitbase() { prefix16(); - iWrite( 0x0f ); - iWrite( isReverse ? 0xbd : 0xbc ); + xWrite( 0x0f ); + xWrite( isReverse ? 0xbd : 0xbc ); } public: BitScanImpl() {} // For the love of GCC. 
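	// A rough sketch of the resulting byte stream, assuming an instance built with
	// isReverse == false (the BSF form): emitbase() writes the optional 0x66 prefix,
	// then 0f bc, so scanning ecx into eax should encode as "0f bc c1"; the BSR
	// variant differs only in the second opcode byte (0xbd).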
// ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const iRegister& from ) + static __emitinline void Emit( const xRegister& to, const xRegister& from ) { emitbase(); ModRM_Direct( to.Id, from.Id ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const void* src ) + static __emitinline void Emit( const xRegister& to, const void* src ) { emitbase(); - iWriteDisp( to.Id, src ); + xWriteDisp( to.Id, src ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const ModSibBase& sibsrc ) + static __emitinline void Emit( const xRegister& to, const ModSibBase& sibsrc ) { emitbase(); EmitSibMagic( to.Id, sibsrc ); @@ -175,12 +175,12 @@ protected: typedef BitScanImpl m_16; public: - __forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( to, from ); } - __forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( to, from ); } - __forceinline void operator()( const iRegister32& to, const void* src ) const { m_32::Emit( to, src ); } - __forceinline void operator()( const iRegister16& to, const void* src ) const { m_16::Emit( to, src ); } - __noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); } - __noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); } + __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { m_32::Emit( to, from ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { m_16::Emit( to, from ); } + __forceinline void operator()( const xRegister32& to, const void* src ) const { m_32::Emit( to, src ); } + __forceinline void operator()( const xRegister16& to, const void* src ) const { m_16::Emit( to, src ); } + __noinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); } + __noinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); } BitScanImplAll() {} }; diff --git a/pcsx2/x86/ix86/implement/dwshift.h b/pcsx2/x86/ix86/implement/dwshift.h index 7fb64f4b7a..390577ae20 100644 --- a/pcsx2/x86/ix86/implement/dwshift.h +++ b/pcsx2/x86/ix86/implement/dwshift.h @@ -33,7 +33,7 @@ protected: static const uint OperandSize = sizeof(ImmType); static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } static void basesibform( bool isCL ) { @@ -46,7 +46,7 @@ public: DwordShiftImpl() {} // because GCC doesn't like static classes // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const iRegister& from ) + static __emitinline void Emit( const xRegister& to, const xRegister& from ) { prefix16(); write16( 0xa50f | (isShiftRight ? 
0x800 : 0) ); @@ -54,7 +54,7 @@ public: } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const iRegister& from, u8 imm ) + static __emitinline void Emit( const xRegister& to, const xRegister& from, u8 imm ) { if( imm == 0 ) return; prefix16(); @@ -64,14 +64,14 @@ public: } // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from, __unused const iRegisterCL& clreg ) + static __emitinline void Emit( const ModSibBase& sibdest, const xRegister& from, __unused const xRegisterCL& clreg ) { basesibform(); EmitSibMagic( from.Id, sibdest ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibBase& sibdest, const iRegister& from, u8 imm ) + static __emitinline void Emit( const ModSibBase& sibdest, const xRegister& from, u8 imm ) { basesibform(); EmitSibMagic( from.Id, sibdest ); @@ -80,18 +80,18 @@ public: // ------------------------------------------------------------------------ // dest data type is inferred from the 'from' register, so we can do void* resolution :) - static __emitinline void Emit( void* dest, const iRegister& from, __unused const iRegisterCL& clreg ) + static __emitinline void Emit( void* dest, const xRegister& from, __unused const xRegisterCL& clreg ) { basesibform(); - iWriteDisp( from.Id, dest ); + xWriteDisp( from.Id, dest ); } // ------------------------------------------------------------------------ // dest data type is inferred from the 'from' register, so we can do void* resolution :) - static __emitinline void Emit( void* dest, const iRegister& from, u8 imm ) + static __emitinline void Emit( void* dest, const xRegister& from, u8 imm ) { basesibform(); - iWriteDisp( from.Id, dest ); + xWriteDisp( from.Id, dest ); write8( imm ); } }; @@ -110,20 +110,20 @@ protected: public: // ---------- 32 Bit Interface ----------- - __forceinline void operator()( const iRegister32& to, const iRegister32& from, __unused const iRegisterCL& clreg ) const { m_32::Emit( to, from ); } - __forceinline void operator()( void* dest, const iRegister32& from, __unused const iRegisterCL& clreg ) const { m_32::Emit( dest, from ); } - __noinline void operator()( const ModSibBase& sibdest, const iRegister32& from, __unused const iRegisterCL& clreg ) const { m_32::Emit( sibdest, from ); } - __forceinline void operator()( const iRegister32& to, const iRegister32& from, u8 imm ) const { m_32::Emit( to, from, imm ); } - __forceinline void operator()( void* dest, const iRegister32& from, u8 imm ) const { m_32::Emit( dest, from, imm ); } - __noinline void operator()( const ModSibBase& sibdest, const iRegister32& from, u8 imm ) const { m_32::Emit( sibdest, from ); } + __forceinline void operator()( const xRegister32& to, const xRegister32& from, __unused const xRegisterCL& clreg ) const { m_32::Emit( to, from ); } + __forceinline void operator()( void* dest, const xRegister32& from, __unused const xRegisterCL& clreg ) const { m_32::Emit( dest, from ); } + __noinline void operator()( const ModSibBase& sibdest, const xRegister32& from, __unused const xRegisterCL& clreg ) const { m_32::Emit( sibdest, from ); } + __forceinline void operator()( const xRegister32& to, const xRegister32& from, u8 imm ) const { m_32::Emit( to, from, imm ); } + __forceinline void operator()( void* dest, const xRegister32& from, u8 imm ) const { m_32::Emit( dest, from, imm ); } + 
__noinline void operator()( const ModSibBase& sibdest, const xRegister32& from, u8 imm ) const { m_32::Emit( sibdest, from ); } // ---------- 16 Bit Interface ----------- - __forceinline void operator()( const iRegister16& to, const iRegister16& from, __unused const iRegisterCL& clreg ) const { m_16::Emit( to, from ); } - __forceinline void operator()( void* dest, const iRegister16& from, __unused const iRegisterCL& clreg ) const { m_16::Emit( dest, from ); } - __noinline void operator()( const ModSibBase& sibdest, const iRegister16& from, __unused const iRegisterCL& clreg ) const { m_16::Emit( sibdest, from ); } - __forceinline void operator()( const iRegister16& to, const iRegister16& from, u8 imm ) const { m_16::Emit( to, from, imm ); } - __forceinline void operator()( void* dest, const iRegister16& from, u8 imm ) const { m_16::Emit( dest, from, imm ); } - __noinline void operator()( const ModSibBase& sibdest, const iRegister16& from, u8 imm ) const { m_16::Emit( sibdest, from ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from, __unused const xRegisterCL& clreg ) const { m_16::Emit( to, from ); } + __forceinline void operator()( void* dest, const xRegister16& from, __unused const xRegisterCL& clreg ) const { m_16::Emit( dest, from ); } + __noinline void operator()( const ModSibBase& sibdest, const xRegister16& from, __unused const xRegisterCL& clreg ) const { m_16::Emit( sibdest, from ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from, u8 imm ) const { m_16::Emit( to, from, imm ); } + __forceinline void operator()( void* dest, const xRegister16& from, u8 imm ) const { m_16::Emit( dest, from, imm ); } + __noinline void operator()( const ModSibBase& sibdest, const xRegister16& from, u8 imm ) const { m_16::Emit( sibdest, from ); } DwordShiftImplAll() {} // Why does GCC need these? }; diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index 8089593cc3..3979e2cff4 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -41,65 +41,65 @@ protected: static const uint OperandSize = sizeof(ImmType); static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } public: Group1Impl() {} // because GCC doesn't like static classes - static __emitinline void Emit( G1Type InstType, const iRegister& to, const iRegister& from ) + static __emitinline void Emit( G1Type InstType, const xRegister& to, const xRegister& from ) { prefix16(); - iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + xWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); ModRM_Direct( from.Id, to.Id ); } - static __emitinline void Emit( G1Type InstType, const ModSibBase& sibdest, const iRegister& from ) + static __emitinline void Emit( G1Type InstType, const ModSibBase& sibdest, const xRegister& from ) { prefix16(); - iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + xWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); EmitSibMagic( from.Id, sibdest ); } - static __emitinline void Emit( G1Type InstType, const iRegister& to, const ModSibBase& sibsrc ) + static __emitinline void Emit( G1Type InstType, const xRegister& to, const ModSibBase& sibsrc ) { prefix16(); - iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); + xWrite( (Is8BitOperand() ? 
2 : 3) | (InstType<<3) ); EmitSibMagic( to.Id, sibsrc ); } - static __emitinline void Emit( G1Type InstType, void* dest, const iRegister& from ) + static __emitinline void Emit( G1Type InstType, void* dest, const xRegister& from ) { prefix16(); - iWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); - iWriteDisp( from.Id, dest ); + xWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + xWriteDisp( from.Id, dest ); } - static __emitinline void Emit( G1Type InstType, const iRegister& to, const void* src ) + static __emitinline void Emit( G1Type InstType, const xRegister& to, const void* src ) { prefix16(); - iWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); - iWriteDisp( to.Id, src ); + xWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); + xWriteDisp( to.Id, src ); } - static __emitinline void Emit( G1Type InstType, const iRegister& to, int imm ) + static __emitinline void Emit( G1Type InstType, const xRegister& to, int imm ) { prefix16(); if( !Is8BitOperand() && is_s8( imm ) ) { - iWrite( 0x83 ); + xWrite( 0x83 ); ModRM_Direct( InstType, to.Id ); - iWrite( imm ); + xWrite( imm ); } else { if( to.IsAccumulator() ) - iWrite( (Is8BitOperand() ? 4 : 5) | (InstType<<3) ); + xWrite( (Is8BitOperand() ? 4 : 5) | (InstType<<3) ); else { - iWrite( Is8BitOperand() ? 0x80 : 0x81 ); + xWrite( Is8BitOperand() ? 0x80 : 0x81 ); ModRM_Direct( InstType, to.Id ); } - iWrite( imm ); + xWrite( imm ); } } @@ -107,19 +107,19 @@ public: { if( Is8BitOperand() ) { - iWrite( 0x80 ); + xWrite( 0x80 ); EmitSibMagic( InstType, sibdest ); - iWrite( imm ); + xWrite( imm ); } else { prefix16(); - iWrite( is_s8( imm ) ? 0x83 : 0x81 ); + xWrite( is_s8( imm ) ? 0x83 : 0x81 ); EmitSibMagic( InstType, sibdest ); if( is_s8( imm ) ) - iWrite( imm ); + xWrite( imm ); else - iWrite( imm ); + xWrite( imm ); } } }; @@ -132,15 +132,15 @@ class Group1ImplAll { public: template< typename T > - __forceinline void operator()( const iRegister& to, const iRegister& from ) const { Group1Impl::Emit( InstType, to, from ); } + __forceinline void operator()( const xRegister& to, const xRegister& from ) const { Group1Impl::Emit( InstType, to, from ); } template< typename T > - __forceinline void operator()( const iRegister& to, const void* src ) const { Group1Impl::Emit( InstType, to, src ); } + __forceinline void operator()( const xRegister& to, const void* src ) const { Group1Impl::Emit( InstType, to, src ); } template< typename T > - __forceinline void operator()( void* dest, const iRegister& from ) const { Group1Impl::Emit( InstType, dest, from ); } + __forceinline void operator()( void* dest, const xRegister& from ) const { Group1Impl::Emit( InstType, dest, from ); } template< typename T > - __noinline void operator()( const ModSibBase& sibdest, const iRegister& from ) const { Group1Impl::Emit( InstType, sibdest, from ); } + __noinline void operator()( const ModSibBase& sibdest, const xRegister& from ) const { Group1Impl::Emit( InstType, sibdest, from ); } template< typename T > - __noinline void operator()( const iRegister& to, const ModSibBase& sibsrc ) const { Group1Impl::Emit( InstType, to, sibsrc ); } + __noinline void operator()( const xRegister& to, const ModSibBase& sibsrc ) const { Group1Impl::Emit( InstType, to, sibsrc ); } // Note on Imm forms : use int as the source operand since it's "reasonably inert" from a compiler // perspective. 
(using uint tends to make the compiler try and fail to match signed immediates with @@ -149,38 +149,60 @@ public: template< typename T > __noinline void operator()( const ModSibStrict& sibdest, int imm ) const { Group1Impl::Emit( InstType, sibdest, imm ); } template< typename T > - __forceinline void operator()( const iRegister& to, int imm ) const { Group1Impl::Emit( InstType, to, imm ); } + __forceinline void operator()( const xRegister& to, int imm ) const { Group1Impl::Emit( InstType, to, imm ); } Group1ImplAll() {} // Why does GCC need these? }; +// ------------------------------------------------------------------------ +// This class combines x86 with SSE/SSE2 logic operations (ADD, OR, and NOT). +// Note: ANDN [AndNot] is handled below separately. +// template< G1Type InstType, u8 OpcodeSSE > -class G1LogicImpl : public Group1ImplAll +class G1LogicImpl_PlusSSE : public Group1ImplAll { public: + using Group1ImplAll::operator(); + const SSELogicImpl<0x00,OpcodeSSE> PS; const SSELogicImpl<0x66,OpcodeSSE> PD; - G1LogicImpl() {} + G1LogicImpl_PlusSSE() {} }; +// ------------------------------------------------------------------------ +// This calss combines x86 with SSE/SSE2 arithmetic operations (ADD/SUB). +// template< G1Type InstType, u8 OpcodeSSE > -class G1ArithmeticImpl : public G1LogicImpl +class G1ArithmeticImpl_PlusSSE : public G1LogicImpl_PlusSSE { public: + using Group1ImplAll::operator(); + const SSELogicImpl<0xf3,OpcodeSSE> SS; const SSELogicImpl<0xf2,OpcodeSSE> SD; - G1ArithmeticImpl() {} + G1ArithmeticImpl_PlusSSE() {} }; - -template< u8 OpcodeSSE > -class SSEAndNotImpl +// ------------------------------------------------------------------------ +class G1CompareImpl_PlusSSE : Group1ImplAll< G1Type_CMP > { -public: - const SSELogicImpl<0x00,OpcodeSSE> PS; - const SSELogicImpl<0x66,OpcodeSSE> PD; +protected: + template< u8 Prefix > struct Woot + { + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + }; - SSEAndNotImpl() {} -}; \ No newline at end of file +public: + using Group1ImplAll< G1Type_CMP >::operator(); + + Woot<0x00> PS; + Woot<0x66> PD; + Woot<0xf3> SS; + Woot<0xf2> SD; + + G1CompareImpl_PlusSSE() {} //GCWhat? +}; diff --git a/pcsx2/x86/ix86/implement/group2.h b/pcsx2/x86/ix86/implement/group2.h index a694263008..45a5430d90 100644 --- a/pcsx2/x86/ix86/implement/group2.h +++ b/pcsx2/x86/ix86/implement/group2.h @@ -45,21 +45,21 @@ protected: static const uint OperandSize = sizeof(ImmType); static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } public: Group2Impl() {} // For the love of GCC. // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to ) + static __emitinline void Emit( const xRegister& to ) { prefix16(); - iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); + xWrite( Is8BitOperand() ? 
0xd2 : 0xd3 ); ModRM_Direct( InstType, to.Id ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, u8 imm ) + static __emitinline void Emit( const xRegister& to, u8 imm ) { if( imm == 0 ) return; @@ -67,14 +67,14 @@ public: if( imm == 1 ) { // special encoding of 1's - iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); + xWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); ModRM_Direct( InstType, to.Id ); } else { - iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); + xWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); ModRM_Direct( InstType, to.Id ); - iWrite( imm ); + xWrite( imm ); } } @@ -82,7 +82,7 @@ public: static __emitinline void Emit( const ModSibStrict& sibdest ) { prefix16(); - iWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); + xWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); EmitSibMagic( InstType, sibdest ); } @@ -95,14 +95,14 @@ public: if( imm == 1 ) { // special encoding of 1's - iWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); + xWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); EmitSibMagic( InstType, sibdest ); } else { - iWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); + xWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); EmitSibMagic( InstType, sibdest ); - iWrite( imm ); + xWrite( imm ); } } }; @@ -113,16 +113,16 @@ template< G2Type InstType > class Group2ImplAll { public: - template< typename T > __forceinline void operator()( const iRegister& to, __unused const iRegisterCL& from ) const + template< typename T > __forceinline void operator()( const xRegister& to, __unused const xRegisterCL& from ) const { Group2Impl::Emit( to ); } - template< typename T > __noinline void operator()( const ModSibStrict& sibdest, __unused const iRegisterCL& from ) const + template< typename T > __noinline void operator()( const ModSibStrict& sibdest, __unused const xRegisterCL& from ) const { Group2Impl::Emit( sibdest ); } template< typename T > __noinline void operator()( const ModSibStrict& sibdest, u8 imm ) const { Group2Impl::Emit( sibdest, imm ); } - template< typename T > __forceinline void operator()( const iRegister& to, u8 imm ) const + template< typename T > __forceinline void operator()( const xRegister& to, u8 imm ) const { Group2Impl::Emit( to, imm ); } Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds. diff --git a/pcsx2/x86/ix86/implement/group3.h b/pcsx2/x86/ix86/implement/group3.h index a614e4b09d..aae0d77652 100644 --- a/pcsx2/x86/ix86/implement/group3.h +++ b/pcsx2/x86/ix86/implement/group3.h @@ -31,6 +31,7 @@ enum G3Type G3Type_iDIV = 7 }; +// ------------------------------------------------------------------------ template< typename ImmType > class Group3Impl { @@ -38,22 +39,22 @@ protected: static const uint OperandSize = sizeof(ImmType); static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } public: Group3Impl() {} // For the love of GCC. - static __emitinline void Emit( G3Type InstType, const iRegister& from ) + static __emitinline void Emit( G3Type InstType, const xRegister& from ) { prefix16(); - iWrite(Is8BitOperand() ? 0xf6 : 0xf7 ); + xWrite(Is8BitOperand() ? 0xf6 : 0xf7 ); ModRM_Direct( InstType, from.Id ); } static __emitinline void Emit( G3Type InstType, const ModSibStrict& sibsrc ) { prefix16(); - iWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); + xWrite( Is8BitOperand() ? 
0xf6 : 0xf7 ); EmitSibMagic( InstType, sibsrc ); } }; @@ -65,7 +66,7 @@ class Group3ImplAll { public: template< typename T > - __forceinline void operator()( const iRegister& from ) const { Group3Impl::Emit( InstType, from ); } + __forceinline void operator()( const xRegister& from ) const { Group3Impl::Emit( InstType, from ); } template< typename T > __noinline void operator()( const ModSibStrict& from ) const { Group3Impl::Emit( InstType, from ); } @@ -73,7 +74,9 @@ public: Group3ImplAll() {} }; - +// ------------------------------------------------------------------------ +// This class combines x86 and SSE/SSE2 instructions for iMUL and iDIV. +// template< G3Type InstType, u8 OpcodeSSE > class G3Impl_PlusSSE : public Group3ImplAll { @@ -94,11 +97,11 @@ class iMulImpl { protected: static const uint OperandSize = sizeof(ImmType); - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } public: // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const iRegister& from ) + static __emitinline void Emit( const xRegister& to, const xRegister& from ) { prefix16(); write16( 0xaf0f ); @@ -106,15 +109,15 @@ public: } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const void* src ) + static __emitinline void Emit( const xRegister& to, const void* src ) { prefix16(); write16( 0xaf0f ); - iWriteDisp( to.Id, src ); + xWriteDisp( to.Id, src ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const ModSibBase& src ) + static __emitinline void Emit( const xRegister& to, const ModSibBase& src ) { prefix16(); write16( 0xaf0f ); @@ -122,7 +125,7 @@ public: } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const iRegister& from, ImmType imm ) + static __emitinline void Emit( const xRegister& to, const xRegister& from, ImmType imm ) { prefix16(); write16( is_s8( imm ) ? 0x6b : 0x69 ); @@ -130,23 +133,23 @@ public: if( is_s8( imm ) ) write8( imm ); else - iWrite( imm ); + xWrite( imm ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const void* src, ImmType imm ) + static __emitinline void Emit( const xRegister& to, const void* src, ImmType imm ) { prefix16(); write16( is_s8( imm ) ? 0x6b : 0x69 ); - iWriteDisp( to.Id, src ); + xWriteDisp( to.Id, src ); if( is_s8( imm ) ) write8( imm ); else - iWrite( imm ); + xWrite( imm ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const ModSibBase& src, ImmType imm ) + static __emitinline void Emit( const xRegister& to, const ModSibBase& src, ImmType imm ) { prefix16(); write16( is_s8( imm ) ? 
0x6b : 0x69 ); @@ -154,11 +157,11 @@ public: if( is_s8( imm ) ) write8( imm ); else - iWrite( imm ); + xWrite( imm ); } }; - +// ------------------------------------------------------------------------ class iMul_PlusSSE : public G3Impl_PlusSSE { protected: @@ -166,24 +169,19 @@ protected: typedef iMulImpl iMUL16; public: + using G3Impl_PlusSSE::operator(); + + __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { iMUL32::Emit( to, from ); } + __forceinline void operator()( const xRegister32& to, const void* src ) const { iMUL32::Emit( to, src ); } + __forceinline void operator()( const xRegister32& to, const xRegister32& from, s32 imm ) const{ iMUL32::Emit( to, from, imm ); } + __noinline void operator()( const xRegister32& to, const ModSibBase& src ) const { iMUL32::Emit( to, src ); } + __noinline void operator()( const xRegister32& to, const ModSibBase& from, s32 imm ) const { iMUL32::Emit( to, from, imm ); } - template< typename T > - __forceinline void operator()( const iRegister& from ) const { Group3Impl::Emit( G3Type_iMUL, from ); } - - template< typename T > - __noinline void operator()( const ModSibStrict& from ) const { Group3Impl::Emit( G3Type_iMUL, from ); } - - __forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { iMUL32::Emit( to, from ); } - __forceinline void operator()( const iRegister32& to, const void* src ) const { iMUL32::Emit( to, src ); } - __forceinline void operator()( const iRegister32& to, const iRegister32& from, s32 imm ) const{ iMUL32::Emit( to, from, imm ); } - __noinline void operator()( const iRegister32& to, const ModSibBase& src ) const { iMUL32::Emit( to, src ); } - __noinline void operator()( const iRegister32& to, const ModSibBase& from, s32 imm ) const { iMUL32::Emit( to, from, imm ); } - - __forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { iMUL16::Emit( to, from ); } - __forceinline void operator()( const iRegister16& to, const void* src ) const { iMUL16::Emit( to, src ); } - __forceinline void operator()( const iRegister16& to, const iRegister16& from, s16 imm ) const{ iMUL16::Emit( to, from, imm ); } - __noinline void operator()( const iRegister16& to, const ModSibBase& src ) const { iMUL16::Emit( to, src ); } - __noinline void operator()( const iRegister16& to, const ModSibBase& from, s16 imm ) const { iMUL16::Emit( to, from, imm ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { iMUL16::Emit( to, from ); } + __forceinline void operator()( const xRegister16& to, const void* src ) const { iMUL16::Emit( to, src ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from, s16 imm ) const{ iMUL16::Emit( to, from, imm ); } + __noinline void operator()( const xRegister16& to, const ModSibBase& src ) const { iMUL16::Emit( to, src ); } + __noinline void operator()( const xRegister16& to, const ModSibBase& from, s16 imm ) const { iMUL16::Emit( to, from, imm ); } iMul_PlusSSE() {} }; diff --git a/pcsx2/x86/ix86/implement/incdec.h b/pcsx2/x86/ix86/implement/incdec.h index 4aacb81beb..76f5a87b9a 100644 --- a/pcsx2/x86/ix86/implement/incdec.h +++ b/pcsx2/x86/ix86/implement/incdec.h @@ -21,6 +21,8 @@ // Implementations found here: Increment and Decrement Instructions! // Note: This header is meant to be included from within the x86Emitter::Internal namespace. 
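//
// A minimal usage sketch, illustration only, assuming the xINC/xDEC instances
// declared later in this patch and the ptr32 address indexer from ix86.cpp:
//
//   xINC( eax );          // inc eax  (operand size inferred from xRegister32)
//   xDEC( si );           // dec si   (16-bit form picks up the 0x66 prefix via prefix16)
//   xINC( ptr32[ecx] );   // inc dword [ecx], via the ModSibStrict overload
//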
+// ------------------------------------------------------------------------ +// template< typename ImmType > class IncDecImpl { @@ -28,12 +30,12 @@ protected: static const uint OperandSize = sizeof(ImmType); static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } public: IncDecImpl() {} // For the love of GCC. - static __emitinline void Emit( bool isDec, const iRegister& to ) + static __emitinline void Emit( bool isDec, const xRegister& to ) { // There is no valid 8-bit form of direct register inc/dec, so fall // back on Mod/RM format instead: @@ -67,13 +69,13 @@ protected: typedef IncDecImpl m_8; public: - __forceinline void operator()( const iRegister32& to ) const { m_32::Emit( isDec, to ); } + __forceinline void operator()( const xRegister32& to ) const { m_32::Emit( isDec, to ); } __noinline void operator()( const ModSibStrict& sibdest ) const{ m_32::Emit( isDec, sibdest ); } - __forceinline void operator()( const iRegister16& to ) const { m_16::Emit( isDec, to ); } + __forceinline void operator()( const xRegister16& to ) const { m_16::Emit( isDec, to ); } __noinline void operator()( const ModSibStrict& sibdest ) const{ m_16::Emit( isDec, sibdest ); } - __forceinline void operator()( const iRegister8& to ) const { m_8::Emit( isDec, to ); } + __forceinline void operator()( const xRegister8& to ) const { m_8::Emit( isDec, to ); } __noinline void operator()( const ModSibStrict& sibdest ) const { m_8::Emit( isDec, sibdest ); } IncDecImplAll() {} // don't ask. diff --git a/pcsx2/x86/ix86/implement/jmpcall.h b/pcsx2/x86/ix86/implement/jmpcall.h index bf375432d5..453f2d4e14 100644 --- a/pcsx2/x86/ix86/implement/jmpcall.h +++ b/pcsx2/x86/ix86/implement/jmpcall.h @@ -27,22 +27,22 @@ class JmpCallImpl protected: static const uint OperandSize = sizeof(ImmType); - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } public: JmpCallImpl() {} // For the love of GCC. - static __emitinline void Emit( bool isJmp, const iRegister& absreg ) + static __emitinline void Emit( bool isJmp, const xRegister& absreg ) { prefix16(); - iWrite( 0xff ); + xWrite( 0xff ); ModRM_Direct( isJmp ? 4 : 2, absreg.Id ); } static __emitinline void Emit( bool isJmp, const ModSibStrict& src ) { prefix16(); - iWrite( 0xff ); + xWrite( 0xff ); EmitSibMagic( isJmp ? 4 : 2, src ); } }; @@ -58,10 +58,10 @@ protected: public: JmpCallImplAll() {} - __forceinline void operator()( const iRegister32& absreg ) const { m_32::Emit( isJmp, absreg ); } + __forceinline void operator()( const xRegister32& absreg ) const { m_32::Emit( isJmp, absreg ); } __forceinline void operator()( const ModSibStrict& src ) const { m_32::Emit( isJmp, src ); } - __forceinline void operator()( const iRegister16& absreg ) const { m_16::Emit( isJmp, absreg ); } + __forceinline void operator()( const xRegister16& absreg ) const { m_16::Emit( isJmp, absreg ); } __forceinline void operator()( const ModSibStrict& src ) const { m_16::Emit( isJmp, src ); } // Special form for calling functions. This form automatically resolves the @@ -77,8 +77,8 @@ public: // always 5 bytes (16 bit calls are bad mojo, so no bother to do special logic). 
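// Worked example with illustrative numbers only: if this CALL opcode is being
// written at 0x00401000 and func sits at 0x00401234, then
//   dest = 0x00401234 - (0x00401000 + 5) = 0x0000022F
// and the emitted bytes are E8 2F 02 00 00, since rel32 is measured from the
// end of the 5-byte instruction.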
sptr dest = (sptr)func - ((sptr)iGetPtr() + 5); - iWrite( 0xe8 ); - iWrite( dest ); + xWrite( 0xe8 ); + xWrite( dest ); } } diff --git a/pcsx2/x86/ix86/implement/movs.h b/pcsx2/x86/ix86/implement/movs.h index de469a7c10..577df29be3 100644 --- a/pcsx2/x86/ix86/implement/movs.h +++ b/pcsx2/x86/ix86/implement/movs.h @@ -32,23 +32,23 @@ class MovImpl protected: static const uint OperandSize = sizeof(ImmType); static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } public: MovImpl() {} // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const iRegister& from ) + static __emitinline void Emit( const xRegister& to, const xRegister& from ) { if( to == from ) return; // ignore redundant MOVs. prefix16(); - iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + xWrite( Is8BitOperand() ? 0x88 : 0x89 ); ModRM_Direct( from.Id, to.Id ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibBase& dest, const iRegister& from ) + static __emitinline void Emit( const ModSibBase& dest, const xRegister& from ) { prefix16(); @@ -57,18 +57,18 @@ public: if( from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty() ) { - iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); - iWrite( dest.Displacement ); + xWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); + xWrite( dest.Displacement ); } else { - iWrite( Is8BitOperand() ? 0x88 : 0x89 ); + xWrite( Is8BitOperand() ? 0x88 : 0x89 ); EmitSibMagic( from.Id, dest ); } } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const ModSibBase& src ) + static __emitinline void Emit( const xRegister& to, const ModSibBase& src ) { prefix16(); @@ -77,18 +77,18 @@ public: if( to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty() ) { - iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); - iWrite( src.Displacement ); + xWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); + xWrite( src.Displacement ); } else { - iWrite( Is8BitOperand() ? 0x8a : 0x8b ); + xWrite( Is8BitOperand() ? 0x8a : 0x8b ); EmitSibMagic( to.Id, src ); } } // ------------------------------------------------------------------------ - static __emitinline void Emit( void* dest, const iRegister& from ) + static __emitinline void Emit( void* dest, const xRegister& from ) { prefix16(); @@ -96,18 +96,18 @@ public: if( from.IsAccumulator() ) { - iWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); - iWrite( (s32)dest ); + xWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); + xWrite( (s32)dest ); } else { - iWrite( Is8BitOperand() ? 0x88 : 0x89 ); - iWriteDisp( from.Id, dest ); + xWrite( Is8BitOperand() ? 0x88 : 0x89 ); + xWriteDisp( from.Id, dest ); } } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const void* src ) + static __emitinline void Emit( const xRegister& to, const void* src ) { prefix16(); @@ -115,33 +115,33 @@ public: if( to.IsAccumulator() ) { - iWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); - iWrite( (s32)src ); + xWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); + xWrite( (s32)src ); } else { - iWrite( Is8BitOperand() ? 0x8a : 0x8b ); - iWriteDisp( to.Id, src ); + xWrite( Is8BitOperand() ? 
0x8a : 0x8b ); + xWriteDisp( to.Id, src ); } } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, ImmType imm ) + static __emitinline void Emit( const xRegister& to, ImmType imm ) { // Note: MOV does not have (reg16/32,imm8) forms. prefix16(); - iWrite( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); - iWrite( imm ); + xWrite( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); + xWrite( imm ); } // ------------------------------------------------------------------------ static __emitinline void Emit( ModSibStrict dest, ImmType imm ) { prefix16(); - iWrite( Is8BitOperand() ? 0xc6 : 0xc7 ); + xWrite( Is8BitOperand() ? 0xc6 : 0xc7 ); EmitSibMagic( 0, dest ); - iWrite( imm ); + xWrite( imm ); } }; @@ -150,15 +150,15 @@ class MovImplAll { public: template< typename T > - __forceinline void operator()( const iRegister& to, const iRegister& from ) const { MovImpl::Emit( to, from ); } + __forceinline void operator()( const xRegister& to, const xRegister& from ) const { MovImpl::Emit( to, from ); } template< typename T > - __forceinline void operator()( const iRegister& to, const void* src ) const { MovImpl::Emit( to, src ); } + __forceinline void operator()( const xRegister& to, const void* src ) const { MovImpl::Emit( to, src ); } template< typename T > - __forceinline void operator()( void* dest, const iRegister& from ) const { MovImpl::Emit( dest, from ); } + __forceinline void operator()( void* dest, const xRegister& from ) const { MovImpl::Emit( dest, from ); } template< typename T > - __noinline void operator()( const ModSibBase& sibdest, const iRegister& from ) const { MovImpl::Emit( sibdest, from ); } + __noinline void operator()( const ModSibBase& sibdest, const xRegister& from ) const { MovImpl::Emit( sibdest, from ); } template< typename T > - __noinline void operator()( const iRegister& to, const ModSibBase& sibsrc ) const { MovImpl::Emit( to, sibsrc ); } + __noinline void operator()( const xRegister& to, const ModSibBase& sibsrc ) const { MovImpl::Emit( to, sibsrc ); } template< typename T > __noinline void operator()( const ModSibStrict& sibdest, int imm ) const { MovImpl::Emit( sibdest, imm ); } @@ -167,10 +167,10 @@ public: // the flags (namely replacing mov reg,0 with xor). 
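// For instance, per the implementation just below: xMOV( eax, 0 ) emits
// "xor eax, eax" and clobbers the flags, while xMOV( eax, 0, true ) keeps the
// plain "mov eax, 0" encoding and leaves the flags untouched.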
template< typename T > - __emitinline void operator()( const iRegister& to, int imm, bool preserve_flags=false ) const + __emitinline void operator()( const xRegister& to, int imm, bool preserve_flags=false ) const { if( !preserve_flags && (imm == 0) ) - iXOR( to, to ); + xXOR( to, to ); else MovImpl::Emit( to, imm ); } @@ -193,7 +193,7 @@ protected: static const uint OperandSize = sizeof(ImmType); static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } static __forceinline void emit_base( JccComparisonType cc ) { @@ -206,27 +206,27 @@ protected: public: CMovSetImpl() {} - static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const iRegister& from ) + static __emitinline void Emit( JccComparisonType cc, const xRegister& to, const xRegister& from ) { if( to == from ) return; emit_base( cc ); ModRM_Direct( to.Id, from.Id ); } - static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const void* src ) + static __emitinline void Emit( JccComparisonType cc, const xRegister& to, const void* src ) { emit_base( cc ); - iWriteDisp( to.Id, src ); + xWriteDisp( to.Id, src ); } - static __emitinline void Emit( JccComparisonType cc, const iRegister& to, const ModSibBase& sibsrc ) + static __emitinline void Emit( JccComparisonType cc, const xRegister& to, const ModSibBase& sibsrc ) { emit_base( cc ); EmitSibMagic( to.Id, sibsrc ); } // This form is provided for SETcc only (not available in CMOV) - static __emitinline void EmitSet( JccComparisonType cc, const iRegister& to ) + static __emitinline void EmitSet( JccComparisonType cc, const xRegister& to ) { emit_base( cc ); ModRM_Direct( 0, to.Id ); @@ -236,7 +236,7 @@ public: static __emitinline void EmitSet( JccComparisonType cc, const void* src ) { emit_base( cc ); - iWriteDisp( 0, src ); + xWriteDisp( 0, src ); } // This form is provided for SETcc only (not available in CMOV) @@ -258,13 +258,13 @@ protected: typedef CMovSetImpl m_16; // 0x40 is the cmov base instruction id public: - __forceinline void operator()( JccComparisonType ccType, const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); } - __forceinline void operator()( JccComparisonType ccType, const iRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); } - __noinline void operator()( JccComparisonType ccType, const iRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); } + __forceinline void operator()( JccComparisonType ccType, const xRegister32& to, const xRegister32& from ) const { m_32::Emit( ccType, to, from ); } + __forceinline void operator()( JccComparisonType ccType, const xRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); } + __noinline void operator()( JccComparisonType ccType, const xRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); } - __forceinline void operator()( JccComparisonType ccType, const iRegister16& to, const iRegister16& from ) const { m_16::Emit( ccType, to, from ); } - __forceinline void operator()( JccComparisonType ccType, const iRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); } - __noinline void operator()( JccComparisonType ccType, const iRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); } + __forceinline void operator()( JccComparisonType ccType, const xRegister16& to, const xRegister16& 
from ) const { m_16::Emit( ccType, to, from ); } + __forceinline void operator()( JccComparisonType ccType, const xRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); } + __noinline void operator()( JccComparisonType ccType, const xRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); } CMovImplGeneric() {} // don't ask. }; @@ -278,13 +278,13 @@ protected: typedef CMovSetImpl m_16; public: - __forceinline void operator()( const iRegister32& to, const iRegister32& from ) const { m_32::Emit( ccType, to, from ); } - __forceinline void operator()( const iRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); } - __noinline void operator()( const iRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); } + __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { m_32::Emit( ccType, to, from ); } + __forceinline void operator()( const xRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); } + __noinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); } - __forceinline void operator()( const iRegister16& to, const iRegister16& from ) const { m_16::Emit( ccType, to, from ); } - __forceinline void operator()( const iRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); } - __noinline void operator()( const iRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { m_16::Emit( ccType, to, from ); } + __forceinline void operator()( const xRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); } + __noinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); } CMovImplAll() {} // don't ask. }; @@ -296,7 +296,7 @@ protected: typedef CMovSetImpl Impl; // 0x90 is the SETcc base instruction id public: - __forceinline void operator()( JccComparisonType cc, const iRegister8& to ) const { Impl::EmitSet( cc, to ); } + __forceinline void operator()( JccComparisonType cc, const xRegister8& to ) const { Impl::EmitSet( cc, to ); } __forceinline void operator()( JccComparisonType cc, void* dest ) const { Impl::EmitSet( cc, dest ); } __noinline void operator()( JccComparisonType cc, const ModSibStrict& dest ) const { Impl::EmitSet( cc, dest ); } @@ -311,7 +311,7 @@ protected: typedef CMovSetImpl Impl; // 0x90 is the SETcc base instruction id public: - __forceinline void operator()( const iRegister8& to ) const { Impl::EmitSet( ccType, to ); } + __forceinline void operator()( const xRegister8& to ) const { Impl::EmitSet( ccType, to ); } __forceinline void operator()( void* dest ) const { Impl::EmitSet( ccType, dest ); } __noinline void operator()( const ModSibStrict& dest ) const { Impl::EmitSet( ccType, dest ); } @@ -330,24 +330,24 @@ protected: static const uint SrcOperandSize = sizeof( SrcImmType ); static bool Is8BitOperand() { return SrcOperandSize == 1; } - static void prefix16() { if( DestOperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( DestOperandSize == 2 ) xWrite( 0x66 ); } static __forceinline void emit_base( bool SignExtend ) { prefix16(); - iWrite( 0x0f ); - iWrite( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) ); + xWrite( 0x0f ); + xWrite( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) ); } public: MovExtendImpl() {} // For the love of GCC. 
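// Opcode sanity check for emit_base above (a sketch; the values follow from the
// bit math, with SrcImmType selecting the source width):
//   8 -> 16/32 zero-extend : 0F B6  (MOVZX)
//   16 -> 32   zero-extend : 0F B7  (MOVZX)
//   8 -> 16/32 sign-extend : 0F BE  (MOVSX)
//   16 -> 32   sign-extend : 0F BF  (MOVSX)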
- static __emitinline void Emit( const iRegister& to, const iRegister& from, bool SignExtend ) + static __emitinline void Emit( const xRegister& to, const xRegister& from, bool SignExtend ) { emit_base( SignExtend ); ModRM_Direct( to.Id, from.Id ); } - static __emitinline void Emit( const iRegister& to, const ModSibStrict& sibsrc, bool SignExtend ) + static __emitinline void Emit( const xRegister& to, const ModSibStrict& sibsrc, bool SignExtend ) { emit_base( SignExtend ); EmitSibMagic( to.Id, sibsrc ); @@ -364,14 +364,14 @@ protected: typedef MovExtendImpl m_8to16; public: - __forceinline void operator()( const iRegister32& to, const iRegister16& from ) const { m_16to32::Emit( to, from, SignExtend ); } - __noinline void operator()( const iRegister32& to, const ModSibStrict& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); } + __forceinline void operator()( const xRegister32& to, const xRegister16& from ) const { m_16to32::Emit( to, from, SignExtend ); } + __noinline void operator()( const xRegister32& to, const ModSibStrict& sibsrc ) const { m_16to32::Emit( to, sibsrc, SignExtend ); } - __forceinline void operator()( const iRegister32& to, const iRegister8& from ) const { m_8to32::Emit( to, from, SignExtend ); } - __noinline void operator()( const iRegister32& to, const ModSibStrict& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); } + __forceinline void operator()( const xRegister32& to, const xRegister8& from ) const { m_8to32::Emit( to, from, SignExtend ); } + __noinline void operator()( const xRegister32& to, const ModSibStrict& sibsrc ) const { m_8to32::Emit( to, sibsrc, SignExtend ); } - __forceinline void operator()( const iRegister16& to, const iRegister8& from ) const { m_8to16::Emit( to, from, SignExtend ); } - __noinline void operator()( const iRegister16& to, const ModSibStrict& sibsrc ) const { m_8to16::Emit( to, sibsrc, SignExtend ); } + __forceinline void operator()( const xRegister16& to, const xRegister8& from ) const { m_8to16::Emit( to, from, SignExtend ); } + __noinline void operator()( const xRegister16& to, const ModSibStrict& sibsrc ) const { m_8to16::Emit( to, sibsrc, SignExtend ); } MovExtendImplAll() {} // don't ask. }; diff --git a/pcsx2/x86/ix86/implement/test.h b/pcsx2/x86/ix86/implement/test.h index 55ecdbcaf0..0c66b0203f 100644 --- a/pcsx2/x86/ix86/implement/test.h +++ b/pcsx2/x86/ix86/implement/test.h @@ -27,41 +27,41 @@ class TestImpl protected: static const uint OperandSize = sizeof(ImmType); static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) iWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } public: TestImpl() {} // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, const iRegister& from ) + static __emitinline void Emit( const xRegister& to, const xRegister& from ) { prefix16(); - iWrite( Is8BitOperand() ? 0x84 : 0x85 ); + xWrite( Is8BitOperand() ? 0x84 : 0x85 ); ModRM_Direct( from.Id, to.Id ); } // ------------------------------------------------------------------------ - static __emitinline void Emit( const iRegister& to, ImmType imm ) + static __emitinline void Emit( const xRegister& to, ImmType imm ) { prefix16(); if( to.IsAccumulator() ) - iWrite( Is8BitOperand() ? 0xa8 : 0xa9 ); + xWrite( Is8BitOperand() ? 0xa8 : 0xa9 ); else { - iWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); + xWrite( Is8BitOperand() ? 
0xf6 : 0xf7 ); ModRM_Direct( 0, to.Id ); } - iWrite( imm ); + xWrite( imm ); } // ------------------------------------------------------------------------ static __emitinline void Emit( ModSibStrict dest, ImmType imm ) { prefix16(); - iWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); + xWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); EmitSibMagic( 0, dest ); - iWrite( imm ); + xWrite( imm ); } }; @@ -71,12 +71,12 @@ class TestImplAll { public: template< typename T > - __forceinline void operator()( const iRegister& to, const iRegister& from ) const { TestImpl::Emit( to, from ); } + __forceinline void operator()( const xRegister& to, const xRegister& from ) const { TestImpl::Emit( to, from ); } template< typename T > __noinline void operator()( const ModSibStrict& sibdest, T imm ) const { TestImpl::Emit( sibdest, imm ); } template< typename T > - void operator()( const iRegister& to, T imm ) const { TestImpl::Emit( to, imm ); } + void operator()( const xRegister& to, T imm ) const { TestImpl::Emit( to, imm ); } TestImplAll() {} // Why does GCC need these? }; diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index 41f2b3071a..99c3cbb912 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -26,11 +26,11 @@ __emitinline void SimdPrefix( u8 opcode, u8 prefix=0 ) { if( sizeof( T ) == 16 && prefix != 0 ) { - iWrite( 0x0f00 | prefix ); - iWrite( opcode ); + xWrite( 0x0f00 | prefix ); + xWrite( opcode ); } else - iWrite( (opcode<<8) | 0x0f ); + xWrite( (opcode<<8) | 0x0f ); } // ------------------------------------------------------------------------ @@ -40,24 +40,24 @@ __emitinline void SimdPrefix( u8 opcode, u8 prefix=0 ) // instructions violate this "guideline.") // template< typename T, typename T2 > -__emitinline void writeXMMop( u8 prefix, u8 opcode, const iRegister& to, const iRegister& from ) +__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& to, const xRegister& from ) { SimdPrefix( opcode, prefix ); ModRM_Direct( to.Id, from.Id ); } template< typename T > -void writeXMMop( u8 prefix, u8 opcode, const iRegister& reg, const ModSibBase& sib ) +void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const ModSibBase& sib ) { SimdPrefix( opcode, prefix ); EmitSibMagic( reg.Id, sib ); } template< typename T > -__emitinline void writeXMMop( u8 prefix, u8 opcode, const iRegister& reg, const void* data ) +__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const void* data ) { SimdPrefix( opcode, prefix ); - iWriteDisp( reg.Id, data ); + xWriteDisp( reg.Id, data ); } // ------------------------------------------------------------------------ @@ -66,51 +66,74 @@ __emitinline void writeXMMop( u8 prefix, u8 opcode, const iRegister& reg, con // some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies. 
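// Byte-order sketch, assuming the u16 form of xWrite stores little-endian as on x86:
//   SimdPrefix( 0x7e, 0xf3 ) on a 16-byte operand emits F3 0F 7E  (prefixed form)
//   SimdPrefix( 0x6f )       on an MMX operand emits       0F 6F  (legacy form)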
// template< typename T, typename T2 > -__emitinline void writeXMMop( u8 opcode, const iRegister& to, const iRegister& from ) +__emitinline void writeXMMop( u8 opcode, const xRegister& to, const xRegister& from ) { SimdPrefix( opcode ); ModRM_Direct( to.Id, from.Id ); } template< typename T > -void writeXMMop( u8 opcode, const iRegister& reg, const ModSibBase& sib ) +void writeXMMop( u8 opcode, const xRegister& reg, const ModSibBase& sib ) { SimdPrefix( opcode ); EmitSibMagic( reg.Id, sib ); } template< typename T > -__emitinline void writeXMMop( u8 opcode, const iRegister& reg, const void* data ) +__emitinline void writeXMMop( u8 opcode, const xRegister& reg, const void* data ) { SimdPrefix( opcode ); - iWriteDisp( reg.Id, data ); + xWriteDisp( reg.Id, data ); } ////////////////////////////////////////////////////////////////////////////////////////// // Moves to/from high/low portions of an xmm register. // These instructions cannot be used in reg/reg form. // -template< u8 Prefix, u8 Opcode > +template< u8 Opcode > class MovhlImplAll { +protected: + template< u8 Prefix > + struct Woot + { + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + }; + public: - __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const void* to, const iRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } - __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const ModSibBase& to, const iRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + Woot<0x00> PS; + Woot<0x66> PD; MovhlImplAll() {} //GCC. }; +// ------------------------------------------------------------------------ +// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but +// do something kinda different! Fun! +// +template< u8 Opcode > +class MovhlImpl_RtoR +{ +public: + __forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Opcode, to, from ); } + __forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode, to, from ); } + + MovhlImpl_RtoR() {} //GCC. 
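// By way of illustration: xMOVLH.PS( xmm0, xmm1 ) should encode as 0F 16 C1
// (movlhps), whereas the same 0x16 opcode through xMOVH.PS with a memory
// operand is movhps, hence the separate reg,reg class.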
+}; + +// ------------------------------------------------------------------------ template< u8 Prefix, u8 Opcode, u8 OpcodeAlt > class MovapsImplAll { public: - __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const void* to, const iRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } - __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const ModSibBase& to, const iRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } MovapsImplAll() {} //GCC. }; @@ -124,11 +147,11 @@ class PLogicImplAll { public: template< typename T > - __forceinline void operator()( const iRegisterSIMD& to, const iRegisterSIMD& from ) const { writeXMMop( 0x66, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( 0x66, Opcode, to, from ); } template< typename T > - __forceinline void operator()( const iRegisterSIMD& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); } template< typename T > - __noinline void operator()( const iRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } + __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } PLogicImplAll() {} //GCWho? }; @@ -140,47 +163,42 @@ template< u8 Prefix, u8 Opcode > class SSELogicImpl { public: - __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } SSELogicImpl() {} //GCWho? 
}; +// ------------------------------------------------------------------------ +// +template< u8 OpcodeSSE > +class SSEAndNotImpl +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> PS; + const SSELogicImpl<0x66,OpcodeSSE> PD; + + SSEAndNotImpl() {} +}; // ------------------------------------------------------------------------ -// For implementing SSE-only comparison operations, like CMPEQPS. -// -enum SSE2_ComparisonType -{ - SSE2_Equal = 0, - SSE2_Less, - SSE2_LessOrEqual, - SSE2_Unordered, - SSE2_NotEqual, - SSE2_NotLess, - SSE2_NotLessOrEqual, - SSE2_Ordered -}; - -template< u8 Prefix > -class SSECompareImplGeneric -{ -public: - __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( cmptype ); } - __forceinline void operator()( const iRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( cmptype ); } - __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( cmptype ); } - - SSECompareImplGeneric() {} //GCWhat? -}; - -template< u8 Prefix, u8 Opcode, SSE2_ComparisonType CType > +template< SSE2_ComparisonType CType > class SSECompareImpl { +protected: + template< u8 Prefix > struct Woot + { + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + }; + public: - __forceinline void operator()( const iRegisterSSE& to, const iRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( CType ); } - __forceinline void operator()( const iRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( CType ); } - __noinline void operator()( const iRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); iWrite( CType ); } + Woot<0x00> PS; + Woot<0x66> PD; + Woot<0xf3> SS; + Woot<0xf2> SD; SSECompareImpl() {} //GCWhat? 
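// For example, with CType = SSE2_Equal: xCMPEQ.PS( xmm0, xmm1 ) should emit
// 0F C2 C1 00 (cmpeqps), and the SS / SD forms simply prepend the F3 / F2
// prefixes for the scalar compares.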
}; diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 362d446037..2affc9c74e 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -66,54 +66,54 @@ __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT }; namespace x86Emitter { -const iAddressIndexerBase ptr; -const iAddressIndexer ptr128; -const iAddressIndexer ptr64; -const iAddressIndexer ptr32; -const iAddressIndexer ptr16; -const iAddressIndexer ptr8; +const xAddressIndexerBase ptr; +const xAddressIndexer ptr128; +const xAddressIndexer ptr64; +const xAddressIndexer ptr32; +const xAddressIndexer ptr16; +const xAddressIndexer ptr8; // ------------------------------------------------------------------------ -template< typename OperandType > const iRegister iRegister::Empty; -const iAddressReg iAddressReg::Empty; +template< typename OperandType > const xRegister xRegister::Empty; +const xAddressReg xAddressReg::Empty; -const iRegisterSSE +const xRegisterSSE xmm0( 0 ), xmm1( 1 ), xmm2( 2 ), xmm3( 3 ), xmm4( 4 ), xmm5( 5 ), xmm6( 6 ), xmm7( 7 ); -const iRegisterMMX +const xRegisterMMX mm0( 0 ), mm1( 1 ), mm2( 2 ), mm3( 3 ), mm4( 4 ), mm5( 5 ), mm6( 6 ), mm7( 7 ); -const iRegister32 +const xRegister32 eax( 0 ), ebx( 3 ), ecx( 1 ), edx( 2 ), esi( 6 ), edi( 7 ), ebp( 5 ), esp( 4 ); -const iRegister16 +const xRegister16 ax( 0 ), bx( 3 ), cx( 1 ), dx( 2 ), si( 6 ), di( 7 ), bp( 5 ), sp( 4 ); -const iRegister8 +const xRegister8 al( 0 ), dl( 2 ), bl( 3 ), ah( 4 ), ch( 5 ), dh( 6 ), bh( 7 ); -const iRegisterCL cl; +const xRegisterCL cl; namespace Internal { // Performance note: VC++ wants to use byte/word register form for the following - // ModRM/SibSB constructors when we use iWrite, and furthermore unrolls the + // ModRM/SibSB constructors when we use xWrite, and furthermore unrolls the // the shift using a series of ADDs for the following results: // add cl,cl // add cl,cl @@ -137,7 +137,7 @@ namespace Internal __forceinline void ModRM( uint mod, uint reg, uint rm ) { - iWrite( (mod << 6) | (reg << 3) | rm ); + xWrite( (mod << 6) | (reg << 3) | rm ); //*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm; //x86Ptr++; } @@ -149,20 +149,20 @@ namespace Internal __forceinline void SibSB( u32 ss, u32 index, u32 base ) { - iWrite( (ss << 6) | (index << 3) | base ); + xWrite( (ss << 6) | (index << 3) | base ); //*(u32*)x86Ptr = (ss << 6) | (index << 3) | base; //x86Ptr++; } - __forceinline void iWriteDisp( int regfield, s32 displacement ) + __forceinline void xWriteDisp( int regfield, s32 displacement ) { ModRM( 0, regfield, ModRm_UseDisp32 ); - iWrite( displacement ); + xWrite( displacement ); } - __forceinline void iWriteDisp( int regfield, const void* address ) + __forceinline void xWriteDisp( int regfield, const void* address ) { - iWriteDisp( regfield, (s32)address ); + xWriteDisp( regfield, (s32)address ); } // ------------------------------------------------------------------------ @@ -206,7 +206,7 @@ namespace Internal if( info.Index.IsEmpty() ) { - iWriteDisp( regfield, info.Displacement ); + xWriteDisp( regfield, info.Displacement ); return; } else @@ -229,7 +229,7 @@ namespace Internal { ModRM( 0, regfield, ModRm_UseSib ); SibSB( info.Scale, info.Index.Id, ModRm_UseDisp32 ); - iWrite( info.Displacement ); + xWrite( info.Displacement ); return; } else @@ -245,116 +245,116 @@ namespace Internal if( displacement_size != 0 ) { if( displacement_size == 1 ) - iWrite( info.Displacement ); + xWrite( info.Displacement ); else - iWrite( info.Displacement ); + xWrite( info.Displacement ); } } } using namespace 
Internal; -const MovImplAll iMOV; -const TestImplAll iTEST; +const MovImplAll xMOV; +const TestImplAll xTEST; -const G1LogicImpl iAND; -const G1LogicImpl iOR; -const G1LogicImpl iXOR; +const G1LogicImpl_PlusSSE xAND; +const G1LogicImpl_PlusSSE xOR; +const G1LogicImpl_PlusSSE xXOR; -const G1ArithmeticImpl iADD; -const G1ArithmeticImpl iSUB; +const G1ArithmeticImpl_PlusSSE xADD; +const G1ArithmeticImpl_PlusSSE xSUB; -const Group1ImplAll iADC; -const Group1ImplAll iSBB; -const Group1ImplAll iCMP; +const Group1ImplAll xADC; +const Group1ImplAll xSBB; +const G1CompareImpl_PlusSSE xCMP; -const Group2ImplAll iROL; -const Group2ImplAll iROR; -const Group2ImplAll iRCL; -const Group2ImplAll iRCR; -const Group2ImplAll iSHL; -const Group2ImplAll iSHR; -const Group2ImplAll iSAR; +const Group2ImplAll xROL; +const Group2ImplAll xROR; +const Group2ImplAll xRCL; +const Group2ImplAll xRCR; +const Group2ImplAll xSHL; +const Group2ImplAll xSHR; +const Group2ImplAll xSAR; -const Group3ImplAll iNOT; -const Group3ImplAll iNEG; -const Group3ImplAll iUMUL; -const Group3ImplAll iUDIV; -const G3Impl_PlusSSE iDIV; -const iMul_PlusSSE iMUL; +const Group3ImplAll xNOT; +const Group3ImplAll xNEG; +const Group3ImplAll xUMUL; +const Group3ImplAll xUDIV; +const G3Impl_PlusSSE xDIV; +const iMul_PlusSSE xMUL; -const IncDecImplAll iINC; -const IncDecImplAll iDEC; +const IncDecImplAll xINC; +const IncDecImplAll xDEC; -const MovExtendImplAll iMOVZX; -const MovExtendImplAll iMOVSX; +const MovExtendImplAll xMOVZX; +const MovExtendImplAll xMOVSX; -const DwordShiftImplAll iSHLD; -const DwordShiftImplAll iSHRD; +const DwordShiftImplAll xSHLD; +const DwordShiftImplAll xSHRD; -const Group8ImplAll iBT; -const Group8ImplAll iBTR; -const Group8ImplAll iBTS; -const Group8ImplAll iBTC; +const Group8ImplAll xBT; +const Group8ImplAll xBTR; +const Group8ImplAll xBTS; +const Group8ImplAll xBTC; -const BitScanImplAll iBSF; -const BitScanImplAll iBSR; +const BitScanImplAll xBSF; +const BitScanImplAll xBSR; // ------------------------------------------------------------------------ -const CMovImplGeneric iCMOV; +const CMovImplGeneric xCMOV; -const CMovImplAll iCMOVA; -const CMovImplAll iCMOVAE; -const CMovImplAll iCMOVB; -const CMovImplAll iCMOVBE; +const CMovImplAll xCMOVA; +const CMovImplAll xCMOVAE; +const CMovImplAll xCMOVB; +const CMovImplAll xCMOVBE; -const CMovImplAll iCMOVG; -const CMovImplAll iCMOVGE; -const CMovImplAll iCMOVL; -const CMovImplAll iCMOVLE; +const CMovImplAll xCMOVG; +const CMovImplAll xCMOVGE; +const CMovImplAll xCMOVL; +const CMovImplAll xCMOVLE; -const CMovImplAll iCMOVZ; -const CMovImplAll iCMOVE; -const CMovImplAll iCMOVNZ; -const CMovImplAll iCMOVNE; +const CMovImplAll xCMOVZ; +const CMovImplAll xCMOVE; +const CMovImplAll xCMOVNZ; +const CMovImplAll xCMOVNE; -const CMovImplAll iCMOVO; -const CMovImplAll iCMOVNO; -const CMovImplAll iCMOVC; -const CMovImplAll iCMOVNC; +const CMovImplAll xCMOVO; +const CMovImplAll xCMOVNO; +const CMovImplAll xCMOVC; +const CMovImplAll xCMOVNC; -const CMovImplAll iCMOVS; -const CMovImplAll iCMOVNS; -const CMovImplAll iCMOVPE; -const CMovImplAll iCMOVPO; +const CMovImplAll xCMOVS; +const CMovImplAll xCMOVNS; +const CMovImplAll xCMOVPE; +const CMovImplAll xCMOVPO; // ------------------------------------------------------------------------ -const SetImplGeneric iSET; +const SetImplGeneric xSET; -const SetImplAll iSETA; -const SetImplAll iSETAE; -const SetImplAll iSETB; -const SetImplAll iSETBE; +const SetImplAll xSETA; +const SetImplAll xSETAE; +const SetImplAll xSETB; +const SetImplAll 
xSETBE; -const SetImplAll iSETG; -const SetImplAll iSETGE; -const SetImplAll iSETL; -const SetImplAll iSETLE; +const SetImplAll xSETG; +const SetImplAll xSETGE; +const SetImplAll xSETL; +const SetImplAll xSETLE; -const SetImplAll iSETZ; -const SetImplAll iSETE; -const SetImplAll iSETNZ; -const SetImplAll iSETNE; +const SetImplAll xSETZ; +const SetImplAll xSETE; +const SetImplAll xSETNZ; +const SetImplAll xSETNE; -const SetImplAll iSETO; -const SetImplAll iSETNO; -const SetImplAll iSETC; -const SetImplAll iSETNC; +const SetImplAll xSETO; +const SetImplAll xSETNO; +const SetImplAll xSETC; +const SetImplAll xSETNC; -const SetImplAll iSETS; -const SetImplAll iSETNS; -const SetImplAll iSETPE; -const SetImplAll iSETPO; +const SetImplAll xSETS; +const SetImplAll xSETNS; +const SetImplAll xSETPE; +const SetImplAll xSETPO; // ------------------------------------------------------------------------ @@ -389,7 +389,7 @@ __emitinline void iAdvancePtr( uint bytes ) { // common debugger courtesy: advance with INT3 as filler. for( uint i=0; i( 0xcc ); + xWrite( 0xcc ); } else x86Ptr += bytes; @@ -430,7 +430,7 @@ void ModSibBase::Reduce() Index = Base; Scale = 0; if( !Base.IsStackPointer() ) // prevent ESP from being encoded 'alone' - Base = iAddressReg::Empty; + Base = xAddressReg::Empty; return; } @@ -484,9 +484,9 @@ void ModSibBase::Reduce() // of LEA, which alters flags states. // template< typename OperandType > -static void EmitLeaMagic( iRegister to, const ModSibBase& src, bool preserve_flags ) +static void EmitLeaMagic( xRegister to, const ModSibBase& src, bool preserve_flags ) { - typedef iRegister ToReg; + typedef xRegister ToReg; int displacement_size = (src.Displacement == 0) ? 0 : ( ( src.IsByteSizeDisp() ) ? 1 : 2 ); @@ -501,12 +501,12 @@ static void EmitLeaMagic( iRegister to, const ModSibBase& src, bool if( src.Index.IsEmpty() ) { - iMOV( to, src.Displacement ); + xMOV( to, src.Displacement ); return; } else if( displacement_size == 0 ) { - iMOV( to, ToReg( src.Index.Id ) ); + xMOV( to, ToReg( src.Index.Id ) ); return; } else @@ -516,8 +516,8 @@ static void EmitLeaMagic( iRegister to, const ModSibBase& src, bool // encode as MOV and ADD combo. Make sure to use the immediate on the // ADD since it can encode as an 8-bit sign-extended value. - iMOV( to, ToReg( src.Index.Id ) ); - iADD( to, src.Displacement ); + xMOV( to, ToReg( src.Index.Id ) ); + xADD( to, src.Displacement ); return; } else @@ -525,7 +525,7 @@ static void EmitLeaMagic( iRegister to, const ModSibBase& src, bool // note: no need to do ebp+0 check since we encode all 0 displacements as // register assignments above (via MOV) - iWrite( 0x8d ); + xWrite( 0x8d ); ModRM( displacement_size, to.Id, src.Index.Id ); } } @@ -543,14 +543,14 @@ static void EmitLeaMagic( iRegister to, const ModSibBase& src, bool // (this does not apply to older model P4s with the broken barrel shifter, // but we currently aren't optimizing for that target anyway). - iMOV( to, ToReg( src.Index.Id ) ); - iSHL( to, src.Scale ); + xMOV( to, ToReg( src.Index.Id ) ); + xSHL( to, src.Scale ); return; } - iWrite( 0x8d ); + xWrite( 0x8d ); ModRM( 0, to.Id, ModRm_UseSib ); SibSB( src.Scale, src.Index.Id, ModRm_UseDisp32 ); - iWrite( src.Displacement ); + xWrite( src.Displacement ); return; } else @@ -562,14 +562,14 @@ static void EmitLeaMagic( iRegister to, const ModSibBase& src, bool if( src.Index == esp ) { // ESP is not encodable as an index (ix86 ignores it), thus: - iMOV( to, ToReg( src.Base.Id ) ); // will do the trick! 
- if( src.Displacement ) iADD( to, src.Displacement ); + xMOV( to, ToReg( src.Base.Id ) ); // will do the trick! + if( src.Displacement ) xADD( to, src.Displacement ); return; } else if( src.Displacement == 0 ) { - iMOV( to, ToReg( src.Base.Id ) ); - iADD( to, ToReg( src.Index.Id ) ); + xMOV( to, ToReg( src.Base.Id ) ); + xADD( to, ToReg( src.Index.Id ) ); return; } } @@ -578,7 +578,7 @@ static void EmitLeaMagic( iRegister to, const ModSibBase& src, bool // special case handling of ESP as Index, which is replaceable with // a single MOV even when preserve_flags is set! :D - iMOV( to, ToReg( src.Base.Id ) ); + xMOV( to, ToReg( src.Base.Id ) ); return; } } @@ -586,7 +586,7 @@ static void EmitLeaMagic( iRegister to, const ModSibBase& src, bool if( src.Base == ebp && displacement_size == 0 ) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - iWrite( 0x8d ); + xWrite( 0x8d ); ModRM( displacement_size, to.Id, ModRm_UseSib ); SibSB( src.Scale, src.Index.Id, src.Base.Id ); } @@ -595,19 +595,19 @@ static void EmitLeaMagic( iRegister to, const ModSibBase& src, bool if( displacement_size != 0 ) { if( displacement_size == 1 ) - iWrite( src.Displacement ); + xWrite( src.Displacement ); else - iWrite( src.Displacement ); + xWrite( src.Displacement ); } } -__emitinline void iLEA( iRegister32 to, const ModSibBase& src, bool preserve_flags ) +__emitinline void xLEA( xRegister32 to, const ModSibBase& src, bool preserve_flags ) { EmitLeaMagic( to, src, preserve_flags ); } -__emitinline void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_flags ) +__emitinline void xLEA( xRegister16 to, const ModSibBase& src, bool preserve_flags ) { write8( 0x66 ); EmitLeaMagic( to, src, preserve_flags ); @@ -620,21 +620,21 @@ __emitinline void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_fla // Note: pushad/popad implementations are intentionally left out. The instructions are // invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead. 
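// For example, a block that only clobbers eax and ecx should emit
//   push eax / push ecx  ...  pop ecx / pop eax
// rather than a full pushad/popad pair.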
-__emitinline void iPOP( const ModSibBase& from ) +__emitinline void xPOP( const ModSibBase& from ) { - iWrite( 0x8f ); + xWrite( 0x8f ); EmitSibMagic( 0, from ); } -__emitinline void iPUSH( const ModSibBase& from ) +__emitinline void xPUSH( const ModSibBase& from ) { - iWrite( 0xff ); + xWrite( 0xff ); EmitSibMagic( 6, from ); } ////////////////////////////////////////////////////////////////////////////////////////// // -__emitinline void iBSWAP( const iRegister32& to ) +__emitinline void xBSWAP( const xRegister32& to ) { write8( 0x0F ); write8( 0xC8 | to.Id ); @@ -645,66 +645,81 @@ __emitinline void iBSWAP( const iRegister32& to ) // MMX / XMM Instructions // (these will get put in their own file later) -const MovapsImplAll< 0, 0x28, 0x29 > iMOVAPS; -const MovapsImplAll< 0, 0x10, 0x11 > iMOVUPS; -const MovapsImplAll< 0x66, 0x28, 0x29 > iMOVAPD; -const MovapsImplAll< 0x66, 0x10, 0x11 > iMOVUPD; +const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS; +const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS; +const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD; +const MovapsImplAll< 0x66, 0x10, 0x11 > xMOVUPD; #ifdef ALWAYS_USE_MOVAPS -const MovapsImplAll< 0x66, 0x6f, 0x7f > iMOVDQA; -const MovapsImplAll< 0xf3, 0x6f, 0x7f > iMOVDQU; +const MovapsImplAll< 0x66, 0x6f, 0x7f > xMOVDQA; +const MovapsImplAll< 0xf3, 0x6f, 0x7f > xMOVDQU; #else -const MovapsImplAll< 0, 0x28, 0x29 > iMOVDQA; -const MovapsImplAll< 0, 0x10, 0x11 > iMOVDQU; +const MovapsImplAll< 0, 0x28, 0x29 > xMOVDQA; +const MovapsImplAll< 0, 0x10, 0x11 > xMOVDQU; #endif -const MovhlImplAll< 0, 0x16 > iMOVHPS; -const MovhlImplAll< 0, 0x12 > iMOVLPS; -const MovhlImplAll< 0x66, 0x16 > iMOVHPD; -const MovhlImplAll< 0x66, 0x12 > iMOVLPD; +const MovhlImplAll<0x16> xMOVH; +const MovhlImplAll<0x12> xMOVL; +const MovhlImpl_RtoR<0x16> xMOVLH; +const MovhlImpl_RtoR<0x12> xMOVHL; -const PLogicImplAll<0xdb> iPAND; -const PLogicImplAll<0xdf> iPANDN; -const PLogicImplAll<0xeb> iPOR; -const PLogicImplAll<0xef> iPXOR; +const PLogicImplAll<0xdb> xPAND; +const PLogicImplAll<0xdf> xPANDN; +const PLogicImplAll<0xeb> xPOR; +const PLogicImplAll<0xef> xPXOR; -const SSEAndNotImpl<0x55> iANDN; +const SSEAndNotImpl<0x55> xANDN; // Compute Reciprocal Packed Single-Precision Floating-Point Values -const SSELogicImpl<0,0x53> iRCPPS; +const SSELogicImpl<0,0x53> xRCPPS; // Compute Reciprocal of Scalar Single-Precision Floating-Point Value -const SSELogicImpl<0xf3,0x53> iRCPSS; +const SSELogicImpl<0xf3,0x53> xRCPSS; + +// ------------------------------------------------------------------------ + +const SSECompareImpl xCMPEQ; +const SSECompareImpl xCMPLT; +const SSECompareImpl xCMPLE; +const SSECompareImpl xCMPUNORD; +const SSECompareImpl xCMPNE; +const SSECompareImpl xCMPNLT; +const SSECompareImpl xCMPNLE; +const SSECompareImpl xCMPORD; + + +////////////////////////////////////////////////////////////////////////////////////////// +// // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. -__forceinline void iMOVQZX( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, to, from ); } +__forceinline void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, to, from ); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. 
-__forceinline void iMOVQZX( const iRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); } +__forceinline void xMOVQZX( const xRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. -__forceinline void iMOVQZX( const iRegisterSSE& to, const void* src ) { writeXMMop( 0xf3, 0x7e, to, src ); } +__forceinline void xMOVQZX( const xRegisterSSE& to, const void* src ) { writeXMMop( 0xf3, 0x7e, to, src ); } // Moves lower quad of XMM to ptr64 (no bits are cleared) -__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } +__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } // Moves lower quad of XMM to ptr64 (no bits are cleared) -__forceinline void iMOVQ( void* dest, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } +__forceinline void xMOVQ( void* dest, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } -__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); } -__forceinline void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); } -__forceinline void iMOVQ( const iRegisterMMX& to, const void* src ) { writeXMMop( 0x6f, to, src ); } -__forceinline void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } -__forceinline void iMOVQ( void* dest, const iRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } +__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); } +__forceinline void xMOVQ( const xRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); } +__forceinline void xMOVQ( const xRegisterMMX& to, const void* src ) { writeXMMop( 0x6f, to, src ); } +__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } +__forceinline void xMOVQ( void* dest, const xRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } -// This form of iMOVQ is Intel's adeptly named 'MOVQ2DQ' -__forceinline void iMOVQ( const iRegisterSSE& to, const iRegisterMMX& from ) { writeXMMop( 0xf3, 0xd6, to, from ); } +// This form of xMOVQ is Intel's adeptly named 'MOVQ2DQ' +__forceinline void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ) { writeXMMop( 0xf3, 0xd6, to, from ); } -// This form of iMOVQ is Intel's adeptly named 'MOVDQ2Q' -__forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterSSE& from ) +// This form of xMOVQ is Intel's adeptly named 'MOVDQ2Q' +__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ) { // Manual implementation of this form of MOVQ, since its parameters are unique in a way // that breaks the template inference of writeXMMop(); @@ -716,53 +731,42 @@ __forceinline void iMOVQ( const iRegisterMMX& to, const iRegisterSSE& from ) ////////////////////////////////////////////////////////////////////////////////////////// // -#define IMPLEMENT_iMOVS( ssd, prefix ) \ - __forceinline void iMOV##ssd( const iRegisterSSE& to, const iRegisterSSE& from ) { if( to != from ) writeXMMop( prefix, 0x10, to, from ); } \ - __forceinline void iMOV##ssd##ZX( const iRegisterSSE& to, const void* from ) { writeXMMop( prefix, 0x10, to, from ); } \ - __forceinline void iMOV##ssd##ZX( const 
iRegisterSSE& to, const ModSibBase& from ) { writeXMMop( prefix, 0x10, to, from ); } \ - __forceinline void iMOV##ssd( const void* to, const iRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } \ - __forceinline void iMOV##ssd( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } +#define IMPLEMENT_xMOVS( ssd, prefix ) \ + __forceinline void xMOV##ssd( const xRegisterSSE& to, const xRegisterSSE& from ) { if( to != from ) writeXMMop( prefix, 0x10, to, from ); } \ + __forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const void* from ) { writeXMMop( prefix, 0x10, to, from ); } \ + __forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const ModSibBase& from ) { writeXMMop( prefix, 0x10, to, from ); } \ + __forceinline void xMOV##ssd( const void* to, const xRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } \ + __forceinline void xMOV##ssd( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } -IMPLEMENT_iMOVS( SS, 0xf3 ) -IMPLEMENT_iMOVS( SD, 0xf2 ) +IMPLEMENT_xMOVS( SS, 0xf3 ) +IMPLEMENT_xMOVS( SD, 0xf2 ) ////////////////////////////////////////////////////////////////////////////////////////// // Non-temporal movs only support a register as a target (ie, load form only, no stores) // -__forceinline void iMOVNTDQA( const iRegisterSSE& to, const void* from ) +__forceinline void xMOVNTDQA( const xRegisterSSE& to, const void* from ) { - iWrite( 0x2A380f66 ); - iWriteDisp( to.Id, from ); + xWrite( 0x2A380f66 ); + xWriteDisp( to.Id, from ); } -__noinline void iMOVNTDQA( const iRegisterSSE& to, const ModSibBase& from ) +__noinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from ) { - iWrite( 0x2A380f66 ); + xWrite( 0x2A380f66 ); EmitSibMagic( to.Id, from ); } -__forceinline void iMOVNTDQ( void* to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } -__noinline void iMOVNTDQA( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } +__forceinline void xMOVNTDQ( void* to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } +__noinline void xMOVNTDQA( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } -__forceinline void iMOVNTPD( void* to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } -__noinline void iMOVNTPD( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } -__forceinline void iMOVNTPS( void* to, const iRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } -__noinline void iMOVNTPS( const ModSibBase& to, const iRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } +__forceinline void xMOVNTPD( void* to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } +__noinline void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } +__forceinline void xMOVNTPS( void* to, const xRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } +__noinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } -__forceinline void iMOVNTQ( void* to, const iRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } -__noinline void iMOVNTQ( const ModSibBase& to, const iRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } - -////////////////////////////////////////////////////////////////////////////////////////// -// Mov Low to High / High to Low -// -// These instructions come in xmmreg,xmmreg forms only! 
-// - -__forceinline void iMOVLHPS( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop( 0x16, to, from ); } -__forceinline void iMOVHLPS( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop( 0x12, to, from ); } -__forceinline void iMOVLHPD( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0x16, to, from ); } -__forceinline void iMOVHLPD( const iRegisterSSE& to, const iRegisterSSE& from ) { writeXMMop( 0x66, 0x12, to, from ); } +__forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } +__noinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } } diff --git a/pcsx2/x86/ix86/ix86_inlines.inl b/pcsx2/x86/ix86/ix86_inlines.inl index c9af4a8e04..c2b54cfcb0 100644 --- a/pcsx2/x86/ix86/ix86_inlines.inl +++ b/pcsx2/x86/ix86/ix86_inlines.inl @@ -53,29 +53,29 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // x86Register Method Implementations // - __forceinline iAddressInfo iAddressReg::operator+( const iAddressReg& right ) const + __forceinline xAddressInfo xAddressReg::operator+( const xAddressReg& right ) const { - return iAddressInfo( *this, right ); + return xAddressInfo( *this, right ); } - __forceinline iAddressInfo iAddressReg::operator+( const iAddressInfo& right ) const + __forceinline xAddressInfo xAddressReg::operator+( const xAddressInfo& right ) const { return right + *this; } - __forceinline iAddressInfo iAddressReg::operator+( s32 right ) const + __forceinline xAddressInfo xAddressReg::operator+( s32 right ) const { - return iAddressInfo( *this, right ); + return xAddressInfo( *this, right ); } - __forceinline iAddressInfo iAddressReg::operator*( u32 right ) const + __forceinline xAddressInfo xAddressReg::operator*( u32 right ) const { - return iAddressInfo( Empty, *this, right ); + return xAddressInfo( Empty, *this, right ); } - __forceinline iAddressInfo iAddressReg::operator<<( u32 shift ) const + __forceinline xAddressInfo xAddressReg::operator<<( u32 shift ) const { - return iAddressInfo( Empty, *this, 1< - iForwardJump::iForwardJump( JccComparisonType cctype ) : + xForwardJump::xForwardJump( JccComparisonType cctype ) : BasePtr( (s8*)iGetPtr() + ((OperandSize == 1) ? 2 : // j8's are always 2 bytes. ((cctype==Jcc_Unconditional) ? 5 : 6 )) // j32's are either 5 or 6 bytes @@ -184,15 +184,15 @@ namespace x86Emitter jASSUME( OperandSize == 1 || OperandSize == 4 ); if( OperandSize == 1 ) - iWrite( (cctype == Jcc_Unconditional) ? 0xeb : (0x70 | cctype) ); + xWrite( (cctype == Jcc_Unconditional) ? 
0xeb : (0x70 | cctype) ); else { if( cctype == Jcc_Unconditional ) - iWrite( 0xe9 ); + xWrite( 0xe9 ); else { - iWrite( 0x0f ); - iWrite( 0x80 | cctype ); + xWrite( 0x0f ); + xWrite( 0x80 | cctype ); } } @@ -201,7 +201,7 @@ namespace x86Emitter // ------------------------------------------------------------------------ template< typename OperandType > - void iForwardJump::SetTarget() const + void xForwardJump::SetTarget() const { jASSUME( BasePtr != NULL ); diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 2b68790756..1421fba6dd 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -38,16 +38,16 @@ namespace x86Emitter // ------------------------------------------------------------------------ // Group 1 Instruction Class - extern const Internal::G1LogicImpl iAND; - extern const Internal::G1LogicImpl iOR; - extern const Internal::G1LogicImpl iXOR; - extern const Internal::SSEAndNotImpl<0x55> iANDN; + extern const Internal::G1LogicImpl_PlusSSE xAND; + extern const Internal::G1LogicImpl_PlusSSE xOR; + extern const Internal::G1LogicImpl_PlusSSE xXOR; - extern const Internal::G1ArithmeticImpl iADD; - extern const Internal::G1ArithmeticImpl iSUB; - extern const Internal::Group1ImplAll iADC; - extern const Internal::Group1ImplAll iSBB; - extern const Internal::Group1ImplAll iCMP; + extern const Internal::G1ArithmeticImpl_PlusSSE xADD; + extern const Internal::G1ArithmeticImpl_PlusSSE xSUB; + extern const Internal::G1CompareImpl_PlusSSE xCMP; + + extern const Internal::Group1ImplAll xADC; + extern const Internal::Group1ImplAll xSBB; // ------------------------------------------------------------------------ // Group 2 Instruction Class @@ -56,174 +56,154 @@ namespace x86Emitter // zero. This is a safe optimization since any zero-value shift does not affect any // flags. 
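	// A minimal usage sketch, assuming the renamed Group 2 shift forms declared just
	// below (xSHL/xSHR and the CL-count overloads): per the zero-shift note above, an
	// immediate count of zero is simply not emitted, since it changes neither the
	// register nor any flags.
	//
	//    xSHL( eax, 3 );    // emits "shl eax,3"
	//    xSHL( eax, 0 );    // emits nothing at all
	//    xSHR( edx, cl );   // CL-count form is always emitted (count unknown at emit time)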
- extern const Internal::MovImplAll iMOV; - extern const Internal::TestImplAll iTEST; + extern const Internal::MovImplAll xMOV; + extern const Internal::TestImplAll xTEST; - extern const Internal::Group2ImplAll iROL; - extern const Internal::Group2ImplAll iROR; - extern const Internal::Group2ImplAll iRCL; - extern const Internal::Group2ImplAll iRCR; - extern const Internal::Group2ImplAll iSHL; - extern const Internal::Group2ImplAll iSHR; - extern const Internal::Group2ImplAll iSAR; + extern const Internal::Group2ImplAll xROL; + extern const Internal::Group2ImplAll xROR; + extern const Internal::Group2ImplAll xRCL; + extern const Internal::Group2ImplAll xRCR; + extern const Internal::Group2ImplAll xSHL; + extern const Internal::Group2ImplAll xSHR; + extern const Internal::Group2ImplAll xSAR; // ------------------------------------------------------------------------ // Group 3 Instruction Class - extern const Internal::Group3ImplAll iNOT; - extern const Internal::Group3ImplAll iNEG; - extern const Internal::Group3ImplAll iUMUL; - extern const Internal::Group3ImplAll iUDIV; - extern const Internal::G3Impl_PlusSSE iDIV; - extern const Internal::iMul_PlusSSE iMUL; + extern const Internal::Group3ImplAll xNOT; + extern const Internal::Group3ImplAll xNEG; + extern const Internal::Group3ImplAll xUMUL; + extern const Internal::Group3ImplAll xUDIV; + extern const Internal::G3Impl_PlusSSE xDIV; + extern const Internal::iMul_PlusSSE xMUL; - extern const Internal::IncDecImplAll iINC; - extern const Internal::IncDecImplAll iDEC; + extern const Internal::IncDecImplAll xINC; + extern const Internal::IncDecImplAll xDEC; - extern const Internal::MovExtendImplAll iMOVZX; - extern const Internal::MovExtendImplAll iMOVSX; + extern const Internal::MovExtendImplAll xMOVZX; + extern const Internal::MovExtendImplAll xMOVSX; - extern const Internal::DwordShiftImplAll iSHLD; - extern const Internal::DwordShiftImplAll iSHRD; + extern const Internal::DwordShiftImplAll xSHLD; + extern const Internal::DwordShiftImplAll xSHRD; - extern const Internal::Group8ImplAll iBT; - extern const Internal::Group8ImplAll iBTR; - extern const Internal::Group8ImplAll iBTS; - extern const Internal::Group8ImplAll iBTC; + extern const Internal::Group8ImplAll xBT; + extern const Internal::Group8ImplAll xBTR; + extern const Internal::Group8ImplAll xBTS; + extern const Internal::Group8ImplAll xBTC; - extern const Internal::JmpCallImplAll iJMP; - extern const Internal::JmpCallImplAll iCALL; + extern const Internal::JmpCallImplAll xJMP; + extern const Internal::JmpCallImplAll xCALL; - extern const Internal::BitScanImplAll iBSF; - extern const Internal::BitScanImplAll iBSR; + extern const Internal::BitScanImplAll xBSF; + extern const Internal::BitScanImplAll xBSR; // ------------------------------------------------------------------------ - extern const Internal::CMovImplGeneric iCMOV; + extern const Internal::CMovImplGeneric xCMOV; - extern const Internal::CMovImplAll iCMOVA; - extern const Internal::CMovImplAll iCMOVAE; - extern const Internal::CMovImplAll iCMOVB; - extern const Internal::CMovImplAll iCMOVBE; + extern const Internal::CMovImplAll xCMOVA; + extern const Internal::CMovImplAll xCMOVAE; + extern const Internal::CMovImplAll xCMOVB; + extern const Internal::CMovImplAll xCMOVBE; - extern const Internal::CMovImplAll iCMOVG; - extern const Internal::CMovImplAll iCMOVGE; - extern const Internal::CMovImplAll iCMOVL; - extern const Internal::CMovImplAll iCMOVLE; + extern const Internal::CMovImplAll xCMOVG; + extern const 
Internal::CMovImplAll xCMOVGE; + extern const Internal::CMovImplAll xCMOVL; + extern const Internal::CMovImplAll xCMOVLE; - extern const Internal::CMovImplAll iCMOVZ; - extern const Internal::CMovImplAll iCMOVE; - extern const Internal::CMovImplAll iCMOVNZ; - extern const Internal::CMovImplAll iCMOVNE; + extern const Internal::CMovImplAll xCMOVZ; + extern const Internal::CMovImplAll xCMOVE; + extern const Internal::CMovImplAll xCMOVNZ; + extern const Internal::CMovImplAll xCMOVNE; - extern const Internal::CMovImplAll iCMOVO; - extern const Internal::CMovImplAll iCMOVNO; - extern const Internal::CMovImplAll iCMOVC; - extern const Internal::CMovImplAll iCMOVNC; + extern const Internal::CMovImplAll xCMOVO; + extern const Internal::CMovImplAll xCMOVNO; + extern const Internal::CMovImplAll xCMOVC; + extern const Internal::CMovImplAll xCMOVNC; - extern const Internal::CMovImplAll iCMOVS; - extern const Internal::CMovImplAll iCMOVNS; - extern const Internal::CMovImplAll iCMOVPE; - extern const Internal::CMovImplAll iCMOVPO; + extern const Internal::CMovImplAll xCMOVS; + extern const Internal::CMovImplAll xCMOVNS; + extern const Internal::CMovImplAll xCMOVPE; + extern const Internal::CMovImplAll xCMOVPO; // ------------------------------------------------------------------------ - extern const Internal::SetImplGeneric iSET; + extern const Internal::SetImplGeneric xSET; - extern const Internal::SetImplAll iSETA; - extern const Internal::SetImplAll iSETAE; - extern const Internal::SetImplAll iSETB; - extern const Internal::SetImplAll iSETBE; + extern const Internal::SetImplAll xSETA; + extern const Internal::SetImplAll xSETAE; + extern const Internal::SetImplAll xSETB; + extern const Internal::SetImplAll xSETBE; - extern const Internal::SetImplAll iSETG; - extern const Internal::SetImplAll iSETGE; - extern const Internal::SetImplAll iSETL; - extern const Internal::SetImplAll iSETLE; + extern const Internal::SetImplAll xSETG; + extern const Internal::SetImplAll xSETGE; + extern const Internal::SetImplAll xSETL; + extern const Internal::SetImplAll xSETLE; - extern const Internal::SetImplAll iSETZ; - extern const Internal::SetImplAll iSETE; - extern const Internal::SetImplAll iSETNZ; - extern const Internal::SetImplAll iSETNE; + extern const Internal::SetImplAll xSETZ; + extern const Internal::SetImplAll xSETE; + extern const Internal::SetImplAll xSETNZ; + extern const Internal::SetImplAll xSETNE; - extern const Internal::SetImplAll iSETO; - extern const Internal::SetImplAll iSETNO; - extern const Internal::SetImplAll iSETC; - extern const Internal::SetImplAll iSETNC; + extern const Internal::SetImplAll xSETO; + extern const Internal::SetImplAll xSETNO; + extern const Internal::SetImplAll xSETC; + extern const Internal::SetImplAll xSETNC; - extern const Internal::SetImplAll iSETS; - extern const Internal::SetImplAll iSETNS; - extern const Internal::SetImplAll iSETPE; - extern const Internal::SetImplAll iSETPO; + extern const Internal::SetImplAll xSETS; + extern const Internal::SetImplAll xSETNS; + extern const Internal::SetImplAll xSETPE; + extern const Internal::SetImplAll xSETPO; ////////////////////////////////////////////////////////////////////////////////////////// // Miscellaneous Instructions // These are all defined inline or in ix86.cpp. 
// - extern void iBSWAP( const iRegister32& to ); + extern void xBSWAP( const xRegister32& to ); // ----- Lea Instructions (Load Effective Address) ----- // Note: alternate (void*) forms of these instructions are not provided since those // forms are functionally equivalent to Mov reg,imm, and thus better written as MOVs // instead. - extern void iLEA( iRegister32 to, const ModSibBase& src, bool preserve_flags=false ); - extern void iLEA( iRegister16 to, const ModSibBase& src, bool preserve_flags=false ); + extern void xLEA( xRegister32 to, const ModSibBase& src, bool preserve_flags=false ); + extern void xLEA( xRegister16 to, const ModSibBase& src, bool preserve_flags=false ); // ----- Push / Pop Instructions ----- // Note: pushad/popad implementations are intentionally left out. The instructions are // invalid in x64, and are super slow on x32. Use multiple Push/Pop instructions instead. - extern void iPOP( const ModSibBase& from ); - extern void iPUSH( const ModSibBase& from ); + extern void xPOP( const ModSibBase& from ); + extern void xPUSH( const ModSibBase& from ); - static __forceinline void iPOP( iRegister32 from ) { write8( 0x58 | from.Id ); } - static __forceinline void iPOP( void* from ) { iPOP( ptr[from] ); } + static __forceinline void xPOP( xRegister32 from ) { write8( 0x58 | from.Id ); } + static __forceinline void xPOP( void* from ) { xPOP( ptr[from] ); } - static __forceinline void iPUSH( u32 imm ) { write8( 0x68 ); write32( imm ); } - static __forceinline void iPUSH( iRegister32 from ) { write8( 0x50 | from.Id ); } - static __forceinline void iPUSH( void* from ) { iPUSH( ptr[from] ); } + static __forceinline void xPUSH( u32 imm ) { write8( 0x68 ); write32( imm ); } + static __forceinline void xPUSH( xRegister32 from ) { write8( 0x50 | from.Id ); } + static __forceinline void xPUSH( void* from ) { xPUSH( ptr[from] ); } // pushes the EFLAGS register onto the stack - static __forceinline void iPUSHFD() { write8( 0x9C ); } + static __forceinline void xPUSHFD() { write8( 0x9C ); } // pops the EFLAGS register from the stack - static __forceinline void iPOPFD() { write8( 0x9D ); } + static __forceinline void xPOPFD() { write8( 0x9D ); } // ----- Miscellaneous Instructions ----- // Various Instructions with no parameter and no special encoding logic. 
- __forceinline void iRET() { write8( 0xC3 ); } - __forceinline void iCBW() { write16( 0x9866 ); } - __forceinline void iCWD() { write8( 0x98 ); } - __forceinline void iCDQ() { write8( 0x99 ); } - __forceinline void iCWDE() { write8( 0x98 ); } + __forceinline void xRET() { write8( 0xC3 ); } + __forceinline void xCBW() { write16( 0x9866 ); } + __forceinline void xCWD() { write8( 0x98 ); } + __forceinline void xCDQ() { write8( 0x99 ); } + __forceinline void xCWDE() { write8( 0x98 ); } - __forceinline void iLAHF() { write8( 0x9f ); } - __forceinline void iSAHF() { write8( 0x9e ); } + __forceinline void xLAHF() { write8( 0x9f ); } + __forceinline void xSAHF() { write8( 0x9e ); } - __forceinline void iSTC() { write8( 0xF9 ); } - __forceinline void iCLC() { write8( 0xF8 ); } + __forceinline void xSTC() { write8( 0xF9 ); } + __forceinline void xCLC() { write8( 0xF8 ); } // NOP 1-byte - __forceinline void iNOP() { write8(0x90); } - - ////////////////////////////////////////////////////////////////////////////////////////// - // MUL / DIV instructions - - /*extern void iMUL( const iRegister32& to, const iRegister32& from ); - extern void iMUL( const iRegister32& to, const void* src ); - extern void iMUL( const iRegister32& to, const iRegister32& from, s32 imm ); - extern void iMUL( const iRegister32& to, const ModSibBase& src ); - extern void iMUL( const iRegister32& to, const ModSibBase& src, s32 imm ); - - extern void iMUL( const iRegister16& to, const iRegister16& from ); - extern void iMUL( const iRegister16& to, const void* src ); - extern void iMUL( const iRegister16& to, const iRegister16& from, s16 imm ); - extern void iMUL( const iRegister16& to, const ModSibBase& src ); - extern void iMUL( const iRegister16& to, const ModSibBase& src, s16 imm ); - - template< typename T > - __forceinline void iMUL( const iRegister& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); } - template< typename T > - __noinline void iMUL( const ModSibStrict& from ) { Internal::Group3Impl::Emit( Internal::G3Type_iMUL, from ); }*/ + __forceinline void xNOP() { write8(0x90); } ////////////////////////////////////////////////////////////////////////////////////////// // JMP / Jcc Instructions! @@ -232,92 +212,92 @@ namespace x86Emitter #define DEFINE_FORWARD_JUMP( label, cond ) \ template< typename OperandType > \ - class iForward##label : public iForwardJump \ + class xForward##label : public xForwardJump \ { \ public: \ - iForward##label() : iForwardJump( cond ) {} \ + xForward##label() : xForwardJump( cond ) {} \ }; // ------------------------------------------------------------------------ // Note: typedefs below are defined individually in order to appease Intellisense // resolution. Including them into the class definition macro above breaks it. 
- typedef iForwardJump iForwardJump8; - typedef iForwardJump iForwardJump32; + typedef xForwardJump xForwardJump8; + typedef xForwardJump xForwardJump32; DEFINE_FORWARD_JUMP( JA, Jcc_Above ); DEFINE_FORWARD_JUMP( JB, Jcc_Below ); DEFINE_FORWARD_JUMP( JAE, Jcc_AboveOrEqual ); DEFINE_FORWARD_JUMP( JBE, Jcc_BelowOrEqual ); - typedef iForwardJA iForwardJA8; - typedef iForwardJA iForwardJA32; - typedef iForwardJB iForwardJB8; - typedef iForwardJB iForwardJB32; - typedef iForwardJAE iForwardJAE8; - typedef iForwardJAE iForwardJAE32; - typedef iForwardJBE iForwardJBE8; - typedef iForwardJBE iForwardJBE32; + typedef xForwardJA xForwardJA8; + typedef xForwardJA xForwardJA32; + typedef xForwardJB xForwardJB8; + typedef xForwardJB xForwardJB32; + typedef xForwardJAE xForwardJAE8; + typedef xForwardJAE xForwardJAE32; + typedef xForwardJBE xForwardJBE8; + typedef xForwardJBE xForwardJBE32; DEFINE_FORWARD_JUMP( JG, Jcc_Greater ); DEFINE_FORWARD_JUMP( JL, Jcc_Less ); DEFINE_FORWARD_JUMP( JGE, Jcc_GreaterOrEqual ); DEFINE_FORWARD_JUMP( JLE, Jcc_LessOrEqual ); - typedef iForwardJG iForwardJG8; - typedef iForwardJG iForwardJG32; - typedef iForwardJL iForwardJL8; - typedef iForwardJL iForwardJL32; - typedef iForwardJGE iForwardJGE8; - typedef iForwardJGE iForwardJGE32; - typedef iForwardJLE iForwardJLE8; - typedef iForwardJLE iForwardJLE32; + typedef xForwardJG xForwardJG8; + typedef xForwardJG xForwardJG32; + typedef xForwardJL xForwardJL8; + typedef xForwardJL xForwardJL32; + typedef xForwardJGE xForwardJGE8; + typedef xForwardJGE xForwardJGE32; + typedef xForwardJLE xForwardJLE8; + typedef xForwardJLE xForwardJLE32; DEFINE_FORWARD_JUMP( JZ, Jcc_Zero ); DEFINE_FORWARD_JUMP( JE, Jcc_Equal ); DEFINE_FORWARD_JUMP( JNZ, Jcc_NotZero ); DEFINE_FORWARD_JUMP( JNE, Jcc_NotEqual ); - typedef iForwardJZ iForwardJZ8; - typedef iForwardJZ iForwardJZ32; - typedef iForwardJE iForwardJE8; - typedef iForwardJE iForwardJE32; - typedef iForwardJNZ iForwardJNZ8; - typedef iForwardJNZ iForwardJNZ32; - typedef iForwardJNE iForwardJNE8; - typedef iForwardJNE iForwardJNE32; + typedef xForwardJZ xForwardJZ8; + typedef xForwardJZ xForwardJZ32; + typedef xForwardJE xForwardJE8; + typedef xForwardJE xForwardJE32; + typedef xForwardJNZ xForwardJNZ8; + typedef xForwardJNZ xForwardJNZ32; + typedef xForwardJNE xForwardJNE8; + typedef xForwardJNE xForwardJNE32; DEFINE_FORWARD_JUMP( JS, Jcc_Signed ); DEFINE_FORWARD_JUMP( JNS, Jcc_Unsigned ); - typedef iForwardJS iForwardJS8; - typedef iForwardJS iForwardJS32; - typedef iForwardJNS iForwardJNS8; - typedef iForwardJNS iForwardJNS32; + typedef xForwardJS xForwardJS8; + typedef xForwardJS xForwardJS32; + typedef xForwardJNS xForwardJNS8; + typedef xForwardJNS xForwardJNS32; DEFINE_FORWARD_JUMP( JO, Jcc_Overflow ); DEFINE_FORWARD_JUMP( JNO, Jcc_NotOverflow ); - typedef iForwardJO iForwardJO8; - typedef iForwardJO iForwardJO32; - typedef iForwardJNO iForwardJNO8; - typedef iForwardJNO iForwardJNO32; + typedef xForwardJO xForwardJO8; + typedef xForwardJO xForwardJO32; + typedef xForwardJNO xForwardJNO8; + typedef xForwardJNO xForwardJNO32; DEFINE_FORWARD_JUMP( JC, Jcc_Carry ); DEFINE_FORWARD_JUMP( JNC, Jcc_NotCarry ); - typedef iForwardJC iForwardJC8; - typedef iForwardJC iForwardJC32; - typedef iForwardJNC iForwardJNC8; - typedef iForwardJNC iForwardJNC32; + typedef xForwardJC xForwardJC8; + typedef xForwardJC xForwardJC32; + typedef xForwardJNC xForwardJNC8; + typedef xForwardJNC xForwardJNC32; DEFINE_FORWARD_JUMP( JPE, Jcc_ParityEven ); DEFINE_FORWARD_JUMP( JPO, Jcc_ParityOdd ); - typedef 
iForwardJPE iForwardJPE8; - typedef iForwardJPE iForwardJPE32; - typedef iForwardJPO iForwardJPO8; - typedef iForwardJPO iForwardJPO32; + typedef xForwardJPE xForwardJPE8; + typedef xForwardJPE xForwardJPE32; + typedef xForwardJPO xForwardJPO8; + typedef xForwardJPO xForwardJPO32; ////////////////////////////////////////////////////////////////////////////////////////// // MMX Mov Instructions (MOVD, MOVQ, MOVSS). @@ -332,53 +312,53 @@ namespace x86Emitter // MOVD has valid forms for MMX and XMM registers. // template< typename T > - __emitinline void iMOVDZX( const iRegisterSIMD& to, const iRegister32& from ) + __emitinline void xMOVDZX( const xRegisterSIMD& to, const xRegister32& from ) { Internal::writeXMMop( 0x66, 0x6e, to, from ); } template< typename T > - __emitinline void iMOVDZX( const iRegisterSIMD& to, const void* src ) + __emitinline void xMOVDZX( const xRegisterSIMD& to, const void* src ) { Internal::writeXMMop( 0x66, 0x6e, to, src ); } template< typename T > - void iMOVDZX( const iRegisterSIMD& to, const ModSibBase& src ) + void xMOVDZX( const xRegisterSIMD& to, const ModSibBase& src ) { Internal::writeXMMop( 0x66, 0x6e, to, src ); } template< typename T > - __emitinline void iMOVD( const iRegister32& to, const iRegisterSIMD& from ) + __emitinline void xMOVD( const xRegister32& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0x7e, from, to ); } template< typename T > - __emitinline void iMOVD( void* dest, const iRegisterSIMD& from ) + __emitinline void xMOVD( void* dest, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0x7e, from, dest ); } template< typename T > - void iMOVD( const ModSibBase& dest, const iRegisterSIMD& from ) + void xMOVD( const ModSibBase& dest, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0x7e, from, dest ); } // ------------------------------------------------------------------------ - // iMASKMOV: + // xMASKMOV: // Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2. // The default memory location is specified by DS:EDI. The most significant bit in each byte // of the mask operand determines whether the corresponding byte in the source operand is // written to the corresponding byte location in memory. template< typename T > - static __forceinline void iMASKMOV( const iRegisterSIMD& to, const iRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xf7, to, from ); } + static __forceinline void xMASKMOV( const xRegisterSIMD& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xf7, to, from ); } - // iPMOVMSKB: + // xPMOVMSKB: // Creates a mask made up of the most significant bit of each byte of the source // operand and stores the result in the low byte or word of the destination operand. // Upper bits of the destination are cleared to zero. @@ -387,93 +367,91 @@ namespace x86Emitter // 128-bit (SSE) source, the byte mask is 16-bits. 
// template< typename T > - static __forceinline void iPMOVMSKB( const iRegister32& to, const iRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); } + static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); } // ------------------------------------------------------------------------ - extern void iMOVQ( const iRegisterMMX& to, const iRegisterMMX& from ); - extern void iMOVQ( const iRegisterMMX& to, const iRegisterSSE& from ); - extern void iMOVQ( const iRegisterSSE& to, const iRegisterMMX& from ); + extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ); + extern void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ); + extern void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ); - extern void iMOVQ( void* dest, const iRegisterSSE& from ); - extern void iMOVQ( const ModSibBase& dest, const iRegisterSSE& from ); - extern void iMOVQ( void* dest, const iRegisterMMX& from ); - extern void iMOVQ( const ModSibBase& dest, const iRegisterMMX& from ); - extern void iMOVQ( const iRegisterMMX& to, const void* src ); - extern void iMOVQ( const iRegisterMMX& to, const ModSibBase& src ); + extern void xMOVQ( void* dest, const xRegisterSSE& from ); + extern void xMOVQ( const ModSibBase& dest, const xRegisterSSE& from ); + extern void xMOVQ( void* dest, const xRegisterMMX& from ); + extern void xMOVQ( const ModSibBase& dest, const xRegisterMMX& from ); + extern void xMOVQ( const xRegisterMMX& to, const void* src ); + extern void xMOVQ( const xRegisterMMX& to, const ModSibBase& src ); - extern void iMOVQZX( const iRegisterSSE& to, const void* src ); - extern void iMOVQZX( const iRegisterSSE& to, const ModSibBase& src ); - extern void iMOVQZX( const iRegisterSSE& to, const iRegisterSSE& from ); + extern void xMOVQZX( const xRegisterSSE& to, const void* src ); + extern void xMOVQZX( const xRegisterSSE& to, const ModSibBase& src ); + extern void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ); - extern void iMOVSS( const iRegisterSSE& to, const iRegisterSSE& from ); - extern void iMOVSS( const void* to, const iRegisterSSE& from ); - extern void iMOVSS( const ModSibBase& to, const iRegisterSSE& from ); - extern void iMOVSD( const iRegisterSSE& to, const iRegisterSSE& from ); - extern void iMOVSD( const void* to, const iRegisterSSE& from ); - extern void iMOVSD( const ModSibBase& to, const iRegisterSSE& from ); + extern void xMOVSS( const xRegisterSSE& to, const xRegisterSSE& from ); + extern void xMOVSS( const void* to, const xRegisterSSE& from ); + extern void xMOVSS( const ModSibBase& to, const xRegisterSSE& from ); + extern void xMOVSD( const xRegisterSSE& to, const xRegisterSSE& from ); + extern void xMOVSD( const void* to, const xRegisterSSE& from ); + extern void xMOVSD( const ModSibBase& to, const xRegisterSSE& from ); - extern void iMOVSSZX( const iRegisterSSE& to, const void* from ); - extern void iMOVSSZX( const iRegisterSSE& to, const ModSibBase& from ); - extern void iMOVSDZX( const iRegisterSSE& to, const void* from ); - extern void iMOVSDZX( const iRegisterSSE& to, const ModSibBase& from ); + extern void xMOVSSZX( const xRegisterSSE& to, const void* from ); + extern void xMOVSSZX( const xRegisterSSE& to, const ModSibBase& from ); + extern void xMOVSDZX( const xRegisterSSE& to, const void* from ); + extern void xMOVSDZX( const xRegisterSSE& to, const ModSibBase& from ); - extern void iMOVNTDQA( const iRegisterSSE& to, const void* from ); - extern void 
iMOVNTDQA( const iRegisterSSE& to, const ModSibBase& from ); - extern void iMOVNTDQ( void* to, const iRegisterSSE& from ); - extern void iMOVNTDQA( const ModSibBase& to, const iRegisterSSE& from ); + extern void xMOVNTDQA( const xRegisterSSE& to, const void* from ); + extern void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from ); + extern void xMOVNTDQ( void* to, const xRegisterSSE& from ); + extern void xMOVNTDQA( const ModSibBase& to, const xRegisterSSE& from ); - extern void iMOVNTPD( void* to, const iRegisterSSE& from ); - extern void iMOVNTPD( const ModSibBase& to, const iRegisterSSE& from ); - extern void iMOVNTPS( void* to, const iRegisterSSE& from ); - extern void iMOVNTPS( const ModSibBase& to, const iRegisterSSE& from ); - extern void iMOVNTQ( void* to, const iRegisterMMX& from ); - extern void iMOVNTQ( const ModSibBase& to, const iRegisterMMX& from ); - - extern void iMOVLHPS( const iRegisterSSE& to, const iRegisterSSE& from ); - extern void iMOVHLPS( const iRegisterSSE& to, const iRegisterSSE& from ); - extern void iMOVLHPD( const iRegisterSSE& to, const iRegisterSSE& from ); - extern void iMOVHLPD( const iRegisterSSE& to, const iRegisterSSE& from ); + extern void xMOVNTPD( void* to, const xRegisterSSE& from ); + extern void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from ); + extern void xMOVNTPS( void* to, const xRegisterSSE& from ); + extern void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ); + extern void xMOVNTQ( void* to, const xRegisterMMX& from ); + extern void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ); ////////////////////////////////////////////////////////////////////////////////////////// // - extern const Internal::MovapsImplAll<0, 0x28, 0x29> iMOVAPS; - extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVUPS; + extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS; + extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS; - extern const Internal::MovapsImplAll<0x66, 0x28, 0x29> iMOVAPD; - extern const Internal::MovapsImplAll<0x66, 0x10, 0x11> iMOVUPD; + extern const Internal::MovapsImplAll<0x66, 0x28, 0x29> xMOVAPD; + extern const Internal::MovapsImplAll<0x66, 0x10, 0x11> xMOVUPD; #ifdef ALWAYS_USE_MOVAPS - extern const Internal::MovapsImplAll<0x66, 0x6f, 0x7f> iMOVDQA; - extern const Internal::MovapsImplAll<0xf3, 0x6f, 0x7f> iMOVDQU; + extern const Internal::MovapsImplAll<0x66, 0x6f, 0x7f> xMOVDQA; + extern const Internal::MovapsImplAll<0xf3, 0x6f, 0x7f> xMOVDQU; #else - extern const Internal::MovapsImplAll<0, 0x28, 0x29> iMOVDQA; - extern const Internal::MovapsImplAll<0, 0x10, 0x11> iMOVDQU; + extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVDQA; + extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVDQU; #endif - extern const Internal::MovhlImplAll<0, 0x16> iMOVHPS; - extern const Internal::MovhlImplAll<0, 0x12> iMOVLPS; - extern const Internal::MovhlImplAll<0x66, 0x16> iMOVHPD; - extern const Internal::MovhlImplAll<0x66, 0x12> iMOVLPD; + extern const Internal::MovhlImpl_RtoR<0x16> xMOVLH; + extern const Internal::MovhlImpl_RtoR<0x12> xMOVHL; - extern const Internal::PLogicImplAll<0xdb> iPAND; - extern const Internal::PLogicImplAll<0xdf> iPANDN; - extern const Internal::PLogicImplAll<0xeb> iPOR; - extern const Internal::PLogicImplAll<0xef> iPXOR; + extern const Internal::MovhlImplAll<0x16> xMOVH; + extern const Internal::MovhlImplAll<0x12> xMOVL; - extern const Internal::SSELogicImpl<0,0x53> iRCPPS; - extern const Internal::SSELogicImpl<0xf3,0x53> iRCPSS; + extern const 
Internal::PLogicImplAll<0xdb> xPAND; + extern const Internal::PLogicImplAll<0xdf> xPANDN; + extern const Internal::PLogicImplAll<0xeb> xPOR; + extern const Internal::PLogicImplAll<0xef> xPXOR; - extern const Internal::SSECompareImplGeneric<0x00> iCMPPS; - extern const Internal::SSECompareImplGeneric<0x66> iCMPPD; - extern const Internal::SSECompareImplGeneric<0xf3> iCMPSS; - extern const Internal::SSECompareImplGeneric<0xf2> iCMPSD; - - extern const Internal::SSECompareImplGeneric<0x00> iCMPPS; - extern const Internal::SSECompareImplGeneric<0x66> iCMPPD; - extern const Internal::SSECompareImplGeneric<0xf3> iCMPSS; - extern const Internal::SSECompareImplGeneric<0xf2> iCMPSD; + extern const Internal::SSEAndNotImpl<0x55> xANDN; + extern const Internal::SSELogicImpl<0,0x53> xRCPPS; + extern const Internal::SSELogicImpl<0xf3,0x53> xRCPSS; + + // ------------------------------------------------------------------------ + + extern const Internal::SSECompareImpl xCMPEQ; + extern const Internal::SSECompareImpl xCMPLT; + extern const Internal::SSECompareImpl xCMPLE; + extern const Internal::SSECompareImpl xCMPUNORD; + extern const Internal::SSECompareImpl xCMPNE; + extern const Internal::SSECompareImpl xCMPNLT; + extern const Internal::SSECompareImpl xCMPNLE; + extern const Internal::SSECompareImpl xCMPORD; } diff --git a/pcsx2/x86/ix86/ix86_jmp.cpp b/pcsx2/x86/ix86/ix86_jmp.cpp index 9189cd21da..15f3fc68f9 100644 --- a/pcsx2/x86/ix86/ix86_jmp.cpp +++ b/pcsx2/x86/ix86/ix86_jmp.cpp @@ -40,11 +40,11 @@ namespace x86Emitter { using namespace Internal; -const JmpCallImplAll iJMP; -const JmpCallImplAll iCALL; +const JmpCallImplAll xJMP; +const JmpCallImplAll xCALL; // ------------------------------------------------------------------------ -void iSmartJump::SetTarget() +void xSmartJump::SetTarget() { u8* target = iGetPtr(); if( m_baseptr == NULL ) return; @@ -67,7 +67,7 @@ void iSmartJump::SetTarget() } } -iSmartJump::~iSmartJump() +xSmartJump::~xSmartJump() { SetTarget(); m_baseptr = NULL; // just in case (sometimes helps in debugging too) @@ -78,7 +78,7 @@ iSmartJump::~iSmartJump() // Writes a jump at the current x86Ptr, which targets a pre-established target address. // (usually a backwards jump) // -// slideForward - used internally by iSmartJump to indicate that the jump target is going +// slideForward - used internally by xSmartJump to indicate that the jump target is going // to slide forward in the event of an 8 bit displacement. // // Using this @@ -96,21 +96,21 @@ __emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, b if( is_s8( displacement8 ) ) { - iWrite( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) ); - iWrite( displacement8 ); + xWrite( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) ); + xWrite( displacement8 ); } else { // Perform a 32 bit jump instead. 
:( if( comparison == Jcc_Unconditional ) - iWrite( 0xe9 ); + xWrite( 0xe9 ); else { - iWrite( 0x0f ); - iWrite( 0x80 | comparison ); + xWrite( 0x0f ); + xWrite( 0x80 | comparison ); } - iWrite( (sptr)target - ((sptr)iGetPtr() + 4) ); + xWrite( (sptr)target - ((sptr)iGetPtr() + 4) ); } } diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index a484579f88..8f07278443 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -35,9 +35,9 @@ using namespace x86Emitter; template< typename ImmType > -static __forceinline iRegister _reghlp( x86IntRegType src ) +static __forceinline xRegister _reghlp( x86IntRegType src ) { - return iRegister( src ); + return xRegister( src ); } static __forceinline ModSibBase _mrmhlp( x86IntRegType src ) @@ -48,43 +48,43 @@ static __forceinline ModSibBase _mrmhlp( x86IntRegType src ) template< typename ImmType > static __forceinline ModSibStrict _mhlp( x86IntRegType src ) { - return ModSibStrict( iAddressReg::Empty, iAddressReg(src) ); + return ModSibStrict( xAddressReg::Empty, xAddressReg(src) ); } template< typename ImmType > static __forceinline ModSibStrict _mhlp2( x86IntRegType src1, x86IntRegType src2 ) { - return ModSibStrict( iAddressReg(src2), iAddressReg(src1) ); + return ModSibStrict( xAddressReg(src2), xAddressReg(src1) ); } ////////////////////////////////////////////////////////////////////////////////////////// // #define DEFINE_LEGACY_HELPER( cod, bits ) \ - emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { i##cod( _reghlp(to), _reghlp(from) ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { i##cod( _reghlp(to), imm ); } \ - emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { i##cod( _reghlp(to), (void*)from ); } \ - emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { i##cod( (void*)to, _reghlp(from) ); } \ - emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { i##cod( ptr##bits[to], imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { i##cod( _mhlp(to) + offset, imm ); } \ - emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _reghlp(to), _mhlp(from) + offset ); } \ - emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { i##cod( _mhlp(to) + offset, _reghlp(from) ); } \ + emitterT void cod##bits##RtoR( x86IntRegType to, x86IntRegType from ) { x##cod( _reghlp(to), _reghlp(from) ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u##bits imm ) { x##cod( _reghlp(to), imm ); } \ + emitterT void cod##bits##MtoR( x86IntRegType to, uptr from ) { x##cod( _reghlp(to), (void*)from ); } \ + emitterT void cod##bits##RtoM( uptr to, x86IntRegType from ) { x##cod( (void*)to, _reghlp(from) ); } \ + emitterT void cod##bits##ItoM( uptr to, u##bits imm ) { x##cod( ptr##bits[to], imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u##bits imm, int offset ) { x##cod( _mhlp(to) + offset, imm ); } \ + emitterT void cod##bits##RmtoR( x86IntRegType to, x86IntRegType from, int offset ) { x##cod( _reghlp(to), _mhlp(from) + offset ); } \ + emitterT void cod##bits##RtoRm( x86IntRegType to, x86IntRegType from, int offset ) { x##cod( _mhlp(to) + offset, _reghlp(from) ); } \ emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ - { i##cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ + { x##cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ emitterT void cod##bits##RmStoR( 
x86IntRegType to, x86IntRegType from1, x86IntRegType from2, int offset ) \ - { i##cod( _reghlp(to), _mhlp2(from1,from2) + offset ); } + { x##cod( _reghlp(to), _mhlp2(from1,from2) + offset ); } #define DEFINE_LEGACY_SHIFT_HELPER( cod, bits ) \ - emitterT void cod##bits##CLtoR( x86IntRegType to ) { i##cod( _reghlp(to), cl ); } \ - emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { i##cod( _reghlp(to), imm ); } \ - emitterT void cod##bits##CLtoM( uptr to ) { i##cod( ptr##bits[to], cl ); } \ - emitterT void cod##bits##ItoM( uptr to, u8 imm ) { i##cod( ptr##bits[to], imm ); } \ - emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { i##cod( _mhlp(to) + offset, imm ); } \ - emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { i##cod( _mhlp(to) + offset, cl ); } + emitterT void cod##bits##CLtoR( x86IntRegType to ) { x##cod( _reghlp(to), cl ); } \ + emitterT void cod##bits##ItoR( x86IntRegType to, u8 imm ) { x##cod( _reghlp(to), imm ); } \ + emitterT void cod##bits##CLtoM( uptr to ) { x##cod( ptr##bits[to], cl ); } \ + emitterT void cod##bits##ItoM( uptr to, u8 imm ) { x##cod( ptr##bits[to], imm ); } \ + emitterT void cod##bits##ItoRm( x86IntRegType to, u8 imm, int offset ) { x##cod( _mhlp(to) + offset, imm ); } \ + emitterT void cod##bits##CLtoRm( x86IntRegType to, int offset ) { x##cod( _mhlp(to) + offset, cl ); } #define DEFINE_LEGACY_ONEREG_HELPER( cod, bits ) \ - emitterT void cod##bits##R( x86IntRegType to ) { i##cod( _reghlp(to) ); } \ - emitterT void cod##bits##M( uptr to ) { i##cod( ptr##bits[to] ); } \ - emitterT void cod##bits##Rm( x86IntRegType to, uptr offset ) { i##cod( _mhlp(to) + offset ); } + emitterT void cod##bits##R( x86IntRegType to ) { x##cod( _reghlp(to) ); } \ + emitterT void cod##bits##M( uptr to ) { x##cod( ptr##bits[to] ); } \ + emitterT void cod##bits##Rm( x86IntRegType to, uptr offset ) { x##cod( _mhlp(to) + offset ); } //emitterT void cod##bits##RtoRmS( x86IntRegType to1, x86IntRegType to2, x86IntRegType from, int offset ) \ // { cod( _mhlp2(to1,to2) + offset, _reghlp(from) ); } \ @@ -133,9 +133,9 @@ DEFINE_OPCODE_ONEREG_LEGACY( NEG ) // ------------------------------------------------------------------------ #define DEFINE_LEGACY_MOVEXTEND( form, destbits, srcbits ) \ - emitterT void MOV##form##destbits##R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { iMOV##form( iRegister##destbits( to ), iRegister##srcbits( from ) ); } \ - emitterT void MOV##form##destbits##Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { iMOV##form( iRegister##destbits( to ), ptr##srcbits[iAddressReg( from ) + offset] ); } \ - emitterT void MOV##form##destbits##M##srcbits##toR( x86IntRegType to, u32 from ) { iMOV##form( iRegister##destbits( to ), ptr##srcbits[from] ); } + emitterT void MOV##form##destbits##R##srcbits##toR( x86IntRegType to, x86IntRegType from ) { xMOV##form( xRegister##destbits( to ), xRegister##srcbits( from ) ); } \ + emitterT void MOV##form##destbits##Rm##srcbits##toR( x86IntRegType to, x86IntRegType from, int offset ) { xMOV##form( xRegister##destbits( to ), ptr##srcbits[xAddressReg( from ) + offset] ); } \ + emitterT void MOV##form##destbits##M##srcbits##toR( x86IntRegType to, u32 from ) { xMOV##form( xRegister##destbits( to ), ptr##srcbits[from] ); } DEFINE_LEGACY_MOVEXTEND( SX, 32, 16 ) DEFINE_LEGACY_MOVEXTEND( ZX, 32, 16 ) @@ -145,164 +145,164 @@ DEFINE_LEGACY_MOVEXTEND( ZX, 32, 8 ) DEFINE_LEGACY_MOVEXTEND( SX, 16, 8 ) DEFINE_LEGACY_MOVEXTEND( ZX, 16, 8 ) -emitterT void TEST32ItoR( x86IntRegType to, 
u32 from ) { iTEST( iRegister32(to), from ); } -emitterT void TEST32ItoM( uptr to, u32 from ) { iTEST( ptr32[to], from ); } -emitterT void TEST32RtoR( x86IntRegType to, x86IntRegType from ) { iTEST( iRegister32(to), iRegister32(from) ); } -emitterT void TEST32ItoRm( x86IntRegType to, u32 from ) { iTEST( ptr32[iAddressReg(to)], from ); } +emitterT void TEST32ItoR( x86IntRegType to, u32 from ) { xTEST( xRegister32(to), from ); } +emitterT void TEST32ItoM( uptr to, u32 from ) { xTEST( ptr32[to], from ); } +emitterT void TEST32RtoR( x86IntRegType to, x86IntRegType from ) { xTEST( xRegister32(to), xRegister32(from) ); } +emitterT void TEST32ItoRm( x86IntRegType to, u32 from ) { xTEST( ptr32[xAddressReg(to)], from ); } -emitterT void TEST16ItoR( x86IntRegType to, u16 from ) { iTEST( iRegister16(to), from ); } -emitterT void TEST16ItoM( uptr to, u16 from ) { iTEST( ptr16[to], from ); } -emitterT void TEST16RtoR( x86IntRegType to, x86IntRegType from ) { iTEST( iRegister16(to), iRegister16(from) ); } -emitterT void TEST16ItoRm( x86IntRegType to, u16 from ) { iTEST( ptr16[iAddressReg(to)], from ); } +emitterT void TEST16ItoR( x86IntRegType to, u16 from ) { xTEST( xRegister16(to), from ); } +emitterT void TEST16ItoM( uptr to, u16 from ) { xTEST( ptr16[to], from ); } +emitterT void TEST16RtoR( x86IntRegType to, x86IntRegType from ) { xTEST( xRegister16(to), xRegister16(from) ); } +emitterT void TEST16ItoRm( x86IntRegType to, u16 from ) { xTEST( ptr16[xAddressReg(to)], from ); } -emitterT void TEST8ItoR( x86IntRegType to, u8 from ) { iTEST( iRegister8(to), from ); } -emitterT void TEST8ItoM( uptr to, u8 from ) { iTEST( ptr8[to], from ); } -emitterT void TEST8RtoR( x86IntRegType to, x86IntRegType from ) { iTEST( iRegister8(to), iRegister8(from) ); } -emitterT void TEST8ItoRm( x86IntRegType to, u8 from ) { iTEST( ptr8[iAddressReg(to)], from ); } +emitterT void TEST8ItoR( x86IntRegType to, u8 from ) { xTEST( xRegister8(to), from ); } +emitterT void TEST8ItoM( uptr to, u8 from ) { xTEST( ptr8[to], from ); } +emitterT void TEST8RtoR( x86IntRegType to, x86IntRegType from ) { xTEST( xRegister8(to), xRegister8(from) ); } +emitterT void TEST8ItoRm( x86IntRegType to, u8 from ) { xTEST( ptr8[xAddressReg(to)], from ); } // mov r32 to [r32<(to), from ); + xAND( _reghlp(to), from ); } emitterT void AND32I8toM( uptr to, s8 from ) { - iAND( ptr8[to], from ); + xAND( ptr8[to], from ); } /* cmove r32 to r32*/ emitterT void CMOVE32RtoR( x86IntRegType to, x86IntRegType from ) { - iCMOVE( iRegister32(to), iRegister32(from) ); + xCMOVE( xRegister32(to), xRegister32(from) ); } // shld imm8 to r32 emitterT void SHLD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) { - iSHLD( iRegister32(to), iRegister32(from), shift ); + xSHLD( xRegister32(to), xRegister32(from), shift ); } // shrd imm8 to r32 emitterT void SHRD32ItoR( x86IntRegType to, x86IntRegType from, u8 shift ) { - iSHRD( iRegister32(to), iRegister32(from), shift ); + xSHRD( xRegister32(to), xRegister32(from), shift ); } /* mul eax by r32 to edx:eax */ -emitterT void MUL32R( x86IntRegType from ) { iUMUL( iRegister32(from) ); } +emitterT void MUL32R( x86IntRegType from ) { xUMUL( xRegister32(from) ); } /* imul eax by r32 to edx:eax */ -emitterT void IMUL32R( x86IntRegType from ) { iMUL( iRegister32(from) ); } +emitterT void IMUL32R( x86IntRegType from ) { xMUL( xRegister32(from) ); } /* mul eax by m32 to edx:eax */ -emitterT void MUL32M( u32 from ) { iUMUL( ptr32[from] ); } +emitterT void MUL32M( u32 from ) { xUMUL( ptr32[from] ); } /* imul eax by m32 to edx:eax 
*/ -emitterT void IMUL32M( u32 from ) { iMUL( ptr32[from] ); } +emitterT void IMUL32M( u32 from ) { xMUL( ptr32[from] ); } /* imul r32 by r32 to r32 */ emitterT void IMUL32RtoR( x86IntRegType to, x86IntRegType from ) { - iMUL( iRegister32(to), iRegister32(from) ); + xMUL( xRegister32(to), xRegister32(from) ); } /* div eax by r32 to edx:eax */ -emitterT void DIV32R( x86IntRegType from ) { iUDIV( iRegister32(from) ); } +emitterT void DIV32R( x86IntRegType from ) { xUDIV( xRegister32(from) ); } /* idiv eax by r32 to edx:eax */ -emitterT void IDIV32R( x86IntRegType from ) { iDIV( iRegister32(from) ); } +emitterT void IDIV32R( x86IntRegType from ) { xDIV( xRegister32(from) ); } /* div eax by m32 to edx:eax */ -emitterT void DIV32M( u32 from ) { iUDIV( ptr32[from] ); } +emitterT void DIV32M( u32 from ) { xUDIV( ptr32[from] ); } /* idiv eax by m32 to edx:eax */ -emitterT void IDIV32M( u32 from ) { iDIV( ptr32[from] ); } +emitterT void IDIV32M( u32 from ) { xDIV( ptr32[from] ); } emitterT void LEA32RtoR(x86IntRegType to, x86IntRegType from, s32 offset) { - iLEA( iRegister32( to ), ptr[iAddressReg(from)+offset] ); + xLEA( xRegister32( to ), ptr[xAddressReg(from)+offset] ); } emitterT void LEA32RRtoR(x86IntRegType to, x86IntRegType from0, x86IntRegType from1) { - iLEA( iRegister32( to ), ptr[iAddressReg(from0)+iAddressReg(from1)] ); + xLEA( xRegister32( to ), ptr[xAddressReg(from0)+xAddressReg(from1)] ); } // Don't inline recursive functions emitterT void LEA32RStoR(x86IntRegType to, x86IntRegType from, u32 scale) { - iLEA( iRegister32( to ), ptr[iAddressReg(from)*(1< static __forceinline bool is_s8( T imm ) { return (s8)imm == (s32)imm; } template< typename T > -static __forceinline void iWrite( T val ) +static __forceinline void xWrite( T val ) { *(T*)x86Ptr = val; x86Ptr += sizeof(T); @@ -159,7 +159,7 @@ namespace x86Emitter static const int ModRm_UseSib = 4; // same index value as ESP (used in RM field) static const int ModRm_UseDisp32 = 5; // same index value as EBP (used in Mod field) - class iAddressInfo; + class xAddressInfo; class ModSibBase; extern void iSetPtr( void* ptr ); @@ -170,12 +170,12 @@ namespace x86Emitter static __forceinline void write8( u8 val ) { - iWrite( val ); + xWrite( val ); } static __forceinline void write16( u16 val ) { - iWrite( val ); + xWrite( val ); } static __forceinline void write24( u32 val ) @@ -186,30 +186,30 @@ namespace x86Emitter static __forceinline void write32( u32 val ) { - iWrite( val ); + xWrite( val ); } static __forceinline void write64( u64 val ) { - iWrite( val ); + xWrite( val ); } ////////////////////////////////////////////////////////////////////////////////////////// - // iRegister - // Unless templating some fancy stuff, use the friendly iRegister32/16/8 typedefs instead. + // xRegister + // Unless templating some fancy stuff, use the friendly xRegister32/16/8 typedefs instead. 
// template< typename OperandType > - class iRegister + class xRegister { public: static const uint OperandSize = sizeof( OperandType ); - static const iRegister Empty; // defined as an empty/unused value (-1) + static const xRegister Empty; // defined as an empty/unused value (-1) int Id; - iRegister( const iRegister& src ) : Id( src.Id ) {} - iRegister(): Id( -1 ) {} - explicit iRegister( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + xRegister( const xRegister& src ) : Id( src.Id ) {} + xRegister(): Id( -1 ) {} + explicit xRegister( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } bool IsEmpty() const { return Id < 0; } @@ -219,17 +219,17 @@ namespace x86Emitter // returns true if the register is a valid MMX or XMM register. bool IsSIMD() const { return OperandSize == 8 || OperandSize == 16; } - bool operator==( const iRegister& src ) const + bool operator==( const xRegister& src ) const { return (Id == src.Id); } - bool operator!=( const iRegister& src ) const + bool operator!=( const xRegister& src ) const { return (Id != src.Id); } - iRegister& operator=( const iRegister& src ) + xRegister& operator=( const xRegister& src ) { Id = src.Id; return *this; @@ -239,20 +239,20 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // template< typename OperandType > - class iRegisterSIMD : public iRegister + class xRegisterSIMD : public xRegister { public: - static const iRegisterSIMD Empty; // defined as an empty/unused value (-1) + static const xRegisterSIMD Empty; // defined as an empty/unused value (-1) public: - iRegisterSIMD(): iRegister() {} - iRegisterSIMD( const iRegisterSIMD& src ) : iRegister( src.Id ) {} - iRegisterSIMD( const iRegister& src ) : iRegister( src ) {} - explicit iRegisterSIMD( int regId ) : iRegister( regId ) {} + xRegisterSIMD(): xRegister() {} + xRegisterSIMD( const xRegisterSIMD& src ) : xRegister( src.Id ) {} + xRegisterSIMD( const xRegister& src ) : xRegister( src ) {} + explicit xRegisterSIMD( int regId ) : xRegister( regId ) {} - iRegisterSIMD& operator=( const iRegisterSIMD& src ) + xRegisterSIMD& operator=( const xRegisterSIMD& src ) { - iRegister::Id = src.Id; + xRegister::Id = src.Id; return *this; } }; @@ -266,66 +266,66 @@ namespace x86Emitter // all about the the templated code in haphazard fashion. Yay.. >_< // - typedef iRegisterSIMD iRegisterSSE; - typedef iRegisterSIMD iRegisterMMX; - typedef iRegister iRegister32; - typedef iRegister iRegister16; - typedef iRegister iRegister8; + typedef xRegisterSIMD xRegisterSSE; + typedef xRegisterSIMD xRegisterMMX; + typedef xRegister xRegister32; + typedef xRegister xRegister16; + typedef xRegister xRegister8; - class iRegisterCL : public iRegister8 + class xRegisterCL : public xRegister8 { public: - iRegisterCL(): iRegister8( 1 ) {} + xRegisterCL(): xRegister8( 1 ) {} }; - extern const iRegisterSSE + extern const xRegisterSSE xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; - extern const iRegisterMMX + extern const xRegisterMMX mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7; - extern const iRegister32 + extern const xRegister32 eax, ebx, ecx, edx, esi, edi, ebp, esp; - extern const iRegister16 + extern const xRegister16 ax, bx, cx, dx, si, di, bp, sp; - extern const iRegister8 + extern const xRegister8 al, dl, bl, ah, ch, dh, bh; - extern const iRegisterCL cl; // I'm special! + extern const xRegisterCL cl; // I'm special! 
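	// A small addressing sketch, assuming the register constants above together with
	// the xAddressReg/xAddressInfo operators and ptr[]/ptr32[] indexers introduced
	// below (register ids follow standard x86 encoding, so ebx=3, esi=6):
	//
	//    xAddressReg base( ebx ), index( esi );
	//    xMOV( ptr32[base + index*4 + 0x10], 0x1234 );  // mov dword [ebx+esi*4+16],0x1234
	//    xLEA( edi, ptr[base + 8] );                    // lea edi,[ebx+8]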
////////////////////////////////////////////////////////////////////////////////////////// // Use 32 bit registers as out index register (for ModSib memory address calculations) - // Only iAddressReg provides operators for constructing iAddressInfo types. + // Only xAddressReg provides operators for constructing xAddressInfo types. // - class iAddressReg : public iRegister32 + class xAddressReg : public xRegister32 { public: - static const iAddressReg Empty; // defined as an empty/unused value (-1) + static const xAddressReg Empty; // defined as an empty/unused value (-1) public: - iAddressReg(): iRegister32() {} - iAddressReg( const iAddressReg& src ) : iRegister32( src.Id ) {} - iAddressReg( const iRegister32& src ) : iRegister32( src ) {} - explicit iAddressReg( int regId ) : iRegister32( regId ) {} + xAddressReg(): xRegister32() {} + xAddressReg( const xAddressReg& src ) : xRegister32( src.Id ) {} + xAddressReg( const xRegister32& src ) : xRegister32( src ) {} + explicit xAddressReg( int regId ) : xRegister32( regId ) {} // Returns true if the register is the stack pointer: ESP. bool IsStackPointer() const { return Id == 4; } - iAddressInfo operator+( const iAddressReg& right ) const; - iAddressInfo operator+( const iAddressInfo& right ) const; - iAddressInfo operator+( s32 right ) const; + xAddressInfo operator+( const xAddressReg& right ) const; + xAddressInfo operator+( const xAddressInfo& right ) const; + xAddressInfo operator+( s32 right ) const; - iAddressInfo operator*( u32 factor ) const; - iAddressInfo operator<<( u32 shift ) const; + xAddressInfo operator*( u32 factor ) const; + xAddressInfo operator<<( u32 shift ) const; - iAddressReg& operator=( const iRegister32& src ) + xAddressReg& operator=( const xRegister32& src ) { Id = src.Id; return *this; @@ -334,16 +334,16 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // - class iAddressInfo + class xAddressInfo { public: - iAddressReg Base; // base register (no scale) - iAddressReg Index; // index reg gets multiplied by the scale + xAddressReg Base; // base register (no scale) + xAddressReg Index; // index reg gets multiplied by the scale int Factor; // scale applied to the index register, in factor form (not a shift!) 
s32 Displacement; // address displacement public: - __forceinline iAddressInfo( const iAddressReg& base, const iAddressReg& index, int factor=1, s32 displacement=0 ) : + __forceinline xAddressInfo( const xAddressReg& base, const xAddressReg& index, int factor=1, s32 displacement=0 ) : Base( base ), Index( index ), Factor( factor ), @@ -351,7 +351,7 @@ namespace x86Emitter { } - __forceinline explicit iAddressInfo( const iAddressReg& index, int displacement=0 ) : + __forceinline explicit xAddressInfo( const xAddressReg& index, int displacement=0 ) : Base(), Index( index ), Factor(0), @@ -359,7 +359,7 @@ namespace x86Emitter { } - __forceinline explicit iAddressInfo( s32 displacement ) : + __forceinline explicit xAddressInfo( s32 displacement ) : Base(), Index(), Factor(0), @@ -367,24 +367,24 @@ namespace x86Emitter { } - static iAddressInfo FromIndexReg( const iAddressReg& index, int scale=0, s32 displacement=0 ); + static xAddressInfo FromIndexReg( const xAddressReg& index, int scale=0, s32 displacement=0 ); public: bool IsByteSizeDisp() const { return is_s8( Displacement ); } - __forceinline iAddressInfo& Add( s32 imm ) + __forceinline xAddressInfo& Add( s32 imm ) { Displacement += imm; return *this; } - __forceinline iAddressInfo& Add( const iAddressReg& src ); - __forceinline iAddressInfo& Add( const iAddressInfo& src ); + __forceinline xAddressInfo& Add( const xAddressReg& src ); + __forceinline xAddressInfo& Add( const xAddressInfo& src ); - __forceinline iAddressInfo operator+( const iAddressReg& right ) const { return iAddressInfo( *this ).Add( right ); } - __forceinline iAddressInfo operator+( const iAddressInfo& right ) const { return iAddressInfo( *this ).Add( right ); } - __forceinline iAddressInfo operator+( s32 imm ) const { return iAddressInfo( *this ).Add( imm ); } - __forceinline iAddressInfo operator-( s32 imm ) const { return iAddressInfo( *this ).Add( -imm ); } + __forceinline xAddressInfo operator+( const xAddressReg& right ) const { return xAddressInfo( *this ).Add( right ); } + __forceinline xAddressInfo operator+( const xAddressInfo& right ) const { return xAddressInfo( *this ).Add( right ); } + __forceinline xAddressInfo operator+( s32 imm ) const { return xAddressInfo( *this ).Add( imm ); } + __forceinline xAddressInfo operator-( s32 imm ) const { return xAddressInfo( *this ).Add( -imm ); } }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -392,25 +392,25 @@ namespace x86Emitter // // This class serves two purposes: It houses 'reduced' ModRM/SIB info only, which means // that the Base, Index, Scale, and Displacement values are all in the correct arrange- - // ments, and it serves as a type-safe layer between the iRegister's operators (which - // generate iAddressInfo types) and the emitter's ModSib instruction forms. Without this, - // the iRegister would pass as a ModSib type implicitly, and that would cause ambiguity + // ments, and it serves as a type-safe layer between the xRegister's operators (which + // generate xAddressInfo types) and the emitter's ModSib instruction forms. Without this, + // the xRegister would pass as a ModSib type implicitly, and that would cause ambiguity // on a number of instructions. // - // End users should always use iAddressInfo instead. + // End users should always use xAddressInfo instead. 
// class ModSibBase { public: - iAddressReg Base; // base register (no scale) - iAddressReg Index; // index reg gets multiplied by the scale + xAddressReg Base; // base register (no scale) + xAddressReg Index; // index reg gets multiplied by the scale uint Scale; // scale applied to the index register, in scale/shift form s32 Displacement; // offset applied to the Base/Index registers. public: - explicit ModSibBase( const iAddressInfo& src ); + explicit ModSibBase( const xAddressInfo& src ); explicit ModSibBase( s32 disp ); - ModSibBase( iAddressReg base, iAddressReg index, int scale=0, s32 displacement=0 ); + ModSibBase( xAddressReg base, xAddressReg index, int scale=0, s32 displacement=0 ); bool IsByteSizeDisp() const { return is_s8( Displacement ); } @@ -437,9 +437,9 @@ namespace x86Emitter public: static const uint OperandSize = sizeof( OperandType ); - __forceinline explicit ModSibStrict( const iAddressInfo& src ) : ModSibBase( src ) {} + __forceinline explicit ModSibStrict( const xAddressInfo& src ) : ModSibBase( src ) {} __forceinline explicit ModSibStrict( s32 disp ) : ModSibBase( disp ) {} - __forceinline ModSibStrict( iAddressReg base, iAddressReg index, int scale=0, s32 displacement=0 ) : + __forceinline ModSibStrict( xAddressReg base, xAddressReg index, int scale=0, s32 displacement=0 ) : ModSibBase( base, index, scale, displacement ) {} __forceinline ModSibStrict& Add( s32 imm ) @@ -453,20 +453,20 @@ namespace x86Emitter }; ////////////////////////////////////////////////////////////////////////////////////////// - // iAddressIndexerBase - This is a static class which provisions our ptr[] syntax. + // xAddressIndexerBase - This is a static class which provisions our ptr[] syntax. // - struct iAddressIndexerBase + struct xAddressIndexerBase { // passthrough instruction, allows ModSib to pass silently through ptr translation // without doing anything and without compiler error. const ModSibBase& operator[]( const ModSibBase& src ) const { return src; } - __forceinline ModSibBase operator[]( iAddressReg src ) const + __forceinline ModSibBase operator[]( xAddressReg src ) const { - return ModSibBase( src, iAddressReg::Empty ); + return ModSibBase( src, xAddressReg::Empty ); } - __forceinline ModSibBase operator[]( const iAddressInfo& src ) const + __forceinline ModSibBase operator[]( const xAddressInfo& src ) const { return ModSibBase( src ); } @@ -481,7 +481,7 @@ namespace x86Emitter return ModSibBase( (uptr)src ); } - iAddressIndexerBase() {} // appease the GCC gods + xAddressIndexerBase() {} // appease the GCC gods }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -489,7 +489,7 @@ namespace x86Emitter // specification of the operand size for ImmToMem operations. // template< typename OperandType > - struct iAddressIndexer + struct xAddressIndexer { static const uint OperandSize = sizeof( OperandType ); @@ -497,12 +497,12 @@ namespace x86Emitter // without doing anything and without compiler error. 
const ModSibStrict& operator[]( const ModSibStrict& src ) const { return src; } - __forceinline ModSibStrict operator[]( iAddressReg src ) const + __forceinline ModSibStrict operator[]( xAddressReg src ) const { - return ModSibStrict( src, iAddressReg::Empty ); + return ModSibStrict( src, xAddressReg::Empty ); } - __forceinline ModSibStrict operator[]( const iAddressInfo& src ) const + __forceinline ModSibStrict operator[]( const xAddressInfo& src ) const { return ModSibStrict( src ); } @@ -517,17 +517,17 @@ namespace x86Emitter return ModSibStrict( (uptr)src ); } - iAddressIndexer() {} // GCC initialization dummy + xAddressIndexer() {} // GCC initialization dummy }; // ptr[] - use this form for instructions which can resolve the address operand size from // the other register operand sizes. - extern const iAddressIndexerBase ptr; - extern const iAddressIndexer ptr128; - extern const iAddressIndexer ptr64; - extern const iAddressIndexer ptr32; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms - extern const iAddressIndexer ptr16; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms - extern const iAddressIndexer ptr8; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms + extern const xAddressIndexerBase ptr; + extern const xAddressIndexer ptr128; + extern const xAddressIndexer ptr64; + extern const xAddressIndexer ptr32; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms + extern const xAddressIndexer ptr16; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms + extern const xAddressIndexer ptr8; // explicitly typed addressing, usually needed for '[dest],imm' instruction forms ////////////////////////////////////////////////////////////////////////////////////////// // JccComparisonType - enumerated possibilities for inspired code branching! @@ -561,26 +561,41 @@ namespace x86Emitter // Not supported yet: //E3 cb JECXZ rel8 Jump short if ECX register is 0. + ////////////////////////////////////////////////////////////////////////////////////////// + // SSE2_ComparisonType - enumerated possibilities for SIMD data comparison! + // + enum SSE2_ComparisonType + { + SSE2_Equal = 0, + SSE2_Less, + SSE2_LessOrEqual, + SSE2_Unordered, + SSE2_NotEqual, + SSE2_NotLess, + SSE2_NotLessOrEqual, + SSE2_Ordered + }; + ////////////////////////////////////////////////////////////////////////////////////////// - // iSmartJump + // xSmartJump // This class provides an interface for generating forward-based j8's or j32's "smartly" // as per the measured displacement distance. If the displacement is a valid s8, then // a j8 is inserted, else a j32. // - // Note: This class is inherently unsafe, and so it's recommended to use iForwardJump8/32 + // Note: This class is inherently unsafe, and so it's recommended to use xForwardJump8/32 // whenever it is known that the jump destination is (or is not) short. Only use - // iSmartJump in cases where it's unknown what jump encoding will be ideal. + // xSmartJump in cases where it's unknown what jump encoding will be ideal. // - // Important: Use this tool with caution! iSmartJump cannot be used in cases where jump + // Important: Use this tool with caution! xSmartJump cannot be used in cases where jump // targets overlap, since the writeback of the second target will alter the position of // the first target (which breaks the relative addressing). 
To assist in avoiding such - // errors, iSmartJump works based on C++ block scope, where the destruction of the - // iSmartJump object (invoked by a '}') signals the target of the jump. Example: + // errors, xSmartJump works based on C++ block scope, where the destruction of the + // xSmartJump object (invoked by a '}') signals the target of the jump. Example: // // { // iCMP( EAX, ECX ); - // iSmartJump jumpTo( Jcc_Above ); + // xSmartJump jumpTo( Jcc_Above ); // [... conditional code ...] // } // smartjump targets this spot. // @@ -593,7 +608,7 @@ namespace x86Emitter // speed benefits in the form of L1/L2 cache clutter, on any CPU. They're also notably // faster on P4's, and mildly faster on AMDs. (Core2's and i7's don't care) // - class iSmartJump : public NoncopyableObject + class xSmartJump : public NoncopyableObject { protected: u8* m_baseptr; // base address of the instruction (passed to the instruction emitter) @@ -607,12 +622,12 @@ namespace x86Emitter } JccComparisonType GetCondition() const { return m_cc; } - virtual ~iSmartJump(); + virtual ~xSmartJump(); // ------------------------------------------------------------------------ // ccType - Comparison type to be written back to the jump instruction position. // - iSmartJump( JccComparisonType ccType ) + xSmartJump( JccComparisonType ccType ) { jASSUME( ccType != Jcc_Unknown ); m_baseptr = iGetPtr(); @@ -625,12 +640,12 @@ namespace x86Emitter }; ////////////////////////////////////////////////////////////////////////////////////////// - // iForwardJump - // Primary use of this class is through the various iForwardJA8/iForwardJLE32/etc. helpers + // xForwardJump + // Primary use of this class is through the various xForwardJA8/xForwardJLE32/etc. helpers // defined later in this header. :) // template< typename OperandType > - class iForwardJump + class xForwardJump { public: static const uint OperandSize = sizeof( OperandType ); @@ -641,7 +656,7 @@ namespace x86Emitter // The jump instruction is emitted at the point of object construction. The conditional // type must be valid (Jcc_Unknown generates an assertion). - iForwardJump( JccComparisonType cctype = Jcc_Unconditional ); + xForwardJump( JccComparisonType cctype = Jcc_Unconditional ); // Sets the jump target by writing back the current x86Ptr to the jump instruction. // This method can be called multiple times, re-writing the jump instruction's target @@ -656,8 +671,8 @@ namespace x86Emitter extern void ModRM( uint mod, uint reg, uint rm ); extern void ModRM_Direct( uint reg, uint rm ); extern void SibSB( u32 ss, u32 index, u32 base ); - extern void iWriteDisp( int regfield, s32 displacement ); - extern void iWriteDisp( int regfield, const void* address ); + extern void xWriteDisp( int regfield, s32 displacement ); + extern void xWriteDisp( int regfield, const void* address ); extern void EmitSibMagic( uint regfield, const ModSibBase& info ); From cc48702b17db423fd12b119282f6992a7ba73c0a Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Mon, 20 Apr 2009 15:22:02 +0000 Subject: [PATCH 115/143] Emitter: Implemented SQRT/RSQRT/UCOM and all variations of SSE CVTs (omg those were a nightmare) [also patched old emitter to use new MOVD implementations -- missed those earlier]. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1031 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/group1.h | 21 +- pcsx2/x86/ix86/implement/jmpcall.h | 2 +- pcsx2/x86/ix86/implement/xmm/movqss.h | 122 ++++++-- pcsx2/x86/ix86/ix86.cpp | 64 +++- pcsx2/x86/ix86/ix86_instructions.h | 60 +++- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 412 ++++++++------------------ pcsx2/x86/ix86/ix86_types.h | 32 +- 7 files changed, 355 insertions(+), 358 deletions(-) diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index 3979e2cff4..0fb4640e5b 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -164,8 +164,8 @@ class G1LogicImpl_PlusSSE : public Group1ImplAll public: using Group1ImplAll::operator(); - const SSELogicImpl<0x00,OpcodeSSE> PS; - const SSELogicImpl<0x66,OpcodeSSE> PD; + const SSELogicImpl<0x00,OpcodeSSE> PS; // packed single precision + const SSELogicImpl<0x66,OpcodeSSE> PD; // packed double precision G1LogicImpl_PlusSSE() {} }; @@ -179,8 +179,8 @@ class G1ArithmeticImpl_PlusSSE : public G1LogicImpl_PlusSSE::operator(); - const SSELogicImpl<0xf3,OpcodeSSE> SS; - const SSELogicImpl<0xf2,OpcodeSSE> SD; + const SSELogicImpl<0xf3,OpcodeSSE> SS; // scalar single precision + const SSELogicImpl<0xf2,OpcodeSSE> SD; // scalar double precision G1ArithmeticImpl_PlusSSE() {} }; @@ -191,18 +191,19 @@ class G1CompareImpl_PlusSSE : Group1ImplAll< G1Type_CMP > protected: template< u8 Prefix > struct Woot { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + Woot() {} }; public: using Group1ImplAll< G1Type_CMP >::operator(); - Woot<0x00> PS; - Woot<0x66> PD; - Woot<0xf3> SS; - Woot<0xf2> SD; + const Woot<0x00> PS; + const Woot<0x66> PD; + const Woot<0xf3> SS; + const Woot<0xf2> SD; G1CompareImpl_PlusSSE() {} //GCWhat? }; diff --git a/pcsx2/x86/ix86/implement/jmpcall.h b/pcsx2/x86/ix86/implement/jmpcall.h index 453f2d4e14..7858b07a92 100644 --- a/pcsx2/x86/ix86/implement/jmpcall.h +++ b/pcsx2/x86/ix86/implement/jmpcall.h @@ -70,7 +70,7 @@ public: __forceinline void operator()( const T* func ) const { if( isJmp ) - iJccKnownTarget( Jcc_Unconditional, (void*)func ); + iJccKnownTarget( Jcc_Unconditional, (void*)(uptr)func ); else { // calls are relative to the instruction after this one, and length is diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index 99c3cbb912..412630cd20 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -21,17 +21,7 @@ ////////////////////////////////////////////////////////////////////////////////////////// // MMX / SSE Helper Functions! 
-template< typename T > -__emitinline void SimdPrefix( u8 opcode, u8 prefix=0 ) -{ - if( sizeof( T ) == 16 && prefix != 0 ) - { - xWrite( 0x0f00 | prefix ); - xWrite( opcode ); - } - else - xWrite( (opcode<<8) | 0x0f ); -} +extern void SimdPrefix( u8 prefix, u8 opcode ); // ------------------------------------------------------------------------ // xmm emitter helpers for xmm instruction with prefixes. @@ -40,23 +30,23 @@ __emitinline void SimdPrefix( u8 opcode, u8 prefix=0 ) // instructions violate this "guideline.") // template< typename T, typename T2 > -__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& to, const xRegister& from ) +__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) { - SimdPrefix( opcode, prefix ); + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); ModRM_Direct( to.Id, from.Id ); } template< typename T > -void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const ModSibBase& sib ) +void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) { - SimdPrefix( opcode, prefix ); + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); EmitSibMagic( reg.Id, sib ); } template< typename T > -__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const void* data ) +__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) { - SimdPrefix( opcode, prefix ); + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); xWriteDisp( reg.Id, data ); } @@ -68,21 +58,21 @@ __emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, con template< typename T, typename T2 > __emitinline void writeXMMop( u8 opcode, const xRegister& to, const xRegister& from ) { - SimdPrefix( opcode ); + SimdPrefix( 0, opcode ); ModRM_Direct( to.Id, from.Id ); } template< typename T > void writeXMMop( u8 opcode, const xRegister& reg, const ModSibBase& sib ) { - SimdPrefix( opcode ); + SimdPrefix( 0, opcode ); EmitSibMagic( reg.Id, sib ); } template< typename T > __emitinline void writeXMMop( u8 opcode, const xRegister& reg, const void* data ) { - SimdPrefix( opcode ); + SimdPrefix( 0, opcode ); xWriteDisp( reg.Id, data ); } @@ -170,6 +160,34 @@ public: SSELogicImpl() {} //GCWho? }; +// ------------------------------------------------------------------------ +// For implementing MMX/SSE operations which the destination *must* be a register, but the source +// can be regDirect or ModRM (indirect). +// +template< u8 Prefix, u8 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > +class SSEImpl_DestRegForm +{ +public: + __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __noinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + + SSEImpl_DestRegForm() {} //GCWho? 
+}; + +// ------------------------------------------------------------------------ +template< u8 OpcodeSSE > +class SSEImpl_PSPD_SSSD +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> PS; // packed single precision + const SSELogicImpl<0x66,OpcodeSSE> PD; // packed double precision + const SSELogicImpl<0xf3,OpcodeSSE> SS; // scalar single precision + const SSELogicImpl<0xf2,OpcodeSSE> SD; // scalar double precision + + SSEImpl_PSPD_SSSD() {} //GChow? +}; + // ------------------------------------------------------------------------ // template< u8 OpcodeSSE > @@ -178,10 +196,62 @@ class SSEAndNotImpl public: const SSELogicImpl<0x00,OpcodeSSE> PS; const SSELogicImpl<0x66,OpcodeSSE> PD; - SSEAndNotImpl() {} }; +// ------------------------------------------------------------------------ +// For instructions that have SS/SD form only (UCOMI, etc) +// AltPrefix - prefixed used for doubles (SD form). +template< u8 AltPrefix, u8 OpcodeSSE > +class SSEImpl_SS_SD +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> SS; + const SSELogicImpl SD; + SSEImpl_SS_SD() {} +}; + +// ------------------------------------------------------------------------ +// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions) +template< u8 OpcodeSSE > +class SSE_rSqrtImpl +{ +public: + const SSELogicImpl<0x00,OpcodeSSE> PS; + const SSELogicImpl<0xf3,OpcodeSSE> SS; + SSE_rSqrtImpl() {} +}; + +// ------------------------------------------------------------------------ +// For instructions that have PS/SS/SD form only (most commonly Sqrt functions) +template< u8 OpcodeSSE > +class SSE_SqrtImpl : public SSE_rSqrtImpl +{ +public: + const SSELogicImpl<0xf2,OpcodeSSE> SD; + SSE_SqrtImpl() {} +}; + +// ------------------------------------------------------------------------ +template< u8 OpcodeSSE > +class SSEImpl_Shuffle +{ +protected: + template< u8 Prefix > struct Woot + { + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + Woot() {} + }; + +public: + const Woot<0x00> PS; + const Woot<0x66> PD; + + SSEImpl_Shuffle() {} //GCWhat? +}; + // ------------------------------------------------------------------------ template< SSE2_ComparisonType CType > class SSECompareImpl @@ -192,13 +262,13 @@ protected: __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + Woot() {} }; public: - Woot<0x00> PS; - Woot<0x66> PD; - Woot<0xf3> SS; - Woot<0xf2> SD; - + const Woot<0x00> PS; + const Woot<0x66> PD; + const Woot<0xf3> SS; + const Woot<0xf2> SD; SSECompareImpl() {} //GCWhat? 
}; diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 2affc9c74e..2a2eb665a7 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -138,8 +138,6 @@ namespace Internal __forceinline void ModRM( uint mod, uint reg, uint rm ) { xWrite( (mod << 6) | (reg << 3) | rm ); - //*(u32*)x86Ptr = (mod << 6) | (reg << 3) | rm; - //x86Ptr++; } __forceinline void ModRM_Direct( uint reg, uint rm ) @@ -150,8 +148,6 @@ namespace Internal __forceinline void SibSB( u32 ss, u32 index, u32 base ) { xWrite( (ss << 6) | (index << 3) | base ); - //*(u32*)x86Ptr = (ss << 6) | (index << 3) | base; - //x86Ptr++; } __forceinline void xWriteDisp( int regfield, s32 displacement ) @@ -645,6 +641,17 @@ __emitinline void xBSWAP( const xRegister32& to ) // MMX / XMM Instructions // (these will get put in their own file later) +__emitinline void Internal::SimdPrefix( u8 prefix, u8 opcode ) +{ + if( prefix != 0 ) + { + xWrite( 0x0f00 | prefix ); + xWrite( opcode ); + } + else + xWrite( (opcode<<8) | 0x0f ); +} + const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS; const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS; const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD; @@ -670,11 +677,50 @@ const PLogicImplAll<0xef> xPXOR; const SSEAndNotImpl<0x55> xANDN; -// Compute Reciprocal Packed Single-Precision Floating-Point Values -const SSELogicImpl<0,0x53> xRCPPS; +const SSEImpl_SS_SD<0x66,0x2e> xUCOMI; +const SSE_rSqrtImpl<0x53> xRCP; +const SSE_rSqrtImpl<0x52> xRSQRT; +const SSE_SqrtImpl<0x51> xSQRT; -// Compute Reciprocal of Scalar Single-Precision Floating-Point Value -const SSELogicImpl<0xf3,0x53> xRCPSS; +const SSEImpl_PSPD_SSSD<0x5f> xMAX; +const SSEImpl_PSPD_SSSD<0x5d> xMIN; +const SSEImpl_Shuffle<0xc6> xSHUF; + +// ------------------------------------------------------------------------ +// SSE Conversion Operations, as looney as they are. +// +// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing +// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]). 
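// A couple of hedged usage sketches (names such as 'src' and 'srcF' are hypothetical locals;
// the register and pointer forms mirror the legacy wrappers defined in ix86_legacy_sse.cpp below):
//
//    xSQRT.SS( xRegisterSSE(0), xRegisterSSE(1) );    // grouped PS/PD/SS/SD forms hang off one instance
//    xCMPEQ.PS( xRegisterSSE(0), xRegisterSSE(1) );
//
//    xCVTSI2SS( xRegisterSSE(0), (u32*)&src );        // m32 source must be an explicit u32*
//    xCVTSI2SS( xRegisterSSE(0), ptr32[&src] );       // ...or use the ptr32[] typed indexer
//    xCVTTSS2SI( xRegister32(0), (u32*)&srcF );       // truncating convert of an m32 into a 32-bit GPR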
+// +const SSEImpl_DestRegForm<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; +const SSEImpl_DestRegForm<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; + +const SSEImpl_DestRegForm<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; +const SSEImpl_DestRegForm<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; +const SSEImpl_DestRegForm<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; + +const SSEImpl_DestRegForm<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; +const SSEImpl_DestRegForm<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; + +const SSEImpl_DestRegForm<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; +const SSEImpl_DestRegForm<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; +const SSEImpl_DestRegForm<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; + +const SSEImpl_DestRegForm<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; +const SSEImpl_DestRegForm<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; +const SSEImpl_DestRegForm<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; +const SSEImpl_DestRegForm<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; + +const SSEImpl_DestRegForm<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; +const SSEImpl_DestRegForm<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; + +const SSEImpl_DestRegForm<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; +const SSEImpl_DestRegForm<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; +const SSEImpl_DestRegForm<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; +const SSEImpl_DestRegForm<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; + +const SSEImpl_DestRegForm<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; +const SSEImpl_DestRegForm<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; // ------------------------------------------------------------------------ @@ -724,7 +770,7 @@ __forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ) // Manual implementation of this form of MOVQ, since its parameters are unique in a way // that breaks the template inference of writeXMMop(); - SimdPrefix( 0xd6, 0xf2 ); + SimdPrefix( 0xf2, 0xd6 ); ModRM_Direct( to.Id, from.Id ); } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 1421fba6dd..dbbfb9874c 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -347,14 +347,15 @@ namespace x86Emitter Internal::writeXMMop( 0x66, 0x7e, from, dest ); } + // ------------------------------------------------------------------------ - + // xMASKMOV: // Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2. // The default memory location is specified by DS:EDI. The most significant bit in each byte // of the mask operand determines whether the corresponding byte in the source operand is // written to the corresponding byte location in memory. 
- + template< typename T > static __forceinline void xMASKMOV( const xRegisterSIMD& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xf7, to, from ); } @@ -368,7 +369,7 @@ namespace x86Emitter // template< typename T > static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); } - + // ------------------------------------------------------------------------ extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ); @@ -409,10 +410,9 @@ namespace x86Emitter extern void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ); extern void xMOVNTQ( void* to, const xRegisterMMX& from ); extern void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ); - - ////////////////////////////////////////////////////////////////////////////////////////// - // - + + // ------------------------------------------------------------------------ + extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS; extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS; @@ -433,6 +433,8 @@ namespace x86Emitter extern const Internal::MovhlImplAll<0x16> xMOVH; extern const Internal::MovhlImplAll<0x12> xMOVL; + // ------------------------------------------------------------------------ + extern const Internal::PLogicImplAll<0xdb> xPAND; extern const Internal::PLogicImplAll<0xdf> xPANDN; extern const Internal::PLogicImplAll<0xeb> xPOR; @@ -440,9 +442,15 @@ namespace x86Emitter extern const Internal::SSEAndNotImpl<0x55> xANDN; - extern const Internal::SSELogicImpl<0,0x53> xRCPPS; - extern const Internal::SSELogicImpl<0xf3,0x53> xRCPSS; + extern const Internal::SSEImpl_SS_SD<0x66,0x2e> xUCOMI; + extern const Internal::SSE_rSqrtImpl<0x53> xRCP; + extern const Internal::SSE_rSqrtImpl<0x52> xRSQRT; + extern const Internal::SSE_SqrtImpl<0x51> xSQRT; + extern const Internal::SSEImpl_PSPD_SSSD<0x5f> xMAX; + extern const Internal::SSEImpl_PSPD_SSSD<0x5d> xMIN; + extern const Internal::SSEImpl_Shuffle<0xc6> xSHUF; + // ------------------------------------------------------------------------ extern const Internal::SSECompareImpl xCMPEQ; @@ -453,5 +461,39 @@ namespace x86Emitter extern const Internal::SSECompareImpl xCMPNLT; extern const Internal::SSECompareImpl xCMPNLE; extern const Internal::SSECompareImpl xCMPORD; + + // ------------------------------------------------------------------------ + // OMG Evil. I went cross-eyed an hour ago doing this. 
+ // + extern const Internal::SSEImpl_DestRegForm<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; + extern const Internal::SSEImpl_DestRegForm<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; + + extern const Internal::SSEImpl_DestRegForm<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; + extern const Internal::SSEImpl_DestRegForm<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; + extern const Internal::SSEImpl_DestRegForm<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; + + extern const Internal::SSEImpl_DestRegForm<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; + extern const Internal::SSEImpl_DestRegForm<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; + + extern const Internal::SSEImpl_DestRegForm<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; + extern const Internal::SSEImpl_DestRegForm<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; + extern const Internal::SSEImpl_DestRegForm<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; + + extern const Internal::SSEImpl_DestRegForm<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; + extern const Internal::SSEImpl_DestRegForm<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; + extern const Internal::SSEImpl_DestRegForm<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; + extern const Internal::SSEImpl_DestRegForm<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; + + extern const Internal::SSEImpl_DestRegForm<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; + extern const Internal::SSEImpl_DestRegForm<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; + + extern const Internal::SSEImpl_DestRegForm<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; + extern const Internal::SSEImpl_DestRegForm<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; + extern const Internal::SSEImpl_DestRegForm<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; + extern const Internal::SSEImpl_DestRegForm<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; + + extern const Internal::SSEImpl_DestRegForm<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; + extern const Internal::SSEImpl_DestRegForm<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; + } diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index 163e24614d..8c259b5385 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -97,14 +97,6 @@ using namespace x86Emitter; ModRM( 0, to, DISP32 ), \ write32( MEMADDR(from, 4 + overb) ) \ -#define SSE_SD_RtoM( code, overb ) \ - assert( from < iREGCNT_XMM) , \ - write8( 0xf2 ), \ - RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) \ - #define SSE_SD_RtoR( code ) \ assert( to < iREGCNT_XMM && from < iREGCNT_XMM) , \ write8( 0xf2 ), \ @@ -112,30 +104,6 @@ using namespace x86Emitter; write16( code ), \ ModRM( 3, to, from ) -#define CMPPSMtoR( op ) \ - SSEMtoR( 0xc20f, 1 ), \ - write8( op ) - -#define CMPPSRtoR( op ) \ - SSERtoR( 0xc20f ), \ - write8( op ) - -#define CMPSSMtoR( op ) \ - SSE_SS_MtoR( 0xc20f, 1 ), \ - write8( op ) - -#define CMPSSRtoR( op ) \ - SSE_SS_RtoR( 0xc20f ), \ - write8( op ) - -#define CMPSDMtoR( op ) \ - SSE_SD_MtoR( 0xc20f, 1 ), \ - write8( op ) - -#define CMPSDRtoR( op ) \ - SSE_SD_RtoR( 0xc20f ), \ - write8( op ) - #define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \ emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \ emitterT void sse##_MOV##mod##_XMM_to_M128( uptr to, x86SSERegType from ) { 
xMOV##mod( (void*)to, xRegisterSSE(from) ); } \ @@ -146,17 +114,92 @@ using namespace x86Emitter; emitterT void sse##_MOV##mod##RtoRmS( x86IntRegType to, x86SSERegType from, x86IntRegType from2, int scale ) \ { xMOV##mod( ptr[xAddressReg(to)+xAddressReg(from2)], xRegisterSSE(from) ); } +#define DEFINE_LEGACY_PSD_OPCODE( mod ) \ + emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PS( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##PD_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PD( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PD( xRegisterSSE(to), xRegisterSSE(from) ); } + +#define DEFINE_LEGACY_SSSD_OPCODE( mod ) \ + emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { x##mod.SS( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); } + +#define DEFINE_LEGACY_CMP_OPCODE( comp ) \ + emitterT void SSE_CMP##comp##PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PS( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_CMP##comp##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_CMP##comp##PD_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PD( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_CMP##comp##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PD( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE_CMP##comp##SS_M32_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SS( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_CMP##comp##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_CMP##comp##SD_M64_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SD( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_CMP##comp##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SD( xRegisterSSE(to), xRegisterSSE(from) ); } + +#define DEFINE_LEGACY_RSQRT_OPCODE(mod) \ + emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PS( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE_##mod##SS_M32_to_XMM( x86SSERegType to, uptr from ) { x##mod.SS( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SS( xRegisterSSE(to), xRegisterSSE(from) ); } + +#define DEFINE_LEGACY_SQRT_OPCODE(mod) \ + DEFINE_LEGACY_RSQRT_OPCODE(mod) \ + emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); } + +#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \ + DEFINE_LEGACY_PSD_OPCODE( mod 
) \ + DEFINE_LEGACY_SSSD_OPCODE( mod ) + DEFINE_LEGACY_MOV_OPCODE( UPS, SSE ) DEFINE_LEGACY_MOV_OPCODE( APS, SSE ) DEFINE_LEGACY_MOV_OPCODE( DQA, SSE2 ) DEFINE_LEGACY_MOV_OPCODE( DQU, SSE2 ) +DEFINE_LEGACY_PSD_OPCODE( AND ) +DEFINE_LEGACY_PSD_OPCODE( ANDN ) +DEFINE_LEGACY_PSD_OPCODE( OR ) +DEFINE_LEGACY_PSD_OPCODE( XOR ) -//**********************************************************************************/ -//MOVAPS: Move aligned Packed Single Precision FP values * -//********************************************************************************** +DEFINE_LEGACY_PSSD_OPCODE( SUB ) +DEFINE_LEGACY_PSSD_OPCODE( ADD ) +DEFINE_LEGACY_PSSD_OPCODE( MUL ) +DEFINE_LEGACY_PSSD_OPCODE( DIV ) -emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); } +DEFINE_LEGACY_PSSD_OPCODE( MIN ) +DEFINE_LEGACY_PSSD_OPCODE( MAX ) + +DEFINE_LEGACY_CMP_OPCODE( EQ ) +DEFINE_LEGACY_CMP_OPCODE( LT ) +DEFINE_LEGACY_CMP_OPCODE( LE ) +DEFINE_LEGACY_CMP_OPCODE( UNORD ) +DEFINE_LEGACY_CMP_OPCODE( NE ) +DEFINE_LEGACY_CMP_OPCODE( NLT ) +DEFINE_LEGACY_CMP_OPCODE( NLE ) +DEFINE_LEGACY_CMP_OPCODE( ORD ) + +DEFINE_LEGACY_SSSD_OPCODE( UCOMI ) +DEFINE_LEGACY_RSQRT_OPCODE( RCP ) +DEFINE_LEGACY_RSQRT_OPCODE( RSQRT ) +DEFINE_LEGACY_SQRT_OPCODE( SQRT ) + + +emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xMOVDQA( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { xMOVDZX( xRegisterSSE(to), (void*)from ); } +emitterT void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) { xMOVDZX( xRegisterSSE(to), xRegister32(from) ); } +emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) +{ + xMOVDZX( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); +} + +emitterT void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { xMOVD( (void*)to, xRegisterSSE(from) ); } +emitterT void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { xMOVD( xRegister32(to), xRegisterSSE(from) ); } +emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) +{ + xMOVD( ptr[xAddressReg(from)+offset], xRegisterSSE(from) ); +} emitterT void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVQZX( xRegisterSSE(to), (void*)from ); } emitterT void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVQZX( xRegisterSSE(to), xRegisterSSE(from) ); } @@ -165,9 +208,6 @@ emitterT void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from) { xM emitterT void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from) { xMOVQ( xRegisterSSE(to), xRegisterMMX(from) ); } -//**********************************************************************************/ -//MOVSS: Move Scalar Single-Precision FP value * -//********************************************************************************** emitterT void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ) { xMOVSSZX( xRegisterSSE(to), (void*)from ); } emitterT void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ) { xMOVSS( (void*)to, xRegisterSSE(from) ); } emitterT void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVSS( xRegisterSSE(to), xRegisterSSE(from) ); } @@ -181,189 +221,69 @@ emitterT void SSE2_MOVSD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int of emitterT void SSE2_MOVSD_XMM_to_Rm( x86IntRegType to, 
x86SSERegType from, int offset ) { xMOVSD( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); } -//**********************************************************************************/ -//MOVLPS: Move low Packed Single-Precision FP * -//********************************************************************************** + emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVL.PS( xRegisterSSE(to), (void*)from ); } emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVL.PS( (void*)to, xRegisterSSE(from) ); } emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVL.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); } emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVL.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVHPS: Move High Packed Single-Precision FP * -//********************************************************************************** emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVH.PS( xRegisterSSE(to), (void*)from ); } emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVH.PS( (void*)to, xRegisterSSE(from) ); } emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVH.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); } emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVH.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVLHPS: Moved packed Single-Precision FP low to high * -//********************************************************************************** emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); } - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVHLPS: Moved packed Single-Precision FP High to Low * -//********************************************************************************** emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); } - emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); } - -#define DEFINE_LEGACY_PSD_OPCODE( mod ) \ - emitterT void SSE_##mod##PS_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PS( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_##mod##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_##mod##PD_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.PD( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_##mod##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.PD( xRegisterSSE(to), xRegisterSSE(from) ); } - -#define DEFINE_LEGACY_PSSD_OPCODE( mod ) \ - DEFINE_LEGACY_PSD_OPCODE( mod ) \ - emitterT void SSE_##mod##SS_M32_to_XMM( 
x86SSERegType to, uptr from ) { x##mod.SS( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_##mod##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_##mod##SD_M32_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); } - -DEFINE_LEGACY_PSD_OPCODE( AND ) -DEFINE_LEGACY_PSD_OPCODE( ANDN ) -DEFINE_LEGACY_PSD_OPCODE( OR ) -DEFINE_LEGACY_PSD_OPCODE( XOR ) - -DEFINE_LEGACY_PSSD_OPCODE( SUB ) -DEFINE_LEGACY_PSSD_OPCODE( ADD ) -DEFINE_LEGACY_PSSD_OPCODE( MUL ) -DEFINE_LEGACY_PSSD_OPCODE( DIV ) - -emitterT void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xRCPPS( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ) { xRCPPS( xRegisterSSE(to), (void*)from ); } - -emitterT void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xRCPSS( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { xRCPSS( xRegisterSSE(to), (void*)from ); } - -//////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//Packed Single-Precision FP compare (CMPccPS) * -//********************************************************************************** - -#define DEFINE_LEGACY_CMP_OPCODE( comp ) \ - emitterT void SSE_CMP##comp##PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PS( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_CMP##comp##PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PS( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_CMP##comp##PD_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.PD( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_CMP##comp##PD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.PD( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE_CMP##comp##SS_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SS( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE_CMP##comp##SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SS( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_CMP##comp##SD_M128_to_XMM( x86SSERegType to, uptr from ) { xCMP##comp.SD( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_CMP##comp##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCMP##comp.SD( xRegisterSSE(to), xRegisterSSE(from) ); } - -DEFINE_LEGACY_CMP_OPCODE( EQ ) -DEFINE_LEGACY_CMP_OPCODE( LT ) -DEFINE_LEGACY_CMP_OPCODE( LE ) -DEFINE_LEGACY_CMP_OPCODE( UNORD ) -DEFINE_LEGACY_CMP_OPCODE( NE ) -DEFINE_LEGACY_CMP_OPCODE( NLT ) -DEFINE_LEGACY_CMP_OPCODE( NLE ) -DEFINE_LEGACY_CMP_OPCODE( ORD ) - -emitterT void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ) +emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), (void*)from, imm8 ); } +emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) { - RexR(0, to); - write16( 0x2e0f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); + xSHUF.PS( xRegisterSSE(to), 
ptr[xAddressReg(from)+offset], imm8 ); } -emitterT void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - RexRB(0, to, from); - write16( 0x2e0f ); - ModRM( 3, to, from ); -} +emitterT void SSE_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), (void*)from, imm8 ); } -emitterT void SSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ) -{ - write8(0x66); - RexR(0, to); - write16( 0x2e0f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} -emitterT void SSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - write8(0x66); - RexRB(0, to, from); - write16( 0x2e0f ); - ModRM( 3, to, from ); -} +emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { xCVTPI2PS( xRegisterSSE(to), (u64*)from ); } +emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { xCVTPI2PS( xRegisterSSE(to), xRegisterMMX(from) ); } + +emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { xCVTPS2PI( xRegisterMMX(to), (u64*)from ); } +emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { xCVTPS2PI( xRegisterMMX(to), xRegisterSSE(from) ); } + +emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { xCVTTSS2SI( xRegister32(to), (u32*)from ); } +emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xCVTTSS2SI( xRegister32(to), xRegisterSSE(from) ); } + +emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { xCVTSI2SS( xRegisterSSE(to), (u32*)from ); } +emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) { xCVTSI2SS( xRegisterSSE(to), xRegister32(from) ); } + +emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { xCVTSS2SD( xRegisterSSE(to), (u32*)from ); } +emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSS2SD( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { xCVTSD2SS( xRegisterSSE(to), (u64*)from ); } +emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSD2SS( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTDQ2PS( xRegisterSSE(to), (u128*)from ); } +emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTDQ2PS( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTPS2DQ( xRegisterSSE(to), (u128*)from ); } +emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); } ////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//RSQRTPS : Packed Single-Precision FP Square Root Reciprocal * -//********************************************************************************** -emitterT void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x520f, 0 ); } -emitterT void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x520f ); } 
+////////////////////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//RSQRTSS : Scalar Single-Precision FP Square Root Reciprocal * -//********************************************************************************** -emitterT void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x520f, 0 ); } -emitterT void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x520f ); } - -//////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SQRTPS : Packed Single-Precision FP Square Root * -//********************************************************************************** -emitterT void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x510f, 0 ); } -emitterT void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x510f ); } - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SQRTSS : Scalar Single-Precision FP Square Root * -//********************************************************************************** -emitterT void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); } -emitterT void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x510f ); } - -emitterT void SSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x510f, 0 ); } -emitterT void SSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SD_RtoR( 0x510f ); } - -//////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MAXPS: Return Packed Single-Precision FP Maximum * -//********************************************************************************** -emitterT void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5f0f, 0 ); } -emitterT void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5f0f ); } - -emitterT void SSE2_MAXPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5f0f ); } -emitterT void SSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5f0f ); } - -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MAXSS: Return Scalar Single-Precision FP Maximum * -//********************************************************************************** -emitterT void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); } -emitterT void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); } - -emitterT void SSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5f0f, 0 ); } -emitterT void SSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5f0f ); } - -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion * 
-//********************************************************************************** -emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); } -emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); } - -/////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//CVTPS2PI: Packed Single FP to Packed Signed INT32 Conversion * -//********************************************************************************** -emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); } -emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); } +//emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); } +// emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); } +// +// emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); } +// emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); } +/* emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { write8(0xf3); SSEMtoR(0x2c0f, 0); } emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) { @@ -372,8 +292,9 @@ emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) write16(0x2c0f); ModRM(3, to, from); } +*/ -emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); } +/*emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); } emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) { write8(0xf3); @@ -384,50 +305,26 @@ emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { SSE_SS_MtoR(0x5a0f, 0); } emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x5a0f); } +*/ -emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); } +/*emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); } emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SD_RtoR(0x5a0f); } -/////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion * -//********************************************************************************** emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); } emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); } -//**********************************************************************************/ -//CVTPS2DQ: Packed Single Precision FP to Packed Signed INT32 Conversion * -//********************************************************************************** emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); } emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); } -emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); 
SSERtoR(0x5b0f); } -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MINPS: Return Packed Single-Precision FP Minimum * -//********************************************************************************** -emitterT void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5d0f, 0 ); } -emitterT void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5d0f ); } +emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); }*/ -emitterT void SSE2_MINPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5d0f ); } -emitterT void SSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5d0f ); } - -////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MINSS: Return Scalar Single-Precision FP Minimum * -//********************************************************************************** -emitterT void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); } -emitterT void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); } - -emitterT void SSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5d0f, 0 ); } -emitterT void SSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5d0f ); } /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PMAXSW: Packed Signed Integer Word Maximum * //********************************************************************************** //missing - // SSE_PMAXSW_M64_to_MM +// SSE_PMAXSW_M64_to_MM // SSE2_PMAXSW_M128_to_XMM // SSE2_PMAXSW_XMM_to_XMM emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); } @@ -437,33 +334,11 @@ emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERt //PMINSW: Packed Signed Integer Word Minimum * //********************************************************************************** //missing - // SSE_PMINSW_M64_to_MM +// SSE_PMINSW_M64_to_MM // SSE2_PMINSW_M128_to_XMM // SSE2_PMINSW_XMM_to_XMM emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); } -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SHUFPS: Shuffle Packed Single-Precision FP Values * -//********************************************************************************** -emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR( 0xC60F ); write8( imm8 ); } -emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR( 0xC60F, 1 ); write8( imm8 ); } - -emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ) -{ - RexRB(0, to, from); - write16(0xc60f); - WriteRmOffsetFrom(to, from, offset); - write8(imm8); -} - -////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//SHUFPD: Shuffle Packed Double-Precision FP Values * 
-//********************************************************************************** -emitterT void SSE2_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { SSERtoR66( 0xC60F ); write8( imm8 ); } -emitterT void SSE2_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0xC60F ); write8( imm8 ); } - //////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PSHUFD: Shuffle Packed DoubleWords * @@ -565,43 +440,6 @@ emitterT void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSER emitterT void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); } /////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//MOVD: Move Dword(32bit) to /from XMM reg * -//********************************************************************************** -emitterT void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66(0x6E0F); } -emitterT void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ) { SSERtoR66(0x6E0F); } - -emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ) -{ - write8(0x66); - RexRB(0, to, from); - write16( 0x6e0f ); - ModRM( 0, to, from); -} - -emitterT void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) -{ - write8(0x66); - RexRB(0, to, from); - write16( 0x6e0f ); - WriteRmOffsetFrom(to, from, offset); -} - -emitterT void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ) { SSERtoM66(0x7E0F); } -emitterT void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ) { _SSERtoR66(0x7E0F); } - -emitterT void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) -{ - write8(0x66); - RexRB(0, from, to); - write16( 0x7e0f ); - WriteRmOffsetFrom(from, to, offset); -} - -/////////////////////////////////////////////////////////////////////////////////////// - -emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xMOVDQA( xRegisterSSE(to), xRegisterSSE(from) ); } - // shift right logical diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 368c4a36b9..a9604653b6 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -124,6 +124,22 @@ static __forceinline void xWrite( T val ) namespace x86Emitter { +////////////////////////////////////////////////////////////////////////////////////////// +// ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const] +// +// This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions +// do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache +// and some marginal speed gains as a result. (it's possible someday in the future the per- +// formance of the two instructions could change, so this constant is provided to restore MOVDQA +// use easily at a later time, if needed). 
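// How the constant is meant to be consulted (a hedged sketch only; the actual dispatch site
// lives elsewhere in the emitter and may differ):
//
//    if( AlwaysUseMovaps ) xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) );
//    else                  xMOVDQA( xRegisterSSE(to), xRegisterSSE(from) );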
+#define ALWAYS_USE_MOVAPS + +#ifdef ALWAYS_USE_MOVAPS + static const bool AlwaysUseMovaps = true; +#else + static const bool AlwaysUseMovaps = false; +#endif + ///////////////////////////////////////////////////////////////////////////////////////////// // __emitline - preprocessors definition // @@ -688,22 +704,6 @@ namespace x86Emitter #include "implement/test.h" #include "implement/jmpcall.h" } - - ////////////////////////////////////////////////////////////////////////////////////////// - // ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const] - // - // This tells the recompiler's emitter to always use movaps instead of movdqa. Both instructions - // do the exact same thing, but movaps is 1 byte shorter, and thus results in a cleaner L1 cache - // and some marginal speed gains as a result. (it's possible someday in the future the per- - // formance of the two instructions could change, so this constant is provided to restore MOVDQA - // use easily at a later time, if needed). - #define ALWAYS_USE_MOVAPS - - #ifdef ALWAYS_USE_MOVAPS - static const bool AlwaysUseMovaps = true; - #else - static const bool AlwaysUseMovaps = false; - #endif } #include "ix86_inlines.inl" From 73e50f49ea7c63f61a5c6d5b8a7becaa23251fd8 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Mon, 20 Apr 2009 19:25:35 +0000 Subject: [PATCH 116/143] Emitter: Yay! More instructions! All forms of PADD/PSUB/PSRL/PSLL. I'm getting closer! git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1032 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/group1.h | 180 +++++++--------- pcsx2/x86/ix86/implement/group2.h | 119 ++++------- pcsx2/x86/ix86/implement/group3.h | 66 +++--- pcsx2/x86/ix86/implement/xmm/movqss.h | 167 ++++++++++++--- pcsx2/x86/ix86/ix86.cpp | 112 +++++----- pcsx2/x86/ix86/ix86_instructions.h | 112 +++++----- pcsx2/x86/ix86/ix86_legacy_mmx.cpp | 289 ++++---------------------- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 228 +------------------- pcsx2/x86/ix86/ix86_types.h | 4 + 9 files changed, 452 insertions(+), 825 deletions(-) diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index 0fb4640e5b..bbc6735b51 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -34,57 +34,81 @@ enum G1Type }; // ------------------------------------------------------------------- -template< typename ImmType > -class Group1Impl +// +template< G1Type InstType > +class xImpl_Group1 { -protected: - static const uint OperandSize = sizeof(ImmType); - - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - -public: - Group1Impl() {} // because GCC doesn't like static classes - - static __emitinline void Emit( G1Type InstType, const xRegister& to, const xRegister& from ) +public: + // ------------------------------------------------------------------------ + template< typename T > __forceinline void operator()( const xRegister& to, const xRegister& from ) const { - prefix16(); - xWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + prefix16(); + xWrite( (Is8BitOp() ? 0 : 1) | (InstType<<3) ); ModRM_Direct( from.Id, to.Id ); } - static __emitinline void Emit( G1Type InstType, const ModSibBase& sibdest, const xRegister& from ) + // ------------------------------------------------------------------------ + template< typename T > __forceinline void operator()( const xRegister& to, const void* src ) const { - prefix16(); - xWrite( (Is8BitOperand() ? 
0 : 1) | (InstType<<3) ); - EmitSibMagic( from.Id, sibdest ); + prefix16(); + xWrite( (Is8BitOp() ? 2 : 3) | (InstType<<3) ); + xWriteDisp( to.Id, src ); } - - static __emitinline void Emit( G1Type InstType, const xRegister& to, const ModSibBase& sibsrc ) + + // ------------------------------------------------------------------------ + template< typename T > __forceinline void operator()( void* dest, const xRegister& from ) const { - prefix16(); - xWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); - EmitSibMagic( to.Id, sibsrc ); - } - - static __emitinline void Emit( G1Type InstType, void* dest, const xRegister& from ) - { - prefix16(); - xWrite( (Is8BitOperand() ? 0 : 1) | (InstType<<3) ); + prefix16(); + xWrite( (Is8BitOp() ? 0 : 1) | (InstType<<3) ); xWriteDisp( from.Id, dest ); } - static __emitinline void Emit( G1Type InstType, const xRegister& to, const void* src ) + // ------------------------------------------------------------------------ + template< typename T > __noinline void operator()( const ModSibBase& sibdest, const xRegister& from ) const { - prefix16(); - xWrite( (Is8BitOperand() ? 2 : 3) | (InstType<<3) ); - xWriteDisp( to.Id, src ); + prefix16(); + xWrite( (Is8BitOp() ? 0 : 1) | (InstType<<3) ); + EmitSibMagic( from.Id, sibdest ); } - static __emitinline void Emit( G1Type InstType, const xRegister& to, int imm ) + // ------------------------------------------------------------------------ + template< typename T > __noinline void operator()( const xRegister& to, const ModSibBase& sibsrc ) const { - prefix16(); - if( !Is8BitOperand() && is_s8( imm ) ) + prefix16(); + xWrite( (Is8BitOp() ? 2 : 3) | (InstType<<3) ); + EmitSibMagic( to.Id, sibsrc ); + } + + // ------------------------------------------------------------------------ + // Note on Imm forms : use int as the source operand since it's "reasonably inert" from a compiler + // perspective. (using uint tends to make the compiler try and fail to match signed immediates with + // one of the other overloads). + + template< typename T > __noinline void operator()( const ModSibStrict& sibdest, int imm ) const + { + if( Is8BitOp() ) + { + xWrite( 0x80 ); + EmitSibMagic( InstType, sibdest ); + xWrite( imm ); + } + else + { + prefix16(); + xWrite( is_s8( imm ) ? 0x83 : 0x81 ); + EmitSibMagic( InstType, sibdest ); + if( is_s8( imm ) ) + xWrite( imm ); + else + xWrite( imm ); + } + } + + // ------------------------------------------------------------------------ + template< typename T > __forceinline void operator()( const xRegister& to, int imm ) const + { + prefix16(); + if( !Is8BitOp() && is_s8( imm ) ) { xWrite( 0x83 ); ModRM_Direct( InstType, to.Id ); @@ -93,65 +117,17 @@ public: else { if( to.IsAccumulator() ) - xWrite( (Is8BitOperand() ? 4 : 5) | (InstType<<3) ); + xWrite( (Is8BitOp() ? 4 : 5) | (InstType<<3) ); else { - xWrite( Is8BitOperand() ? 0x80 : 0x81 ); + xWrite( Is8BitOp() ? 0x80 : 0x81 ); ModRM_Direct( InstType, to.Id ); } - xWrite( imm ); + xWrite( imm ); } } - static __emitinline void Emit( G1Type InstType, const ModSibStrict& sibdest, int imm ) - { - if( Is8BitOperand() ) - { - xWrite( 0x80 ); - EmitSibMagic( InstType, sibdest ); - xWrite( imm ); - } - else - { - prefix16(); - xWrite( is_s8( imm ) ? 
0x83 : 0x81 ); - EmitSibMagic( InstType, sibdest ); - if( is_s8( imm ) ) - xWrite( imm ); - else - xWrite( imm ); - } - } -}; - - -// ------------------------------------------------------------------- -// -template< G1Type InstType > -class Group1ImplAll -{ -public: - template< typename T > - __forceinline void operator()( const xRegister& to, const xRegister& from ) const { Group1Impl::Emit( InstType, to, from ); } - template< typename T > - __forceinline void operator()( const xRegister& to, const void* src ) const { Group1Impl::Emit( InstType, to, src ); } - template< typename T > - __forceinline void operator()( void* dest, const xRegister& from ) const { Group1Impl::Emit( InstType, dest, from ); } - template< typename T > - __noinline void operator()( const ModSibBase& sibdest, const xRegister& from ) const { Group1Impl::Emit( InstType, sibdest, from ); } - template< typename T > - __noinline void operator()( const xRegister& to, const ModSibBase& sibsrc ) const { Group1Impl::Emit( InstType, to, sibsrc ); } - - // Note on Imm forms : use int as the source operand since it's "reasonably inert" from a compiler - // perspective. (using uint tends to make the compiler try and fail to match signed immediates with - // one of the other overloads). - - template< typename T > - __noinline void operator()( const ModSibStrict& sibdest, int imm ) const { Group1Impl::Emit( InstType, sibdest, imm ); } - template< typename T > - __forceinline void operator()( const xRegister& to, int imm ) const { Group1Impl::Emit( InstType, to, imm ); } - - Group1ImplAll() {} // Why does GCC need these? + xImpl_Group1() {} // Why does GCC need these? }; // ------------------------------------------------------------------------ @@ -159,34 +135,34 @@ public: // Note: ANDN [AndNot] is handled below separately. // template< G1Type InstType, u8 OpcodeSSE > -class G1LogicImpl_PlusSSE : public Group1ImplAll +class xImpl_G1Logic : public xImpl_Group1 { public: - using Group1ImplAll::operator(); + using xImpl_Group1::operator(); - const SSELogicImpl<0x00,OpcodeSSE> PS; // packed single precision - const SSELogicImpl<0x66,OpcodeSSE> PD; // packed double precision + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; // packed single precision + const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; // packed double precision - G1LogicImpl_PlusSSE() {} + xImpl_G1Logic() {} }; // ------------------------------------------------------------------------ -// This calss combines x86 with SSE/SSE2 arithmetic operations (ADD/SUB). +// This class combines x86 with SSE/SSE2 arithmetic operations (ADD/SUB). 
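// Before the SSE-augmented wrappers, a worked example of the xImpl_Group1
// opcode math used above (illustration only, assuming the standard x86
// group-1 encodings with G1Type_ADD == 0 and G1Type_CMP == 7):
//
//   (0 : 1) | (G1Type_ADD << 3)  ->  0x00 / 0x01   ADD r/m8,r8  / ADD r/m32,r32
//   (2 : 3) | (G1Type_ADD << 3)  ->  0x02 / 0x03   ADD r8,r/m8  / ADD r32,r/m32
//   (4 : 5) | (G1Type_ADD << 3)  ->  0x04 / 0x05   ADD AL,imm8  / ADD EAX,imm32
//   (0 : 1) | (G1Type_CMP << 3)  ->  0x38 / 0x39   CMP r/m8,r8  / CMP r/m32,r32
//
// Immediate forms instead use 0x80 (8-bit), 0x81 (imm32), or 0x83 (sign-
// extended imm8), with the group index carried in the ModRM reg field.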
// template< G1Type InstType, u8 OpcodeSSE > -class G1ArithmeticImpl_PlusSSE : public G1LogicImpl_PlusSSE +class xImpl_G1Arith : public xImpl_G1Logic { public: - using Group1ImplAll::operator(); + using xImpl_Group1::operator(); - const SSELogicImpl<0xf3,OpcodeSSE> SS; // scalar single precision - const SSELogicImpl<0xf2,OpcodeSSE> SD; // scalar double precision + const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; // scalar single precision + const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; // scalar double precision - G1ArithmeticImpl_PlusSSE() {} + xImpl_G1Arith() {} }; // ------------------------------------------------------------------------ -class G1CompareImpl_PlusSSE : Group1ImplAll< G1Type_CMP > +class xImpl_G1Compare : xImpl_Group1< G1Type_CMP > { protected: template< u8 Prefix > struct Woot @@ -198,12 +174,12 @@ protected: }; public: - using Group1ImplAll< G1Type_CMP >::operator(); + using xImpl_Group1< G1Type_CMP >::operator(); const Woot<0x00> PS; const Woot<0x66> PD; const Woot<0xf3> SS; const Woot<0xf2> SD; - G1CompareImpl_PlusSSE() {} //GCWhat? + xImpl_G1Compare() {} //GCWhat? }; diff --git a/pcsx2/x86/ix86/implement/group2.h b/pcsx2/x86/ix86/implement/group2.h index 45a5430d90..fecef3c6ba 100644 --- a/pcsx2/x86/ix86/implement/group2.h +++ b/pcsx2/x86/ix86/implement/group2.h @@ -38,92 +38,61 @@ enum G2Type // Optimization Note: For Imm forms, we ignore the instruction if the shift count is zero. // This is a safe optimization since any zero-value shift does not affect any flags. // -template< G2Type InstType, typename ImmType > -class Group2Impl -{ -protected: - static const uint OperandSize = sizeof(ImmType); - - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - -public: - Group2Impl() {} // For the love of GCC. - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to ) - { - prefix16(); - xWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); - ModRM_Direct( InstType, to.Id ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, u8 imm ) - { - if( imm == 0 ) return; - - prefix16(); - if( imm == 1 ) - { - // special encoding of 1's - xWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); - ModRM_Direct( InstType, to.Id ); - } - else - { - xWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); - ModRM_Direct( InstType, to.Id ); - xWrite( imm ); - } - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibStrict& sibdest ) - { - prefix16(); - xWrite( Is8BitOperand() ? 0xd2 : 0xd3 ); - EmitSibMagic( InstType, sibdest ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibStrict& sibdest, u8 imm ) - { - if( imm == 0 ) return; - - prefix16(); - if( imm == 1 ) - { - // special encoding of 1's - xWrite( Is8BitOperand() ? 0xd0 : 0xd1 ); - EmitSibMagic( InstType, sibdest ); - } - else - { - xWrite( Is8BitOperand() ? 0xc0 : 0xc1 ); - EmitSibMagic( InstType, sibdest ); - xWrite( imm ); - } - } -}; - -// ------------------------------------------------------------------- -// template< G2Type InstType > class Group2ImplAll { public: template< typename T > __forceinline void operator()( const xRegister& to, __unused const xRegisterCL& from ) const - { Group2Impl::Emit( to ); } + { + prefix16(); + xWrite( Is8BitOp() ? 
0xd2 : 0xd3 ); + ModRM_Direct( InstType, to.Id ); + } template< typename T > __noinline void operator()( const ModSibStrict& sibdest, __unused const xRegisterCL& from ) const - { Group2Impl::Emit( sibdest ); } + { + prefix16(); + xWrite( Is8BitOp() ? 0xd2 : 0xd3 ); + EmitSibMagic( InstType, sibdest ); + } template< typename T > __noinline void operator()( const ModSibStrict& sibdest, u8 imm ) const - { Group2Impl::Emit( sibdest, imm ); } + { + if( imm == 0 ) return; + prefix16(); + if( imm == 1 ) + { + // special encoding of 1's + xWrite( Is8BitOp() ? 0xd0 : 0xd1 ); + EmitSibMagic( InstType, sibdest ); + } + else + { + xWrite( Is8BitOp() ? 0xc0 : 0xc1 ); + EmitSibMagic( InstType, sibdest ); + xWrite( imm ); + } + } + template< typename T > __forceinline void operator()( const xRegister& to, u8 imm ) const - { Group2Impl::Emit( to, imm ); } + { + if( imm == 0 ) return; + + prefix16(); + if( imm == 1 ) + { + // special encoding of 1's + xWrite( Is8BitOp() ? 0xd0 : 0xd1 ); + ModRM_Direct( InstType, to.Id ); + } + else + { + xWrite( Is8BitOp() ? 0xc0 : 0xc1 ); + ModRM_Direct( InstType, to.Id ); + xWrite( imm ); + } + } Group2ImplAll() {} // I am a class with no members, so I need an explicit constructor! Sense abounds. }; diff --git a/pcsx2/x86/ix86/implement/group3.h b/pcsx2/x86/ix86/implement/group3.h index aae0d77652..a88049789f 100644 --- a/pcsx2/x86/ix86/implement/group3.h +++ b/pcsx2/x86/ix86/implement/group3.h @@ -31,45 +31,27 @@ enum G3Type G3Type_iDIV = 7 }; -// ------------------------------------------------------------------------ -template< typename ImmType > -class Group3Impl -{ -protected: - static const uint OperandSize = sizeof(ImmType); - - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - -public: - Group3Impl() {} // For the love of GCC. - - static __emitinline void Emit( G3Type InstType, const xRegister& from ) - { - prefix16(); - xWrite(Is8BitOperand() ? 0xf6 : 0xf7 ); - ModRM_Direct( InstType, from.Id ); - } - - static __emitinline void Emit( G3Type InstType, const ModSibStrict& sibsrc ) - { - prefix16(); - xWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); - EmitSibMagic( InstType, sibsrc ); - } -}; - -// ------------------------------------------------------------------- +////////////////////////////////////////////////////////////////////////////////////////// // template< G3Type InstType > class Group3ImplAll { public: - template< typename T > - __forceinline void operator()( const xRegister& from ) const { Group3Impl::Emit( InstType, from ); } + // ------------------------------------------------------------------------ + template< typename T > __forceinline void operator()( const xRegister& from ) const + { + prefix16(); + xWrite(Is8BitOp() ? 0xf6 : 0xf7 ); + ModRM_Direct( InstType, from.Id ); + } - template< typename T > - __noinline void operator()( const ModSibStrict& from ) const { Group3Impl::Emit( InstType, from ); } + // ------------------------------------------------------------------------ + template< typename T > __noinline void operator()( const ModSibStrict& from ) const + { + prefix16(); + xWrite( Is8BitOp() ? 0xf6 : 0xf7 ); + EmitSibMagic( InstType, from ); + } Group3ImplAll() {} }; @@ -78,15 +60,15 @@ public: // This class combines x86 and SSE/SSE2 instructions for iMUL and iDIV. 
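// Hypothetical call-site sketch (register object names are illustrative) of
// what these combined x86 + SSE instruction objects look like in use:
//
//   xADD( eax, ecx );          // integer ADD, group-1 encoding
//   xADD( eax, 4 );            // immediate form (short 0x83 encoding when it fits)
//   xADD.PS( xmm0, xmm1 );     // ADDPS -- packed single precision
//   xADD.SD( xmm0, xmm1 );     // ADDSD -- scalar double precision
//   xMUL( eax, ecx );          // signed integer multiply
//   xMUL.SS( xmm2, xmm3 );     // MULSS -- scalar single precision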
// template< G3Type InstType, u8 OpcodeSSE > -class G3Impl_PlusSSE : public Group3ImplAll +class xImpl_Group3 : public Group3ImplAll { public: - const SSELogicImpl<0x00,OpcodeSSE> PS; - const SSELogicImpl<0x66,OpcodeSSE> PD; - const SSELogicImpl<0xf3,OpcodeSSE> SS; - const SSELogicImpl<0xf2,OpcodeSSE> SD; + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; + const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; + const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; + const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; - G3Impl_PlusSSE() {} + xImpl_Group3() {} }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -162,14 +144,14 @@ public: }; // ------------------------------------------------------------------------ -class iMul_PlusSSE : public G3Impl_PlusSSE +class xImpl_iMul : public xImpl_Group3 { protected: typedef iMulImpl iMUL32; typedef iMulImpl iMUL16; public: - using G3Impl_PlusSSE::operator(); + using xImpl_Group3::operator(); __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { iMUL32::Emit( to, from ); } __forceinline void operator()( const xRegister32& to, const void* src ) const { iMUL32::Emit( to, src ); } @@ -183,5 +165,5 @@ public: __noinline void operator()( const xRegister16& to, const ModSibBase& src ) const { iMUL16::Emit( to, src ); } __noinline void operator()( const xRegister16& to, const ModSibBase& from, s16 imm ) const { iMUL16::Emit( to, from, imm ); } - iMul_PlusSSE() {} + xImpl_iMul() {} }; diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index 412630cd20..d69107bdfe 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -129,11 +129,11 @@ public: }; ////////////////////////////////////////////////////////////////////////////////////////// -// PLogicImplAll - Implements logic forms for MMX/SSE instructions, and can be used for +// SimdImpl_PackedLogic - Implements logic forms for MMX/SSE instructions, and can be used for // a few other various instruction too (anything which comes in simdreg,simdreg/ModRM forms). // template< u8 Opcode > -class PLogicImplAll +class SimdImpl_PackedLogic { public: template< typename T > @@ -143,21 +143,40 @@ public: template< typename T > __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } - PLogicImplAll() {} //GCWho? + SimdImpl_PackedLogic() {} //GCWho? }; // ------------------------------------------------------------------------ -// For implementing SSE-only logic operations, like ANDPS/ANDPD +// For implementing SSE-only logic operations that have reg,reg/rm forms only, +// like ANDPS/ANDPD // template< u8 Prefix, u8 Opcode > -class SSELogicImpl +class SimdImpl_DestRegSSE { public: __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - SSELogicImpl() {} //GCWho? + SimdImpl_DestRegSSE() {} //GCWho? +}; + +// ------------------------------------------------------------------------ +// For implementing MMX/SSE operations that have reg,reg/rm forms only, +// but accept either MM or XMM destinations (most PADD/PSUB and other P srithmetic ops). 
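// Hedged usage sketch for the "either destination" forms (names illustrative;
// the xPADD/xPSUB objects are instantiated further down in ix86.cpp):
//
//   xPADD.B( xRegisterSSE(0), xRegisterSSE(1) );   // PADDB xmm0,xmm1
//   xPADD.B( xRegisterMMX(0), xRegisterMMX(1) );   // PADDB mm0,mm1 -- same opcode; the
//                                                  // 66 prefix applies to the XMM form only
//   xPSUB.USW( xRegisterSSE(2), xRegisterSSE(3) ); // PSUBUSW xmm2,xmm3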
+// +template< u8 Prefix, u8 Opcode > +class SimdImpl_DestRegEither +{ +public: + template< typename DestOperandType > + __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename DestOperandType > + __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename DestOperandType > + __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + + SimdImpl_DestRegEither() {} //GCWho? }; // ------------------------------------------------------------------------ @@ -165,76 +184,76 @@ public: // can be regDirect or ModRM (indirect). // template< u8 Prefix, u8 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > -class SSEImpl_DestRegForm +class SimdImpl_DestRegStrict { public: __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } __noinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - SSEImpl_DestRegForm() {} //GCWho? + SimdImpl_DestRegStrict() {} //GCWho? }; // ------------------------------------------------------------------------ template< u8 OpcodeSSE > -class SSEImpl_PSPD_SSSD +class SimdImpl_PSPD_SSSD { public: - const SSELogicImpl<0x00,OpcodeSSE> PS; // packed single precision - const SSELogicImpl<0x66,OpcodeSSE> PD; // packed double precision - const SSELogicImpl<0xf3,OpcodeSSE> SS; // scalar single precision - const SSELogicImpl<0xf2,OpcodeSSE> SD; // scalar double precision + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; // packed single precision + const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; // packed double precision + const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; // scalar single precision + const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; // scalar double precision - SSEImpl_PSPD_SSSD() {} //GChow? + SimdImpl_PSPD_SSSD() {} //GChow? }; // ------------------------------------------------------------------------ // template< u8 OpcodeSSE > -class SSEAndNotImpl +class SimdImpl_AndNot { public: - const SSELogicImpl<0x00,OpcodeSSE> PS; - const SSELogicImpl<0x66,OpcodeSSE> PD; - SSEAndNotImpl() {} + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; + const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; + SimdImpl_AndNot() {} }; // ------------------------------------------------------------------------ // For instructions that have SS/SD form only (UCOMI, etc) // AltPrefix - prefixed used for doubles (SD form). 
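// Reference table (illustration only) of how the prefix byte picks the form
// for these one-opcode families, using opcode 0x51 (SQRT) as the example:
//
//   no prefix -> SQRTPS   (0F 51)      packed single
//   66        -> SQRTPD   (66 0F 51)   packed double
//   F3        -> SQRTSS   (F3 0F 51)   scalar single
//   F2        -> SQRTSD   (F2 0F 51)   scalar double
//
// SimdImpl_rSqrt below exposes only PS/SS because RCP and RSQRT have no
// double-precision forms; SimdImpl_Sqrt derives from it and adds SD back.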
template< u8 AltPrefix, u8 OpcodeSSE > -class SSEImpl_SS_SD +class SimdImpl_SS_SD { public: - const SSELogicImpl<0x00,OpcodeSSE> SS; - const SSELogicImpl SD; - SSEImpl_SS_SD() {} + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> SS; + const SimdImpl_DestRegSSE SD; + SimdImpl_SS_SD() {} }; // ------------------------------------------------------------------------ // For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions) template< u8 OpcodeSSE > -class SSE_rSqrtImpl +class SimdImpl_rSqrt { public: - const SSELogicImpl<0x00,OpcodeSSE> PS; - const SSELogicImpl<0xf3,OpcodeSSE> SS; - SSE_rSqrtImpl() {} + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; + const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; + SimdImpl_rSqrt() {} }; // ------------------------------------------------------------------------ // For instructions that have PS/SS/SD form only (most commonly Sqrt functions) template< u8 OpcodeSSE > -class SSE_SqrtImpl : public SSE_rSqrtImpl +class SimdImpl_Sqrt : public SimdImpl_rSqrt { public: - const SSELogicImpl<0xf2,OpcodeSSE> SD; - SSE_SqrtImpl() {} + const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; + SimdImpl_Sqrt() {} }; // ------------------------------------------------------------------------ template< u8 OpcodeSSE > -class SSEImpl_Shuffle +class SimdImpl_Shuffle { protected: template< u8 Prefix > struct Woot @@ -249,12 +268,12 @@ public: const Woot<0x00> PS; const Woot<0x66> PD; - SSEImpl_Shuffle() {} //GCWhat? + SimdImpl_Shuffle() {} //GCWhat? }; // ------------------------------------------------------------------------ template< SSE2_ComparisonType CType > -class SSECompareImpl +class SimdImpl_Compare { protected: template< u8 Prefix > struct Woot @@ -270,5 +289,87 @@ public: const Woot<0x66> PD; const Woot<0xf3> SS; const Woot<0xf2> SD; - SSECompareImpl() {} //GCWhat? + SimdImpl_Compare() {} //GCWhat? }; + + +////////////////////////////////////////////////////////////////////////////////////////// +// +// +template< u8 Opcode1, u8 OpcodeImm, u8 Modcode > +class SimdImpl_Shift +{ +public: + SimdImpl_Shift() {} + + template< typename OperandType > + __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __emitinline void operator()( const xRegisterSIMD& to, u8 imm ) const + { + SimdPrefix( (sizeof( OperandType ) == 16) ? 
0x66 : 0, OpcodeImm ); + ModRM( 3, (int)Modcode, to.Id ); + xWrite( imm ); + } +}; + +// ------------------------------------------------------------------------ +template< u8 OpcodeBase1, u8 OpcodeBaseImm, u8 Modcode > +class SimdImpl_ShiftAll +{ +public: + const SimdImpl_Shift W; + const SimdImpl_Shift D; + const SimdImpl_Shift Q; + + void DQ( const xRegisterSSE& to, u8 imm ) const + { + SimdPrefix( 0x66, OpcodeBaseImm+3 ); + ModRM( 3, (int)Modcode+1, to.Id ); + xWrite( imm ); + } + + SimdImpl_ShiftAll() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +template< u8 OpcodeB, u8 OpcodeS, u8 OpcodeUS, u8 OpcodeQ > +class SimdImpl_AddSub +{ +public: + const SimdImpl_DestRegEither<0x66,OpcodeB> B; + const SimdImpl_DestRegEither<0x66,OpcodeB+1> W; + const SimdImpl_DestRegEither<0x66,OpcodeB+2> D; + const SimdImpl_DestRegEither<0x66,OpcodeQ> Q; + + // Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results. + const SimdImpl_DestRegEither<0x66,OpcodeS> SB; + + // Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results. + const SimdImpl_DestRegEither<0x66,OpcodeS+1> SW; + + // Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results. + const SimdImpl_DestRegEither<0x66,OpcodeUS> USB; + + // Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results. + const SimdImpl_DestRegEither<0x66,OpcodeUS+1> USW; + + SimdImpl_AddSub() {} +}; \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 2a2eb665a7..e987f180c0 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -253,16 +253,16 @@ using namespace Internal; const MovImplAll xMOV; const TestImplAll xTEST; -const G1LogicImpl_PlusSSE xAND; -const G1LogicImpl_PlusSSE xOR; -const G1LogicImpl_PlusSSE xXOR; +const xImpl_G1Logic xAND; +const xImpl_G1Logic xOR; +const xImpl_G1Logic xXOR; -const G1ArithmeticImpl_PlusSSE xADD; -const G1ArithmeticImpl_PlusSSE xSUB; +const xImpl_G1Arith xADD; +const xImpl_G1Arith xSUB; -const Group1ImplAll xADC; -const Group1ImplAll xSBB; -const G1CompareImpl_PlusSSE xCMP; +const xImpl_Group1 xADC; +const xImpl_Group1 xSBB; +const xImpl_G1Compare xCMP; const Group2ImplAll xROL; const Group2ImplAll xROR; @@ -276,8 +276,8 @@ const Group3ImplAll xNOT; const Group3ImplAll xNEG; const Group3ImplAll xUMUL; const Group3ImplAll xUDIV; -const G3Impl_PlusSSE xDIV; -const iMul_PlusSSE xMUL; +const xImpl_Group3 xDIV; +const xImpl_iMul xMUL; const IncDecImplAll xINC; const IncDecImplAll xDEC; @@ -670,21 +670,32 @@ const MovhlImplAll<0x12> xMOVL; const MovhlImpl_RtoR<0x16> xMOVLH; const MovhlImpl_RtoR<0x12> xMOVHL; -const PLogicImplAll<0xdb> xPAND; -const PLogicImplAll<0xdf> xPANDN; -const PLogicImplAll<0xeb> xPOR; -const PLogicImplAll<0xef> xPXOR; +const SimdImpl_PackedLogic<0xdb> xPAND; +const SimdImpl_PackedLogic<0xdf> xPANDN; +const SimdImpl_PackedLogic<0xeb> xPOR; +const SimdImpl_PackedLogic<0xef> xPXOR; -const SSEAndNotImpl<0x55> xANDN; +const SimdImpl_AndNot<0x55> xANDN; -const SSEImpl_SS_SD<0x66,0x2e> xUCOMI; -const SSE_rSqrtImpl<0x53> xRCP; -const SSE_rSqrtImpl<0x52> xRSQRT; -const SSE_SqrtImpl<0x51> xSQRT; +const SimdImpl_SS_SD<0x66,0x2e> xUCOMI; +const SimdImpl_rSqrt<0x53> xRCP; +const SimdImpl_rSqrt<0x52> xRSQRT; +const SimdImpl_Sqrt<0x51> xSQRT; -const SSEImpl_PSPD_SSSD<0x5f> xMAX; -const SSEImpl_PSPD_SSSD<0x5d> xMIN; -const SSEImpl_Shuffle<0xc6> xSHUF; +const SimdImpl_PSPD_SSSD<0x5f> xMAX; 
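// Worked example (illustration only) of the opcode arithmetic behind the new
// SimdImpl_AddSub and SimdImpl_ShiftAll classes, as instantiated below for
// xPADD and xPSRL:
//
//   xPADD : SimdImpl_AddSub<0xfc,0xec,0xdc,0xd4>
//     .B -> 66 0F FC  PADDB     .SB  -> 66 0F EC  PADDSB
//     .W -> 66 0F FD  PADDW     .SW  -> 66 0F ED  PADDSW
//     .D -> 66 0F FE  PADDD     .USB -> 66 0F DC  PADDUSB
//     .Q -> 66 0F D4  PADDQ     .USW -> 66 0F DD  PADDUSW
//
//   xPSRL : SimdImpl_ShiftAll<0xd0,0x70,2>
//     .W/.D/.Q reg forms -> 66 0F D1 / D2 / D3  (PSRLW/PSRLD/PSRLQ)
//     .W/.D/.Q imm forms -> 66 0F 71 / 72 / 73  with /2 in the ModRM reg field
//     .DQ imm            -> 66 0F 73 /3         (PSRLDQ, whole-register byte shift)
//
// (MMX destinations drop the 66 prefix; PADDQ/PSUBQ and the DQ shifts are SSE2-only.)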
+const SimdImpl_PSPD_SSSD<0x5d> xMIN; +const SimdImpl_Shuffle<0xc6> xSHUF; + +// ------------------------------------------------------------------------ + +const SimdImpl_Compare xCMPEQ; +const SimdImpl_Compare xCMPLT; +const SimdImpl_Compare xCMPLE; +const SimdImpl_Compare xCMPUNORD; +const SimdImpl_Compare xCMPNE; +const SimdImpl_Compare xCMPNLT; +const SimdImpl_Compare xCMPNLE; +const SimdImpl_Compare xCMPORD; // ------------------------------------------------------------------------ // SSE Conversion Operations, as looney as they are. @@ -692,46 +703,43 @@ const SSEImpl_Shuffle<0xc6> xSHUF; // These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing // nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]). // -const SSEImpl_DestRegForm<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; -const SSEImpl_DestRegForm<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; +const SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; +const SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; -const SSEImpl_DestRegForm<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; -const SSEImpl_DestRegForm<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; -const SSEImpl_DestRegForm<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; +const SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; +const SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; +const SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; -const SSEImpl_DestRegForm<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; -const SSEImpl_DestRegForm<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; +const SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; +const SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; -const SSEImpl_DestRegForm<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; -const SSEImpl_DestRegForm<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; -const SSEImpl_DestRegForm<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; +const SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; +const SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; +const SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; -const SSEImpl_DestRegForm<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; -const SSEImpl_DestRegForm<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; -const SSEImpl_DestRegForm<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; -const SSEImpl_DestRegForm<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; +const SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; +const SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; +const SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; +const SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; -const SSEImpl_DestRegForm<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; -const SSEImpl_DestRegForm<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; +const SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; +const SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; -const SSEImpl_DestRegForm<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; -const SSEImpl_DestRegForm<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; -const 
SSEImpl_DestRegForm<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; -const SSEImpl_DestRegForm<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; +const SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; +const SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; +const SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; +const SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; -const SSEImpl_DestRegForm<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; -const SSEImpl_DestRegForm<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; +const SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; +const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; // ------------------------------------------------------------------------ -const SSECompareImpl xCMPEQ; -const SSECompareImpl xCMPLT; -const SSECompareImpl xCMPLE; -const SSECompareImpl xCMPUNORD; -const SSECompareImpl xCMPNE; -const SSECompareImpl xCMPNLT; -const SSECompareImpl xCMPNLE; -const SSECompareImpl xCMPORD; +const SimdImpl_ShiftAll<0xd0, 0x70, 2> xPSRL; +const SimdImpl_ShiftAll<0xf0, 0x70, 6> xPSLL; + +const SimdImpl_AddSub<0xfc, 0xec, 0xdc, 0xd4> xPADD; +const SimdImpl_AddSub<0xf8, 0xe8, 0xd8, 0xfb> xPSUB; ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index dbbfb9874c..dd1b1d5299 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -38,16 +38,16 @@ namespace x86Emitter // ------------------------------------------------------------------------ // Group 1 Instruction Class - extern const Internal::G1LogicImpl_PlusSSE xAND; - extern const Internal::G1LogicImpl_PlusSSE xOR; - extern const Internal::G1LogicImpl_PlusSSE xXOR; + extern const Internal::xImpl_G1Logic xAND; + extern const Internal::xImpl_G1Logic xOR; + extern const Internal::xImpl_G1Logic xXOR; - extern const Internal::G1ArithmeticImpl_PlusSSE xADD; - extern const Internal::G1ArithmeticImpl_PlusSSE xSUB; - extern const Internal::G1CompareImpl_PlusSSE xCMP; + extern const Internal::xImpl_G1Arith xADD; + extern const Internal::xImpl_G1Arith xSUB; + extern const Internal::xImpl_G1Compare xCMP; - extern const Internal::Group1ImplAll xADC; - extern const Internal::Group1ImplAll xSBB; + extern const Internal::xImpl_Group1 xADC; + extern const Internal::xImpl_Group1 xSBB; // ------------------------------------------------------------------------ // Group 2 Instruction Class @@ -74,8 +74,8 @@ namespace x86Emitter extern const Internal::Group3ImplAll xNEG; extern const Internal::Group3ImplAll xUMUL; extern const Internal::Group3ImplAll xUDIV; - extern const Internal::G3Impl_PlusSSE xDIV; - extern const Internal::iMul_PlusSSE xMUL; + extern const Internal::xImpl_Group3 xDIV; + extern const Internal::xImpl_iMul xMUL; extern const Internal::IncDecImplAll xINC; extern const Internal::IncDecImplAll xDEC; @@ -435,65 +435,73 @@ namespace x86Emitter // ------------------------------------------------------------------------ - extern const Internal::PLogicImplAll<0xdb> xPAND; - extern const Internal::PLogicImplAll<0xdf> xPANDN; - extern const Internal::PLogicImplAll<0xeb> xPOR; - extern const Internal::PLogicImplAll<0xef> xPXOR; + extern const Internal::SimdImpl_PackedLogic<0xdb> xPAND; + extern const Internal::SimdImpl_PackedLogic<0xdf> xPANDN; + extern const 
Internal::SimdImpl_PackedLogic<0xeb> xPOR; + extern const Internal::SimdImpl_PackedLogic<0xef> xPXOR; - extern const Internal::SSEAndNotImpl<0x55> xANDN; + extern const Internal::SimdImpl_AndNot<0x55> xANDN; - extern const Internal::SSEImpl_SS_SD<0x66,0x2e> xUCOMI; - extern const Internal::SSE_rSqrtImpl<0x53> xRCP; - extern const Internal::SSE_rSqrtImpl<0x52> xRSQRT; - extern const Internal::SSE_SqrtImpl<0x51> xSQRT; + extern const Internal::SimdImpl_SS_SD<0x66,0x2e> xUCOMI; + extern const Internal::SimdImpl_rSqrt<0x53> xRCP; + extern const Internal::SimdImpl_rSqrt<0x52> xRSQRT; + extern const Internal::SimdImpl_Sqrt<0x51> xSQRT; - extern const Internal::SSEImpl_PSPD_SSSD<0x5f> xMAX; - extern const Internal::SSEImpl_PSPD_SSSD<0x5d> xMIN; - extern const Internal::SSEImpl_Shuffle<0xc6> xSHUF; + extern const Internal::SimdImpl_PSPD_SSSD<0x5f> xMAX; + extern const Internal::SimdImpl_PSPD_SSSD<0x5d> xMIN; + extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF; // ------------------------------------------------------------------------ - extern const Internal::SSECompareImpl xCMPEQ; - extern const Internal::SSECompareImpl xCMPLT; - extern const Internal::SSECompareImpl xCMPLE; - extern const Internal::SSECompareImpl xCMPUNORD; - extern const Internal::SSECompareImpl xCMPNE; - extern const Internal::SSECompareImpl xCMPNLT; - extern const Internal::SSECompareImpl xCMPNLE; - extern const Internal::SSECompareImpl xCMPORD; + extern const Internal::SimdImpl_Compare xCMPEQ; + extern const Internal::SimdImpl_Compare xCMPLT; + extern const Internal::SimdImpl_Compare xCMPLE; + extern const Internal::SimdImpl_Compare xCMPUNORD; + extern const Internal::SimdImpl_Compare xCMPNE; + extern const Internal::SimdImpl_Compare xCMPNLT; + extern const Internal::SimdImpl_Compare xCMPNLE; + extern const Internal::SimdImpl_Compare xCMPORD; // ------------------------------------------------------------------------ // OMG Evil. I went cross-eyed an hour ago doing this. 
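// Illustrative call-site sketch for the strict conversion forms declared here
// (variable names are hypothetical; the point is the enforced operand widths):
//
//   xCVTSI2SS( xmm0, eax );                 // F3 0F 2A : int32 register -> scalar single
//   xCVTTSS2SI( eax, xmm0 );                // F3 0F 2C : truncating scalar single -> int32
//   xCVTTSS2SI( eax, (u32*)&someFloat );    // memory source must be passed as a u32*
//   xCVTDQ2PS( xmm1, (u128*)packedInts );   // 0F 5B : four packed int32 -> four singles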
// - extern const Internal::SSEImpl_DestRegForm<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; - extern const Internal::SSEImpl_DestRegForm<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; - extern const Internal::SSEImpl_DestRegForm<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; - extern const Internal::SSEImpl_DestRegForm<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; - extern const Internal::SSEImpl_DestRegForm<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; + extern const Internal::SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; + extern const Internal::SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; - extern const Internal::SSEImpl_DestRegForm<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; - extern const Internal::SSEImpl_DestRegForm<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; + extern const Internal::SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; - extern const Internal::SSEImpl_DestRegForm<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; - extern const Internal::SSEImpl_DestRegForm<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; - extern const Internal::SSEImpl_DestRegForm<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; + extern const Internal::SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; - extern const Internal::SSEImpl_DestRegForm<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; - extern const Internal::SSEImpl_DestRegForm<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; - extern const Internal::SSEImpl_DestRegForm<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; - extern const Internal::SSEImpl_DestRegForm<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; - extern const Internal::SSEImpl_DestRegForm<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; - extern const Internal::SSEImpl_DestRegForm<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; - extern const Internal::SSEImpl_DestRegForm<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; - extern const Internal::SSEImpl_DestRegForm<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; - extern const Internal::SSEImpl_DestRegForm<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; - extern const Internal::SSEImpl_DestRegForm<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> 
xCVTTPS2PI; + extern const Internal::SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; + extern const Internal::SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; - extern const Internal::SSEImpl_DestRegForm<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; - extern const Internal::SSEImpl_DestRegForm<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; + + // ------------------------------------------------------------------------ + + extern const Internal::SimdImpl_ShiftAll<0xd0, 0x70, 2> xPSRL; + extern const Internal::SimdImpl_ShiftAll<0xf0, 0x70, 6> xPSLL; + + extern const Internal::SimdImpl_AddSub<0xfc, 0xec, 0xdc, 0xd4> xPADD; + extern const Internal::SimdImpl_AddSub<0xf8, 0xe8, 0xd8, 0xfb> xPSUB; } diff --git a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp index e3db6e0e48..7bbca83e33 100644 --- a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp @@ -48,132 +48,51 @@ emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB emitterT void SSE2_P##mod##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod( xRegisterSSE(to), xRegisterSSE(from) ); } \ emitterT void SSE2_P##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod( xRegisterSSE(to), (void*)from ); } +#define DEFINE_LEGACY_ARITHMETIC( mod, sub ) \ + emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \ + emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \ + emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); } + +#define DEFINE_LEGACY_SHIFT_STUFF( mod, sub ) \ + emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \ + emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \ + emitterT void P##mod##sub##ItoR( x86MMXRegType to, u8 imm ) { xP##mod.sub( xRegisterMMX(to), imm ); } \ + emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_P##mod##sub##_I8_to_XMM( x86SSERegType to, u8 imm ) { xP##mod.sub( xRegisterSSE(to), imm ); } + +#define DEFINE_LEGACY_SHIFT_OPCODE( mod ) \ + DEFINE_LEGACY_SHIFT_STUFF( mod, Q ) \ + DEFINE_LEGACY_SHIFT_STUFF( mod, D ) \ + DEFINE_LEGACY_SHIFT_STUFF( mod, W ) \ + emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86MMXRegType to, u8 imm ) { xP##mod.DQ( xRegisterSSE(to), imm ); } + DEFINE_LEGACY_LOGIC_OPCODE( AND ) DEFINE_LEGACY_LOGIC_OPCODE( ANDN ) DEFINE_LEGACY_LOGIC_OPCODE( OR ) DEFINE_LEGACY_LOGIC_OPCODE( XOR ) +DEFINE_LEGACY_SHIFT_OPCODE( SLL ) 
+DEFINE_LEGACY_SHIFT_OPCODE( SRL ) -/* psllq r64 to r64 */ -emitterT void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xF30F ); - ModRM( 3, to, from ); -} +DEFINE_LEGACY_ARITHMETIC( ADD, B ) +DEFINE_LEGACY_ARITHMETIC( ADD, W ) +DEFINE_LEGACY_ARITHMETIC( ADD, D ) +DEFINE_LEGACY_ARITHMETIC( ADD, Q ) +DEFINE_LEGACY_ARITHMETIC( ADD, SB ) +DEFINE_LEGACY_ARITHMETIC( ADD, SW ) +DEFINE_LEGACY_ARITHMETIC( ADD, USB ) +DEFINE_LEGACY_ARITHMETIC( ADD, USW ) -/* psllq m64 to r64 */ -emitterT void PSLLQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xF30F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* psllq imm8 to r64 */ -emitterT void PSLLQItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x730F ); - ModRM( 3, 6, to); - write8( from ); -} - -/* psrlq r64 to r64 */ -emitterT void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xD30F ); - ModRM( 3, to, from ); -} - -/* psrlq m64 to r64 */ -emitterT void PSRLQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xD30F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* psrlq imm8 to r64 */ -emitterT void PSRLQItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x730F ); - ModRM( 3, 2, to); - write8( from ); -} - -/* paddusb r64 to r64 */ -emitterT void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xDC0F ); - ModRM( 3, to, from ); -} - -/* paddusb m64 to r64 */ -emitterT void PADDUSBMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xDC0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* paddusw r64 to r64 */ -emitterT void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xDD0F ); - ModRM( 3, to, from ); -} - -/* paddusw m64 to r64 */ -emitterT void PADDUSWMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xDD0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* paddb r64 to r64 */ -emitterT void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xFC0F ); - ModRM( 3, to, from ); -} - -/* paddb m64 to r64 */ -emitterT void PADDBMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xFC0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* paddw r64 to r64 */ -emitterT void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xFD0F ); - ModRM( 3, to, from ); -} - -/* paddw m64 to r64 */ -emitterT void PADDWMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xFD0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -/* paddd r64 to r64 */ -emitterT void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xFE0F ); - ModRM( 3, to, from ); -} - -/* paddd m64 to r64 */ -emitterT void PADDDMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xFE0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} +DEFINE_LEGACY_ARITHMETIC( SUB, B ) +DEFINE_LEGACY_ARITHMETIC( SUB, W ) +DEFINE_LEGACY_ARITHMETIC( SUB, D ) +DEFINE_LEGACY_ARITHMETIC( SUB, Q ) +DEFINE_LEGACY_ARITHMETIC( SUB, SB ) +DEFINE_LEGACY_ARITHMETIC( SUB, SW ) +DEFINE_LEGACY_ARITHMETIC( SUB, USB ) +DEFINE_LEGACY_ARITHMETIC( SUB, USW ) /* emms */ emitterT void EMMS() @@ -181,98 +100,6 @@ emitterT void EMMS() write16( 0x770F ); } -emitterT void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xEC0F ); - ModRM( 3, to, from ); -} - -emitterT void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xED0F ); - ModRM( 3, to, from ); -} - -// paddq m64 to r64 (sse2 only?) 
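// For reference (not part of the patch): the DEFINE_LEGACY_* macros above
// regenerate the hand-written MMX/SSE2 opcode writers removed here.  For
// example, DEFINE_LEGACY_ARITHMETIC( ADD, B ) expands to forwarding stubs
// equivalent to (among others):
//
//   emitterT void PADDBRtoR( x86MMXRegType to, x86MMXRegType from )
//   {   xPADD.B( xRegisterMMX(to), xRegisterMMX(from) ); }
//
//   emitterT void SSE2_PADDB_XMM_to_XMM( x86SSERegType to, x86SSERegType from )
//   {   xPADD.B( xRegisterSSE(to), xRegisterSSE(from) ); }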
-emitterT void PADDQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xD40F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// paddq r64 to r64 (sse2 only?) -emitterT void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xD40F ); - ModRM( 3, to, from ); -} - -emitterT void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xE80F ); - ModRM( 3, to, from ); -} - -emitterT void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xE90F ); - ModRM( 3, to, from ); -} - - -emitterT void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xF80F ); - ModRM( 3, to, from ); -} - -emitterT void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xF90F ); - ModRM( 3, to, from ); -} - -emitterT void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xFA0F ); - ModRM( 3, to, from ); -} - -emitterT void PSUBDMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xFA0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void PSUBUSBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xD80F ); - ModRM( 3, to, from ); -} - -emitterT void PSUBUSWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xD90F ); - ModRM( 3, to, from ); -} - -// psubq m64 to r64 (sse2 only?) -emitterT void PSUBQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xFB0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// psubq r64 to r64 (sse2 only?) -emitterT void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xFB0F ); - ModRM( 3, to, from ); -} - // pmuludq m64 to r64 (sse2 only?) emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) { @@ -338,46 +165,6 @@ emitterT void PCMPGTDMtoR( x86MMXRegType to, uptr from ) write32( MEMADDR(from, 4) ); } -emitterT void PSRLWItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x710F ); - ModRM( 3, 2 , to ); - write8( from ); -} - -emitterT void PSRLDItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x720F ); - ModRM( 3, 2 , to ); - write8( from ); -} - -emitterT void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xD20F ); - ModRM( 3, to, from ); -} - -emitterT void PSLLWItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x710F ); - ModRM( 3, 6 , to ); - write8( from ); -} - -emitterT void PSLLDItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x720F ); - ModRM( 3, 6 , to ); - write8( from ); -} - -emitterT void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xF20F ); - ModRM( 3, to, from ); -} - emitterT void PSRAWItoR( x86MMXRegType to, u8 from ) { write16( 0x710F ); diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index 8c259b5385..e239af26be 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -34,29 +34,6 @@ using namespace x86Emitter; ModRM( 0, to, DISP32 ), \ write32( MEMADDR(from, 4 + overb) ) -#define SSERtoM( code, overb ) \ - assert( from < iREGCNT_XMM), \ - RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) - -#define SSE_SS_MtoR( code, overb ) \ - assert( to < iREGCNT_XMM ), \ - write8( 0xf3 ), \ - RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) - -#define SSE_SS_RtoM( code, overb ) \ - assert( from < iREGCNT_XMM), \ - write8( 0xf3 ), \ - RexR(0, from), \ - write16( code ), \ - ModRM( 0, from, DISP32 ), \ - write32( MEMADDR(to, 4 + overb) ) - #define SSERtoR( code ) \ assert( to < iREGCNT_XMM && from < 
iREGCNT_XMM), \ RexRB(0, to, from), \ @@ -95,14 +72,7 @@ using namespace x86Emitter; RexR(0, to), \ write16( code ), \ ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) \ - -#define SSE_SD_RtoR( code ) \ - assert( to < iREGCNT_XMM && from < iREGCNT_XMM) , \ - write8( 0xf2 ), \ - RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) + write32( MEMADDR(from, 4 + overb) ) #define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \ emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \ @@ -220,21 +190,20 @@ emitterT void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { emitterT void SSE2_MOVSD_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVSDZX( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); } emitterT void SSE2_MOVSD_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVSD( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } -emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVL.PS( xRegisterSSE(to), (void*)from ); } +emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVL.PS( (void*)to, xRegisterSSE(from) ); } +emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVL.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); } +emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVL.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } -emitterT void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVL.PS( xRegisterSSE(to), (void*)from ); } -emitterT void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVL.PS( (void*)to, xRegisterSSE(from) ); } -emitterT void SSE_MOVLPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVL.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); } -emitterT void SSE_MOVLPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVL.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } - -emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVH.PS( xRegisterSSE(to), (void*)from ); } -emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVH.PS( (void*)to, xRegisterSSE(from) ); } -emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVH.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); } -emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVH.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } +emitterT void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ) { xMOVH.PS( xRegisterSSE(to), (void*)from ); } +emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVH.PS( (void*)to, xRegisterSSE(from) ); } +emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVH.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); } +emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVH.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_MASKMOVDQU_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); } emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } @@ -277,47 +246,6 @@ emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// -//emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x2a0f, 0 ); } -// emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { SSERtoR( 0x2a0f ); } -// -// emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { SSEMtoR( 0x2d0f, 0 ); } -// emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { SSERtoR( 0x2d0f ); } - -/* -emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { write8(0xf3); SSEMtoR(0x2c0f, 0); } -emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) -{ - write8(0xf3); - RexRB(0, to, from); - write16(0x2c0f); - ModRM(3, to, from); -} -*/ - -/*emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x2a0f, 0); } -emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) -{ - write8(0xf3); - RexRB(0, to, from); - write16(0x2a0f); - ModRM(3, to, from); -} - -emitterT void SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { SSE_SS_MtoR(0x5a0f, 0); } -emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x5a0f); } -*/ - -/*emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); } -emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SD_RtoR(0x5a0f); } - -emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x5b0f, 0 ); } -emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x5b0f ); } - -emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x5b0f ); } -emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x5b0f ); } - -emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ write8(0xf3); SSERtoR(0x5b0f); }*/ - /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ @@ -389,19 +317,6 @@ emitterT void SSE_LDMXCSR( uptr from ) { write32( MEMADDR(from, 4) ); } -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PADDB,PADDW,PADDD : Add Packed Integers * -//********************************************************************************** -emitterT void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFC0F ); } -emitterT void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFC0F ); } -emitterT void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFD0F ); } -emitterT void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFD0F ); } -emitterT void 
SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFE0F ); } -emitterT void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFE0F ); } -emitterT void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD40F ); } -emitterT void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ) { SSEMtoR66( 0xD40F ); } - /////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PCMPxx: Compare Packed Integers * @@ -426,65 +341,8 @@ emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); } -//////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PSUBx: Subtract Packed Integers * -//********************************************************************************** -emitterT void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF80F ); } -emitterT void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF80F ); } -emitterT void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xF90F ); } -emitterT void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xF90F ); } -emitterT void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFA0F ); } -emitterT void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFA0F ); } -emitterT void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0xFB0F ); } -emitterT void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0xFB0F ); } - /////////////////////////////////////////////////////////////////////////////////////// -// shift right logical - -emitterT void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD10F); } -emitterT void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD10F); } -emitterT void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x710F ); - ModRM( 3, 2 , to ); - write8( imm8 ); -} - -emitterT void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD20F); } -emitterT void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD20F); } -emitterT void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x720F ); - ModRM( 3, 2 , to ); - write8( imm8 ); -} - -emitterT void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xD30F); } -emitterT void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xD30F); } -emitterT void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x730F ); - ModRM( 3, 2 , to ); - write8( imm8 ); -} - -emitterT void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x730F ); - ModRM( 3, 3 , to ); - write8( imm8 ); -} - // shift right arithmetic emitterT void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); } @@ -509,50 +367,6 @@ emitterT void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8) write8( 
imm8 ); } -// shift left logical - -emitterT void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF10F); } -emitterT void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF10F); } -emitterT void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x710F ); - ModRM( 3, 6 , to ); - write8( imm8 ); -} - -emitterT void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF20F); } -emitterT void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF20F); } -emitterT void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x720F ); - ModRM( 3, 6 , to ); - write8( imm8 ); -} - -emitterT void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF30F); } -emitterT void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xF30F); } -emitterT void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x730F ); - ModRM( 3, 6 , to ); - write8( imm8 ); -} - -emitterT void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x730F ); - ModRM( 3, 7 , to ); - write8( imm8 ); -} - emitterT void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEE0F ); } emitterT void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEE0F ); } @@ -565,28 +379,6 @@ emitterT void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR emitterT void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDA0F ); } emitterT void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDA0F ); } -emitterT void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEC0F ); } -emitterT void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEC0F ); } - -emitterT void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xED0F ); } -emitterT void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xED0F ); } - -emitterT void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE80F ); } -emitterT void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xE80F ); } - -emitterT void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xE90F ); } -emitterT void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xE90F ); } - -emitterT void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD80F ); } -emitterT void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD80F ); } -emitterT void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xD90F ); } -emitterT void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xD90F ); } - -emitterT void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDC0F ); } -emitterT void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDC0F ); } -emitterT void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDD0F ); } -emitterT void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDD0F ); } - //**********************************************************************************/ //PACKSSWB,PACKSSDW: Pack Saturate Signed Word 
//********************************************************************************** diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index a9604653b6..f788085771 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -693,6 +693,10 @@ namespace x86Emitter extern void EmitSibMagic( uint regfield, const ModSibBase& info ); // ------------------------------------------------------------------------ + + template< typename T > bool Is8BitOp() { return sizeof(T) == 1; } + template< typename T > void prefix16() { if( sizeof(T) == 2 ) xWrite( 0x66 ); } + #include "implement/xmm/movqss.h" #include "implement/group1.h" #include "implement/group2.h" From 50c4a95c060fdc870f34437a20bec46575847895 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Mon, 20 Apr 2009 19:37:41 +0000 Subject: [PATCH 117/143] microVU: added support for logging complete micro programs saved to the files microVU0.txt/microVU1.txt (in the logs folder) note: currently the code only lists the opcodes without the operands. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1033 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 + pcsx2/x86/microVU.cpp | 34 +- pcsx2/x86/microVU.h | 21 +- pcsx2/x86/microVU_Analyze.inl | 32 +- pcsx2/x86/microVU_Compile.inl | 16 +- pcsx2/x86/microVU_Execute.inl | 9 +- pcsx2/x86/microVU_Log.inl | 91 +++++ pcsx2/x86/microVU_Lower.inl | 367 ++++++++++++--------- pcsx2/x86/microVU_Misc.h | 18 +- pcsx2/x86/microVU_Tables.inl | 338 ++++++++++++++++++- pcsx2/x86/microVU_Upper.inl | 361 ++++++++++---------- 11 files changed, 902 insertions(+), 389 deletions(-) create mode 100644 pcsx2/x86/microVU_Log.inl diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index cd443e42ad..380a14991c 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2509,6 +2509,10 @@ RelativePath="..\..\x86\microVU_Execute.inl" > + + diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index 9e362768af..ae11ad3b1a 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -28,6 +28,7 @@ PCSX2_ALIGNED16(microVU microVU0); PCSX2_ALIGNED16(microVU microVU1); +FILE *mVUlogFile[2] = {NULL, NULL}; declareAllVariables // Declares All Global Variables :D //------------------------------------------------------------------ @@ -44,7 +45,8 @@ microVUt(void) mVUinit(VURegs* vuRegsPtr) { mVU->progSize = (vuIndex ? 0x4000 : 0x1000) / 4; mVU->cache = NULL; memset(&mVU->prog, 0, sizeof(mVU->prog)); - mVUlog((vuIndex) ? "microVU1: init" : "microVU0: init"); + mVUprint((vuIndex) ? "microVU1: init" : "microVU0: init"); + mVUsetupLog(); mVUreset(); } @@ -55,7 +57,7 @@ microVUt(void) mVUreset() { microVU* mVU = mVUx; mVUclose(); // Close - mVUlog((vuIndex) ? "microVU1: reset" : "microVU0: reset"); + mVUprint((vuIndex) ? "microVU1: reset" : "microVU0: reset"); // Dynarec Cache mVU->cache = SysMmapEx((vuIndex ? 0x1e840000 : 0x0e840000), mVU->cacheSize, 0, (vuIndex ? "Micro VU1" : "Micro VU0")); @@ -95,7 +97,7 @@ microVUt(void) mVUreset() { microVUt(void) mVUclose() { microVU* mVU = mVUx; - mVUlog((vuIndex) ? "microVU1: close" : "microVU0: close"); + mVUprint((vuIndex) ? 
"microVU1: close" : "microVU0: close"); if ( mVU->cache ) { HostSys::Munmap( mVU->cache, mVU->cacheSize ); mVU->cache = NULL; } @@ -124,7 +126,8 @@ microVUt(void) mVUclear(u32 addr, u32 size) { //------------------------------------------------------------------ // Clears program data (Sets used to 1 because calling this function implies the program will be used at least once) -__forceinline void mVUclearProg(microVU* mVU, int progIndex) { +microVUt(void) mVUclearProg(int progIndex) { + microVU* mVU = mVUx; mVU->prog.prog[progIndex].used = 1; mVU->prog.prog[progIndex].x86ptr = mVU->prog.prog[progIndex].x86start; for (u32 i = 0; i < (mVU->progSize / 2); i++) { @@ -133,15 +136,19 @@ __forceinline void mVUclearProg(microVU* mVU, int progIndex) { } // Caches Micro Program -__forceinline void mVUcacheProg(microVU* mVU, int progIndex) { +microVUt(void) mVUcacheProg(int progIndex) { + microVU* mVU = mVUx; memcpy_fast(mVU->prog.prog[progIndex].data, mVU->regs->Micro, mVU->microSize); + mVUdumpProg(progIndex); } // Finds the least used program, (if program list full clears and returns an old program; if not-full, returns free program) -__forceinline int mVUfindLeastUsedProg(microVU* mVU) { +microVUt(int) mVUfindLeastUsedProg() { + microVU* mVU = mVUx; if (mVU->prog.total < mVU->prog.max) { mVU->prog.total++; - mVUcacheProg(mVU, mVU->prog.total); // Cache Micro Program + mVUcacheProg(mVU->prog.total); // Cache Micro Program + Console::Notice("microVU: Program Total = %d", params mVU->prog.total); return mVU->prog.total; } else { @@ -153,27 +160,28 @@ __forceinline int mVUfindLeastUsedProg(microVU* mVU) { j = i; } } - mVUclearProg(mVU, j); // Clear old data if overwriting old program - mVUcacheProg(mVU, j); // Cache Micro Program - mVUlog("microVU: Program Cache got Full!"); + mVUclearProg(j); // Clear old data if overwriting old program + mVUcacheProg(j); // Cache Micro Program + Console::Notice("microVU: Program Cache got Full!"); return j; } } // Searches for Cached Micro Program and sets prog.cur to it (returns 1 if program found, else returns 0) -__forceinline int mVUsearchProg(microVU* mVU) { +microVUt(int) mVUsearchProg() { + microVU* mVU = mVUx; if (mVU->prog.cleared) { // If cleared, we need to search for new program for (int i = 0; i <= mVU->prog.total; i++) { //if (i == mVU->prog.cur) continue; // We can skip the current program. 
(ToDo: Verify that games don't clear, and send the same microprogram :/) if (!memcmp_mmx(mVU->prog.prog[i].data, mVU->regs->Micro, mVU->microSize)) { - //if (i == mVU->prog.cur) { mVUlog("microVU: Same micro program sent!"); } + //if (i == mVU->prog.cur) { mVUprint("microVU: Same micro program sent!"); } mVU->prog.cur = i; mVU->prog.cleared = 0; mVU->prog.prog[i].used++; return 1; } } - mVU->prog.cur = mVUfindLeastUsedProg(mVU); // If cleared and program not found, make a new program instance + mVU->prog.cur = mVUfindLeastUsedProg(); // If cleared and program not found, make a new program instance mVU->prog.cleared = 0; return 0; } diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index c163025bb1..0447c68676 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -17,7 +17,8 @@ */ #pragma once -//#define mVUdebug // Prints Extra Info to Console +#define mVUdebug // Prints Extra Info to Console +//#define mVUlogProg // Dumps MicroPrograms into microVU0.txt/microVU1.txt #include "Common.h" #include "VU.h" #include "GS.h" @@ -48,16 +49,16 @@ public: } microBlock* search(microRegInfo* pState) { if (listSize < 0) return NULL; - if (blockList[0].pState.needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) + //if (blockList[0].pState.needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) for (int i = 0; i <= listSize; i++) { if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo))) return &blockList[i]; } - } + /*} else { // Can do Simple Search (Only Matches the Important Pipeline Stuff) for (int i = 0; i <= listSize; i++) { if ((blockList[i].pState.q == pState->q) && (blockList[i].pState.p == pState->p)) { return &blockList[i]; } } - } + }*/ return NULL; } }; @@ -118,6 +119,9 @@ extern PCSX2_ALIGNED16(microVU microVU1); extern void (*mVU_UPPER_OPCODE[64])( VURegs* VU, s32 info ); extern void (*mVU_LOWER_OPCODE[128])( VURegs* VU, s32 info ); +// Used for logging microPrograms +extern FILE *mVUlogFile[2]; + // Main Functions microVUt(void) mVUinit(VURegs*); microVUt(void) mVUreset(); @@ -125,10 +129,10 @@ microVUt(void) mVUclose(); microVUt(void) mVUclear(u32, u32); // Private Functions -__forceinline void mVUclearProg(microVU* mVU, int progIndex); -__forceinline int mVUfindLeastUsedProg(microVU* mVU); -__forceinline int mVUsearchProg(microVU* mVU); -__forceinline void mVUcacheProg(microVU* mVU, int progIndex); +microVUt(void) mVUclearProg(microVU* mVU, int progIndex); +microVUt(int) mVUfindLeastUsedProg(microVU* mVU); +microVUt(int) mVUsearchProg(microVU* mVU); +microVUt(void) mVUcacheProg(microVU* mVU, int progIndex); void* __fastcall mVUexecuteVU0(u32 startPC, u32 cycles); void* __fastcall mVUexecuteVU1(u32 startPC, u32 cycles); @@ -140,6 +144,7 @@ typedef void (*mVUrecCall)(u32, u32) __attribute__((__fastcall)); // Not sure if // Include all the *.inl files (Needed because C++ sucks with templates and *.cpp files) #include "microVU_Misc.inl" +#include "microVU_Log.inl" #include "microVU_Analyze.inl" #include "microVU_Alloc.inl" #include "microVU_Upper.inl" diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 506144262b..0605559741 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -51,7 +51,7 @@ microVUt(void) mVUanalyzeFMAC1(int Fd, int Fs, int Ft) { microVU* mVU = mVUx; - //mVUlog("microVU: FMAC1 Opcode"); + //mVUprint("microVU: FMAC1 Opcode"); mVUinfo |= _doStatus; analyzeReg1(Fs); analyzeReg1(Ft); @@ -64,7 +64,7 @@ microVUt(void) mVUanalyzeFMAC1(int Fd, int Fs, int Ft) { 
microVUt(void) mVUanalyzeFMAC2(int Fs, int Ft) { microVU* mVU = mVUx; - //mVUlog("microVU: FMAC2 Opcode"); + //mVUprint("microVU: FMAC2 Opcode"); analyzeReg1(Fs); analyzeReg2(Ft); } @@ -84,7 +84,7 @@ microVUt(void) mVUanalyzeFMAC2(int Fs, int Ft) { microVUt(void) mVUanalyzeFMAC3(int Fd, int Fs, int Ft) { microVU* mVU = mVUx; - //mVUlog("microVU: FMAC3 Opcode"); + //mVUprint("microVU: FMAC3 Opcode"); mVUinfo |= _doStatus; analyzeReg1(Fs); analyzeReg3(Ft); @@ -114,7 +114,7 @@ microVUt(void) mVUanalyzeFMAC4(int Fs, int Ft) { microVUt(void) mVUanalyzeIALU1(int Id, int Is, int It) { microVU* mVU = mVUx; - //mVUlog("microVU: IALU1 Opcode"); + //mVUprint("microVU: IALU1 Opcode"); if (!Id) { mVUinfo |= _isNOP; } analyzeVIreg1(Is); analyzeVIreg1(It); @@ -123,7 +123,7 @@ microVUt(void) mVUanalyzeIALU1(int Id, int Is, int It) { microVUt(void) mVUanalyzeIALU2(int Is, int It) { microVU* mVU = mVUx; - //mVUlog("microVU: IALU2 Opcode"); + //mVUprint("microVU: IALU2 Opcode"); if (!It) { mVUinfo |= _isNOP; } analyzeVIreg1(Is); analyzeVIreg2(It, 1); @@ -145,7 +145,7 @@ microVUt(void) mVUanalyzeIALU2(int Is, int It) { microVUt(void) mVUanalyzeMR32(int Fs, int Ft) { microVU* mVU = mVUx; - mVUlog("microVU: MR32 Opcode"); + mVUprint("microVU: MR32 Opcode"); if (!Ft) { mVUinfo |= _isNOP; } analyzeReg6(Fs); analyzeReg2(Ft); @@ -171,7 +171,7 @@ microVUt(void) mVUanalyzeMR32(int Fs, int Ft) { microVUt(void) mVUanalyzeFDIV(int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { microVU* mVU = mVUx; - mVUlog("microVU: DIV Opcode"); + mVUprint("microVU: DIV Opcode"); analyzeReg5(Fs, Fsf); analyzeReg5(Ft, Ftf); analyzeQreg(xCycles); @@ -183,14 +183,14 @@ microVUt(void) mVUanalyzeFDIV(int Fs, int Fsf, int Ft, int Ftf, u8 xCycles) { microVUt(void) mVUanalyzeEFU1(int Fs, int Fsf, u8 xCycles) { microVU* mVU = mVUx; - mVUlog("microVU: EFU Opcode"); + mVUprint("microVU: EFU Opcode"); analyzeReg5(Fs, Fsf); analyzePreg(xCycles); } microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { microVU* mVU = mVUx; - mVUlog("microVU: EFU Opcode"); + mVUprint("microVU: EFU Opcode"); analyzeReg1(Fs); analyzePreg(xCycles); } @@ -201,7 +201,7 @@ microVUt(void) mVUanalyzeEFU2(int Fs, u8 xCycles) { microVUt(void) mVUanalyzeMFP(int Ft) { microVU* mVU = mVUx; - mVUlog("microVU: MFP Opcode"); + mVUprint("microVU: MFP Opcode"); if (!Ft) { mVUinfo |= _isNOP; } analyzeReg2(Ft); } @@ -212,7 +212,7 @@ microVUt(void) mVUanalyzeMFP(int Ft) { microVUt(void) mVUanalyzeLQ(int Ft, int Is, bool writeIs) { microVU* mVU = mVUx; - //mVUlog("microVU: LQ Opcode"); + //mVUprint("microVU: LQ Opcode"); analyzeVIreg1(Is); analyzeReg2(Ft); if (!Ft) { mVUinfo |= (writeIs && Is) ? _noWriteVF : _isNOP; } @@ -238,14 +238,14 @@ microVUt(void) mVUanalyzeSQ(int Fs, int It, bool writeIt) { microVUt(void) mVUanalyzeR1(int Fs, int Fsf) { microVU* mVU = mVUx; - mVUlog("microVU: R-reg Opcode"); + mVUprint("microVU: R-reg Opcode"); analyzeReg5(Fs, Fsf); analyzeRreg(); } microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { microVU* mVU = mVUx; - mVUlog("microVU: R-reg Opcode"); + mVUprint("microVU: R-reg Opcode"); if (!Ft) { mVUinfo |= ((canBeNOP) ? 
_isNOP : _noWriteVF); } analyzeReg2(Ft); analyzeRreg(); @@ -257,7 +257,7 @@ microVUt(void) mVUanalyzeR2(int Ft, bool canBeNOP) { microVUt(void) mVUanalyzeSflag(int It) { microVU* mVU = mVUx; - mVUlog("microVU: Sflag Opcode"); + mVUprint("microVU: Sflag Opcode"); if (!It) { mVUinfo |= _isNOP; } else { // Sets _isSflag at instruction that FSxxx opcode reads it's status flag from mVUinfo |= _swapOps; @@ -273,7 +273,7 @@ microVUt(void) mVUanalyzeSflag(int It) { microVUt(void) mVUanalyzeFSSET() { microVU* mVU = mVUx; mVUinfo |= _isFSSET; - mVUlog("microVU: FSSET Opcode"); + mVUprint("microVU: FSSET Opcode"); // mVUinfo &= ~_doStatus; // Note: I'm not entirely sure if the non-sticky flags // should be taken from the current upper instruction @@ -287,7 +287,7 @@ microVUt(void) mVUanalyzeFSSET() { microVUt(void) mVUanalyzeMflag(int Is, int It) { microVU* mVU = mVUx; - mVUlog("microVU: Mflag Opcode"); + mVUprint("microVU: Mflag Opcode"); if (!It) { mVUinfo |= _isNOP; } else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed) mVUinfo |= _swapOps; diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index a7d67983ef..09a81815f5 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -25,7 +25,7 @@ #define branchCase(JMPcc, nJMPcc) \ mVUsetupBranch(bStatus, bMac); \ - mVUlog("mVUcompile branchCase"); \ + mVUprint("mVUcompile branchCase"); \ CMP16ItoM((uptr)&mVU->branch, 0); \ incPC2(1); \ pBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \ @@ -142,7 +142,7 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { // Recompiles Code for Proper Flags and Q/P regs on Block Linkings microVUt(void) mVUsetupBranch(int* bStatus, int* bMac) { microVU* mVU = mVUx; - mVUlog("mVUsetupBranch"); + mVUprint("mVUsetupBranch"); PUSH32R(gprR); // Backup gprR MOV32RtoM((uptr)&mVU->espBackup, gprESP); @@ -268,7 +268,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { microVU* mVU = mVUx; u8* thisPtr = x86Ptr; - if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUlog("microVU: invalid startPC"); } + if (startPC > ((vuIndex) ? 0x3fff : 0xfff)) { mVUprint("microVU: invalid startPC"); } startPC &= (vuIndex ? 
0x3ff8 : 0xff8); // Searches for Existing Compiled Block (if found, then returns; else, compile) @@ -336,7 +336,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { case 8: branchCase(JNZ32, JZ32); // IBNEQ case 1: case 2: // B/BAL - mVUlog("mVUcompile B/BAL"); + mVUprint("mVUcompile B/BAL"); incPC(-3); // Go back to branch opcode (to get branch imm addr) mVUsetupBranch(bStatus, bMac); @@ -348,7 +348,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { return thisPtr; case 9: case 10: // JR/JALR - mVUlog("mVUcompile JR/JALR"); + mVUprint("mVUcompile JR/JALR"); memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); mVUsetupBranch(bStatus, bMac); @@ -364,7 +364,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { return thisPtr; } // Conditional Branches - mVUlog("mVUcompile conditional branch"); + mVUprint("mVUcompile conditional branch"); if (pBlock) { // Branch non-taken has already been compiled incPC(-3); // Go back to branch opcode (to get branch imm addr) // Check if branch-block has already been compiled @@ -386,8 +386,8 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { return thisPtr; } } - mVUlog("mVUcompile ebit"); - if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { mVUlog("microVU: Possible infinite compiling loop!"); } + mVUprint("mVUcompile ebit"); + if (x == (vuIndex?(0x3fff/8):(0xfff/8))) { mVUprint("microVU: Possible infinite compiling loop!"); } // Do E-bit end stuff here mVUendProgram(); diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index 6714813aec..ebb5b3c633 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -15,6 +15,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ + #pragma once #ifdef PCSX2_MICROVU @@ -22,7 +23,7 @@ // Dispatcher Functions //------------------------------------------------------------------ -void testFunction() { mVUlog("microVU: Entered Execution Mode"); } +void testFunction() { mVUprint("microVU: Entered Execution Mode"); } // Generates the code for entering recompiled blocks microVUt(void) mVUdispatcherA() { @@ -128,9 +129,9 @@ microVUt(void) mVUdispatcherB() { microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { microVU* mVU = mVUx; - //mVUlog("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles); + //mVUprint("microVU%x: startPC = 0x%x, cycles = 0x%x", params vuIndex, startPC, cycles); - mVUsearchProg(mVU); // Find and set correct program + mVUsearchProg(); // Find and set correct program mVU->cycles = cycles; mVU->totalCycles = cycles; @@ -145,7 +146,7 @@ microVUt(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) { microVUt(void) mVUcleanUp() { microVU* mVU = mVUx; - mVUlog("microVU: Program exited successfully!"); + //mVUprint("microVU: Program exited successfully!"); mVUcurProg.x86ptr = x86Ptr; mVUcacheCheck(x86Ptr, mVUcurProg.x86start, (uptr)(mVUcurProg.x86end - mVUcurProg.x86start)); } diff --git a/pcsx2/x86/microVU_Log.inl b/pcsx2/x86/microVU_Log.inl new file mode 100644 index 0000000000..c04ea798a2 --- /dev/null +++ b/pcsx2/x86/microVU_Log.inl @@ -0,0 +1,91 @@ +/* Pcsx2 - Pc Ps2 Emulator +* Copyright (C) 2009 Pcsx2-Playground Team +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any 
later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +*/ + +#pragma once +#ifdef PCSX2_MICROVU + +microVUt(void) __mVUsetupLog() { + if (!vuIndex) { if (!mVUlogFile[0]) mVUlogFile[0] = fopen(LOGS_DIR "\\microVU0.txt", "w"); } + else { if (!mVUlogFile[1]) mVUlogFile[1] = fopen(LOGS_DIR "\\microVU1.txt", "w"); } +} + +// writes text directly to the microVU.txt, no newlines appended. +microVUx(void) __mVULog(const char* fmt, ...) { + char tmp[2024]; + va_list list; + + va_start(list, fmt); + + // concatenate the log message after the prefix: + int length = vsprintf(tmp, fmt, list); + va_end(list); + + if (mVUlogFile[vuIndex]) { + fputs(tmp, mVUlogFile[vuIndex]); + //fputs("\n", mVUlogFile[vuIndex]); + fflush(mVUlogFile[vuIndex]); + } +} + +#define commaIf() { if (bitX[6]) { mVUlog(","); bitX[6] = 0; } } + +microVUt(void) __mVUdumpProgram(int progIndex) { + microVU* mVU = mVUx; + bool bitX[7]; + mVUlog("*********************\n", progIndex); + mVUlog("* Micro-Program #%02d *\n", progIndex); + mVUlog("*********************\n\n", progIndex); + for (u32 i = 0; i < mVU->progSize; i+=2) { + + mVU->code = mVU->prog.prog[progIndex].data[i+1]; + mVUlog("[%04x] (%08x) ", i*4, mVU->code); + + bitX[0] = 0; + bitX[1] = 0; + bitX[2] = 0; + bitX[3] = 0; + bitX[4] = 0; + bitX[5] = 0; + bitX[6] = 0; + + if (mVU->code & _Ibit_) {bitX[0] = 1; bitX[5] = 1;} + if (mVU->code & _Ebit_) {bitX[1] = 1; bitX[5] = 1;} + if (mVU->code & _Mbit_) {bitX[2] = 1; bitX[5] = 1;} + if (mVU->code & _Dbit_) {bitX[3] = 1; bitX[5] = 1;} + if (mVU->code & _Tbit_) {bitX[4] = 1; bitX[5] = 1;} + + mVUopU(); + + if (bitX[5]) { + mVUlog(" ("); + if (bitX[0]) { mVUlog("I"); bitX[6] = 1; } + if (bitX[1]) { commaIf(); mVUlog("E"); bitX[6] = 1; } + if (bitX[2]) { commaIf(); mVUlog("M"); bitX[6] = 1; } + if (bitX[3]) { commaIf(); mVUlog("D"); bitX[6] = 1; } + if (bitX[4]) { commaIf(); mVUlog("T"); } + mVUlog(")"); + } + + mVU->code = mVU->prog.prog[progIndex].data[i]; + mVUlog("\n[%04x] (%08x) ", i*4, mVU->code); + mVUopL(); + mVUlog("\n\n"); + } +} + +#endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 8626ec175f..0ae4c5011b 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -45,8 +45,8 @@ microVUf(void) mVU_DIV() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_, 7); } - else { + pass1 { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_, 7); } + pass2 { u8 *ajmp, *bjmp, *cjmp, *djmp; getReg5(xmmFs, _Fs_, _Fsf_); getReg5(xmmFt, _Ft_, _Ftf_); @@ -76,12 +76,13 @@ microVUf(void) mVU_DIV() { mVUunpack_xyzw(xmmFs, xmmFs, 0); mVUmergeRegs(xmmPQ, xmmFs, writeQ ? 4 : 8); } + pass3 { mVUlog("DIV"); } } microVUf(void) mVU_SQRT() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFDIV(0, 0, _Ft_, _Ftf_, 7); } - else { + pass1 { mVUanalyzeFDIV(0, 0, _Ft_, _Ftf_, 7); } + pass2 { u8 *ajmp; getReg5(xmmFt, _Ft_, _Ftf_); @@ -93,12 +94,13 @@ microVUf(void) mVU_SQRT() { mVUunpack_xyzw(xmmFt, xmmFt, 0); mVUmergeRegs(xmmPQ, xmmFt, writeQ ? 
4 : 8); } + pass3 { mVUlog("SQRT"); } } microVUf(void) mVU_RSQRT() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_, 13); } - else { + pass1 { mVUanalyzeFDIV(_Fs_, _Fsf_, _Ft_, _Ftf_, 13); } + pass2 { u8 *ajmp, *bjmp, *cjmp, *djmp; getReg5(xmmFs, _Fs_, _Fsf_); getReg5(xmmFt, _Ft_, _Ftf_); @@ -130,6 +132,7 @@ microVUf(void) mVU_RSQRT() { mVUunpack_xyzw(xmmFs, xmmFs, 0); mVUmergeRegs(xmmPQ, xmmFs, writeQ ? 4 : 8); } + pass3 { mVUlog("RSQRT"); } } //------------------------------------------------------------------ @@ -166,8 +169,8 @@ microVUt(void) mVU_EATAN_() { microVUf(void) mVU_EATAN() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 54); } - else { + pass1 { mVUanalyzeEFU1(_Fs_, _Fsf_, 54); } + pass2 { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -178,12 +181,13 @@ microVUf(void) mVU_EATAN() { mVU_EATAN_(); } + pass3 { mVUlog("EATAN"); } } microVUf(void) mVU_EATANxy() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU2(_Fs_, 54); } - else { + pass1 { mVUanalyzeEFU2(_Fs_, 54); } + pass2 { getReg6(xmmFt, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x01); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -195,12 +199,13 @@ microVUf(void) mVU_EATANxy() { mVU_EATAN_(); } + pass3 { mVUlog("EATANxy"); } } microVUf(void) mVU_EATANxz() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU2(_Fs_, 54); } - else { + pass1 { mVUanalyzeEFU2(_Fs_, 54); } + pass2 { getReg6(xmmFt, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmFs, xmmFt, 0x02); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance @@ -212,6 +217,7 @@ microVUf(void) mVU_EATANxz() { mVU_EATAN_(); } + pass3 { mVUlog("EATANxz"); } } #define eexpHelper(addr) { \ @@ -223,8 +229,8 @@ microVUf(void) mVU_EATANxz() { microVUf(void) mVU_EEXP() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 44); } - else { + pass1 { mVUanalyzeEFU1(_Fs_, _Fsf_, 44); } + pass2 { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); @@ -251,6 +257,7 @@ microVUf(void) mVU_EEXP() { SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmT1); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back } + pass3 { mVUlog("EEXP"); } } microVUt(void) mVU_sumXYZ() { @@ -271,20 +278,21 @@ microVUt(void) mVU_sumXYZ() { microVUf(void) mVU_ELENG() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU2(_Fs_, 18); } - else { + pass1 { mVUanalyzeEFU2(_Fs_, 18); } + pass2 { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(); SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmPQ); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back } + pass3 { mVUlog("ELENG"); } } microVUf(void) mVU_ERCPR() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 12); } - else { + pass1 { mVUanalyzeEFU1(_Fs_, _Fsf_, 12); } + pass2 { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); @@ -293,12 +301,13 @@ microVUf(void) mVU_ERCPR() { SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 
0x27 : 0xC6); // Flip back } + pass3 { mVUlog("ERCPR"); } } microVUf(void) mVU_ERLENG() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU2(_Fs_, 24); } - else { + pass1 { mVUanalyzeEFU2(_Fs_, 24); } + pass2 { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(); @@ -308,12 +317,13 @@ microVUf(void) mVU_ERLENG() { SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back } + pass3 { mVUlog("ERLENG"); } } microVUf(void) mVU_ERSADD() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU2(_Fs_, 18); } - else { + pass1 { mVUanalyzeEFU2(_Fs_, 18); } + pass2 { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(); @@ -323,12 +333,13 @@ microVUf(void) mVU_ERSADD() { SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back } + pass3 { mVUlog("ERSADD"); } } microVUf(void) mVU_ERSQRT() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 18); } - else { + pass1 { mVUanalyzeEFU1(_Fs_, _Fsf_, 18); } + pass2 { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs); @@ -337,17 +348,19 @@ microVUf(void) mVU_ERSQRT() { SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back } + pass3 { mVUlog("ERSQRT"); } } microVUf(void) mVU_ESADD() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU2(_Fs_, 11); } - else { + pass1 { mVUanalyzeEFU2(_Fs_, 11); } + pass2 { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance mVU_sumXYZ(); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back } + pass3 { mVUlog("ESADD"); } } #define esinHelper(addr) { \ @@ -359,8 +372,8 @@ microVUf(void) mVU_ESADD() { microVUf(void) mVU_ESIN() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU2(_Fs_, 29); } - else { + pass1 { mVUanalyzeEFU2(_Fs_, 29); } + pass2 { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); @@ -381,23 +394,25 @@ microVUf(void) mVU_ESIN() { SSE_ADDSS_XMM_to_XMM(xmmPQ, xmmT1); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back } + pass3 { mVUlog("ESIN"); } } microVUf(void) mVU_ESQRT() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU1(_Fs_, _Fsf_, 12); } - else { + pass1 { mVUanalyzeEFU1(_Fs_, _Fsf_, 12); } + pass2 { getReg5(xmmFs, _Fs_, _Fsf_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE_SQRTSS_XMM_to_XMM(xmmPQ, xmmFs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip back } + pass3 { mVUlog("ESQRT"); } } microVUf(void) mVU_ESUM() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeEFU2(_Fs_, 12); } - else { + pass1 { mVUanalyzeEFU2(_Fs_, 12); } + pass2 { getReg6(xmmFs, _Fs_); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance SSE2_PSHUFD_XMM_to_XMM(xmmFt, xmmFs, 0x1b); @@ -407,6 +422,7 @@ microVUf(void) mVU_ESUM() { SSE_MOVSS_XMM_to_XMM(xmmPQ, xmmFs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, writeP ? 
0x27 : 0xC6); // Flip back } + pass3 { mVUlog("ESUM"); } } //------------------------------------------------------------------ @@ -415,57 +431,62 @@ microVUf(void) mVU_ESUM() { microVUf(void) mVU_FCAND() { microVU* mVU = mVUx; - if (!recPass) {mVUlog("clip broken");} - else { + pass1 { mVUprint("clip broken"); } + pass2 { mVUallocCFLAGa(gprT1, fvcInstance); AND32ItoR(gprT1, _Imm24_); ADD32ItoR(gprT1, 0xffffff); SHR32ItoR(gprT1, 24); mVUallocVIb(gprT1, 1); } + pass3 { mVUlog("FCAND"); } } microVUf(void) mVU_FCEQ() { microVU* mVU = mVUx; - if (!recPass) {mVUlog("clip broken");} - else { + pass1 { mVUprint("clip broken"); } + pass2 { mVUallocCFLAGa(gprT1, fvcInstance); XOR32ItoR(gprT1, _Imm24_); SUB32ItoR(gprT1, 1); SHR32ItoR(gprT1, 31); mVUallocVIb(gprT1, 1); } + pass3 { mVUlog("FCEQ"); } } microVUf(void) mVU_FCGET() { microVU* mVU = mVUx; - if (!recPass) {mVUlog("clip broken");} - else { + pass1 { mVUprint("clip broken"); } + pass2 { mVUallocCFLAGa(gprT1, fvcInstance); AND32ItoR(gprT1, 0xfff); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("FCGET"); } } microVUf(void) mVU_FCOR() { microVU* mVU = mVUx; - if (!recPass) {mVUlog("clip broken");} - else { + pass1 { mVUprint("clip broken"); } + pass2 { mVUallocCFLAGa(gprT1, fvcInstance); OR32ItoR(gprT1, _Imm24_); ADD32ItoR(gprT1, 1); // If 24 1's will make 25th bit 1, else 0 SHR32ItoR(gprT1, 24); // Get the 25th bit (also clears the rest of the garbage in the reg) mVUallocVIb(gprT1, 1); } + pass3 { mVUlog("FCOR"); } } microVUf(void) mVU_FCSET() { microVU* mVU = mVUx; - if (!recPass) {mVUlog("clip broken");} - else { + pass1 { mVUprint("clip broken"); } + pass2 { MOV32ItoR(gprT1, _Imm24_); mVUallocCFLAGb(gprT1, fcInstance); } + pass3 { mVUlog("FCSET"); } } //------------------------------------------------------------------ @@ -474,19 +495,20 @@ microVUf(void) mVU_FCSET() { microVUf(void) mVU_FMAND() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeMflag(_Fs_, _Ft_); } - else { + pass1 { mVUanalyzeMflag(_Fs_, _Ft_); } + pass2 { mVUallocMFLAGa(gprT1, fvmInstance); mVUallocVIa(gprT2, _Fs_); AND16RtoR(gprT1, gprT2); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("FMAND"); } } microVUf(void) mVU_FMEQ() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeMflag(_Fs_, _Ft_); } - else { + pass1 { mVUanalyzeMflag(_Fs_, _Ft_); } + pass2 { mVUallocMFLAGa(gprT1, fvmInstance); mVUallocVIa(gprT2, _Fs_); XOR32RtoR(gprT1, gprT2); @@ -494,17 +516,19 @@ microVUf(void) mVU_FMEQ() { SHR32ItoR(gprT1, 31); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("FMEQ"); } } microVUf(void) mVU_FMOR() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeMflag(_Fs_, _Ft_); } - else { + pass1 { mVUanalyzeMflag(_Fs_, _Ft_); } + pass2 { mVUallocMFLAGa(gprT1, fvmInstance); mVUallocVIa(gprT2, _Fs_); OR16RtoR(gprT1, gprT2); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("FMOR"); } } //------------------------------------------------------------------ @@ -513,46 +537,50 @@ microVUf(void) mVU_FMOR() { microVUf(void) mVU_FSAND() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeSflag(_Ft_); } - else { + pass1 { mVUanalyzeSflag(_Ft_); } + pass2 { mVUallocSFLAGa(gprT1, fvsInstance); AND16ItoR(gprT1, _Imm12_); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("FSAND"); } } microVUf(void) mVU_FSEQ() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeSflag(_Ft_); } - else { + pass1 { mVUanalyzeSflag(_Ft_); } + pass2 { mVUallocSFLAGa(gprT1, fvsInstance); XOR16ItoR(gprT1, _Imm12_); SUB16ItoR(gprT1, 1); SHR16ItoR(gprT1, 15); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("FSEQ"); } } microVUf(void) mVU_FSOR() { 
microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeSflag(_Ft_); } - else { + pass1 { mVUanalyzeSflag(_Ft_); } + pass2 { mVUallocSFLAGa(gprT1, fvsInstance); OR16ItoR(gprT1, _Imm12_); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("FSOR"); } } microVUf(void) mVU_FSSET() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFSSET(); } - else { + pass1 { mVUanalyzeFSSET(); } + pass2 { int flagReg1, flagReg2; getFlagReg(flagReg1, fsInstance); if (!(doStatus||doDivFlag)) { getFlagReg(flagReg2, fpsInstance); MOV16RtoR(flagReg1, flagReg2); } // Get status result from last status setting instruction AND16ItoR(flagReg1, 0x03f); // Remember not to modify upper 16 bits because of mac flag OR16ItoR (flagReg1, (_Imm12_ & 0xfc0)); } + pass3 { mVUlog("FSSET"); } } //------------------------------------------------------------------ @@ -561,9 +589,8 @@ microVUf(void) mVU_FSSET() { microVUf(void) mVU_IADD() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } - else { - mVUlog("IADD"); + pass1 { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } + pass2 { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { mVUallocVIa(gprT2, _Ft_); @@ -572,35 +599,35 @@ microVUf(void) mVU_IADD() { else ADD16RtoR(gprT1, gprT1); mVUallocVIb(gprT1, _Fd_); } + pass3 { mVUlog("IADD"); } } microVUf(void) mVU_IADDI() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } - else { - mVUlog("IADDI"); + pass1 { mVUanalyzeIALU2(_Fs_, _Ft_); } + pass2 { mVUallocVIa(gprT1, _Fs_); ADD16ItoR(gprT1, _Imm5_); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("IADDI"); } } microVUf(void) mVU_IADDIU() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } - else { - mVUlog("IADDIU"); + pass1 { mVUanalyzeIALU2(_Fs_, _Ft_); } + pass2 { mVUallocVIa(gprT1, _Fs_); ADD16ItoR(gprT1, _Imm15_); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("IADDIU"); } } microVUf(void) mVU_IAND() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } - else { - mVUlog("IAND"); + pass1 { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } + pass2 { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { mVUallocVIa(gprT2, _Ft_); @@ -608,13 +635,13 @@ microVUf(void) mVU_IAND() { } mVUallocVIb(gprT1, _Fd_); } + pass3 { mVUlog("IAND"); } } microVUf(void) mVU_IOR() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } - else { - mVUlog("IOR"); + pass1 { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } + pass2 { mVUallocVIa(gprT1, _Fs_); if (_Ft_ != _Fs_) { mVUallocVIa(gprT2, _Ft_); @@ -622,13 +649,13 @@ microVUf(void) mVU_IOR() { } mVUallocVIb(gprT1, _Fd_); } + pass3 { mVUlog("IOR"); } } microVUf(void) mVU_ISUB() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } - else { - mVUlog("ISUB"); + pass1 { mVUanalyzeIALU1(_Fd_, _Fs_, _Ft_); } + pass2 { if (_Ft_ != _Fs_) { mVUallocVIa(gprT1, _Fs_); mVUallocVIa(gprT2, _Ft_); @@ -640,17 +667,18 @@ microVUf(void) mVU_ISUB() { } else { PXORRtoR(mmVI(_Fd_), mmVI(_Fd_)); } } + pass3 { mVUlog("ISUB"); } } microVUf(void) mVU_ISUBIU() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeIALU2(_Fs_, _Ft_); } - else { - mVUlog("ISUBIU"); + pass1 { mVUanalyzeIALU2(_Fs_, _Ft_); } + pass2 { mVUallocVIa(gprT1, _Fs_); SUB16ItoR(gprT1, _Imm15_); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("ISUBIU"); } } //------------------------------------------------------------------ @@ -659,56 +687,56 @@ microVUf(void) mVU_ISUBIU() { microVUf(void) mVU_MFIR() { microVU* mVU = mVUx; - if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeReg2(_Ft_); } - else { - 
mVUlog("MFIR"); + pass1 { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeReg2(_Ft_); } + pass2 { mVUallocVIa(gprT1, _Fs_); MOVSX32R16toR(gprT1, gprT1); SSE2_MOVD_R_to_XMM(xmmT1, gprT1); if (!_XYZW_SS) { mVUunpack_xyzw(xmmT1, xmmT1, 0); } mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } + pass3 { mVUlog("MFIR"); } } microVUf(void) mVU_MFP() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeMFP(_Ft_); } - else { - mVUlog("MFP"); + pass1 { mVUanalyzeMFP(_Ft_); } + pass2 { getPreg(xmmFt); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } + pass3 { mVUlog("MFP"); } } microVUf(void) mVU_MOVE() { microVU* mVU = mVUx; - if (!recPass) { if (!_Ft_ || (_Ft_ == _Fs_)) { mVUinfo |= _isNOP; } analyzeReg1(_Fs_); analyzeReg2(_Ft_); } - else { - mVUlog("MOVE"); + pass1 { if (!_Ft_ || (_Ft_ == _Fs_)) { mVUinfo |= _isNOP; } analyzeReg1(_Fs_); analyzeReg2(_Ft_); } + pass2 { mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], _X_Y_Z_W); mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } + pass3 { mVUlog("MOVE"); } } microVUf(void) mVU_MR32() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeMR32(_Fs_, _Ft_); } - else { - mVUlog("MR32"); + pass1 { mVUanalyzeMR32(_Fs_, _Ft_); } + pass2 { mVUloadReg(xmmT1, (uptr)&mVU->regs->VF[_Fs_].UL[0], (_X_Y_Z_W == 8) ? 4 : 15); if (_X_Y_Z_W != 8) { SSE2_PSHUFD_XMM_to_XMM(xmmT1, xmmT1, 0x39); } mVUsaveReg(xmmT1, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 0); } + pass3 { mVUlog("MR32"); } } microVUf(void) mVU_MTIR() { microVU* mVU = mVUx; - if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeReg5(_Fs_, _Fsf_); analyzeVIreg2(_Ft_, 1); } - else { - mVUlog("MTIR"); + pass1 { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeReg5(_Fs_, _Fsf_); analyzeVIreg2(_Ft_, 1); } + pass2 { MOVZX32M16toR(gprT1, (uptr)&mVU->regs->VF[_Fs_].UL[_Fsf_]); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("MTIR"); } } //------------------------------------------------------------------ @@ -717,9 +745,8 @@ microVUf(void) mVU_MTIR() { microVUf(void) mVU_ILW() { microVU* mVU = mVUx; - if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } - else { - mVUlog("ILW"); + pass1 { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } + pass2 { if (!_Fs_) { MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + getVUmem(_Imm11_) + offsetSS); mVUallocVIb(gprT1, _Ft_); @@ -733,13 +760,13 @@ microVUf(void) mVU_ILW() { mVUallocVIb(gprT1, _Ft_); } } + pass3 { mVUlog("ILW"); } } microVUf(void) mVU_ILWR() { microVU* mVU = mVUx; - if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } - else { - mVUlog("ILWR"); + pass1 { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg1(_Fs_); analyzeVIreg2(_Ft_, 4); } + pass2 { if (!_Fs_) { MOVZX32M16toR(gprT1, (uptr)mVU->regs->Mem + offsetSS); mVUallocVIb(gprT1, _Ft_); @@ -752,6 +779,7 @@ microVUf(void) mVU_ILWR() { mVUallocVIb(gprT1, _Ft_); } } + pass3 { mVUlog("ILWR"); } } //------------------------------------------------------------------ @@ -760,9 +788,8 @@ microVUf(void) mVU_ILWR() { microVUf(void) mVU_ISW() { microVU* mVU = mVUx; - if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } - else { - mVUlog("ISW"); + pass1 { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } + pass2 { if (!_Fs_) { int imm = getVUmem(_Imm11_); mVUallocVIa(gprT1, _Ft_); @@ -782,13 +809,13 @@ microVUf(void) mVU_ISW() { if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12); } } + pass3 { mVUlog("ISW"); } } microVUf(void) mVU_ISWR() { 
microVU* mVU = mVUx; - if (!recPass) { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } - else { - mVUlog("ISWR"); + pass1 { analyzeVIreg1(_Fs_); analyzeVIreg1(_Ft_); } + pass2 { if (!_Fs_) { mVUallocVIa(gprT1, _Ft_); if (_X) MOV32RtoM((uptr)mVU->regs->Mem, gprT1); @@ -806,6 +833,7 @@ microVUf(void) mVU_ISWR() { if (_W) MOV32RtoRm(gprT1, gprT2, (uptr)mVU->regs->Mem+12); } } + pass3 { mVUlog("ISWR"); } } //------------------------------------------------------------------ @@ -814,15 +842,13 @@ microVUf(void) mVU_ISWR() { microVUf(void) mVU_LQ() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 0); } - else { + pass1 { mVUanalyzeLQ(_Ft_, _Fs_, 0); } + pass2 { if (!_Fs_) { - mVUlog("LQ1"); mVUloadReg(xmmFt, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } else { - mVUlog("LQ2"); mVUallocVIa(gprT1, _Fs_); ADD32ItoR(gprT1, _Imm11_); mVUaddrFix(gprT1); @@ -830,19 +856,18 @@ microVUf(void) mVU_LQ() { mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } } + pass3 { mVUlog("LQ"); } } microVUf(void) mVU_LQD() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 1); } - else { + pass1 { mVUanalyzeLQ(_Ft_, _Fs_, 1); } + pass2 { if (!_Fs_ && !noWriteVF) { - mVUlog("LQD1"); mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } else { - mVUlog("LQD2"); mVUallocVIa(gprT1, _Fs_); SUB16ItoR(gprT1, 1); mVUallocVIb(gprT1, _Fs_); @@ -853,19 +878,18 @@ microVUf(void) mVU_LQD() { } } } + pass3 { mVUlog("LQD"); } } microVUf(void) mVU_LQI() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeLQ(_Ft_, _Fs_, 1); } - else { + pass1 { mVUanalyzeLQ(_Ft_, _Fs_, 1); } + pass2 { if (!_Fs_ && !noWriteVF) { - mVUlog("LQI1"); mVUloadReg(xmmFt, (uptr)mVU->regs->Mem, _X_Y_Z_W); mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1); } else { - mVUlog("LQI2"); mVUallocVIa((!noWriteVF) ? gprT1 : gprT2, _Fs_); if (!noWriteVF) { MOV32RtoR(gprT2, gprT1); @@ -877,6 +901,7 @@ microVUf(void) mVU_LQI() { mVUallocVIb(gprT2, _Fs_); } } + pass3 { mVUlog("LQI"); } } //------------------------------------------------------------------ @@ -885,9 +910,8 @@ microVUf(void) mVU_LQI() { microVUf(void) mVU_SQ() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 0); } - else { - mVUlog("SQ"); + pass1 { mVUanalyzeSQ(_Fs_, _Ft_, 0); } + pass2 { if (!_Ft_) { getReg7(xmmFs, _Fs_); mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem + getVUmem(_Imm11_), _X_Y_Z_W, 1); @@ -900,13 +924,13 @@ microVUf(void) mVU_SQ() { mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); } } + pass3 { mVUlog("SQ"); } } microVUf(void) mVU_SQD() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 1); } - else { - mVUlog("SQD"); + pass1 { mVUanalyzeSQ(_Fs_, _Ft_, 1); } + pass2 { if (!_Ft_) { getReg7(xmmFs, _Fs_); mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1); @@ -920,13 +944,13 @@ microVUf(void) mVU_SQD() { mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W); } } + pass3 { mVUlog("SQD"); } } microVUf(void) mVU_SQI() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeSQ(_Fs_, _Ft_, 1); } - else { - mVUlog("SQI"); + pass1 { mVUanalyzeSQ(_Fs_, _Ft_, 1); } + pass2 { if (!_Ft_) { getReg7(xmmFs, _Fs_); mVUsaveReg(xmmFs, (uptr)mVU->regs->Mem, _X_Y_Z_W, 1); @@ -941,6 +965,7 @@ microVUf(void) mVU_SQI() { mVUallocVIb(gprT2, _Ft_); // ToDo: Backup to memory check. 
} } + pass3 { mVUlog("SQI"); } } //------------------------------------------------------------------ @@ -949,9 +974,8 @@ microVUf(void) mVU_SQI() { microVUf(void) mVU_RINIT() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeR1(_Fs_, _Fsf_); } - else { - mVUlog("RINIT"); + pass1 { mVUanalyzeR1(_Fs_, _Fsf_); } + pass2 { if (_Fs_ || (_Fsf_ == 3)) { getReg8(gprR, _Fs_, _Fsf_); AND32ItoR(gprR, 0x007fffff); @@ -959,6 +983,7 @@ microVUf(void) mVU_RINIT() { } else MOV32ItoR(gprR, 0x3f800000); } + pass3 { mVUlog("RINIT"); } } microVUt(void) mVU_RGET_() { @@ -973,15 +998,15 @@ microVUt(void) mVU_RGET_() { microVUf(void) mVU_RGET() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeR2(_Ft_, 1); } - else { mVUlog("RGET"); mVU_RGET_(); } + pass1 { mVUanalyzeR2(_Ft_, 1); } + pass2 { mVU_RGET_(); } + pass3 { mVUlog("RGET"); } } microVUf(void) mVU_RNEXT() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeR2(_Ft_, 0); } - else { - mVUlog("RNEXT"); + pass1 { mVUanalyzeR2(_Ft_, 0); } + pass2 { // algorithm from www.project-fao.org MOV32RtoR(gprT1, gprR); SHR32ItoR(gprT1, 4); @@ -998,19 +1023,20 @@ microVUf(void) mVU_RNEXT() { OR32ItoR (gprR, 0x3f800000); mVU_RGET_(); } + pass3 { mVUlog("RNEXT"); } } microVUf(void) mVU_RXOR() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeR1(_Fs_, _Fsf_); } - else { - mVUlog("RXOR"); + pass1 { mVUanalyzeR1(_Fs_, _Fsf_); } + pass2 { if (_Fs_ || (_Fsf_ == 3)) { getReg8(gprT1, _Fs_, _Fsf_); AND32ItoR(gprT1, 0x7fffff); XOR32RtoR(gprR, gprT1); } } + pass3 { mVUlog("RXOR"); } } //------------------------------------------------------------------ @@ -1019,12 +1045,14 @@ microVUf(void) mVU_RXOR() { microVUf(void) mVU_WAITP() { microVU* mVU = mVUx; - if (!recPass) { mVUstall = aMax(mVUstall, ((mVUregs.p) ? (mVUregs.p - 1) : 0)); } + pass1 { mVUstall = aMax(mVUstall, ((mVUregs.p) ? 
(mVUregs.p - 1) : 0)); } + pass3 { mVUlog("WAITP"); } } microVUf(void) mVU_WAITQ() { microVU* mVU = mVUx; - if (!recPass) { mVUstall = aMax(mVUstall, mVUregs.q); } + pass1 { mVUstall = aMax(mVUstall, mVUregs.q); } + pass3 { mVUlog("WAITQ"); } } //------------------------------------------------------------------ @@ -1033,20 +1061,22 @@ microVUf(void) mVU_WAITQ() { microVUf(void) mVU_XTOP() { microVU* mVU = mVUx; - if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); } - else { + pass1 { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); } + pass2 { MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->top); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("XTOP"); } } microVUf(void) mVU_XITOP() { microVU* mVU = mVUx; - if (!recPass) { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); } - else { + pass1 { if (!_Ft_) { mVUinfo |= _isNOP; } analyzeVIreg2(_Ft_, 1); } + pass2 { MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->itop); mVUallocVIb(gprT1, _Ft_); } + pass3 { mVUlog("XITOP"); } } //------------------------------------------------------------------ @@ -1063,14 +1093,15 @@ void __fastcall mVU_XGKICK_(u32 addr) { microVUf(void) mVU_XGKICK() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeXGkick(_Fs_, 4); } - else { - mVUlog("XGkick"); + pass1 { mVUanalyzeXGkick(_Fs_, 4); } + pass2 { + mVUprint("XGkick"); mVUallocVIa(gprT2, _Fs_); // gprT2 = ECX for __fastcall PUSH32R(gprR); // gprR = EDX is volatile so backup CALLFunc((uptr)mVU_XGKICK_); POP32R(gprR); // Restore } + pass3 { mVUlog("XGKICK"); } } //------------------------------------------------------------------ @@ -1080,98 +1111,107 @@ microVUf(void) mVU_XGKICK() { microVUf(void) mVU_B() { microVU* mVU = mVUx; mVUbranch = 1; + pass3 { mVUlog("B"); } } microVUf(void) mVU_BAL() { microVU* mVU = mVUx; mVUbranch = 2; - if (!recPass) { analyzeVIreg2(_Ft_, 1); } - else { + pass1 { analyzeVIreg2(_Ft_, 1); } + pass2 { MOV32ItoR(gprT1, bSaveAddr); mVUallocVIb(gprT1, _Ft_); // Note: Not sure if the lower instruction in the branch-delay slot // should read the previous VI-value or the VI-value resulting from this branch. // This code does the latter... 
} + pass3 { mVUlog("BAL"); } } microVUf(void) mVU_IBEQ() { microVU* mVU = mVUx; mVUbranch = 3; - if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } - else { + pass1 { mVUanalyzeBranch2(_Fs_, _Ft_); } + pass2 { if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); if (memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); } MOV32RtoM((uptr)&mVU->branch, gprT1); } + pass3 { mVUlog("IBEQ"); } } microVUf(void) mVU_IBGEZ() { microVU* mVU = mVUx; mVUbranch = 4; - if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else { + pass1 { mVUanalyzeBranch1(_Fs_); } + pass2 { if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); MOV32RtoM((uptr)&mVU->branch, gprT1); } + pass3 { mVUlog("IBGEZ"); } } microVUf(void) mVU_IBGTZ() { microVU* mVU = mVUx; mVUbranch = 5; - if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else { + pass1 { mVUanalyzeBranch1(_Fs_); } + pass2 { if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); MOV32RtoM((uptr)&mVU->branch, gprT1); } + pass3 { mVUlog("IBGTZ"); } } microVUf(void) mVU_IBLEZ() { microVU* mVU = mVUx; mVUbranch = 6; - if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else { + pass1 { mVUanalyzeBranch1(_Fs_); } + pass2 { if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); MOV32RtoM((uptr)&mVU->branch, gprT1); } + pass3 { mVUlog("IBLEZ"); } } microVUf(void) mVU_IBLTZ() { microVU* mVU = mVUx; mVUbranch = 7; - if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else { + pass1 { mVUanalyzeBranch1(_Fs_); } + pass2 { if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); MOV32RtoM((uptr)&mVU->branch, gprT1); } + pass3 { mVUlog("IBLTZ"); } } microVUf(void) mVU_IBNE() { microVU* mVU = mVUx; mVUbranch = 8; - if (!recPass) { mVUanalyzeBranch2(_Fs_, _Ft_); } - else { + pass1 { mVUanalyzeBranch2(_Fs_, _Ft_); } + pass2 { if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); if (memReadIt) XOR32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); } MOV32RtoM((uptr)&mVU->branch, gprT1); } + pass3 { mVUlog("IBNE"); } } microVUf(void) mVU_JR() { microVU* mVU = mVUx; mVUbranch = 9; - if (!recPass) { mVUanalyzeBranch1(_Fs_); } - else { + pass1 { mVUanalyzeBranch1(_Fs_); } + pass2 { if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); MOV32RtoM((uptr)&mVU->branch, gprT1); } + pass3 { mVUlog("JR"); } } microVUf(void) mVU_JALR() { microVU* mVU = mVUx; mVUbranch = 10; - if (!recPass) { mVUanalyzeBranch1(_Fs_); analyzeVIreg2(_Ft_, 1); } - else { + pass1 { mVUanalyzeBranch1(_Fs_); analyzeVIreg2(_Ft_, 1); } + pass2 { if (memReadIs) MOV32MtoR(gprT1, (uptr)&mVU->VIbackup[0]); else mVUallocVIa(gprT1, _Fs_); MOV32RtoM((uptr)&mVU->branch, gprT1); @@ -1181,6 +1221,7 @@ microVUf(void) mVU_JALR() { // should read the previous VI-value or the VI-value resulting from this branch. // This code does the latter... 
} + pass3 { mVUlog("JALR"); } } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 2f0ea1c8ad..5046494b53 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -138,6 +138,10 @@ declareAllVariables #define microVUf(aType) template aType #define microVUq(aType) template __forceinline aType +#define pass1 if (recPass == 0) +#define pass2 if (recPass == 1) +#define pass3 if (recPass == 2) + #define mVUcurProg mVU->prog.prog[mVU->prog.cur] #define mVUblocks mVU->prog.prog[mVU->prog.cur].block #define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo @@ -226,7 +230,7 @@ declareAllVariables #define mmVI(_VIreg_) (_VIreg_ - 1) #ifdef mVUdebug -#define mVUlog Console::Status +#define mVUprint Console::Status #define mVUdebug1() { \ if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \ @@ -235,10 +239,20 @@ declareAllVariables if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \ } #else -#define mVUlog 0&& +#define mVUprint 0&& #define mVUdebug1() {} #endif +#ifdef mVUlogProg +#define mVUlog __mVULog +#define mVUsetupLog __mVUsetupLog +#define mVUdumpProg __mVUdumpProgram +#else +#define mVUlog 0&& +#define mVUsetupLog() +#define mVUdumpProg 0&& +#endif + #define mVUcacheCheck(ptr, start, limit) { \ uptr diff = ptr - start; \ if (diff >= limit) { Console::Error("microVU Error: Program went over it's cache limit. Size = %x", params diff); } \ diff --git a/pcsx2/x86/microVU_Tables.inl b/pcsx2/x86/microVU_Tables.inl index c6e76ab38e..41b8b9a1c7 100644 --- a/pcsx2/x86/microVU_Tables.inl +++ b/pcsx2/x86/microVU_Tables.inl @@ -178,6 +178,76 @@ void (* mVULOWER_OPCODE11 [128])() = { mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, }; + +void (* mVULOWER_OPCODE02 [128])() = { + mVU_LQ<0,2> , mVU_SQ<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_ILW<0,2> , mVU_ISW<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_IADDIU<0,2> , mVU_ISUBIU<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_FCEQ<0,2> , mVU_FCSET<0,2> , mVU_FCAND<0,2> , mVU_FCOR<0,2>, /* 0x10 */ + mVU_FSEQ<0,2> , mVU_FSSET<0,2> , mVU_FSAND<0,2> , mVU_FSOR<0,2>, + mVU_FMEQ<0,2> , mVUunknown<0,2> , mVU_FMAND<0,2> , mVU_FMOR<0,2>, + mVU_FCGET<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_B<0,2> , mVU_BAL<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, /* 0x20 */ + mVU_JR<0,2> , mVU_JALR<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_IBEQ<0,2> , mVU_IBNE<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_IBLTZ<0,2> , mVU_IBGTZ<0,2> , mVU_IBLEZ<0,2> , mVU_IBGEZ<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, /* 0x30 */ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVULowerOP<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, /* 0x40*/ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, /* 0x50 */ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , 
mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, /* 0x60 */ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, /* 0x70 */ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, +}; + +void (* mVULOWER_OPCODE12 [128])() = { + mVU_LQ<1,2> , mVU_SQ<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_ILW<1,2> , mVU_ISW<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_IADDIU<1,2> , mVU_ISUBIU<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_FCEQ<1,2> , mVU_FCSET<1,2> , mVU_FCAND<1,2> , mVU_FCOR<1,2>, /* 0x10 */ + mVU_FSEQ<1,2> , mVU_FSSET<1,2> , mVU_FSAND<1,2> , mVU_FSOR<1,2>, + mVU_FMEQ<1,2> , mVUunknown<1,2> , mVU_FMAND<1,2> , mVU_FMOR<1,2>, + mVU_FCGET<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_B<1,2> , mVU_BAL<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x20 */ + mVU_JR<1,2> , mVU_JALR<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_IBEQ<1,2> , mVU_IBNE<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_IBLTZ<1,2> , mVU_IBGTZ<1,2> , mVU_IBLEZ<1,2> , mVU_IBGEZ<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x30 */ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVULowerOP<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x40*/ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x50 */ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x60 */ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x70 */ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, +}; //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -226,6 +296,28 @@ void (* mVULowerOP_T3_00_OPCODE11 [32])() = { mVUunknown<1,1> , mVU_MFP<1,1> , mVU_XTOP<1,1> , mVU_XGKICK<1,1>, mVU_ESADD<1,1> , mVU_EATANxy<1,1> , mVU_ESQRT<1,1> , mVU_ESIN<1,1>, }; + +void (* mVULowerOP_T3_00_OPCODE02 [32])() = { + mVUunknown<0,2> , 
mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_MOVE<0,2> , mVU_LQI<0,2> , mVU_DIV<0,2> , mVU_MTIR<0,2>, + mVU_RNEXT<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, /* 0x10 */ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVU_MFP<0,2> , mVU_XTOP<0,2> , mVU_XGKICK<0,2>, + mVU_ESADD<0,2> , mVU_EATANxy<0,2> , mVU_ESQRT<0,2> , mVU_ESIN<0,2>, +}; + +void (* mVULowerOP_T3_00_OPCODE12 [32])() = { + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_MOVE<1,2> , mVU_LQI<1,2> , mVU_DIV<1,2> , mVU_MTIR<1,2>, + mVU_RNEXT<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x10 */ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVU_MFP<1,2> , mVU_XTOP<1,2> , mVU_XGKICK<1,2>, + mVU_ESADD<1,2> , mVU_EATANxy<1,2> , mVU_ESQRT<1,2> , mVU_ESIN<1,2>, +}; //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -274,6 +366,29 @@ void (* mVULowerOP_T3_01_OPCODE11 [32])() = { mVUunknown<1,1> , mVUunknown<1,1> , mVU_XITOP<1,1> , mVUunknown<1,1>, mVU_ERSADD<1,1> , mVU_EATANxz<1,1> , mVU_ERSQRT<1,1> , mVU_EATAN<1,1>, }; + +void (* mVULowerOP_T3_01_OPCODE02 [32])() = { + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_MR32<0,2> , mVU_SQI<0,2> , mVU_SQRT<0,2> , mVU_MFIR<0,2>, + mVU_RGET<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, /* 0x10 */ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVU_XITOP<0,2> , mVUunknown<0,2>, + mVU_ERSADD<0,2> , mVU_EATANxz<0,2> , mVU_ERSQRT<0,2> , mVU_EATAN<0,2>, +}; + +void (* mVULowerOP_T3_01_OPCODE12 [32])() = { + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_MR32<1,2> , mVU_SQI<1,2> , mVU_SQRT<1,2> , mVU_MFIR<1,2>, + mVU_RGET<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x10 */ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVU_XITOP<1,2> , mVUunknown<1,2>, + mVU_ERSADD<1,2> , mVU_EATANxz<1,2> , mVU_ERSQRT<1,2> , mVU_EATAN<1,2>, +}; + //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -322,6 +437,28 @@ void (* mVULowerOP_T3_10_OPCODE11 [32])() = { mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, mVU_ELENG<1,1> , mVU_ESUM<1,1> , mVU_ERCPR<1,1> , mVU_EEXP<1,1>, }; + +void (* mVULowerOP_T3_10_OPCODE02 [32])() = { + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVU_LQD<0,2> , mVU_RSQRT<0,2> , mVU_ILWR<0,2>, + mVU_RINIT<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , 
mVUunknown<0,2>, /* 0x10 */ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_ELENG<0,2> , mVU_ESUM<0,2> , mVU_ERCPR<0,2> , mVU_EEXP<0,2>, +}; + +void (* mVULowerOP_T3_10_OPCODE12 [32])() = { + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVU_LQD<1,2> , mVU_RSQRT<1,2> , mVU_ILWR<1,2>, + mVU_RINIT<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x10 */ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_ELENG<1,2> , mVU_ESUM<1,2> , mVU_ERCPR<1,2> , mVU_EEXP<1,2>, +}; //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -370,6 +507,28 @@ void (* mVULowerOP_T3_11_OPCODE11 [32])() = { mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, mVU_ERLENG<1,1> , mVUunknown<1,1> , mVU_WAITP<1,1> , mVUunknown<1,1>, }; + +void (* mVULowerOP_T3_11_OPCODE02 [32])() = { + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVU_SQD<0,2> , mVU_WAITQ<0,2> , mVU_ISWR<0,2>, + mVU_RXOR<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, /* 0x10 */ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_ERLENG<0,2> , mVUunknown<0,2> , mVU_WAITP<0,2> , mVUunknown<0,2>, +}; + +void (* mVULowerOP_T3_11_OPCODE12 [32])() = { + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVU_SQD<1,2> , mVU_WAITQ<1,2> , mVU_ISWR<1,2>, + mVU_RXOR<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x10 */ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_ERLENG<1,2> , mVUunknown<1,2> , mVU_WAITP<1,2> , mVUunknown<1,2>, +}; //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -450,6 +609,44 @@ void (* mVULowerOP_OPCODE11 [64])() = { mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, mVULowerOP_T3_00<1,1>, mVULowerOP_T3_01<1,1>, mVULowerOP_T3_10<1,1>, mVULowerOP_T3_11<1,1>, }; + +void (* mVULowerOP_OPCODE02 [64])() = { + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, /* 0x10 */ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , 
mVUunknown<0,2> , mVUunknown<0,2>, /* 0x20 */ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_IADD<0,2> , mVU_ISUB<0,2> , mVU_IADDI<0,2> , mVUunknown<0,2>, /* 0x30 */ + mVU_IAND<0,2> , mVU_IOR<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVULowerOP_T3_00<0,2>, mVULowerOP_T3_01<0,2>, mVULowerOP_T3_10<0,2>, mVULowerOP_T3_11<0,2>, +}; + +void (* mVULowerOP_OPCODE12 [64])() = { + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x10 */ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x20 */ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_IADD<1,2> , mVU_ISUB<1,2> , mVU_IADDI<1,2> , mVUunknown<1,2>, /* 0x30 */ + mVU_IAND<1,2> , mVU_IOR<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVULowerOP_T3_00<1,2>, mVULowerOP_T3_01<1,2>, mVULowerOP_T3_10<1,2>, mVULowerOP_T3_11<1,2>, +}; //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -530,6 +727,44 @@ void (* mVU_UPPER_OPCODE11 [64])() = { mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, mVU_UPPER_FD_00<1,1>, mVU_UPPER_FD_01<1,1>, mVU_UPPER_FD_10<1,1>, mVU_UPPER_FD_11<1,1>, }; + +void (* mVU_UPPER_OPCODE02 [64])() = { + mVU_ADDx<0,2> , mVU_ADDy<0,2> , mVU_ADDz<0,2> , mVU_ADDw<0,2>, + mVU_SUBx<0,2> , mVU_SUBy<0,2> , mVU_SUBz<0,2> , mVU_SUBw<0,2>, + mVU_MADDx<0,2> , mVU_MADDy<0,2> , mVU_MADDz<0,2> , mVU_MADDw<0,2>, + mVU_MSUBx<0,2> , mVU_MSUBy<0,2> , mVU_MSUBz<0,2> , mVU_MSUBw<0,2>, + mVU_MAXx<0,2> , mVU_MAXy<0,2> , mVU_MAXz<0,2> , mVU_MAXw<0,2>, /* 0x10 */ + mVU_MINIx<0,2> , mVU_MINIy<0,2> , mVU_MINIz<0,2> , mVU_MINIw<0,2>, + mVU_MULx<0,2> , mVU_MULy<0,2> , mVU_MULz<0,2> , mVU_MULw<0,2>, + mVU_MULq<0,2> , mVU_MAXi<0,2> , mVU_MULi<0,2> , mVU_MINIi<0,2>, + mVU_ADDq<0,2> , mVU_MADDq<0,2> , mVU_ADDi<0,2> , mVU_MADDi<0,2>, /* 0x20 */ + mVU_SUBq<0,2> , mVU_MSUBq<0,2> , mVU_SUBi<0,2> , mVU_MSUBi<0,2>, + mVU_ADD<0,2> , mVU_MADD<0,2> , mVU_MUL<0,2> , mVU_MAX<0,2>, + mVU_SUB<0,2> , mVU_MSUB<0,2> , mVU_OPMSUB<0,2> , mVU_MINI<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, /* 0x30 */ + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVU_UPPER_FD_00<0,2>, mVU_UPPER_FD_01<0,2>, mVU_UPPER_FD_10<0,2>, mVU_UPPER_FD_11<0,2>, +}; + +void (* mVU_UPPER_OPCODE12 [64])() = { + mVU_ADDx<1,2> , mVU_ADDy<1,2> , mVU_ADDz<1,2> , mVU_ADDw<1,2>, + mVU_SUBx<1,2> , mVU_SUBy<1,2> , mVU_SUBz<1,2> , 
mVU_SUBw<1,2>, + mVU_MADDx<1,2> , mVU_MADDy<1,2> , mVU_MADDz<1,2> , mVU_MADDw<1,2>, + mVU_MSUBx<1,2> , mVU_MSUBy<1,2> , mVU_MSUBz<1,2> , mVU_MSUBw<1,2>, + mVU_MAXx<1,2> , mVU_MAXy<1,2> , mVU_MAXz<1,2> , mVU_MAXw<1,2>, /* 0x10 */ + mVU_MINIx<1,2> , mVU_MINIy<1,2> , mVU_MINIz<1,2> , mVU_MINIw<1,2>, + mVU_MULx<1,2> , mVU_MULy<1,2> , mVU_MULz<1,2> , mVU_MULw<1,2>, + mVU_MULq<1,2> , mVU_MAXi<1,2> , mVU_MULi<1,2> , mVU_MINIi<1,2>, + mVU_ADDq<1,2> , mVU_MADDq<1,2> , mVU_ADDi<1,2> , mVU_MADDi<1,2>, /* 0x20 */ + mVU_SUBq<1,2> , mVU_MSUBq<1,2> , mVU_SUBi<1,2> , mVU_MSUBi<1,2>, + mVU_ADD<1,2> , mVU_MADD<1,2> , mVU_MUL<1,2> , mVU_MAX<1,2>, + mVU_SUB<1,2> , mVU_MSUB<1,2> , mVU_OPMSUB<1,2> , mVU_MINI<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, /* 0x30 */ + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVU_UPPER_FD_00<1,2>, mVU_UPPER_FD_01<1,2>, mVU_UPPER_FD_10<1,2>, mVU_UPPER_FD_11<1,2>, +}; //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -578,6 +813,28 @@ void (* mVU_UPPER_FD_00_TABLE11 [32])() = { mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, }; + +void (* mVU_UPPER_FD_00_TABLE02 [32])() = { + mVU_ADDAx<0,2> , mVU_SUBAx<0,2> , mVU_MADDAx<0,2> , mVU_MSUBAx<0,2>, + mVU_ITOF0<0,2> , mVU_FTOI0<0,2> , mVU_MULAx<0,2> , mVU_MULAq<0,2>, + mVU_ADDAq<0,2> , mVU_SUBAq<0,2> , mVU_ADDA<0,2> , mVU_SUBA<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, +}; + +void (* mVU_UPPER_FD_00_TABLE12 [32])() = { + mVU_ADDAx<1,2> , mVU_SUBAx<1,2> , mVU_MADDAx<1,2> , mVU_MSUBAx<1,2>, + mVU_ITOF0<1,2> , mVU_FTOI0<1,2> , mVU_MULAx<1,2> , mVU_MULAq<1,2>, + mVU_ADDAq<1,2> , mVU_SUBAq<1,2> , mVU_ADDA<1,2> , mVU_SUBA<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, +}; //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -626,6 +883,28 @@ void (* mVU_UPPER_FD_01_TABLE11 [32])() = { mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, }; + +void (* mVU_UPPER_FD_01_TABLE02 [32])() = { + mVU_ADDAy<0,2> , mVU_SUBAy<0,2> , mVU_MADDAy<0,2> , mVU_MSUBAy<0,2>, + mVU_ITOF4<0,2> , mVU_FTOI4<0,2> , mVU_MULAy<0,2> , mVU_ABS<0,2>, + mVU_MADDAq<0,2> , mVU_MSUBAq<0,2> , mVU_MADDA<0,2> , mVU_MSUBA<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , 
mVUunknown<0,2> , mVUunknown<0,2>, +}; + +void (* mVU_UPPER_FD_01_TABLE12 [32])() = { + mVU_ADDAy<1,2> , mVU_SUBAy<1,2> , mVU_MADDAy<1,2> , mVU_MSUBAy<1,2>, + mVU_ITOF4<1,2> , mVU_FTOI4<1,2> , mVU_MULAy<1,2> , mVU_ABS<1,2>, + mVU_MADDAq<1,2> , mVU_MSUBAq<1,2> , mVU_MADDA<1,2> , mVU_MSUBA<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, +}; //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -674,6 +953,28 @@ void (* mVU_UPPER_FD_10_TABLE11 [32])() = { mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, }; + +void (* mVU_UPPER_FD_10_TABLE02 [32])() = { + mVU_ADDAz<0,2> , mVU_SUBAz<0,2> , mVU_MADDAz<0,2> , mVU_MSUBAz<0,2>, + mVU_ITOF12<0,2> , mVU_FTOI12<0,2> , mVU_MULAz<0,2> , mVU_MULAi<0,2>, + mVU_ADDAi<0,2> , mVU_SUBAi<0,2> , mVU_MULA<0,2> , mVU_OPMULA<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, +}; + +void (* mVU_UPPER_FD_10_TABLE12 [32])() = { + mVU_ADDAz<1,2> , mVU_SUBAz<1,2> , mVU_MADDAz<1,2> , mVU_MSUBAz<1,2>, + mVU_ITOF12<1,2> , mVU_FTOI12<1,2> , mVU_MULAz<1,2> , mVU_MULAi<1,2>, + mVU_ADDAi<1,2> , mVU_SUBAi<1,2> , mVU_MULA<1,2> , mVU_OPMULA<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, +}; //------------------------------------------------------------------ //------------------------------------------------------------------ @@ -722,19 +1023,45 @@ void (* mVU_UPPER_FD_11_TABLE11 [32])() = { mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1> , mVUunknown<1,1>, }; + +void (* mVU_UPPER_FD_11_TABLE02 [32])() = { + mVU_ADDAw<0,2> , mVU_SUBAw<0,2> , mVU_MADDAw<0,2> , mVU_MSUBAw<0,2>, + mVU_ITOF15<0,2> , mVU_FTOI15<0,2> , mVU_MULAw<0,2> , mVU_CLIP<0,2>, + mVU_MADDAi<0,2> , mVU_MSUBAi<0,2> , mVUunknown<0,2> , mVU_NOP<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, + mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2> , mVUunknown<0,2>, +}; + +void (* mVU_UPPER_FD_11_TABLE12 [32])() = { + mVU_ADDAw<1,2> , mVU_SUBAw<1,2> , mVU_MADDAw<1,2> , mVU_MSUBAw<1,2>, + mVU_ITOF15<1,2> , mVU_FTOI15<1,2> , mVU_MULAw<1,2> , mVU_CLIP<1,2>, + mVU_MADDAi<1,2> , mVU_MSUBAi<1,2> , mVUunknown<1,2> , mVU_NOP<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> 
, mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, + mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2> , mVUunknown<1,2>, +}; //------------------------------------------------------------------ //------------------------------------------------------------------ // Table Functions //------------------------------------------------------------------ #define doTableStuff(tableName, args) { \ - if (recPass) { \ + pass1 { \ + if (vuIndex) tableName##10[ args ](); \ + else tableName##00[ args ](); \ + } \ + pass2 { \ if (vuIndex) tableName##11[ args ](); \ else tableName##01[ args ](); \ } \ - else { \ - if (vuIndex) tableName##10[ args ](); \ - else tableName##00[ args ](); \ + pass3 { \ + if (vuIndex) tableName##12[ args ](); \ + else tableName##02[ args ](); \ } \ } @@ -750,8 +1077,9 @@ microVUf(void) mVULowerOP_T3_11() { doTableStuff(mVULowerOP_T3_11_OPCODE, ((mVUg microVUf(void) mVUopU() { doTableStuff(mVU_UPPER_OPCODE, (mVUgetCode & 0x3f)); } // Gets Upper Opcode microVUf(void) mVUopL() { doTableStuff(mVULOWER_OPCODE, (mVUgetCode >> 25)); } // Gets Lower Opcode microVUf(void) mVUunknown() { - //if (recPass) return; SysPrintf("mVUunknown<%d,%d> : Unknown Micro VU opcode called (%x)\n", vuIndex, recPass, mVUgetCode); + //pass2 { write8(0xcc); } + pass3 { mVUlog("Unknown", mVUgetCode); } } #endif //PCSX2_MICROVU diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index dcbc757a13..51a2a325d6 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -79,10 +79,10 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX //------------------------------------------------------------------ // FMAC1 - Normal FMAC Opcodes -#define mVU_FMAC1(operation) { \ +#define mVU_FMAC1(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(_Fd_, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC1(_Fd_, _Fs_, _Ft_); } \ + pass2 { \ int Fd, Fs, Ft; \ mVUallocFMAC1a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ @@ -90,12 +90,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 1); \ mVUallocFMAC1b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC3 - BC(xyzw) FMAC Opcodes -#define mVU_FMAC3(operation) { \ +#define mVU_FMAC3(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC3(_Fd_, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC3(_Fd_, _Fs_, _Ft_); } \ + pass2 { \ int Fd, Fs, Ft; \ mVUallocFMAC3a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ @@ -103,12 +104,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 1); \ mVUallocFMAC3b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC4 - FMAC Opcodes Storing Result to ACC -#define mVU_FMAC4(operation) { \ +#define mVU_FMAC4(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(0, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC1(0, _Fs_, _Ft_); } \ + pass2 { \ int ACC, Fs, Ft; \ mVUallocFMAC4a(ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ @@ -116,12 +118,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fs, xmmT1, xmmT2, _X_Y_Z_W, 0); \ 
mVUallocFMAC4b(ACC, Fs); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC5 - FMAC BC(xyzw) Opcodes Storing Result to ACC -#define mVU_FMAC5(operation) { \ +#define mVU_FMAC5(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC3(0, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC3(0, _Fs_, _Ft_); } \ + pass2 { \ int ACC, Fs, Ft; \ mVUallocFMAC5a(ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ @@ -129,12 +132,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fs, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC5b(ACC, Fs); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC6 - Normal FMAC Opcodes (I Reg) -#define mVU_FMAC6(operation) { \ +#define mVU_FMAC6(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ - else { \ + pass1 { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ + pass2 { \ int Fd, Fs, Ft; \ mVUallocFMAC6a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ @@ -142,12 +146,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 1); \ mVUallocFMAC6b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC7 - FMAC Opcodes Storing Result to ACC (I Reg) -#define mVU_FMAC7(operation) { \ +#define mVU_FMAC7(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(0, _Fs_, 0); } \ - else { \ + pass1 { mVUanalyzeFMAC1(0, _Fs_, 0); } \ + pass2 { \ int ACC, Fs, Ft; \ mVUallocFMAC7a(ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ @@ -155,12 +160,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fs, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC7b(ACC, Fs); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC8 - MADD FMAC Opcode Storing Result to Fd -#define mVU_FMAC8(operation) { \ +#define mVU_FMAC8(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(_Fd_, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC1(_Fd_, _Fs_, _Ft_); } \ + pass2 { \ int Fd, ACC, Fs, Ft; \ mVUallocFMAC8a(Fd, ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) { \ @@ -174,12 +180,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC8b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC9 - MSUB FMAC Opcode Storing Result to Fd -#define mVU_FMAC9(operation) { \ +#define mVU_FMAC9(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(_Fd_, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC1(_Fd_, _Fs_, _Ft_); } \ + pass2 { \ int Fd, ACC, Fs, Ft; \ mVUallocFMAC9a(Fd, ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) { \ @@ -193,12 +200,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC9b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC10 - MADD FMAC BC(xyzw) Opcode Storing Result to Fd -#define mVU_FMAC10(operation) { \ +#define mVU_FMAC10(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC3(_Fd_, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC3(_Fd_, _Fs_, _Ft_); } \ + pass2 { \ int Fd, ACC, Fs, Ft; \ mVUallocFMAC10a(Fd, ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) { \ @@ -212,12 +220,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC10b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // 
FMAC11 - MSUB FMAC BC(xyzw) Opcode Storing Result to Fd -#define mVU_FMAC11(operation) { \ +#define mVU_FMAC11(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC3(_Fd_, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC3(_Fd_, _Fs_, _Ft_); } \ + pass2 { \ int Fd, ACC, Fs, Ft; \ mVUallocFMAC11a(Fd, ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) { \ @@ -231,12 +240,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC11b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC12 - MADD FMAC Opcode Storing Result to Fd (I Reg) -#define mVU_FMAC12(operation) { \ +#define mVU_FMAC12(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ - else { \ + pass1 { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ + pass2 { \ int Fd, ACC, Fs, Ft; \ mVUallocFMAC12a(Fd, ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) { \ @@ -250,12 +260,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC12b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC13 - MSUB FMAC Opcode Storing Result to Fd (I Reg) -#define mVU_FMAC13(operation) { \ +#define mVU_FMAC13(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ - else { \ + pass1 { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ + pass2 { \ int Fd, ACC, Fs, Ft; \ mVUallocFMAC13a(Fd, ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) { \ @@ -269,12 +280,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC13b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC14 - MADDA/MSUBA FMAC Opcode -#define mVU_FMAC14(operation) { \ +#define mVU_FMAC14(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(0, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC1(0, _Fs_, _Ft_); } \ + pass2 { \ int ACCw, ACCr, Fs, Ft; \ mVUallocFMAC14a(ACCw, ACCr, Fs, Ft); \ if (_X_Y_Z_W == 8) { \ @@ -288,12 +300,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(ACCr, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC14b(ACCw, ACCr); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC15 - MADDA/MSUBA BC(xyzw) FMAC Opcode -#define mVU_FMAC15(operation) { \ +#define mVU_FMAC15(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC3(0, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC3(0, _Fs_, _Ft_); } \ + pass2 { \ int ACCw, ACCr, Fs, Ft; \ mVUallocFMAC15a(ACCw, ACCr, Fs, Ft); \ if (_X_Y_Z_W == 8) { \ @@ -307,12 +320,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(ACCr, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC15b(ACCw, ACCr); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC16 - MADDA/MSUBA FMAC Opcode (I Reg) -#define mVU_FMAC16(operation) { \ +#define mVU_FMAC16(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(0, _Fs_, 0); } \ - else { \ + pass1 { mVUanalyzeFMAC1(0, _Fs_, 0); } \ + pass2 { \ int ACCw, ACCr, Fs, Ft; \ mVUallocFMAC16a(ACCw, ACCr, Fs, Ft); \ if (_X_Y_Z_W == 8) { \ @@ -326,24 +340,26 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(ACCr, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC16b(ACCw, ACCr); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC18 - OPMULA FMAC Opcode -#define mVU_FMAC18(operation) { \ +#define mVU_FMAC18(operation, OPname) { \ microVU* mVU 
= mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(0, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC1(0, _Fs_, _Ft_); } \ + pass2 { \ int ACC, Fs, Ft; \ mVUallocFMAC18a(ACC, Fs, Ft); \ SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \ mVUupdateFlags(Fs, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC18b(ACC, Fs); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC19 - OPMULA FMAC Opcode -#define mVU_FMAC19(operation) { \ +#define mVU_FMAC19(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(_Fd_, _Fs_, _Ft_); } \ - else { \ + pass1 { mVUanalyzeFMAC1(_Fd_, _Fs_, _Ft_); } \ + pass2 { \ int Fd, ACC, Fs, Ft; \ mVUallocFMAC19a(Fd, ACC, Fs, Ft); \ SSE_MULPS_XMM_to_XMM(Fs, Ft); \ @@ -351,12 +367,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC19b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC22 - Normal FMAC Opcodes (Q Reg) -#define mVU_FMAC22(operation) { \ +#define mVU_FMAC22(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ - else { \ + pass1 { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ + pass2 { \ int Fd, Fs, Ft; \ mVUallocFMAC22a(Fd, Fs, Ft); \ if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ @@ -364,12 +381,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 1); \ mVUallocFMAC22b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC23 - FMAC Opcodes Storing Result to ACC (Q Reg) -#define mVU_FMAC23(operation) { \ +#define mVU_FMAC23(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(0, _Fs_, 0); } \ - else { \ + pass1 { mVUanalyzeFMAC1(0, _Fs_, 0); } \ + pass2 { \ int ACC, Fs, Ft; \ mVUallocFMAC23a(ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \ @@ -377,12 +395,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fs, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC23b(ACC, Fs); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC24 - MADD FMAC Opcode Storing Result to Fd (Q Reg) -#define mVU_FMAC24(operation) { \ +#define mVU_FMAC24(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ - else { \ + pass1 { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ + pass2 { \ int Fd, ACC, Fs, Ft; \ mVUallocFMAC24a(Fd, ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) { \ @@ -396,12 +415,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, xmmT1, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC24b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC25 - MSUB FMAC Opcode Storing Result to Fd (Q Reg) -#define mVU_FMAC25(operation) { \ +#define mVU_FMAC25(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ - else { \ + pass1 { mVUanalyzeFMAC1(_Fd_, _Fs_, 0); } \ + pass2 { \ int Fd, ACC, Fs, Ft; \ mVUallocFMAC25a(Fd, ACC, Fs, Ft); \ if (_X_Y_Z_W == 8) { \ @@ -415,12 +435,13 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(Fd, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC25b(Fd); \ } \ + pass3 { mVUlog(OPname); } \ } // FMAC26 - MADDA/MSUBA FMAC Opcode (Q Reg) -#define mVU_FMAC26(operation) { \ +#define mVU_FMAC26(operation, OPname) { \ microVU* mVU = mVUx; \ - if (!recPass) { mVUanalyzeFMAC1(0, _Fs_, 0); } \ - else { \ + pass1 { mVUanalyzeFMAC1(0, _Fs_, 0); } \ + pass2 { \ int ACCw, ACCr, Fs, Ft; \ mVUallocFMAC26a(ACCw, ACCr, Fs, Ft); \ 
if (_X_Y_Z_W == 8) { \ @@ -434,6 +455,7 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX mVUupdateFlags(ACCr, Fs, xmmT2, _X_Y_Z_W, 0); \ mVUallocFMAC26b(ACCw, ACCr); \ } \ + pass3 { mVUlog(OPname); } \ } //------------------------------------------------------------------ @@ -442,105 +464,105 @@ microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw, bool modX microVUf(void) mVU_ABS() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFMAC2(_Fs_, _Ft_); } - else { + pass1 { mVUanalyzeFMAC2(_Fs_, _Ft_); } + pass2 { int Fs, Ft; mVUallocFMAC2a(Fs, Ft); SSE_ANDPS_M128_to_XMM(Fs, (uptr)mVU_absclip); mVUallocFMAC2b(Ft); } + pass3 { mVUlog("ABS"); } } -microVUf(void) mVU_ADD() { mVU_FMAC1(ADD); mVUlog("ADD"); } -microVUf(void) mVU_ADDi() { mVU_FMAC6(ADD); mVUlog("ADDi"); } -microVUf(void) mVU_ADDq() { mVU_FMAC22(ADD); mVUlog("ADDq"); } -microVUf(void) mVU_ADDx() { mVU_FMAC3(ADD); mVUlog("ADDx"); } -microVUf(void) mVU_ADDy() { mVU_FMAC3(ADD); mVUlog("ADDy"); } -microVUf(void) mVU_ADDz() { mVU_FMAC3(ADD); mVUlog("ADDz"); } -microVUf(void) mVU_ADDw() { mVU_FMAC3(ADD); mVUlog("ADDw"); } -microVUf(void) mVU_ADDA() { mVU_FMAC4(ADD); mVUlog("ADDA"); } -microVUf(void) mVU_ADDAi() { mVU_FMAC7(ADD); mVUlog("ADDAi"); } -microVUf(void) mVU_ADDAq() { mVU_FMAC23(ADD); mVUlog("ADDAq"); } -microVUf(void) mVU_ADDAx() { mVU_FMAC5(ADD); mVUlog("ADDAx"); } -microVUf(void) mVU_ADDAy() { mVU_FMAC5(ADD); mVUlog("ADDAy"); } -microVUf(void) mVU_ADDAz() { mVU_FMAC5(ADD); mVUlog("ADDAz"); } -microVUf(void) mVU_ADDAw() { mVU_FMAC5(ADD); mVUlog("ADDAw"); } -microVUf(void) mVU_SUB() { mVU_FMAC1(SUB); mVUlog("SUB"); } -microVUf(void) mVU_SUBi() { mVU_FMAC6(SUB); mVUlog("SUBi"); } -microVUf(void) mVU_SUBq() { mVU_FMAC22(SUB); mVUlog("SUBq"); } -microVUf(void) mVU_SUBx() { mVU_FMAC3(SUB); mVUlog("SUBx"); } -microVUf(void) mVU_SUBy() { mVU_FMAC3(SUB); mVUlog("SUBy"); } -microVUf(void) mVU_SUBz() { mVU_FMAC3(SUB); mVUlog("SUBz"); } -microVUf(void) mVU_SUBw() { mVU_FMAC3(SUB); mVUlog("SUBw"); } -microVUf(void) mVU_SUBA() { mVU_FMAC4(SUB); mVUlog("SUBA"); } -microVUf(void) mVU_SUBAi() { mVU_FMAC7(SUB); mVUlog("SUBAi"); } -microVUf(void) mVU_SUBAq() { mVU_FMAC23(SUB); mVUlog("SUBAq"); } -microVUf(void) mVU_SUBAx() { mVU_FMAC5(SUB); mVUlog("SUBAx"); } -microVUf(void) mVU_SUBAy() { mVU_FMAC5(SUB); mVUlog("SUBAy"); } -microVUf(void) mVU_SUBAz() { mVU_FMAC5(SUB); mVUlog("SUBAz"); } -microVUf(void) mVU_SUBAw() { mVU_FMAC5(SUB); mVUlog("SUBAw"); } -microVUf(void) mVU_MUL() { mVU_FMAC1(MUL); mVUlog("MUL"); } -microVUf(void) mVU_MULi() { mVU_FMAC6(MUL); mVUlog("MULi"); } -microVUf(void) mVU_MULq() { mVU_FMAC22(MUL); mVUlog("MULq"); } -microVUf(void) mVU_MULx() { mVU_FMAC3(MUL); mVUlog("MULx"); } -microVUf(void) mVU_MULy() { mVU_FMAC3(MUL); mVUlog("MULy"); } -microVUf(void) mVU_MULz() { mVU_FMAC3(MUL); mVUlog("MULz"); } -microVUf(void) mVU_MULw() { mVU_FMAC3(MUL); mVUlog("MULw"); } -microVUf(void) mVU_MULA() { mVU_FMAC4(MUL); mVUlog("MULA"); } -microVUf(void) mVU_MULAi() { mVU_FMAC7(MUL); mVUlog("MULAi"); } -microVUf(void) mVU_MULAq() { mVU_FMAC23(MUL); mVUlog("MULAq"); } -microVUf(void) mVU_MULAx() { mVU_FMAC5(MUL); mVUlog("MULAx"); } -microVUf(void) mVU_MULAy() { mVU_FMAC5(MUL); mVUlog("MULAy"); } -microVUf(void) mVU_MULAz() { mVU_FMAC5(MUL); mVUlog("MULAz"); } -microVUf(void) mVU_MULAw() { mVU_FMAC5(MUL); mVUlog("MULAw"); } -microVUf(void) mVU_MADD() { mVU_FMAC8(ADD); mVUlog("MADD"); } -microVUf(void) mVU_MADDi() { mVU_FMAC12(ADD); mVUlog("MADDi"); } -microVUf(void) mVU_MADDq() { 
mVU_FMAC24(ADD); mVUlog("MADDq"); } -microVUf(void) mVU_MADDx() { mVU_FMAC10(ADD); mVUlog("MADDx"); } -microVUf(void) mVU_MADDy() { mVU_FMAC10(ADD); mVUlog("MADDy"); } -microVUf(void) mVU_MADDz() { mVU_FMAC10(ADD); mVUlog("MADDz"); } -microVUf(void) mVU_MADDw() { mVU_FMAC10(ADD); mVUlog("MADDw"); } -microVUf(void) mVU_MADDA() { mVU_FMAC14(ADD); mVUlog("MADDA"); } -microVUf(void) mVU_MADDAi() { mVU_FMAC16(ADD); mVUlog("MADDAi"); } -microVUf(void) mVU_MADDAq() { mVU_FMAC26(ADD); mVUlog("MADDAq"); } -microVUf(void) mVU_MADDAx() { mVU_FMAC15(ADD); mVUlog("MADDAx"); } -microVUf(void) mVU_MADDAy() { mVU_FMAC15(ADD); mVUlog("MADDAy"); } -microVUf(void) mVU_MADDAz() { mVU_FMAC15(ADD); mVUlog("MADDAz"); } -microVUf(void) mVU_MADDAw() { mVU_FMAC15(ADD); mVUlog("MADDAw"); } -microVUf(void) mVU_MSUB() { mVU_FMAC9(SUB); mVUlog("MSUB"); } -microVUf(void) mVU_MSUBi() { mVU_FMAC13(SUB); mVUlog("MSUBi"); } -microVUf(void) mVU_MSUBq() { mVU_FMAC25(SUB); mVUlog("MSUBq"); } -microVUf(void) mVU_MSUBx() { mVU_FMAC11(SUB); mVUlog("MSUBx"); } -microVUf(void) mVU_MSUBy() { mVU_FMAC11(SUB); mVUlog("MSUBy"); } -microVUf(void) mVU_MSUBz() { mVU_FMAC11(SUB); mVUlog("MSUBz"); } -microVUf(void) mVU_MSUBw() { mVU_FMAC11(SUB); mVUlog("MSUBw"); } -microVUf(void) mVU_MSUBA() { mVU_FMAC14(SUB); mVUlog("MSUBA"); } -microVUf(void) mVU_MSUBAi() { mVU_FMAC16(SUB); mVUlog("MSUBAi"); } -microVUf(void) mVU_MSUBAq() { mVU_FMAC26(SUB); mVUlog("MSUBAq"); } -microVUf(void) mVU_MSUBAx() { mVU_FMAC15(SUB); mVUlog("MSUBAx"); } -microVUf(void) mVU_MSUBAy() { mVU_FMAC15(SUB); mVUlog("MSUBAy"); } -microVUf(void) mVU_MSUBAz() { mVU_FMAC15(SUB); mVUlog("MSUBAz"); } -microVUf(void) mVU_MSUBAw() { mVU_FMAC15(SUB); mVUlog("MSUBAw"); } -microVUf(void) mVU_MAX() { mVU_FMAC1(MAX); mVUlog("MAX"); } -microVUf(void) mVU_MAXi() { mVU_FMAC6(MAX); mVUlog("MAXi"); } -microVUf(void) mVU_MAXx() { mVU_FMAC3(MAX); mVUlog("MAXq"); } -microVUf(void) mVU_MAXy() { mVU_FMAC3(MAX); mVUlog("MAXy"); } -microVUf(void) mVU_MAXz() { mVU_FMAC3(MAX); mVUlog("MAXz"); } -microVUf(void) mVU_MAXw() { mVU_FMAC3(MAX); mVUlog("MAXw"); } -microVUf(void) mVU_MINI() { mVU_FMAC1(MIN); mVUlog("MINI"); } -microVUf(void) mVU_MINIi() { mVU_FMAC6(MIN); mVUlog("MINIi"); } -microVUf(void) mVU_MINIx() { mVU_FMAC3(MIN); mVUlog("MINIx"); } -microVUf(void) mVU_MINIy() { mVU_FMAC3(MIN); mVUlog("MINIy"); } -microVUf(void) mVU_MINIz() { mVU_FMAC3(MIN); mVUlog("MINIz"); } -microVUf(void) mVU_MINIw() { mVU_FMAC3(MIN); mVUlog("MINIw"); } -microVUf(void) mVU_OPMULA() { mVU_FMAC18(MUL); mVUlog("OPMULA"); } -microVUf(void) mVU_OPMSUB() { mVU_FMAC19(SUB); mVUlog("OPMSUB"); } -microVUf(void) mVU_NOP() { /*mVUlog("NOP");*/ } +microVUf(void) mVU_ADD() { mVU_FMAC1 (ADD, "ADD"); } +microVUf(void) mVU_ADDi() { mVU_FMAC6 (ADD, "ADDi"); } +microVUf(void) mVU_ADDq() { mVU_FMAC22(ADD, "ADDq"); } +microVUf(void) mVU_ADDx() { mVU_FMAC3 (ADD, "ADDx"); } +microVUf(void) mVU_ADDy() { mVU_FMAC3 (ADD, "ADDy"); } +microVUf(void) mVU_ADDz() { mVU_FMAC3 (ADD, "ADDz"); } +microVUf(void) mVU_ADDw() { mVU_FMAC3 (ADD, "ADDw"); } +microVUf(void) mVU_ADDA() { mVU_FMAC4 (ADD, "ADDA"); } +microVUf(void) mVU_ADDAi() { mVU_FMAC7 (ADD, "ADDAi"); } +microVUf(void) mVU_ADDAq() { mVU_FMAC23(ADD, "ADDAq"); } +microVUf(void) mVU_ADDAx() { mVU_FMAC5 (ADD, "ADDAx"); } +microVUf(void) mVU_ADDAy() { mVU_FMAC5 (ADD, "ADDAy"); } +microVUf(void) mVU_ADDAz() { mVU_FMAC5 (ADD, "ADDAz"); } +microVUf(void) mVU_ADDAw() { mVU_FMAC5 (ADD, "ADDAw"); } +microVUf(void) mVU_SUB() { mVU_FMAC1 (SUB, "SUB"); } +microVUf(void) mVU_SUBi() { mVU_FMAC6 (SUB, 
"SUBi"); } +microVUf(void) mVU_SUBq() { mVU_FMAC22(SUB, "SUBq"); } +microVUf(void) mVU_SUBx() { mVU_FMAC3 (SUB, "SUBx"); } +microVUf(void) mVU_SUBy() { mVU_FMAC3 (SUB, "SUBy"); } +microVUf(void) mVU_SUBz() { mVU_FMAC3 (SUB, "SUBz"); } +microVUf(void) mVU_SUBw() { mVU_FMAC3 (SUB, "SUBw"); } +microVUf(void) mVU_SUBA() { mVU_FMAC4 (SUB, "SUBA"); } +microVUf(void) mVU_SUBAi() { mVU_FMAC7 (SUB, "SUBAi"); } +microVUf(void) mVU_SUBAq() { mVU_FMAC23(SUB, "SUBAq"); } +microVUf(void) mVU_SUBAx() { mVU_FMAC5 (SUB, "SUBAx"); } +microVUf(void) mVU_SUBAy() { mVU_FMAC5 (SUB, "SUBAy"); } +microVUf(void) mVU_SUBAz() { mVU_FMAC5 (SUB, "SUBAz"); } +microVUf(void) mVU_SUBAw() { mVU_FMAC5 (SUB, "SUBAw"); } +microVUf(void) mVU_MUL() { mVU_FMAC1 (MUL, "MUL"); } +microVUf(void) mVU_MULi() { mVU_FMAC6 (MUL, "MULi"); } +microVUf(void) mVU_MULq() { mVU_FMAC22(MUL, "MULq"); } +microVUf(void) mVU_MULx() { mVU_FMAC3 (MUL, "MULx"); } +microVUf(void) mVU_MULy() { mVU_FMAC3 (MUL, "MULy"); } +microVUf(void) mVU_MULz() { mVU_FMAC3 (MUL, "MULz"); } +microVUf(void) mVU_MULw() { mVU_FMAC3 (MUL, "MULw"); } +microVUf(void) mVU_MULA() { mVU_FMAC4 (MUL, "MULA"); } +microVUf(void) mVU_MULAi() { mVU_FMAC7 (MUL, "MULAi"); } +microVUf(void) mVU_MULAq() { mVU_FMAC23(MUL, "MULAq"); } +microVUf(void) mVU_MULAx() { mVU_FMAC5 (MUL, "MULAx"); } +microVUf(void) mVU_MULAy() { mVU_FMAC5 (MUL, "MULAy"); } +microVUf(void) mVU_MULAz() { mVU_FMAC5 (MUL, "MULAz"); } +microVUf(void) mVU_MULAw() { mVU_FMAC5 (MUL, "MULAw"); } +microVUf(void) mVU_MADD() { mVU_FMAC8 (ADD, "MADD"); } +microVUf(void) mVU_MADDi() { mVU_FMAC12(ADD, "MADDi"); } +microVUf(void) mVU_MADDq() { mVU_FMAC24(ADD, "MADDq"); } +microVUf(void) mVU_MADDx() { mVU_FMAC10(ADD, "MADDx"); } +microVUf(void) mVU_MADDy() { mVU_FMAC10(ADD, "MADDy"); } +microVUf(void) mVU_MADDz() { mVU_FMAC10(ADD, "MADDz"); } +microVUf(void) mVU_MADDw() { mVU_FMAC10(ADD, "MADDw"); } +microVUf(void) mVU_MADDA() { mVU_FMAC14(ADD, "MADDA"); } +microVUf(void) mVU_MADDAi() { mVU_FMAC16(ADD, "MADDAi"); } +microVUf(void) mVU_MADDAq() { mVU_FMAC26(ADD, "MADDAq"); } +microVUf(void) mVU_MADDAx() { mVU_FMAC15(ADD, "MADDAx"); } +microVUf(void) mVU_MADDAy() { mVU_FMAC15(ADD, "MADDAy"); } +microVUf(void) mVU_MADDAz() { mVU_FMAC15(ADD, "MADDAz"); } +microVUf(void) mVU_MADDAw() { mVU_FMAC15(ADD, "MADDAw"); } +microVUf(void) mVU_MSUB() { mVU_FMAC9 (SUB, "MSUB"); } +microVUf(void) mVU_MSUBi() { mVU_FMAC13(SUB, "MSUBi"); } +microVUf(void) mVU_MSUBq() { mVU_FMAC25(SUB, "MSUBq"); } +microVUf(void) mVU_MSUBx() { mVU_FMAC11(SUB, "MSUBx"); } +microVUf(void) mVU_MSUBy() { mVU_FMAC11(SUB, "MSUBy"); } +microVUf(void) mVU_MSUBz() { mVU_FMAC11(SUB, "MSUBz"); } +microVUf(void) mVU_MSUBw() { mVU_FMAC11(SUB, "MSUBw"); } +microVUf(void) mVU_MSUBA() { mVU_FMAC14(SUB, "MSUBA"); } +microVUf(void) mVU_MSUBAi() { mVU_FMAC16(SUB, "MSUBAi"); } +microVUf(void) mVU_MSUBAq() { mVU_FMAC26(SUB, "MSUBAq"); } +microVUf(void) mVU_MSUBAx() { mVU_FMAC15(SUB, "MSUBAx"); } +microVUf(void) mVU_MSUBAy() { mVU_FMAC15(SUB, "MSUBAy"); } +microVUf(void) mVU_MSUBAz() { mVU_FMAC15(SUB, "MSUBAz"); } +microVUf(void) mVU_MSUBAw() { mVU_FMAC15(SUB, "MSUBAw"); } +microVUf(void) mVU_MAX() { mVU_FMAC1 (MAX, "MAX"); } +microVUf(void) mVU_MAXi() { mVU_FMAC6 (MAX, "MAXi"); } +microVUf(void) mVU_MAXx() { mVU_FMAC3 (MAX, "MAXq"); } +microVUf(void) mVU_MAXy() { mVU_FMAC3 (MAX, "MAXy"); } +microVUf(void) mVU_MAXz() { mVU_FMAC3 (MAX, "MAXz"); } +microVUf(void) mVU_MAXw() { mVU_FMAC3 (MAX, "MAXw"); } +microVUf(void) mVU_MINI() { mVU_FMAC1 (MIN, "MINI"); } +microVUf(void) mVU_MINIi() { 
mVU_FMAC6 (MIN, "MINIi"); } +microVUf(void) mVU_MINIx() { mVU_FMAC3 (MIN, "MINIx"); } +microVUf(void) mVU_MINIy() { mVU_FMAC3 (MIN, "MINIy"); } +microVUf(void) mVU_MINIz() { mVU_FMAC3 (MIN, "MINIz"); } +microVUf(void) mVU_MINIw() { mVU_FMAC3 (MIN, "MINIw"); } +microVUf(void) mVU_OPMULA() { mVU_FMAC18(MUL, "OPMULA"); } +microVUf(void) mVU_OPMSUB() { mVU_FMAC19(SUB, "OPMSUB"); } +microVUf(void) mVU_NOP() { pass3 { mVUlog("NOP"); } } microVUq(void) mVU_FTOIx(uptr addr) { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFMAC2(_Fs_, _Ft_); } - else { + pass1 { mVUanalyzeFMAC2(_Fs_, _Ft_); } + pass2 { int Fs, Ft; - mVUlog("FTOIx"); mVUallocFMAC2a(Fs, Ft); // Note: For help understanding this algorithm see recVUMI_FTOI_Saturate() @@ -557,16 +579,15 @@ microVUq(void) mVU_FTOIx(uptr addr) { mVUallocFMAC2b(Ft); } } -microVUf(void) mVU_FTOI0() { mVU_FTOIx(0); } -microVUf(void) mVU_FTOI4() { mVU_FTOIx((uptr)mVU_FTOI_4); } -microVUf(void) mVU_FTOI12() { mVU_FTOIx((uptr)mVU_FTOI_12); } -microVUf(void) mVU_FTOI15() { mVU_FTOIx((uptr)mVU_FTOI_15); } +microVUf(void) mVU_FTOI0() { mVU_FTOIx((uptr)0); pass3 { mVUlog("FTOI0"); } } +microVUf(void) mVU_FTOI4() { mVU_FTOIx((uptr)mVU_FTOI_4); pass3 { mVUlog("FTOI4"); } } +microVUf(void) mVU_FTOI12() { mVU_FTOIx((uptr)mVU_FTOI_12); pass3 { mVUlog("FTOI12"); } } +microVUf(void) mVU_FTOI15() { mVU_FTOIx((uptr)mVU_FTOI_15); pass3 { mVUlog("FTOI15"); } } microVUq(void) mVU_ITOFx(uptr addr) { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFMAC2(_Fs_, _Ft_); } - else { + pass1 { mVUanalyzeFMAC2(_Fs_, _Ft_); } + pass2 { int Fs, Ft; - mVUlog("ITOFx"); mVUallocFMAC2a(Fs, Ft); SSE2_CVTDQ2PS_XMM_to_XMM(Ft, Fs); @@ -576,16 +597,15 @@ microVUq(void) mVU_ITOFx(uptr addr) { mVUallocFMAC2b(Ft); } } -microVUf(void) mVU_ITOF0() { mVU_ITOFx(0); } -microVUf(void) mVU_ITOF4() { mVU_ITOFx((uptr)mVU_ITOF_4); } -microVUf(void) mVU_ITOF12() { mVU_ITOFx((uptr)mVU_ITOF_12); } -microVUf(void) mVU_ITOF15() { mVU_ITOFx((uptr)mVU_ITOF_15); } +microVUf(void) mVU_ITOF0() { mVU_ITOFx((uptr)0); pass3 { mVUlog("ITOF0"); } } +microVUf(void) mVU_ITOF4() { mVU_ITOFx((uptr)mVU_ITOF_4); pass3 { mVUlog("ITOF4"); } } +microVUf(void) mVU_ITOF12() { mVU_ITOFx((uptr)mVU_ITOF_12); pass3 { mVUlog("ITOF12"); } } +microVUf(void) mVU_ITOF15() { mVU_ITOFx((uptr)mVU_ITOF_15); pass3 { mVUlog("ITOF15"); } } microVUf(void) mVU_CLIP() { microVU* mVU = mVUx; - if (!recPass) { mVUanalyzeFMAC4(_Fs_, _Ft_); mVUlog("clip broken"); } - else { + pass1 { mVUanalyzeFMAC4(_Fs_, _Ft_); mVUprint("clip broken"); } + pass2 { int Fs, Ft; - mVUlog("CLIP"); mVUallocFMAC17a(Fs, Ft); mVUallocCFLAGa(gprT1, fpcInstance); SHL32ItoR(gprT1, 6); @@ -613,5 +633,6 @@ microVUf(void) mVU_CLIP() { mVUallocCFLAGb(gprT1, fcInstance); } + pass3 { mVUlog("CLIP"); } } #endif //PCSX2_MICROVU From cb57386d8508872de7e332fb8973c06d2ece5ecd Mon Sep 17 00:00:00 2001 From: mattmenke Date: Mon, 20 Apr 2009 23:14:32 +0000 Subject: [PATCH 118/143] LilyPad, Sio.cpp both modified to never allow pads in first slot on either port to be unplugged, as it broke things. Unplugging pads from other slots seems to work fine, when using multitap (When not, obviously no such pads in the first place). 
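As a rough illustration of the rule this patch enforces (a hedged sketch, not code from the patch; the helper name and signature are invented), the first slot on each port is always treated as connected, while multitap slots 1-3 keep reporting their real state:

    // Hypothetical helper, not part of this patch: shows the intent of the
    // change below. Slot 0 on either port can never appear unplugged; only
    // multitap slots (1-3) may report "not present".
    static bool padReportsConnected(bool physicallyEnabled, unsigned int slot)
    {
        return physicallyEnabled || (slot == 0);
    }

This mirrors the LilyPad change further down ("return pads[port][slot].enabled | !slot;") and the Sio.cpp checks that only take the "no pad" path when the active slot is non-zero.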
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1034 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Sio.cpp | 7 +++---- plugins/LilyPad/Config.cpp | 7 +++++-- plugins/LilyPad/LilyPad.cpp | 3 ++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pcsx2/Sio.cpp b/pcsx2/Sio.cpp index 44f2666b3f..52687d7698 100644 --- a/pcsx2/Sio.cpp +++ b/pcsx2/Sio.cpp @@ -152,7 +152,6 @@ void SIO_CommandWrite(u8 value,int way) { case 3: // No pad connected. sio.parp++; - sio.bufcount = 6; if (sio.parp == sio.bufcount) { sio.padst = 0; return; } SIO_INT(); return; @@ -508,7 +507,7 @@ void InitializeSIO(u8 value) sio.StatReg &= ~TX_EMPTY; // Now the Buffer is not empty sio.StatReg |= RX_RDY; // Transfer is Ready - sio.bufcount = 2; + sio.bufcount = 4; // Default size, when no pad connected. sio.parp = 0; sio.padst = 1; sio.packetsize = 1; @@ -517,7 +516,7 @@ void InitializeSIO(u8 value) switch (sio.CtrlReg&0x2002) { case 0x0002: - if (!PAD1setSlot(1, 1+sio.activePadSlot[0])) { + if (!PAD1setSlot(1, 1+sio.activePadSlot[0]) && sio.activePadSlot[0]) { // Pad is not present. Don't send poll, just return a bunch of 0's. sio2.packet.recvVal1 = 0x1D100; sio.padst = 3; @@ -527,7 +526,7 @@ void InitializeSIO(u8 value) } break; case 0x2002: - if (!PAD2setSlot(2, 1+sio.activePadSlot[1])) { + if (!PAD2setSlot(2, 1+sio.activePadSlot[1]) && sio.activePadSlot[1]) { // Pad is not present. Don't send poll, just return a bunch of 0's. sio2.packet.recvVal1 = 0x1D100; sio.padst = 3; diff --git a/plugins/LilyPad/Config.cpp b/plugins/LilyPad/Config.cpp index 9a504a45d5..e3110b988e 100644 --- a/plugins/LilyPad/Config.cpp +++ b/plugins/LilyPad/Config.cpp @@ -1615,7 +1615,7 @@ void UpdatePadList(HWND hWnd) { int slot; int port; int index = 0; - wchar_t *padTypes[] = {L"Disabled", L"Dualshock 2", L"Guitar"}; + wchar_t *padTypes[] = {L"Unplugged", L"Dualshock 2", L"Guitar"}; for (port=0; port<2; port++) { for (slot = 0; slot<4; slot++) { wchar_t text[20]; @@ -1635,6 +1635,9 @@ void UpdatePadList(HWND hWnd) { item.iSubItem = 1; if (2 < (unsigned int)config.padConfigs[port][slot].type) config.padConfigs[port][slot].type = Dualshock2Pad; item.pszText = padTypes[config.padConfigs[port][slot].type]; + if (!slot && !config.padConfigs[port][slot].type) + item.pszText = L"Unplugged (Kinda)"; + ListView_SetItem(hWndList, &item); item.iSubItem = 2; @@ -1683,7 +1686,7 @@ INT_PTR CALLBACK GeneralDialogProc(HWND hWnd, unsigned int msg, WPARAM wParam, L c.cx = 50; c.pszText = L"Pad"; ListView_InsertColumn(hWndList, 0, &c); - c.cx = 90; + c.cx = 120; c.pszText = L"Type"; ListView_InsertColumn(hWndList, 1, &c); c.cx = 70; diff --git a/plugins/LilyPad/LilyPad.cpp b/plugins/LilyPad/LilyPad.cpp index 7d06d093ba..6f90966ac7 100644 --- a/plugins/LilyPad/LilyPad.cpp +++ b/plugins/LilyPad/LilyPad.cpp @@ -1419,5 +1419,6 @@ s32 CALLBACK PADsetSlot(u8 port, u8 slot) { } // Even if no pad there, record the slot, as it is the active slot regardless. slots[port] = slot; - return pads[port][slot].enabled; + // First slot always allowed. + return pads[port][slot].enabled | !slot; } From 5c312c36c7e7b8521cca9d2cb824a901a161e5b3 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 21 Apr 2009 01:40:45 +0000 Subject: [PATCH 119/143] Added PMIN/PMAX/PMUL/PCMP/PSHUF/PUNPCK/UNPCK/PACK instructions to the new emitter [this basically finishes all MMX instructions -- just some SSE2/SSE3 and SSE4 mess left!] 
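The hunks below widen the SIMD opcode parameters from u8 to u16 (SimdPrefix, the template Opcode parameters, and so on). A plausible reason, which is an assumption on this reading rather than something the commit states, is to leave room for the longer 0x38xx/0x3Axx escape encodings used by SSSE3/SSE4 instructions. A minimal sketch of how an emitter might split such a 16-bit opcode; the function name, the emitByte callback, and the omission of the 0x66/0xF2/0xF3 prefix handling are all hypothetical and are not the real SimdPrefix() from ix86.cpp:

    #include <cstdint>

    // Hypothetical sketch only. The high byte of a 16-bit opcode can carry a
    // second escape byte (0x38 or 0x3A); plain single-byte SSE opcodes
    // (high byte == 0) are emitted as a lone byte after the 0x0F escape.
    static void emitSimdOpcode(void (*emitByte)(std::uint8_t), std::uint16_t opcode)
    {
        emitByte(0x0F);                                        // SIMD escape byte
        if (opcode > 0xFF)
            emitByte(static_cast<std::uint8_t>(opcode >> 8));  // e.g. 0x38 or 0x3A
        emitByte(static_cast<std::uint8_t>(opcode & 0xFF));    // primary opcode byte
    }

Under that assumption, an opcode value like 0x3817 would emit the byte sequence 0F 38 17, while an ordinary SSE opcode such as 0x54 still emits 0F 54.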
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1035 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/group1.h | 4 +- pcsx2/x86/ix86/implement/group3.h | 2 +- pcsx2/x86/ix86/implement/xmm/movqss.h | 377 +++++++++++++++++++--- pcsx2/x86/ix86/ix86.cpp | 45 ++- pcsx2/x86/ix86/ix86_instructions.h | 49 +-- pcsx2/x86/ix86/ix86_legacy_instructions.h | 6 - pcsx2/x86/ix86/ix86_legacy_mmx.cpp | 183 ++--------- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 305 +++-------------- 8 files changed, 478 insertions(+), 493 deletions(-) diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index bbc6735b51..8b38f35b04 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -134,7 +134,7 @@ public: // This class combines x86 with SSE/SSE2 logic operations (ADD, OR, and NOT). // Note: ANDN [AndNot] is handled below separately. // -template< G1Type InstType, u8 OpcodeSSE > +template< G1Type InstType, u16 OpcodeSSE > class xImpl_G1Logic : public xImpl_Group1 { public: @@ -149,7 +149,7 @@ public: // ------------------------------------------------------------------------ // This class combines x86 with SSE/SSE2 arithmetic operations (ADD/SUB). // -template< G1Type InstType, u8 OpcodeSSE > +template< G1Type InstType, u16 OpcodeSSE > class xImpl_G1Arith : public xImpl_G1Logic { public: diff --git a/pcsx2/x86/ix86/implement/group3.h b/pcsx2/x86/ix86/implement/group3.h index a88049789f..f5d72b3611 100644 --- a/pcsx2/x86/ix86/implement/group3.h +++ b/pcsx2/x86/ix86/implement/group3.h @@ -59,7 +59,7 @@ public: // ------------------------------------------------------------------------ // This class combines x86 and SSE/SSE2 instructions for iMUL and iDIV. // -template< G3Type InstType, u8 OpcodeSSE > +template< G3Type InstType, u16 OpcodeSSE > class xImpl_Group3 : public Group3ImplAll { public: diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h index d69107bdfe..3d47f0b6e9 100644 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ b/pcsx2/x86/ix86/implement/xmm/movqss.h @@ -21,7 +21,7 @@ ////////////////////////////////////////////////////////////////////////////////////////// // MMX / SSE Helper Functions! -extern void SimdPrefix( u8 prefix, u8 opcode ); +extern void SimdPrefix( u8 prefix, u16 opcode ); // ------------------------------------------------------------------------ // xmm emitter helpers for xmm instruction with prefixes. @@ -30,21 +30,21 @@ extern void SimdPrefix( u8 prefix, u8 opcode ); // instructions violate this "guideline.") // template< typename T, typename T2 > -__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) +__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) { SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); ModRM_Direct( to.Id, from.Id ); } template< typename T > -void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) +__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) { SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? 
prefix : 0, opcode ); EmitSibMagic( reg.Id, sib ); } template< typename T > -__emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) +__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) { SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); xWriteDisp( reg.Id, data ); @@ -56,21 +56,21 @@ __emitinline void writeXMMop( u8 prefix, u8 opcode, const xRegister& reg, con // some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies. // template< typename T, typename T2 > -__emitinline void writeXMMop( u8 opcode, const xRegister& to, const xRegister& from ) +__emitinline void writeXMMop( u16 opcode, const xRegister& to, const xRegister& from ) { SimdPrefix( 0, opcode ); ModRM_Direct( to.Id, from.Id ); } template< typename T > -void writeXMMop( u8 opcode, const xRegister& reg, const ModSibBase& sib ) +__noinline void writeXMMop( u16 opcode, const xRegister& reg, const ModSibBase& sib ) { SimdPrefix( 0, opcode ); EmitSibMagic( reg.Id, sib ); } template< typename T > -__emitinline void writeXMMop( u8 opcode, const xRegister& reg, const void* data ) +__emitinline void writeXMMop( u16 opcode, const xRegister& reg, const void* data ) { SimdPrefix( 0, opcode ); xWriteDisp( reg.Id, data ); @@ -80,7 +80,7 @@ __emitinline void writeXMMop( u8 opcode, const xRegister& reg, const void* da // Moves to/from high/low portions of an xmm register. // These instructions cannot be used in reg/reg form. // -template< u8 Opcode > +template< u16 Opcode > class MovhlImplAll { protected: @@ -104,7 +104,7 @@ public: // RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but // do something kinda different! Fun! // -template< u8 Opcode > +template< u16 Opcode > class MovhlImpl_RtoR { public: @@ -115,7 +115,7 @@ public: }; // ------------------------------------------------------------------------ -template< u8 Prefix, u8 Opcode, u8 OpcodeAlt > +template< u8 Prefix, u16 Opcode, u16 OpcodeAlt > class MovapsImplAll { public: @@ -132,49 +132,75 @@ public: // SimdImpl_PackedLogic - Implements logic forms for MMX/SSE instructions, and can be used for // a few other various instruction too (anything which comes in simdreg,simdreg/ModRM forms). // -template< u8 Opcode > +template< u16 Opcode > class SimdImpl_PackedLogic { public: - template< typename T > - __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( 0x66, Opcode, to, from ); } - template< typename T > - __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); } - template< typename T > - __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( 0x66, Opcode, to, from ); } + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); } + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } SimdImpl_PackedLogic() {} //GCWho? 
}; // ------------------------------------------------------------------------ -// For implementing SSE-only logic operations that have reg,reg/rm forms only, +// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only, // like ANDPS/ANDPD // -template< u8 Prefix, u8 Opcode > +template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegSSE { public: __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } SimdImpl_DestRegSSE() {} //GCWho? }; +// ------------------------------------------------------------------------ +// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only +// (PSHUFD / PSHUFHW / etc). +// +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegImmSSE +{ +public: + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + + SimdImpl_DestRegImmSSE() {} //GCWho? +}; + +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegImmMMX +{ +public: + __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + + SimdImpl_DestRegImmMMX() {} //GCWho? +}; + // ------------------------------------------------------------------------ // For implementing MMX/SSE operations that have reg,reg/rm forms only, // but accept either MM or XMM destinations (most PADD/PSUB and other P srithmetic ops). 
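// For example, the SimdImpl_AddSub class below instantiates this template as
// SimdImpl_DestRegEither<0x66,0xfc> for its B member (OpcodeB 0xdc + 0x20, as used by
// xPADD), which emits PADDB as 0F FC /r on MMX registers and 66 0F FC /r on XMM registers.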
// -template< u8 Prefix, u8 Opcode > +template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegEither { public: - template< typename DestOperandType > - __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename DestOperandType > - __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename DestOperandType > - __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename DestOperandType > __forceinline + void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename DestOperandType > __forceinline + void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename DestOperandType > __forceinline + void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } SimdImpl_DestRegEither() {} //GCWho? }; @@ -183,19 +209,19 @@ public: // For implementing MMX/SSE operations which the destination *must* be a register, but the source // can be regDirect or ModRM (indirect). // -template< u8 Prefix, u8 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > +template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > class SimdImpl_DestRegStrict { public: - __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __noinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } SimdImpl_DestRegStrict() {} //GCWho? }; // ------------------------------------------------------------------------ -template< u8 OpcodeSSE > +template< u16 OpcodeSSE > class SimdImpl_PSPD_SSSD { public: @@ -209,7 +235,7 @@ public: // ------------------------------------------------------------------------ // -template< u8 OpcodeSSE > +template< u16 OpcodeSSE > class SimdImpl_AndNot { public: @@ -221,7 +247,7 @@ public: // ------------------------------------------------------------------------ // For instructions that have SS/SD form only (UCOMI, etc) // AltPrefix - prefixed used for doubles (SD form). 
-template< u8 AltPrefix, u8 OpcodeSSE > +template< u8 AltPrefix, u16 OpcodeSSE > class SimdImpl_SS_SD { public: @@ -232,7 +258,7 @@ public: // ------------------------------------------------------------------------ // For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions) -template< u8 OpcodeSSE > +template< u16 OpcodeSSE > class SimdImpl_rSqrt { public: @@ -243,7 +269,7 @@ public: // ------------------------------------------------------------------------ // For instructions that have PS/SS/SD form only (most commonly Sqrt functions) -template< u8 OpcodeSSE > +template< u16 OpcodeSSE > class SimdImpl_Sqrt : public SimdImpl_rSqrt { public: @@ -252,7 +278,7 @@ public: }; // ------------------------------------------------------------------------ -template< u8 OpcodeSSE > +template< u16 OpcodeSSE > class SimdImpl_Shuffle { protected: @@ -296,7 +322,7 @@ public: ////////////////////////////////////////////////////////////////////////////////////////// // // -template< u8 Opcode1, u8 OpcodeImm, u8 Modcode > +template< u16 Opcode1, u16 OpcodeImm, u8 Modcode > class SimdImpl_Shift { public: @@ -330,17 +356,27 @@ public: }; // ------------------------------------------------------------------------ -template< u8 OpcodeBase1, u8 OpcodeBaseImm, u8 Modcode > -class SimdImpl_ShiftAll +// Used for PSRA +template< u16 OpcodeBase1, u8 Modcode > +class SimdImpl_ShiftWithoutQ { public: - const SimdImpl_Shift W; - const SimdImpl_Shift D; - const SimdImpl_Shift Q; + const SimdImpl_Shift W; + const SimdImpl_Shift D; + + SimdImpl_ShiftWithoutQ() {} +}; + +// ------------------------------------------------------------------------ +template< u16 OpcodeBase1, u8 Modcode > +class SimdImpl_ShiftAll : public SimdImpl_ShiftWithoutQ +{ +public: + const SimdImpl_Shift Q; void DQ( const xRegisterSSE& to, u8 imm ) const { - SimdPrefix( 0x66, OpcodeBaseImm+3 ); + SimdPrefix( 0x66, 0x73 ); ModRM( 3, (int)Modcode+1, to.Id ); xWrite( imm ); } @@ -350,26 +386,261 @@ public: ////////////////////////////////////////////////////////////////////////////////////////// // -template< u8 OpcodeB, u8 OpcodeS, u8 OpcodeUS, u8 OpcodeQ > +template< u16 OpcodeB, u16 OpcodeQ > class SimdImpl_AddSub { public: - const SimdImpl_DestRegEither<0x66,OpcodeB> B; - const SimdImpl_DestRegEither<0x66,OpcodeB+1> W; - const SimdImpl_DestRegEither<0x66,OpcodeB+2> D; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D; const SimdImpl_DestRegEither<0x66,OpcodeQ> Q; // Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeS> SB; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB; // Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeS+1> SW; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW; // Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeUS> USB; + const SimdImpl_DestRegEither<0x66,OpcodeB> USB; // Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results. 
- const SimdImpl_DestRegEither<0x66,OpcodeUS+1> USW; + const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW; SimdImpl_AddSub() {} -}; \ No newline at end of file +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PMul +{ +public: + const SimdImpl_DestRegEither<0x66,0xd5> LW; + const SimdImpl_DestRegEither<0x66,0xe5> HW; + const SimdImpl_DestRegEither<0x66,0xe4> HUW; + const SimdImpl_DestRegEither<0x66,0xf4> UDQ; + + // [SSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the + // corresponding signed 16-bit integer of source, producing intermediate signed 32-bit + // integers. Each intermediate 32-bit integer is truncated to the 18 most significant + // bits. Rounding is always performed by adding 1 to the least significant bit of the + // 18-bit intermediate result. The final result is obtained by selecting the 16 bits + // immediately to the right of the most significant bit of each 18-bit intermediate + // result and packed to the destination operand. + // + // Both operands can be MMX or XMM registers. Source can be register or memory. + // + const SimdImpl_DestRegEither<0x66,0x0b38> HRSW; + + // [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store + // the low 32 bits of each product in xmm1. + const SimdImpl_DestRegSSE<0x66,0x4038> LD; + + // [SSE-4.1] Multiply the packed signed dword integers in dest with src. + const SimdImpl_DestRegSSE<0x66,0x2838> DQ; + + SimdImpl_PMul() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PCompare +{ +public: + SimdImpl_PCompare() {} + + // Compare packed bytes for equality. + // If a data element in dest is equal to the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x74> EQB; + + // Compare packed words for equality. + // If a data element in dest is equal to the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x75> EQW; + + // Compare packed doublewords [32-bits] for equality. + // If a data element in dest is equal to the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x76> EQD; + + // Compare packed signed bytes for greater than. + // If a data element in dest is greater than the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x64> GTB; + + // Compare packed signed words for greater than. + // If a data element in dest is greater than the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x65> GTW; + + // Compare packed signed doublewords [32-bits] for greater than. + // If a data element in dest is greater than the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. 
+ const SimdImpl_DestRegEither<0x66,0x66> GTD; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +template< u8 Opcode1, u16 Opcode2 > +class SimdImpl_PMinMax +{ +public: + SimdImpl_PMinMax() {} + + // Compare packed unsigned byte integers in dest to src and store packed min/max + // values in dest. + // Operation can be performed on either MMX or SSE operands. + const SimdImpl_DestRegEither<0x66,Opcode1> UB; + + // Compare packed signed word integers in dest to src and store packed min/max + // values in dest. + // Operation can be performed on either MMX or SSE operands. + const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW; + + // [SSE-4.1] Compare packed signed byte integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB; + + // [SSE-4.1] Compare packed signed doubleword integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD; + + // [SSE-4.1] Compare packed unsigned word integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW; + + // [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PShuffle +{ +public: + SimdImpl_PShuffle() {} + + // Copies words from src and inserts them into dest at word locations selected with + // the order operand (8 bit immediate). + const SimdImpl_DestRegImmMMX<0x00,0x70> W; + + // Copies doublewords from src and inserts them into dest at dword locations selected + // with the order operand (8 bit immediate). + const SimdImpl_DestRegImmSSE<0x66,0x70> D; + + // Copies words from the low quadword of src and inserts them into the low quadword + // of dest at word locations selected with the order operand (8 bit immediate). + // The high quadword of src is copied to the high quadword of dest. + const SimdImpl_DestRegImmSSE<0xf2,0x70> LW; + + // Copies words from the high quadword of src and inserts them into the high quadword + // of dest at word locations selected with the order operand (8 bit immediate). + // The low quadword of src is copied to the low quadword of dest. + const SimdImpl_DestRegImmSSE<0xf3,0x70> HW; + + // [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle + // control mask in src. If the most significant bit (bit[7]) of each byte of the + // shuffle control mask is set, then constant zero is written in the result byte. + // Each byte in the shuffle control mask forms an index to permute the corresponding + // byte in dest. The value of each index is the least significant 4 bits (128-bit + // operation) or 3 bits (64-bit operation) of the shuffle control byte. + // + // Operands can be MMX or XMM registers. + const SimdImpl_DestRegEither<0x66,0x0038> B; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PUnpack +{ +public: + SimdImpl_PUnpack() {} + + // Unpack and interleave low-order bytes from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x60> LBW; + // Unpack and interleave low-order words from src and dest into dest. 
+ const SimdImpl_DestRegEither<0x66,0x61> LWD; + // Unpack and interleave low-order doublewords from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x62> LDQ; + // Unpack and interleave low-order quadwords from src and dest into dest. + const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ; + + // Unpack and interleave high-order bytes from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x68> HBW; + // Unpack and interleave high-order words from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x69> HWD; + // Unpack and interleave high-order doublewords from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x6a> HDQ; + // Unpack and interleave high-order quadwords from src and dest into dest. + const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Pack with Signed or Unsigned Saturation +// +class SimdImpl_Pack +{ +public: + SimdImpl_Pack() {} + + // Converts packed signed word integers from src and dest into packed signed + // byte integers in dest, using signed saturation. + const SimdImpl_DestRegEither<0x66,0x63> SSWB; + + // Converts packed signed dword integers from src and dest into packed signed + // word integers in dest, using signed saturation. + const SimdImpl_DestRegEither<0x66,0x6b> SSDW; + + // Converts packed unsigned word integers from src and dest into packed unsigned + // byte integers in dest, using unsigned saturation. + const SimdImpl_DestRegEither<0x66,0x67> USWB; + + // [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed + // unsigned word integers in dest, using signed saturation. + const SimdImpl_DestRegSSE<0x66,0x2b38> USDW; +}; + + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_Unpack +{ +public: + SimdImpl_Unpack() {} + + // Unpacks the high doubleword [single-precision] values from src and dest into + // dest, such that the result of dest looks like this: + // dest[0] <- dest[2] + // dest[1] <- src[2] + // dest[2] <- dest[3] + // dest[3] <- src[3] + // + const SimdImpl_DestRegSSE<0x00,0x15> HPS; + + // Unpacks the high quadword [double-precision] values from src and dest into + // dest, such that the result of dest looks like this: + // dest.lo <- dest.hi + // dest.hi <- src.hi + // + const SimdImpl_DestRegSSE<0x66,0x15> HPD; + + // Unpacks the low doubleword [single-precision] values from src and dest into + // dest, such that the result of dest looks like this: + // dest[3] <- src[1] + // dest[2] <- dest[1] + // dest[1] <- src[0] + // dest[0] <- dest[0] + // + const SimdImpl_DestRegSSE<0x00,0x14> LPS; + + // Unpacks the low quadword [double-precision] values from src and dest into + // dest, effectively moving the low portion of src into the upper portion of dest. + // The result of dest is loaded as such: + // dest.hi <- src.lo + // dest.lo <- dest.lo [remains unchanged!] + // + const SimdImpl_DestRegSSE<0x66,0x14> LPD; +}; + diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index e987f180c0..9125feaa3e 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -641,15 +641,34 @@ __emitinline void xBSWAP( const xRegister32& to ) // MMX / XMM Instructions // (these will get put in their own file later) -__emitinline void Internal::SimdPrefix( u8 prefix, u8 opcode ) +// If the upper 8 bits of opcode are zero, the opcode is treated as a u8. 
+// The upper bits are non-zero, the opcode is assumed 16 bit (and the upper bits are checked aginst +// 0x38, which is the only valid high word for 16 bit opcodes as such) +__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) { if( prefix != 0 ) { - xWrite( 0x0f00 | prefix ); - xWrite( opcode ); + if( (opcode & 0xff00) != 0 ) + { + jASSUME( (opcode & 0xff00) == 0x3800 ); + xWrite( (opcode<<16) | (0x0f00 | prefix) ); + } + else + { + xWrite( 0x0f00 | prefix ); + xWrite( opcode ); + } } else - xWrite( (opcode<<8) | 0x0f ); + { + if( (opcode & 0xff00) != 0 ) + { + jASSUME( (opcode & 0xff00) == 0x3800 ); + xWrite( opcode ); + } + else + xWrite( (opcode<<8) | 0x0f ); + } } const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS; @@ -735,11 +754,21 @@ const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2S // ------------------------------------------------------------------------ -const SimdImpl_ShiftAll<0xd0, 0x70, 2> xPSRL; -const SimdImpl_ShiftAll<0xf0, 0x70, 6> xPSLL; +const SimdImpl_ShiftAll<0xd0, 2> xPSRL; +const SimdImpl_ShiftAll<0xf0, 6> xPSLL; +const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; -const SimdImpl_AddSub<0xfc, 0xec, 0xdc, 0xd4> xPADD; -const SimdImpl_AddSub<0xf8, 0xe8, 0xd8, 0xfb> xPSUB; +const SimdImpl_AddSub<0xdc, 0xd4> xPADD; +const SimdImpl_AddSub<0xd8, 0xfb> xPSUB; +const SimdImpl_PMinMax<0xde,0x3c> xPMAX; +const SimdImpl_PMinMax<0xda,0x38> xPMIN; + +const SimdImpl_PMul xPMUL; +const SimdImpl_PCompare xPCMP; +const SimdImpl_PShuffle xPSHUF; +const SimdImpl_PUnpack xPUNPCK; +const SimdImpl_Unpack xUNPCK; +const SimdImpl_Pack xPACK; ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index dd1b1d5299..b0c4bfca9d 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -465,43 +465,52 @@ namespace x86Emitter // ------------------------------------------------------------------------ // OMG Evil. I went cross-eyed an hour ago doing this. 
// - extern const Internal::SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; extern const Internal::SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; extern const Internal::SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; extern const Internal::SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; extern const Internal::SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; - extern const Internal::SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; - extern const Internal::SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; + extern const Internal::SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; extern const Internal::SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; - extern const Internal::SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; - extern const Internal::SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; - extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; - extern const Internal::SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; - extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; - extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; - extern const Internal::SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; - extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; + extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; extern const Internal::SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; extern const Internal::SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; extern const Internal::SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; - extern const Internal::SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; + extern const Internal::SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; - extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; - extern const Internal::SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; + extern const Internal::SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; + extern const 
Internal::SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; // ------------------------------------------------------------------------ - extern const Internal::SimdImpl_ShiftAll<0xd0, 0x70, 2> xPSRL; - extern const Internal::SimdImpl_ShiftAll<0xf0, 0x70, 6> xPSLL; - - extern const Internal::SimdImpl_AddSub<0xfc, 0xec, 0xdc, 0xd4> xPADD; - extern const Internal::SimdImpl_AddSub<0xf8, 0xe8, 0xd8, 0xfb> xPSUB; - + extern const Internal::SimdImpl_ShiftAll<0xd0, 2> xPSRL; + extern const Internal::SimdImpl_ShiftAll<0xf0, 6> xPSLL; + extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; + + extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD; + extern const Internal::SimdImpl_AddSub<0xd8, 0xfb> xPSUB; + extern const Internal::SimdImpl_PMinMax<0xde,0x3c> xPMAX; + extern const Internal::SimdImpl_PMinMax<0xda,0x38> xPMIN; + + extern const Internal::SimdImpl_PMul xPMUL; + extern const Internal::SimdImpl_PCompare xPCMP; + extern const Internal::SimdImpl_PShuffle xPSHUF; + extern const Internal::SimdImpl_PUnpack xPUNPCK; + extern const Internal::SimdImpl_Unpack xUNPCK; + extern const Internal::SimdImpl_Pack xPACK; } diff --git a/pcsx2/x86/ix86/ix86_legacy_instructions.h b/pcsx2/x86/ix86/ix86_legacy_instructions.h index f36522852f..f903e120cb 100644 --- a/pcsx2/x86/ix86/ix86_legacy_instructions.h +++ b/pcsx2/x86/ix86/ix86_legacy_instructions.h @@ -938,12 +938,6 @@ extern void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); // emms extern void EMMS( void ); -//**********************************************************************************/ -//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits -//********************************************************************************** -extern void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); -extern void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); - extern void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); //********************* diff --git a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp index 7bbca83e33..f0571df61a 100644 --- a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp @@ -27,11 +27,11 @@ using namespace x86Emitter; -emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { xMOVQ( xRegisterMMX(to), (void*)from ); } -emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { xMOVQ( (void*)to, xRegisterMMX(from) ); } -emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { xMOVQ( xRegisterMMX(to), xRegisterMMX(from) ); } -emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } -emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } +emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { xMOVQ( xRegisterMMX(to), (void*)from ); } +emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { xMOVQ( (void*)to, xRegisterMMX(from) ); } +emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { xMOVQ( xRegisterMMX(to), xRegisterMMX(from) ); } +emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } +emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { xMOVDZX( xRegisterMMX(to), (void*)from ); } emitterT void MOVDMMXtoM( uptr to, x86MMXRegType 
from ) { xMOVD( (void*)to, xRegisterMMX(from) ); } @@ -40,7 +40,8 @@ emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { xMOVD( xRegister32(to), xRegisterMMX(from) ); } emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } -emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); } +emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); } +emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); } #define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \ emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); } \ @@ -66,7 +67,7 @@ emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB DEFINE_LEGACY_SHIFT_STUFF( mod, Q ) \ DEFINE_LEGACY_SHIFT_STUFF( mod, D ) \ DEFINE_LEGACY_SHIFT_STUFF( mod, W ) \ - emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86MMXRegType to, u8 imm ) { xP##mod.DQ( xRegisterSSE(to), imm ); } + emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86MMXRegType to, u8 imm ) { xP##mod.DQ( xRegisterSSE(to), imm ); } DEFINE_LEGACY_LOGIC_OPCODE( AND ) DEFINE_LEGACY_LOGIC_OPCODE( ANDN ) @@ -75,6 +76,8 @@ DEFINE_LEGACY_LOGIC_OPCODE( XOR ) DEFINE_LEGACY_SHIFT_OPCODE( SLL ) DEFINE_LEGACY_SHIFT_OPCODE( SRL ) +DEFINE_LEGACY_SHIFT_STUFF( SRA, D ) +DEFINE_LEGACY_SHIFT_STUFF( SRA, W ) DEFINE_LEGACY_ARITHMETIC( ADD, B ) DEFINE_LEGACY_ARITHMETIC( ADD, W ) @@ -94,136 +97,37 @@ DEFINE_LEGACY_ARITHMETIC( SUB, SW ) DEFINE_LEGACY_ARITHMETIC( SUB, USB ) DEFINE_LEGACY_ARITHMETIC( SUB, USW ) +DEFINE_LEGACY_ARITHMETIC( CMP, EQB ); +DEFINE_LEGACY_ARITHMETIC( CMP, EQW ); +DEFINE_LEGACY_ARITHMETIC( CMP, EQD ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTB ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTW ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTD ); + +DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW ); + +DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD ); + + +emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) { xPMUL.UDQ( xRegisterMMX( to ), (void*)from ); } +emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) { xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) ); } + +emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 ); } +emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 ); } + +////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////// + /* emms */ emitterT void EMMS() { write16( 0x770F ); } -// pmuludq m64 to r64 (sse2 only?) -emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0xF40F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// pmuludq r64 to r64 (sse2 only?) 
-emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xF40F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x740F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x750F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x760F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPEQDMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x760F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x640F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x650F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x660F ); - ModRM( 3, to, from ); -} - -emitterT void PCMPGTDMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x660F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void PSRAWItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x710F ); - ModRM( 3, 4 , to ); - write8( from ); -} - -emitterT void PSRADItoR( x86MMXRegType to, u8 from ) -{ - write16( 0x720F ); - ModRM( 3, 4 , to ); - write8( from ); -} - -emitterT void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0xE20F ); - ModRM( 3, to, from ); -} - -emitterT void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x6A0F ); - ModRM( 3, to, from ); -} - -emitterT void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x6A0F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -emitterT void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ) -{ - write16( 0x620F ); - ModRM( 3, to, from ); -} - -emitterT void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ) -{ - write16( 0x620F ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -// untested -emitterT void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from) -{ - write16( 0x630F ); - ModRM( 3, to, from ); -} - -emitterT void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from) -{ - write16( 0x6B0F ); - ModRM( 3, to, from ); -} - emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3); @@ -231,20 +135,3 @@ emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) ModRM( 3, to, from ); write8( imm8 ); } - -emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) -{ - write16(0x700f); - ModRM( 3, to, from ); - write8(imm8); -} - -emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) -{ - write16( 0x700f ); - ModRM( 0, to, DISP32 ); - write32( MEMADDR(from, 4) ); - write8(imm8); -} - -emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); } diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index e239af26be..9a97441b87 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -52,28 +52,6 @@ using namespace x86Emitter; write8( 0x66 ), \ SSERtoR( code ) -#define _SSERtoR66( code ) \ - assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \ - write8( 0x66 ), \ - RexRB(0, from, to), \ - write16( code ), \ - ModRM( 3, from, to ) - -#define SSE_SS_RtoR( code ) \ - assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \ - write8( 0xf3 ), \ - 
RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) - -#define SSE_SD_MtoR( code, overb ) \ - assert( to < iREGCNT_XMM ) , \ - write8( 0xf2 ), \ - RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) - #define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \ emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \ emitterT void sse##_MOV##mod##_XMM_to_M128( uptr to, x86SSERegType from ) { xMOV##mod( (void*)to, xRegisterSSE(from) ); } \ @@ -117,6 +95,11 @@ using namespace x86Emitter; emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \ emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); } +#define DEFINE_LEGACY_OP128( mod, sub ) \ + emitterT void SSE2_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); } + + #define DEFINE_LEGACY_PSSD_OPCODE( mod ) \ DEFINE_LEGACY_PSD_OPCODE( mod ) \ DEFINE_LEGACY_SSSD_OPCODE( mod ) @@ -153,6 +136,24 @@ DEFINE_LEGACY_RSQRT_OPCODE( RCP ) DEFINE_LEGACY_RSQRT_OPCODE( RSQRT ) DEFINE_LEGACY_SQRT_OPCODE( SQRT ) +DEFINE_LEGACY_OP128( PMUL, LW ) +DEFINE_LEGACY_OP128( PMUL, HW ) +DEFINE_LEGACY_OP128( PMUL, UDQ ) + +DEFINE_LEGACY_OP128( PMAX, SW ) +DEFINE_LEGACY_OP128( PMAX, UB ) +DEFINE_LEGACY_OP128( PMIN, SW ) +DEFINE_LEGACY_OP128( PMIN, UB ) + +DEFINE_LEGACY_OP128( UNPCK, LPS ) +DEFINE_LEGACY_OP128( UNPCK, HPS ) +DEFINE_LEGACY_OP128( PUNPCK, LQDQ ) +DEFINE_LEGACY_OP128( PUNPCK, HQDQ ) + +DEFINE_LEGACY_OP128( PACK, SSWB ) +DEFINE_LEGACY_OP128( PACK, SSDW ) +DEFINE_LEGACY_OP128( PACK, USWB ) + emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xMOVDQA( xRegisterSSE(to), xRegisterSSE(from) ); } @@ -216,87 +217,56 @@ emitterT void SSE_SHUFPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int of emitterT void SSE_SHUFPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } emitterT void SSE_SHUFPD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PD( xRegisterSSE(to), (void*)from, imm8 ); } - emitterT void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ) { xCVTPI2PS( xRegisterSSE(to), (u64*)from ); } emitterT void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ) { xCVTPI2PS( xRegisterSSE(to), xRegisterMMX(from) ); } - emitterT void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ) { xCVTPS2PI( xRegisterMMX(to), (u64*)from ); } emitterT void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ) { xCVTPS2PI( xRegisterMMX(to), xRegisterSSE(from) ); } - emitterT void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from) { xCVTTSS2SI( xRegister32(to), (u32*)from ); } emitterT void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xCVTTSS2SI( xRegister32(to), xRegisterSSE(from) ); } - emitterT void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from) { xCVTSI2SS( xRegisterSSE(to), (u32*)from ); } emitterT void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) { xCVTSI2SS( xRegisterSSE(to), xRegister32(from) ); } emitterT void 
SSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { xCVTSS2SD( xRegisterSSE(to), (u32*)from ); } emitterT void SSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSS2SD( xRegisterSSE(to), xRegisterSSE(from) ); } - emitterT void SSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { xCVTSD2SS( xRegisterSSE(to), (u64*)from ); } emitterT void SSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { xCVTSD2SS( xRegisterSSE(to), xRegisterSSE(from) ); } - emitterT void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTDQ2PS( xRegisterSSE(to), (u128*)from ); } emitterT void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTDQ2PS( xRegisterSSE(to), xRegisterSSE(from) ); } - emitterT void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ) { xCVTPS2DQ( xRegisterSSE(to), (u128*)from ); } emitterT void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xCVTTPS2DQ( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ) { xPMAX.SW( xRegisterMMX(to), xRegisterMMX(from) ); } +emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ) { xPMAX.SW( xRegisterMMX(to), xRegisterMMX(from) ); } + +emitterT void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.D( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.D( xRegisterSSE(to), (void*)from, imm8 ); } +emitterT void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.LW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.LW( xRegisterSSE(to), (void*)from, imm8 ); } +emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), (void*)from, imm8 ); } + +emitterT void SSE4_PMAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.SD( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_PMAXSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.SD( xRegisterSSE(to), (void*)from ); } +emitterT void SSE4_PMINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.SD( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_PMINSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.SD( xRegisterSSE(to), (void*)from ); } + +emitterT void SSE4_PMAXUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.UD( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_PMAXUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.UD( xRegisterSSE(to), (void*)from ); } +emitterT void SSE4_PMINUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.UD( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_PMINUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.UD( xRegisterSSE(to), (void*)from ); } + +emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMUL.DQ( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.LPS( xRegisterSSE(to), (void*)from ); } +emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ) { xUNPCK.LPS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.HPS( xRegisterSSE(to), (void*)from ); } +emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xUNPCK.HPS( xRegisterSSE(to), xRegisterSSE(from) ); } + ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PMAXSW: Packed Signed Integer Word Maximum * -//********************************************************************************** -//missing -// SSE_PMAXSW_M64_to_MM -// SSE2_PMAXSW_M128_to_XMM -// SSE2_PMAXSW_XMM_to_XMM -emitterT void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEE0F ); } - -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PMINSW: Packed Signed Integer Word Minimum * -//********************************************************************************** -//missing -// SSE_PMINSW_M64_to_MM -// SSE2_PMINSW_M128_to_XMM -// SSE2_PMINSW_XMM_to_XMM -emitterT void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ){ SSERtoR( 0xEA0F ); } - -//////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PSHUFD: Shuffle Packed DoubleWords * -//********************************************************************************** -emitterT void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) -{ - SSERtoR66( 0x700F ); - write8( imm8 ); -} -emitterT void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { SSEMtoR66( 0x700F ); write8( imm8 ); } - -emitterT void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF2); SSERtoR(0x700F); write8(imm8); } -emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF2); SSEMtoR(0x700F, 1); write8(imm8); } -emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { write8(0xF3); SSERtoR(0x700F); write8(imm8); } -emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { write8(0xF3); SSEMtoR(0x700F, 1); write8(imm8); } - -/////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//UNPCKLPS: Unpack and Interleave low Packed Single-Precision FP Data * -//********************************************************************************** -emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR(0x140f, 0); } -emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x140F ); } - -//////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//UNPCKHPS: Unpack and Interleave High Packed Single-Precision FP Data * -//********************************************************************************** -emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) 
{ SSEMtoR(0x150f, 0); } -emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x150F ); } - ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //STMXCSR : Store Streaming SIMD Extension Control/Status * @@ -317,23 +287,6 @@ emitterT void SSE_LDMXCSR( uptr from ) { write32( MEMADDR(from, 4) ); } -/////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PCMPxx: Compare Packed Integers * -//********************************************************************************** -emitterT void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x640F ); } -emitterT void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x640F ); } -emitterT void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x650F ); } -emitterT void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x650F ); } -emitterT void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x660F ); } -emitterT void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x660F ); } -emitterT void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x740F ); } -emitterT void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x740F ); } -emitterT void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x750F ); } -emitterT void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x750F ); } -emitterT void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x760F ); } -emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( 0x760F ); } - //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PEXTRW,PINSRW: Packed Extract/Insert Word * @@ -341,88 +294,6 @@ emitterT void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ){ SSEMtoR66( emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); } -/////////////////////////////////////////////////////////////////////////////////////// - -// shift right arithmetic - -emitterT void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE10F); } -emitterT void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE10F); } -emitterT void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x710F ); - ModRM( 3, 4 , to ); - write8( imm8 ); -} - -emitterT void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xE20F); } -emitterT void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66(0xE20F); } -emitterT void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8) -{ - write8( 0x66 ); - RexB(0, to); - write16( 0x720F ); - ModRM( 3, 4 , to ); - write8( imm8 ); -} - -emitterT void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEE0F ); } -emitterT void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEE0F ); } - -emitterT void 
SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDE0F ); } -emitterT void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDE0F ); } - -emitterT void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xEA0F ); } -emitterT void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xEA0F ); } - -emitterT void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0xDA0F ); } -emitterT void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0xDA0F ); } - -//**********************************************************************************/ -//PACKSSWB,PACKSSDW: Pack Saturate Signed Word -//********************************************************************************** -emitterT void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x630F ); } -emitterT void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x630F ); } -emitterT void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6B0F ); } -emitterT void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6B0F ); } - -emitterT void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x670F ); } -emitterT void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x670F ); } - -//**********************************************************************************/ -//PUNPCKHWD: Unpack 16bit high -//********************************************************************************** -emitterT void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x600F ); } -emitterT void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x600F ); } - -emitterT void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x680F ); } -emitterT void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x680F ); } - -emitterT void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x610F ); } -emitterT void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x610F ); } -emitterT void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x690F ); } -emitterT void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x690F ); } - -emitterT void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x620F ); } -emitterT void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x620F ); } -emitterT void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6A0F ); } -emitterT void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6A0F ); } - -emitterT void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6C0F ); } -emitterT void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6C0F ); } - -emitterT void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0x6D0F ); } -emitterT void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0x6D0F ); } - -emitterT void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xD50F ); } -emitterT void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xD50F ); } -emitterT void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xE50F ); } -emitterT void 
SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xE50F ); } - -emitterT void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66( 0xF40F ); } -emitterT void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from) { SSEMtoR66( 0xF40F ); } - emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); } emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); } @@ -589,82 +460,6 @@ emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 im write8(imm8); } -emitterT void SSE4_PMAXSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x3D380F); - ModRM(3, to, from); -} - -emitterT void SSE4_PMINSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x39380F); - ModRM(3, to, from); -} - -emitterT void SSE4_PMAXUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x3F380F); - ModRM(3, to, from); -} - -emitterT void SSE4_PMINUD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x3B380F); - ModRM(3, to, from); -} - -emitterT void SSE4_PMAXSD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x3D380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - -emitterT void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x39380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - -emitterT void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x3F380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - -emitterT void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x3B380F); - ModRM( 0, to, DISP32 ); - write32(MEMADDR(from, 4)); -} - -emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x28380F); - ModRM(3, to, from); -} - ////////////////////////////////////////////////////////////////////////////////////////// // SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) // From deb642af43ebc485fa0ffd200aa711ffb5370a7b Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 21 Apr 2009 05:29:14 +0000 Subject: [PATCH 120/143] Fixed a small bug from my last commit (mostly only affected debug builds), and implemented PALIGNR/MOVSLDUP/PABS/PSIGN/PEXTR/PINS. 
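The widened u16 opcodes introduced in the previous patch carry the 0x38 escape in their low byte and the final opcode byte in their high byte (0x0b38 for PMULHRSW, 0x4038 for PMULLD, and the new 0x1c38/0x0838 PABS/PSIGN groups below); SimdPrefix() then writes them out after the mandatory prefix and the 0F escape. A minimal standalone sketch of that byte order, for illustration only; EncodeSimdOpcode is a hypothetical helper, not part of the emitter:

// EncodeSimdOpcode is a hypothetical stand-in that mimics the byte order SimdPrefix()
// produces for prefixed SIMD opcodes; illustration only, not emitter code.
#include <cstdint>
#include <cstdio>
#include <vector>

typedef std::uint8_t  u8;     // mirrors the emitter's fixed-width typedefs
typedef std::uint16_t u16;

static std::vector<u8> EncodeSimdOpcode( u8 prefix, u16 opcode )
{
	std::vector<u8> out;
	if( prefix != 0 )
		out.push_back( prefix );                   // mandatory prefix: 0x66, 0xf2, or 0xf3
	out.push_back( 0x0f );                         // two-byte escape
	if( (opcode & 0xff00) != 0 )
	{
		out.push_back( (u8)(opcode & 0xff) );      // 0x38 escape byte (low byte of the u16)
		out.push_back( (u8)(opcode >> 8) );        // actual opcode byte (high byte of the u16)
	}
	else
		out.push_back( (u8)opcode );               // ordinary 0F xx opcode
	return out;
}

int main()
{
	// PMULHRSW is stored as 0x0b38 above and should come out as 66 0F 38 0B.
	std::vector<u8> bytes = EncodeSimdOpcode( 0x66, 0x0b38 );
	for( unsigned i = 0; i < bytes.size(); ++i )
		std::printf( "%02X ", bytes[i] );
	std::printf( "\n" );
	return 0;
}

Compiled and run, this prints 66 0F 38 0B, which matches the Intel encoding of PMULHRSW xmm, xmm/m128.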
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1036 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/xmm/arithmetic.h | 230 ++++++++ pcsx2/x86/ix86/implement/xmm/basehelpers.h | 152 +++++ pcsx2/x86/ix86/implement/xmm/comparisons.h | 131 +++++ pcsx2/x86/ix86/implement/xmm/moremovs.h | 82 +++ pcsx2/x86/ix86/implement/xmm/movqss.h | 646 --------------------- pcsx2/x86/ix86/implement/xmm/shufflepack.h | 306 ++++++++++ pcsx2/x86/ix86/ix86.cpp | 74 ++- pcsx2/x86/ix86/ix86_instructions.h | 57 +- pcsx2/x86/ix86/ix86_legacy_instructions.h | 1 - pcsx2/x86/ix86/ix86_legacy_sse.cpp | 176 ++---- pcsx2/x86/ix86/ix86_types.h | 6 +- 11 files changed, 1052 insertions(+), 809 deletions(-) create mode 100644 pcsx2/x86/ix86/implement/xmm/arithmetic.h create mode 100644 pcsx2/x86/ix86/implement/xmm/basehelpers.h create mode 100644 pcsx2/x86/ix86/implement/xmm/comparisons.h create mode 100644 pcsx2/x86/ix86/implement/xmm/moremovs.h delete mode 100644 pcsx2/x86/ix86/implement/xmm/movqss.h create mode 100644 pcsx2/x86/ix86/implement/xmm/shufflepack.h diff --git a/pcsx2/x86/ix86/implement/xmm/arithmetic.h b/pcsx2/x86/ix86/implement/xmm/arithmetic.h new file mode 100644 index 0000000000..6ac3f91877 --- /dev/null +++ b/pcsx2/x86/ix86/implement/xmm/arithmetic.h @@ -0,0 +1,230 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////// +// Used for PSRA, which lacks the Q form. +// +template< u16 OpcodeBase1, u8 Modcode > +class SimdImpl_ShiftWithoutQ +{ +protected: + template< u16 Opcode1, u16 OpcodeImm, u8 Modcode > + class ShiftHelper + { + public: + ShiftHelper() {} + + template< typename OperandType > + __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __emitinline void operator()( const xRegisterSIMD& to, u8 imm ) const + { + SimdPrefix( (sizeof( OperandType ) == 16) ? 
0x66 : 0, OpcodeImm ); + ModRM( 3, (int)Modcode, to.Id ); + xWrite( imm ); + } + }; + +public: + const ShiftHelper W; + const ShiftHelper D; + + SimdImpl_ShiftWithoutQ() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Implements PSRL and PSLL +// +template< u16 OpcodeBase1, u8 Modcode > +class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ +{ +public: + const ShiftHelper Q; + + void DQ( const xRegisterSSE& to, u8 imm ) const + { + SimdPrefix( 0x66, 0x73 ); + ModRM( 3, (int)Modcode+1, to.Id ); + xWrite( imm ); + } + + SimdImpl_Shift() {} +}; + + +////////////////////////////////////////////////////////////////////////////////////////// +// +template< u16 OpcodeB, u16 OpcodeQ > +class SimdImpl_AddSub +{ +public: + const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W; + const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D; + const SimdImpl_DestRegEither<0x66,OpcodeQ> Q; + + // Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results. + const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB; + + // Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results. + const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW; + + // Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results. + const SimdImpl_DestRegEither<0x66,OpcodeB> USB; + + // Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results. + const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW; + + SimdImpl_AddSub() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PMul +{ +public: + const SimdImpl_DestRegEither<0x66,0xd5> LW; + const SimdImpl_DestRegEither<0x66,0xe5> HW; + const SimdImpl_DestRegEither<0x66,0xe4> HUW; + const SimdImpl_DestRegEither<0x66,0xf4> UDQ; + + // [SSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the + // corresponding signed 16-bit integer of source, producing intermediate signed 32-bit + // integers. Each intermediate 32-bit integer is truncated to the 18 most significant + // bits. Rounding is always performed by adding 1 to the least significant bit of the + // 18-bit intermediate result. The final result is obtained by selecting the 16 bits + // immediately to the right of the most significant bit of each 18-bit intermediate + // result and packed to the destination operand. + // + // Both operands can be MMX or XMM registers. Source can be register or memory. + // + const SimdImpl_DestRegEither<0x66,0x0b38> HRSW; + + // [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store + // the low 32 bits of each product in xmm1. + const SimdImpl_DestRegSSE<0x66,0x4038> LD; + + // [SSE-4.1] Multiply the packed signed dword integers in dest with src. + const SimdImpl_DestRegSSE<0x66,0x2838> DQ; + + SimdImpl_PMul() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions) +// +template< u16 OpcodeSSE > +class SimdImpl_rSqrt +{ +public: + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; + const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; + SimdImpl_rSqrt() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// SQRT has PS/SS/SD forms, but not the PD form. 
+// +template< u16 OpcodeSSE > +class SimdImpl_Sqrt : public SimdImpl_rSqrt +{ +public: + const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; + SimdImpl_Sqrt() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_AndNot +{ +public: + const SimdImpl_DestRegSSE<0x00,0x55> PS; + const SimdImpl_DestRegSSE<0x66,0x55> PD; + SimdImpl_AndNot() {} +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Packed absolute value. [sSSE3 only] +// +class SimdImpl_PAbsolute +{ +public: + SimdImpl_PAbsolute() {} + + // [sSSE-3] Computes the absolute value of bytes in the src, and stores the result + // in dest, as UNSIGNED. + const SimdImpl_DestRegEither<0x66, 0x1c38> B; + + // [sSSE-3] Computes the absolute value of word in the src, and stores the result + // in dest, as UNSIGNED. + const SimdImpl_DestRegEither<0x66, 0x1d38> W; + + // [sSSE-3] Computes the absolute value of doublewords in the src, and stores the + // result in dest, as UNSIGNED. + const SimdImpl_DestRegEither<0x66, 0x1e38> D; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Packed Sign [sSSE3 only] - Negate/zero/preserve packed integers in dest depending on the +// corresponding sign in src. +// +class SimdImpl_PSign +{ +public: + SimdImpl_PSign() {} + + // [sSSE-3] negates each byte element of dest if the signed integer value of the + // corresponding data element in src is less than zero. If the signed integer value + // of a data element in src is positive, the corresponding data element in dest is + // unchanged. If a data element in src is zero, the corresponding data element in + // dest is set to zero. + const SimdImpl_DestRegEither<0x66, 0x0838> B; + + // [sSSE-3] negates each word element of dest if the signed integer value of the + // corresponding data element in src is less than zero. If the signed integer value + // of a data element in src is positive, the corresponding data element in dest is + // unchanged. If a data element in src is zero, the corresponding data element in + // dest is set to zero. + const SimdImpl_DestRegEither<0x66, 0x0938> W; + + // [sSSE-3] negates each doubleword element of dest if the signed integer value + // of the corresponding data element in src is less than zero. If the signed integer + // value of a data element in src is positive, the corresponding data element in dest + // is unchanged. If a data element in src is zero, the corresponding data element in + // dest is set to zero. + const SimdImpl_DestRegEither<0x66, 0x0a38> D; + +}; diff --git a/pcsx2/x86/ix86/implement/xmm/basehelpers.h b/pcsx2/x86/ix86/implement/xmm/basehelpers.h new file mode 100644 index 0000000000..7094322b3d --- /dev/null +++ b/pcsx2/x86/ix86/implement/xmm/basehelpers.h @@ -0,0 +1,152 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////// +// MMX / SSE Helper Functions! + +extern void SimdPrefix( u8 prefix, u16 opcode ); + +// ------------------------------------------------------------------------ +// xmm emitter helpers for xmm instruction with prefixes. +// These functions also support deducing the use of the prefix from the template parameters, +// since most xmm instructions use a prefix and most mmx instructions do not. (some mov +// instructions violate this "guideline.") +// +template< typename T, typename T2 > +__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) +{ + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); + ModRM_Direct( to.Id, from.Id ); +} + +template< typename T > +__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) +{ + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); + EmitSibMagic( reg.Id, sib ); +} + +template< typename T > +__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) +{ + SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); + xWriteDisp( reg.Id, data ); +} + +// ------------------------------------------------------------------------ +// xmm emitter helpers for xmm instructions *without* prefixes. +// These are normally used for special instructions that have MMX forms only (non-SSE), however +// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies. +// +template< typename T, typename T2 > +__emitinline void writeXMMop( u16 opcode, const xRegister& to, const xRegister& from ) +{ + SimdPrefix( 0, opcode ); + ModRM_Direct( to.Id, from.Id ); +} + +template< typename T > +__noinline void writeXMMop( u16 opcode, const xRegister& reg, const ModSibBase& sib ) +{ + SimdPrefix( 0, opcode ); + EmitSibMagic( reg.Id, sib ); +} + +template< typename T > +__emitinline void writeXMMop( u16 opcode, const xRegister& reg, const void* data ) +{ + SimdPrefix( 0, opcode ); + xWriteDisp( reg.Id, data ); +} + +// ------------------------------------------------------------------------ +// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only, +// like ANDPS/ANDPD +// +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegSSE +{ +public: + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + + SimdImpl_DestRegSSE() {} //GCWho? +}; + +// ------------------------------------------------------------------------ +// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only +// (PSHUFD / PSHUFHW / etc). 
+// +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegImmSSE +{ +public: + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + + SimdImpl_DestRegImmSSE() {} //GCWho? +}; + +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegImmMMX +{ +public: + __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + + SimdImpl_DestRegImmMMX() {} //GCWho? +}; + +// ------------------------------------------------------------------------ +// For implementing MMX/SSE operations that have reg,reg/rm forms only, +// but accept either MM or XMM destinations (most PADD/PSUB and other P srithmetic ops). +// +template< u8 Prefix, u16 Opcode > +class SimdImpl_DestRegEither +{ +public: + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + template< typename T > __forceinline + void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + + SimdImpl_DestRegEither() {} //GCWho? +}; + +// ------------------------------------------------------------------------ +// For implementing MMX/SSE operations which the destination *must* be a register, but the source +// can be regDirect or ModRM (indirect). +// +template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > +class SimdImpl_DestRegStrict +{ +public: + __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + + SimdImpl_DestRegStrict() {} //GCWho? +}; + diff --git a/pcsx2/x86/ix86/implement/xmm/comparisons.h b/pcsx2/x86/ix86/implement/xmm/comparisons.h new file mode 100644 index 0000000000..469a808524 --- /dev/null +++ b/pcsx2/x86/ix86/implement/xmm/comparisons.h @@ -0,0 +1,131 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + + +////////////////////////////////////////////////////////////////////////////////////////// +// +template< u16 OpcodeSSE > +class SimdImpl_MinMax +{ +public: + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; // packed single precision + const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; // packed double precision + const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; // scalar single precision + const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; // scalar double precision + + SimdImpl_MinMax() {} //GChow? +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +template< SSE2_ComparisonType CType > +class SimdImpl_Compare +{ +protected: + template< u8 Prefix > struct Woot + { + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + Woot() {} + }; + +public: + const Woot<0x00> PS; + const Woot<0x66> PD; + const Woot<0xf3> SS; + const Woot<0xf2> SD; + SimdImpl_Compare() {} //GCWhat? +}; + + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PCompare +{ +public: + SimdImpl_PCompare() {} + + // Compare packed bytes for equality. + // If a data element in dest is equal to the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x74> EQB; + + // Compare packed words for equality. + // If a data element in dest is equal to the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x75> EQW; + + // Compare packed doublewords [32-bits] for equality. + // If a data element in dest is equal to the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x76> EQD; + + // Compare packed signed bytes for greater than. + // If a data element in dest is greater than the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x64> GTB; + + // Compare packed signed words for greater than. + // If a data element in dest is greater than the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x65> GTW; + + // Compare packed signed doublewords [32-bits] for greater than. 
+ // If a data element in dest is greater than the corresponding date element src, the + // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. + const SimdImpl_DestRegEither<0x66,0x66> GTD; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +template< u8 Opcode1, u16 Opcode2 > +class SimdImpl_PMinMax +{ +public: + SimdImpl_PMinMax() {} + + // Compare packed unsigned byte integers in dest to src and store packed min/max + // values in dest. + // Operation can be performed on either MMX or SSE operands. + const SimdImpl_DestRegEither<0x66,Opcode1> UB; + + // Compare packed signed word integers in dest to src and store packed min/max + // values in dest. + // Operation can be performed on either MMX or SSE operands. + const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW; + + // [SSE-4.1] Compare packed signed byte integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB; + + // [SSE-4.1] Compare packed signed doubleword integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD; + + // [SSE-4.1] Compare packed unsigned word integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW; + + // [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store + // packed min/max values in dest. (SSE operands only) + const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD; +}; + diff --git a/pcsx2/x86/ix86/implement/xmm/moremovs.h b/pcsx2/x86/ix86/implement/xmm/moremovs.h new file mode 100644 index 0000000000..44da893436 --- /dev/null +++ b/pcsx2/x86/ix86/implement/xmm/moremovs.h @@ -0,0 +1,82 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////// +// Moves to/from high/low portions of an xmm register. +// These instructions cannot be used in reg/reg form. 
+// +template< u16 Opcode > +class MovhlImplAll +{ +protected: + template< u8 Prefix > + struct Woot + { + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + }; + +public: + Woot<0x00> PS; + Woot<0x66> PD; + + MovhlImplAll() {} //GCC. +}; + +// ------------------------------------------------------------------------ +// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but +// do something kinda different! Fun! +// +template< u16 Opcode > +class MovhlImpl_RtoR +{ +public: + __forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Opcode, to, from ); } + __forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode, to, from ); } + + MovhlImpl_RtoR() {} //GCC. +}; + +// ------------------------------------------------------------------------ +template< u8 Prefix, u16 Opcode, u16 OpcodeAlt > +class MovapsImplAll +{ +public: + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } + + MovapsImplAll() {} //GCC. +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +template< u8 AltPrefix, u16 OpcodeSSE > +class SimdImpl_UcomI +{ +public: + const SimdImpl_DestRegSSE<0x00,OpcodeSSE> SS; + const SimdImpl_DestRegSSE SD; + SimdImpl_UcomI() {} +}; diff --git a/pcsx2/x86/ix86/implement/xmm/movqss.h b/pcsx2/x86/ix86/implement/xmm/movqss.h deleted file mode 100644 index 3d47f0b6e9..0000000000 --- a/pcsx2/x86/ix86/implement/xmm/movqss.h +++ /dev/null @@ -1,646 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#pragma once - -////////////////////////////////////////////////////////////////////////////////////////// -// MMX / SSE Helper Functions! 
- -extern void SimdPrefix( u8 prefix, u16 opcode ); - -// ------------------------------------------------------------------------ -// xmm emitter helpers for xmm instruction with prefixes. -// These functions also support deducing the use of the prefix from the template parameters, -// since most xmm instructions use a prefix and most mmx instructions do not. (some mov -// instructions violate this "guideline.") -// -template< typename T, typename T2 > -__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) -{ - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); - ModRM_Direct( to.Id, from.Id ); -} - -template< typename T > -__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) -{ - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); - EmitSibMagic( reg.Id, sib ); -} - -template< typename T > -__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) -{ - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); - xWriteDisp( reg.Id, data ); -} - -// ------------------------------------------------------------------------ -// xmm emitter helpers for xmm instructions *without* prefixes. -// These are normally used for special instructions that have MMX forms only (non-SSE), however -// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies. -// -template< typename T, typename T2 > -__emitinline void writeXMMop( u16 opcode, const xRegister& to, const xRegister& from ) -{ - SimdPrefix( 0, opcode ); - ModRM_Direct( to.Id, from.Id ); -} - -template< typename T > -__noinline void writeXMMop( u16 opcode, const xRegister& reg, const ModSibBase& sib ) -{ - SimdPrefix( 0, opcode ); - EmitSibMagic( reg.Id, sib ); -} - -template< typename T > -__emitinline void writeXMMop( u16 opcode, const xRegister& reg, const void* data ) -{ - SimdPrefix( 0, opcode ); - xWriteDisp( reg.Id, data ); -} - -////////////////////////////////////////////////////////////////////////////////////////// -// Moves to/from high/low portions of an xmm register. -// These instructions cannot be used in reg/reg form. -// -template< u16 Opcode > -class MovhlImplAll -{ -protected: - template< u8 Prefix > - struct Woot - { - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } - }; - -public: - Woot<0x00> PS; - Woot<0x66> PD; - - MovhlImplAll() {} //GCC. -}; - -// ------------------------------------------------------------------------ -// RegtoReg forms of MOVHL/MOVLH -- these are the same opcodes as MOVH/MOVL but -// do something kinda different! Fun! -// -template< u16 Opcode > -class MovhlImpl_RtoR -{ -public: - __forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Opcode, to, from ); } - __forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode, to, from ); } - - MovhlImpl_RtoR() {} //GCC. 
-}; - -// ------------------------------------------------------------------------ -template< u8 Prefix, u16 Opcode, u16 OpcodeAlt > -class MovapsImplAll -{ -public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } - - MovapsImplAll() {} //GCC. -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// SimdImpl_PackedLogic - Implements logic forms for MMX/SSE instructions, and can be used for -// a few other various instruction too (anything which comes in simdreg,simdreg/ModRM forms). -// -template< u16 Opcode > -class SimdImpl_PackedLogic -{ -public: - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( 0x66, Opcode, to, from ); } - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( 0x66, Opcode, to, from ); } - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode, to, from ); } - - SimdImpl_PackedLogic() {} //GCWho? -}; - -// ------------------------------------------------------------------------ -// For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only, -// like ANDPS/ANDPD -// -template< u8 Prefix, u16 Opcode > -class SimdImpl_DestRegSSE -{ -public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - - SimdImpl_DestRegSSE() {} //GCWho? -}; - -// ------------------------------------------------------------------------ -// For implementing SSE-only logic operations that have xmmreg,reg/rm,imm forms only -// (PSHUFD / PSHUFHW / etc). -// -template< u8 Prefix, u16 Opcode > -class SimdImpl_DestRegImmSSE -{ -public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - - SimdImpl_DestRegImmSSE() {} //GCWho? 
-}; - -template< u8 Prefix, u16 Opcode > -class SimdImpl_DestRegImmMMX -{ -public: - __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - - SimdImpl_DestRegImmMMX() {} //GCWho? -}; - -// ------------------------------------------------------------------------ -// For implementing MMX/SSE operations that have reg,reg/rm forms only, -// but accept either MM or XMM destinations (most PADD/PSUB and other P srithmetic ops). -// -template< u8 Prefix, u16 Opcode > -class SimdImpl_DestRegEither -{ -public: - template< typename DestOperandType > __forceinline - void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename DestOperandType > __forceinline - void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename DestOperandType > __forceinline - void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - - SimdImpl_DestRegEither() {} //GCWho? -}; - -// ------------------------------------------------------------------------ -// For implementing MMX/SSE operations which the destination *must* be a register, but the source -// can be regDirect or ModRM (indirect). -// -template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > -class SimdImpl_DestRegStrict -{ -public: - __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __forceinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - - SimdImpl_DestRegStrict() {} //GCWho? -}; - -// ------------------------------------------------------------------------ -template< u16 OpcodeSSE > -class SimdImpl_PSPD_SSSD -{ -public: - const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; // packed single precision - const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; // packed double precision - const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; // scalar single precision - const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; // scalar double precision - - SimdImpl_PSPD_SSSD() {} //GChow? -}; - -// ------------------------------------------------------------------------ -// -template< u16 OpcodeSSE > -class SimdImpl_AndNot -{ -public: - const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; - const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; - SimdImpl_AndNot() {} -}; - -// ------------------------------------------------------------------------ -// For instructions that have SS/SD form only (UCOMI, etc) -// AltPrefix - prefixed used for doubles (SD form). 
-template< u8 AltPrefix, u16 OpcodeSSE > -class SimdImpl_SS_SD -{ -public: - const SimdImpl_DestRegSSE<0x00,OpcodeSSE> SS; - const SimdImpl_DestRegSSE SD; - SimdImpl_SS_SD() {} -}; - -// ------------------------------------------------------------------------ -// For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions) -template< u16 OpcodeSSE > -class SimdImpl_rSqrt -{ -public: - const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; - const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; - SimdImpl_rSqrt() {} -}; - -// ------------------------------------------------------------------------ -// For instructions that have PS/SS/SD form only (most commonly Sqrt functions) -template< u16 OpcodeSSE > -class SimdImpl_Sqrt : public SimdImpl_rSqrt -{ -public: - const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; - SimdImpl_Sqrt() {} -}; - -// ------------------------------------------------------------------------ -template< u16 OpcodeSSE > -class SimdImpl_Shuffle -{ -protected: - template< u8 Prefix > struct Woot - { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - Woot() {} - }; - -public: - const Woot<0x00> PS; - const Woot<0x66> PD; - - SimdImpl_Shuffle() {} //GCWhat? -}; - -// ------------------------------------------------------------------------ -template< SSE2_ComparisonType CType > -class SimdImpl_Compare -{ -protected: - template< u8 Prefix > struct Woot - { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } - Woot() {} - }; - -public: - const Woot<0x00> PS; - const Woot<0x66> PD; - const Woot<0xf3> SS; - const Woot<0xf2> SD; - SimdImpl_Compare() {} //GCWhat? -}; - - -////////////////////////////////////////////////////////////////////////////////////////// -// -// -template< u16 Opcode1, u16 OpcodeImm, u8 Modcode > -class SimdImpl_Shift -{ -public: - SimdImpl_Shift() {} - - template< typename OperandType > - __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } - - template< typename OperandType > - __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } - - template< typename OperandType > - __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } - - template< typename OperandType > - __emitinline void operator()( const xRegisterSIMD& to, u8 imm ) const - { - SimdPrefix( (sizeof( OperandType ) == 16) ? 
0x66 : 0, OpcodeImm ); - ModRM( 3, (int)Modcode, to.Id ); - xWrite( imm ); - } -}; - -// ------------------------------------------------------------------------ -// Used for PSRA -template< u16 OpcodeBase1, u8 Modcode > -class SimdImpl_ShiftWithoutQ -{ -public: - const SimdImpl_Shift W; - const SimdImpl_Shift D; - - SimdImpl_ShiftWithoutQ() {} -}; - -// ------------------------------------------------------------------------ -template< u16 OpcodeBase1, u8 Modcode > -class SimdImpl_ShiftAll : public SimdImpl_ShiftWithoutQ -{ -public: - const SimdImpl_Shift Q; - - void DQ( const xRegisterSSE& to, u8 imm ) const - { - SimdPrefix( 0x66, 0x73 ); - ModRM( 3, (int)Modcode+1, to.Id ); - xWrite( imm ); - } - - SimdImpl_ShiftAll() {} -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -template< u16 OpcodeB, u16 OpcodeQ > -class SimdImpl_AddSub -{ -public: - const SimdImpl_DestRegEither<0x66,OpcodeB+0x20> B; - const SimdImpl_DestRegEither<0x66,OpcodeB+0x21> W; - const SimdImpl_DestRegEither<0x66,OpcodeB+0x22> D; - const SimdImpl_DestRegEither<0x66,OpcodeQ> Q; - - // Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeB+0x10> SB; - - // Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeB+0x11> SW; - - // Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeB> USB; - - // Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results. - const SimdImpl_DestRegEither<0x66,OpcodeB+1> USW; - - SimdImpl_AddSub() {} -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -class SimdImpl_PMul -{ -public: - const SimdImpl_DestRegEither<0x66,0xd5> LW; - const SimdImpl_DestRegEither<0x66,0xe5> HW; - const SimdImpl_DestRegEither<0x66,0xe4> HUW; - const SimdImpl_DestRegEither<0x66,0xf4> UDQ; - - // [SSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the - // corresponding signed 16-bit integer of source, producing intermediate signed 32-bit - // integers. Each intermediate 32-bit integer is truncated to the 18 most significant - // bits. Rounding is always performed by adding 1 to the least significant bit of the - // 18-bit intermediate result. The final result is obtained by selecting the 16 bits - // immediately to the right of the most significant bit of each 18-bit intermediate - // result and packed to the destination operand. - // - // Both operands can be MMX or XMM registers. Source can be register or memory. - // - const SimdImpl_DestRegEither<0x66,0x0b38> HRSW; - - // [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store - // the low 32 bits of each product in xmm1. - const SimdImpl_DestRegSSE<0x66,0x4038> LD; - - // [SSE-4.1] Multiply the packed signed dword integers in dest with src. - const SimdImpl_DestRegSSE<0x66,0x2838> DQ; - - SimdImpl_PMul() {} -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -class SimdImpl_PCompare -{ -public: - SimdImpl_PCompare() {} - - // Compare packed bytes for equality. - // If a data element in dest is equal to the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. 
- const SimdImpl_DestRegEither<0x66,0x74> EQB; - - // Compare packed words for equality. - // If a data element in dest is equal to the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. - const SimdImpl_DestRegEither<0x66,0x75> EQW; - - // Compare packed doublewords [32-bits] for equality. - // If a data element in dest is equal to the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. - const SimdImpl_DestRegEither<0x66,0x76> EQD; - - // Compare packed signed bytes for greater than. - // If a data element in dest is greater than the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. - const SimdImpl_DestRegEither<0x66,0x64> GTB; - - // Compare packed signed words for greater than. - // If a data element in dest is greater than the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. - const SimdImpl_DestRegEither<0x66,0x65> GTW; - - // Compare packed signed doublewords [32-bits] for greater than. - // If a data element in dest is greater than the corresponding date element src, the - // corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s. - const SimdImpl_DestRegEither<0x66,0x66> GTD; -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -template< u8 Opcode1, u16 Opcode2 > -class SimdImpl_PMinMax -{ -public: - SimdImpl_PMinMax() {} - - // Compare packed unsigned byte integers in dest to src and store packed min/max - // values in dest. - // Operation can be performed on either MMX or SSE operands. - const SimdImpl_DestRegEither<0x66,Opcode1> UB; - - // Compare packed signed word integers in dest to src and store packed min/max - // values in dest. - // Operation can be performed on either MMX or SSE operands. - const SimdImpl_DestRegEither<0x66,Opcode1+0x10> SW; - - // [SSE-4.1] Compare packed signed byte integers in dest to src and store - // packed min/max values in dest. (SSE operands only) - const SimdImpl_DestRegSSE<0x66,(Opcode2<<8)|0x38> SB; - - // [SSE-4.1] Compare packed signed doubleword integers in dest to src and store - // packed min/max values in dest. (SSE operands only) - const SimdImpl_DestRegSSE<0x66,((Opcode2+1)<<8)|0x38> SD; - - // [SSE-4.1] Compare packed unsigned word integers in dest to src and store - // packed min/max values in dest. (SSE operands only) - const SimdImpl_DestRegSSE<0x66,((Opcode2+2)<<8)|0x38> UW; - - // [SSE-4.1] Compare packed unsigned doubleword integers in dest to src and store - // packed min/max values in dest. (SSE operands only) - const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD; -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -class SimdImpl_PShuffle -{ -public: - SimdImpl_PShuffle() {} - - // Copies words from src and inserts them into dest at word locations selected with - // the order operand (8 bit immediate). - const SimdImpl_DestRegImmMMX<0x00,0x70> W; - - // Copies doublewords from src and inserts them into dest at dword locations selected - // with the order operand (8 bit immediate). - const SimdImpl_DestRegImmSSE<0x66,0x70> D; - - // Copies words from the low quadword of src and inserts them into the low quadword - // of dest at word locations selected with the order operand (8 bit immediate). 
- // The high quadword of src is copied to the high quadword of dest. - const SimdImpl_DestRegImmSSE<0xf2,0x70> LW; - - // Copies words from the high quadword of src and inserts them into the high quadword - // of dest at word locations selected with the order operand (8 bit immediate). - // The low quadword of src is copied to the low quadword of dest. - const SimdImpl_DestRegImmSSE<0xf3,0x70> HW; - - // [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle - // control mask in src. If the most significant bit (bit[7]) of each byte of the - // shuffle control mask is set, then constant zero is written in the result byte. - // Each byte in the shuffle control mask forms an index to permute the corresponding - // byte in dest. The value of each index is the least significant 4 bits (128-bit - // operation) or 3 bits (64-bit operation) of the shuffle control byte. - // - // Operands can be MMX or XMM registers. - const SimdImpl_DestRegEither<0x66,0x0038> B; -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// -class SimdImpl_PUnpack -{ -public: - SimdImpl_PUnpack() {} - - // Unpack and interleave low-order bytes from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x60> LBW; - // Unpack and interleave low-order words from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x61> LWD; - // Unpack and interleave low-order doublewords from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x62> LDQ; - // Unpack and interleave low-order quadwords from src and dest into dest. - const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ; - - // Unpack and interleave high-order bytes from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x68> HBW; - // Unpack and interleave high-order words from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x69> HWD; - // Unpack and interleave high-order doublewords from src and dest into dest. - const SimdImpl_DestRegEither<0x66,0x6a> HDQ; - // Unpack and interleave high-order quadwords from src and dest into dest. - const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ; -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// Pack with Signed or Unsigned Saturation -// -class SimdImpl_Pack -{ -public: - SimdImpl_Pack() {} - - // Converts packed signed word integers from src and dest into packed signed - // byte integers in dest, using signed saturation. - const SimdImpl_DestRegEither<0x66,0x63> SSWB; - - // Converts packed signed dword integers from src and dest into packed signed - // word integers in dest, using signed saturation. - const SimdImpl_DestRegEither<0x66,0x6b> SSDW; - - // Converts packed unsigned word integers from src and dest into packed unsigned - // byte integers in dest, using unsigned saturation. - const SimdImpl_DestRegEither<0x66,0x67> USWB; - - // [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed - // unsigned word integers in dest, using signed saturation. 
- const SimdImpl_DestRegSSE<0x66,0x2b38> USDW; -}; - - -////////////////////////////////////////////////////////////////////////////////////////// -// -class SimdImpl_Unpack -{ -public: - SimdImpl_Unpack() {} - - // Unpacks the high doubleword [single-precision] values from src and dest into - // dest, such that the result of dest looks like this: - // dest[0] <- dest[2] - // dest[1] <- src[2] - // dest[2] <- dest[3] - // dest[3] <- src[3] - // - const SimdImpl_DestRegSSE<0x00,0x15> HPS; - - // Unpacks the high quadword [double-precision] values from src and dest into - // dest, such that the result of dest looks like this: - // dest.lo <- dest.hi - // dest.hi <- src.hi - // - const SimdImpl_DestRegSSE<0x66,0x15> HPD; - - // Unpacks the low doubleword [single-precision] values from src and dest into - // dest, such that the result of dest looks like this: - // dest[3] <- src[1] - // dest[2] <- dest[1] - // dest[1] <- src[0] - // dest[0] <- dest[0] - // - const SimdImpl_DestRegSSE<0x00,0x14> LPS; - - // Unpacks the low quadword [double-precision] values from src and dest into - // dest, effectively moving the low portion of src into the upper portion of dest. - // The result of dest is loaded as such: - // dest.hi <- src.lo - // dest.lo <- dest.lo [remains unchanged!] - // - const SimdImpl_DestRegSSE<0x66,0x14> LPD; -}; - diff --git a/pcsx2/x86/ix86/implement/xmm/shufflepack.h b/pcsx2/x86/ix86/implement/xmm/shufflepack.h new file mode 100644 index 0000000000..93a96569c6 --- /dev/null +++ b/pcsx2/x86/ix86/implement/xmm/shufflepack.h @@ -0,0 +1,306 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////// +// +template< u16 OpcodeSSE > +class SimdImpl_Shuffle +{ +protected: + template< u8 Prefix > struct Woot + { + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + Woot() {} + }; + +public: + const Woot<0x00> PS; + const Woot<0x66> PD; + + SimdImpl_Shuffle() {} //GCWhat? +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PShuffle +{ +public: + SimdImpl_PShuffle() {} + + // Copies words from src and inserts them into dest at word locations selected with + // the order operand (8 bit immediate). 
+ const SimdImpl_DestRegImmMMX<0x00,0x70> W; + + // Copies doublewords from src and inserts them into dest at dword locations selected + // with the order operand (8 bit immediate). + const SimdImpl_DestRegImmSSE<0x66,0x70> D; + + // Copies words from the low quadword of src and inserts them into the low quadword + // of dest at word locations selected with the order operand (8 bit immediate). + // The high quadword of src is copied to the high quadword of dest. + const SimdImpl_DestRegImmSSE<0xf2,0x70> LW; + + // Copies words from the high quadword of src and inserts them into the high quadword + // of dest at word locations selected with the order operand (8 bit immediate). + // The low quadword of src is copied to the low quadword of dest. + const SimdImpl_DestRegImmSSE<0xf3,0x70> HW; + + // [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle + // control mask in src. If the most significant bit (bit[7]) of each byte of the + // shuffle control mask is set, then constant zero is written in the result byte. + // Each byte in the shuffle control mask forms an index to permute the corresponding + // byte in dest. The value of each index is the least significant 4 bits (128-bit + // operation) or 3 bits (64-bit operation) of the shuffle control byte. + // + // Operands can be MMX or XMM registers. + const SimdImpl_DestRegEither<0x66,0x0038> B; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_PUnpack +{ +public: + SimdImpl_PUnpack() {} + + // Unpack and interleave low-order bytes from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x60> LBW; + // Unpack and interleave low-order words from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x61> LWD; + // Unpack and interleave low-order doublewords from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x62> LDQ; + // Unpack and interleave low-order quadwords from src and dest into dest. + const SimdImpl_DestRegSSE<0x66,0x6c> LQDQ; + + // Unpack and interleave high-order bytes from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x68> HBW; + // Unpack and interleave high-order words from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x69> HWD; + // Unpack and interleave high-order doublewords from src and dest into dest. + const SimdImpl_DestRegEither<0x66,0x6a> HDQ; + // Unpack and interleave high-order quadwords from src and dest into dest. + const SimdImpl_DestRegSSE<0x66,0x6d> HQDQ; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Pack with Signed or Unsigned Saturation +// +class SimdImpl_Pack +{ +public: + SimdImpl_Pack() {} + + // Converts packed signed word integers from src and dest into packed signed + // byte integers in dest, using signed saturation. + const SimdImpl_DestRegEither<0x66,0x63> SSWB; + + // Converts packed signed dword integers from src and dest into packed signed + // word integers in dest, using signed saturation. + const SimdImpl_DestRegEither<0x66,0x6b> SSDW; + + // Converts packed unsigned word integers from src and dest into packed unsigned + // byte integers in dest, using unsigned saturation. + const SimdImpl_DestRegEither<0x66,0x67> USWB; + + // [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed + // unsigned word integers in dest, using signed saturation. 
+	const SimdImpl_DestRegSSE<0x66,0x2b38> USDW;
+};
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+//
+class SimdImpl_Unpack
+{
+public:
+	SimdImpl_Unpack() {}
+
+	// Unpacks the high doubleword [single-precision] values from src and dest into
+	// dest, such that the result of dest looks like this:
+	// dest[0] <- dest[2]
+	// dest[1] <- src[2]
+	// dest[2] <- dest[3]
+	// dest[3] <- src[3]
+	//
+	const SimdImpl_DestRegSSE<0x00,0x15> HPS;
+
+	// Unpacks the high quadword [double-precision] values from src and dest into
+	// dest, such that the result of dest looks like this:
+	// dest.lo <- dest.hi
+	// dest.hi <- src.hi
+	//
+	const SimdImpl_DestRegSSE<0x66,0x15> HPD;
+
+	// Unpacks the low doubleword [single-precision] values from src and dest into
+	// dest, such that the result of dest looks like this:
+	// dest[3] <- src[1]
+	// dest[2] <- dest[1]
+	// dest[1] <- src[0]
+	// dest[0] <- dest[0]
+	//
+	const SimdImpl_DestRegSSE<0x00,0x14> LPS;
+
+	// Unpacks the low quadword [double-precision] values from src and dest into
+	// dest, effectively moving the low portion of src into the upper portion of dest.
+	// The result of dest is loaded as such:
+	// dest.hi <- src.lo
+	// dest.lo <- dest.lo [remains unchanged!]
+	//
+	const SimdImpl_DestRegSSE<0x66,0x14> LPD;
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// PINSRW/B/D [all but Word form are SSE4.1 only!]
+//
+class SimdImpl_PInsert
+{
+protected:
+	template< u16 Opcode >
+	class ByteDwordForms
+	{
+	public:
+		ByteDwordForms() {}
+
+		__forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
+			xWrite( imm8 );
+		}
+
+		__forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
+			xWrite( imm8 );
+		}
+
+		__noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const
+		{
+			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
+			xWrite( imm8 );
+		}
+	};
+
+public:
+	SimdImpl_PInsert() {}
+
+	// Operation can be performed on either MMX or SSE src operands.
+	template< typename T >
+	__forceinline void W( const xRegisterSIMD<T>& to, const xRegister32& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0xc4, to, from );
+		xWrite( imm8 );
+	}
+
+	// Operation can be performed on either MMX or SSE src operands.
+	template< typename T >
+	__forceinline void W( const xRegisterSIMD<T>& to, const void* from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0xc4, to, from );
+		xWrite( imm8 );
+	}
+
+	// Operation can be performed on either MMX or SSE src operands.
+	template< typename T >
+	__noinline void W( const xRegisterSIMD<T>& to, const ModSibBase& from, u8 imm8 ) const
+	{
+		writeXMMop( 0x66, 0xc4, to, from );
+		xWrite( imm8 );
+	}
+
+	// [SSE-4.1]
+	const ByteDwordForms<0x20> B;
+
+	// [SSE-4.1]
+	const ByteDwordForms<0x22> D;
+};
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// PEXTRW/B/D [all but Word form are SSE4.1 only!]
+//
+// Note: Word form's indirect memory form is only available in SSE4.1.
+// +class SimdImpl_PExtract +{ +protected: + template< u16 Opcode > + class ByteDwordForms + { + public: + ByteDwordForms() {} + + __forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const + { + writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); + xWrite( imm ); + } + + __forceinline void operator()( void* dest, const xRegisterSSE& from, u8 imm8 ) const + { + writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); + xWrite( imm ); + } + + __noinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const + { + writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); + xWrite( imm ); + } + }; + +public: + SimdImpl_PExtract() {} + + // Copies the word element specified by imm8 from src to dest. The upper bits + // of dest are zero-extended (cleared). This can be used to extract any single packed + // word value from src into an x86 32 bit register. + // + // [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension! + // + template< typename T > + __forceinline void W( const xRegister32& to, const xRegisterSIMD& from, u8 imm8 ) const + { + writeXMMop( 0x66, 0xc5, to, from, true ); + xWrite( imm8 ); + } + + __forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const + { + writeXMMop( 0x66, 0x153a, from, dest ); + xWrite( imm8 ); + } + + __noinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const + { + writeXMMop( 0x66, 0x153a, from, dest ); + xWrite( imm8 ); + } + + // [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits + // of dest are zero-extended (cleared). This can be used to extract any single packed + // byte value from src into an x86 32 bit register. + const ByteDwordForms<0x14> B; + + // [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be + // used to extract any single packed dword value from src into an x86 32 bit register. + const ByteDwordForms<0x16> D; +}; diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 9125feaa3e..e6bc9f34a7 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -641,18 +641,25 @@ __emitinline void xBSWAP( const xRegister32& to ) // MMX / XMM Instructions // (these will get put in their own file later) -// If the upper 8 bits of opcode are zero, the opcode is treated as a u8. -// The upper bits are non-zero, the opcode is assumed 16 bit (and the upper bits are checked aginst -// 0x38, which is the only valid high word for 16 bit opcodes as such) +// ------------------------------------------------------------------------ +// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is +// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4 +// instructions). Any other lower value assumes the upper value is 0 and ignored. +// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will +// generate an assertion. +// __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) { + const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a); + + // If the lower byte is not a valid previx and the upper byte is non-zero it + // means we made a mistake! 
+ if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 ); + if( prefix != 0 ) { - if( (opcode & 0xff00) != 0 ) - { - jASSUME( (opcode & 0xff00) == 0x3800 ); - xWrite( (opcode<<16) | (0x0f00 | prefix) ); - } + if( is16BitOpcode ) + xWrite( (opcode<<16) | 0x0f00 | prefix ); else { xWrite( 0x0f00 | prefix ); @@ -661,9 +668,9 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) } else { - if( (opcode & 0xff00) != 0 ) + if( is16BitOpcode ) { - jASSUME( (opcode & 0xff00) == 0x3800 ); + xWrite( 0x0f ); xWrite( opcode ); } else @@ -671,6 +678,11 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) } } +// [SSE-3] +const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; +// [SSE-3] +const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; + const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS; const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS; const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD; @@ -689,20 +701,20 @@ const MovhlImplAll<0x12> xMOVL; const MovhlImpl_RtoR<0x16> xMOVLH; const MovhlImpl_RtoR<0x12> xMOVHL; -const SimdImpl_PackedLogic<0xdb> xPAND; -const SimdImpl_PackedLogic<0xdf> xPANDN; -const SimdImpl_PackedLogic<0xeb> xPOR; -const SimdImpl_PackedLogic<0xef> xPXOR; +const SimdImpl_DestRegEither<0x66,0xdb> xPAND; +const SimdImpl_DestRegEither<0x66,0xdf> xPANDN; +const SimdImpl_DestRegEither<0x66,0xeb> xPOR; +const SimdImpl_DestRegEither<0x66,0xef> xPXOR; -const SimdImpl_AndNot<0x55> xANDN; +const SimdImpl_AndNot xANDN; -const SimdImpl_SS_SD<0x66,0x2e> xUCOMI; +const SimdImpl_UcomI<0x66,0x2e> xUCOMI; const SimdImpl_rSqrt<0x53> xRCP; const SimdImpl_rSqrt<0x52> xRSQRT; const SimdImpl_Sqrt<0x51> xSQRT; -const SimdImpl_PSPD_SSSD<0x5f> xMAX; -const SimdImpl_PSPD_SSSD<0x5d> xMIN; +const SimdImpl_MinMax<0x5f> xMAX; +const SimdImpl_MinMax<0x5d> xMIN; const SimdImpl_Shuffle<0xc6> xSHUF; // ------------------------------------------------------------------------ @@ -754,8 +766,8 @@ const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2S // ------------------------------------------------------------------------ -const SimdImpl_ShiftAll<0xd0, 2> xPSRL; -const SimdImpl_ShiftAll<0xf0, 6> xPSLL; +const SimdImpl_Shift<0xd0, 2> xPSRL; +const SimdImpl_Shift<0xf0, 6> xPSLL; const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; const SimdImpl_AddSub<0xdc, 0xd4> xPADD; @@ -770,10 +782,29 @@ const SimdImpl_PUnpack xPUNPCK; const SimdImpl_Unpack xUNPCK; const SimdImpl_Pack xPACK; +const SimdImpl_PAbsolute xPABS; +const SimdImpl_PSign xPSIGN; +const SimdImpl_PInsert xPINS; +const SimdImpl_PExtract xPEXTR; + ////////////////////////////////////////////////////////////////////////////////////////// // +// Store Streaming SIMD Extension Control/Status to Mem32. +__emitinline void xSTMXCSR( u32* dest ) +{ + SimdPrefix( 0, 0xae ); + xWriteDisp( 3, dest ); +} + +// Load Streaming SIMD Extension Control/Status from Mem32. +__emitinline void xLDMXCSR( const u32* src ) +{ + SimdPrefix( 0, 0xae ); + xWriteDisp( 2, src ); +} + // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. 
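// ------------------------------------------------------------------------
// Illustrative sketch (not part of the patch above): how the SimdPrefix() rules
// described earlier in this diff map onto raw instruction bytes. This stand-alone
// helper mirrors the same byte ordering but writes into a std::vector<uint8_t>
// instead of the emitter's xWrite<>() stream; the function name and the output
// container are assumptions made purely for this example.
//
//   encodeSimdOpcode( 0x66, 0x70 )   ==> 66 0F 70      (PSHUFD-style single-byte opcode)
//   encodeSimdOpcode( 0x66, 0x0038 ) ==> 66 0F 38 00   (PSHUFB-style 0x38-escaped opcode)
//
#include <vector>
#include <cstdint>

static std::vector<uint8_t> encodeSimdOpcode( uint8_t prefix, uint16_t opcode )
{
	std::vector<uint8_t> out;
	const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a);

	if( prefix != 0 )
		out.push_back( prefix );                    // mandatory SSE prefix (0x66, 0xf2, or 0xf3)

	out.push_back( 0x0f );                          // two-byte escape
	out.push_back( (uint8_t)(opcode & 0xff) );      // either the opcode itself, or the 0x38/0x3a escape

	if( is16BitOpcode )
		out.push_back( (uint8_t)(opcode >> 8) );    // the real opcode byte for SSSE3/SSE4 forms

	return out;
}
// ------------------------------------------------------------------------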
@@ -851,5 +882,8 @@ __noinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { wri __forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } __noinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } +__forceinline void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from) { writeXMMop( 0x50, to, from ); } +__forceinline void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from) { writeXMMop( 0x66, 0x50, to, from, true ); } + } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index b0c4bfca9d..62f434ccf3 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -370,8 +370,23 @@ namespace x86Emitter template< typename T > static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); } + // [sSSE-3] Concatenates dest and source operands into an intermediate composite, + // shifts the composite at byte granularity to the right by a constant immediate, + // and extracts the right-aligned result into the destination. + // + template< typename T > + static __forceinline void xPALIGNR( const xRegisterSIMD& to, const xRegisterSIMD& from, u8 imm8 ) + { + Internal::writeXMMop( 0x66, 0x0f3a, to, from ); + xWrite( imm8 ); + } + + // ------------------------------------------------------------------------ - + + extern void xSTMXCSR( u32* dest ); + extern void xLDMXCSR( const u32* src ); + extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ); extern void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ); extern void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ); @@ -411,8 +426,14 @@ namespace x86Emitter extern void xMOVNTQ( void* to, const xRegisterMMX& from ); extern void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ); + extern void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from ); + extern void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from ); + // ------------------------------------------------------------------------ + extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; + extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; + extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS; extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS; @@ -435,29 +456,29 @@ namespace x86Emitter // ------------------------------------------------------------------------ - extern const Internal::SimdImpl_PackedLogic<0xdb> xPAND; - extern const Internal::SimdImpl_PackedLogic<0xdf> xPANDN; - extern const Internal::SimdImpl_PackedLogic<0xeb> xPOR; - extern const Internal::SimdImpl_PackedLogic<0xef> xPXOR; + extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND; + extern const Internal::SimdImpl_DestRegEither<0x66,0xdf> xPANDN; + extern const Internal::SimdImpl_DestRegEither<0x66,0xeb> xPOR; + extern const Internal::SimdImpl_DestRegEither<0x66,0xef> xPXOR; - extern const Internal::SimdImpl_AndNot<0x55> xANDN; + extern const Internal::SimdImpl_AndNot xANDN; - extern const Internal::SimdImpl_SS_SD<0x66,0x2e> xUCOMI; + extern const Internal::SimdImpl_UcomI<0x66,0x2e> xUCOMI; extern const Internal::SimdImpl_rSqrt<0x53> xRCP; extern const Internal::SimdImpl_rSqrt<0x52> xRSQRT; extern const Internal::SimdImpl_Sqrt<0x51> xSQRT; - extern const Internal::SimdImpl_PSPD_SSSD<0x5f> xMAX; - extern const Internal::SimdImpl_PSPD_SSSD<0x5d> xMIN; + extern const 
Internal::SimdImpl_MinMax<0x5f> xMAX; + extern const Internal::SimdImpl_MinMax<0x5d> xMIN; extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF; // ------------------------------------------------------------------------ - extern const Internal::SimdImpl_Compare xCMPEQ; - extern const Internal::SimdImpl_Compare xCMPLT; + extern const Internal::SimdImpl_Compare xCMPEQ; + extern const Internal::SimdImpl_Compare xCMPLT; extern const Internal::SimdImpl_Compare xCMPLE; - extern const Internal::SimdImpl_Compare xCMPUNORD; - extern const Internal::SimdImpl_Compare xCMPNE; + extern const Internal::SimdImpl_Compare xCMPUNORD; + extern const Internal::SimdImpl_Compare xCMPNE; extern const Internal::SimdImpl_Compare xCMPNLT; extern const Internal::SimdImpl_Compare xCMPNLE; extern const Internal::SimdImpl_Compare xCMPORD; @@ -497,8 +518,8 @@ namespace x86Emitter // ------------------------------------------------------------------------ - extern const Internal::SimdImpl_ShiftAll<0xd0, 2> xPSRL; - extern const Internal::SimdImpl_ShiftAll<0xf0, 6> xPSLL; + extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL; + extern const Internal::SimdImpl_Shift<0xf0, 6> xPSLL; extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD; @@ -512,5 +533,11 @@ namespace x86Emitter extern const Internal::SimdImpl_PUnpack xPUNPCK; extern const Internal::SimdImpl_Unpack xUNPCK; extern const Internal::SimdImpl_Pack xPACK; + + extern const Internal::SimdImpl_PAbsolute xPABS; + extern const Internal::SimdImpl_PSign xPSIGN; + extern const Internal::SimdImpl_PInsert xPINS; + extern const Internal::SimdImpl_PExtract xPEXTR; + } diff --git a/pcsx2/x86/ix86/ix86_legacy_instructions.h b/pcsx2/x86/ix86/ix86_legacy_instructions.h index f903e120cb..7f3e2ea740 100644 --- a/pcsx2/x86/ix86/ix86_legacy_instructions.h +++ b/pcsx2/x86/ix86/ix86_legacy_instructions.h @@ -1351,7 +1351,6 @@ extern void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); extern void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); extern void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); extern void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -extern void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); // SSE4.1 diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index 9a97441b87..2a164b4d99 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -95,9 +95,13 @@ using namespace x86Emitter; emitterT void SSE2_##mod##SD_M64_to_XMM( x86SSERegType to, uptr from ) { x##mod.SD( xRegisterSSE(to), (void*)from ); } \ emitterT void SSE2_##mod##SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.SD( xRegisterSSE(to), xRegisterSSE(from) ); } -#define DEFINE_LEGACY_OP128( mod, sub ) \ - emitterT void SSE2_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); } +#define DEFINE_LEGACY_OP128( ssenum, mod, sub ) \ + emitterT void SSE##ssenum##_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE##ssenum##_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod.sub( xRegisterSSE(to), (void*)from ); } + +#define DEFINE_LEGACY_MOV128( ssenum, mod, sub ) \ + emitterT void 
SSE##ssenum##_##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { x##mod##sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE##ssenum##_##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { x##mod##sub( xRegisterSSE(to), (void*)from ); } #define DEFINE_LEGACY_PSSD_OPCODE( mod ) \ @@ -136,23 +140,31 @@ DEFINE_LEGACY_RSQRT_OPCODE( RCP ) DEFINE_LEGACY_RSQRT_OPCODE( RSQRT ) DEFINE_LEGACY_SQRT_OPCODE( SQRT ) -DEFINE_LEGACY_OP128( PMUL, LW ) -DEFINE_LEGACY_OP128( PMUL, HW ) -DEFINE_LEGACY_OP128( PMUL, UDQ ) +DEFINE_LEGACY_OP128( 2, PMUL, LW ) +DEFINE_LEGACY_OP128( 2, PMUL, HW ) +DEFINE_LEGACY_OP128( 2, PMUL, UDQ ) -DEFINE_LEGACY_OP128( PMAX, SW ) -DEFINE_LEGACY_OP128( PMAX, UB ) -DEFINE_LEGACY_OP128( PMIN, SW ) -DEFINE_LEGACY_OP128( PMIN, UB ) +DEFINE_LEGACY_OP128( 2, PMAX, SW ) +DEFINE_LEGACY_OP128( 2, PMAX, UB ) +DEFINE_LEGACY_OP128( 2, PMIN, SW ) +DEFINE_LEGACY_OP128( 2, PMIN, UB ) -DEFINE_LEGACY_OP128( UNPCK, LPS ) -DEFINE_LEGACY_OP128( UNPCK, HPS ) -DEFINE_LEGACY_OP128( PUNPCK, LQDQ ) -DEFINE_LEGACY_OP128( PUNPCK, HQDQ ) +DEFINE_LEGACY_OP128( 2, UNPCK, LPS ) +DEFINE_LEGACY_OP128( 2, UNPCK, HPS ) +DEFINE_LEGACY_OP128( 2, PUNPCK, LQDQ ) +DEFINE_LEGACY_OP128( 2, PUNPCK, HQDQ ) -DEFINE_LEGACY_OP128( PACK, SSWB ) -DEFINE_LEGACY_OP128( PACK, SSDW ) -DEFINE_LEGACY_OP128( PACK, USWB ) +DEFINE_LEGACY_OP128( 2, PACK, SSWB ) +DEFINE_LEGACY_OP128( 2, PACK, SSDW ) +DEFINE_LEGACY_OP128( 2, PACK, USWB ) + +DEFINE_LEGACY_MOV128( 3, MOV, SLDUP ) +DEFINE_LEGACY_MOV128( 3, MOV, SHDUP ) + +DEFINE_LEGACY_OP128( 4, PMAX, SD ) +DEFINE_LEGACY_OP128( 4, PMIN, SD ) +DEFINE_LEGACY_OP128( 4, PMAX, UD ) +DEFINE_LEGACY_OP128( 4, PMIN, UD ) emitterT void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVAPS( xRegisterSSE(to), xRegisterSSE(from) ); } @@ -201,11 +213,11 @@ emitterT void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { xMOVH.P emitterT void SSE_MOVHPS_Rm_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { xMOVH.PS( xRegisterSSE(to), ptr[xAddressReg(from)+offset] ); } emitterT void SSE_MOVHPS_XMM_to_Rm( x86IntRegType to, x86SSERegType from, int offset ) { xMOVH.PS( ptr[xAddressReg(to)+offset], xRegisterSSE(from) ); } -emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVLH.PS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMOVHL.PS( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); } +emitterT void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xMASKMOV( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xPMOVMSKB( xRegister32(to), xRegisterSSE(from) ); } emitterT void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } emitterT void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xSHUF.PS( xRegisterSSE(to), (void*)from, imm8 ); } @@ -247,16 
+259,6 @@ emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), (void*)from, imm8 ); } -emitterT void SSE4_PMAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.SD( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE4_PMAXSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.SD( xRegisterSSE(to), (void*)from ); } -emitterT void SSE4_PMINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.SD( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE4_PMINSD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.SD( xRegisterSSE(to), (void*)from ); } - -emitterT void SSE4_PMAXUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMAX.UD( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE4_PMAXUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMAX.UD( xRegisterSSE(to), (void*)from ); } -emitterT void SSE4_PMINUD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xPMIN.UD( xRegisterSSE(to), xRegisterSSE(from) ); } -emitterT void SSE4_PMINUD_M128_to_XMM( x86SSERegType to, uptr from ) { xPMIN.UD( xRegisterSSE(to), (void*)from ); } - emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMUL.DQ( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.LPS( xRegisterSSE(to), (void*)from ); } @@ -264,113 +266,35 @@ emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.HPS( xRegisterSSE(to), (void*)from ); } emitterT void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xUNPCK.HPS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xMOVMSKPS( xRegister32(to), xRegisterSSE(from) ); } +emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { xMOVMSKPD( xRegister32(to), xRegisterSSE(from) ); } + +emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPABS.B( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPABS.W( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPABS.D( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPSIGN.B( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPSIGN.W( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPSIGN.D( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ) { xPEXTR.W( xRegister32(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ) { xPINS.W( xRegisterSSE(to), xRegister32(from), imm8 ); } + +emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); } + ////////////////////////////////////////////////////////////////////////////////////////// 
////////////////////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//STMXCSR : Store Streaming SIMD Extension Control/Status * -//********************************************************************************** -emitterT void SSE_STMXCSR( uptr from ) { - write16( 0xAE0F ); - ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); -} - -///////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//LDMXCSR : Load Streaming SIMD Extension Control/Status * -//********************************************************************************** -emitterT void SSE_LDMXCSR( uptr from ) { - write16( 0xAE0F ); - ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(from, 4) ); -} - //////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PEXTRW,PINSRW: Packed Extract/Insert Word * -//********************************************************************************** -emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ){ SSERtoR66(0xC50F); write8( imm8 ); } -emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ){ SSERtoR66(0xC40F); write8( imm8 ); } - -emitterT void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR(0x500F); } -emitterT void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from) { SSERtoR66(0x500F); } +//**********************************************************************************} emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF50F); } emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); } emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf2); SSEMtoR( 0x7c0f, 0 ); } -emitterT void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { - write8(0xf3); - RexRB(0, to, from); - write16( 0x120f); - ModRM( 3, to, from ); -} - -emitterT void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x120f, 0); } -emitterT void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); } -emitterT void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); } - -// SSSE3 - -emitterT void SSSE3_PABSB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x1C380F); - ModRM(3, to, from); -} - -emitterT void SSSE3_PABSW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x1D380F); - ModRM(3, to, from); -} - -emitterT void SSSE3_PABSD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x1E380F); - ModRM(3, to, from); -} - -emitterT void SSSE3_PALIGNR_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x0F3A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void SSSE3_PSIGNB_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x08380F); - ModRM(3, to, from); -} - -emitterT void 
SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x09380F); - ModRM(3, to, from); -} - -emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x0A380F); - ModRM(3, to, from); -} // SSE4.1 diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index f788085771..647c812657 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -697,7 +697,11 @@ namespace x86Emitter template< typename T > bool Is8BitOp() { return sizeof(T) == 1; } template< typename T > void prefix16() { if( sizeof(T) == 2 ) xWrite( 0x66 ); } - #include "implement/xmm/movqss.h" + #include "implement/xmm/basehelpers.h" + #include "implement/xmm/moremovs.h" + #include "implement/xmm/arithmetic.h" + #include "implement/xmm/comparisons.h" + #include "implement/xmm/shufflepack.h" #include "implement/group1.h" #include "implement/group2.h" #include "implement/group3.h" From 2dcee320796c533dcda9aab237dee0703d0c9f53 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 21 Apr 2009 13:54:41 +0000 Subject: [PATCH 121/143] Linux: Fix some GCC compilation errors. And update the msvc project (somehow didn't get committed last night, but only change header files so prolly didn't break anything). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1037 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 18 ++++++++++++++- pcsx2/x86/ix86/implement/bittest.h | 12 ++++------ pcsx2/x86/ix86/implement/dwshift.h | 26 +++++++++++----------- pcsx2/x86/ix86/implement/xmm/arithmetic.h | 12 +++++----- pcsx2/x86/ix86/implement/xmm/shufflepack.h | 6 ++--- 5 files changed, 43 insertions(+), 31 deletions(-) diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 380a14991c..0167672b41 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -3024,7 +3024,23 @@ Name="xmm" > + + + + + + + + diff --git a/pcsx2/x86/ix86/implement/bittest.h b/pcsx2/x86/ix86/implement/bittest.h index 5ad091667d..7fcdfb5027 100644 --- a/pcsx2/x86/ix86/implement/bittest.h +++ b/pcsx2/x86/ix86/implement/bittest.h @@ -109,14 +109,10 @@ public: __noinline void operator()( const ModSibBase& bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } __noinline void operator()( const ModSibBase& bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - // Note on Imm forms : use int as the source operand since it's "reasonably inert" from a compiler - // perspective. (using uint tends to make the compiler try and fail to match signed immediates with - // one of the other overloads). 
- - __noinline void operator()( const ModSibStrict& bitbase, u8 immoffset ) const { m_32::Emit( bitbase, immoffset ); } - __noinline void operator()( const ModSibStrict& bitbase, u8 immoffset ) const { m_16::Emit( bitbase, immoffset ); } - void operator()( const xRegister& bitbase, u8 immoffset ) const { m_32::Emit( bitbase, immoffset ); } - void operator()( const xRegister& bitbase, u8 immoffset ) const { m_16::Emit( bitbase, immoffset ); } + __noinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } + __noinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } + void operator()( const xRegister& bitbase, u8 bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } + void operator()( const xRegister& bitbase, u8 bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } Group8ImplAll() {} }; diff --git a/pcsx2/x86/ix86/implement/dwshift.h b/pcsx2/x86/ix86/implement/dwshift.h index 390577ae20..261f1d3ed8 100644 --- a/pcsx2/x86/ix86/implement/dwshift.h +++ b/pcsx2/x86/ix86/implement/dwshift.h @@ -54,13 +54,13 @@ public: } // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const xRegister& from, u8 imm ) + static __emitinline void Emit( const xRegister& to, const xRegister& from, u8 shiftcnt ) { - if( imm == 0 ) return; + if( shiftcnt == 0 ) return; prefix16(); write16( 0xa40f | (isShiftRight ? 0x800 : 0) ); ModRM_Direct( from.Id, to.Id ); - write8( imm ); + write8( shiftcnt ); } // ------------------------------------------------------------------------ @@ -71,11 +71,11 @@ public: } // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibBase& sibdest, const xRegister& from, u8 imm ) + static __emitinline void Emit( const ModSibBase& sibdest, const xRegister& from, u8 shiftcnt ) { basesibform(); EmitSibMagic( from.Id, sibdest ); - write8( imm ); + write8( shiftcnt ); } // ------------------------------------------------------------------------ @@ -88,11 +88,11 @@ public: // ------------------------------------------------------------------------ // dest data type is inferred from the 'from' register, so we can do void* resolution :) - static __emitinline void Emit( void* dest, const xRegister& from, u8 imm ) + static __emitinline void Emit( void* dest, const xRegister& from, u8 shiftcnt ) { basesibform(); xWriteDisp( from.Id, dest ); - write8( imm ); + write8( shiftcnt ); } }; @@ -113,17 +113,17 @@ public: __forceinline void operator()( const xRegister32& to, const xRegister32& from, __unused const xRegisterCL& clreg ) const { m_32::Emit( to, from ); } __forceinline void operator()( void* dest, const xRegister32& from, __unused const xRegisterCL& clreg ) const { m_32::Emit( dest, from ); } __noinline void operator()( const ModSibBase& sibdest, const xRegister32& from, __unused const xRegisterCL& clreg ) const { m_32::Emit( sibdest, from ); } - __forceinline void operator()( const xRegister32& to, const xRegister32& from, u8 imm ) const { m_32::Emit( to, from, imm ); } - __forceinline void operator()( void* dest, const xRegister32& from, u8 imm ) const { m_32::Emit( dest, from, imm ); } - __noinline void operator()( const ModSibBase& sibdest, const xRegister32& from, u8 imm ) const { m_32::Emit( sibdest, from ); } + __forceinline void operator()( const xRegister32& to, const xRegister32& from, u8 shiftcnt ) const { m_32::Emit( to, from, 
shiftcnt ); } + __forceinline void operator()( void* dest, const xRegister32& from, u8 shiftcnt ) const { m_32::Emit( dest, from, shiftcnt ); } + __noinline void operator()( const ModSibBase& sibdest, const xRegister32& from, u8 shiftcnt ) const { m_32::Emit( sibdest, shiftcnt ); } // ---------- 16 Bit Interface ----------- __forceinline void operator()( const xRegister16& to, const xRegister16& from, __unused const xRegisterCL& clreg ) const { m_16::Emit( to, from ); } __forceinline void operator()( void* dest, const xRegister16& from, __unused const xRegisterCL& clreg ) const { m_16::Emit( dest, from ); } __noinline void operator()( const ModSibBase& sibdest, const xRegister16& from, __unused const xRegisterCL& clreg ) const { m_16::Emit( sibdest, from ); } - __forceinline void operator()( const xRegister16& to, const xRegister16& from, u8 imm ) const { m_16::Emit( to, from, imm ); } - __forceinline void operator()( void* dest, const xRegister16& from, u8 imm ) const { m_16::Emit( dest, from, imm ); } - __noinline void operator()( const ModSibBase& sibdest, const xRegister16& from, u8 imm ) const { m_16::Emit( sibdest, from ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from, u8 shiftcnt ) const { m_16::Emit( to, from, shiftcnt ); } + __forceinline void operator()( void* dest, const xRegister16& from, u8 shiftcnt ) const { m_16::Emit( dest, from, shiftcnt ); } + __noinline void operator()( const ModSibBase& sibdest, const xRegister16& from, u8 shiftcnt ) const { m_16::Emit( sibdest, shiftcnt ); } DwordShiftImplAll() {} // Why does GCC need these? }; diff --git a/pcsx2/x86/ix86/implement/xmm/arithmetic.h b/pcsx2/x86/ix86/implement/xmm/arithmetic.h index 6ac3f91877..54ecb0c095 100644 --- a/pcsx2/x86/ix86/implement/xmm/arithmetic.h +++ b/pcsx2/x86/ix86/implement/xmm/arithmetic.h @@ -25,7 +25,7 @@ template< u16 OpcodeBase1, u8 Modcode > class SimdImpl_ShiftWithoutQ { protected: - template< u16 Opcode1, u16 OpcodeImm, u8 Modcode > + template< u16 Opcode1, u16 OpcodeImm > class ShiftHelper { public: @@ -50,17 +50,17 @@ protected: } template< typename OperandType > - __emitinline void operator()( const xRegisterSIMD& to, u8 imm ) const + __emitinline void operator()( const xRegisterSIMD& to, u8 imm8 ) const { SimdPrefix( (sizeof( OperandType ) == 16) ? 
0x66 : 0, OpcodeImm ); ModRM( 3, (int)Modcode, to.Id ); - xWrite( imm ); + xWrite( imm8 ); } }; public: - const ShiftHelper W; - const ShiftHelper D; + const ShiftHelper W; + const ShiftHelper D; SimdImpl_ShiftWithoutQ() {} }; @@ -72,7 +72,7 @@ template< u16 OpcodeBase1, u8 Modcode > class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ { public: - const ShiftHelper Q; + const ShiftHelper Q; void DQ( const xRegisterSSE& to, u8 imm ) const { diff --git a/pcsx2/x86/ix86/implement/xmm/shufflepack.h b/pcsx2/x86/ix86/implement/xmm/shufflepack.h index 93a96569c6..baf110c845 100644 --- a/pcsx2/x86/ix86/implement/xmm/shufflepack.h +++ b/pcsx2/x86/ix86/implement/xmm/shufflepack.h @@ -251,19 +251,19 @@ protected: __forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const { writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm ); + xWrite( imm8 ); } __forceinline void operator()( void* dest, const xRegisterSSE& from, u8 imm8 ) const { writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm ); + xWrite( imm8 ); } __noinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm ); + xWrite( imm8 ); } }; From 27a8f3aa9a5662cf4d44dcbc496ab9268ec4673c Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Tue, 21 Apr 2009 21:30:47 +0000 Subject: [PATCH 122/143] Added PMADD/HADD/INSERT/EXTRACT instructions, and fixed more cross-compiler problems. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1038 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/xmm/arithmetic.h | 137 +++++++++++++++------ pcsx2/x86/ix86/implement/xmm/moremovs.h | 11 ++ pcsx2/x86/ix86/implement/xmm/shufflepack.h | 13 +- pcsx2/x86/ix86/ix86.cpp | 68 +++++++++- pcsx2/x86/ix86/ix86_instructions.h | 13 +- pcsx2/x86/ix86/ix86_legacy_mmx.cpp | 17 +-- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 29 ++--- 7 files changed, 206 insertions(+), 82 deletions(-) diff --git a/pcsx2/x86/ix86/implement/xmm/arithmetic.h b/pcsx2/x86/ix86/implement/xmm/arithmetic.h index 54ecb0c095..87deb51429 100644 --- a/pcsx2/x86/ix86/implement/xmm/arithmetic.h +++ b/pcsx2/x86/ix86/implement/xmm/arithmetic.h @@ -18,49 +18,52 @@ #pragma once +////////////////////////////////////////////////////////////////////////////////////////// +// ShiftHelper -- It's out here because C++ child class template semantics are generally +// not cross-compiler friendly. +// +template< u16 Opcode1, u16 OpcodeImm, u8 Modcode > +class _SimdShiftHelper +{ +public: + _SimdShiftHelper() {} + + template< typename OperandType > + __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const + { + writeXMMop( 0x66, Opcode1, to, from ); + } + + template< typename OperandType > + __emitinline void operator()( const xRegisterSIMD& to, u8 imm8 ) const + { + SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm ); + ModRM( 3, (int)Modcode, to.Id ); + xWrite( imm8 ); + } +}; + ////////////////////////////////////////////////////////////////////////////////////////// // Used for PSRA, which lacks the Q form. 
// template< u16 OpcodeBase1, u8 Modcode > class SimdImpl_ShiftWithoutQ { -protected: - template< u16 Opcode1, u16 OpcodeImm > - class ShiftHelper - { - public: - ShiftHelper() {} - - template< typename OperandType > - __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } - - template< typename OperandType > - __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } - - template< typename OperandType > - __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } - - template< typename OperandType > - __emitinline void operator()( const xRegisterSIMD& to, u8 imm8 ) const - { - SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm ); - ModRM( 3, (int)Modcode, to.Id ); - xWrite( imm8 ); - } - }; - public: - const ShiftHelper W; - const ShiftHelper D; + const _SimdShiftHelper W; + const _SimdShiftHelper D; SimdImpl_ShiftWithoutQ() {} }; @@ -72,7 +75,7 @@ template< u16 OpcodeBase1, u8 Modcode > class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ { public: - const ShiftHelper Q; + const _SimdShiftHelper Q; void DQ( const xRegisterSSE& to, u8 imm ) const { @@ -228,3 +231,63 @@ public: const SimdImpl_DestRegEither<0x66, 0x0a38> D; }; + +////////////////////////////////////////////////////////////////////////////////////////// +// Packed Multiply and Add!! +// +class SimdImpl_PMultAdd +{ +public: + SimdImpl_PMultAdd() {} + + // Multiplies the individual signed words of dest by the corresponding signed words + // of src, producing temporary signed, doubleword results. The adjacent doubleword + // results are then summed and stored in the destination operand. + // + // DEST[31:0] = ( DEST[15:0] * SRC[15:0]) + (DEST[31:16] * SRC[31:16] ); + // DEST[63:32] = ( DEST[47:32] * SRC[47:32]) + (DEST[63:48] * SRC[63:48] ); + // [.. repeat in the case of XMM src/dest operands ..] + // + const SimdImpl_DestRegEither<0x66, 0xf5> WD; + + // [sSSE-3] multiplies vertically each unsigned byte of dest with the corresponding + // signed byte of src, producing intermediate signed 16-bit integers. Each adjacent + // pair of signed words is added and the saturated result is packed to dest. + // For example, the lowest-order bytes (bits 7-0) in src and dest are multiplied + // and the intermediate signed word result is added with the corresponding + // intermediate result from the 2nd lowest-order bytes (bits 15-8) of the operands; + // the sign-saturated result is stored in the lowest word of dest (bits 15-0). + // The same operation is performed on the other pairs of adjacent bytes. + // + // In Coder Speak: + // DEST[15-0] = SaturateToSignedWord( SRC[15-8] * DEST[15-8] + SRC[7-0] * DEST[7-0] ); + // DEST[31-16] = SaturateToSignedWord( SRC[31-24] * DEST[31-24] + SRC[23-16] * DEST[23-16] ); + // [.. repeat for each 16 bits up to 64 (mmx) or 128 (xmm) ..] + // + const SimdImpl_DestRegEither<0x66, 0xf438> UBSW; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Packed Horizontal Add [SSE3 only] +// +class SimdImpl_HorizAdd +{ +public: + SimdImpl_HorizAdd() {} + + // [SSE-3] Horizontal Add of Packed Data. A three step process: + // * Adds the single-precision floating-point values in the first and second dwords of + // dest and stores the result in the first dword of dest. 
+ // * Adds single-precision floating-point values in the third and fourth dword of dest + // stores the result in the second dword of dest. + // * Adds single-precision floating-point values in the first and second dword of *src* + // and stores the result in the third dword of dest. + const SimdImpl_DestRegSSE<0xf2, 0x7c> PS; + + // [SSE-3] Horizontal Add of Packed Data. A two step process: + // * Adds the double-precision floating-point values in the high and low quadwords of + // dest and stores the result in the low quadword of dest. + // * Adds the double-precision floating-point values in the high and low quadwords of + // *src* stores the result in the high quadword of dest. + const SimdImpl_DestRegSSE<0x66, 0x7c> PD; +}; \ No newline at end of file diff --git a/pcsx2/x86/ix86/implement/xmm/moremovs.h b/pcsx2/x86/ix86/implement/xmm/moremovs.h index 44da893436..fa00b91179 100644 --- a/pcsx2/x86/ix86/implement/xmm/moremovs.h +++ b/pcsx2/x86/ix86/implement/xmm/moremovs.h @@ -80,3 +80,14 @@ public: const SimdImpl_DestRegSSE SD; SimdImpl_UcomI() {} }; + +////////////////////////////////////////////////////////////////////////////////////////// +// +class SimdImpl_Blend +{ + SimdImpl_DestRegImmSSE<0x66,0x0c3a> PS; + SimdImpl_DestRegImmSSE<0x66,0x0d3a> PD; + + SimdImpl_DestRegImmSSE<0x66,0x1438> VPS; + SimdImpl_DestRegImmSSE<0x66,0x1538> VPD; +}; \ No newline at end of file diff --git a/pcsx2/x86/ix86/implement/xmm/shufflepack.h b/pcsx2/x86/ix86/implement/xmm/shufflepack.h index baf110c845..f97d8f8f75 100644 --- a/pcsx2/x86/ix86/implement/xmm/shufflepack.h +++ b/pcsx2/x86/ix86/implement/xmm/shufflepack.h @@ -169,7 +169,7 @@ public: }; ////////////////////////////////////////////////////////////////////////////////////////// -// PINSW/B/D [all but Word form are SSE4.1 only!] +// PINSRW/B/D [all but Word form are SSE4.1 only!] // class SimdImpl_PInsert { @@ -183,19 +183,19 @@ protected: __forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const { writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm ); + xWrite( imm8 ); } __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm8 ) const { writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm ); + xWrite( imm8 ); } __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm ); + xWrite( imm8 ); } }; @@ -256,13 +256,13 @@ protected: __forceinline void operator()( void* dest, const xRegisterSSE& from, u8 imm8 ) const { - writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); + writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest ); xWrite( imm8 ); } __noinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { - writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); + writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest ); xWrite( imm8 ); } }; @@ -304,3 +304,4 @@ public: // used to extract any single packed dword value from src into an x86 32 bit register. 
const ByteDwordForms<0x16> D; }; + diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index e6bc9f34a7..6b8a288289 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -784,13 +784,20 @@ const SimdImpl_Pack xPACK; const SimdImpl_PAbsolute xPABS; const SimdImpl_PSign xPSIGN; -const SimdImpl_PInsert xPINS; +const SimdImpl_PInsert xPINSR; const SimdImpl_PExtract xPEXTR; +const SimdImpl_PMultAdd xPMADD; +const SimdImpl_HorizAdd xHADD; ////////////////////////////////////////////////////////////////////////////////////////// // +__emitinline void xEMMS() +{ + xWrite( 0x770F ); +} + // Store Streaming SIMD Extension Control/Status to Mem32. __emitinline void xSTMXCSR( u32* dest ) { @@ -885,5 +892,64 @@ __noinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writ __forceinline void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from) { writeXMMop( 0x50, to, from ); } __forceinline void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from) { writeXMMop( 0x66, 0x50, to, from, true ); } +////////////////////////////////////////////////////////////////////////////////////////// +// INSERTPS / EXTRACTPS [SSE4.1 only!] +// +// [TODO] these might be served better as classes, especially if other instructions use +// the M32,sse,imm form (I forget offhand if any do). + + +// [SSE-4.1] Insert a single-precision floating-point value from src into a specified +// location in dest, and selectively zero out the data elements in dest according to +// the mask field in the immediate byte. The source operand can be a memory location +// (32 bits) or an XMM register (lower 32 bits used). +// +// Imm8 provides three fields: +// * COUNT_S: The value of Imm8[7:6] selects the dword element from src. It is 0 if +// the source is a memory operand. +// * COUNT_D: The value of Imm8[5:4] selects the target dword element in dest. +// * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written +// with 0.0 if set to 1. +// +__emitinline void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) +{ + writeXMMop( 0x66, 0x213a, to, from ); + xWrite( imm8 ); +} + +__emitinline void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 ) +{ + writeXMMop( 0x66, 0x213a, to, from ); + xWrite( imm8 ); +} + +__emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict& from, u8 imm8 ) +{ + writeXMMop( 0x66, 0x213a, to, from ); + xWrite( imm8 ); +} + +// [SSE-4.1] Extract a single-precision floating-point value from src at an offset +// determined by imm8[1-0]*32. The extracted single precision floating-point value +// is stored into the low 32-bits of dest (or at a 32-bit memory pointer). 
+// +__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) +{ + writeXMMop( 0x66, 0x173a, to, from, true ); + xWrite( imm8 ); +} + +__emitinline void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 ) +{ + writeXMMop( 0x66, 0x173a, from, dest, true ); + xWrite( imm8 ); +} + +__emitinline void xEXTRACTPS( const ModSibStrict& dest, const xRegisterSSE& from, u8 imm8 ) +{ + writeXMMop( 0x66, 0x173a, from, dest, true ); + xWrite( imm8 ); +} + } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 62f434ccf3..8455f927b7 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -384,6 +384,7 @@ namespace x86Emitter // ------------------------------------------------------------------------ + extern void xEMMS(); extern void xSTMXCSR( u32* dest ); extern void xLDMXCSR( const u32* src ); @@ -429,6 +430,14 @@ namespace x86Emitter extern void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from ); extern void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from ); + extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ); + extern void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 ); + extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict& from, u8 imm8 ); + + extern void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ); + extern void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 ); + extern void xEXTRACTPS( const ModSibStrict& dest, const xRegisterSSE& from, u8 imm8 ); + // ------------------------------------------------------------------------ extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; @@ -536,8 +545,10 @@ namespace x86Emitter extern const Internal::SimdImpl_PAbsolute xPABS; extern const Internal::SimdImpl_PSign xPSIGN; - extern const Internal::SimdImpl_PInsert xPINS; + extern const Internal::SimdImpl_PInsert xPINSR; extern const Internal::SimdImpl_PExtract xPEXTR; + extern const Internal::SimdImpl_PMultAdd xPMADD; + extern const Internal::SimdImpl_HorizAdd xHADD; } diff --git a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp index f0571df61a..d8bdb0b8a2 100644 --- a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp @@ -119,19 +119,6 @@ emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) { xPMUL.UDQ( emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 ); } emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 ); } -////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////// +emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { xPINSR.W( xRegisterMMX(to), xRegister32(from), imm8 ); } -/* emms */ -emitterT void EMMS() -{ - write16( 0x770F ); -} - -emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) -{ - if (to > 7 || from > 7) Rex(1, to >> 3, 0, from >> 3); - write16( 0xc40f ); - ModRM( 3, to, from ); - write8( imm8 ); -} +emitterT void EMMS() { xEMMS(); } diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index 2a164b4d99..07acf0ca06 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -278,7 +278,10 @@ emitterT void 
SSSE3_PSIGNW_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { x emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPSIGN.D( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ) { xPEXTR.W( xRegister32(to), xRegisterSSE(from), imm8 ); } -emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ) { xPINS.W( xRegisterSSE(to), xRegister32(from), imm8 ); } +emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ) { xPINSR.W( xRegisterSSE(to), xRegister32(from), imm8 ); } + +emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xINSERTPS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) { xEXTRACTPS( xRegister32(to), xRegisterSSE(from), imm8 ); } emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); } @@ -290,10 +293,10 @@ emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); } //PEXTRW,PINSRW: Packed Extract/Insert Word * //**********************************************************************************} -emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { SSERtoR66(0xF50F); } +emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMADD.WD( xRegisterSSE(from), xRegisterSSE(to) ); } -emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf2); SSERtoR( 0x7c0f ); } -emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf2); SSEMtoR( 0x7c0f, 0 ); } +emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xHADD.PS( xRegisterSSE(from), xRegisterSSE(to) ); } +emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { xHADD.PS( xRegisterSSE(from), (void*)to ); } // SSE4.1 @@ -315,24 +318,6 @@ emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) write8(imm8); } -emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x213A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x173A0F); - ModRM(3, to, from); - write8(imm8); -} - emitterT void SSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) { write8(0x66); From bfe128cd418e0135f59ee047149580d95e0036d2 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Tue, 21 Apr 2009 21:42:33 +0000 Subject: [PATCH 123/143] Linux: Fix the last of the GCC compilation errors. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1039 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/Makefile.am | 3 ++- pcsx2/x86/ix86/ix86.cpp | 4 ++-- pcsx2/x86/ix86/ix86_instructions.h | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pcsx2/x86/ix86/Makefile.am b/pcsx2/x86/ix86/Makefile.am index c146c7c4dc..bb1c76665b 100644 --- a/pcsx2/x86/ix86/Makefile.am +++ b/pcsx2/x86/ix86/Makefile.am @@ -6,4 +6,5 @@ ix86.cpp ix86_cpudetect.cpp ix86_fpu.cpp ix86_jmp.cpp ix86_legacy_mmx.cpp ix86_t ix86_legacy.cpp ix86_legacy_sse.cpp \ ix86_internal.h ix86_legacy_instructions.h ix86_macros.h ix86_sse_helpers.h ix86.h ix86_legacy_internal.h \ ix86_instructions.h ix86_legacy_types.h ix86_types.h \ -bittest.h dwshift.h group1.h group2.h group3.h incdec.h jmpcall.h movs.h test.h movqss.h \ No newline at end of file +bittest.h dwshift.h group1.h group2.h group3.h incdec.h jmpcall.h movs.h test.h \ +movqss.h arithmetic.h shufflepack.h basehelpers.h comparisons.h moremovs.h \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 6b8a288289..0f871735dc 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -889,8 +889,8 @@ __noinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { wri __forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } __noinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } -__forceinline void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from) { writeXMMop( 0x50, to, from ); } -__forceinline void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from) { writeXMMop( 0x66, 0x50, to, from, true ); } +__forceinline void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from) { writeXMMop( 0x50, to, from ); } +__forceinline void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from) { writeXMMop( 0x66, 0x50, to, from, true ); } ////////////////////////////////////////////////////////////////////////////////////////// // INSERTPS / EXTRACTPS [SSE4.1 only!] diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 8455f927b7..65ce383144 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -427,8 +427,8 @@ namespace x86Emitter extern void xMOVNTQ( void* to, const xRegisterMMX& from ); extern void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ); - extern void xMOVMSKPS( const xRegister32& to, xRegisterSSE& from ); - extern void xMOVMSKPD( const xRegister32& to, xRegisterSSE& from ); + extern void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from ); + extern void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from ); extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ); extern void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 ); From d7341d5b6983d92f61507b6f4acaab8e2870d5ef Mon Sep 17 00:00:00 2001 From: sudonim1 Date: Tue, 21 Apr 2009 23:30:06 +0000 Subject: [PATCH 124/143] Protect manual pages again after running code in them for a while, should speed up some games and probably not significantly slow down any. Related new speed hack, fast-forward a block starting at 0x81FC0 which some games (FFX) run a lot. This block is also excluded from the previous feature. Rearrange configuration file and speed hack dialog. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1040 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/Pcsx2Config.h | 17 ++++----- pcsx2/Linux/Pref.cpp | 23 +++++++++--- pcsx2/Memory.cpp | 5 +-- pcsx2/R5900.cpp | 4 +-- pcsx2/windows/HacksDlg.cpp | 40 +++++++++++---------- pcsx2/windows/WinSysExec.cpp | 2 +- pcsx2/windows/ini.cpp | 19 +++++++--- pcsx2/windows/pcsx2.rc | 21 +++++------ pcsx2/windows/resource.h | 3 +- pcsx2/x86/iR3000A.cpp | 2 +- pcsx2/x86/iVUzerorec.cpp | 6 ++-- pcsx2/x86/ix86-32/iR5900-32.cpp | 64 ++++++++++++++++++++++++--------- pcsx2/x86/ix86-32/recVTLB.cpp | 2 +- 13 files changed, 135 insertions(+), 73 deletions(-) diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index cfff17a591..89f46f7c71 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -58,12 +58,6 @@ extern SessionOverrideFlags g_Session; #define CHECK_MULTIGS (Config.Options&PCSX2_GSMULTITHREAD) #define CHECK_EEREC (!g_Session.ForceDisableEErec && Config.Options&PCSX2_EEREC) -//------------ SPEED/MISC HACKS!!! --------------- -#define CHECK_EE_CYCLERATE (Config.Hacks & 0x03) -#define CHECK_IOP_CYCLERATE (Config.Hacks & 0x08) -#define CHECK_WAITCYCLE_HACK (Config.Hacks & 0x10) -#define CHECK_INTC_STAT_HACK (Config.Hacks & 0x20) -#define CHECK_ESCAPE_HACK (Config.Hacks & 0x400) //------------ SPECIAL GAME FIXES!!! --------------- #define CHECK_VUADDSUBHACK (Config.GameFixes & 0x1) // Special Fix for Tri-ace games, they use an encryption algorithm that requires VU addi opcode to be bit-accurate. #define CHECK_FPUCOMPAREHACK (Config.GameFixes & 0x4) // Special Fix for Digimon Rumble Arena 2, fixes spinning/hanging on intro-menu. @@ -136,8 +130,15 @@ public: int Mdec; int Patch; int CustomFps; - int Hacks; - int VUCycleHack; + struct Hacks_t { + int EECycleRate; + bool IOPCycleDouble; + bool WaitCycleExt; + bool INTCSTATSlow; + int VUCycleSteal; + bool IdleLoopFF; + bool ESCExits; // this is a hack!? 
+ } Hacks; int GameFixes; int CustomFrameSkip; int CustomConsecutiveFrames; diff --git a/pcsx2/Linux/Pref.cpp b/pcsx2/Linux/Pref.cpp index 90f513353b..02fb3ede3f 100644 --- a/pcsx2/Linux/Pref.cpp +++ b/pcsx2/Linux/Pref.cpp @@ -100,8 +100,17 @@ int LoadConfig() GetValuel("varLog", varLog); #endif GetValuel("Options", Config.Options); - GetValuel("Hacks", Config.Hacks); - GetValuel("VUCycleHack", Config.VUCycleHack); + + GetValuel("EECycleRate", Config.Hacks.EECycleRate); + if (Config.Hacks.EECycleRate > 2) + Config.Hacks.EECycleRate = 2; + GetValuel("IOPCycleDouble", Config.Hacks.IOPCycleDouble); + GetValuel("WaitCycleExt", Config.Hacks.WaitCycleExt); + GetValuel("INTCSTATSlow", Config.Hacks.INTCSTATSlow); + GetValuel("VUCycleSteal", Config.Hacks.VUCycleSteal); + GetValuel("IdleLoopFF", Config.Hacks.IdleLoopFF); + GetValuel("ESCExits", Config.Hacks.ESCExits); + if (Config.VUCycleHack < 0 || Config.VUCycleHack > 4) Config.VUCycleHack = 0; GetValuel("Fixes", Config.GameFixes); @@ -165,8 +174,14 @@ void SaveConfig() SetValuel("Options", Config.Options); - SetValuel("Hacks", Config.Hacks); - SetValuel("VUCycleHack", Config.VUCycleHack); + SetValuel("EECycleRate", Config.Hacks.EECycleRate); + SetValuel("IOPCycleDouble", Config.Hacks.IOPCycleDouble); + SetValuel("WaitCycleExt", Config.Hacks.WaitCycleExt); + SetValuel("INTCSTATSlow", Config.Hacks.INTCSTATSlow); + SetValuel("VUCycleSteal", Config.Hacks.VUCycleSteal); + SetValuel("IdleLoopFF", Config.Hacks.IdleLoopFF); + SetValuel("ESCExits", Config.Hacks.ESCExits); + SetValuel("Fixes", Config.GameFixes); SetValuel("Patch", Config.Patch); diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index 8d993e7b70..aef7d3c627 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -738,8 +738,8 @@ void memReset() _ext_memWrite8<1>, _ext_memWrite16<1>, hwWrite32_page_0E, hwWrite64_page_0E, hwWrite128_generic ); - vtlbMemR32FP* page0F32( CHECK_INTC_STAT_HACK ? hwRead32_page_0F_INTC_HACK : hwRead32_page_0F ); - vtlbMemR64FP* page0F64( CHECK_INTC_STAT_HACK ? hwRead64_generic_INTC_HACK : hwRead64_generic ); + vtlbMemR32FP* page0F32( Config.Hacks.INTCSTATSlow ? hwRead32_page_0F_INTC_HACK : hwRead32_page_0F ); + vtlbMemR64FP* page0F64( Config.Hacks.INTCSTATSlow ? 
hwRead64_generic_INTC_HACK : hwRead64_generic ); hw_by_page[0xf] = vtlb_RegisterHandler( _ext_memRead8<1>, _ext_memRead16<1>, page0F32, page0F64, hwRead128_generic, @@ -820,6 +820,7 @@ void mmap_MarkCountedRamPage(void* ptr,u32 vaddr) u32 offset=((u8*)ptr-psM); offset>>=12; + psMPWC[(offset/32)] &= ~(1<<(offset&31)); for (u32 i=0;i 1 ) + if( Config.Hacks.EECycleRate > 1 ) eeWaitCycles += 1024; hwReset(); diff --git a/pcsx2/windows/HacksDlg.cpp b/pcsx2/windows/HacksDlg.cpp index 3bead95237..5eb37a49c0 100644 --- a/pcsx2/windows/HacksDlg.cpp +++ b/pcsx2/windows/HacksDlg.cpp @@ -41,16 +41,17 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) switch (message) { case WM_INITDIALOG: - CheckRadioButton( hDlg, IDC_EESYNC_DEFAULT, IDC_EESYNC3, IDC_EESYNC_DEFAULT + CHECK_EE_CYCLERATE ); + CheckRadioButton( hDlg, IDC_EESYNC_DEFAULT, IDC_EESYNC3, IDC_EESYNC_DEFAULT + Config.Hacks.EECycleRate ); - if(CHECK_IOP_CYCLERATE) CheckDlgButton(hDlg, IDC_IOPSYNC, TRUE); - if(CHECK_WAITCYCLE_HACK) CheckDlgButton(hDlg, IDC_WAITCYCLES, TRUE); - if(CHECK_INTC_STAT_HACK) CheckDlgButton(hDlg, IDC_INTCSTATHACK, TRUE); - if(CHECK_ESCAPE_HACK) CheckDlgButton(hDlg, IDC_ESCHACK, TRUE); + if(Config.Hacks.IOPCycleDouble) CheckDlgButton(hDlg, IDC_IOPSYNC, TRUE); + if(Config.Hacks.WaitCycleExt) CheckDlgButton(hDlg, IDC_WAITCYCLES, TRUE); + if(Config.Hacks.INTCSTATSlow) CheckDlgButton(hDlg, IDC_INTCSTATHACK, TRUE); + if(Config.Hacks.IdleLoopFF) CheckDlgButton(hDlg, IDC_IDLELOOPFF, TRUE); + if(Config.Hacks.ESCExits) CheckDlgButton(hDlg, IDC_ESCHACK, TRUE); SendDlgItemMessage(hDlg, IDC_VUCYCLE, TBM_SETRANGE, TRUE, MAKELONG(0, 4)); - CheckVUCycleHack(hDlg, Config.VUCycleHack); - SendDlgItemMessage(hDlg, IDC_VUCYCLE, TBM_SETPOS, TRUE, Config.VUCycleHack); + CheckVUCycleHack(hDlg, Config.Hacks.VUCycleSteal); + SendDlgItemMessage(hDlg, IDC_VUCYCLE, TBM_SETPOS, TRUE, Config.Hacks.VUCycleSteal); return TRUE; @@ -73,31 +74,32 @@ BOOL APIENTRY HacksProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM lParam) { case IDOK: { - int newhacks = 0; - for( int i=1; i<4; i++ ) + PcsxConfig::Hacks_t newhacks; + + newhacks.EECycleRate = 0; + for( int i=1; i<3; i++ ) { if( IsDlgButtonChecked(hDlg, IDC_EESYNC_DEFAULT+i) ) { - newhacks = i; + newhacks.EECycleRate = i; break; } } - newhacks |= IsDlgButtonChecked(hDlg, IDC_IOPSYNC) << 3; - newhacks |= IsDlgButtonChecked(hDlg, IDC_WAITCYCLES) << 4; - newhacks |= IsDlgButtonChecked(hDlg, IDC_INTCSTATHACK) << 5; - newhacks |= IsDlgButtonChecked(hDlg, IDC_ESCHACK) << 10; - - int newvucyclehack = SendDlgItemMessage(hDlg, IDC_VUCYCLE, TBM_GETPOS, 0, 0); - CheckVUCycleHack(hDlg, newvucyclehack); + newhacks.IOPCycleDouble = IsDlgButtonChecked(hDlg, IDC_IOPSYNC); + newhacks.WaitCycleExt = IsDlgButtonChecked(hDlg, IDC_WAITCYCLES); + newhacks.INTCSTATSlow = IsDlgButtonChecked(hDlg, IDC_INTCSTATHACK); + newhacks.ESCExits = IsDlgButtonChecked(hDlg, IDC_ESCHACK); + newhacks.IdleLoopFF = IsDlgButtonChecked(hDlg, IDC_IDLELOOPFF); + newhacks.VUCycleSteal = SendDlgItemMessage(hDlg, IDC_VUCYCLE, TBM_GETPOS, 0, 0); + CheckVUCycleHack(hDlg, newhacks.VUCycleSteal); EndDialog(hDlg, TRUE); - if( newhacks != Config.Hacks || newvucyclehack != Config.VUCycleHack) + if(memcmp(&newhacks, &Config.Hacks, sizeof(newhacks))) { SysRestorableReset(); Config.Hacks = newhacks; - Config.VUCycleHack = newvucyclehack; SaveConfig(); } } diff --git a/pcsx2/windows/WinSysExec.cpp b/pcsx2/windows/WinSysExec.cpp index ec62fe33d0..fb8e48e106 100644 --- a/pcsx2/windows/WinSysExec.cpp +++ b/pcsx2/windows/WinSysExec.cpp 
@@ -442,7 +442,7 @@ namespace HostGui } #endif - if( CHECK_ESCAPE_HACK ) + if( Config.Hacks.ESCExits ) { g_EmulationInProgress = false; DestroyWindow( gApp.hWnd ); diff --git a/pcsx2/windows/ini.cpp b/pcsx2/windows/ini.cpp index 6a1016afef..81f25f82eb 100644 --- a/pcsx2/windows/ini.cpp +++ b/pcsx2/windows/ini.cpp @@ -21,7 +21,7 @@ #include "Common.h" #include "Paths.h" -static const u32 IniVersion = 101; +static const u32 IniVersion = 102; const char* g_CustomConfigFile; char g_WorkingFolder[g_MaxPath]; // Working folder at application startup @@ -222,10 +222,19 @@ void IniFile::DoConfig( PcsxConfig& Conf ) Entry( "sseVUMXCSR", Conf.sseVUMXCSR, DEFAULT_sseVUMXCSR ); Entry( "eeOptions", Conf.eeOptions, DEFAULT_eeOptions ); Entry( "vuOptions", Conf.vuOptions, DEFAULT_vuOptions ); - Entry( "SpeedHacks", Conf.Hacks ); - Entry( "VUCycleHack", Conf.VUCycleHack, 0 ); - if (Conf.VUCycleHack < 0 || Conf.VUCycleHack > 4) - Conf.VUCycleHack = 0; + + SetCurrentSection("Hacks"); + Entry("EECycleRate", Config.Hacks.EECycleRate); + if (Config.Hacks.EECycleRate > 2) + Config.Hacks.EECycleRate = 2; + Entry("IOPCycleDouble", Config.Hacks.IOPCycleDouble); + Entry("WaitCycleExt", Config.Hacks.WaitCycleExt); + Entry("INTCSTATSlow", Config.Hacks.INTCSTATSlow); + Entry("VUCycleSteal", Config.Hacks.VUCycleSteal); + Entry("IdleLoopFF", Config.Hacks.IdleLoopFF); + if (Conf.Hacks.VUCycleSteal < 0 || Conf.Hacks.VUCycleSteal > 4) + Conf.Hacks.VUCycleSteal = 0; + Entry("ESCExits", Config.Hacks.ESCExits); } ////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pcsx2/windows/pcsx2.rc b/pcsx2/windows/pcsx2.rc index 65623bd61d..e5934d0551 100644 --- a/pcsx2/windows/pcsx2.rc +++ b/pcsx2/windows/pcsx2.rc @@ -352,28 +352,29 @@ BEGIN CONTROL "Default Cycle Rate",IDC_EESYNC_DEFAULT,"Button",BS_AUTORADIOBUTTON,13,44,87,10 CONTROL "Use x1.5 Cycle Rate",IDC_EESYNC1,"Button",BS_AUTORADIOBUTTON,13,79,87,10 CONTROL "Use x2 Cycle Rate",IDC_EESYNC2,"Button",BS_AUTORADIOBUTTON,13,113,83,10 - CONTROL "Use x3 Cycle Rate",IDC_EESYNC3,"Button",BS_AUTORADIOBUTTON,13,147,80,10 CONTROL "Enable IOP x2 Cycle Rate",IDC_IOPSYNC,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,174,88,98,10 CONTROL "WaitCycles Sync Hack",IDC_WAITCYCLES,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,174,127,90,10 CONTROL "Escape Hack - Use Esc key to fully exit PCSX2.",IDC_ESCHACK, - "Button",BS_AUTOCHECKBOX | WS_TABSTOP,13,234,180,10 + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,13,238,180,10 DEFPUSHBUTTON "OK",IDOK,217,242,50,14 PUSHBUTTON "Cancel",IDCANCEL,278,242,50,14 CTEXT "These hacks will speed up emulation but reduce emulation compatibility or cause visual errors. If you have problems, disable all these and try again!",IDC_HACKDESC,18,7,286,19 - GROUPBOX "EmotionEngine (EE) Sync Hacks",IDC_STATIC,7,31,159,185 - GROUPBOX "Miscellaneous",IDC_STATIC,7,220,194,33 - LTEXT "Important: X2 and X3 sync hacks *will* cause choppy/skippy audio on many FMV movies.",IDC_STATIC,20,188,137,25 - LTEXT "Known to work well with a couple games, namely Shadow of the Colossus (but breaks most other games).",IDC_STATIC,25,158,133,28 + GROUPBOX "EmotionEngine (EE) Sync Hacks",IDC_STATIC,7,31,159,143 + GROUPBOX "Miscellaneous",IDC_STATIC,7,223,194,33 + LTEXT "Important: the X2 sync hack *will* cause choppy/skippy audio on many FMV movies.",IDC_STATIC,13,149,137,22 LTEXT "Big speedup! 
Works well with many games.",IDC_STATIC,25,124,125,19 LTEXT "Most compatible option - recommended for everyone with high-end machines.",IDC_STATIC,25,55,136,19 LTEXT "Small speedup and works well with most games.",IDC_STATIC,186,100,134,22 LTEXT "Small speedup. Works well with most games, but may cause certain games to crash or freeze up during bootup or stage changes.",IDC_STATIC,186,139,141,39 LTEXT "Moderate speedup and works well with most games.",IDC_STATIC,25,90,129,19 - CONTROL "INTC Sync Hack (experimental)",IDC_INTCSTATHACK,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,174,43,127,10 + CONTROL "INTC Sync Hack",IDC_INTCSTATHACK,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,174,43,127,10 LTEXT "Huge speedup in many games, and a pretty high compatibility rate (some games still work better with EE sync hacks).",IDC_STATIC,186,55,140,28 - CONTROL "",IDC_VUCYCLE,"msctls_trackbar32",TBS_BOTH | TBS_NOTICKS | WS_TABSTOP,174,176,36,15 - LTEXT "This space intentionally left blank",IDC_VUCYCLEDESC,186,194,142,30 - LTEXT "VU Cycle Stealing (experimental)",IDC_STATIC,210,180,105,8 + CONTROL "",IDC_VUCYCLE,"msctls_trackbar32",TBS_BOTH | TBS_NOTICKS | WS_TABSTOP,13,178,36,15 + LTEXT "This space intentionally left blank",IDC_VUCYCLEDESC,25,196,142,24 + LTEXT "VU Cycle Stealing (experimental)",IDC_STATIC,49,183,105,8 + CONTROL "Idle Loop Fast-Forward (experimental)",IDC_IDLELOOPFF, + "Button",BS_AUTOCHECKBOX | WS_TABSTOP,174,179,139,10 + LTEXT "Speedup for a few games, including FFX with no known side effects. More later.",IDC_STATIC,186,193,142,26 END diff --git a/pcsx2/windows/resource.h b/pcsx2/windows/resource.h index 24e02c3816..e519a3275a 100644 --- a/pcsx2/windows/resource.h +++ b/pcsx2/windows/resource.h @@ -275,6 +275,7 @@ #define IDC_SLIDER1 1327 #define IDC_VUCYCLE 1327 #define IDC_VUCYCLEDESC 1328 +#define IDC_IDLELOOPFF 1330 #define IDC_CPULOG 1500 #define IDC_MEMLOG 1501 #define IDC_HWLOG 1502 @@ -408,7 +409,7 @@ #ifndef APSTUDIO_READONLY_SYMBOLS #define _APS_NEXT_RESOURCE_VALUE 141 #define _APS_NEXT_COMMAND_VALUE 40018 -#define _APS_NEXT_CONTROL_VALUE 1329 +#define _APS_NEXT_CONTROL_VALUE 1331 #define _APS_NEXT_SYMED_VALUE 104 #endif #endif diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 9f79701e7a..1dedcfd624 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -800,7 +800,7 @@ void psxSetBranchImm( u32 imm ) static __forceinline u32 psxScaleBlockCycles() { - return s_psxBlockCycles * (CHECK_IOP_CYCLERATE ? 2 : 1); + return s_psxBlockCycles * (Config.Hacks.IOPCycleDouble ? 
2 : 1); } static void iPsxBranchTest(u32 newpc, u32 cpuBranch) diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/iVUzerorec.cpp index 59102f303e..b5fdbc1c54 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/iVUzerorec.cpp @@ -2297,8 +2297,10 @@ void SuperVUCleanupProgram(u32 startpc, int vuindex) VU->cycle += s_TotalVUCycles; //VU cycle stealing hack, 3000 cycle maximum so it doesn't get out of hand - if (s_TotalVUCycles < 3000) cpuRegs.cycle += s_TotalVUCycles * Config.VUCycleHack; - else cpuRegs.cycle += 3000 * Config.VUCycleHack; + if (s_TotalVUCycles < 3000) + cpuRegs.cycle += s_TotalVUCycles * Config.Hacks.VUCycleSteal; + else + cpuRegs.cycle += 3000 * Config.Hacks.VUCycleSteal; if( (int)s_writeQ > 0 ) VU->VI[REG_Q] = VU->q; if( (int)s_writeP > 0 ) { diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index d500f25c75..c36687bda7 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -48,6 +48,8 @@ #include "NakedAsm.h" +using namespace x86Emitter; + using namespace R5900; // used to disable register freezing during cpuBranchTests (registers @@ -92,6 +94,7 @@ static BASEBLOCK* s_pCurBlock = NULL; static BASEBLOCKEX* s_pCurBlockEx = NULL; static u32 s_nEndBlock = 0; // what pc the current block ends static u32 s_nHasDelay = 0; +static bool s_nBlockFF; // save states for branches GPR_reg64 s_saveConstRegs[32]; @@ -1071,7 +1074,7 @@ u32 eeScaleBlockCycles() // caused by sync hacks and such, since games seem to care a lot more about // these small blocks having accurate cycle counts. - if( s_nBlockCycles <= (5<<3) || (CHECK_EE_CYCLERATE == 0) ) + if( s_nBlockCycles <= (5<<3) || (Config.Hacks.EECycleRate == 0) ) return s_nBlockCycles >> 3; uint scalarLow, scalarMid, scalarHigh; @@ -1079,7 +1082,7 @@ u32 eeScaleBlockCycles() // Note: larger blocks get a smaller scalar, to help keep // them from becoming "too fat" and delaying branch tests. 
- switch( CHECK_EE_CYCLERATE ) + switch( Config.Hacks.EECycleRate ) { case 0: return s_nBlockCycles >> 3; @@ -1147,19 +1150,27 @@ static void iBranchTest(u32 newpc, bool noDispatch) // Equiv code to: // cpuRegs.cycle += blockcycles; // if( cpuRegs.cycle > g_nextBranchCycle ) { DoEvents(); } - MOV32MtoR(EAX, (uptr)&cpuRegs.cycle); - ADD32ItoR(EAX, eeScaleBlockCycles()); - MOV32RtoM((uptr)&cpuRegs.cycle, EAX); // update cycles - SUB32MtoR(EAX, (uptr)&g_nextBranchCycle); - if (!noDispatch) { - if (newpc == 0xffffffff) - JS32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 )); - else - iBranch(newpc, 1); + if (Config.Hacks.IdleLoopFF && s_nBlockFF) { + xMOV(eax, ptr32[&g_nextBranchCycle]); + xADD(ptr32[&cpuRegs.cycle], eeScaleBlockCycles()); + xCMP(eax, ptr32[&cpuRegs.cycle]); + xCMOVL(eax, ptr32[&cpuRegs.cycle]); + xMOV(ptr32[&cpuRegs.cycle], eax); + RET(); + } else { + MOV32MtoR(EAX, (uptr)&cpuRegs.cycle); + ADD32ItoR(EAX, eeScaleBlockCycles()); + MOV32RtoM((uptr)&cpuRegs.cycle, EAX); // update cycles + SUB32MtoR(EAX, (uptr)&g_nextBranchCycle); + if (!noDispatch) { + if (newpc == 0xffffffff) + JS32((uptr)DispatcherReg - ( (uptr)x86Ptr + 6 )); + else + iBranch(newpc, 1); + } + RET(); } - - RET(); } static void checkcodefn() @@ -1355,6 +1366,13 @@ void __fastcall dyna_block_discard(u32 start,u32 sz) Cpu->Clear(start,sz); } +void __fastcall dyna_block_reset(u32 start,u32 sz) +{ + DevCon::WriteLn("dyna_block_reset %08X , count %d", params start,sz); + Cpu->Clear(start & ~0xfffUL, 0x400); + mmap_MarkCountedRamPage(PSM(start), start & ~0xfffUL); +} + void recRecompile( const u32 startpc ) { u32 i = 0; @@ -1384,6 +1402,10 @@ void recRecompile( const u32 startpc ) x86Align(16); recPtr = x86Ptr; + s_nBlockFF = false; + if (HWADDR(startpc) == 0x81fc0) + s_nBlockFF = true; + s_pCurBlock = PC_GETBLOCK(startpc); assert(s_pCurBlock->GetFnptr() == (uptr)JITCompile @@ -1695,9 +1717,10 @@ StartRecomp: iDumpBlock(startpc, recPtr); #endif + static u16 manual_page[Ps2MemSize::Base >> 12]; u32 sz=(s_nEndBlock-startpc)>>2; - u32 inpage_ptr=startpc; + u32 inpage_ptr=HWADDR(startpc); u32 inpage_sz=sz*4; while(inpage_sz) @@ -1708,12 +1731,14 @@ StartRecomp: if(PageType!=-1) { - if (PageType==0) + if (PageType==0) { mmap_MarkCountedRamPage(PSM(inpage_ptr),inpage_ptr&~0xFFF); + manual_page[inpage_ptr >> 12] = 0; + } else { - MOV32ItoR(ECX, startpc); - MOV32ItoR(EDX, sz); + MOV32ItoR(ECX, inpage_ptr); + MOV32ItoR(EDX, pgsz); u32 lpc=inpage_ptr; u32 stg=pgsz; @@ -1726,6 +1751,11 @@ StartRecomp: stg-=4; lpc+=4; } + if (startpc != 0x81fc0) { + xADD(ptr16[&manual_page[inpage_ptr >> 12]], 1); + iJccKnownTarget(Jcc_Carry, dyna_block_reset); + } + DbgCon::WriteLn("Manual block @ %08X : %08X %d %d %d %d", params startpc,inpage_ptr,pgsz,0x1000-inpage_offs,inpage_sz,sz*4); } diff --git a/pcsx2/x86/ix86-32/recVTLB.cpp b/pcsx2/x86/ix86-32/recVTLB.cpp index 1f1960eb5d..19e8fe8408 100644 --- a/pcsx2/x86/ix86-32/recVTLB.cpp +++ b/pcsx2/x86/ix86-32/recVTLB.cpp @@ -367,7 +367,7 @@ void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const ) } // Shortcut for the INTC_STAT register, which many games like to spin on heavily. - if( (bits == 32) && !CHECK_INTC_STAT_HACK && (paddr == INTC_STAT) ) + if( (bits == 32) && !Config.Hacks.INTCSTATSlow && (paddr == INTC_STAT) ) { MOV32MtoR( EAX, (uptr)&psHu32( INTC_STAT ) ); } From 5744306a8f76ecf70e422d0041c0df0c8ec71e44 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 22 Apr 2009 02:31:31 +0000 Subject: [PATCH 125/143] Linux: Fix compilation, and add in the new hack. 
I may work on making the dialog look better in a bit... git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1041 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Linux/HacksDlg.cpp | 52 ++++----- pcsx2/Linux/Pref.cpp | 4 +- pcsx2/Linux/interface.c | 94 ++++++---------- pcsx2/Linux/pcsx2.glade | 237 ++++++++++++++++----------------------- 4 files changed, 160 insertions(+), 227 deletions(-) diff --git a/pcsx2/Linux/HacksDlg.cpp b/pcsx2/Linux/HacksDlg.cpp index 4bed23966a..a44849531a 100644 --- a/pcsx2/Linux/HacksDlg.cpp +++ b/pcsx2/Linux/HacksDlg.cpp @@ -57,7 +57,7 @@ void on_Speed_Hacks(GtkMenuItem *menuitem, gpointer user_data) { SpeedHacksDlg = create_SpeedHacksDlg(); - switch (CHECK_EE_CYCLERATE) + switch (Config.Hacks.EECycleRate) { case 0: set_checked(SpeedHacksDlg, "check_default_cycle_rate", true); @@ -68,20 +68,17 @@ void on_Speed_Hacks(GtkMenuItem *menuitem, gpointer user_data) case 2: set_checked(SpeedHacksDlg, "check_2_cycle_rate", true); break; - case 3: - set_checked(SpeedHacksDlg, "check_3_cycle_rate", true); - break; default: set_checked(SpeedHacksDlg, "check_default_cycle_rate", true); break; } + + set_checked(SpeedHacksDlg, "check_iop_cycle_rate", Config.Hacks.IOPCycleDouble); + set_checked(SpeedHacksDlg, "check_wait_cycles_sync_hack", Config.Hacks.WaitCycleExt); + set_checked(SpeedHacksDlg, "check_intc_sync_hack", Config.Hacks.INTCSTATSlow); + set_checked(SpeedHacksDlg, "check_idle_loop_fastforward", Config.Hacks.IdleLoopFF); - set_checked(SpeedHacksDlg, "check_iop_cycle_rate", CHECK_IOP_CYCLERATE); - set_checked(SpeedHacksDlg, "check_wait_cycles_sync_hack", CHECK_WAITCYCLE_HACK); - set_checked(SpeedHacksDlg, "check_intc_sync_hack", CHECK_INTC_STAT_HACK); - set_checked(SpeedHacksDlg, "check_ESC_hack", CHECK_ESCAPE_HACK); - - gtk_range_set_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "VUCycleHackScale")), Config.VUCycleHack); + gtk_range_set_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "VUCycleHackScale")), Config.Hacks.VUCycleSteal); gtk_widget_show_all(SpeedHacksDlg); gtk_widget_set_sensitive(MainWindow, FALSE); gtk_main(); @@ -89,25 +86,30 @@ void on_Speed_Hacks(GtkMenuItem *menuitem, gpointer user_data) void on_Speed_Hack_OK(GtkButton *button, gpointer user_data) { - Config.Hacks = 0; + PcsxConfig::Hacks_t newhacks; + newhacks.EECycleRate = 0; if is_checked(SpeedHacksDlg, "check_default_cycle_rate") - Config.Hacks = 0; + newhacks.EECycleRate = 0; else if is_checked(SpeedHacksDlg, "check_1_5_cycle_rate") - Config.Hacks = 1; + newhacks.EECycleRate = 1; else if is_checked(SpeedHacksDlg, "check_2_cycle_rate") - Config.Hacks = 2; - else if is_checked(SpeedHacksDlg, "check_3_cycle_rate") - Config.Hacks = 3; - - Config.Hacks |= is_checked(SpeedHacksDlg, "check_iop_cycle_rate") << 3; - Config.Hacks |= is_checked(SpeedHacksDlg, "check_wait_cycles_sync_hack") << 4; - Config.Hacks |= is_checked(SpeedHacksDlg, "check_intc_sync_hack") << 5; - Config.Hacks |= is_checked(SpeedHacksDlg, "check_ESC_hack") << 10; - - Config.VUCycleHack = gtk_range_get_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "VUCycleHackScale"))); - SaveConfig(); - + newhacks.EECycleRate = 2; + + newhacks.IOPCycleDouble = is_checked(SpeedHacksDlg, "check_iop_cycle_rate"); + newhacks.WaitCycleExt = is_checked(SpeedHacksDlg, "check_wait_cycles_sync_hack"); + newhacks.INTCSTATSlow = is_checked(SpeedHacksDlg, "check_intc_sync_hack"); + newhacks.IdleLoopFF = is_checked(SpeedHacksDlg, "check_idle_loop_fastforward"); + + newhacks.VUCycleSteal = gtk_range_get_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "VUCycleHackScale"))); + 
+ if (memcmp(&newhacks, &Config.Hacks, sizeof(newhacks))) + { + SysRestorableReset(); + Config.Hacks = newhacks; + SaveConfig(); + } + gtk_widget_destroy(SpeedHacksDlg); gtk_widget_set_sensitive(MainWindow, TRUE); gtk_main_quit(); diff --git a/pcsx2/Linux/Pref.cpp b/pcsx2/Linux/Pref.cpp index 02fb3ede3f..7db9dd2d4f 100644 --- a/pcsx2/Linux/Pref.cpp +++ b/pcsx2/Linux/Pref.cpp @@ -111,8 +111,8 @@ int LoadConfig() GetValuel("IdleLoopFF", Config.Hacks.IdleLoopFF); GetValuel("ESCExits", Config.Hacks.ESCExits); - if (Config.VUCycleHack < 0 || Config.VUCycleHack > 4) - Config.VUCycleHack = 0; + if (Config.Hacks.VUCycleSteal < 0 || Config.Hacks.VUCycleSteal > 4) + Config.Hacks.VUCycleSteal = 0; GetValuel("Fixes", Config.GameFixes); GetValuel("CustomFps", Config.CustomFps); diff --git a/pcsx2/Linux/interface.c b/pcsx2/Linux/interface.c index 843e490537..9872687d1a 100644 --- a/pcsx2/Linux/interface.c +++ b/pcsx2/Linux/interface.c @@ -625,9 +625,6 @@ create_SpeedHacksDlg (void) GtkWidget *label93; GtkWidget *check_2_cycle_rate; GtkWidget *label94; - GtkWidget *check_3_cycle_rate; - GtkWidget *label95; - GtkWidget *hseparator1; GtkWidget *label91; GtkWidget *label105; GtkWidget *vbox60; @@ -638,12 +635,12 @@ create_SpeedHacksDlg (void) GtkWidget *check_intc_sync_hack; GtkWidget *label101; GtkWidget *vbox71; + GtkWidget *check_idle_loop_fastforward; + GtkWidget *label110; + GtkWidget *hseparator1; + GtkWidget *label109; GtkWidget *VUCycleHackScale; GtkWidget *label108; - GtkWidget *frame36; - GtkWidget *alignment31; - GtkWidget *check_ESC_hack; - GtkWidget *label89; GtkWidget *dialog_action_area3; GtkWidget *button99; GtkWidget *button98; @@ -730,26 +727,7 @@ create_SpeedHacksDlg (void) gtk_label_set_line_wrap (GTK_LABEL (label94), TRUE); gtk_misc_set_alignment (GTK_MISC (label94), 0.36, 0.5); - check_3_cycle_rate = gtk_radio_button_new_with_mnemonic (NULL, _("Use x3 Cycle Rate")); - gtk_widget_set_name (check_3_cycle_rate, "check_3_cycle_rate"); - gtk_widget_show (check_3_cycle_rate); - gtk_box_pack_start (GTK_BOX (vbox61), check_3_cycle_rate, FALSE, FALSE, 0); - gtk_radio_button_set_group (GTK_RADIO_BUTTON (check_3_cycle_rate), check_default_cycle_rate_group); - check_default_cycle_rate_group = gtk_radio_button_get_group (GTK_RADIO_BUTTON (check_3_cycle_rate)); - - label95 = gtk_label_new (_("Big speedup, but causes flickering or missing geometry on many games.")); - gtk_widget_set_name (label95, "label95"); - gtk_widget_show (label95); - gtk_box_pack_start (GTK_BOX (vbox61), label95, FALSE, FALSE, 0); - gtk_label_set_line_wrap (GTK_LABEL (label95), TRUE); - gtk_misc_set_alignment (GTK_MISC (label95), 0.24, 0.5); - - hseparator1 = gtk_hseparator_new (); - gtk_widget_set_name (hseparator1, "hseparator1"); - gtk_widget_show (hseparator1); - gtk_box_pack_start (GTK_BOX (vbox61), hseparator1, FALSE, FALSE, 0); - - label91 = gtk_label_new (_("Important: X2 & X3 sync hacks *will* cause choppy/skippy audio on many FMV movies.\nKnown to work well with a couple games, namely Shadow of the Colossus (but breaks most other games).")); + label91 = gtk_label_new (_("Important: the X2 sync hack *will* cause choppy/skippy audio on many FMV movies.")); gtk_widget_set_name (label91, "label91"); gtk_widget_show (label91); gtk_box_pack_start (GTK_BOX (vbox61), label91, FALSE, FALSE, 0); @@ -788,7 +766,7 @@ create_SpeedHacksDlg (void) gtk_box_pack_start (GTK_BOX (vbox60), label97, FALSE, FALSE, 0); gtk_label_set_line_wrap (GTK_LABEL (label97), TRUE); - check_intc_sync_hack = gtk_check_button_new_with_mnemonic (_("INTC 
Sync Hack (experimental)")); + check_intc_sync_hack = gtk_check_button_new_with_mnemonic (_("INTC Sync Hack")); gtk_widget_set_name (check_intc_sync_hack, "check_intc_sync_hack"); gtk_widget_show (check_intc_sync_hack); gtk_box_pack_start (GTK_BOX (vbox60), check_intc_sync_hack, FALSE, FALSE, 0); @@ -804,38 +782,37 @@ create_SpeedHacksDlg (void) gtk_widget_show (vbox71); gtk_box_pack_start (GTK_BOX (vbox60), vbox71, TRUE, TRUE, 0); + check_idle_loop_fastforward = gtk_check_button_new_with_mnemonic (_("Idle Loop Fast-Forward (experimental)")); + gtk_widget_set_name (check_idle_loop_fastforward, "check_idle_loop_fastforward"); + gtk_widget_show (check_idle_loop_fastforward); + gtk_box_pack_start (GTK_BOX (vbox71), check_idle_loop_fastforward, FALSE, FALSE, 0); + + label110 = gtk_label_new (_("Speedup for a few games, including FFX with no known side effects. More later.")); + gtk_widget_set_name (label110, "label110"); + gtk_widget_show (label110); + gtk_box_pack_start (GTK_BOX (vbox71), label110, FALSE, FALSE, 0); + + hseparator1 = gtk_hseparator_new (); + gtk_widget_set_name (hseparator1, "hseparator1"); + gtk_widget_show (hseparator1); + gtk_box_pack_start (GTK_BOX (vbox60), hseparator1, FALSE, FALSE, 0); + + label109 = gtk_label_new (_("VU Cycle Stealing (Speedup for 3d geometry)")); + gtk_widget_set_name (label109, "label109"); + gtk_widget_show (label109); + gtk_box_pack_start (GTK_BOX (vbox60), label109, FALSE, FALSE, 0); + VUCycleHackScale = gtk_hscale_new (GTK_ADJUSTMENT (gtk_adjustment_new (0, 0, 4, 1, 0, 0))); gtk_widget_set_name (VUCycleHackScale, "VUCycleHackScale"); gtk_widget_show (VUCycleHackScale); - gtk_box_pack_start (GTK_BOX (vbox71), VUCycleHackScale, TRUE, TRUE, 0); + gtk_box_pack_start (GTK_BOX (vbox60), VUCycleHackScale, TRUE, TRUE, 0); gtk_scale_set_digits (GTK_SCALE (VUCycleHackScale), 0); - label108 = gtk_label_new (_("Speedup for 3D games.\n0: No speedup.\n1: Slight speedup for 3D geometry, should work with most games.\n2: Moderate speedup for 3D geometry, should work with most games with minor problems.\n3: Large speedup for 3D geometry, may break many games and make others skip frames.\n4: Very large speedup for 3D geometry, will break games in interesting ways.")); + label108 = gtk_label_new (_("0: No speedup.\n1: Slight speedup, should work with most games.\n2: Moderate speedup, should work with most games with minor problems.\n3: Large speedup, may break many games and make others skip frames.\n4: Very large speedup, will break games in interesting ways.")); gtk_widget_set_name (label108, "label108"); gtk_widget_show (label108); - gtk_box_pack_start (GTK_BOX (vbox71), label108, FALSE, FALSE, 0); - - frame36 = gtk_frame_new (NULL); - gtk_widget_set_name (frame36, "frame36"); - gtk_widget_show (frame36); - gtk_box_pack_start (GTK_BOX (vbox59), frame36, FALSE, FALSE, 0); - - alignment31 = gtk_alignment_new (0.5, 0.5, 1, 1); - gtk_widget_set_name (alignment31, "alignment31"); - gtk_widget_show (alignment31); - gtk_container_add (GTK_CONTAINER (frame36), alignment31); - gtk_alignment_set_padding (GTK_ALIGNMENT (alignment31), 0, 0, 12, 0); - - check_ESC_hack = gtk_check_button_new_with_mnemonic (_("Escape Hack - Use Esc key to fully exit PCSX2.")); - gtk_widget_set_name (check_ESC_hack, "check_ESC_hack"); - gtk_widget_show (check_ESC_hack); - gtk_container_add (GTK_CONTAINER (alignment31), check_ESC_hack); - - label89 = gtk_label_new (_("Miscellaneous")); - gtk_widget_set_name (label89, "label89"); - gtk_widget_show (label89); - gtk_frame_set_label_widget 
(GTK_FRAME (frame36), label89); - gtk_label_set_use_markup (GTK_LABEL (label89), TRUE); + gtk_box_pack_start (GTK_BOX (vbox60), label108, FALSE, FALSE, 0); + gtk_label_set_line_wrap (GTK_LABEL (label108), TRUE); dialog_action_area3 = GTK_DIALOG (SpeedHacksDlg)->action_area; gtk_widget_set_name (dialog_action_area3, "dialog_action_area3"); @@ -876,9 +853,6 @@ create_SpeedHacksDlg (void) GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label93, "label93"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, check_2_cycle_rate, "check_2_cycle_rate"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label94, "label94"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, check_3_cycle_rate, "check_3_cycle_rate"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label95, "label95"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, hseparator1, "hseparator1"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label91, "label91"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label105, "label105"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, vbox60, "vbox60"); @@ -889,12 +863,12 @@ create_SpeedHacksDlg (void) GLADE_HOOKUP_OBJECT (SpeedHacksDlg, check_intc_sync_hack, "check_intc_sync_hack"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label101, "label101"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, vbox71, "vbox71"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, check_idle_loop_fastforward, "check_idle_loop_fastforward"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label110, "label110"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, hseparator1, "hseparator1"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label109, "label109"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, VUCycleHackScale, "VUCycleHackScale"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label108, "label108"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, frame36, "frame36"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, alignment31, "alignment31"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, check_ESC_hack, "check_ESC_hack"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label89, "label89"); GLADE_HOOKUP_OBJECT_NO_REF (SpeedHacksDlg, dialog_action_area3, "dialog_action_area3"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, button99, "button99"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, button98, "button98"); diff --git a/pcsx2/Linux/pcsx2.glade b/pcsx2/Linux/pcsx2.glade index b7e95d4d91..40e23c19e7 100644 --- a/pcsx2/Linux/pcsx2.glade +++ b/pcsx2/Linux/pcsx2.glade @@ -1433,67 +1433,10 @@ If you have problems, Disable all of these and try again. - - - True - True - Use x3 Cycle Rate - True - GTK_RELIEF_NORMAL - True - False - False - True - check_default_cycle_rate - - - 0 - False - False - - - - - - True - Big speedup, but causes flickering or missing geometry on many games. - False - False - GTK_JUSTIFY_LEFT - True - False - 0.239999994636 - 0.5 - 0 - 0 - PANGO_ELLIPSIZE_NONE - -1 - False - 0 - - - 0 - False - False - - - - - - True - - - 0 - False - False - - - True - Important: X2 & X3 sync hacks *will* cause choppy/skippy audio on many FMV movies. -Known to work well with a couple games, namely Shadow of the Colossus (but breaks most other games). + Important: the X2 sync hack *will* cause choppy/skippy audio on many FMV movies. 
False False GTK_JUSTIFY_LEFT @@ -1649,7 +1592,7 @@ Known to work well with a couple games, namely Shadow of the Colossus (but break True True - INTC Sync Hack (experimental) + INTC Sync Hack True GTK_RELIEF_NORMAL True @@ -1696,32 +1639,28 @@ Known to work well with a couple games, namely Shadow of the Colossus (but break 0 - + True True - True - GTK_POS_TOP - 0 - GTK_UPDATE_CONTINUOUS - False - 0 0 4 1 0 0 + Idle Loop Fast-Forward (experimental) + True + GTK_RELIEF_NORMAL + True + False + False + True 0 - True - True + False + False - + True - Speedup for 3D games. -0: No speedup. -1: Slight speedup for 3D geometry, should work with most games. -2: Moderate speedup for 3D geometry, should work with most games with minor problems. -3: Large speedup for 3D geometry, may break many games and make others skip frames. -4: Very large speedup for 3D geometry, will break games in interesting ways. + Speedup for a few games, including FFX with no known side effects. More later. False False GTK_JUSTIFY_LEFT @@ -1749,6 +1688,89 @@ Known to work well with a couple games, namely Shadow of the Colossus (but break True + + + + True + + + 0 + False + False + + + + + + True + VU Cycle Stealing (Speedup for 3d geometry) + False + False + GTK_JUSTIFY_LEFT + False + False + 0.5 + 0.5 + 0 + 0 + PANGO_ELLIPSIZE_NONE + -1 + False + 0 + + + 0 + False + False + + + + + + True + True + True + GTK_POS_TOP + 0 + GTK_UPDATE_CONTINUOUS + False + 0 0 4 1 0 0 + + + 0 + True + True + + + + + + True + 0: No speedup. +1: Slight speedup, should work with most games. +2: Moderate speedup, should work with most games with minor problems. +3: Large speedup, may break many games and make others skip frames. +4: Very large speedup, will break games in interesting ways. + False + False + GTK_JUSTIFY_LEFT + True + False + 0.5 + 0.5 + 0 + 0 + PANGO_ELLIPSIZE_NONE + -1 + False + 0 + + + 0 + False + False + + 0 @@ -1763,71 +1785,6 @@ Known to work well with a couple games, namely Shadow of the Colossus (but break True - - - - True - 0 - 0.5 - GTK_SHADOW_ETCHED_IN - - - - True - 0.5 - 0.5 - 1 - 1 - 0 - 0 - 12 - 0 - - - - True - True - Escape Hack - Use Esc key to fully exit PCSX2. - True - GTK_RELIEF_NORMAL - True - False - False - True - - - - - - - - True - <b>Miscellaneous</b> - False - True - GTK_JUSTIFY_LEFT - False - False - 0.5 - 0.5 - 0 - 0 - PANGO_ELLIPSIZE_NONE - -1 - False - 0 - - - label_item - - - - - 0 - False - False - - 0 From 9380ccc85bf81d40673192af03a324550d8be283 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 22 Apr 2009 03:19:30 +0000 Subject: [PATCH 126/143] Linux: Clean up the hack dialog in Linux. 
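The cleanup below (together with the follow-up in PATCH 127) settles on a slider-plus-label pattern for both the EE cycle rate and VU cycle stealing settings: a GtkHScale whose value_changed handler rewrites a description label from a fixed string table, with the handler also run once after the initial gtk_range_set_value() so the text matches the restored setting. The snippet below is a standalone illustration of that pattern, not PCSX2 code; the widget names and the strings in labels[] are placeholders rather than the real dialog text.

    #include <gtk/gtk.h>

    static const char *labels[3] = {
        "Default Cycle Rate: most compatible option.",
        "x1.5 Cycle Rate: moderate speedup.",
        "x2 Cycle Rate: big speedup, may affect FMV audio."
    };

    // Mirrors on_ee_slider_changed()/on_vu_slider_changed(): read the slider,
    // show the matching description.
    static void on_slider_changed(GtkRange *range, gpointer user_data)
    {
        int i = (int) gtk_range_get_value(range);
        gtk_label_set_text(GTK_LABEL(user_data), labels[i]);
    }

    int main(int argc, char **argv)
    {
        gtk_init(&argc, &argv);

        GtkWidget *window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
        GtkWidget *box    = gtk_vbox_new(FALSE, 2);
        GtkWidget *scale  = gtk_hscale_new(GTK_ADJUSTMENT(gtk_adjustment_new(0, 0, 2, 1, 0, 0)));
        GtkWidget *label  = gtk_label_new(labels[0]);

        gtk_scale_set_digits(GTK_SCALE(scale), 0);
        gtk_scale_set_draw_value(GTK_SCALE(scale), FALSE);
        gtk_box_pack_start(GTK_BOX(box), scale, FALSE, FALSE, 0);
        gtk_box_pack_start(GTK_BOX(box), label, FALSE, FALSE, 0);
        gtk_container_add(GTK_CONTAINER(window), box);

        g_signal_connect(scale, "value_changed", G_CALLBACK(on_slider_changed), label);
        g_signal_connect(window, "destroy", G_CALLBACK(gtk_main_quit), NULL);

        // Restore the saved setting, then run the handler once so the label is
        // correct even when the restored value equals the slider's default
        // (the detail PATCH 127 below addresses in the real dialog).
        gtk_range_set_value(GTK_RANGE(scale), 1);
        on_slider_changed(GTK_RANGE(scale), label);

        gtk_widget_show_all(window);
        gtk_main();
        return 0;
    }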
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1042 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Linux/HacksDlg.cpp | 41 ++-- pcsx2/Linux/Linux.h | 15 ++ pcsx2/Linux/callbacks.h | 8 + pcsx2/Linux/interface.c | 159 ++++++------- pcsx2/Linux/pcsx2.glade | 475 ++++++++++++++++++--------------------- 5 files changed, 344 insertions(+), 354 deletions(-) diff --git a/pcsx2/Linux/HacksDlg.cpp b/pcsx2/Linux/HacksDlg.cpp index a44849531a..90b83f8b48 100644 --- a/pcsx2/Linux/HacksDlg.cpp +++ b/pcsx2/Linux/HacksDlg.cpp @@ -56,22 +56,6 @@ void on_Game_Fix_OK(GtkButton *button, gpointer user_data) void on_Speed_Hacks(GtkMenuItem *menuitem, gpointer user_data) { SpeedHacksDlg = create_SpeedHacksDlg(); - - switch (Config.Hacks.EECycleRate) - { - case 0: - set_checked(SpeedHacksDlg, "check_default_cycle_rate", true); - break; - case 1: - set_checked(SpeedHacksDlg, "check_1_5_cycle_rate", true); - break; - case 2: - set_checked(SpeedHacksDlg, "check_2_cycle_rate", true); - break; - default: - set_checked(SpeedHacksDlg, "check_default_cycle_rate", true); - break; - } set_checked(SpeedHacksDlg, "check_iop_cycle_rate", Config.Hacks.IOPCycleDouble); set_checked(SpeedHacksDlg, "check_wait_cycles_sync_hack", Config.Hacks.WaitCycleExt); @@ -79,22 +63,32 @@ void on_Speed_Hacks(GtkMenuItem *menuitem, gpointer user_data) set_checked(SpeedHacksDlg, "check_idle_loop_fastforward", Config.Hacks.IdleLoopFF); gtk_range_set_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "VUCycleHackScale")), Config.Hacks.VUCycleSteal); + gtk_range_set_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "EECycleHackScale")), Config.Hacks.EECycleRate); gtk_widget_show_all(SpeedHacksDlg); gtk_widget_set_sensitive(MainWindow, FALSE); gtk_main(); } +void on_vu_slider_changed(GtkRange *range, gpointer user_data) +{ + int i; + + i = gtk_range_get_value(range); + gtk_label_set_text(GTK_LABEL(lookup_widget(SpeedHacksDlg,"vu_cycle_stealing_label")),vu_stealing_labels[i]); +} + +void on_ee_slider_changed(GtkRange *range, gpointer user_data) +{ + int i; + + i = gtk_range_get_value(range); + gtk_label_set_text(GTK_LABEL(lookup_widget(SpeedHacksDlg,"ee_cycle_label")),ee_cycle_labels[i]); +} + void on_Speed_Hack_OK(GtkButton *button, gpointer user_data) { PcsxConfig::Hacks_t newhacks; newhacks.EECycleRate = 0; - - if is_checked(SpeedHacksDlg, "check_default_cycle_rate") - newhacks.EECycleRate = 0; - else if is_checked(SpeedHacksDlg, "check_1_5_cycle_rate") - newhacks.EECycleRate = 1; - else if is_checked(SpeedHacksDlg, "check_2_cycle_rate") - newhacks.EECycleRate = 2; newhacks.IOPCycleDouble = is_checked(SpeedHacksDlg, "check_iop_cycle_rate"); newhacks.WaitCycleExt = is_checked(SpeedHacksDlg, "check_wait_cycles_sync_hack"); @@ -102,6 +96,7 @@ void on_Speed_Hack_OK(GtkButton *button, gpointer user_data) newhacks.IdleLoopFF = is_checked(SpeedHacksDlg, "check_idle_loop_fastforward"); newhacks.VUCycleSteal = gtk_range_get_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "VUCycleHackScale"))); + newhacks.EECycleRate = gtk_range_get_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "EECycleHackScale"))); if (memcmp(&newhacks, &Config.Hacks, sizeof(newhacks))) { diff --git a/pcsx2/Linux/Linux.h b/pcsx2/Linux/Linux.h index 5ca6f7f9f8..e1cf6b6272 100644 --- a/pcsx2/Linux/Linux.h +++ b/pcsx2/Linux/Linux.h @@ -101,6 +101,21 @@ char iop_log_names[9][32] = "GPU Log" }; +char vu_stealing_labels[5][256] = +{ + "0: No speedup.", + "1: Slight speedup, should work with most games.", + "2: Moderate speedup, should work with most games with minor problems.", + "3: Large speedup, may 
break many games and make others skip frames.", + "4: Very large speedup, will break games in interesting ways." +}; + +char ee_cycle_labels[3][256] = +{ + "Default Cycle Rate: Most compatible option - recommended for everyone with high-end machines.", + "x1.5 Cycle Rate: Moderate speedup, and works well with most games.", + "x2 Cycle Rate: Big speedup! Works well with many games." +}; //Tri-Ace - IDC_GAMEFIX2 #define FLAG_VU_ADD_SUB 0x1 // Persona3/4 - IDC_GAMEFIX4 diff --git a/pcsx2/Linux/callbacks.h b/pcsx2/Linux/callbacks.h index a70ca55935..771418a3ca 100644 --- a/pcsx2/Linux/callbacks.h +++ b/pcsx2/Linux/callbacks.h @@ -13,6 +13,14 @@ void On_Dialog_Cancelled (GtkButton *button, gpointer user_data); +void +on_ee_slider_changed (GtkRange *range, + gpointer user_data); + +void +on_vu_slider_changed (GtkRange *range, + gpointer user_data); + void on_Speed_Hack_OK (GtkButton *button, gpointer user_data); diff --git a/pcsx2/Linux/interface.c b/pcsx2/Linux/interface.c index 9872687d1a..035bc3fe03 100644 --- a/pcsx2/Linux/interface.c +++ b/pcsx2/Linux/interface.c @@ -615,18 +615,21 @@ create_SpeedHacksDlg (void) GtkWidget *vbox59; GtkWidget *label88; GtkWidget *hbox39; + GtkWidget *vbox72; GtkWidget *frame37; GtkWidget *alignment32; GtkWidget *vbox61; - GtkWidget *check_default_cycle_rate; - GSList *check_default_cycle_rate_group = NULL; - GtkWidget *label98; - GtkWidget *check_1_5_cycle_rate; - GtkWidget *label93; - GtkWidget *check_2_cycle_rate; - GtkWidget *label94; + GtkWidget *EECycleHackScale; + GtkWidget *ee_cycle_label; + GtkWidget *hseparator2; GtkWidget *label91; GtkWidget *label105; + GtkWidget *frame39; + GtkWidget *alignment34; + GtkWidget *vbox73; + GtkWidget *VUCycleHackScale; + GtkWidget *vu_cycle_stealing_label; + GtkWidget *label111; GtkWidget *vbox60; GtkWidget *check_iop_cycle_rate; GtkWidget *label96; @@ -638,9 +641,6 @@ create_SpeedHacksDlg (void) GtkWidget *check_idle_loop_fastforward; GtkWidget *label110; GtkWidget *hseparator1; - GtkWidget *label109; - GtkWidget *VUCycleHackScale; - GtkWidget *label108; GtkWidget *dialog_action_area3; GtkWidget *button99; GtkWidget *button98; @@ -669,10 +669,15 @@ create_SpeedHacksDlg (void) gtk_widget_show (hbox39); gtk_box_pack_start (GTK_BOX (vbox59), hbox39, TRUE, TRUE, 0); + vbox72 = gtk_vbox_new (FALSE, 0); + gtk_widget_set_name (vbox72, "vbox72"); + gtk_widget_show (vbox72); + gtk_box_pack_start (GTK_BOX (hbox39), vbox72, TRUE, TRUE, 0); + frame37 = gtk_frame_new (NULL); gtk_widget_set_name (frame37, "frame37"); gtk_widget_show (frame37); - gtk_box_pack_start (GTK_BOX (hbox39), frame37, TRUE, TRUE, 0); + gtk_box_pack_start (GTK_BOX (vbox72), frame37, TRUE, TRUE, 0); alignment32 = gtk_alignment_new (0.5, 0.5, 1, 1); gtk_widget_set_name (alignment32, "alignment32"); @@ -685,47 +690,23 @@ create_SpeedHacksDlg (void) gtk_widget_show (vbox61); gtk_container_add (GTK_CONTAINER (alignment32), vbox61); - check_default_cycle_rate = gtk_radio_button_new_with_mnemonic (NULL, _("Default Cycle Rate")); - gtk_widget_set_name (check_default_cycle_rate, "check_default_cycle_rate"); - gtk_widget_show (check_default_cycle_rate); - gtk_box_pack_start (GTK_BOX (vbox61), check_default_cycle_rate, FALSE, FALSE, 0); - gtk_radio_button_set_group (GTK_RADIO_BUTTON (check_default_cycle_rate), check_default_cycle_rate_group); - check_default_cycle_rate_group = gtk_radio_button_get_group (GTK_RADIO_BUTTON (check_default_cycle_rate)); + EECycleHackScale = gtk_hscale_new (GTK_ADJUSTMENT (gtk_adjustment_new (0, 0, 2, 1, 0, 0))); + gtk_widget_set_name 
(EECycleHackScale, "EECycleHackScale"); + gtk_widget_show (EECycleHackScale); + gtk_box_pack_start (GTK_BOX (vbox61), EECycleHackScale, FALSE, FALSE, 0); + gtk_scale_set_draw_value (GTK_SCALE (EECycleHackScale), FALSE); + gtk_scale_set_digits (GTK_SCALE (EECycleHackScale), 0); - label98 = gtk_label_new (_("Most compatible option - recommended for everyone with high-end machines.")); - gtk_widget_set_name (label98, "label98"); - gtk_widget_show (label98); - gtk_box_pack_start (GTK_BOX (vbox61), label98, FALSE, FALSE, 0); - gtk_label_set_line_wrap (GTK_LABEL (label98), TRUE); - gtk_misc_set_alignment (GTK_MISC (label98), 0.29, 0.5); + ee_cycle_label = gtk_label_new (_("Most compatible option - recommended for everyone with high-end machines.")); + gtk_widget_set_name (ee_cycle_label, "ee_cycle_label"); + gtk_widget_show (ee_cycle_label); + gtk_box_pack_start (GTK_BOX (vbox61), ee_cycle_label, FALSE, FALSE, 0); + gtk_label_set_line_wrap (GTK_LABEL (ee_cycle_label), TRUE); - check_1_5_cycle_rate = gtk_radio_button_new_with_mnemonic (NULL, _("Use x1.5 Cycle Rate")); - gtk_widget_set_name (check_1_5_cycle_rate, "check_1_5_cycle_rate"); - gtk_widget_show (check_1_5_cycle_rate); - gtk_box_pack_start (GTK_BOX (vbox61), check_1_5_cycle_rate, FALSE, FALSE, 0); - gtk_radio_button_set_group (GTK_RADIO_BUTTON (check_1_5_cycle_rate), check_default_cycle_rate_group); - check_default_cycle_rate_group = gtk_radio_button_get_group (GTK_RADIO_BUTTON (check_1_5_cycle_rate)); - - label93 = gtk_label_new (_("Moderate speedup, and works well with most games.")); - gtk_widget_set_name (label93, "label93"); - gtk_widget_show (label93); - gtk_box_pack_start (GTK_BOX (vbox61), label93, FALSE, FALSE, 0); - gtk_label_set_line_wrap (GTK_LABEL (label93), TRUE); - gtk_misc_set_alignment (GTK_MISC (label93), 0.29, 0.5); - - check_2_cycle_rate = gtk_radio_button_new_with_mnemonic (NULL, _("Use x2 Cycle Rate")); - gtk_widget_set_name (check_2_cycle_rate, "check_2_cycle_rate"); - gtk_widget_show (check_2_cycle_rate); - gtk_box_pack_start (GTK_BOX (vbox61), check_2_cycle_rate, FALSE, FALSE, 0); - gtk_radio_button_set_group (GTK_RADIO_BUTTON (check_2_cycle_rate), check_default_cycle_rate_group); - check_default_cycle_rate_group = gtk_radio_button_get_group (GTK_RADIO_BUTTON (check_2_cycle_rate)); - - label94 = gtk_label_new (_("Big speedup! 
Works well with many games.")); - gtk_widget_set_name (label94, "label94"); - gtk_widget_show (label94); - gtk_box_pack_start (GTK_BOX (vbox61), label94, FALSE, FALSE, 0); - gtk_label_set_line_wrap (GTK_LABEL (label94), TRUE); - gtk_misc_set_alignment (GTK_MISC (label94), 0.36, 0.5); + hseparator2 = gtk_hseparator_new (); + gtk_widget_set_name (hseparator2, "hseparator2"); + gtk_widget_show (hseparator2); + gtk_box_pack_start (GTK_BOX (vbox61), hseparator2, FALSE, FALSE, 0); label91 = gtk_label_new (_("Important: the X2 sync hack *will* cause choppy/skippy audio on many FMV movies.")); gtk_widget_set_name (label91, "label91"); @@ -739,6 +720,41 @@ create_SpeedHacksDlg (void) gtk_frame_set_label_widget (GTK_FRAME (frame37), label105); gtk_label_set_use_markup (GTK_LABEL (label105), TRUE); + frame39 = gtk_frame_new (NULL); + gtk_widget_set_name (frame39, "frame39"); + gtk_widget_show (frame39); + gtk_box_pack_start (GTK_BOX (vbox72), frame39, TRUE, TRUE, 0); + + alignment34 = gtk_alignment_new (0.5, 0.5, 1, 1); + gtk_widget_set_name (alignment34, "alignment34"); + gtk_widget_show (alignment34); + gtk_container_add (GTK_CONTAINER (frame39), alignment34); + gtk_alignment_set_padding (GTK_ALIGNMENT (alignment34), 0, 0, 12, 0); + + vbox73 = gtk_vbox_new (FALSE, 0); + gtk_widget_set_name (vbox73, "vbox73"); + gtk_widget_show (vbox73); + gtk_container_add (GTK_CONTAINER (alignment34), vbox73); + + VUCycleHackScale = gtk_hscale_new (GTK_ADJUSTMENT (gtk_adjustment_new (0, 0, 4, 1, 0, 0))); + gtk_widget_set_name (VUCycleHackScale, "VUCycleHackScale"); + gtk_widget_show (VUCycleHackScale); + gtk_box_pack_start (GTK_BOX (vbox73), VUCycleHackScale, TRUE, TRUE, 0); + gtk_scale_set_draw_value (GTK_SCALE (VUCycleHackScale), FALSE); + gtk_scale_set_digits (GTK_SCALE (VUCycleHackScale), 0); + + vu_cycle_stealing_label = gtk_label_new (_("2: Moderate speedup, should work with most games with minor problems.")); + gtk_widget_set_name (vu_cycle_stealing_label, "vu_cycle_stealing_label"); + gtk_widget_show (vu_cycle_stealing_label); + gtk_box_pack_start (GTK_BOX (vbox73), vu_cycle_stealing_label, FALSE, FALSE, 0); + gtk_label_set_line_wrap (GTK_LABEL (vu_cycle_stealing_label), TRUE); + + label111 = gtk_label_new (_("VU Cycle Stealing (Speedup for 3d geometry)")); + gtk_widget_set_name (label111, "label111"); + gtk_widget_show (label111); + gtk_frame_set_label_widget (GTK_FRAME (frame39), label111); + gtk_label_set_use_markup (GTK_LABEL (label111), TRUE); + vbox60 = gtk_vbox_new (FALSE, 0); gtk_widget_set_name (vbox60, "vbox60"); gtk_widget_show (vbox60); @@ -791,29 +807,13 @@ create_SpeedHacksDlg (void) gtk_widget_set_name (label110, "label110"); gtk_widget_show (label110); gtk_box_pack_start (GTK_BOX (vbox71), label110, FALSE, FALSE, 0); + gtk_label_set_line_wrap (GTK_LABEL (label110), TRUE); hseparator1 = gtk_hseparator_new (); gtk_widget_set_name (hseparator1, "hseparator1"); gtk_widget_show (hseparator1); gtk_box_pack_start (GTK_BOX (vbox60), hseparator1, FALSE, FALSE, 0); - label109 = gtk_label_new (_("VU Cycle Stealing (Speedup for 3d geometry)")); - gtk_widget_set_name (label109, "label109"); - gtk_widget_show (label109); - gtk_box_pack_start (GTK_BOX (vbox60), label109, FALSE, FALSE, 0); - - VUCycleHackScale = gtk_hscale_new (GTK_ADJUSTMENT (gtk_adjustment_new (0, 0, 4, 1, 0, 0))); - gtk_widget_set_name (VUCycleHackScale, "VUCycleHackScale"); - gtk_widget_show (VUCycleHackScale); - gtk_box_pack_start (GTK_BOX (vbox60), VUCycleHackScale, TRUE, TRUE, 0); - gtk_scale_set_digits (GTK_SCALE 
(VUCycleHackScale), 0); - - label108 = gtk_label_new (_("0: No speedup.\n1: Slight speedup, should work with most games.\n2: Moderate speedup, should work with most games with minor problems.\n3: Large speedup, may break many games and make others skip frames.\n4: Very large speedup, will break games in interesting ways.")); - gtk_widget_set_name (label108, "label108"); - gtk_widget_show (label108); - gtk_box_pack_start (GTK_BOX (vbox60), label108, FALSE, FALSE, 0); - gtk_label_set_line_wrap (GTK_LABEL (label108), TRUE); - dialog_action_area3 = GTK_DIALOG (SpeedHacksDlg)->action_area; gtk_widget_set_name (dialog_action_area3, "dialog_action_area3"); gtk_widget_show (dialog_action_area3); @@ -831,6 +831,12 @@ create_SpeedHacksDlg (void) gtk_dialog_add_action_widget (GTK_DIALOG (SpeedHacksDlg), button98, GTK_RESPONSE_CANCEL); GTK_WIDGET_SET_FLAGS (button98, GTK_CAN_DEFAULT); + g_signal_connect ((gpointer) EECycleHackScale, "value_changed", + G_CALLBACK (on_ee_slider_changed), + NULL); + g_signal_connect ((gpointer) VUCycleHackScale, "value_changed", + G_CALLBACK (on_vu_slider_changed), + NULL); g_signal_connect ((gpointer) button99, "clicked", G_CALLBACK (on_Speed_Hack_OK), NULL); @@ -844,17 +850,21 @@ create_SpeedHacksDlg (void) GLADE_HOOKUP_OBJECT (SpeedHacksDlg, vbox59, "vbox59"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label88, "label88"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, hbox39, "hbox39"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, vbox72, "vbox72"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, frame37, "frame37"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, alignment32, "alignment32"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, vbox61, "vbox61"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, check_default_cycle_rate, "check_default_cycle_rate"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label98, "label98"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, check_1_5_cycle_rate, "check_1_5_cycle_rate"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label93, "label93"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, check_2_cycle_rate, "check_2_cycle_rate"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label94, "label94"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, EECycleHackScale, "EECycleHackScale"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, ee_cycle_label, "ee_cycle_label"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, hseparator2, "hseparator2"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label91, "label91"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label105, "label105"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, frame39, "frame39"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, alignment34, "alignment34"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, vbox73, "vbox73"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, VUCycleHackScale, "VUCycleHackScale"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, vu_cycle_stealing_label, "vu_cycle_stealing_label"); + GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label111, "label111"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, vbox60, "vbox60"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, check_iop_cycle_rate, "check_iop_cycle_rate"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label96, "label96"); @@ -866,9 +876,6 @@ create_SpeedHacksDlg (void) GLADE_HOOKUP_OBJECT (SpeedHacksDlg, check_idle_loop_fastforward, "check_idle_loop_fastforward"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label110, "label110"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, hseparator1, "hseparator1"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label109, "label109"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, VUCycleHackScale, "VUCycleHackScale"); - GLADE_HOOKUP_OBJECT (SpeedHacksDlg, label108, "label108"); GLADE_HOOKUP_OBJECT_NO_REF (SpeedHacksDlg, 
dialog_action_area3, "dialog_action_area3"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, button99, "button99"); GLADE_HOOKUP_OBJECT (SpeedHacksDlg, button98, "button98"); diff --git a/pcsx2/Linux/pcsx2.glade b/pcsx2/Linux/pcsx2.glade index 40e23c19e7..ae3a436467 100644 --- a/pcsx2/Linux/pcsx2.glade +++ b/pcsx2/Linux/pcsx2.glade @@ -1275,213 +1275,250 @@ If you have problems, Disable all of these and try again. 0 - + True - 0 - 0.5 - GTK_SHADOW_ETCHED_IN + False + 0 - + True - 0.5 - 0.5 - 1 - 1 - 0 - 0 - 12 - 0 + 0 + 0.5 + GTK_SHADOW_ETCHED_IN - + True - False - 2 + 0.5 + 0.5 + 1 + 1 + 0 + 0 + 12 + 0 - + True - True - Default Cycle Rate - True - GTK_RELIEF_NORMAL - True - False - False - True - - - 0 - False - False - - + False + 2 - - - True - Most compatible option - recommended for everyone with high-end machines. - False - False - GTK_JUSTIFY_LEFT - True - False - 0.289999991655 - 0.5 - 0 - 0 - PANGO_ELLIPSIZE_NONE - -1 - False - 0 - - - 0 - False - False - - + + + True + True + False + GTK_POS_TOP + 0 + GTK_UPDATE_CONTINUOUS + False + 0 0 2 1 0 0 + + + + 0 + False + False + + - - - True - True - Use x1.5 Cycle Rate - True - GTK_RELIEF_NORMAL - True - False - False - True - check_default_cycle_rate - - - 0 - False - False - - + + + True + Most compatible option - recommended for everyone with high-end machines. + False + False + GTK_JUSTIFY_LEFT + True + False + 0.5 + 0.5 + 0 + 0 + PANGO_ELLIPSIZE_NONE + -1 + False + 0 + + + 0 + False + False + + - - - True - Moderate speedup, and works well with most games. - False - False - GTK_JUSTIFY_LEFT - True - False - 0.289999991655 - 0.5 - 0 - 0 - PANGO_ELLIPSIZE_NONE - -1 - False - 0 - - - 0 - False - False - - + + + True + + + 0 + False + False + + - - - True - True - Use x2 Cycle Rate - True - GTK_RELIEF_NORMAL - True - False - False - True - check_default_cycle_rate + + + True + Important: the X2 sync hack *will* cause choppy/skippy audio on many FMV movies. + False + False + GTK_JUSTIFY_LEFT + True + False + 0.5 + 0.5 + 0 + 0 + PANGO_ELLIPSIZE_NONE + -1 + False + 0 + + + 0 + False + False + + - - 0 - False - False - - - - - - True - Big speedup! Works well with many games. - False - False - GTK_JUSTIFY_LEFT - True - False - 0.360000014305 - 0.5 - 0 - 0 - PANGO_ELLIPSIZE_NONE - -1 - False - 0 - - - 0 - False - False - - - - - - True - Important: the X2 sync hack *will* cause choppy/skippy audio on many FMV movies. - False - False - GTK_JUSTIFY_LEFT - True - False - 0.5 - 0.5 - 0 - 0 - PANGO_ELLIPSIZE_NONE - -1 - False - 0 - - - 0 - False - False - + + + + True + <b>EmotionEngine (EE) Sync Hacks</b> + False + True + GTK_JUSTIFY_LEFT + False + False + 0.5 + 0.5 + 0 + 0 + PANGO_ELLIPSIZE_NONE + -1 + False + 0 + + + label_item + + + + 0 + True + True + - + True - <b>EmotionEngine (EE) Sync Hacks</b> - False - True - GTK_JUSTIFY_LEFT - False - False - 0.5 - 0.5 - 0 - 0 - PANGO_ELLIPSIZE_NONE - -1 - False - 0 + 0 + 0.5 + GTK_SHADOW_ETCHED_IN + + + + True + 0.5 + 0.5 + 1 + 1 + 0 + 0 + 12 + 0 + + + + True + False + 0 + + + + True + True + False + GTK_POS_TOP + 0 + GTK_UPDATE_CONTINUOUS + False + 0 0 4 1 0 0 + + + + 0 + True + True + + + + + + True + 2: Moderate speedup, should work with most games with minor problems. 
+ False + False + GTK_JUSTIFY_LEFT + True + False + 0.5 + 0.5 + 0 + 0 + PANGO_ELLIPSIZE_NONE + -1 + False + 0 + + + 0 + False + False + + + + + + + + + + True + <b>VU Cycle Stealing (Speedup for 3d geometry)</b> + False + True + GTK_JUSTIFY_LEFT + False + False + 0.5 + 0.5 + 0 + 0 + PANGO_ELLIPSIZE_NONE + -1 + False + 0 + + + label_item + + - label_item + 0 + True + True @@ -1664,7 +1701,7 @@ If you have problems, Disable all of these and try again. False False GTK_JUSTIFY_LEFT - False + True False 0.5 0.5 @@ -1699,78 +1736,6 @@ If you have problems, Disable all of these and try again. False - - - - True - VU Cycle Stealing (Speedup for 3d geometry) - False - False - GTK_JUSTIFY_LEFT - False - False - 0.5 - 0.5 - 0 - 0 - PANGO_ELLIPSIZE_NONE - -1 - False - 0 - - - 0 - False - False - - - - - - True - True - True - GTK_POS_TOP - 0 - GTK_UPDATE_CONTINUOUS - False - 0 0 4 1 0 0 - - - 0 - True - True - - - - - - True - 0: No speedup. -1: Slight speedup, should work with most games. -2: Moderate speedup, should work with most games with minor problems. -3: Large speedup, may break many games and make others skip frames. -4: Very large speedup, will break games in interesting ways. - False - False - GTK_JUSTIFY_LEFT - True - False - 0.5 - 0.5 - 0 - 0 - PANGO_ELLIPSIZE_NONE - -1 - False - 0 - - - 0 - False - False - - 0 From 5bb89fef92011ba68e45f2869c48a3c3a4cdd5de Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 22 Apr 2009 10:16:41 +0000 Subject: [PATCH 127/143] Linux: Minor fix for the hack dialog box. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1043 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Linux/HacksDlg.cpp | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/pcsx2/Linux/HacksDlg.cpp b/pcsx2/Linux/HacksDlg.cpp index 90b83f8b48..0177d60930 100644 --- a/pcsx2/Linux/HacksDlg.cpp +++ b/pcsx2/Linux/HacksDlg.cpp @@ -53,22 +53,6 @@ void on_Game_Fix_OK(GtkButton *button, gpointer user_data) gtk_main_quit(); } -void on_Speed_Hacks(GtkMenuItem *menuitem, gpointer user_data) -{ - SpeedHacksDlg = create_SpeedHacksDlg(); - - set_checked(SpeedHacksDlg, "check_iop_cycle_rate", Config.Hacks.IOPCycleDouble); - set_checked(SpeedHacksDlg, "check_wait_cycles_sync_hack", Config.Hacks.WaitCycleExt); - set_checked(SpeedHacksDlg, "check_intc_sync_hack", Config.Hacks.INTCSTATSlow); - set_checked(SpeedHacksDlg, "check_idle_loop_fastforward", Config.Hacks.IdleLoopFF); - - gtk_range_set_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "VUCycleHackScale")), Config.Hacks.VUCycleSteal); - gtk_range_set_value(GTK_RANGE(lookup_widget(SpeedHacksDlg, "EECycleHackScale")), Config.Hacks.EECycleRate); - gtk_widget_show_all(SpeedHacksDlg); - gtk_widget_set_sensitive(MainWindow, FALSE); - gtk_main(); -} - void on_vu_slider_changed(GtkRange *range, gpointer user_data) { int i; @@ -85,6 +69,29 @@ void on_ee_slider_changed(GtkRange *range, gpointer user_data) gtk_label_set_text(GTK_LABEL(lookup_widget(SpeedHacksDlg,"ee_cycle_label")),ee_cycle_labels[i]); } +void on_Speed_Hacks(GtkMenuItem *menuitem, gpointer user_data) +{ + SpeedHacksDlg = create_SpeedHacksDlg(); + GtkRange *vuScale = GTK_RANGE(lookup_widget(SpeedHacksDlg, "VUCycleHackScale")); + GtkRange *eeScale = GTK_RANGE(lookup_widget(SpeedHacksDlg, "EECycleHackScale")); + + set_checked(SpeedHacksDlg, "check_iop_cycle_rate", Config.Hacks.IOPCycleDouble); + set_checked(SpeedHacksDlg, "check_wait_cycles_sync_hack", Config.Hacks.WaitCycleExt); + set_checked(SpeedHacksDlg, "check_intc_sync_hack", 
Config.Hacks.INTCSTATSlow); + set_checked(SpeedHacksDlg, "check_idle_loop_fastforward", Config.Hacks.IdleLoopFF); + + gtk_range_set_value(vuScale, Config.Hacks.VUCycleSteal); + on_vu_slider_changed(vuScale, NULL); + gtk_range_set_value(eeScale, Config.Hacks.EECycleRate); + on_ee_slider_changed(eeScale, NULL); + + gtk_widget_show_all(SpeedHacksDlg); + gtk_widget_set_sensitive(MainWindow, FALSE); + gtk_main(); +} + + + void on_Speed_Hack_OK(GtkButton *button, gpointer user_data) { PcsxConfig::Hacks_t newhacks; From e87d224cc964dc0fb6a8cde61763bb7fee18d253 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Wed, 22 Apr 2009 13:25:26 +0000 Subject: [PATCH 128/143] Started consolidating the various Dump routines, most of which are broken, into one file. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1044 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Dump.cpp | 305 +++++++++++++++++++++ pcsx2/Dump.h | 24 ++ pcsx2/Makefile.am | 5 +- pcsx2/Misc.cpp | 1 + pcsx2/VU0micro.cpp | 20 -- pcsx2/VU1micro.cpp | 21 -- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 8 + pcsx2/x86/iCore.cpp | 100 +------ pcsx2/x86/iCore.h | 2 +- pcsx2/x86/iR3000A.cpp | 32 --- pcsx2/x86/ix86-32/iR5900-32.cpp | 115 +------- 11 files changed, 346 insertions(+), 287 deletions(-) create mode 100644 pcsx2/Dump.cpp create mode 100644 pcsx2/Dump.h diff --git a/pcsx2/Dump.cpp b/pcsx2/Dump.cpp new file mode 100644 index 0000000000..2e9453569f --- /dev/null +++ b/pcsx2/Dump.cpp @@ -0,0 +1,305 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +//////////////////////////////////////////////////// +#include "PrecompiledHeader.h" +#include "IopCommon.h" +#include "Counters.h" +#include "iCore.h" +#include "iR5900.h" +#include "IPU/IPU.h" + +using namespace R5900; +// fixme: currently should not be uncommented. +//#define TEST_BROKEN_DUMP_ROUTINES + +#ifdef TEST_BROKEN_DUMP_ROUTINES +//extern u32 psxdump; +//extern int rdram_devices; // put 8 for TOOL and 2 for PS2 and PSX +//extern int rdram_sdevid; +extern tIPU_BP g_BP; + +#define VF_VAL(x) ((x==0x80000000)?0:(x)) +#endif + + +// iR5900-32.cpp +extern EEINST* s_pInstCache; +extern u32 s_nEndBlock; // what pc the current block ends + + +void iDumpPsxRegisters(u32 startpc, u32 temp) +{ +// [TODO] fixme : thie code is broken and has no labels. Needs a rewrite to be useful. + +#ifdef TEST_BROKEN_DUMP_ROUTINES + int i; + const char* pstr = temp ? "t" : ""; + + // fixme: PSXM doesn't exist any more. 
+ //__Log("%spsxreg: %x %x ra:%x k0: %x %x", pstr, startpc, psxRegs.cycle, psxRegs.GPR.n.ra, psxRegs.GPR.n.k0, *(int*)PSXM(0x13c128)); + + for(i = 0; i < 34; i+=2) __Log("%spsx%s: %x %x", pstr, disRNameGPR[i], psxRegs.GPR.r[i], psxRegs.GPR.r[i+1]); + + __Log("%scycle: %x %x %x; counters %x %x", pstr, psxRegs.cycle, g_psxNextBranchCycle, EEsCycle, + psxNextsCounter, psxNextCounter); + + __Log("psxdma%d c%x b%x m%x t%x", 2, HW_DMA2_CHCR, HW_DMA2_BCR, HW_DMA2_MADR, HW_DMA2_TADR); + __Log("psxdma%d c%x b%x m%x", 3, HW_DMA3_CHCR, HW_DMA3_BCR, HW_DMA3_MADR); + __Log("psxdma%d c%x b%x m%x t%x", 4, HW_DMA4_CHCR, HW_DMA4_BCR, HW_DMA4_MADR, HW_DMA4_TADR); + __Log("psxdma%d c%x b%x m%x", 6, HW_DMA6_CHCR, HW_DMA6_BCR, HW_DMA6_MADR); + __Log("psxdma%d c%x b%x m%x", 7, HW_DMA7_CHCR, HW_DMA7_BCR, HW_DMA7_MADR); + __Log("psxdma%d c%x b%x m%x", 8, HW_DMA8_CHCR, HW_DMA8_BCR, HW_DMA8_MADR); + __Log("psxdma%d c%x b%x m%x t%x", 9, HW_DMA9_CHCR, HW_DMA9_BCR, HW_DMA9_MADR, HW_DMA9_TADR); + __Log("psxdma%d c%x b%x m%x", 10, HW_DMA10_CHCR, HW_DMA10_BCR, HW_DMA10_MADR); + __Log("psxdma%d c%x b%x m%x", 11, HW_DMA11_CHCR, HW_DMA11_BCR, HW_DMA11_MADR); + __Log("psxdma%d c%x b%x m%x", 12, HW_DMA12_CHCR, HW_DMA12_BCR, HW_DMA12_MADR); + + for(i = 0; i < 7; ++i) + __Log("%scounter%d: mode %x count %I64x rate %x scycle %x target %I64x", pstr, i, psxCounters[i].mode, psxCounters[i].count, psxCounters[i].rate, psxCounters[i].sCycleT, psxCounters[i].target); +#endif +} + +void iDumpRegisters(u32 startpc, u32 temp) +{ +// [TODO] fixme : this code is broken and has no labels. Needs a rewrite to be useful. + +#ifdef TEST_BROKEN_DUMP_ROUTINES + + int i; + const char* pstr;// = temp ? "t" : ""; + const u32 dmacs[] = {0x8000, 0x9000, 0xa000, 0xb000, 0xb400, 0xc000, 0xc400, 0xc800, 0xd000, 0xd400 }; + const char* psymb; + + if (temp) + pstr = "t"; + else + pstr = ""; + + psymb = disR5900GetSym(startpc); + + if( psymb != NULL ) + __Log("%sreg(%s): %x %x c:%x", pstr, psymb, startpc, cpuRegs.interrupt, cpuRegs.cycle); + else + __Log("%sreg: %x %x c:%x", pstr, startpc, cpuRegs.interrupt, cpuRegs.cycle); + + for(i = 1; i < 32; ++i) __Log("%s: %x_%x_%x_%x", disRNameGPR[i], cpuRegs.GPR.r[i].UL[3], cpuRegs.GPR.r[i].UL[2], cpuRegs.GPR.r[i].UL[1], cpuRegs.GPR.r[i].UL[0]); + + //for(i = 0; i < 32; i+=4) __Log("cp%d: %x_%x_%x_%x", i, cpuRegs.CP0.r[i], cpuRegs.CP0.r[i+1], cpuRegs.CP0.r[i+2], cpuRegs.CP0.r[i+3]); + //for(i = 0; i < 32; ++i) __Log("%sf%d: %f %x", pstr, i, fpuRegs.fpr[i].f, fpuRegs.fprc[i]); + //for(i = 1; i < 32; ++i) __Log("%svf%d: %f %f %f %f, vi: %x", pstr, i, VU0.VF[i].F[3], VU0.VF[i].F[2], VU0.VF[i].F[1], VU0.VF[i].F[0], VU0.VI[i].UL); + + for(i = 0; i < 32; ++i) __Log("%sf%d: %x %x", pstr, i, fpuRegs.fpr[i].UL, fpuRegs.fprc[i]); + for(i = 1; i < 32; ++i) __Log("%svf%d: %x %x %x %x, vi: %x", pstr, i, VU0.VF[i].UL[3], VU0.VF[i].UL[2], VU0.VF[i].UL[1], VU0.VF[i].UL[0], VU0.VI[i].UL); + + __Log("%svfACC: %x %x %x %x", pstr, VU0.ACC.UL[3], VU0.ACC.UL[2], VU0.ACC.UL[1], VU0.ACC.UL[0]); + __Log("%sLO: %x_%x_%x_%x, HI: %x_%x_%x_%x", pstr, cpuRegs.LO.UL[3], cpuRegs.LO.UL[2], cpuRegs.LO.UL[1], cpuRegs.LO.UL[0], + cpuRegs.HI.UL[3], cpuRegs.HI.UL[2], cpuRegs.HI.UL[1], cpuRegs.HI.UL[0]); + __Log("%sCycle: %x %x, Count: %x", pstr, cpuRegs.cycle, g_nextBranchCycle, cpuRegs.CP0.n.Count); + + iDumpPsxRegisters(psxRegs.pc, temp); + + __Log("f410,30,40: %x %x %x, %d %d", psHu32(0xf410), psHu32(0xf430), psHu32(0xf440), rdram_sdevid, rdram_devices); + __Log("cyc11: %x %x; vu0: %x, vu1: %x", cpuRegs.sCycle[1], cpuRegs.eCycle[1], VU0.cycle, 
VU1.cycle); + + __Log("%scounters: %x %x; psx: %x %x", pstr, nextsCounter, nextCounter, psxNextsCounter, psxNextCounter); + + // fixme: The members of the counters[i] struct are wrong here. + /*for(i = 0; i < 4; ++i) { + __Log("eetimer%d: count: %x mode: %x target: %x %x; %x %x; %x %x %x %x", i, + counters[i].count, counters[i].mode, counters[i].target, counters[i].hold, counters[i].rate, + counters[i].interrupt, counters[i].Cycle, counters[i].sCycle, counters[i].CycleT, counters[i].sCycleT); + }*/ + __Log("VIF0_STAT = %x, VIF1_STAT = %x", psHu32(0x3800), psHu32(0x3C00)); + __Log("ipu %x %x %x %x; bp: %x %x %x %x", psHu32(0x2000), psHu32(0x2010), psHu32(0x2020), psHu32(0x2030), g_BP.BP, g_BP.bufferhasnew, g_BP.FP, g_BP.IFC); + __Log("gif: %x %x %x", psHu32(0x3000), psHu32(0x3010), psHu32(0x3020)); + + for(i = 0; i < ArraySize(dmacs); ++i) { + DMACh* p = (DMACh*)(PS2MEM_HW+dmacs[i]); + __Log("dma%d c%x m%x q%x t%x s%x", i, p->chcr, p->madr, p->qwc, p->tadr, p->sadr); + } + __Log("dmac %x %x %x %x", psHu32(DMAC_CTRL), psHu32(DMAC_STAT), psHu32(DMAC_RBSR), psHu32(DMAC_RBOR)); + __Log("intc %x %x", psHu32(INTC_STAT), psHu32(INTC_MASK)); + __Log("sif: %x %x %x %x %x", psHu32(0xf200), psHu32(0xf220), psHu32(0xf230), psHu32(0xf240), psHu32(0xf260)); +#endif +} + +void iDumpVU0Registers() +{ + // fixme: This code is outdated, broken, and lacks printed labels. + // Needs heavy mods to be useful. +#ifdef TEST_BROKEN_DUMP_ROUTINES + int i; + + for(i = 1; i < 32; ++i) { + __Log("v%d: %x %x %x %x, vi: ", i, VF_VAL(VU0.VF[i].UL[3]), VF_VAL(VU0.VF[i].UL[2]), + VF_VAL(VU0.VF[i].UL[1]), VF_VAL(VU0.VF[i].UL[0])); + if( i == REG_Q || i == REG_P ) + __Log("%f\n", VU0.VI[i].F); + else if( i == REG_MAC_FLAG ) + __Log("%x\n", 0);//VU0.VI[i].UL&0xff); + else if( i == REG_STATUS_FLAG ) + __Log("%x\n", 0);//VU0.VI[i].UL&0x03); + else if( i == REG_CLIP_FLAG ) + __Log("0\n"); + else + __Log("%x\n", VU0.VI[i].UL); + } + __Log("vfACC: %f %f %f %f\n", VU0.ACC.F[3], VU0.ACC.F[2], VU0.ACC.F[1], VU0.ACC.F[0]); +#endif +} + +void iDumpVU1Registers() +{ + // fixme: This code is outdated, broken, and lacks printed labels. + // Needs heavy mods to be useful. +#ifdef TEST_BROKEN_DUMP_ROUTINES + int i; + +// static int icount = 0; +// __Log("%x\n", icount); + + for(i = 1; i < 32; ++i) { + +// __Log("v%d: w%f(%x) z%f(%x) y%f(%x) x%f(%x), vi: ", i, VU1.VF[i].F[3], VU1.VF[i].UL[3], VU1.VF[i].F[2], VU1.VF[i].UL[2], +// VU1.VF[i].F[1], VU1.VF[i].UL[1], VU1.VF[i].F[0], VU1.VF[i].UL[0]); + //__Log("v%d: %f %f %f %f, vi: ", i, VU1.VF[i].F[3], VU1.VF[i].F[2], VU1.VF[i].F[1], VU1.VF[i].F[0]); + + __Log("v%d: %x %x %x %x, vi: ", i, VF_VAL(VU1.VF[i].UL[3]), VF_VAL(VU1.VF[i].UL[2]), VF_VAL(VU1.VF[i].UL[1]), VF_VAL(VU1.VF[i].UL[0])); + + if( i == REG_Q || i == REG_P ) __Log("%f\n", VU1.VI[i].F); + //else __Log("%x\n", VU1.VI[i].UL); + else __Log("%x\n", (i==REG_STATUS_FLAG||i==REG_MAC_FLAG||i==REG_CLIP_FLAG)?0:VU1.VI[i].UL); + } + __Log("vfACC: %f %f %f %f\n", VU1.ACC.F[3], VU1.ACC.F[2], VU1.ACC.F[1], VU1.ACC.F[0]); +#endif +} + + +#ifdef PCSX2_DEVBUILD +// and not sure what these might have once been used for... 
(air) +//static const char *txt0 = "EAX = %x : ECX = %x : EDX = %x\n"; +//static const char *txt0RC = "EAX = %x : EBX = %x : ECX = %x : EDX = %x : ESI = %x : EDI = %x\n"; +//static const char *txt1 = "REG[%d] = %x_%x\n"; +//static const char *txt2 = "M32 = %x\n"; +#endif +//////////////////////////////////////////////////// + +// Originally from iR5900-32.cpp +void iDumpBlock( int startpc, u8 * ptr ) +{ + FILE *f; + string filename; + u32 i, j; + EEINST* pcur; + u8 used[34]; + u8 fpuused[33]; + int numused, count, fpunumused; + + Console::Status( "dump1 %x:%x, %x", params startpc, pc, cpuRegs.cycle ); + Path::CreateDirectory( "dumps" ); +#ifndef __LINUX__ + ssprintf( filename, "dumps\\R5900dump%.8X.txt", startpc ); +#else + ssprintf( filename, "dumps/R5900dump%.8X.txt", startpc ); +#endif + + fflush( stdout ); + +// f = fopen( "dump1", "wb" ); +// fwrite( ptr, 1, (u32)x86Ptr - (u32)ptr, f ); +// fclose( f ); +// +// sprintf( command, "objdump -D --target=binary --architecture=i386 dump1 > %s", filename ); +// system( command ); + + f = fopen( filename.c_str(), "w" ); + + std::string output; + + if( disR5900GetSym(startpc) != NULL ) + fprintf(f, "%s\n", disR5900GetSym(startpc)); + + for ( i = startpc; i < s_nEndBlock; i += 4 ) { + disR5900Fasm( output, memRead32( i ), i ); + fprintf( f, output.c_str() ); + } + + // write the instruction info + + fprintf(f, "\n\nlive0 - %x, live1 - %x, live2 - %x, lastuse - %x\nmmx - %x, xmm - %x, used - %x\n", + EEINST_LIVE0, EEINST_LIVE1, EEINST_LIVE2, EEINST_LASTUSE, EEINST_MMX, EEINST_XMM, EEINST_USED); + + memzero_obj(used); + numused = 0; + for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { + if( s_pInstCache->regs[i] & EEINST_USED ) { + used[i] = 1; + numused++; + } + } + + memzero_obj(fpuused); + fpunumused = 0; + for(i = 0; i < ArraySize(s_pInstCache->fpuregs); ++i) { + if( s_pInstCache->fpuregs[i] & EEINST_USED ) { + fpuused[i] = 1; + fpunumused++; + } + } + + fprintf(f, " "); + for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { + if( used[i] ) fprintf(f, "%2d ", i); + } + for(i = 0; i < ArraySize(s_pInstCache->fpuregs); ++i) { + if( fpuused[i] ) fprintf(f, "%2d ", i); + } + fprintf(f, "\n"); + + fprintf(f, " "); + for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { + if( used[i] ) fprintf(f, "%s ", disRNameGPR[i]); + } + for(i = 0; i < ArraySize(s_pInstCache->fpuregs); ++i) { + if( fpuused[i] ) fprintf(f, "%s ", i<32?"FR":"FA"); + } + fprintf(f, "\n"); + + pcur = s_pInstCache+1; + for( i = 0; i < (s_nEndBlock-startpc)/4; ++i, ++pcur) { + fprintf(f, "%2d: %2.2x ", i+1, pcur->info); + + count = 1; + for(j = 0; j < ArraySize(s_pInstCache->regs); j++) { + if( used[j] ) { + fprintf(f, "%2.2x%s", pcur->regs[j], ((count%8)&&countfpuregs); j++) { + if( fpuused[j] ) { + fprintf(f, "%2.2x%s", pcur->fpuregs[j], ((count%8)&&count + + + + diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index 4cb6dc6bc6..97600e541f 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -48,33 +48,6 @@ _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR]; #include using namespace std; -//void _eeSetLoadStoreReg(int gprreg, u32 offset, int x86reg) -//{ -// int regs[2] = {ESI, EDI}; -// -// int i = _checkX86reg(X86TYPE_MEMOFFSET, gprreg, MODE_WRITE); -// if( i < 0 ) { -// for(i = 0; i < 2; ++i) { -// if( !x86regs[regs[i]].inuse ) break; -// } -// -// assert( i < 2 ); -// i = regs[i]; -// } -// -// if( i != x86reg ) MOV32RtoR(x86reg, i); -// x86regs[i].extra = offset; -//} - -//int _eeGeLoadStoreReg(int gprreg, int* poffset) -//{ -// int i = 
_checkX86reg(X86TYPE_MEMOFFSET, gprreg, MODE_READ); -// if( i >= 0 ) return -1; -// -// if( poffset ) *poffset = x86regs[i].extra; -// return i; -//} - // XMM Caching #define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] #define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] @@ -107,8 +80,7 @@ __forceinline void* _XMMGetAddr(int type, int reg, VURegs *VU) case XMMTYPE_FPACC: return &fpuRegs.ACC.f; - default: - assert(0); + jNO_DEFAULT } return NULL; @@ -1133,73 +1105,3 @@ void _recFillRegister(EEINST& pinst, int type, int reg, int write) void SetMMXstate() { x86FpuState = MMX_STATE; } - -//////////////////////////////////////////////////// -//#include "R3000A.h" -//#include "PsxCounters.h" -//#include "PsxMem.h" -//extern tIPU_BP g_BP; - -#if 0 -extern u32 psxdump; -extern void iDumpPsxRegisters(u32 startpc, u32 temp); -extern Counter counters[6]; -extern int rdram_devices; // put 8 for TOOL and 2 for PS2 and PSX -extern int rdram_sdevid; -#endif - -void iDumpRegisters(u32 startpc, u32 temp) -{ -// [TODO] fixme : this code is broken and has no labels. Needs a rewrite to be useful. - -#if 0 - - int i; - const char* pstr;// = temp ? "t" : ""; - const u32 dmacs[] = {0x8000, 0x9000, 0xa000, 0xb000, 0xb400, 0xc000, 0xc400, 0xc800, 0xd000, 0xd400 }; - const char* psymb; - - if (temp) - pstr = "t"; - else - pstr = ""; - - psymb = disR5900GetSym(startpc); - - if( psymb != NULL ) - __Log("%sreg(%s): %x %x c:%x", pstr, psymb, startpc, cpuRegs.interrupt, cpuRegs.cycle); - else - __Log("%sreg: %x %x c:%x", pstr, startpc, cpuRegs.interrupt, cpuRegs.cycle); - for(i = 1; i < 32; ++i) __Log("%s: %x_%x_%x_%x", disRNameGPR[i], cpuRegs.GPR.r[i].UL[3], cpuRegs.GPR.r[i].UL[2], cpuRegs.GPR.r[i].UL[1], cpuRegs.GPR.r[i].UL[0]); - //for(i = 0; i < 32; i+=4) __Log("cp%d: %x_%x_%x_%x", i, cpuRegs.CP0.r[i], cpuRegs.CP0.r[i+1], cpuRegs.CP0.r[i+2], cpuRegs.CP0.r[i+3]); - //for(i = 0; i < 32; ++i) __Log("%sf%d: %f %x", pstr, i, fpuRegs.fpr[i].f, fpuRegs.fprc[i]); - //for(i = 1; i < 32; ++i) __Log("%svf%d: %f %f %f %f, vi: %x", pstr, i, VU0.VF[i].F[3], VU0.VF[i].F[2], VU0.VF[i].F[1], VU0.VF[i].F[0], VU0.VI[i].UL); - for(i = 0; i < 32; ++i) __Log("%sf%d: %x %x", pstr, i, fpuRegs.fpr[i].UL, fpuRegs.fprc[i]); - for(i = 1; i < 32; ++i) __Log("%svf%d: %x %x %x %x, vi: %x", pstr, i, VU0.VF[i].UL[3], VU0.VF[i].UL[2], VU0.VF[i].UL[1], VU0.VF[i].UL[0], VU0.VI[i].UL); - __Log("%svfACC: %x %x %x %x", pstr, VU0.ACC.UL[3], VU0.ACC.UL[2], VU0.ACC.UL[1], VU0.ACC.UL[0]); - __Log("%sLO: %x_%x_%x_%x, HI: %x_%x_%x_%x", pstr, cpuRegs.LO.UL[3], cpuRegs.LO.UL[2], cpuRegs.LO.UL[1], cpuRegs.LO.UL[0], - cpuRegs.HI.UL[3], cpuRegs.HI.UL[2], cpuRegs.HI.UL[1], cpuRegs.HI.UL[0]); - __Log("%sCycle: %x %x, Count: %x", pstr, cpuRegs.cycle, g_nextBranchCycle, cpuRegs.CP0.n.Count); - iDumpPsxRegisters(psxRegs.pc, temp); - - __Log("f410,30,40: %x %x %x, %d %d", psHu32(0xf410), psHu32(0xf430), psHu32(0xf440), rdram_sdevid, rdram_devices); - __Log("cyc11: %x %x; vu0: %x, vu1: %x", cpuRegs.sCycle[1], cpuRegs.eCycle[1], VU0.cycle, VU1.cycle); - - __Log("%scounters: %x %x; psx: %x %x", pstr, nextsCounter, nextCounter, psxNextsCounter, psxNextCounter); - for(i = 0; i < 4; ++i) { - __Log("eetimer%d: count: %x mode: %x target: %x %x; %x %x; %x %x %x %x", i, - counters[i].count, counters[i].mode, counters[i].target, counters[i].hold, counters[i].rate, - counters[i].interrupt, counters[i].Cycle, counters[i].sCycle, counters[i].CycleT, counters[i].sCycleT); - } - __Log("VIF0_STAT = %x, VIF1_STAT = %x", psHu32(0x3800), psHu32(0x3C00)); - __Log("ipu %x %x %x %x; bp: %x %x %x %x", 
psHu32(0x2000), psHu32(0x2010), psHu32(0x2020), psHu32(0x2030), g_BP.BP, g_BP.bufferhasnew, g_BP.FP, g_BP.IFC); - __Log("gif: %x %x %x", psHu32(0x3000), psHu32(0x3010), psHu32(0x3020)); - for(i = 0; i < ARRAYSIZE(dmacs); ++i) { - DMACh* p = (DMACh*)(PS2MEM_HW+dmacs[i]); - __Log("dma%d c%x m%x q%x t%x s%x", i, p->chcr, p->madr, p->qwc, p->tadr, p->sadr); - } - __Log("dmac %x %x %x %x", psHu32(DMAC_CTRL), psHu32(DMAC_STAT), psHu32(DMAC_RBSR), psHu32(DMAC_RBOR)); - __Log("intc %x %x", psHu32(INTC_STAT), psHu32(INTC_MASK)); - __Log("sif: %x %x %x %x %x", psHu32(0xf200), psHu32(0xf220), psHu32(0xf230), psHu32(0xf240), psHu32(0xf260)); -#endif -} diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index bb3cb3f536..1352be4157 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -364,7 +364,7 @@ extern int _signExtendGPRtoMMX(x86MMXRegType to, u32 gprreg, int shift); extern _mmxregs mmxregs[iREGCNT_MMX], s_saveMMXregs[iREGCNT_MMX]; extern u16 x86FpuState; -extern void iDumpRegisters(u32 startpc, u32 temp); +// extern void iDumpRegisters(u32 startpc, u32 temp); ////////////////////////////////////////////////////////////////////////// // iFlushCall / _psxFlushCall Parameters diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 1dedcfd624..846ab01a3e 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -932,38 +932,6 @@ void psxRecompileNextInstruction(int delayslot) _clearNeededX86regs(); } -#include "IopHw.h" - -void iDumpPsxRegisters(u32 startpc, u32 temp) -{ -// [TODO] fixme : thie code is broken and has no labels. Needs a rewrite to be useful. - -#if 0 - int i; - const char* pstr = temp ? "t" : ""; - - __Log("%spsxreg: %x %x ra:%x k0: %x %x", pstr, startpc, psxRegs.cycle, psxRegs.GPR.n.ra, psxRegs.GPR.n.k0, *(int*)PSXM(0x13c128)); - for(i = 0; i < 34; i+=2) __Log("%spsx%s: %x %x", pstr, disRNameGPR[i], psxRegs.GPR.r[i], psxRegs.GPR.r[i+1]); - __Log("%scycle: %x %x %x; counters %x %x", pstr, psxRegs.cycle, g_psxNextBranchCycle, EEsCycle, - psxNextsCounter, psxNextCounter); - - __Log("psxdma%d c%x b%x m%x t%x", 2, HW_DMA2_CHCR, HW_DMA2_BCR, HW_DMA2_MADR, HW_DMA2_TADR); - __Log("psxdma%d c%x b%x m%x", 3, HW_DMA3_CHCR, HW_DMA3_BCR, HW_DMA3_MADR); - __Log("psxdma%d c%x b%x m%x t%x", 4, HW_DMA4_CHCR, HW_DMA4_BCR, HW_DMA4_MADR, HW_DMA4_TADR); - __Log("psxdma%d c%x b%x m%x", 6, HW_DMA6_CHCR, HW_DMA6_BCR, HW_DMA6_MADR); - __Log("psxdma%d c%x b%x m%x", 7, HW_DMA7_CHCR, HW_DMA7_BCR, HW_DMA7_MADR); - __Log("psxdma%d c%x b%x m%x", 8, HW_DMA8_CHCR, HW_DMA8_BCR, HW_DMA8_MADR); - __Log("psxdma%d c%x b%x m%x t%x", 9, HW_DMA9_CHCR, HW_DMA9_BCR, HW_DMA9_MADR, HW_DMA9_TADR); - __Log("psxdma%d c%x b%x m%x", 10, HW_DMA10_CHCR, HW_DMA10_BCR, HW_DMA10_MADR); - __Log("psxdma%d c%x b%x m%x", 11, HW_DMA11_CHCR, HW_DMA11_BCR, HW_DMA11_MADR); - __Log("psxdma%d c%x b%x m%x", 12, HW_DMA12_CHCR, HW_DMA12_BCR, HW_DMA12_MADR); - for(i = 0; i < 7; ++i) - __Log("%scounter%d: mode %x count %I64x rate %x scycle %x target %I64x", pstr, i, psxCounters[i].mode, psxCounters[i].count, psxCounters[i].rate, psxCounters[i].sCycleT, psxCounters[i].target); -#endif -} - -void iDumpPsxRegisters(u32 startpc); - #ifdef _DEBUG static void printfn() { diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index c36687bda7..ac20ffd0a1 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -47,9 +47,9 @@ #include "Paths.h" #include "NakedAsm.h" +#include "Dump.h" using namespace x86Emitter; - using namespace R5900; // used to disable register freezing during 
cpuBranchTests (registers @@ -87,12 +87,12 @@ static u32 *recRAMCopy = NULL; void JITCompile(); static BaseBlocks recBlocks(EE_NUMBLOCKS, (uptr)JITCompile); static u8* recPtr = NULL, *recStackPtr = NULL; -static EEINST* s_pInstCache = NULL; +EEINST* s_pInstCache = NULL; static u32 s_nInstCacheSize = 0; static BASEBLOCK* s_pCurBlock = NULL; static BASEBLOCKEX* s_pCurBlockEx = NULL; -static u32 s_nEndBlock = 0; // what pc the current block ends +u32 s_nEndBlock = 0; // what pc the current block ends static u32 s_nHasDelay = 0; static bool s_nBlockFF; @@ -110,118 +110,9 @@ static u32 dumplog = 0; #define dumplog 0 #endif -#ifdef PCSX2_DEVBUILD -// and not sure what these might have once been used for... (air) -//static const char *txt0 = "EAX = %x : ECX = %x : EDX = %x\n"; -//static const char *txt0RC = "EAX = %x : EBX = %x : ECX = %x : EDX = %x : ESI = %x : EDI = %x\n"; -//static const char *txt1 = "REG[%d] = %x_%x\n"; -//static const char *txt2 = "M32 = %x\n"; -#endif - static void iBranchTest(u32 newpc = 0xffffffff, bool noDispatch=false); static void ClearRecLUT(BASEBLOCK* base, int count); -//////////////////////////////////////////////////// -static void iDumpBlock( int startpc, u8 * ptr ) -{ - FILE *f; - string filename; - u32 i, j; - EEINST* pcur; - u8 used[34]; - u8 fpuused[33]; - int numused, count, fpunumused; - - Console::Status( "dump1 %x:%x, %x", params startpc, pc, cpuRegs.cycle ); - Path::CreateDirectory( "dumps" ); -#ifndef __LINUX__ - ssprintf( filename, "dumps\\R5900dump%.8X.txt", startpc ); -#else - ssprintf( filename, "dumps/R5900dump%.8X.txt", startpc ); -#endif - fflush( stdout ); -// f = fopen( "dump1", "wb" ); -// fwrite( ptr, 1, (u32)x86Ptr - (u32)ptr, f ); -// fclose( f ); -// -// sprintf( command, "objdump -D --target=binary --architecture=i386 dump1 > %s", filename ); -// system( command ); - - f = fopen( filename.c_str(), "w" ); - - std::string output; - - if( disR5900GetSym(startpc) != NULL ) - fprintf(f, "%s\n", disR5900GetSym(startpc)); - for ( i = startpc; i < s_nEndBlock; i += 4 ) { - disR5900Fasm( output, memRead32( i ), i ); - fprintf( f, output.c_str() ); - } - - // write the instruction info - - fprintf(f, "\n\nlive0 - %x, live1 - %x, live2 - %x, lastuse - %x\nmmx - %x, xmm - %x, used - %x\n", - EEINST_LIVE0, EEINST_LIVE1, EEINST_LIVE2, EEINST_LASTUSE, EEINST_MMX, EEINST_XMM, EEINST_USED); - - memzero_obj(used); - numused = 0; - for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { - if( s_pInstCache->regs[i] & EEINST_USED ) { - used[i] = 1; - numused++; - } - } - - memzero_obj(fpuused); - fpunumused = 0; - for(i = 0; i < ArraySize(s_pInstCache->fpuregs); ++i) { - if( s_pInstCache->fpuregs[i] & EEINST_USED ) { - fpuused[i] = 1; - fpunumused++; - } - } - - fprintf(f, " "); - for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { - if( used[i] ) fprintf(f, "%2d ", i); - } - for(i = 0; i < ArraySize(s_pInstCache->fpuregs); ++i) { - if( fpuused[i] ) fprintf(f, "%2d ", i); - } - fprintf(f, "\n"); - - fprintf(f, " "); - for(i = 0; i < ArraySize(s_pInstCache->regs); ++i) { - if( used[i] ) fprintf(f, "%s ", disRNameGPR[i]); - } - for(i = 0; i < ArraySize(s_pInstCache->fpuregs); ++i) { - if( fpuused[i] ) fprintf(f, "%s ", i<32?"FR":"FA"); - } - fprintf(f, "\n"); - - pcur = s_pInstCache+1; - for( i = 0; i < (s_nEndBlock-startpc)/4; ++i, ++pcur) { - fprintf(f, "%2d: %2.2x ", i+1, pcur->info); - - count = 1; - for(j = 0; j < ArraySize(s_pInstCache->regs); j++) { - if( used[j] ) { - fprintf(f, "%2.2x%s", pcur->regs[j], ((count%8)&&countfpuregs); j++) { - if( fpuused[j] 
) { - fprintf(f, "%2.2x%s", pcur->fpuregs[j], ((count%8)&&count Date: Wed, 22 Apr 2009 18:35:44 +0000 Subject: [PATCH 129/143] Fixed a bug from r1038. PMADD and HADD instructions had inverted to/from fields. >_< git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1045 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/xmm/arithmetic.h | 21 +++-------- pcsx2/x86/ix86/implement/xmm/comparisons.h | 2 +- pcsx2/x86/ix86/implement/xmm/moremovs.h | 13 ++++--- pcsx2/x86/ix86/implement/xmm/shufflepack.h | 10 ++--- pcsx2/x86/ix86/ix86.cpp | 13 +++---- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 31 +++++---------- pcsx2/x86/ix86/ix86_types.h | 44 +++++++++++----------- 7 files changed, 57 insertions(+), 77 deletions(-) diff --git a/pcsx2/x86/ix86/implement/xmm/arithmetic.h b/pcsx2/x86/ix86/implement/xmm/arithmetic.h index 87deb51429..dae4a0c0c3 100644 --- a/pcsx2/x86/ix86/implement/xmm/arithmetic.h +++ b/pcsx2/x86/ix86/implement/xmm/arithmetic.h @@ -28,23 +28,14 @@ class _SimdShiftHelper public: _SimdShiftHelper() {} - template< typename OperandType > - __forceinline void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode1, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( 0x66, Opcode1, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode1, to, from ); } - template< typename OperandType > - __forceinline void operator()( const xRegisterSIMD& to, const void* from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } + __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { writeXMMop( Opcode1, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const void* from ) const { writeXMMop( Opcode1, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { writeXMMop( Opcode1, to, from ); } - template< typename OperandType > - __noinline void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const - { - writeXMMop( 0x66, Opcode1, to, from ); - } template< typename OperandType > __emitinline void operator()( const xRegisterSIMD& to, u8 imm8 ) const diff --git a/pcsx2/x86/ix86/implement/xmm/comparisons.h b/pcsx2/x86/ix86/implement/xmm/comparisons.h index 469a808524..c71b53d3d5 100644 --- a/pcsx2/x86/ix86/implement/xmm/comparisons.h +++ b/pcsx2/x86/ix86/implement/xmm/comparisons.h @@ -43,7 +43,7 @@ protected: { __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } Woot() {} }; diff --git a/pcsx2/x86/ix86/implement/xmm/moremovs.h b/pcsx2/x86/ix86/implement/xmm/moremovs.h index fa00b91179..7fcd7a1d8d 100644 --- a/pcsx2/x86/ix86/implement/xmm/moremovs.h +++ b/pcsx2/x86/ix86/implement/xmm/moremovs.h @@ -29,15 +29,16 @@ protected: template< u8 Prefix > struct Woot { + 
Woot() {} __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } }; public: - Woot<0x00> PS; - Woot<0x66> PD; + const Woot<0x00> PS; + const Woot<0x66> PD; MovhlImplAll() {} //GCC. }; @@ -64,8 +65,8 @@ public: __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); } __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __noinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } MovapsImplAll() {} //GCC. }; diff --git a/pcsx2/x86/ix86/implement/xmm/shufflepack.h b/pcsx2/x86/ix86/implement/xmm/shufflepack.h index f97d8f8f75..f4056c9369 100644 --- a/pcsx2/x86/ix86/implement/xmm/shufflepack.h +++ b/pcsx2/x86/ix86/implement/xmm/shufflepack.h @@ -28,7 +28,7 @@ protected: { __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } Woot() {} }; @@ -192,7 +192,7 @@ protected: xWrite( imm8 ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); xWrite( imm8 ); @@ -220,7 +220,7 @@ public: // Operation can be performed on either MMX or SSE src operands. 
template< typename T > - __noinline void W( const xRegisterSIMD& to, const ModSibBase& from, u8 imm8 ) const + __forceinline void W( const xRegisterSIMD& to, const ModSibBase& from, u8 imm8 ) const { writeXMMop( 0x66, 0xc4, to, from ); xWrite( imm8 ); @@ -260,7 +260,7 @@ protected: xWrite( imm8 ); } - __noinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const + __forceinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest ); xWrite( imm8 ); @@ -289,7 +289,7 @@ public: xWrite( imm8 ); } - __noinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const + __forceinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const { writeXMMop( 0x66, 0x153a, from, dest ); xWrite( imm8 ); diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 0f871735dc..0453ffc8e1 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -90,7 +90,7 @@ const xRegisterMMX mm4( 4 ), mm5( 5 ), mm6( 6 ), mm7( 7 ); -const xRegister32 +const xAddressReg eax( 0 ), ebx( 3 ), ecx( 1 ), edx( 2 ), esi( 6 ), edi( 7 ), @@ -430,7 +430,6 @@ void ModSibBase::Reduce() return; } - // The Scale has a series of valid forms, all shown here: switch( Scale ) @@ -872,22 +871,22 @@ __forceinline void xMOVNTDQA( const xRegisterSSE& to, const void* from ) xWriteDisp( to.Id, from ); } -__noinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from ) +__forceinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from ) { xWrite( 0x2A380f66 ); EmitSibMagic( to.Id, from ); } __forceinline void xMOVNTDQ( void* to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } -__noinline void xMOVNTDQA( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } +__forceinline void xMOVNTDQA( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } __forceinline void xMOVNTPD( void* to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } -__noinline void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } +__forceinline void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } __forceinline void xMOVNTPS( void* to, const xRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } -__noinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } +__forceinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } __forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } -__noinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } +__forceinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } __forceinline void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from) { writeXMMop( 0x50, to, from ); } __forceinline void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from) { writeXMMop( 0x66, 0x50, to, from, true ); } diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index 07acf0ca06..d2845f2a09 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -280,24 +280,22 @@ emitterT void SSSE3_PSIGND_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { x emitterT void 
SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ) { xPEXTR.W( xRegister32(to), xRegisterSSE(from), imm8 ); } emitterT void SSE_PINSRW_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8 ) { xPINSR.W( xRegisterSSE(to), xRegister32(from), imm8 ); } -emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xINSERTPS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } -emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) { xEXTRACTPS( xRegister32(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMADD.WD( xRegisterSSE(to), xRegisterSSE(from) ); } + +emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xHADD.PS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { xHADD.PS( xRegisterSSE(to), (void*)from ); } + +emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8) { xPINSR.D( xRegisterSSE(to), xRegister32(from), imm8 ); } + +emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xINSERTPS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) { xEXTRACTPS( xRegister32(to), xRegisterSSE(from), imm8 ); } + emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); } ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PEXTRW,PINSRW: Packed Extract/Insert Word * -//**********************************************************************************} - -emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMADD.WD( xRegisterSSE(from), xRegisterSSE(to) ); } - -emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xHADD.PS( xRegisterSSE(from), xRegisterSSE(to) ); } -emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { xHADD.PS( xRegisterSSE(from), (void*)to ); } - // SSE4.1 @@ -360,15 +358,6 @@ emitterT void SSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) ModRM(3, to, from); } -emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x223A0F); - ModRM(3, to, from); - write8(imm8); -} - ////////////////////////////////////////////////////////////////////////////////////////// // SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) // diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 647c812657..7cca341cd3 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -294,28 +294,6 @@ namespace x86Emitter xRegisterCL(): xRegister8( 1 ) {} }; - extern const xRegisterSSE - xmm0, xmm1, xmm2, xmm3, - xmm4, xmm5, xmm6, xmm7; - - extern const xRegisterMMX - mm0, mm1, mm2, mm3, - mm4, mm5, mm6, mm7; - - extern const xRegister32 - eax, ebx, ecx, edx, - esi, edi, ebp, esp; - - extern const xRegister16 - ax, bx, cx, dx, - si, di, bp, sp; - - extern const xRegister8 - al, dl, bl, - ah, ch, dh, bh; - - extern const xRegisterCL cl; // I'm special! 
- ////////////////////////////////////////////////////////////////////////////////////////// // Use 32 bit registers as out index register (for ModSib memory address calculations) // Only xAddressReg provides operators for constructing xAddressInfo types. @@ -403,6 +381,28 @@ namespace x86Emitter __forceinline xAddressInfo operator-( s32 imm ) const { return xAddressInfo( *this ).Add( -imm ); } }; + extern const xRegisterSSE + xmm0, xmm1, xmm2, xmm3, + xmm4, xmm5, xmm6, xmm7; + + extern const xRegisterMMX + mm0, mm1, mm2, mm3, + mm4, mm5, mm6, mm7; + + extern const xAddressReg + eax, ebx, ecx, edx, + esi, edi, ebp, esp; + + extern const xRegister16 + ax, bx, cx, dx, + si, di, bp, sp; + + extern const xRegister8 + al, dl, bl, + ah, ch, dh, bh; + + extern const xRegisterCL cl; // I'm special! + ////////////////////////////////////////////////////////////////////////////////////////// // ModSib - Internal low-level representation of the ModRM/SIB information. // From ac0768e9a32dd224900a81c5f20c37fffe499cbc Mon Sep 17 00:00:00 2001 From: arcum42 Date: Thu, 23 Apr 2009 10:18:43 +0000 Subject: [PATCH 130/143] ZeroGS: Apply Zeydlitz's patch from Issue 174. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1046 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zerogs/opengl/GSmain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/zerogs/opengl/GSmain.cpp b/plugins/zerogs/opengl/GSmain.cpp index 97941a5579..abdcf05c0a 100644 --- a/plugins/zerogs/opengl/GSmain.cpp +++ b/plugins/zerogs/opengl/GSmain.cpp @@ -982,7 +982,7 @@ void _GSgifTransfer(pathInfo *path, u32 *pMem, u32 size) continue; } - break; + continue; } } From ef565303a506895f157eb456a1b7d7865e946907 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Thu, 23 Apr 2009 12:39:59 +0000 Subject: [PATCH 131/143] Finished the emitter, complete with code cleanups! :) (added last few SSE instructions, and inserted placebos for some future additions to the x86 portion, regarding xchg/xadd/etc). 
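For reference, the general shape of the reworked instruction objects is roughly the sketch below: the mandatory prefix and opcode are baked in as template parameters, each operand form is an operator() overload, and the objects are instantiated as const globals so call sites read like instructions. This is a minimal standalone sketch of the pattern only, written to compile on its own; EmitByte, XmmReg, SimdOpImpl and xPADD_B are illustrative stand-ins, not the emitter's real types or entry points.

#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<uint8_t> codeBuf;                 // stand-in for the recompiler's output pointer
static void EmitByte( uint8_t b ) { codeBuf.push_back( b ); }

struct XmmReg { int Id; };                           // simplified register wrapper

// One object per instruction: prefix and opcode are template parameters, and each
// operand form is an operator() overload, so call sites read like the instruction itself.
template< uint8_t Prefix, uint8_t Opcode >
class SimdOpImpl
{
public:
	SimdOpImpl() {}                                  // user-provided ctor so const instances default-init cleanly

	void operator()( const XmmReg& to, const XmmReg& from ) const
	{
		if( Prefix ) EmitByte( Prefix );             // mandatory 0x66/0xf2/0xf3 prefix, if any
		EmitByte( 0x0f );
		EmitByte( Opcode );
		EmitByte( uint8_t( 0xc0 | (to.Id << 3) | from.Id ) );   // ModRM, register-direct form
	}
};

// Instantiated as const globals; "calling" the object emits the instruction.
static const SimdOpImpl<0x66, 0xfc> xPADD_B;         // PADDB xmm,xmm  (66 0F FC /r)

int main()
{
	XmmReg xmm0 = { 0 }, xmm1 = { 1 };
	xPADD_B( xmm1, xmm0 );                           // emits 66 0F FC C8
	for( size_t i = 0; i < codeBuf.size(); ++i ) std::printf( "%02x ", codeBuf[i] );
	std::printf( "\n" );
	return 0;
}

In the sketch, xPADD_B( xmm1, xmm0 ) writes 66 0F FC C8 to the buffer, the register-direct encoding of paddb xmm1, xmm0; the real emitter follows the same template/operator() layout but adds memory and immediate forms per instruction class.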
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1047 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 28 +- pcsx2/x86/iR3000A.cpp | 4 + pcsx2/x86/ix86-32/iR5900Move.cpp | 2 +- pcsx2/x86/ix86/implement/bittest.h | 167 ++------- pcsx2/x86/ix86/implement/group1.h | 6 +- pcsx2/x86/ix86/implement/xchg.h | 22 ++ pcsx2/x86/ix86/implement/xmm/arithmetic.h | 118 ++++++- pcsx2/x86/ix86/implement/xmm/basehelpers.h | 161 ++++++--- pcsx2/x86/ix86/implement/xmm/comparisons.h | 7 +- pcsx2/x86/ix86/implement/xmm/moremovs.h | 183 +++++++++- pcsx2/x86/ix86/implement/xmm/shufflepack.h | 72 +--- pcsx2/x86/ix86/ix86.cpp | 363 +++---------------- pcsx2/x86/ix86/ix86_instructions.h | 182 ++++------ pcsx2/x86/ix86/ix86_legacy_mmx.cpp | 124 ------- pcsx2/x86/ix86/ix86_legacy_sse.cpp | 192 +++++----- pcsx2/x86/ix86/ix86_simd.cpp | 388 +++++++++++++++++++++ pcsx2/x86/ix86/ix86_sse_helpers.h | 1 - pcsx2/x86/ix86/ix86_types.h | 26 +- 18 files changed, 1072 insertions(+), 974 deletions(-) create mode 100644 pcsx2/x86/ix86/implement/xchg.h delete mode 100644 pcsx2/x86/ix86/ix86_legacy_mmx.cpp create mode 100644 pcsx2/x86/ix86/ix86_simd.cpp diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 3731bff67c..5b03403ef2 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -905,14 +905,6 @@ - - - - @@ -921,6 +913,14 @@ RelativePath="..\..\Dump.h" > + + + + @@ -2965,10 +2965,6 @@ RelativePath="..\..\x86\ix86\ix86_legacy_internal.h" > - - @@ -2977,6 +2973,10 @@ RelativePath="..\..\x86\ix86\ix86_legacy_types.h" > + + @@ -3028,6 +3028,10 @@ RelativePath="..\..\x86\ix86\implement\test.h" > + + diff --git a/pcsx2/x86/iR3000A.cpp b/pcsx2/x86/iR3000A.cpp index 846ab01a3e..2faa34ba69 100644 --- a/pcsx2/x86/iR3000A.cpp +++ b/pcsx2/x86/iR3000A.cpp @@ -935,6 +935,8 @@ void psxRecompileNextInstruction(int delayslot) #ifdef _DEBUG static void printfn() { + extern void iDumpPsxRegisters(u32 startpc, u32 temp); + static int lastrec = 0; static int curcount = 0; const int skip = 0; @@ -962,6 +964,8 @@ void iopRecRecompile(u32 startpc) u32 willbranch3 = 0; #ifdef _DEBUG + extern void iDumpPsxRegisters(u32 startpc, u32 temp); + if( psxdump & 4 ) iDumpPsxRegisters(startpc, 0); #endif diff --git a/pcsx2/x86/ix86-32/iR5900Move.cpp b/pcsx2/x86/ix86-32/iR5900Move.cpp index d61ade09ab..0d1878e26b 100644 --- a/pcsx2/x86/ix86-32/iR5900Move.cpp +++ b/pcsx2/x86/ix86-32/iR5900Move.cpp @@ -316,7 +316,7 @@ void recMFHILO1(int hi) if( reghi >= 0 ) { if( regd >= 0 ) { - SSEX_MOVHLPS_XMM_to_XMM(regd, reghi); + SSE_MOVHLPS_XMM_to_XMM(regd, reghi); xmmregs[regd].mode |= MODE_WRITE; } else { diff --git a/pcsx2/x86/ix86/implement/bittest.h b/pcsx2/x86/ix86/implement/bittest.h index 7fcdfb5027..a3829a66af 100644 --- a/pcsx2/x86/ix86/implement/bittest.h +++ b/pcsx2/x86/ix86/implement/bittest.h @@ -32,152 +32,39 @@ enum G8Type G8Type_BTC, }; -////////////////////////////////////////////////////////////////////////////////////////// -// Notes: Bit Test instructions are valid on 16/32 bit operands only. -// -template< G8Type InstType, typename ImmType > -class Group8Impl -{ -protected: - static const uint OperandSize = sizeof(ImmType); - - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - -public: - Group8Impl() {} // For the love of GCC. 
- - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& bitbase, const xRegister& bitoffset ) - { - prefix16(); - xWrite( 0x0f ); - xWrite( 0xa3 | (InstType << 2) ); - ModRM_Direct( bitoffset.Id, bitbase.Id ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( void* bitbase, const xRegister& bitoffset ) - { - prefix16(); - xWrite( 0x0f ); - xWrite( 0xa3 | (InstType << 2) ); - xWriteDisp( bitoffset.Id, bitbase ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibBase& bitbase, const xRegister& bitoffset ) - { - prefix16(); - xWrite( 0x0f ); - xWrite( 0xa3 | (InstType << 2) ); - EmitSibMagic( bitoffset.Id, bitbase ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& bitbase, u8 immoffset ) - { - prefix16(); - xWrite( 0xba0f ); - ModRM_Direct( InstType, bitbase.Id ); - xWrite( immoffset ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibStrict& bitbase, u8 immoffset ) - { - prefix16(); - xWrite( 0xba0f ); - EmitSibMagic( InstType, bitbase ); - xWrite( immoffset ); - } -}; - -// ------------------------------------------------------------------- -// -template< G8Type InstType > -class Group8ImplAll -{ -protected: - typedef Group8Impl m_32; - typedef Group8Impl m_16; - -public: - __forceinline void operator()( const xRegister32& bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - __forceinline void operator()( const xRegister16& bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - __forceinline void operator()( void* bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - __forceinline void operator()( void* bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - __noinline void operator()( const ModSibBase& bitbase, const xRegister32& bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - __noinline void operator()( const ModSibBase& bitbase, const xRegister16& bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - - __noinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - __noinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - void operator()( const xRegister& bitbase, u8 bitoffset ) const { m_32::Emit( bitbase, bitoffset ); } - void operator()( const xRegister& bitbase, u8 bitoffset ) const { m_16::Emit( bitbase, bitoffset ); } - - Group8ImplAll() {} -}; - - ////////////////////////////////////////////////////////////////////////////////////////// // BSF / BSR -- 16/32 operands supported only. // -template< bool isReverse, typename ImmType > +// 0xbc [fwd] / 0xbd [rev] +// +template< u16 Opcode > class BitScanImpl { -protected: - static const uint OperandSize = sizeof(ImmType); - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - static void emitbase() - { - prefix16(); - xWrite( 0x0f ); - xWrite( isReverse ? 0xbd : 0xbc ); - } - -public: - BitScanImpl() {} // For the love of GCC. 
- - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const xRegister& from ) - { - emitbase(); - ModRM_Direct( to.Id, from.Id ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const void* src ) - { - emitbase(); - xWriteDisp( to.Id, src ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const ModSibBase& sibsrc ) - { - emitbase(); - EmitSibMagic( to.Id, sibsrc ); - } -}; - - -// ------------------------------------------------------------------- -// BSF/BSR -- 16 and 32 bit operand forms only! -// -template< bool isReverse > -class BitScanImplAll -{ -protected: - typedef BitScanImpl m_32; - typedef BitScanImpl m_16; - public: - __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { m_32::Emit( to, from ); } - __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { m_16::Emit( to, from ); } - __forceinline void operator()( const xRegister32& to, const void* src ) const { m_32::Emit( to, src ); } - __forceinline void operator()( const xRegister16& to, const void* src ) const { m_16::Emit( to, src ); } - __noinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( to, sibsrc ); } - __noinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( to, sibsrc ); } + BitScanImpl() {} - BitScanImplAll() {} + __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { xOpWrite0F( Opcode, to, from ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); } + __forceinline void operator()( const xRegister32& to, const void* src ) const { xOpWrite0F( Opcode, to, src ); } + __forceinline void operator()( const xRegister16& to, const void* src ) const { xOpWrite0F( 0x66, Opcode, to, src ); } + __forceinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { xOpWrite0F( Opcode, to, sibsrc ); } + __forceinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { xOpWrite0F( 0x66, Opcode, to, sibsrc ); } +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Bit Test Instructions - Valid on 16/32 bit instructions only. 
+// +template< G8Type InstType > +class Group8Impl : public BitScanImpl<0xa3 | (InstType << 2)> +{ +public: + using BitScanImpl<0xa3 | (InstType << 2)>::operator(); + + __forceinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase ); xWrite( bitoffset ); } + __forceinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase ); xWrite( bitoffset ); } + void operator()( const xRegister& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase ); xWrite( bitoffset ); } + void operator()( const xRegister& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase ); xWrite( bitoffset ); } + + Group8Impl() {} }; diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index 8b38f35b04..424ec212dc 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -167,9 +167,9 @@ class xImpl_G1Compare : xImpl_Group1< G1Type_CMP > protected: template< u8 Prefix > struct Woot { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } - __noinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( cmptype ); } Woot() {} }; diff --git a/pcsx2/x86/ix86/implement/xchg.h b/pcsx2/x86/ix86/implement/xchg.h new file mode 100644 index 0000000000..2128728458 --- /dev/null +++ b/pcsx2/x86/ix86/implement/xchg.h @@ -0,0 +1,22 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +// This header file is intended to be the future home of xchg, cmpxchg, xadd, and +// other threading-related exchange instructions. 
diff --git a/pcsx2/x86/ix86/implement/xmm/arithmetic.h b/pcsx2/x86/ix86/implement/xmm/arithmetic.h index dae4a0c0c3..0b25bd827d 100644 --- a/pcsx2/x86/ix86/implement/xmm/arithmetic.h +++ b/pcsx2/x86/ix86/implement/xmm/arithmetic.h @@ -28,19 +28,25 @@ class _SimdShiftHelper public: _SimdShiftHelper() {} - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode1, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( 0x66, Opcode1, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( 0x66, Opcode1, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( 0x66, Opcode1, to, from ); } - __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { writeXMMop( Opcode1, to, from ); } - __forceinline void operator()( const xRegisterMMX& to, const void* from ) const { writeXMMop( Opcode1, to, from ); } - __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { writeXMMop( Opcode1, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { xOpWrite0F( Opcode1, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const void* from ) const { xOpWrite0F( Opcode1, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { xOpWrite0F( Opcode1, to, from ); } - template< typename OperandType > - __emitinline void operator()( const xRegisterSIMD& to, u8 imm8 ) const + __emitinline void operator()( const xRegisterSSE& to, u8 imm8 ) const { - SimdPrefix( (sizeof( OperandType ) == 16) ? 0x66 : 0, OpcodeImm ); + SimdPrefix( 0x66, OpcodeImm ); + ModRM( 3, (int)Modcode, to.Id ); + xWrite( imm8 ); + } + + __emitinline void operator()( const xRegisterMMX& to, u8 imm8 ) const + { + SimdPrefix( 0x00, OpcodeImm ); ModRM( 3, (int)Modcode, to.Id ); xWrite( imm8 ); } @@ -68,11 +74,11 @@ class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ public: const _SimdShiftHelper Q; - void DQ( const xRegisterSSE& to, u8 imm ) const + void DQ( const xRegisterSSE& to, u8 imm8 ) const { SimdPrefix( 0x66, 0x73 ); ModRM( 3, (int)Modcode+1, to.Id ); - xWrite( imm ); + xWrite( imm8 ); } SimdImpl_Shift() {} @@ -156,8 +162,8 @@ template< u16 OpcodeSSE > class SimdImpl_Sqrt : public SimdImpl_rSqrt { public: - const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; SimdImpl_Sqrt() {} + const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -165,9 +171,9 @@ public: class SimdImpl_AndNot { public: + SimdImpl_AndNot() {} const SimdImpl_DestRegSSE<0x00,0x55> PS; const SimdImpl_DestRegSSE<0x66,0x55> PD; - SimdImpl_AndNot() {} }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -281,4 +287,88 @@ public: // * Adds the double-precision floating-point values in the high and low quadwords of // *src* stores the result in the high quadword of dest. 
const SimdImpl_DestRegSSE<0x66, 0x7c> PD; -}; \ No newline at end of file +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// DotProduct calculation (SSE4.1 only!) +// +class SimdImpl_DotProduct +{ +public: + SimdImpl_DotProduct() {} + + // [SSE-4.1] Conditionally multiplies the packed single precision floating-point + // values in dest with the packed single-precision floats in src depending on a + // mask extracted from the high 4 bits of the immediate byte. If a condition mask + // bit in Imm8[7:4] is zero, the corresponding multiplication is replaced by a value + // of 0.0. The four resulting single-precision values are summed into an inter- + // mediate result. + // + // The intermediate result is conditionally broadcasted to the destination using a + // broadcast mask specified by bits [3:0] of the immediate byte. If a broadcast + // mask bit is 1, the intermediate result is copied to the corresponding dword + // element in dest. If a broadcast mask bit is zero, the corresponding element in + // the destination is set to zero. + // + SimdImpl_DestRegImmSSE<0x66,0x403a> PS; + + // [SSE-4.1] + SimdImpl_DestRegImmSSE<0x66,0x413a> PD; +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// Rounds floating point values (packed or single scalar) by an arbitrary rounding mode. +// (SSE4.1 only!) +class SimdImpl_Round +{ +public: + SimdImpl_Round() {} + + // [SSE-4.1] Rounds the 4 packed single-precision src values and stores them in dest. + // + // Imm8 specifies control fields for the rounding operation: + // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) + // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. + // Bits 1:0 - Specifies a rounding mode for this instruction only. + // + // Rounding Mode Reference: + // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. + // + const SimdImpl_DestRegImmSSE<0x66,0x083a> PS; + + // [SSE-4.1] Rounds the 2 packed double-precision src values and stores them in dest. + // + // Imm8 specifies control fields for the rounding operation: + // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) + // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. + // Bits 1:0 - Specifies a rounding mode for this instruction only. + // + // Rounding Mode Reference: + // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. + // + const SimdImpl_DestRegImmSSE<0x66,0x093a> PD; + + // [SSE-4.1] Rounds the single-precision src value and stores in dest. + // + // Imm8 specifies control fields for the rounding operation: + // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) + // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. + // Bits 1:0 - Specifies a rounding mode for this instruction only. + // + // Rounding Mode Reference: + // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. + // + const SimdImpl_DestRegImmSSE<0x66,0x0a3a> SS; + + // [SSE-4.1] Rounds the double-precision src value and stores in dest. + // + // Imm8 specifies control fields for the rounding operation: + // Bit 3 - processor behavior for a precision exception (0: normal, 1: inexact) + // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8. + // Bits 1:0 - Specifies a rounding mode for this instruction only. 
+ // + // Rounding Mode Reference: + // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate. + // + const SimdImpl_DestRegImmSSE<0x66,0x0b3a> SD; +}; diff --git a/pcsx2/x86/ix86/implement/xmm/basehelpers.h b/pcsx2/x86/ix86/implement/xmm/basehelpers.h index 7094322b3d..0cce6b2369 100644 --- a/pcsx2/x86/ix86/implement/xmm/basehelpers.h +++ b/pcsx2/x86/ix86/implement/xmm/basehelpers.h @@ -23,57 +23,106 @@ extern void SimdPrefix( u8 prefix, u16 opcode ); -// ------------------------------------------------------------------------ -// xmm emitter helpers for xmm instruction with prefixes. -// These functions also support deducing the use of the prefix from the template parameters, -// since most xmm instructions use a prefix and most mmx instructions do not. (some mov -// instructions violate this "guideline.") -// -template< typename T, typename T2 > -__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) +extern void xOpWrite0F( u8 prefix, u16 opcode, int instId, const ModSibBase& sib ); +extern void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data ); +extern void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib ); +extern void xOpWrite0F( u16 opcode, int instId, const void* data ); + +template< typename T2 > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, int instId, const xRegister& from ) { - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); - ModRM_Direct( to.Id, from.Id ); + SimdPrefix( prefix, opcode ); + ModRM_Direct( instId, from.Id ); } -template< typename T > -__noinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) +template< typename T2 > __emitinline +void xOpWrite0F( u16 opcode, int instId, const xRegister& from ) { - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); - EmitSibMagic( reg.Id, sib ); + xOpWrite0F( 0, opcode, instId, from ); } -template< typename T > -__emitinline void writeXMMop( u8 prefix, u16 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) +template< typename T, typename T2 > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) { - SimdPrefix( (forcePrefix || (sizeof( T ) == 16)) ? prefix : 0, opcode ); - xWriteDisp( reg.Id, data ); + xOpWrite0F( prefix, opcode, to.Id, from ); +} + +template< typename T > __noinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) +{ + xOpWrite0F( prefix, opcode, reg.Id, sib ); +} + +template< typename T > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) +{ + xOpWrite0F( prefix, opcode, reg.Id, data ); } // ------------------------------------------------------------------------ -// xmm emitter helpers for xmm instructions *without* prefixes. -// These are normally used for special instructions that have MMX forms only (non-SSE), however -// some special forms of sse/xmm mov instructions also use them due to prefixing inconsistencies. 
// -template< typename T, typename T2 > -__emitinline void writeXMMop( u16 opcode, const xRegister& to, const xRegister& from ) +template< typename T, typename T2 > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, u8 imm8 ) { - SimdPrefix( 0, opcode ); - ModRM_Direct( to.Id, from.Id ); + xOpWrite0F( prefix, opcode, to, from ); + xWrite( imm8 ); } -template< typename T > -__noinline void writeXMMop( u16 opcode, const xRegister& reg, const ModSibBase& sib ) +template< typename T > __noinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, u8 imm8 ) { - SimdPrefix( 0, opcode ); - EmitSibMagic( reg.Id, sib ); + xOpWrite0F( prefix, opcode, reg, sib ); + xWrite( imm8 ); } -template< typename T > -__emitinline void writeXMMop( u16 opcode, const xRegister& reg, const void* data ) +template< typename T > __emitinline +void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const void* data, u8 imm8 ) { - SimdPrefix( 0, opcode ); - xWriteDisp( reg.Id, data ); + xOpWrite0F( prefix, opcode, reg, data ); + xWrite( imm8 ); +} + +// ------------------------------------------------------------------------ + +template< typename T, typename T2 > __emitinline +void xOpWrite0F( u16 opcode, const xRegister& to, const xRegister& from ) +{ + xOpWrite0F( 0, opcode, to, from ); +} + +template< typename T > __noinline +void xOpWrite0F( u16 opcode, const xRegister& reg, const ModSibBase& sib ) +{ + xOpWrite0F( 0, opcode, reg, sib ); +} + +template< typename T > __emitinline +void xOpWrite0F( u16 opcode, const xRegister& reg, const void* data ) +{ + xOpWrite0F( 0, opcode, reg, data ); +} + +// ------------------------------------------------------------------------ + +template< typename T, typename T2 > __emitinline +void xOpWrite0F( u16 opcode, const xRegister& to, const xRegister& from, u8 imm8 ) +{ + xOpWrite0F( opcode, to, from ); + xWrite( imm8 ); +} + +template< typename T > __noinline +void xOpWrite0F( u16 opcode, const xRegister& reg, const ModSibBase& sib, u8 imm8 ) +{ + xOpWrite0F( opcode, reg, sib ); + xWrite( imm8 ); +} + +template< typename T > __emitinline +void xOpWrite0F( u16 opcode, const xRegister& reg, const void* data, u8 imm8 ) +{ + xOpWrite0F( opcode, reg, data ); + xWrite( imm8 ); } // ------------------------------------------------------------------------ @@ -84,9 +133,9 @@ template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegSSE { public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } SimdImpl_DestRegSSE() {} //GCWho? 
}; @@ -99,9 +148,9 @@ template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegImmSSE { public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Prefix, Opcode, to, from, imm ); } SimdImpl_DestRegImmSSE() {} //GCWho? }; @@ -110,9 +159,9 @@ template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegImmMMX { public: - __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } - __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { writeXMMop( Prefix, Opcode, to, from ); xWrite( imm ); } + __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); } + __forceinline void operator()( const xRegisterMMX& to, const void* from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); } + __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from, u8 imm ) const { xOpWrite0F( Opcode, to, from, imm ); } SimdImpl_DestRegImmMMX() {} //GCWho? }; @@ -125,27 +174,33 @@ template< u8 Prefix, u16 Opcode > class SimdImpl_DestRegEither { public: - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const xRegisterSIMD& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - template< typename T > __forceinline - void operator()( const xRegisterSIMD& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + + __forceinline void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const { xOpWrite0F( Opcode, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const void* from ) const { xOpWrite0F( Opcode, to, from ); } + __forceinline void operator()( const xRegisterMMX& to, const ModSibBase& from ) const { xOpWrite0F( Opcode, to, from ); } SimdImpl_DestRegEither() {} //GCWho? 
}; // ------------------------------------------------------------------------ -// For implementing MMX/SSE operations which the destination *must* be a register, but the source -// can be regDirect or ModRM (indirect). +// For implementing MMX/SSE operations where the destination *must* be a register, but the +// source can be Direct or Indirect (ModRM/SibSB). The SrcOperandType template parameter +// is used to enforce type strictness of the (void*) parameter and ModSib<> parameter, so +// that the programmer must be explicit in specifying desired operand size. +// +// IMPORTANT: This helper assumes the prefix opcode is written *always* -- regardless of +// MMX or XMM register status. // template< u8 Prefix, u16 Opcode, typename DestRegType, typename SrcRegType, typename SrcOperandType > class SimdImpl_DestRegStrict { public: - __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } - __forceinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { writeXMMop( Prefix, Opcode, to, from, true ); } + __forceinline void operator()( const DestRegType& to, const SrcRegType& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const DestRegType& to, const SrcOperandType* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const DestRegType& to, const ModSibStrict& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } SimdImpl_DestRegStrict() {} //GCWho? }; diff --git a/pcsx2/x86/ix86/implement/xmm/comparisons.h b/pcsx2/x86/ix86/implement/xmm/comparisons.h index c71b53d3d5..a7e3197038 100644 --- a/pcsx2/x86/ix86/implement/xmm/comparisons.h +++ b/pcsx2/x86/ix86/implement/xmm/comparisons.h @@ -41,9 +41,9 @@ class SimdImpl_Compare protected: template< u8 Prefix > struct Woot { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, 0xc2, to, from ); xWrite( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( CType ); } Woot() {} }; @@ -128,4 +128,3 @@ public: // packed min/max values in dest. 
(SSE operands only) const SimdImpl_DestRegSSE<0x66,((Opcode2+3)<<8)|0x38> UD; }; - diff --git a/pcsx2/x86/ix86/implement/xmm/moremovs.h b/pcsx2/x86/ix86/implement/xmm/moremovs.h index 7fcd7a1d8d..93fc620799 100644 --- a/pcsx2/x86/ix86/implement/xmm/moremovs.h +++ b/pcsx2/x86/ix86/implement/xmm/moremovs.h @@ -30,10 +30,10 @@ protected: struct Woot { Woot() {} - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, Opcode+1, from, to ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode+1, from, to ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, Opcode, to, from ); } + __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, Opcode+1, from, to ); } }; public: @@ -51,26 +51,104 @@ template< u16 Opcode > class MovhlImpl_RtoR { public: - __forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( Opcode, to, from ); } - __forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { writeXMMop( 0x66, Opcode, to, from ); } + __forceinline void PS( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Opcode, to, from ); } + __forceinline void PD( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); } MovhlImpl_RtoR() {} //GCC. }; -// ------------------------------------------------------------------------ -template< u8 Prefix, u16 Opcode, u16 OpcodeAlt > -class MovapsImplAll +////////////////////////////////////////////////////////////////////////////////////////// +// Legends in their own right: MOVAPS / MOVAPD / MOVUPS / MOVUPD +// +// All implementations of Unaligned Movs will, when possible, use aligned movs instead. +// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement +// which can be checked for alignment at runtime. +// +template< u8 Prefix, bool isAligned > +class SimdImpl_MoveSSE { + static const u16 OpcodeA = 0x28; // Aligned [aps] form + static const u16 OpcodeU = 0x10; // unaligned [ups] form + public: - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { if( to != from ) writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { writeXMMop( Prefix, Opcode, to, from ); } - __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { writeXMMop( Prefix, OpcodeAlt, from, to ); } - - MovapsImplAll() {} //GCC. + SimdImpl_MoveSSE() {} //GCC. 
+
+	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
+	{
+		if( to != from ) xOpWrite0F( Prefix, OpcodeA, to, from );
+	}
+
+	__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
+	{
+		xOpWrite0F( Prefix, (isAligned || ((uptr)from & 0x0f) == 0) ? OpcodeA : OpcodeU, to, from );
+	}
+
+	__forceinline void operator()( void* to, const xRegisterSSE& from ) const
+	{
+		xOpWrite0F( Prefix, (isAligned || ((uptr)to & 0x0f) == 0) ? OpcodeA+1 : OpcodeU+1, from, to );
+	}
+
+	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
+	{
+		// ModSib form is aligned if it's displacement-only and the displacement is aligned:
+		bool isReallyAligned = isAligned || ( ((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty() );
+		xOpWrite0F( Prefix, isReallyAligned ? OpcodeA : OpcodeU, to, from );
+	}
+
+	__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
+	{
+		// ModSib form is aligned if it's displacement-only and the displacement is aligned:
+		bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
+		xOpWrite0F( Prefix, isReallyAligned ? OpcodeA+1 : OpcodeU+1, from, to );
+	}
 };
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// Implementations for MOVDQA / MOVDQU
+//
+template< u8 Prefix, bool isAligned >
+class SimdImpl_MoveDQ
+{
+	static const u8 PrefixA = 0x66;		// Aligned (movdqa) form
+	static const u8 PrefixU = 0xf3;		// unaligned (movdqu) form
+
+	static const u16 Opcode = 0x6f;
+	static const u16 Opcode_Alt = 0x7f;	// alternate ModRM encoding (reverse src/dst)
+
+public:
+	SimdImpl_MoveDQ() {} //GCC.
+
+	__forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const
+	{
+		if( to != from ) xOpWrite0F( PrefixA, Opcode, to, from );
+	}
+
+	__forceinline void operator()( const xRegisterSSE& to, const void* from ) const
+	{
+		xOpWrite0F( (isAligned || ((uptr)from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode, to, from );
+	}
+
+	__forceinline void operator()( const void* to, const xRegisterSSE& from ) const
+	{
+		xOpWrite0F( (isAligned || ((uptr)to & 0x0f) == 0) ? PrefixA : PrefixU, Opcode_Alt, from, to );
+	}
+
+	__forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const
+	{
+		// ModSib form is aligned if it's displacement-only and the displacement is aligned:
+		bool isReallyAligned = isAligned || ( (from.Displacement & 0x0f) == 0 && from.Index.IsEmpty() && from.Base.IsEmpty() );
+		xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode, to, from );
+	}
+
+	__forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const
+	{
+		// ModSib form is aligned if it's displacement-only and the displacement is aligned:
+		bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() );
+		xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode_Alt, from, to );
+	}
+};
+
+
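// ----------------------------------------------------------------------------------------
// Illustrative sketch (not part of the patch itself): how the aligned/unaligned promotion
// in SimdImpl_MoveSSE / SimdImpl_MoveDQ behaves at the call site.  It assumes the xMOVUPS
// and xMOVDQU instances declared later in ix86_instructions.h; g_aligned_src is a
// hypothetical 16-byte-aligned global, and the xRegisterSSE(n) / ptr[xAddressReg(n)+disp]
// spellings mirror the legacy wrappers elsewhere in this patch.
//
static void SampleAlignedMoves()
{
	extern u128 g_aligned_src;		// hypothetical, assumed 16-byte aligned

	// void* forms: the pointer value is known when the code is emitted, so an aligned
	// address lets the "unaligned" movs emit movaps/movdqa instead of movups/movdqu.
	xMOVUPS( xRegisterSSE(0), &g_aligned_src );
	xMOVDQU( &g_aligned_src, xRegisterSSE(1) );

	// Register-indexed ModSib form: alignment cannot be proven from Base/Index/Displacement,
	// so the true unaligned opcode (movups) is emitted.
	xMOVUPS( xRegisterSSE(2), ptr[xAddressReg(0)+0x10] );
}
// ----------------------------------------------------------------------------------------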
 //////////////////////////////////////////////////////////////////////////////////////////
 //
 template< u8 AltPrefix, u16 OpcodeSSE >
@@ -83,12 +161,79 @@ public:
 };
 
 //////////////////////////////////////////////////////////////////////////////////////////
+// Blend - Conditional copying of values in src into dest.
 //
 class SimdImpl_Blend
 {
+public:
+	// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
+	// mask bits in the immediate operand (bits [3:0]).  Each mask bit corresponds to a
+	// dword element in a 128-bit operand.
+	//
+	// If a mask bit is 1, then the corresponding dword in the source operand is copied
+	// to dest, else the dword element in dest is left unchanged.
+	//
 	SimdImpl_DestRegImmSSE<0x66,0x0c3a> PS;
+
+	// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
+	// mask bits in the immediate operand (bits [1:0]).  Each mask bit corresponds to a
+	// quadword element in a 128-bit operand.
+	//
+	// If a mask bit is 1, then the corresponding quadword in the source operand is copied
+	// to dest, else the quadword element in dest is left unchanged.
+	//
 	SimdImpl_DestRegImmSSE<0x66,0x0d3a> PD;
-	SimdImpl_DestRegImmSSE<0x66,0x1438> VPS;
-	SimdImpl_DestRegImmSSE<0x66,0x1538> VPD;
-};
\ No newline at end of file
+	// [SSE-4.1] Conditionally copies dword values from src to dest, depending on the
+	// mask (bits [3:0]) in XMM0 (yes, the fixed register).  Each mask bit corresponds
+	// to a dword element in the 128-bit operand.
+	//
+	// If a mask bit is 1, then the corresponding dword in the source operand is copied
+	// to dest, else the dword element in dest is left unchanged.
+	//
+	SimdImpl_DestRegSSE<0x66,0x1438> VPS;
+
+	// [SSE-4.1] Conditionally copies quadword values from src to dest, depending on the
+	// mask (bits [1:0]) in XMM0 (yes, the fixed register).  Each mask bit corresponds
+	// to a quadword element in the 128-bit operand.
+	//
+	// If a mask bit is 1, then the corresponding quadword in the source operand is copied
+	// to dest, else the quadword element in dest is left unchanged.
+	//
+	SimdImpl_DestRegSSE<0x66,0x1538> VPD;
+};
+
+//////////////////////////////////////////////////////////////////////////////////////////
+// Packed Move with Sign or Zero extension.
+//
+template< bool SignExtend >
+class SimdImpl_PMove
+{
+	static const u16 OpcodeBase = SignExtend ? 0x2038 : 0x3038;
+
+public:
+	// [SSE-4.1] Zero/Sign-extend the low byte values in src into word integers
+	// and store them in dest.
+	SimdImpl_DestRegStrict<0x66,OpcodeBase,xRegisterSSE,xRegisterSSE,u64> BW;
+
+	// [SSE-4.1] Zero/Sign-extend the low byte values in src into dword integers
+	// and store them in dest.
+	SimdImpl_DestRegStrict<0x66,OpcodeBase+0x100,xRegisterSSE,xRegisterSSE,u32> BD;
+
+	// [SSE-4.1] Zero/Sign-extend the low byte values in src into qword integers
+	// and store them in dest.
+	SimdImpl_DestRegStrict<0x66,OpcodeBase+0x200,xRegisterSSE,xRegisterSSE,u16> BQ;
+
+	// [SSE-4.1] Zero/Sign-extend the low word values in src into dword integers
+	// and store them in dest.
+	SimdImpl_DestRegStrict<0x66,OpcodeBase+0x300,xRegisterSSE,xRegisterSSE,u64> WD;
+
+	// [SSE-4.1] Zero/Sign-extend the low word values in src into qword integers
+	// and store them in dest.
+	SimdImpl_DestRegStrict<0x66,OpcodeBase+0x400,xRegisterSSE,xRegisterSSE,u32> WQ;
+
+	// [SSE-4.1] Zero/Sign-extend the low dword values in src into qword integers
+	// and store them in dest.
+ SimdImpl_DestRegStrict<0x66,OpcodeBase+0x500,xRegisterSSE,xRegisterSSE,u64> DQ; +}; + diff --git a/pcsx2/x86/ix86/implement/xmm/shufflepack.h b/pcsx2/x86/ix86/implement/xmm/shufflepack.h index f4056c9369..ee306e51d4 100644 --- a/pcsx2/x86/ix86/implement/xmm/shufflepack.h +++ b/pcsx2/x86/ix86/implement/xmm/shufflepack.h @@ -26,9 +26,9 @@ class SimdImpl_Shuffle protected: template< u8 Prefix > struct Woot { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { writeXMMop( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } Woot() {} }; @@ -182,20 +182,17 @@ protected: __forceinline void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const { - writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm8 ); + xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 ); } __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 imm8 ) const { - writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm8 ); + xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 ); } __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { - writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from ); - xWrite( imm8 ); + xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 ); } }; @@ -203,28 +200,13 @@ public: SimdImpl_PInsert() {} // Operation can be performed on either MMX or SSE src operands. - template< typename T > - __forceinline void W( const xRegisterSIMD& to, const xRegister32& from, u8 imm8 ) const - { - writeXMMop( 0x66, 0xc4, to, from ); - xWrite( imm8 ); - } + __forceinline void W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); } + __forceinline void W( const xRegisterSSE& to, const void* from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); } + __forceinline void W( const xRegisterSSE& to, const ModSibBase& from, u8 imm8 ) const { xOpWrite0F( 0x66, 0xc4, to, from, imm8 ); } - // Operation can be performed on either MMX or SSE src operands. - template< typename T > - __forceinline void W( const xRegisterSIMD& to, const void* from, u8 imm8 ) const - { - writeXMMop( 0x66, 0xc4, to, from ); - xWrite( imm8 ); - } - - // Operation can be performed on either MMX or SSE src operands. 
-	template< typename T >
-	__forceinline void W( const xRegisterSIMD& to, const ModSibBase& from, u8 imm8 ) const
-	{
-		writeXMMop( 0x66, 0xc4, to, from );
-		xWrite( imm8 );
-	}
+	__forceinline void W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const	{ xOpWrite0F( 0xc4, to, from, imm8 ); }
+	__forceinline void W( const xRegisterMMX& to, const void* from, u8 imm8 ) const		{ xOpWrite0F( 0xc4, to, from, imm8 ); }
+	__forceinline void W( const xRegisterMMX& to, const ModSibBase& from, u8 imm8 ) const	{ xOpWrite0F( 0xc4, to, from, imm8 ); }
 
 	// [SSE-4.1]
 	const ByteDwordForms<0x20> B;
@@ -250,20 +232,17 @@ protected:
 
 		__forceinline void operator()( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const
 		{
-			writeXMMop( 0x66, (Opcode<<8) | 0x3a, to, from );
-			xWrite( imm8 );
+			xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, to, from, imm8 );
 		}
 
 		__forceinline void operator()( void* dest, const xRegisterSSE& from, u8 imm8 ) const
 		{
-			writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );
-			xWrite( imm8 );
+			xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, from, dest, imm8 );
 		}
 
 		__forceinline void operator()( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
 		{
-			writeXMMop( 0x66, (Opcode<<8) | 0x3a, from, dest );
-			xWrite( imm8 );
+			xOpWrite0F( 0x66, (Opcode<<8) | 0x3a, from, dest, imm8 );
 		}
 	};
 
@@ -276,24 +255,11 @@ public:
 	//
 	// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
 	//
-	template< typename T >
-	__forceinline void W( const xRegister32& to, const xRegisterSIMD& from, u8 imm8 ) const
-	{
-		writeXMMop( 0x66, 0xc5, to, from, true );
-		xWrite( imm8 );
-	}
+	__forceinline void W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const	{ xOpWrite0F( 0x66, 0xc5, to, from, imm8 ); }
+	__forceinline void W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const	{ xOpWrite0F( 0xc5, to, from, imm8 ); }
 
-	__forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const
-	{
-		writeXMMop( 0x66, 0x153a, from, dest );
-		xWrite( imm8 );
-	}
-
-	__forceinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const
-	{
-		writeXMMop( 0x66, 0x153a, from, dest );
-		xWrite( imm8 );
-	}
+	__forceinline void W( void* dest, const xRegisterSSE& from, u8 imm8 ) const			{ xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); }
+	__forceinline void W( const ModSibBase& dest, const xRegisterSSE& from, u8 imm8 ) const	{ xOpWrite0F( 0x66, 0x153a, from, dest, imm8 ); }
 
 	// [SSE-4.1] Copies the byte element specified by imm8 from src to dest.  The upper bits
 	// of dest are zero-extended (cleared).  This can be used to extract any single packed
diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp
index 0453ffc8e1..4d272fd154 100644
--- a/pcsx2/x86/ix86/ix86.cpp
+++ b/pcsx2/x86/ix86/ix86.cpp
@@ -160,8 +160,41 @@ namespace Internal {
 		xWriteDisp( regfield, (s32)address );
 	}
+
+	//////////////////////////////////////////////////////////////////////////////////////////
+	// emitter helpers for xmm instructions with prefixes, most of which are using
+	// the basic opcode format (items inside braces denote optional or conditional
+	// emission):
+	//
+	//   [Prefix] / 0x0f / [OpcodePrefix] / Opcode / ModRM+[SibSB]
+	//
+	// Prefixes are typically 0x66, 0xf2, or 0xf3.  OpcodePrefixes are either 0x38 or
+	// 0x3a [any other value will result in an assertion failure].
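// ----------------------------------------------------------------------------------------
// Illustrative sketch (not part of the patch itself): two register-direct uses of the
// format spelled out above, built from opcodes that already appear in this patch
// (movapd = 66 0F 28 /r, ptest = 66 0F 38 17 /r).  The function name is illustrative;
// xRegisterSSE(n) mirrors the legacy wrappers elsewhere in this patch.
//
static void SampleOpWrite0F()
{
	// Low byte of 'opcode' is neither 0x38 nor 0x3a, so no OpcodePrefix byte is emitted:
	// 66 0F 28 /r  (movapd xmm0,xmm1)
	xOpWrite0F( 0x66, 0x28, xRegisterSSE(0).Id, xRegisterSSE(1) );

	// Low byte 0x38 selects the three-byte escape; the high byte 0x17 follows it:
	// 66 0F 38 17 /r  (ptest xmm0,xmm1)
	xOpWrite0F( 0x66, 0x1738, xRegisterSSE(0).Id, xRegisterSSE(1) );
}
// ----------------------------------------------------------------------------------------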
+ // + __emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const ModSibBase& sib ) + { + SimdPrefix( prefix, opcode ); + EmitSibMagic( instId, sib ); + } - // ------------------------------------------------------------------------ + __emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data ) + { + SimdPrefix( prefix, opcode ); + xWriteDisp( instId, data ); + } + + __emitinline void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib ) + { + xOpWrite0F( 0, opcode, instId, sib ); + } + + __emitinline void xOpWrite0F( u16 opcode, int instId, const void* data ) + { + xOpWrite0F( 0, opcode, instId, data ); + } + + + ////////////////////////////////////////////////////////////////////////////////////////// // returns TRUE if this instruction requires SIB to be encoded, or FALSE if the // instruction ca be encoded as ModRm alone. static __forceinline bool NeedsSibMagic( const ModSibBase& info ) @@ -288,13 +321,13 @@ const MovExtendImplAll xMOVSX; const DwordShiftImplAll xSHLD; const DwordShiftImplAll xSHRD; -const Group8ImplAll xBT; -const Group8ImplAll xBTR; -const Group8ImplAll xBTS; -const Group8ImplAll xBTC; +const Group8Impl xBT; +const Group8Impl xBTR; +const Group8Impl xBTS; +const Group8Impl xBTC; -const BitScanImplAll xBSF; -const BitScanImplAll xBSR; +const BitScanImpl<0xbc> xBSF; +const BitScanImpl<0xbd> xBSR; // ------------------------------------------------------------------------ const CMovImplGeneric xCMOV; @@ -635,320 +668,4 @@ __emitinline void xBSWAP( const xRegister32& to ) write8( 0xC8 | to.Id ); } - -////////////////////////////////////////////////////////////////////////////////////////// -// MMX / XMM Instructions -// (these will get put in their own file later) - -// ------------------------------------------------------------------------ -// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is -// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4 -// instructions). Any other lower value assumes the upper value is 0 and ignored. -// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will -// generate an assertion. -// -__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) -{ - const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a); - - // If the lower byte is not a valid previx and the upper byte is non-zero it - // means we made a mistake! 
- if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 ); - - if( prefix != 0 ) - { - if( is16BitOpcode ) - xWrite( (opcode<<16) | 0x0f00 | prefix ); - else - { - xWrite( 0x0f00 | prefix ); - xWrite( opcode ); - } - } - else - { - if( is16BitOpcode ) - { - xWrite( 0x0f ); - xWrite( opcode ); - } - else - xWrite( (opcode<<8) | 0x0f ); - } -} - -// [SSE-3] -const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; -// [SSE-3] -const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; - -const MovapsImplAll< 0, 0x28, 0x29 > xMOVAPS; -const MovapsImplAll< 0, 0x10, 0x11 > xMOVUPS; -const MovapsImplAll< 0x66, 0x28, 0x29 > xMOVAPD; -const MovapsImplAll< 0x66, 0x10, 0x11 > xMOVUPD; - -#ifdef ALWAYS_USE_MOVAPS -const MovapsImplAll< 0x66, 0x6f, 0x7f > xMOVDQA; -const MovapsImplAll< 0xf3, 0x6f, 0x7f > xMOVDQU; -#else -const MovapsImplAll< 0, 0x28, 0x29 > xMOVDQA; -const MovapsImplAll< 0, 0x10, 0x11 > xMOVDQU; -#endif - -const MovhlImplAll<0x16> xMOVH; -const MovhlImplAll<0x12> xMOVL; -const MovhlImpl_RtoR<0x16> xMOVLH; -const MovhlImpl_RtoR<0x12> xMOVHL; - -const SimdImpl_DestRegEither<0x66,0xdb> xPAND; -const SimdImpl_DestRegEither<0x66,0xdf> xPANDN; -const SimdImpl_DestRegEither<0x66,0xeb> xPOR; -const SimdImpl_DestRegEither<0x66,0xef> xPXOR; - -const SimdImpl_AndNot xANDN; - -const SimdImpl_UcomI<0x66,0x2e> xUCOMI; -const SimdImpl_rSqrt<0x53> xRCP; -const SimdImpl_rSqrt<0x52> xRSQRT; -const SimdImpl_Sqrt<0x51> xSQRT; - -const SimdImpl_MinMax<0x5f> xMAX; -const SimdImpl_MinMax<0x5d> xMIN; -const SimdImpl_Shuffle<0xc6> xSHUF; - -// ------------------------------------------------------------------------ - -const SimdImpl_Compare xCMPEQ; -const SimdImpl_Compare xCMPLT; -const SimdImpl_Compare xCMPLE; -const SimdImpl_Compare xCMPUNORD; -const SimdImpl_Compare xCMPNE; -const SimdImpl_Compare xCMPNLT; -const SimdImpl_Compare xCMPNLE; -const SimdImpl_Compare xCMPORD; - -// ------------------------------------------------------------------------ -// SSE Conversion Operations, as looney as they are. -// -// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing -// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]). 
-// -const SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; -const SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; - -const SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; -const SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; -const SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; - -const SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; -const SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; - -const SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; -const SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; -const SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; - -const SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; -const SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; -const SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; -const SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; - -const SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; -const SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; - -const SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; -const SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; -const SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; -const SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; - -const SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; -const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; - -// ------------------------------------------------------------------------ - -const SimdImpl_Shift<0xd0, 2> xPSRL; -const SimdImpl_Shift<0xf0, 6> xPSLL; -const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; - -const SimdImpl_AddSub<0xdc, 0xd4> xPADD; -const SimdImpl_AddSub<0xd8, 0xfb> xPSUB; -const SimdImpl_PMinMax<0xde,0x3c> xPMAX; -const SimdImpl_PMinMax<0xda,0x38> xPMIN; - -const SimdImpl_PMul xPMUL; -const SimdImpl_PCompare xPCMP; -const SimdImpl_PShuffle xPSHUF; -const SimdImpl_PUnpack xPUNPCK; -const SimdImpl_Unpack xUNPCK; -const SimdImpl_Pack xPACK; - -const SimdImpl_PAbsolute xPABS; -const SimdImpl_PSign xPSIGN; -const SimdImpl_PInsert xPINSR; -const SimdImpl_PExtract xPEXTR; -const SimdImpl_PMultAdd xPMADD; -const SimdImpl_HorizAdd xHADD; - - -////////////////////////////////////////////////////////////////////////////////////////// -// - -__emitinline void xEMMS() -{ - xWrite( 0x770F ); -} - -// Store Streaming SIMD Extension Control/Status to Mem32. -__emitinline void xSTMXCSR( u32* dest ) -{ - SimdPrefix( 0, 0xae ); - xWriteDisp( 3, dest ); -} - -// Load Streaming SIMD Extension Control/Status from Mem32. -__emitinline void xLDMXCSR( const u32* src ) -{ - SimdPrefix( 0, 0xae ); - xWriteDisp( 2, src ); -} - - -// Moves from XMM to XMM, with the *upper 64 bits* of the destination register -// being cleared to zero. -__forceinline void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ) { writeXMMop( 0xf3, 0x7e, to, from ); } - -// Moves from XMM to XMM, with the *upper 64 bits* of the destination register -// being cleared to zero. 
-__forceinline void xMOVQZX( const xRegisterSSE& to, const ModSibBase& src ) { writeXMMop( 0xf3, 0x7e, to, src ); } - -// Moves from XMM to XMM, with the *upper 64 bits* of the destination register -// being cleared to zero. -__forceinline void xMOVQZX( const xRegisterSSE& to, const void* src ) { writeXMMop( 0xf3, 0x7e, to, src ); } - -// Moves lower quad of XMM to ptr64 (no bits are cleared) -__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } -// Moves lower quad of XMM to ptr64 (no bits are cleared) -__forceinline void xMOVQ( void* dest, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xd6, from, dest ); } - -__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ) { if( to != from ) writeXMMop( 0x6f, to, from ); } -__forceinline void xMOVQ( const xRegisterMMX& to, const ModSibBase& src ) { writeXMMop( 0x6f, to, src ); } -__forceinline void xMOVQ( const xRegisterMMX& to, const void* src ) { writeXMMop( 0x6f, to, src ); } -__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } -__forceinline void xMOVQ( void* dest, const xRegisterMMX& from ) { writeXMMop( 0x7f, from, dest ); } - -// This form of xMOVQ is Intel's adeptly named 'MOVQ2DQ' -__forceinline void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ) { writeXMMop( 0xf3, 0xd6, to, from ); } - -// This form of xMOVQ is Intel's adeptly named 'MOVDQ2Q' -__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ) -{ - // Manual implementation of this form of MOVQ, since its parameters are unique in a way - // that breaks the template inference of writeXMMop(); - - SimdPrefix( 0xf2, 0xd6 ); - ModRM_Direct( to.Id, from.Id ); -} - -////////////////////////////////////////////////////////////////////////////////////////// -// - -#define IMPLEMENT_xMOVS( ssd, prefix ) \ - __forceinline void xMOV##ssd( const xRegisterSSE& to, const xRegisterSSE& from ) { if( to != from ) writeXMMop( prefix, 0x10, to, from ); } \ - __forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const void* from ) { writeXMMop( prefix, 0x10, to, from ); } \ - __forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const ModSibBase& from ) { writeXMMop( prefix, 0x10, to, from ); } \ - __forceinline void xMOV##ssd( const void* to, const xRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } \ - __forceinline void xMOV##ssd( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( prefix, 0x11, from, to ); } - -IMPLEMENT_xMOVS( SS, 0xf3 ) -IMPLEMENT_xMOVS( SD, 0xf2 ) - -////////////////////////////////////////////////////////////////////////////////////////// -// Non-temporal movs only support a register as a target (ie, load form only, no stores) -// - -__forceinline void xMOVNTDQA( const xRegisterSSE& to, const void* from ) -{ - xWrite( 0x2A380f66 ); - xWriteDisp( to.Id, from ); -} - -__forceinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from ) -{ - xWrite( 0x2A380f66 ); - EmitSibMagic( to.Id, from ); -} - -__forceinline void xMOVNTDQ( void* to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } -__forceinline void xMOVNTDQA( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0xe7, from, to ); } - -__forceinline void xMOVNTPD( void* to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } -__forceinline void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x66, 0x2b, from, to ); } -__forceinline 
void xMOVNTPS( void* to, const xRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } -__forceinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { writeXMMop( 0x2b, from, to ); } - -__forceinline void xMOVNTQ( void* to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } -__forceinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { writeXMMop( 0xe7, from, to ); } - -__forceinline void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from) { writeXMMop( 0x50, to, from ); } -__forceinline void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from) { writeXMMop( 0x66, 0x50, to, from, true ); } - -////////////////////////////////////////////////////////////////////////////////////////// -// INSERTPS / EXTRACTPS [SSE4.1 only!] -// -// [TODO] these might be served better as classes, especially if other instructions use -// the M32,sse,imm form (I forget offhand if any do). - - -// [SSE-4.1] Insert a single-precision floating-point value from src into a specified -// location in dest, and selectively zero out the data elements in dest according to -// the mask field in the immediate byte. The source operand can be a memory location -// (32 bits) or an XMM register (lower 32 bits used). -// -// Imm8 provides three fields: -// * COUNT_S: The value of Imm8[7:6] selects the dword element from src. It is 0 if -// the source is a memory operand. -// * COUNT_D: The value of Imm8[5:4] selects the target dword element in dest. -// * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written -// with 0.0 if set to 1. -// -__emitinline void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x213a, to, from ); - xWrite( imm8 ); -} - -__emitinline void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x213a, to, from ); - xWrite( imm8 ); -} - -__emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict& from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x213a, to, from ); - xWrite( imm8 ); -} - -// [SSE-4.1] Extract a single-precision floating-point value from src at an offset -// determined by imm8[1-0]*32. The extracted single precision floating-point value -// is stored into the low 32-bits of dest (or at a 32-bit memory pointer). 
-// -__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x173a, to, from, true ); - xWrite( imm8 ); -} - -__emitinline void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x173a, from, dest, true ); - xWrite( imm8 ); -} - -__emitinline void xEXTRACTPS( const ModSibStrict& dest, const xRegisterSSE& from, u8 imm8 ) -{ - writeXMMop( 0x66, 0x173a, from, dest, true ); - xWrite( imm8 ); -} - - } diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 65ce383144..32d210edbc 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -86,16 +86,16 @@ namespace x86Emitter extern const Internal::DwordShiftImplAll xSHLD; extern const Internal::DwordShiftImplAll xSHRD; - extern const Internal::Group8ImplAll xBT; - extern const Internal::Group8ImplAll xBTR; - extern const Internal::Group8ImplAll xBTS; - extern const Internal::Group8ImplAll xBTC; + extern const Internal::Group8Impl xBT; + extern const Internal::Group8Impl xBTR; + extern const Internal::Group8Impl xBTS; + extern const Internal::Group8Impl xBTC; extern const Internal::JmpCallImplAll xJMP; extern const Internal::JmpCallImplAll xCALL; - extern const Internal::BitScanImplAll xBSF; - extern const Internal::BitScanImplAll xBSR; + extern const Internal::BitScanImpl<0xbc> xBSF; + extern const Internal::BitScanImpl<0xbd> xBSR; // ------------------------------------------------------------------------ extern const Internal::CMovImplGeneric xCMOV; @@ -299,95 +299,28 @@ namespace x86Emitter typedef xForwardJPO xForwardJPO8; typedef xForwardJPO xForwardJPO32; - ////////////////////////////////////////////////////////////////////////////////////////// - // MMX Mov Instructions (MOVD, MOVQ, MOVSS). - // - // Notes: - // * Some of the functions have been renamed to more clearly reflect what they actually - // do. Namely we've affixed "ZX" to several MOVs that take a register as a destination - // since that's what they do (MOVD clears upper 32/96 bits, etc). - // - - // ------------------------------------------------------------------------ - // MOVD has valid forms for MMX and XMM registers. - // - template< typename T > - __emitinline void xMOVDZX( const xRegisterSIMD& to, const xRegister32& from ) - { - Internal::writeXMMop( 0x66, 0x6e, to, from ); - } - - template< typename T > - __emitinline void xMOVDZX( const xRegisterSIMD& to, const void* src ) - { - Internal::writeXMMop( 0x66, 0x6e, to, src ); - } - - template< typename T > - void xMOVDZX( const xRegisterSIMD& to, const ModSibBase& src ) - { - Internal::writeXMMop( 0x66, 0x6e, to, src ); - } - - template< typename T > - __emitinline void xMOVD( const xRegister32& to, const xRegisterSIMD& from ) - { - Internal::writeXMMop( 0x66, 0x7e, from, to ); - } - - template< typename T > - __emitinline void xMOVD( void* dest, const xRegisterSIMD& from ) - { - Internal::writeXMMop( 0x66, 0x7e, from, dest ); - } - - template< typename T > - void xMOVD( const ModSibBase& dest, const xRegisterSIMD& from ) - { - Internal::writeXMMop( 0x66, 0x7e, from, dest ); - } - - - // ------------------------------------------------------------------------ - - // xMASKMOV: - // Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2. - // The default memory location is specified by DS:EDI. 
The most significant bit in each byte - // of the mask operand determines whether the corresponding byte in the source operand is - // written to the corresponding byte location in memory. - - template< typename T > - static __forceinline void xMASKMOV( const xRegisterSIMD& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xf7, to, from ); } - - // xPMOVMSKB: - // Creates a mask made up of the most significant bit of each byte of the source - // operand and stores the result in the low byte or word of the destination operand. - // Upper bits of the destination are cleared to zero. - // - // When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on - // 128-bit (SSE) source, the byte mask is 16-bits. - // - template< typename T > - static __forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSIMD& from ) { Internal::writeXMMop( 0x66, 0xd7, to, from ); } - - // [sSSE-3] Concatenates dest and source operands into an intermediate composite, - // shifts the composite at byte granularity to the right by a constant immediate, - // and extracts the right-aligned result into the destination. - // - template< typename T > - static __forceinline void xPALIGNR( const xRegisterSIMD& to, const xRegisterSIMD& from, u8 imm8 ) - { - Internal::writeXMMop( 0x66, 0x0f3a, to, from ); - xWrite( imm8 ); - } - - // ------------------------------------------------------------------------ extern void xEMMS(); extern void xSTMXCSR( u32* dest ); extern void xLDMXCSR( const u32* src ); + extern void xMOVDZX( const xRegisterSSE& to, const xRegister32& from ); + extern void xMOVDZX( const xRegisterSSE& to, const void* src ); + extern void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src ); + + extern void xMOVDZX( const xRegisterMMX& to, const xRegister32& from ); + extern void xMOVDZX( const xRegisterMMX& to, const void* src ); + extern void xMOVDZX( const xRegisterMMX& to, const ModSibBase& src ); + + extern void xMOVD( const xRegister32& to, const xRegisterSSE& from ); + extern void xMOVD( void* dest, const xRegisterSSE& from ); + extern void xMOVD( const ModSibBase& dest, const xRegisterSSE& from ); + + extern void xMOVD( const xRegister32& to, const xRegisterMMX& from ); + extern void xMOVD( void* dest, const xRegisterMMX& from ); + extern void xMOVD( const ModSibBase& dest, const xRegisterMMX& from ); + extern void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ); extern void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ); extern void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ); @@ -430,6 +363,39 @@ namespace x86Emitter extern void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from ); extern void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from ); + extern void xMASKMOV( const xRegisterSSE& to, const xRegisterSSE& from ); + extern void xMASKMOV( const xRegisterMMX& to, const xRegisterMMX& from ); + extern void xPMOVMSKB( const xRegister32& to, const xRegisterSSE& from ); + extern void xPMOVMSKB( const xRegister32& to, const xRegisterMMX& from ); + extern void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ); + extern void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ); + + // ------------------------------------------------------------------------ + + extern const Internal::SimdImpl_MoveSSE<0x00,true> xMOVAPS; + extern const Internal::SimdImpl_MoveSSE<0x00,false> xMOVUPS; + +#ifdef ALWAYS_USE_MOVAPS + extern const Internal::SimdImpl_MoveSSE<0,true> xMOVDQA; + 
extern const Internal::SimdImpl_MoveSSE<0,false> xMOVDQU; + extern const Internal::SimdImpl_MoveSSE<0,true> xMOVAPD; + extern const Internal::SimdImpl_MoveSSE<0,false> xMOVUPD; +#else + extern const Internal::SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA; + extern const Internal::SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU; + extern const Internal::SimdImpl_MoveSSE<0x66,true> xMOVAPD; + extern const Internal::SimdImpl_MoveSSE<0x66,false> xMOVUPD; +#endif + + extern const Internal::MovhlImpl_RtoR<0x16> xMOVLH; + extern const Internal::MovhlImpl_RtoR<0x12> xMOVHL; + + extern const Internal::MovhlImplAll<0x16> xMOVH; + extern const Internal::MovhlImplAll<0x12> xMOVL; + + extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; + extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; + extern void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ); extern void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 ); extern void xINSERTPS( const xRegisterSSE& to, const ModSibStrict& from, u8 imm8 ); @@ -438,38 +404,13 @@ namespace x86Emitter extern void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 ); extern void xEXTRACTPS( const ModSibStrict& dest, const xRegisterSSE& from, u8 imm8 ); - // ------------------------------------------------------------------------ - - extern const Internal::SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; - extern const Internal::SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; - - extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVAPS; - extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVUPS; - - extern const Internal::MovapsImplAll<0x66, 0x28, 0x29> xMOVAPD; - extern const Internal::MovapsImplAll<0x66, 0x10, 0x11> xMOVUPD; - -#ifdef ALWAYS_USE_MOVAPS - extern const Internal::MovapsImplAll<0x66, 0x6f, 0x7f> xMOVDQA; - extern const Internal::MovapsImplAll<0xf3, 0x6f, 0x7f> xMOVDQU; -#else - extern const Internal::MovapsImplAll<0, 0x28, 0x29> xMOVDQA; - extern const Internal::MovapsImplAll<0, 0x10, 0x11> xMOVDQU; -#endif - - extern const Internal::MovhlImpl_RtoR<0x16> xMOVLH; - extern const Internal::MovhlImpl_RtoR<0x12> xMOVHL; - - extern const Internal::MovhlImplAll<0x16> xMOVH; - extern const Internal::MovhlImplAll<0x12> xMOVL; - // ------------------------------------------------------------------------ extern const Internal::SimdImpl_DestRegEither<0x66,0xdb> xPAND; extern const Internal::SimdImpl_DestRegEither<0x66,0xdf> xPANDN; extern const Internal::SimdImpl_DestRegEither<0x66,0xeb> xPOR; extern const Internal::SimdImpl_DestRegEither<0x66,0xef> xPXOR; - + extern const Internal::SimdImpl_AndNot xANDN; extern const Internal::SimdImpl_UcomI<0x66,0x2e> xUCOMI; @@ -482,6 +423,8 @@ namespace x86Emitter extern const Internal::SimdImpl_Shuffle<0xc6> xSHUF; // ------------------------------------------------------------------------ + + extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST; extern const Internal::SimdImpl_Compare xCMPEQ; extern const Internal::SimdImpl_Compare xCMPLT; @@ -527,8 +470,8 @@ namespace x86Emitter // ------------------------------------------------------------------------ - extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL; extern const Internal::SimdImpl_Shift<0xf0, 6> xPSLL; + extern const Internal::SimdImpl_Shift<0xd0, 2> xPSRL; extern const Internal::SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; extern const Internal::SimdImpl_AddSub<0xdc, 0xd4> xPADD; @@ -550,5 +493,12 @@ namespace x86Emitter extern const Internal::SimdImpl_PMultAdd xPMADD; extern const Internal::SimdImpl_HorizAdd 
xHADD; + extern const Internal::SimdImpl_Blend xBLEND; + extern const Internal::SimdImpl_DotProduct xDP; + extern const Internal::SimdImpl_Round xROUND; + + extern const Internal::SimdImpl_PMove xPMOVSX; + extern const Internal::SimdImpl_PMove xPMOVZX; + } diff --git a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp b/pcsx2/x86/ix86/ix86_legacy_mmx.cpp deleted file mode 100644 index d8bdb0b8a2..0000000000 --- a/pcsx2/x86/ix86/ix86_legacy_mmx.cpp +++ /dev/null @@ -1,124 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#include "PrecompiledHeader.h" -#include "ix86_legacy_internal.h" - -//------------------------------------------------------------------ -// MMX instructions -// -// note: r64 = mm -//------------------------------------------------------------------ - -using namespace x86Emitter; - -emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { xMOVQ( xRegisterMMX(to), (void*)from ); } -emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { xMOVQ( (void*)to, xRegisterMMX(from) ); } -emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { xMOVQ( xRegisterMMX(to), xRegisterMMX(from) ); } -emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } -emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } - -emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { xMOVDZX( xRegisterMMX(to), (void*)from ); } -emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) { xMOVD( (void*)to, xRegisterMMX(from) ); } -emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) { xMOVDZX( xRegisterMMX(to), xRegister32(from) ); } -emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVDZX( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } -emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { xMOVD( xRegister32(to), xRegisterMMX(from) ); } -emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } - -emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); } -emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); } - -#define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \ - emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); } \ - emitterT void P##mod##MtoR( x86MMXRegType to, uptr from ) { xP##mod( xRegisterMMX(to), (void*)from ); } \ - emitterT void SSE2_P##mod##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT 
void SSE2_P##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod( xRegisterSSE(to), (void*)from ); } - -#define DEFINE_LEGACY_ARITHMETIC( mod, sub ) \ - emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \ - emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \ - emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); } - -#define DEFINE_LEGACY_SHIFT_STUFF( mod, sub ) \ - emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \ - emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \ - emitterT void P##mod##sub##ItoR( x86MMXRegType to, u8 imm ) { xP##mod.sub( xRegisterMMX(to), imm ); } \ - emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ - emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); } \ - emitterT void SSE2_P##mod##sub##_I8_to_XMM( x86SSERegType to, u8 imm ) { xP##mod.sub( xRegisterSSE(to), imm ); } - -#define DEFINE_LEGACY_SHIFT_OPCODE( mod ) \ - DEFINE_LEGACY_SHIFT_STUFF( mod, Q ) \ - DEFINE_LEGACY_SHIFT_STUFF( mod, D ) \ - DEFINE_LEGACY_SHIFT_STUFF( mod, W ) \ - emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86MMXRegType to, u8 imm ) { xP##mod.DQ( xRegisterSSE(to), imm ); } - -DEFINE_LEGACY_LOGIC_OPCODE( AND ) -DEFINE_LEGACY_LOGIC_OPCODE( ANDN ) -DEFINE_LEGACY_LOGIC_OPCODE( OR ) -DEFINE_LEGACY_LOGIC_OPCODE( XOR ) - -DEFINE_LEGACY_SHIFT_OPCODE( SLL ) -DEFINE_LEGACY_SHIFT_OPCODE( SRL ) -DEFINE_LEGACY_SHIFT_STUFF( SRA, D ) -DEFINE_LEGACY_SHIFT_STUFF( SRA, W ) - -DEFINE_LEGACY_ARITHMETIC( ADD, B ) -DEFINE_LEGACY_ARITHMETIC( ADD, W ) -DEFINE_LEGACY_ARITHMETIC( ADD, D ) -DEFINE_LEGACY_ARITHMETIC( ADD, Q ) -DEFINE_LEGACY_ARITHMETIC( ADD, SB ) -DEFINE_LEGACY_ARITHMETIC( ADD, SW ) -DEFINE_LEGACY_ARITHMETIC( ADD, USB ) -DEFINE_LEGACY_ARITHMETIC( ADD, USW ) - -DEFINE_LEGACY_ARITHMETIC( SUB, B ) -DEFINE_LEGACY_ARITHMETIC( SUB, W ) -DEFINE_LEGACY_ARITHMETIC( SUB, D ) -DEFINE_LEGACY_ARITHMETIC( SUB, Q ) -DEFINE_LEGACY_ARITHMETIC( SUB, SB ) -DEFINE_LEGACY_ARITHMETIC( SUB, SW ) -DEFINE_LEGACY_ARITHMETIC( SUB, USB ) -DEFINE_LEGACY_ARITHMETIC( SUB, USW ) - -DEFINE_LEGACY_ARITHMETIC( CMP, EQB ); -DEFINE_LEGACY_ARITHMETIC( CMP, EQW ); -DEFINE_LEGACY_ARITHMETIC( CMP, EQD ); -DEFINE_LEGACY_ARITHMETIC( CMP, GTB ); -DEFINE_LEGACY_ARITHMETIC( CMP, GTW ); -DEFINE_LEGACY_ARITHMETIC( CMP, GTD ); - -DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ ); -DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ ); -DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW ); -DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW ); - -DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD ); -DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD ); - - -emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) { xPMUL.UDQ( xRegisterMMX( to ), (void*)from ); } -emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) { xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) ); } - -emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 ); } -emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 
imm8) { xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 ); } - -emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { xPINSR.W( xRegisterMMX(to), xRegister32(from), imm8 ); } - -emitterT void EMMS() { xEMMS(); } diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index d2845f2a09..169f6ac100 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -22,35 +22,109 @@ using namespace x86Emitter; +// ------------------------------------------------------------------------ +// MMX / SSE Mixed Bag +// ------------------------------------------------------------------------ -//------------------------------------------------------------------ -// SSE instructions -//------------------------------------------------------------------ +emitterT void MOVQMtoR( x86MMXRegType to, uptr from ) { xMOVQ( xRegisterMMX(to), (void*)from ); } +emitterT void MOVQRtoM( uptr to, x86MMXRegType from ) { xMOVQ( (void*)to, xRegisterMMX(from) ); } +emitterT void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ) { xMOVQ( xRegisterMMX(to), xRegisterMMX(from) ); } +emitterT void MOVQRmtoR( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVQ( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } +emitterT void MOVQRtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVQ( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } -#define SSEMtoR( code, overb ) \ - assert( to < iREGCNT_XMM ), \ - RexR(0, to), \ - write16( code ), \ - ModRM( 0, to, DISP32 ), \ - write32( MEMADDR(from, 4 + overb) ) +emitterT void MOVDMtoMMX( x86MMXRegType to, uptr from ) { xMOVDZX( xRegisterMMX(to), (void*)from ); } +emitterT void MOVDMMXtoM( uptr to, x86MMXRegType from ) { xMOVD( (void*)to, xRegisterMMX(from) ); } +emitterT void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ) { xMOVDZX( xRegisterMMX(to), xRegister32(from) ); } +emitterT void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from, int offset ) { xMOVDZX( xRegisterMMX(to), ptr[xAddressReg(from)+offset] ); } +emitterT void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ) { xMOVD( xRegister32(to), xRegisterMMX(from) ); } +emitterT void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from, int offset ) { xMOVD( ptr[xAddressReg(to)+offset], xRegisterMMX(from) ); } -#define SSERtoR( code ) \ - assert( to < iREGCNT_XMM && from < iREGCNT_XMM), \ - RexRB(0, to, from), \ - write16( code ), \ - ModRM( 3, to, from ) +emitterT void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from) { xPMOVMSKB( xRegister32(to), xRegisterMMX(from) ); } +emitterT void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from) { xMASKMOV( xRegisterMMX(to), xRegisterMMX(from) ); } -#define SSEMtoR66( code ) \ - write8( 0x66 ), \ - SSEMtoR( code, 0 ) +#define DEFINE_LEGACY_LOGIC_OPCODE( mod ) \ + emitterT void P##mod##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod( xRegisterMMX(to), xRegisterMMX(from) ); } \ + emitterT void P##mod##MtoR( x86MMXRegType to, uptr from ) { xP##mod( xRegisterMMX(to), (void*)from ); } \ + emitterT void SSE2_P##mod##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_P##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod( xRegisterSSE(to), (void*)from ); } -#define SSERtoM66( code ) \ - write8( 0x66 ), \ - SSERtoM( code, 0 ) +#define DEFINE_LEGACY_ARITHMETIC( mod, sub ) \ + emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); 
} \ + emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \ + emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); } -#define SSERtoR66( code ) \ - write8( 0x66 ), \ - SSERtoR( code ) +#define DEFINE_LEGACY_SHIFT_STUFF( mod, sub ) \ + emitterT void P##mod##sub##RtoR( x86MMXRegType to, x86MMXRegType from ) { xP##mod.sub( xRegisterMMX(to), xRegisterMMX(from) ); } \ + emitterT void P##mod##sub##MtoR( x86MMXRegType to, uptr from ) { xP##mod.sub( xRegisterMMX(to), (void*)from ); } \ + emitterT void P##mod##sub##ItoR( x86MMXRegType to, u8 imm ) { xP##mod.sub( xRegisterMMX(to), imm ); } \ + emitterT void SSE2_P##mod##sub##_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xP##mod.sub( xRegisterSSE(to), xRegisterSSE(from) ); } \ + emitterT void SSE2_P##mod##sub##_M128_to_XMM( x86SSERegType to, uptr from ) { xP##mod.sub( xRegisterSSE(to), (void*)from ); } \ + emitterT void SSE2_P##mod##sub##_I8_to_XMM( x86SSERegType to, u8 imm ) { xP##mod.sub( xRegisterSSE(to), imm ); } + +#define DEFINE_LEGACY_SHIFT_OPCODE( mod ) \ + DEFINE_LEGACY_SHIFT_STUFF( mod, Q ) \ + DEFINE_LEGACY_SHIFT_STUFF( mod, D ) \ + DEFINE_LEGACY_SHIFT_STUFF( mod, W ) \ + emitterT void SSE2_P##mod##DQ_I8_to_XMM( x86MMXRegType to, u8 imm ) { xP##mod.DQ( xRegisterSSE(to), imm ); } + +DEFINE_LEGACY_LOGIC_OPCODE( AND ) +DEFINE_LEGACY_LOGIC_OPCODE( ANDN ) +DEFINE_LEGACY_LOGIC_OPCODE( OR ) +DEFINE_LEGACY_LOGIC_OPCODE( XOR ) + +DEFINE_LEGACY_SHIFT_OPCODE( SLL ) +DEFINE_LEGACY_SHIFT_OPCODE( SRL ) +DEFINE_LEGACY_SHIFT_STUFF( SRA, D ) +DEFINE_LEGACY_SHIFT_STUFF( SRA, W ) + +DEFINE_LEGACY_ARITHMETIC( ADD, B ) +DEFINE_LEGACY_ARITHMETIC( ADD, W ) +DEFINE_LEGACY_ARITHMETIC( ADD, D ) +DEFINE_LEGACY_ARITHMETIC( ADD, Q ) +DEFINE_LEGACY_ARITHMETIC( ADD, SB ) +DEFINE_LEGACY_ARITHMETIC( ADD, SW ) +DEFINE_LEGACY_ARITHMETIC( ADD, USB ) +DEFINE_LEGACY_ARITHMETIC( ADD, USW ) + +DEFINE_LEGACY_ARITHMETIC( SUB, B ) +DEFINE_LEGACY_ARITHMETIC( SUB, W ) +DEFINE_LEGACY_ARITHMETIC( SUB, D ) +DEFINE_LEGACY_ARITHMETIC( SUB, Q ) +DEFINE_LEGACY_ARITHMETIC( SUB, SB ) +DEFINE_LEGACY_ARITHMETIC( SUB, SW ) +DEFINE_LEGACY_ARITHMETIC( SUB, USB ) +DEFINE_LEGACY_ARITHMETIC( SUB, USW ) + +DEFINE_LEGACY_ARITHMETIC( CMP, EQB ); +DEFINE_LEGACY_ARITHMETIC( CMP, EQW ); +DEFINE_LEGACY_ARITHMETIC( CMP, EQD ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTB ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTW ); +DEFINE_LEGACY_ARITHMETIC( CMP, GTD ); + +DEFINE_LEGACY_ARITHMETIC( UNPCK, HDQ ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, LDQ ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, HBW ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, LBW ); + +DEFINE_LEGACY_ARITHMETIC( UNPCK, LWD ); +DEFINE_LEGACY_ARITHMETIC( UNPCK, HWD ); + + +emitterT void PMULUDQMtoR( x86MMXRegType to, uptr from ) { xPMUL.UDQ( xRegisterMMX( to ), (void*)from ); } +emitterT void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ) { xPMUL.UDQ( xRegisterMMX( to ), xRegisterMMX( from ) ); } + +emitterT void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), xRegisterMMX(from), imm8 ); } +emitterT void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8) { xPSHUF.W( xRegisterMMX(to), (void*)from, imm8 ); } + +emitterT void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ) { xPINSR.W( xRegisterMMX(to), xRegister32(from), imm8 ); } + +emitterT void EMMS() { 
xEMMS(); } + +// ------------------------------------------------------------------------ +// Begin SSE-Only Part! +// ------------------------------------------------------------------------ #define DEFINE_LEGACY_MOV_OPCODE( mod, sse ) \ emitterT void sse##_MOV##mod##_M128_to_XMM( x86SSERegType to, uptr from ) { xMOV##mod( xRegisterSSE(to), (void*)from ); } \ @@ -290,73 +364,17 @@ emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 im emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xINSERTPS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) { xEXTRACTPS( xRegister32(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xDP.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) { xDP.PS( xRegisterSSE(to), (void*)from, imm8 ); } + +emitterT void SSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) { xBLEND.PS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } +emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xBLEND.VPS( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) { xBLEND.VPS( xRegisterSSE(to), (void*)from ); } + +emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMOVSX.DQ( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); } -////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////// - - -// SSE4.1 - -emitterT void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - write24(0x403A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) -{ - write8(0x66); - write24(0x403A0F); - ModRM(0, to, DISP32); - write32(MEMADDR(from, 4)); - write8(imm8); -} - -emitterT void SSE4_BLENDPS_XMM_to_XMM(x86IntRegType to, x86SSERegType from, u8 imm8) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x0C3A0F); - ModRM(3, to, from); - write8(imm8); -} - -emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x14380F); - ModRM(3, to, from); -} - -emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) -{ - write8(0x66); - RexR(0, to); - write24(0x14380F); - ModRM(0, to, DISP32); - write32(MEMADDR(from, 4)); -} - -emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x25380F); - ModRM(3, to, from); -} - -emitterT void SSE4_PMOVZXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) -{ - write8(0x66); - RexRB(0, to, from); - write24(0x35380F); - ModRM(3, to, from); -} ////////////////////////////////////////////////////////////////////////////////////////// // SSE-X Helpers (generates either INT or FLOAT versions of certain SSE instructions) diff --git a/pcsx2/x86/ix86/ix86_simd.cpp b/pcsx2/x86/ix86/ix86_simd.cpp new file mode 100644 index 0000000000..42754cce18 --- /dev/null +++ b/pcsx2/x86/ix86/ix86_simd.cpp @@ -0,0 +1,388 @@ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "PrecompiledHeader.h" + +#include "System.h" +#include "ix86_internal.h" + +namespace x86Emitter { + +using namespace Internal; + +// ------------------------------------------------------------------------ +// SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is +// treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4 +// instructions). Any other lower value assumes the upper value is 0 and ignored. +// Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will +// generate an assertion. +// +__emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) +{ + const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a); + + // If the lower byte is not a valid previx and the upper byte is non-zero it + // means we made a mistake! + if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 ); + + if( prefix != 0 ) + { + if( is16BitOpcode ) + xWrite( (opcode<<16) | 0x0f00 | prefix ); + else + { + xWrite( 0x0f00 | prefix ); + xWrite( opcode ); + } + } + else + { + if( is16BitOpcode ) + { + xWrite( 0x0f ); + xWrite( opcode ); + } + else + xWrite( (opcode<<8) | 0x0f ); + } +} + +// [SSE-3] +const SimdImpl_DestRegSSE<0xf3,0x12> xMOVSLDUP; +// [SSE-3] +const SimdImpl_DestRegSSE<0xf3,0x16> xMOVSHDUP; + +const SimdImpl_MoveSSE<0x00,true> xMOVAPS; + +// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead. +// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement +// which can be checked for alignment at runtime. +const SimdImpl_MoveSSE<0x00,false> xMOVUPS; + +#ifdef ALWAYS_USE_MOVAPS +const SimdImpl_MoveSSE<0,true> xMOVDQA; +const SimdImpl_MoveSSE<0,true> xMOVAPD; + +// Note: All implementations of Unaligned Movs will, when possible, use aligned movs instead. +// This happens when using Mem,Reg or Reg,Mem forms where the address is simple displacement +// which can be checked for alignment at runtime. 
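// [Illustrative sketch, not from the original commit] The runtime alignment test referred to
// above reduces to a simple displacement check; the moremovs.h implementation later in this
// patch series uses exactly this pattern:
//
//   bool canUseAligned = isAligned || ((from & 0x0f) == 0);   // 16-byte aligned displacement?
//   xOpWrite0F( canUseAligned ? PrefixA : PrefixU, Opcode, to, from );
//
// (isAligned, PrefixA, PrefixU and Opcode are members of that MoveSSE implementation class.)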
+const SimdImpl_MoveSSE<0,false> xMOVDQU; +const SimdImpl_MoveSSE<0,false> xMOVUPD; +#else +const SimdImpl_MoveDQ<0x66, 0x6f, 0x7f> xMOVDQA; +const SimdImpl_MoveDQ<0xf3, 0x6f, 0x7f> xMOVDQU; +const SimdImpl_MoveSSE<0x66,true> xMOVAPD; +const SimdImpl_MoveSSE<0x66,false> xMOVUPD; +#endif + +const MovhlImplAll<0x16> xMOVH; +const MovhlImplAll<0x12> xMOVL; +const MovhlImpl_RtoR<0x16> xMOVLH; +const MovhlImpl_RtoR<0x12> xMOVHL; + +const SimdImpl_DestRegEither<0x66,0xdb> xPAND; +const SimdImpl_DestRegEither<0x66,0xdf> xPANDN; +const SimdImpl_DestRegEither<0x66,0xeb> xPOR; +const SimdImpl_DestRegEither<0x66,0xef> xPXOR; + +const SimdImpl_AndNot xANDN; + +const SimdImpl_UcomI<0x66,0x2e> xUCOMI; +const SimdImpl_rSqrt<0x53> xRCP; +const SimdImpl_rSqrt<0x52> xRSQRT; +const SimdImpl_Sqrt<0x51> xSQRT; + +const SimdImpl_MinMax<0x5f> xMAX; +const SimdImpl_MinMax<0x5d> xMIN; +const SimdImpl_Shuffle<0xc6> xSHUF; + +// ------------------------------------------------------------------------ + +// [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag +// only if all bits in the result are 0. PTEST also sets the CF flag according +// to the following condition: (xmm2/m128 AND NOT xmm1) == 0; +extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST; + +const SimdImpl_Compare xCMPEQ; +const SimdImpl_Compare xCMPLT; +const SimdImpl_Compare xCMPLE; +const SimdImpl_Compare xCMPUNORD; +const SimdImpl_Compare xCMPNE; +const SimdImpl_Compare xCMPNLT; +const SimdImpl_Compare xCMPNLE; +const SimdImpl_Compare xCMPORD; + +// ------------------------------------------------------------------------ +// SSE Conversion Operations, as looney as they are. +// +// These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing +// nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]). 
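// [Illustrative sketch, not from the original commit] A hypothetical call site for the strict
// conversion forms declared below; the variable 'val' and the eax/xmm0 register constants are
// assumptions of this example, not anything defined by the patch:
//
//   u32 val = 1234;
//   xCVTSI2SS( xmm0, &val );     // indirect m32 source must be a u32* (or ptr32[]), as noted above
//   xCVTTSS2SI( eax, xmm0 );     // direct register-to-register form
//
// Handing these functors a void* or a mismatched pointer width fails at compile time, which is
// the strictness the comment above describes.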
+// +const SimdImpl_DestRegStrict<0xf3,0xe6,xRegisterSSE,xRegisterSSE,u64> xCVTDQ2PD; +const SimdImpl_DestRegStrict<0x00,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTDQ2PS; + +const SimdImpl_DestRegStrict<0xf2,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTPD2DQ; +const SimdImpl_DestRegStrict<0x66,0x2d,xRegisterMMX,xRegisterSSE,u128> xCVTPD2PI; +const SimdImpl_DestRegStrict<0x66,0x5a,xRegisterSSE,xRegisterSSE,u128> xCVTPD2PS; + +const SimdImpl_DestRegStrict<0x66,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PD; +const SimdImpl_DestRegStrict<0x00,0x2a,xRegisterSSE,xRegisterMMX,u64> xCVTPI2PS; + +const SimdImpl_DestRegStrict<0x66,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTPS2DQ; +const SimdImpl_DestRegStrict<0x00,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTPS2PD; +const SimdImpl_DestRegStrict<0x00,0x2d,xRegisterMMX,xRegisterSSE,u64> xCVTPS2PI; + +const SimdImpl_DestRegStrict<0xf2,0x2d,xRegister32, xRegisterSSE,u64> xCVTSD2SI; +const SimdImpl_DestRegStrict<0xf2,0x5a,xRegisterSSE,xRegisterSSE,u64> xCVTSD2SS; +const SimdImpl_DestRegStrict<0xf2,0x2a,xRegisterMMX,xRegister32, u32> xCVTSI2SD; +const SimdImpl_DestRegStrict<0xf3,0x2a,xRegisterSSE,xRegister32, u32> xCVTSI2SS; + +const SimdImpl_DestRegStrict<0xf3,0x5a,xRegisterSSE,xRegisterSSE,u32> xCVTSS2SD; +const SimdImpl_DestRegStrict<0xf3,0x2d,xRegister32, xRegisterSSE,u32> xCVTSS2SI; + +const SimdImpl_DestRegStrict<0x66,0xe6,xRegisterSSE,xRegisterSSE,u128> xCVTTPD2DQ; +const SimdImpl_DestRegStrict<0x66,0x2c,xRegisterMMX,xRegisterSSE,u128> xCVTTPD2PI; +const SimdImpl_DestRegStrict<0xf3,0x5b,xRegisterSSE,xRegisterSSE,u128> xCVTTPS2DQ; +const SimdImpl_DestRegStrict<0x00,0x2c,xRegisterMMX,xRegisterSSE,u64> xCVTTPS2PI; + +const SimdImpl_DestRegStrict<0xf2,0x2c,xRegister32, xRegisterSSE,u64> xCVTTSD2SI; +const SimdImpl_DestRegStrict<0xf3,0x2c,xRegister32, xRegisterSSE,u32> xCVTTSS2SI; + +// ------------------------------------------------------------------------ + +const SimdImpl_Shift<0xd0, 2> xPSRL; +const SimdImpl_Shift<0xf0, 6> xPSLL; +const SimdImpl_ShiftWithoutQ<0xe0, 4> xPSRA; + +const SimdImpl_AddSub<0xdc, 0xd4> xPADD; +const SimdImpl_AddSub<0xd8, 0xfb> xPSUB; +const SimdImpl_PMinMax<0xde,0x3c> xPMAX; +const SimdImpl_PMinMax<0xda,0x38> xPMIN; + +const SimdImpl_PMul xPMUL; +const SimdImpl_PCompare xPCMP; +const SimdImpl_PShuffle xPSHUF; +const SimdImpl_PUnpack xPUNPCK; +const SimdImpl_Unpack xUNPCK; +const SimdImpl_Pack xPACK; + +const SimdImpl_PAbsolute xPABS; +const SimdImpl_PSign xPSIGN; +const SimdImpl_PInsert xPINSR; +const SimdImpl_PExtract xPEXTR; +const SimdImpl_PMultAdd xPMADD; +const SimdImpl_HorizAdd xHADD; + +const SimdImpl_Blend xBLEND; +const SimdImpl_DotProduct xDP; +const SimdImpl_Round xROUND; + +const SimdImpl_PMove xPMOVSX; +const SimdImpl_PMove xPMOVZX; + + +////////////////////////////////////////////////////////////////////////////////////////// +// + +__emitinline void xEMMS() +{ + xWrite( 0x770F ); +} + +// Store Streaming SIMD Extension Control/Status to Mem32. +__emitinline void xSTMXCSR( u32* dest ) +{ + SimdPrefix( 0, 0xae ); + xWriteDisp( 3, dest ); +} + +// Load Streaming SIMD Extension Control/Status from Mem32. +__emitinline void xLDMXCSR( const u32* src ) +{ + SimdPrefix( 0, 0xae ); + xWriteDisp( 2, src ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// MMX Mov Instructions (MOVD, MOVQ, MOVSS). +// +// Notes: +// * Some of the functions have been renamed to more clearly reflect what they actually +// do. 
Namely we've affixed "ZX" to several MOVs that take a register as a destination +// since that's what they do (MOVD clears upper 32/96 bits, etc). +// +// * MOVD has valid forms for MMX and XMM registers. +// + +__forceinline void xMOVDZX( const xRegisterSSE& to, const xRegister32& from ) { xOpWrite0F( 0x66, 0x6e, to, from ); } +__forceinline void xMOVDZX( const xRegisterSSE& to, const void* src ) { xOpWrite0F( 0x66, 0x6e, to, src ); } +__forceinline void xMOVDZX( const xRegisterSSE& to, const ModSibBase& src ) { xOpWrite0F( 0x66, 0x6e, to, src ); } + +__forceinline void xMOVDZX( const xRegisterMMX& to, const xRegister32& from ) { xOpWrite0F( 0x6e, to, from ); } +__forceinline void xMOVDZX( const xRegisterMMX& to, const void* src ) { xOpWrite0F( 0x6e, to, src ); } +__forceinline void xMOVDZX( const xRegisterMMX& to, const ModSibBase& src ) { xOpWrite0F( 0x6e, to, src ); } + +__forceinline void xMOVD( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, to ); } +__forceinline void xMOVD( void* dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, dest ); } +__forceinline void xMOVD( const ModSibBase& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x7e, from, dest ); } + +__forceinline void xMOVD( const xRegister32& to, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, to ); } +__forceinline void xMOVD( void* dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, dest ); } +__forceinline void xMOVD( const ModSibBase& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7e, from, dest ); } + + +// Moves from XMM to XMM, with the *upper 64 bits* of the destination register +// being cleared to zero. +__forceinline void xMOVQZX( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0xf3, 0x7e, to, from ); } + +// Moves from XMM to XMM, with the *upper 64 bits* of the destination register +// being cleared to zero. +__forceinline void xMOVQZX( const xRegisterSSE& to, const ModSibBase& src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); } + +// Moves from XMM to XMM, with the *upper 64 bits* of the destination register +// being cleared to zero. 
+__forceinline void xMOVQZX( const xRegisterSSE& to, const void* src ) { xOpWrite0F( 0xf3, 0x7e, to, src ); } + +// Moves lower quad of XMM to ptr64 (no bits are cleared) +__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd6, from, dest ); } +// Moves lower quad of XMM to ptr64 (no bits are cleared) +__forceinline void xMOVQ( void* dest, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd6, from, dest ); } + +__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterMMX& from ) { if( to != from ) xOpWrite0F( 0x6f, to, from ); } +__forceinline void xMOVQ( const xRegisterMMX& to, const ModSibBase& src ) { xOpWrite0F( 0x6f, to, src ); } +__forceinline void xMOVQ( const xRegisterMMX& to, const void* src ) { xOpWrite0F( 0x6f, to, src ); } +__forceinline void xMOVQ( const ModSibBase& dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7f, from, dest ); } +__forceinline void xMOVQ( void* dest, const xRegisterMMX& from ) { xOpWrite0F( 0x7f, from, dest ); } + +// This form of xMOVQ is Intel's adeptly named 'MOVQ2DQ' +__forceinline void xMOVQ( const xRegisterSSE& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf3, 0xd6, to, from ); } + +// This form of xMOVQ is Intel's adeptly named 'MOVDQ2Q' +__forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ) +{ + // Manual implementation of this form of MOVQ, since its parameters are unique in a way + // that breaks the template inference of writeXMMop(); + + SimdPrefix( 0xf2, 0xd6 ); + ModRM_Direct( to.Id, from.Id ); +} + +////////////////////////////////////////////////////////////////////////////////////////// +// + +#define IMPLEMENT_xMOVS( ssd, prefix ) \ + __forceinline void xMOV##ssd( const xRegisterSSE& to, const xRegisterSSE& from ) { if( to != from ) xOpWrite0F( prefix, 0x10, to, from ); } \ + __forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const void* from ) { xOpWrite0F( prefix, 0x10, to, from ); } \ + __forceinline void xMOV##ssd##ZX( const xRegisterSSE& to, const ModSibBase& from ) { xOpWrite0F( prefix, 0x10, to, from ); } \ + __forceinline void xMOV##ssd( const void* to, const xRegisterSSE& from ) { xOpWrite0F( prefix, 0x11, from, to ); } \ + __forceinline void xMOV##ssd( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( prefix, 0x11, from, to ); } + +IMPLEMENT_xMOVS( SS, 0xf3 ) +IMPLEMENT_xMOVS( SD, 0xf2 ) + +////////////////////////////////////////////////////////////////////////////////////////// +// Non-temporal movs only support a register as a target (ie, load form only, no stores) +// + +__forceinline void xMOVNTDQA( const xRegisterSSE& to, const void* from ) +{ + xWrite( 0x2A380f66 ); + xWriteDisp( to.Id, from ); +} + +__forceinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from ) +{ + xWrite( 0x2A380f66 ); + EmitSibMagic( to.Id, from ); +} + +__forceinline void xMOVNTDQ( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xe7, from, to ); } +__forceinline void xMOVNTDQA( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xe7, from, to ); } + +__forceinline void xMOVNTPD( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x2b, from, to ); } +__forceinline void xMOVNTPD( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0x2b, from, to ); } +__forceinline void xMOVNTPS( void* to, const xRegisterSSE& from ) { xOpWrite0F( 0x2b, from, to ); } +__forceinline void xMOVNTPS( const ModSibBase& to, const xRegisterSSE& from ) { xOpWrite0F( 0x2b, from, to ); } + +__forceinline void 
xMOVNTQ( void* to, const xRegisterMMX& from ) { xOpWrite0F( 0xe7, from, to ); } +__forceinline void xMOVNTQ( const ModSibBase& to, const xRegisterMMX& from ) { xOpWrite0F( 0xe7, from, to ); } + +// ------------------------------------------------------------------------ + +__forceinline void xMOVMSKPS( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x50, to, from ); } +__forceinline void xMOVMSKPD( const xRegister32& to, const xRegisterSSE& from) { xOpWrite0F( 0x66, 0x50, to, from, true ); } + +// xMASKMOV: +// Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2. +// The default memory location is specified by DS:EDI. The most significant bit in each byte +// of the mask operand determines whether the corresponding byte in the source operand is +// written to the corresponding byte location in memory. +__forceinline void xMASKMOV( const xRegisterSSE& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xf7, to, from ); } +__forceinline void xMASKMOV( const xRegisterMMX& to, const xRegisterMMX& from ) { xOpWrite0F( 0xf7, to, from ); } + +// xPMOVMSKB: +// Creates a mask made up of the most significant bit of each byte of the source +// operand and stores the result in the low byte or word of the destination operand. +// Upper bits of the destination are cleared to zero. +// +// When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on +// 128-bit (SSE) source, the byte mask is 16-bits. +// +__forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterSSE& from ) { xOpWrite0F( 0x66, 0xd7, to, from ); } +__forceinline void xPMOVMSKB( const xRegister32& to, const xRegisterMMX& from ) { xOpWrite0F( 0xd7, to, from ); } + +// [sSSE-3] Concatenates dest and source operands into an intermediate composite, +// shifts the composite at byte granularity to the right by a constant immediate, +// and extracts the right-aligned result into the destination. +// +__forceinline void xPALIGNR( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x0f3a, to, from, imm8 ); } +__forceinline void xPALIGNR( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm8 ) { xOpWrite0F( 0x0f3a, to, from, imm8 ); } + + +////////////////////////////////////////////////////////////////////////////////////////// +// INSERTPS / EXTRACTPS [SSE4.1 only!] +// +// [TODO] these might be served better as classes, especially if other instructions use +// the M32,sse,imm form (I forget offhand if any do). + + +// [SSE-4.1] Insert a single-precision floating-point value from src into a specified +// location in dest, and selectively zero out the data elements in dest according to +// the mask field in the immediate byte. The source operand can be a memory location +// (32 bits) or an XMM register (lower 32 bits used). +// +// Imm8 provides three fields: +// * COUNT_S: The value of Imm8[7:6] selects the dword element from src. It is 0 if +// the source is a memory operand. +// * COUNT_D: The value of Imm8[5:4] selects the target dword element in dest. +// * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written +// with 0.0 if set to 1. 
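// [Illustrative sketch, not from the original commit] Putting the three fields together, to copy
// dword 1 of src into dword 2 of dest while zeroing dest dword 0:
//
//   const u8 imm8 = (1 << 6) | (2 << 4) | 0x1;   // COUNT_S=1, COUNT_D=2, ZMASK=0001b -> 0x61
//   xINSERTPS( xmm0, xmm1, imm8 );               // xmm0/xmm1 register constants assumed in scope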
+// +__emitinline void xINSERTPS( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); } +__emitinline void xINSERTPS( const xRegisterSSE& to, const u32* from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); } +__emitinline void xINSERTPS( const xRegisterSSE& to, const ModSibStrict& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x213a, to, from, imm8 ); } + +// [SSE-4.1] Extract a single-precision floating-point value from src at an offset +// determined by imm8[1-0]*32. The extracted single precision floating-point value +// is stored into the low 32-bits of dest (or at a 32-bit memory pointer). +// +__emitinline void xEXTRACTPS( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, to, from, imm8 ); } +__emitinline void xEXTRACTPS( u32* dest, const xRegisterSSE& from, u8 imm8 ) { xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); } +__emitinline void xEXTRACTPS( const ModSibStrict& dest, const xRegisterSSE& from, u8 imm8 ){ xOpWrite0F( 0x66, 0x173a, from, dest, imm8 ); } + +} \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_sse_helpers.h b/pcsx2/x86/ix86/ix86_sse_helpers.h index b198c336b5..f568282d9e 100644 --- a/pcsx2/x86/ix86/ix86_sse_helpers.h +++ b/pcsx2/x86/ix86/ix86_sse_helpers.h @@ -48,4 +48,3 @@ extern void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from ); extern void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); extern void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from ); extern void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -extern void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 7cca341cd3..6ecd400e98 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -252,28 +252,6 @@ namespace x86Emitter } }; - ////////////////////////////////////////////////////////////////////////////////////////// - // - template< typename OperandType > - class xRegisterSIMD : public xRegister - { - public: - static const xRegisterSIMD Empty; // defined as an empty/unused value (-1) - - public: - xRegisterSIMD(): xRegister() {} - xRegisterSIMD( const xRegisterSIMD& src ) : xRegister( src.Id ) {} - xRegisterSIMD( const xRegister& src ) : xRegister( src ) {} - explicit xRegisterSIMD( int regId ) : xRegister( regId ) {} - - xRegisterSIMD& operator=( const xRegisterSIMD& src ) - { - xRegister::Id = src.Id; - return *this; - } - }; - - // ------------------------------------------------------------------------ // Note: GCC parses templates ahead of time apparently as a 'favor' to the programmer, which // means it finds undeclared variables when MSVC does not (Since MSVC compiles templates @@ -282,8 +260,8 @@ namespace x86Emitter // all about the the templated code in haphazard fashion. Yay.. >_< // - typedef xRegisterSIMD xRegisterSSE; - typedef xRegisterSIMD xRegisterMMX; + typedef xRegister xRegisterSSE; + typedef xRegister xRegisterMMX; typedef xRegister xRegister32; typedef xRegister xRegister16; typedef xRegister xRegister8; From 52fcc69628019ca22733fa5e72b2791c174242fd Mon Sep 17 00:00:00 2001 From: arcum42 Date: Thu, 23 Apr 2009 13:15:44 +0000 Subject: [PATCH 132/143] Apply r1046 to ZeroGS DX as well. Ifdef a few things from r1047 so Linux compiles. A few minor changes. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1048 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/COP0.cpp | 40 ++++++++++++++----------- pcsx2/COP2.cpp | 6 ---- pcsx2/MMI.cpp | 14 +++++---- pcsx2/R3000A.h | 12 ++++---- pcsx2/R5900.h | 14 ++++----- pcsx2/x86/ix86/Makefile.am | 6 ++-- pcsx2/x86/ix86/implement/xmm/moremovs.h | 6 ++-- plugins/zerogs/dx/GSmain.cpp | 2 +- 8 files changed, 53 insertions(+), 47 deletions(-) diff --git a/pcsx2/COP0.cpp b/pcsx2/COP0.cpp index 21f8d2c5a4..a59f054525 100644 --- a/pcsx2/COP0.cpp +++ b/pcsx2/COP0.cpp @@ -308,8 +308,8 @@ namespace COP0 { void MFC0() { // Note on _Rd_ Condition 9: CP0.Count should be updated even if _Rt_ is 0. - if( (_Rd_ != 9) && !_Rt_ ) return; - if(_Rd_ != 9) { COP0_LOG("%s", disR5900Current.getCString() ); } + if ((_Rd_ != 9) && !_Rt_ ) return; + if (_Rd_ != 9) { COP0_LOG("%s", disR5900Current.getCString() ); } //if(bExecBIOS == FALSE && _Rd_ == 25) Console::WriteLn("MFC0 _Rd_ %x = %x", params _Rd_, cpuRegs.CP0.r[_Rd_]); switch (_Rd_) @@ -412,33 +412,40 @@ int CPCOND0() { //#define CPCOND0 1 -#define BC0(cond) \ +/*#define BC0(cond) \ if (CPCOND0() cond) { \ intDoBranch(_BranchTarget_); \ - } + }*/ void BC0F() { - BC0(== 0); + if (CPCOND0() == 0) intDoBranch(_BranchTarget_); COP0_LOG( "COP0 > BC0F" ); } void BC0T() { - BC0(== 1); + if (CPCOND0() == 1) intDoBranch(_BranchTarget_); COP0_LOG( "COP0 > BC0T" ); } -#define BC0L(cond) \ +/*#define BC0L(cond) \ if (CPCOND0() cond) { \ intDoBranch(_BranchTarget_); \ - } else cpuRegs.pc+= 4; - + } else cpuRegs.pc+= 4;*/ + void BC0FL() { - BC0L(== 0); + if (CPCOND0() == 0) + intDoBranch(_BranchTarget_); + else + cpuRegs.pc+= 4; + COP0_LOG( "COP0 > BC0FL" ); } void BC0TL() { - BC0L(== 1); + if (CPCOND0() == 1) + intDoBranch(_BranchTarget_); + else + cpuRegs.pc+= 4; COP0_LOG( "COP0 > BCOTL" ); } @@ -487,8 +494,7 @@ void TLBWR() { void TLBP() { int i; - - + union { struct { u32 VPN2:19; @@ -499,13 +505,13 @@ void TLBP() { u32 u; } EntryHi32; - EntryHi32.u=cpuRegs.CP0.n.EntryHi; + EntryHi32.u = cpuRegs.CP0.n.EntryHi; cpuRegs.CP0.n.Index=0xFFFFFFFF; for(i=0;i<48;i++){ - if(tlb[i].VPN2==((~tlb[i].Mask)&(EntryHi32.s.VPN2)) - &&((tlb[i].G&1)||((tlb[i].ASID & 0xff) == EntryHi32.s.ASID))) { - cpuRegs.CP0.n.Index=i; + if (tlb[i].VPN2 == ((~tlb[i].Mask) & (EntryHi32.s.VPN2)) + && ((tlb[i].G&1) || ((tlb[i].ASID & 0xff) == EntryHi32.s.ASID))) { + cpuRegs.CP0.n.Index = i; break; } } diff --git a/pcsx2/COP2.cpp b/pcsx2/COP2.cpp index d57df3915b..e227ea3f42 100644 --- a/pcsx2/COP2.cpp +++ b/pcsx2/COP2.cpp @@ -25,10 +25,6 @@ #include "VUops.h" #include "VUmicro.h" -//namespace R5900 { -//namespace Interpreter { -//namespace OpcodeImpl{ - using namespace R5900; using namespace R5900::Interpreter; @@ -85,5 +81,3 @@ void BC2TL() cpuRegs.pc+= 4; } } - -//}}} diff --git a/pcsx2/MMI.cpp b/pcsx2/MMI.cpp index 49b525d705..448c1bacff 100644 --- a/pcsx2/MMI.cpp +++ b/pcsx2/MMI.cpp @@ -177,11 +177,15 @@ void PLZCW() { _PLZCW (1); } -#define PMFHL_CLAMP(dst, src) \ - if ((int)src > (int)0x00007fff) dst = 0x7fff; \ - else \ - if ((int)src < (int)0xffff8000) dst = 0x8000; \ - else dst = (u16)src; +__forceinline void PMFHL_CLAMP(u16 dst, u16 src) +{ + if ((int)src > (int)0x00007fff) + dst = 0x7fff; + else if ((int)src < (int)0xffff8000) + dst = 0x8000; + else + dst = (u16)src; +} void PMFHL() { if (!_Rd_) return; diff --git a/pcsx2/R3000A.h b/pcsx2/R3000A.h index 9a93b851c1..49aee70828 100644 --- a/pcsx2/R3000A.h +++ b/pcsx2/R3000A.h @@ -126,14 +126,14 @@ extern s32 psxCycleEE; // tracks IOP's current sych status with the 
EE #ifndef _PC_ -#define _i32(x) (s32)x -#define _u32(x) (u32)x +#define _i32(x) (s32)x //R3000A +#define _u32(x) (u32)x //R3000A -#define _i16(x) (s16)x -#define _u16(x) (u16)x +#define _i16(x) (s16)x // Not used +#define _u16(x) (u16)x // Not used -#define _i8(x) (s8)x -#define _u8(x) (u8)x +#define _i8(x) (s8)x // Not used +#define _u8(x) (u8)x //R3000A - once /**** R3000A Instruction Macros ****/ #define _PC_ psxRegs.pc // The next PC to be executed diff --git a/pcsx2/R5900.h b/pcsx2/R5900.h index cb482ee6bf..7dedb6c329 100644 --- a/pcsx2/R5900.h +++ b/pcsx2/R5900.h @@ -121,16 +121,16 @@ union CP0regs { }; struct cpuRegisters { - GPRregs GPR; // GPR regs + GPRregs GPR; // GPR regs // NOTE: don't change order since recompiler uses it GPR_reg HI; GPR_reg LO; // hi & log 128bit wide CP0regs CP0; // is COP0 32bit? u32 sa; // shift amount (32bit), needs to be 16 byte aligned u32 IsDelaySlot; // set true when the current instruction is a delay slot. - u32 pc; // Program counter, when changing offset in struct, check iR5900-X.S to make sure offset is correct - u32 code; // current instruction - PERFregs PERF; + u32 pc; // Program counter, when changing offset in struct, check iR5900-X.S to make sure offset is correct + u32 code; // current instruction + PERFregs PERF; u32 eCycle[32]; u32 sCycle[32]; // for internal counters u32 cycle; // calculate cpucycles.. @@ -180,7 +180,7 @@ struct tlbs #ifndef _PC_ -#define _i64(x) (s64)x +/*#define _i64(x) (s64)x #define _u64(x) (u64)x #define _i32(x) (s32)x @@ -190,12 +190,12 @@ struct tlbs #define _u16(x) (u16)x #define _i8(x) (s8)x -#define _u8(x) (u8)x +#define _u8(x) (u8)x*/ //////////////////////////////////////////////////////////////////// // R5900 Instruction Macros -#define _PC_ cpuRegs.pc // The next PC to be executed +#define _PC_ cpuRegs.pc // The next PC to be executed - only used in this header and R3000A.h #define _Funct_ ((cpuRegs.code ) & 0x3F) // The funct part of the instruction register #define _Rd_ ((cpuRegs.code >> 11) & 0x1F) // The rd part of the instruction register diff --git a/pcsx2/x86/ix86/Makefile.am b/pcsx2/x86/ix86/Makefile.am index bb1c76665b..a85187bb58 100644 --- a/pcsx2/x86/ix86/Makefile.am +++ b/pcsx2/x86/ix86/Makefile.am @@ -2,9 +2,9 @@ INCLUDES = -I@srcdir@/.. -I@srcdir@/../../ -I@srcdir@/../../../common/include noinst_LIBRARIES = libix86.a libix86_a_SOURCES = \ -ix86.cpp ix86_cpudetect.cpp ix86_fpu.cpp ix86_jmp.cpp ix86_legacy_mmx.cpp ix86_tools.cpp ix86_3dnow.cpp \ -ix86_legacy.cpp ix86_legacy_sse.cpp \ +ix86.cpp ix86_cpudetect.cpp ix86_fpu.cpp ix86_jmp.cpp ix86_tools.cpp ix86_3dnow.cpp \ +ix86_legacy.cpp ix86_legacy_sse.cpp ix86_simd.cpp \ ix86_internal.h ix86_legacy_instructions.h ix86_macros.h ix86_sse_helpers.h ix86.h ix86_legacy_internal.h \ ix86_instructions.h ix86_legacy_types.h ix86_types.h \ bittest.h dwshift.h group1.h group2.h group3.h incdec.h jmpcall.h movs.h test.h \ -movqss.h arithmetic.h shufflepack.h basehelpers.h comparisons.h moremovs.h \ No newline at end of file +movqss.h arithmetic.h shufflepack.h basehelpers.h comparisons.h moremovs.h xchg.h \ No newline at end of file diff --git a/pcsx2/x86/ix86/implement/xmm/moremovs.h b/pcsx2/x86/ix86/implement/xmm/moremovs.h index 93fc620799..aeb95130a1 100644 --- a/pcsx2/x86/ix86/implement/xmm/moremovs.h +++ b/pcsx2/x86/ix86/implement/xmm/moremovs.h @@ -122,7 +122,7 @@ public: { if( to != from ) xOpWrite0F( PrefixA, Opcode, to, from ); } - +#ifndef __LINUX__ // Ifdef till Jake fixes; you can't use & on a const void*! 
__forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( (isAligned || (from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode, to, from ); @@ -132,7 +132,7 @@ public: { xOpWrite0F( (isAligned || (from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode_Alt, to, from ); } - +#endif __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { // ModSib form is aligned if it's displacement-only and the displacement is aligned: @@ -140,12 +140,14 @@ public: xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode, to, from ); } +#ifndef __LINUX__ // I'll ifdef this one, too. xOpWrite0F doesn't take ModSibBase & xRegisterSSE in that order. __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { // ModSib form is aligned if it's displacement-only and the displacement is aligned: bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() ); xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode_Alt, to, from ); } +#endif }; diff --git a/plugins/zerogs/dx/GSmain.cpp b/plugins/zerogs/dx/GSmain.cpp index 9a84caa4ed..0ee9b5ce74 100644 --- a/plugins/zerogs/dx/GSmain.cpp +++ b/plugins/zerogs/dx/GSmain.cpp @@ -842,7 +842,7 @@ void _GSgifTransfer(pathInfo *path, u32 *pMem, u32 size) continue; } - break; + continue; } } From 9c7aed9ad461e77649a877722f8526539926cab3 Mon Sep 17 00:00:00 2001 From: refraction Date: Thu, 23 Apr 2009 19:51:34 +0000 Subject: [PATCH 133/143] Readjusted my Gifsplit nonsense to refix Gradius V videos again, it's closer to the value I originally wanted anyway :P Fixed Issue 176
Timing this stuff is evil :/ git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1050 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Gif.cpp | 2 +- pcsx2/IPU/IPU.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index edb8ee7069..e1cf32efae 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -28,7 +28,7 @@ using std::min; -#define gifsplit 64 +#define gifsplit 128 enum gifstate_t { GIF_STATE_READY = 0, diff --git a/pcsx2/IPU/IPU.cpp b/pcsx2/IPU/IPU.cpp index e87d41ac7e..e8f9fa72a6 100644 --- a/pcsx2/IPU/IPU.cpp +++ b/pcsx2/IPU/IPU.cpp @@ -1367,7 +1367,7 @@ int FIFOto_write(u32* pMem, int size) g_nDMATransfer |= IPU_DMA_ACTV1; \ return totalqwc; \ } \ - } g_nDMATransfer &= ~(IPU_DMA_ACTV1 | IPU_DMA_DOTIE1);\ + } \ } extern void gsInterrupt(); @@ -1452,7 +1452,7 @@ int IPU1dma() } } - + g_nDMATransfer &= ~(IPU_DMA_ACTV1 | IPU_DMA_DOTIE1); } if ((ipu1dma->chcr & 0xc) == 0 && ipu1dma->qwc == 0) // Normal Mode From 2ac355eb039aae03e2e12d41754b0f6575e300cd Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Thu, 23 Apr 2009 22:05:34 +0000 Subject: [PATCH 135/143] Nothing happened here. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1051 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Gif.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index e1cf32efae..edb8ee7069 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -28,7 +28,7 @@ using std::min; -#define gifsplit 128 +#define gifsplit 64 enum gifstate_t { GIF_STATE_READY = 0, From e22a50a7c84c200527d2070e18a643cd7662457c Mon Sep 17 00:00:00 2001 From: gigaherz Date: Fri, 24 Apr 2009 01:14:25 +0000 Subject: [PATCH 136/143] So.. I got an "unlazy" day, and coded a bit more of my iop dma handler. It's disabled in the code, so that it wont' affect normal users. The code isn't working properly yet. Only handlers for cdvd and spu2 are in it: - the cdvd one doesn't work because of the way pcsx2 handles cdvd currently - the spu2 one seems to work, but it's not implemented in the plugin interface, so ATM it's just "faking" it using the old functions. So really nothing worth testing, just commiting to let people know I'm actually working on it... with long pauses in between. 
xD git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1052 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/CdRom.cpp | 28 +++++++ pcsx2/IopCommon.h | 2 +- pcsx2/IopCounters.cpp | 27 ++++++- pcsx2/IopCounters.h | 8 +- pcsx2/IopDma.cpp | 170 +++++++++++++++++++++++++++++++----------- pcsx2/IopDma.h | 40 ++++++++++ pcsx2/IopHw.cpp | 4 +- pcsx2/IopHw.h | 19 +++++ 8 files changed, 250 insertions(+), 48 deletions(-) diff --git a/pcsx2/CdRom.cpp b/pcsx2/CdRom.cpp index e22160be77..e9b4e6eb00 100644 --- a/pcsx2/CdRom.cpp +++ b/pcsx2/CdRom.cpp @@ -937,6 +937,34 @@ void psxDma3(u32 madr, u32 bcr, u32 chcr) { psxDmaInterrupt(3); } +#ifdef ENABLE_NEW_IOPDMA +s32 cdvdDmaRead(s32 channel, u32* data, u32 wordsLeft, u32* wordsProcessed) +{ + // hacked up from the code above + + if (cdr.Readed == 0) + { + //CDR_LOG("*** DMA 3 *** NOT READY"); + wordsProcessed = 0; + return 10000; + } + + memcpy_fast(data, cdr.pTransfer, wordsLeft); + //psxCpu->Clear(madr, cdsize/4); + cdr.pTransfer+=wordsLeft; + *wordsProcessed = wordsLeft; + + Console::Status("New IOP DMA handled CDVD DMA: channel %d, data %p, remaining %08x, processed %08x.", params channel,data,wordsLeft, *wordsProcessed); + return 0; +} + +void cdvdDmaInterrupt(s32 channel) +{ + cdrInterrupt(); +} + +#endif + void cdrReset() { memzero_obj(cdr); cdr.CurTrack=1; diff --git a/pcsx2/IopCommon.h b/pcsx2/IopCommon.h index 27b8341f4d..18b2161e32 100644 --- a/pcsx2/IopCommon.h +++ b/pcsx2/IopCommon.h @@ -29,10 +29,10 @@ #include "Sio.h" #include "Sif.h" +#include "IopDma.h" #include "IopMem.h" #include "IopHw.h" #include "IopBios.h" -#include "IopDma.h" #include "IopCounters.h" #include "IopSio2.h" diff --git a/pcsx2/IopCounters.cpp b/pcsx2/IopCounters.cpp index 1152854a19..664dceb228 100644 --- a/pcsx2/IopCounters.cpp +++ b/pcsx2/IopCounters.cpp @@ -38,8 +38,7 @@ #define PSXPIXEL ((int)(PSXCLK / 13500000)) #define PSXSOUNDCLK ((int)(48000)) - -psxCounter psxCounters[8]; +psxCounter psxCounters[NUM_COUNTERS]; s32 psxNextCounter; u32 psxNextsCounter; u8 psxhblankgate = 0; @@ -141,6 +140,12 @@ void psxRcntInit() { psxCounters[7].mode = 0x8; } +#ifdef ENABLE_NEW_IOPDMA + psxCounters[8].rate = 2000; + psxCounters[8].CycleT = psxCounters[7].rate; + psxCounters[8].mode = 0x8; +#endif + for (i=0; i<8; i++) psxCounters[i].sCycleT = psxRegs.cycle; @@ -453,6 +458,24 @@ void psxRcntUpdate() if (c < psxNextCounter) psxNextCounter = c; } +#ifdef ENABLE_NEW_IOPDMA + + // New Iop DMA handler WIP + { + const s32 difference = psxRegs.cycle - psxCounters[8].sCycleT; + s32 c = psxCounters[8].CycleT; + + if(difference >= psxCounters[8].CycleT) + { + psxCounters[8].sCycleT = psxRegs.cycle; + psxCounters[8].CycleT = psxCounters[8].rate; + IopDmaUpdate(difference); + } + else c -= difference; + if (c < psxNextCounter) psxNextCounter = c; + } +#endif + for (i=0; i<6; i++) _rcntSet( i ); } diff --git a/pcsx2/IopCounters.h b/pcsx2/IopCounters.h index 69c56acd74..8a743408f0 100644 --- a/pcsx2/IopCounters.h +++ b/pcsx2/IopCounters.h @@ -27,7 +27,13 @@ struct psxCounter { s32 CycleT; }; -extern psxCounter psxCounters[8]; +#ifdef ENABLE_NEW_IOPDMA +# define NUM_COUNTERS 9 +#else +# define NUM_COUNTERS 8 +#endif + +extern psxCounter psxCounters[NUM_COUNTERS]; extern s32 psxNextCounter; extern u32 psxNextsCounter; diff --git a/pcsx2/IopDma.cpp b/pcsx2/IopDma.cpp index 697b0acb9e..1af6cef622 100644 --- a/pcsx2/IopDma.cpp +++ b/pcsx2/IopDma.cpp @@ -258,35 +258,96 @@ void iopIntcIrq(uint irqType) // // fixme: Is this in progress? 
-#if FALSE +#ifdef ENABLE_NEW_IOPDMA -typedef s32(* DmaHandler)(s32 channel, u32* data, u32 wordsLeft, u32* wordsProcessed); -typedef void (* DmaIHandler)(s32 channel); - -s32 errDmaWrite(s32 channel, u32* data, u32 wordsLeft, u32* wordsProcessed); -s32 errDmaRead(s32 channel, u32* data, u32 wordsLeft, u32* wordsProcessed); - -struct DmaHandlerInfo +s32 spu2DmaRead(s32 channel, u32* data, u32 bytesLeft, u32* bytesProcessed) { - DmaHandler Read; - DmaHandler Write; - DmaIHandler Interrupt; -}; + // FIXME: change the plugin interfaces so that they are aware of this new dma handler -struct DmaStatusInfo + /* + u32 bytes = 1024; + if(bytesLeft<1024) + bytes=bytesLeft; + */ + u32 bytes=bytesLeft; + + // Update the spu2 to the current cycle before initiating the DMA + if (SPU2async) + { + SPU2async(psxRegs.cycle - psxCounters[6].sCycleT); + //Console::Status("cycles sent to SPU2 %x\n", psxRegs.cycle - psxCounters[6].sCycleT); + + psxCounters[6].sCycleT = psxRegs.cycle; + psxCounters[6].CycleT = bytes * 3; + + psxNextCounter -= (psxRegs.cycle - psxNextsCounter); + psxNextsCounter = psxRegs.cycle; + if (psxCounters[6].CycleT < psxNextCounter) + psxNextCounter = psxCounters[6].CycleT; + } + + if(channel==7) + SPU2readDMA7Mem((u16 *)data, bytes/2); + else + SPU2readDMA4Mem((u16 *)data, bytes/2); + + *bytesProcessed = bytes; + + return 0; +} + +s32 spu2DmaWrite(s32 channel, u32* data, u32 bytesLeft, u32* bytesProcessed) { - u32 Control; - u32 Width; // bytes/word, for timing purposes - u32 MemAddr; - u32 ByteCount; - u32 Target; -}; + // FIXME: change the plugin interfaces so that they are aware of this new dma handler -// FIXME: Dummy constants, to be "filled in" with proper values later -#define DMA_CTRL_ACTIVE 0x80000000 -#define DMA_CTRL_DIRECTION 0x00000001 -#define DMA_CHANNEL_MAX 16 /* ? */ + /* + u32 bytes = 1024; + if(bytesLeft<1024) + bytes=bytesLeft; + */ + u32 bytes=bytesLeft; + + + // Update the spu2 to the current cycle before initiating the DMA + if (SPU2async) + { + SPU2async(psxRegs.cycle - psxCounters[6].sCycleT); + //Console::Status("cycles sent to SPU2 %x\n", psxRegs.cycle - psxCounters[6].sCycleT); + + psxCounters[6].sCycleT = psxRegs.cycle; + psxCounters[6].CycleT = bytes * 3; + + psxNextCounter -= (psxRegs.cycle - psxNextsCounter); + psxNextsCounter = psxRegs.cycle; + if (psxCounters[6].CycleT < psxNextCounter) + psxNextCounter = psxCounters[6].CycleT; + } + + if(channel==7) + SPU2writeDMA7Mem((u16 *)data, bytes/2); + else + SPU2writeDMA4Mem((u16 *)data, bytes/2); + + + *bytesProcessed = bytes; + + return 0; +} + +void spu2DmaInterrupt(s32 channel) +{ + if(channel==7) + SPU2interruptDMA7(); + else + SPU2interruptDMA4(); +} + +//typedef s32(* DmaHandler)(s32 channel, u32* data, u32 bytesLeft, u32* bytesProcessed); +//typedef void (* DmaIHandler)(s32 channel); + +s32 errDmaWrite(s32 channel, u32* data, u32 bytesLeft, u32* bytesProcessed); +s32 errDmaRead(s32 channel, u32* data, u32 bytesLeft, u32* bytesProcessed); DmaStatusInfo IopChannels[DMA_CHANNEL_MAX]; // I dont' knwo how many there are, 10? @@ -300,9 +361,9 @@ DmaHandlerInfo IopDmaHandlers[DMA_CHANNEL_MAX] = {0}, //5 {0}, //6: OT? 
{spu2DmaRead, spu2DmaWrite, spu2DmaInterrupt}, //7: Spu Core1 - {dev9DmaRead, dev9DmaWrite, dev9DmaInterrupt}, //8: Dev9 - {sif0DmaRead, sif0DmaWrite, sif0DmaInterrupt}, //9: SIF0 - {sif1DmaRead, sif1DmaWrite, sif1DmaInterrupt}, //10: SIF1 + {0},//{dev9DmaRead, dev9DmaWrite, dev9DmaInterrupt}, //8: Dev9 + {0},//{sif0DmaRead, sif0DmaWrite, sif0DmaInterrupt}, //9: SIF0 + {0},//{sif1DmaRead, sif1DmaWrite, sif1DmaInterrupt}, //10: SIF1 {0}, // Sio2 {0}, // Sio2 }; @@ -324,26 +385,37 @@ const char* IopDmaNames[DMA_CHANNEL_MAX] = "Sio2", "?", "?", "?" }; -}; // Prototypes. To be implemented later (or in other parts of the emulator) -void SetDmaUpdateTarget(u32 delay); -void RaiseDmaIrq(u32 channel); +void SetDmaUpdateTarget(u32 delay) +{ + psxCounters[8].CycleT = delay; +} + +void RaiseDmaIrq(u32 channel) +{ + if(channel<7) + psxDmaInterrupt(channel); + else + psxDmaInterrupt2(channel-7); +} // WARNING: CALLER ****[MUST]**** CALL IopDmaUpdate RIGHT AFTER THIS! void IopDmaStart(int channel, u32 chcr, u32 madr, u32 bcr) { // I dont' really understand this, but it's used above. Is this BYTES OR WHAT? - int size = (bcr >> 16) * (bcr & 0xFFFF); + int size = 4* (bcr >> 16) * (bcr & 0xFFFF); IopChannels[channel].Control = chcr | DMA_CTRL_ACTIVE; IopChannels[channel].MemAddr = madr; IopChannels[channel].ByteCount = size; + + SetDmaUpdateTarget(0); } void IopDmaUpdate(u32 elapsed) { - u32 MinDelay = 0xFFFFFFFF; + s32 MinDelay = 0x7FFFFFFF; for (int i = 0;i < DMA_CHANNEL_MAX;i++) { @@ -363,12 +435,17 @@ void IopDmaUpdate(u32 elapsed) else { // TODO: Make sure it's the right order - DmaHandler handler = (ch->Control & DMA_CTRL_DIRECTION) ? IopDmaHandlers[i].Read : IopDmaHandlers[i].Write; + DmaHandler handler = (ch->Control & DMA_CTRL_DIRECTION) ? IopDmaHandlers[i].Write : IopDmaHandlers[i].Read; u32 BCount = 0; - s32 Target = (handler) ? handler(i, (u32*)PSXM(ch->MemAddr), ch->ByteCount, &BCount) : 0; + s32 Target = (handler) ? handler(i, (u32*)iopPhysMem(ch->MemAddr), ch->ByteCount, &BCount) : 0; - ch->Target = 100; + if(BCount>0) + { + psxCpu->Clear(ch->MemAddr, BCount/4); + } + + int TTarget = 100; if (Target < 0) { // TODO: ... What to do if the plugin errors? :P @@ -378,29 +455,38 @@ void IopDmaUpdate(u32 elapsed) ch->MemAddr += BCount; ch->ByteCount -= BCount; - ch->Target = BCount / ch->Width; + TTarget = BCount; // / ch->Width; } - if (Target != 0) ch->Target = Target; + if (Target != 0) TTarget = Target; + + if (ch->TargetTarget += TTarget; } } } } + + if(MinDelay<0x7FFFFFFF) + SetDmaUpdateTarget(MinDelay); + else + SetDmaUpdateTarget(10000); } -s32 errDmaRead(s32 channel, u32* data, u32 wordsLeft, u32* wordsProcessed) +s32 errDmaRead(s32 channel, u32* data, u32 bytesLeft, u32* bytesProcessed) { - Console::Error("ERROR: Tried to read using DMA %d (%s). Ignoring.", 0, channel, IopDmaNames[channel]); + Console::Error("ERROR: Tried to read using DMA %d (%s). Ignoring.", params 0, channel, IopDmaNames[channel]); - *wordsProcessed = wordsLeft; + *bytesProcessed = bytesLeft; return 0; } -s32 errDmaWrite(s32 channel, u32* data, u32 wordsLeft, u32* wordsProcessed) +s32 errDmaWrite(s32 channel, u32* data, u32 bytesLeft, u32* bytesProcessed) { - Console::Error("ERROR: Tried to write using DMA %d (%s). Ignoring.", 0, channel, IopDmaNames[channel]); + Console::Error("ERROR: Tried to write using DMA %d (%s). 
Ignoring.", params 0, channel, IopDmaNames[channel]); - *wordsProcessed = wordsLeft; + *bytesProcessed = bytesLeft; return 0; } diff --git a/pcsx2/IopDma.h b/pcsx2/IopDma.h index 416d941e29..c4136fe334 100644 --- a/pcsx2/IopDma.h +++ b/pcsx2/IopDma.h @@ -21,6 +21,46 @@ #include "PS2Edefs.h" +//#define ENABLE_NEW_IOPDMA + +#ifdef ENABLE_NEW_IOPDMA + +typedef s32(* DmaHandler)(s32 channel, u32* data, u32 bytesLeft, u32* bytesProcessed); +typedef void (* DmaIHandler)(s32 channel); + +struct DmaHandlerInfo +{ + DmaHandler Read; + DmaHandler Write; + DmaIHandler Interrupt; +}; + +struct DmaStatusInfo +{ + u32 Control; + u32 Width; // bytes/word, for timing purposes + u32 MemAddr; + u32 ByteCount; + s32 Target; +}; + +// FIXME: Dummy constants, to be "filled in" with proper values later +#define DMA_CTRL_ACTIVE 0x01000000 +#define DMA_CTRL_DIRECTION 0x00000001 + +#define DMA_CHANNEL_MAX 16 /* ? */ + +// WARNING: CALLER ****[MUST]**** CALL IopDmaUpdate RIGHT AFTER THIS! +void IopDmaStart(int channel, u32 chcr, u32 madr, u32 bcr); +void IopDmaUpdate(u32 elapsed); + +// external dma handlers +extern s32 cdvdDmaRead(s32 channel, u32* data, u32 bytesLeft, u32* bytesProcessed); +extern void cdvdDmaInterrupt(s32 channel); + +//#else +#endif + void psxDma2(u32 madr, u32 bcr, u32 chcr); void psxDma3(u32 madr, u32 bcr, u32 chcr); void psxDma4(u32 madr, u32 bcr, u32 chcr); diff --git a/pcsx2/IopHw.cpp b/pcsx2/IopHw.cpp index 8a486855d2..b973f43ac0 100644 --- a/pcsx2/IopHw.cpp +++ b/pcsx2/IopHw.cpp @@ -1020,7 +1020,7 @@ void psxHwWrite32(u32 add, u32 value) { case 0x1f8010c8: PSXHW_LOG("DMA4 CHCR 32bit write %lx", value); HW_DMA4_CHCR = value; // DMA4 chcr (SPU DMA) - DmaExec(4); + DmaExecNew(4); return; //------------------------------------------------------------------ @@ -1053,7 +1053,7 @@ void psxHwWrite32(u32 add, u32 value) { case 0x1f801508: PSXHW_LOG("DMA7 CHCR 32bit write %lx", value); HW_DMA7_CHCR = value; // DMA7 chcr (SPU2) - DmaExec2(7); + DmaExecNew2(7); return; //------------------------------------------------------------------ diff --git a/pcsx2/IopHw.h b/pcsx2/IopHw.h index 6fdabcc287..5579161b9c 100644 --- a/pcsx2/IopHw.h +++ b/pcsx2/IopHw.h @@ -69,6 +69,25 @@ enum IOPCountRegs } \ } +#ifdef ENABLE_NEW_IOPDMA +#define DmaExecNew(n) { \ + if (HW_DMA##n##_CHCR & 0x01000000 && \ + HW_DMA_PCR & (8 << (n * 4))) { \ + IopDmaStart(n, HW_DMA##n##_CHCR, HW_DMA##n##_MADR, HW_DMA##n##_BCR); \ + } \ +} + +#define DmaExecNew2(n) { \ + if (HW_DMA##n##_CHCR & 0x01000000 && \ + HW_DMA_PCR2 & (8 << ((n-7) * 4))) { \ + IopDmaStart(n, HW_DMA##n##_CHCR, HW_DMA##n##_MADR, HW_DMA##n##_BCR); \ + } \ +} +#else +#define DmaExecNew(n) DmaExec(n) +#define DmaExecNew2(n) DmaExec2(n) +#endif + #define HW_DMA0_MADR (psxHu32(0x1080)) // MDEC in DMA #define HW_DMA0_BCR (psxHu32(0x1084)) #define HW_DMA0_CHCR (psxHu32(0x1088)) From 286c405ce9810e6688de0f95dd33433d3900ca95 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 24 Apr 2009 02:28:14 +0000 Subject: [PATCH 137/143] Moved a few things around. Redid some of the work I did earlier on Hw with the non-obsolete version of the code. Changed some code not to use defines... 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1053 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/Pcsx2Config.h | 39 +++++- pcsx2/Hw.cpp | 12 +- pcsx2/HwRead.cpp | 63 +++++----- pcsx2/HwWrite.cpp | 148 +++++++++++----------- pcsx2/Interpreter.cpp | 236 +++++++++++++++++++++++++++-------- pcsx2/x86/iMMI.cpp | 4 +- pcsx2/x86/iR5900.h | 21 +--- 7 files changed, 337 insertions(+), 186 deletions(-) diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index 89f46f7c71..8c248bd521 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -56,13 +56,20 @@ extern SessionOverrideFlags g_Session; #define PCSX2_FRAMELIMIT_SKIP 0x800 #define PCSX2_FRAMELIMIT_VUSKIP 0xc00 +#define CHECK_FRAMELIMIT (Config.Options&PCSX2_FRAMELIMIT_MASK) + +//------------ CPU Options!!! --------------- #define CHECK_MULTIGS (Config.Options&PCSX2_GSMULTITHREAD) #define CHECK_EEREC (!g_Session.ForceDisableEErec && Config.Options&PCSX2_EEREC) +#define CHECK_VU0REC (!g_Session.ForceDisableVU0rec && Config.Options&PCSX2_VU0REC) +#define CHECK_VU1REC (!g_Session.ForceDisableVU1rec && (Config.Options&PCSX2_VU1REC)) + //------------ SPECIAL GAME FIXES!!! --------------- #define CHECK_VUADDSUBHACK (Config.GameFixes & 0x1) // Special Fix for Tri-ace games, they use an encryption algorithm that requires VU addi opcode to be bit-accurate. #define CHECK_FPUCOMPAREHACK (Config.GameFixes & 0x4) // Special Fix for Digimon Rumble Arena 2, fixes spinning/hanging on intro-menu. #define CHECK_VUCLIPFLAGHACK (Config.GameFixes & 0x2) // Special Fix for Persona games, maybe others. It's to do with the VU clip flag (again). #define CHECK_FPUMULHACK (Config.GameFixes & 0x8) // Special Fix for Tales of Destiny hangs. + //------------ Advanced Options!!! --------------- #define CHECK_VU_OVERFLOW (Config.vuOptions & 0x1) #define CHECK_VU_EXTRA_OVERFLOW (Config.vuOptions & 0x2) // If enabled, Operands are clamped before being used in the VU recs @@ -75,14 +82,40 @@ extern SessionOverrideFlags g_Session; #define CHECK_FPU_FULL (Config.eeOptions & 0x4) #define DEFAULT_eeOptions 0x01 #define DEFAULT_vuOptions 0x01 + //------------ DEFAULT sseMXCSR VALUES!!! --------------- #define DEFAULT_sseMXCSR 0xffc0 //FPU rounding > DaZ, FtZ, "chop" #define DEFAULT_sseVUMXCSR 0xffc0 //VU rounding > DaZ, FtZ, "chop" -#define CHECK_FRAMELIMIT (Config.Options&PCSX2_FRAMELIMIT_MASK) +//------------ Recompiler defines - Comment to disable a recompiler --------------- +// Yay! These work now! (air) ... almost (air) -#define CHECK_VU0REC (!g_Session.ForceDisableVU0rec && Config.Options&PCSX2_VU0REC) -#define CHECK_VU1REC (!g_Session.ForceDisableVU1rec && (Config.Options&PCSX2_VU1REC)) +#define SHIFT_RECOMPILE // Speed majorly reduced if disabled +#define BRANCH_RECOMPILE // Speed extremely reduced if disabled - more then shift + +// Disabling all the recompilers in this block is interesting, as it still runs at a reasonable rate. +// It also adds a few glitches. Really reminds me of the old Linux 64-bit version. --arcum42 +#define ARITHMETICIMM_RECOMPILE +#define ARITHMETIC_RECOMPILE +#define MULTDIV_RECOMPILE +#define JUMP_RECOMPILE +#define LOADSTORE_RECOMPILE +#define MOVE_RECOMPILE +#define MMI_RECOMPILE +#define MMI0_RECOMPILE +#define MMI1_RECOMPILE +#define MMI2_RECOMPILE +#define MMI3_RECOMPILE +#define FPU_RECOMPILE +#define CP0_RECOMPILE +#define CP2_RECOMPILE + +// You can't recompile ARITHMETICIMM without ARITHMETIC. 
+#ifndef ARITHMETIC_RECOMPILE +#undef ARITHMETICIMM_RECOMPILE +#endif + +#define EE_CONST_PROP // rec2 - enables constant propagation (faster) // Memory Card configuration, per slot. struct McdConfig diff --git a/pcsx2/Hw.cpp b/pcsx2/Hw.cpp index 591de7138e..3ff49efc15 100644 --- a/pcsx2/Hw.cpp +++ b/pcsx2/Hw.cpp @@ -149,8 +149,6 @@ int hwMFIFOWrite(u32 addr, u8 *data, u32 size) { bool hwDmacSrcChainWithStack(DMACh *dma, int id) { - u32 temp; - switch (id) { case 0: // Refe - Transfer Packet According to ADDR field return true; //End Transfer @@ -161,18 +159,20 @@ bool hwDmacSrcChainWithStack(DMACh *dma, int id) { return false; case 2: // Next - Transfer QWC following tag. TADR = ADDR - temp = dma->madr; //Temporarily Store ADDR + { + u32 temp = dma->madr; //Temporarily Store ADDR dma->madr = dma->tadr + 16; //Set MADR to QW following the tag dma->tadr = temp; //Copy temporarily stored ADDR to Tag return false; - + } case 3: // Ref - Transfer QWC from ADDR field case 4: // Refs - Transfer QWC from ADDR field (Stall Control) dma->tadr += 16; //Set TADR to next tag return false; case 5: // Call - Transfer QWC following the tag, save succeeding tag - temp = dma->madr; //Temporarily Store ADDR + { + u32 temp = dma->madr; //Temporarily Store ADDR dma->madr = dma->tadr + 16; //Set MADR to data following the tag @@ -190,7 +190,7 @@ bool hwDmacSrcChainWithStack(DMACh *dma, int id) { dma->tadr = temp; //Set TADR to temporarily stored ADDR return false; - + } case 6: // Ret - Transfer QWC following the tag, load next tag dma->madr = dma->tadr + 16; //Set MADR to data following the tag diff --git a/pcsx2/HwRead.cpp b/pcsx2/HwRead.cpp index bedf07fc46..2104c8acb8 100644 --- a/pcsx2/HwRead.cpp +++ b/pcsx2/HwRead.cpp @@ -61,34 +61,35 @@ __forceinline u8 hwRead8(u32 mem) switch (mem) { - case 0x10000000: ret = (u8)rcntRcount(0); break; - case 0x10000010: ret = (u8)counters[0].modeval; break; - case 0x10000020: ret = (u8)counters[0].target; break; - case 0x10000030: ret = (u8)counters[0].hold; break; + // Note: the values without defines = the defines + 1. 
+ case RCNT0_COUNT: ret = (u8)rcntRcount(0); break; + case RCNT0_MODE: ret = (u8)counters[0].modeval; break; + case RCNT0_TARGET: ret = (u8)counters[0].target; break; + case RCNT0_HOLD: ret = (u8)counters[0].hold; break; case 0x10000001: ret = (u8)(rcntRcount(0)>>8); break; case 0x10000011: ret = (u8)(counters[0].modeval>>8); break; case 0x10000021: ret = (u8)(counters[0].target>>8); break; case 0x10000031: ret = (u8)(counters[0].hold>>8); break; - case 0x10000800: ret = (u8)rcntRcount(1); break; - case 0x10000810: ret = (u8)counters[1].modeval; break; - case 0x10000820: ret = (u8)counters[1].target; break; - case 0x10000830: ret = (u8)counters[1].hold; break; + case RCNT1_COUNT: ret = (u8)rcntRcount(1); break; + case RCNT1_MODE: ret = (u8)counters[1].modeval; break; + case RCNT1_TARGET: ret = (u8)counters[1].target; break; + case RCNT1_HOLD: ret = (u8)counters[1].hold; break; case 0x10000801: ret = (u8)(rcntRcount(1)>>8); break; case 0x10000811: ret = (u8)(counters[1].modeval>>8); break; case 0x10000821: ret = (u8)(counters[1].target>>8); break; case 0x10000831: ret = (u8)(counters[1].hold>>8); break; - case 0x10001000: ret = (u8)rcntRcount(2); break; - case 0x10001010: ret = (u8)counters[2].modeval; break; - case 0x10001020: ret = (u8)counters[2].target; break; + case RCNT2_COUNT: ret = (u8)rcntRcount(2); break; + case RCNT2_MODE: ret = (u8)counters[2].modeval; break; + case RCNT2_TARGET: ret = (u8)counters[2].target; break; case 0x10001001: ret = (u8)(rcntRcount(2)>>8); break; case 0x10001011: ret = (u8)(counters[2].modeval>>8); break; case 0x10001021: ret = (u8)(counters[2].target>>8); break; - case 0x10001800: ret = (u8)rcntRcount(3); break; - case 0x10001810: ret = (u8)counters[3].modeval; break; - case 0x10001820: ret = (u8)counters[3].target; break; + case RCNT3_COUNT: ret = (u8)rcntRcount(3); break; + case RCNT3_MODE: ret = (u8)counters[3].modeval; break; + case RCNT3_TARGET: ret = (u8)counters[3].target; break; case 0x10001801: ret = (u8)(rcntRcount(3)>>8); break; case 0x10001811: ret = (u8)(counters[3].modeval>>8); break; case 0x10001821: ret = (u8)(counters[3].target>>8); break; @@ -97,7 +98,7 @@ __forceinline u8 hwRead8(u32 mem) if ((mem & 0xffffff0f) == 0x1000f200) { if(mem == 0x1000f260) ret = 0; - else if(mem == 0x1000F240) { + else if(mem == SBUS_F240) { ret = psHu32(mem); //psHu32(mem) &= ~0x4000; } @@ -120,34 +121,34 @@ __forceinline u16 hwRead16(u32 mem) { u16 ret; - if( mem >= 0x10002000 && mem < 0x10008000 ) + if( mem >= IPU_CMD && mem < D0_CHCR ) Console::Notice("Unexpected hwRead16 from 0x%x", params mem); switch (mem) { - case 0x10000000: ret = (u16)rcntRcount(0); break; - case 0x10000010: ret = (u16)counters[0].modeval; break; - case 0x10000020: ret = (u16)counters[0].target; break; - case 0x10000030: ret = (u16)counters[0].hold; break; + case RCNT0_COUNT: ret = (u16)rcntRcount(0); break; + case RCNT0_MODE: ret = (u16)counters[0].modeval; break; + case RCNT0_TARGET: ret = (u16)counters[0].target; break; + case RCNT0_HOLD: ret = (u16)counters[0].hold; break; - case 0x10000800: ret = (u16)rcntRcount(1); break; - case 0x10000810: ret = (u16)counters[1].modeval; break; - case 0x10000820: ret = (u16)counters[1].target; break; - case 0x10000830: ret = (u16)counters[1].hold; break; + case RCNT1_COUNT: ret = (u16)rcntRcount(1); break; + case RCNT1_MODE: ret = (u16)counters[1].modeval; break; + case RCNT1_TARGET: ret = (u16)counters[1].target; break; + case RCNT1_HOLD: ret = (u16)counters[1].hold; break; - case 0x10001000: ret = (u16)rcntRcount(2); break; - case 
0x10001010: ret = (u16)counters[2].modeval; break; - case 0x10001020: ret = (u16)counters[2].target; break; + case RCNT2_COUNT: ret = (u16)rcntRcount(2); break; + case RCNT2_MODE: ret = (u16)counters[2].modeval; break; + case RCNT2_TARGET: ret = (u16)counters[2].target; break; - case 0x10001800: ret = (u16)rcntRcount(3); break; - case 0x10001810: ret = (u16)counters[3].modeval; break; - case 0x10001820: ret = (u16)counters[3].target; break; + case RCNT3_COUNT: ret = (u16)rcntRcount(3); break; + case RCNT3_MODE: ret = (u16)counters[3].modeval; break; + case RCNT3_TARGET: ret = (u16)counters[3].target; break; default: if ((mem & 0xffffff0f) == 0x1000f200) { if(mem == 0x1000f260) ret = 0; - else if(mem == 0x1000F240) { + else if(mem == SBUS_F240) { ret = psHu16(mem) | 0x0102; psHu32(mem) &= ~0x4000; } diff --git a/pcsx2/HwWrite.cpp b/pcsx2/HwWrite.cpp index 3739c23619..fa5fbdd4ff 100644 --- a/pcsx2/HwWrite.cpp +++ b/pcsx2/HwWrite.cpp @@ -145,27 +145,27 @@ void hwWrite8(u32 mem, u8 value) { DevCon::Notice( "hwWrite8 to 0x%x = 0x%x", params mem, value ); switch (mem) { - case 0x10000000: rcntWcount(0, value); break; - case 0x10000010: rcntWmode(0, (counters[0].modeval & 0xff00) | value); break; + case RCNT0_COUNT: rcntWcount(0, value); break; + case RCNT0_MODE: rcntWmode(0, (counters[0].modeval & 0xff00) | value); break; case 0x10000011: rcntWmode(0, (counters[0].modeval & 0xff) | value << 8); break; - case 0x10000020: rcntWtarget(0, value); break; - case 0x10000030: rcntWhold(0, value); break; + case RCNT0_TARGET: rcntWtarget(0, value); break; + case RCNT0_HOLD: rcntWhold(0, value); break; - case 0x10000800: rcntWcount(1, value); break; - case 0x10000810: rcntWmode(1, (counters[1].modeval & 0xff00) | value); break; + case RCNT1_COUNT: rcntWcount(1, value); break; + case RCNT1_MODE: rcntWmode(1, (counters[1].modeval & 0xff00) | value); break; case 0x10000811: rcntWmode(1, (counters[1].modeval & 0xff) | value << 8); break; - case 0x10000820: rcntWtarget(1, value); break; - case 0x10000830: rcntWhold(1, value); break; + case RCNT1_TARGET: rcntWtarget(1, value); break; + case RCNT1_HOLD: rcntWhold(1, value); break; - case 0x10001000: rcntWcount(2, value); break; - case 0x10001010: rcntWmode(2, (counters[2].modeval & 0xff00) | value); break; + case RCNT2_COUNT: rcntWcount(2, value); break; + case RCNT2_MODE: rcntWmode(2, (counters[2].modeval & 0xff00) | value); break; case 0x10001011: rcntWmode(2, (counters[2].modeval & 0xff) | value << 8); break; - case 0x10001020: rcntWtarget(2, value); break; + case RCNT2_TARGET: rcntWtarget(2, value); break; - case 0x10001800: rcntWcount(3, value); break; - case 0x10001810: rcntWmode(3, (counters[3].modeval & 0xff00) | value); break; + case RCNT3_COUNT: rcntWcount(3, value); break; + case RCNT3_MODE: rcntWmode(3, (counters[3].modeval & 0xff00) | value); break; case 0x10001811: rcntWmode(3, (counters[3].modeval & 0xff) | value << 8); break; - case 0x10001820: rcntWtarget(3, value); break; + case RCNT3_TARGET: rcntWtarget(3, value); break; case 0x1000f180: if (value == '\n') { @@ -321,25 +321,25 @@ __forceinline void hwWrite16(u32 mem, u16 value) switch(mem) { - case 0x10000000: rcntWcount(0, value); break; - case 0x10000010: rcntWmode(0, value); break; - case 0x10000020: rcntWtarget(0, value); break; - case 0x10000030: rcntWhold(0, value); break; + case RCNT0_COUNT: rcntWcount(0, value); break; + case RCNT0_MODE: rcntWmode(0, value); break; + case RCNT0_TARGET: rcntWtarget(0, value); break; + case RCNT0_HOLD: rcntWhold(0, value); break; - case 0x10000800: 
rcntWcount(1, value); break; - case 0x10000810: rcntWmode(1, value); break; - case 0x10000820: rcntWtarget(1, value); break; - case 0x10000830: rcntWhold(1, value); break; + case RCNT1_COUNT: rcntWcount(1, value); break; + case RCNT1_MODE: rcntWmode(1, value); break; + case RCNT1_TARGET: rcntWtarget(1, value); break; + case RCNT1_HOLD: rcntWhold(1, value); break; - case 0x10001000: rcntWcount(2, value); break; - case 0x10001010: rcntWmode(2, value); break; - case 0x10001020: rcntWtarget(2, value); break; + case RCNT2_COUNT: rcntWcount(2, value); break; + case RCNT2_MODE: rcntWmode(2, value); break; + case RCNT2_TARGET: rcntWtarget(2, value); break; - case 0x10001800: rcntWcount(3, value); break; - case 0x10001810: rcntWmode(3, value); break; - case 0x10001820: rcntWtarget(3, value); break; + case RCNT3_COUNT: rcntWcount(3, value); break; + case RCNT3_MODE: rcntWmode(3, value); break; + case RCNT3_TARGET: rcntWtarget(3, value); break; - case 0x10008000: // dma0 - vif0 + case D0_CHCR: // dma0 - vif0 DMA_LOG("VIF0dma %lx", value); if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) { @@ -349,7 +349,7 @@ __forceinline void hwWrite16(u32 mem, u16 value) DmaExec16(dmaVIF0, mem, value); break; - case 0x10009000: // dma1 - vif1 - chcr + case D1_CHCR: // dma1 - vif1 - chcr DMA_LOG("VIF1dma CHCR %lx", value); if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) { @@ -361,34 +361,34 @@ __forceinline void hwWrite16(u32 mem, u16 value) break; #ifdef PCSX2_DEVBUILD - case 0x10009010: // dma1 - vif1 - madr + case D1_MADR: // dma1 - vif1 - madr HW_LOG("VIF1dma Madr %lx", value); psHu16(mem) = value;//dma1 madr break; - case 0x10009020: // dma1 - vif1 - qwc + case D1_QWC: // dma1 - vif1 - qwc HW_LOG("VIF1dma QWC %lx", value); psHu16(mem) = value;//dma1 qwc break; - case 0x10009030: // dma1 - vif1 - tadr + case D1_TADR: // dma1 - vif1 - tadr HW_LOG("VIF1dma TADR %lx", value); psHu16(mem) = value;//dma1 tadr break; - case 0x10009040: // dma1 - vif1 - asr0 + case D1_ASR0: // dma1 - vif1 - asr0 HW_LOG("VIF1dma ASR0 %lx", value); psHu16(mem) = value;//dma1 asr0 break; - case 0x10009050: // dma1 - vif1 - asr1 + case D1_ASR1: // dma1 - vif1 - asr1 HW_LOG("VIF1dma ASR1 %lx", value); psHu16(mem) = value;//dma1 asr1 break; - case 0x10009080: // dma1 - vif1 - sadr + case D1_SADR: // dma1 - vif1 - sadr HW_LOG("VIF1dma SADR %lx", value); psHu16(mem) = value;//dma1 sadr break; #endif // --------------------------------------------------- - case 0x1000a000: // dma2 - gif + case D2_CHCR: // dma2 - gif DMA_LOG("0x%8.8x hwWrite32: GSdma %lx", cpuRegs.cycle, value); if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) { @@ -399,33 +399,33 @@ __forceinline void hwWrite16(u32 mem, u16 value) break; #ifdef PCSX2_DEVBUILD - case 0x1000a010: + case D2_MADR: psHu16(mem) = value;//dma2 madr HW_LOG("Hardware write DMA2_MADR 32bit at %x with value %x",mem,value); break; - case 0x1000a020: + case D2_QWC: psHu16(mem) = value;//dma2 qwc HW_LOG("Hardware write DMA2_QWC 32bit at %x with value %x",mem,value); break; - case 0x1000a030: + case D2_TADR: psHu16(mem) = value;//dma2 taddr HW_LOG("Hardware write DMA2_TADDR 32bit at %x with value %x",mem,value); break; - case 0x1000a040: + case D2_ASR0: psHu16(mem) = value;//dma2 asr0 HW_LOG("Hardware write DMA2_ASR0 32bit at %x with value %x",mem,value); break; - case 0x1000a050: + case D2_ASR1: psHu16(mem) = value;//dma2 asr1 HW_LOG("Hardware write DMA2_ASR1 32bit at %x with value %x",mem,value); break; - case 0x1000a080: + case D2_SADR: psHu16(mem) = value;//dma2 saddr HW_LOG("Hardware write 
DMA2_SADDR 32bit at %x with value %x",mem,value); break; #endif - case 0x1000b000: // dma3 - fromIPU + case D3_CHCR: // dma3 - fromIPU DMA_LOG("IPU0dma %lx", value); if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) { @@ -436,25 +436,25 @@ __forceinline void hwWrite16(u32 mem, u16 value) break; #ifdef PCSX2_DEVBUILD - case 0x1000b010: + case D3_MADR: psHu16(mem) = value;//dma2 madr HW_LOG("Hardware write IPU0DMA_MADR 32bit at %x with value %x",mem,value); break; - case 0x1000b020: + case D3_QWC: psHu16(mem) = value;//dma2 madr HW_LOG("Hardware write IPU0DMA_QWC 32bit at %x with value %x",mem,value); break; - case 0x1000b030: + case D3_TADR: psHu16(mem) = value;//dma2 tadr HW_LOG("Hardware write IPU0DMA_TADR 32bit at %x with value %x",mem,value); break; - case 0x1000b080: + case D3_SADR: psHu16(mem) = value;//dma2 saddr HW_LOG("Hardware write IPU0DMA_SADDR 32bit at %x with value %x",mem,value); break; #endif - case 0x1000b400: // dma4 - toIPU + case D4_CHCR: // dma4 - toIPU DMA_LOG("IPU1dma %lx", value); if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) { @@ -465,24 +465,24 @@ __forceinline void hwWrite16(u32 mem, u16 value) break; #ifdef PCSX2_DEVBUILD - case 0x1000b410: + case D4_MADR: psHu16(mem) = value;//dma2 madr HW_LOG("Hardware write IPU1DMA_MADR 32bit at %x with value %x",mem,value); break; - case 0x1000b420: + case D4_QWC: psHu16(mem) = value;//dma2 madr HW_LOG("Hardware write IPU1DMA_QWC 32bit at %x with value %x",mem,value); break; - case 0x1000b430: + case D4_TADR: psHu16(mem) = value;//dma2 tadr HW_LOG("Hardware write IPU1DMA_TADR 32bit at %x with value %x",mem,value); break; - case 0x1000b480: + case D4_SADR: psHu16(mem) = value;//dma2 saddr HW_LOG("Hardware write IPU1DMA_SADDR 32bit at %x with value %x",mem,value); break; #endif - case 0x1000c000: // dma5 - sif0 + case D5_CHCR: // dma5 - sif0 DMA_LOG("SIF0dma %lx", value); // if (value == 0) psxSu32(0x30) = 0x40000; if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) @@ -496,7 +496,7 @@ __forceinline void hwWrite16(u32 mem, u16 value) case 0x1000c002: //? break; - case 0x1000c400: // dma6 - sif1 + case D6_CHCR: // dma6 - sif1 DMA_LOG("SIF1dma %lx", value); if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) { @@ -507,7 +507,8 @@ __forceinline void hwWrite16(u32 mem, u16 value) break; #ifdef PCSX2_DEVBUILD - case 0x1000c420: // dma6 - sif1 - qwc + // No D6_MADR, and a TADR address that's not in the defines? + case D6_QWC: // dma6 - sif1 - qwc HW_LOG("SIF1dma QWC = %lx", value); psHu16(mem) = value; break; @@ -518,7 +519,7 @@ __forceinline void hwWrite16(u32 mem, u16 value) break; #endif - case 0x1000c800: // dma7 - sif2 + case D7_CHCR: // dma7 - sif2 DMA_LOG("SIF2dma %lx", value); if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) { @@ -530,7 +531,7 @@ __forceinline void hwWrite16(u32 mem, u16 value) case 0x1000c802: //? 
break; - case 0x1000d000: // dma8 - fromSPR + case D8_CHCR: // dma8 - fromSPR DMA_LOG("fromSPRdma %lx", value); if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) { @@ -565,13 +566,13 @@ __forceinline void hwWrite16(u32 mem, u16 value) psHu16(mem) = value; break; - case 0x1000f220: + case SBUS_F220: psHu16(mem) |= value; break; - case 0x1000f230: + case SBUS_SMFLG: psHu16(mem) &= ~value; break; - case 0x1000f240: + case SBUS_F240: if(!(value & 0x100)) psHu16(mem) &= ~0x100; else @@ -795,13 +796,13 @@ void __fastcall hwWrite32_page_0F( u32 mem, u32 value ) case HELPSWITCH(0x1000f200): psHu32(mem) = value; break; - case HELPSWITCH(0x1000f220): + case HELPSWITCH(SBUS_F220): psHu32(mem) |= value; break; - case HELPSWITCH(0x1000f230): + case HELPSWITCH(SBUS_SMFLG): psHu32(mem) &= ~value; break; - case HELPSWITCH(0x1000f240): + case HELPSWITCH(SBUS_F240): if(!(value & 0x100)) psHu32(mem) &= ~0x100; else @@ -815,7 +816,7 @@ void __fastcall hwWrite32_page_0F( u32 mem, u32 value ) psHu32(mem) = value; break; - case HELPSWITCH(0x1000f590): // DMAC_ENABLEW + case HELPSWITCH(DMAC_ENABLEW): // DMAC_ENABLEW HW_LOG("DMAC_ENABLEW Write 32bit %lx", value); psHu32(0xf590) = value; psHu32(0xf520) = value; @@ -890,7 +891,7 @@ void __fastcall hwWrite32_generic( u32 mem, u32 value ) case D2_SADR: regName = "GIFdma SADDR"; break; //------------------------------------------------------------------ - case 0x1000c000: // dma5 - sif0 + case D5_CHCR: // dma5 - sif0 DMA_LOG("SIF0dma EXECUTE, value=0x%x", value); //if (value == 0) psxSu32(0x30) = 0x40000; if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) @@ -901,7 +902,7 @@ void __fastcall hwWrite32_generic( u32 mem, u32 value ) DmaExec(dmaSIF0, mem, value); return; //------------------------------------------------------------------ - case 0x1000c400: // dma6 - sif1 + case D6_CHCR: // dma6 - sif1 DMA_LOG("SIF1dma EXECUTE, value=0x%x", value); if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) { @@ -911,11 +912,12 @@ void __fastcall hwWrite32_generic( u32 mem, u32 value ) DmaExec(dmaSIF1, mem, value); return; - case 0x1000c420: regName = "SIF1dma QWC"; break; + // Again, no MADR, and an undefined TADR. 
+ case D6_QWC: regName = "SIF1dma QWC"; break; case 0x1000c430: regName = "SIF1dma TADR"; break; //------------------------------------------------------------------ - case 0x1000c800: // dma7 - sif2 + case D7_CHCR: // dma7 - sif2 DMA_LOG("SIF2dma EXECUTE, value=0x%x", value); if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) { @@ -925,7 +927,7 @@ void __fastcall hwWrite32_generic( u32 mem, u32 value ) DmaExec(dmaSIF2, mem, value); return; //------------------------------------------------------------------ - case 0x1000d000: // dma8 - fromSPR + case D8_CHCR: // dma8 - fromSPR DMA_LOG("SPR0dma EXECUTE (fromSPR), value=0x%x", value); if ((value & 0x100) && !(psHu32(DMAC_CTRL) & 0x1)) { @@ -1048,7 +1050,7 @@ void __fastcall hwWrite64_generic( u32 mem, const mem64_t* srcval ) switch (mem) { - case 0x1000a000: // dma2 - gif + case D2_CHCR: // dma2 - gif DMA_LOG("0x%8.8x hwWrite64: GSdma %x", cpuRegs.cycle, value); DmaExec(dmaGIF, mem, value); break; @@ -1070,7 +1072,7 @@ void __fastcall hwWrite64_generic( u32 mem, const mem64_t* srcval ) case 0x1000f430: break; - case 0x1000f590: // DMAC_ENABLEW + case DMAC_ENABLEW: // DMAC_ENABLEW psHu32(0xf590) = value; psHu32(0xf520) = value; break; @@ -1103,7 +1105,7 @@ void __fastcall hwWrite128_generic(u32 mem, const mem128_t *srcval) cpuTestINTCInts(); break; - case 0x1000f590: // DMAC_ENABLEW + case DMAC_ENABLEW: // DMAC_ENABLEW psHu32(0xf590) = srcval[0]; psHu32(0xf520) = srcval[0]; break; diff --git a/pcsx2/Interpreter.cpp b/pcsx2/Interpreter.cpp index 533c6ca1a5..9e915c6de3 100644 --- a/pcsx2/Interpreter.cpp +++ b/pcsx2/Interpreter.cpp @@ -112,7 +112,8 @@ void __fastcall intDoBranch(u32 target) } } -void intSetBranch() { +void intSetBranch() +{ branch2 = /*cpuRegs.branch =*/ 1; } @@ -133,90 +134,223 @@ namespace OpcodeImpl { * Format: OP target * *********************************************************/ -void J() { +void J() +{ doBranch(_JumpTarget_); } -void JAL() { - _SetLink(31); doBranch(_JumpTarget_); +void JAL() +{ + _SetLink(31); + doBranch(_JumpTarget_); } /********************************************************* * Register branch logic * * Format: OP rs, rt, offset * *********************************************************/ -#define RepBranchi32(op) \ - if (cpuRegs.GPR.r[_Rs_].SD[0] op cpuRegs.GPR.r[_Rt_].SD[0]) doBranch(_BranchTarget_); \ - else intEventTest(); +void BEQ() // Branch if Rs == Rt +{ + if (cpuRegs.GPR.r[_Rs_].SD[0] == cpuRegs.GPR.r[_Rt_].SD[0]) + doBranch(_BranchTarget_); + else + intEventTest(); +} -void BEQ() { RepBranchi32(==) } // Branch if Rs == Rt -void BNE() { RepBranchi32(!=) } // Branch if Rs != Rt +void BNE() // Branch if Rs != Rt +{ + if (cpuRegs.GPR.r[_Rs_].SD[0] != cpuRegs.GPR.r[_Rt_].SD[0]) + doBranch(_BranchTarget_); + else + intEventTest(); +} /********************************************************* * Register branch logic * * Format: OP rs, offset * *********************************************************/ -#define RepZBranchi32(op) \ - if(cpuRegs.GPR.r[_Rs_].SD[0] op 0) { \ - doBranch(_BranchTarget_); \ - } -#define RepZBranchLinki32(op) \ +void BGEZ() // Branch if Rs >= 0 +{ + if(cpuRegs.GPR.r[_Rs_].SD[0] >= 0) + { + doBranch(_BranchTarget_); + } +} + +void BGEZAL() // Branch if Rs >= 0 and link +{ + _SetLink(31); + + if (cpuRegs.GPR.r[_Rs_].SD[0] >= 0) + { + doBranch(_BranchTarget_); + } +} + +void BGTZ() // Branch if Rs > 0 +{ + if (cpuRegs.GPR.r[_Rs_].SD[0] > 0) + { + doBranch(_BranchTarget_); + } +} + +void BLEZ() // Branch if Rs <= 0 +{ + if (cpuRegs.GPR.r[_Rs_].SD[0] <= 0) + { + 
doBranch(_BranchTarget_); + } +} + +void BLTZ() // Branch if Rs < 0 +{ + if (cpuRegs.GPR.r[_Rs_].SD[0] < 0) + { + doBranch(_BranchTarget_); + } +} + +void BLTZAL() // Branch if Rs < 0 and link +{ _SetLink(31); \ - if(cpuRegs.GPR.r[_Rs_].SD[0] op 0) { \ - doBranch(_BranchTarget_); \ + if (cpuRegs.GPR.r[_Rs_].SD[0] < 0) + { + doBranch(_BranchTarget_); } - -void BGEZ() { RepZBranchi32(>=) } // Branch if Rs >= 0 -void BGEZAL() { RepZBranchLinki32(>=) } // Branch if Rs >= 0 and link -void BGTZ() { RepZBranchi32(>) } // Branch if Rs > 0 -void BLEZ() { RepZBranchi32(<=) } // Branch if Rs <= 0 -void BLTZ() { RepZBranchi32(<) } // Branch if Rs < 0 -void BLTZAL() { RepZBranchLinki32(<) } // Branch if Rs < 0 and link - +} /********************************************************* * Register branch logic Likely * * Format: OP rs, offset * *********************************************************/ -#define RepZBranchi32Likely(op) \ - if(cpuRegs.GPR.r[_Rs_].SD[0] op 0) { \ - doBranch(_BranchTarget_); \ - } else { cpuRegs.pc +=4; intEventTest(); } - -#define RepZBranchLinki32Likely(op) \ - _SetLink(31); \ - if(cpuRegs.GPR.r[_Rs_].SD[0] op 0) { \ - doBranch(_BranchTarget_); \ - } else { cpuRegs.pc +=4; intEventTest(); } - -#define RepBranchi32Likely(op) \ - if(cpuRegs.GPR.r[_Rs_].SD[0] op cpuRegs.GPR.r[_Rt_].SD[0]) { \ - doBranch(_BranchTarget_); \ - } else { cpuRegs.pc +=4; intEventTest(); } -void BEQL() { RepBranchi32Likely(==) } // Branch if Rs == Rt -void BNEL() { RepBranchi32Likely(!=) } // Branch if Rs != Rt -void BLEZL() { RepZBranchi32Likely(<=) } // Branch if Rs <= 0 -void BGTZL() { RepZBranchi32Likely(>) } // Branch if Rs > 0 -void BLTZL() { RepZBranchi32Likely(<) } // Branch if Rs < 0 -void BGEZL() { RepZBranchi32Likely(>=) } // Branch if Rs >= 0 -void BLTZALL() { RepZBranchLinki32Likely(<) } // Branch if Rs < 0 and link -void BGEZALL() { RepZBranchLinki32Likely(>=) } // Branch if Rs >= 0 and link +void BEQL() // Branch if Rs == Rt +{ + if(cpuRegs.GPR.r[_Rs_].SD[0] == cpuRegs.GPR.r[_Rt_].SD[0]) + { + doBranch(_BranchTarget_); + } + else + { + cpuRegs.pc +=4; + intEventTest(); + } +} + +void BNEL() // Branch if Rs != Rt +{ + if(cpuRegs.GPR.r[_Rs_].SD[0] != cpuRegs.GPR.r[_Rt_].SD[0]) + { + doBranch(_BranchTarget_); + } + else + { + cpuRegs.pc +=4; + intEventTest(); + } +} + +void BLEZL() // Branch if Rs <= 0 +{ + if(cpuRegs.GPR.r[_Rs_].SD[0] <= 0) + { + doBranch(_BranchTarget_); + } + else + { + cpuRegs.pc +=4; + intEventTest(); + } +} + +void BGTZL() // Branch if Rs > 0 +{ + if(cpuRegs.GPR.r[_Rs_].SD[0] > 0) + { + doBranch(_BranchTarget_); + } + else + { + cpuRegs.pc +=4; + intEventTest(); + } +} + +void BLTZL() // Branch if Rs < 0 +{ + if(cpuRegs.GPR.r[_Rs_].SD[0] < 0) + { + doBranch(_BranchTarget_); + } + else + { + cpuRegs.pc +=4; + intEventTest(); + } +} + +void BGEZL() // Branch if Rs >= 0 +{ + if(cpuRegs.GPR.r[_Rs_].SD[0] >= 0) + { + doBranch(_BranchTarget_); + } + else + { + cpuRegs.pc +=4; + intEventTest(); + } +} + +void BLTZALL() // Branch if Rs < 0 and link +{ + _SetLink(31); + + if(cpuRegs.GPR.r[_Rs_].SD[0] < 0) + { + doBranch(_BranchTarget_); + } + else + { + cpuRegs.pc +=4; + intEventTest(); + } +} + +void BGEZALL() // Branch if Rs >= 0 and link +{ + _SetLink(31); + + if(cpuRegs.GPR.r[_Rs_].SD[0] >= 0) + { + doBranch(_BranchTarget_); + } + else + { + cpuRegs.pc +=4; + intEventTest(); + } +} /********************************************************* * Register jump * * Format: OP rs, rd * *********************************************************/ -void JR() { +void JR() +{ 
doBranch(cpuRegs.GPR.r[_Rs_].UL[0]); } -void JALR() { +void JALR() +{ u32 temp = cpuRegs.GPR.r[_Rs_].UL[0]; - if (_Rd_) { _SetLink(_Rd_); } + + if (_Rd_) _SetLink(_Rd_); + doBranch(temp); } @@ -235,7 +369,7 @@ void intReset() branch2 = 0; } -bool intEventTest() + bool intEventTest() { // Perform counters, ints, and IOP updates: return _cpuBranchTest_Shared(); diff --git a/pcsx2/x86/iMMI.cpp b/pcsx2/x86/iMMI.cpp index aebb01ff67..265f105b03 100644 --- a/pcsx2/x86/iMMI.cpp +++ b/pcsx2/x86/iMMI.cpp @@ -273,7 +273,7 @@ void recPMTHL() } // MMX helper routines -#define MMX_ALLOC_TEMP1(code) { \ +/*#define MMX_ALLOC_TEMP1(code) { \ int t0reg; \ t0reg = _allocMMXreg(-1, MMX_TEMP, 0); \ code; \ @@ -311,7 +311,7 @@ void recPMTHL() _freeMMXreg(t1reg); \ _freeMMXreg(t2reg); \ _freeMMXreg(t3reg); \ -} \ +} \*/ //////////////////////////////////////////////////// void recPSRLH( void ) diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index a14f1cee3a..af168fc238 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -24,26 +24,7 @@ #include "R5900.h" #include "VU.h" #include "iCore.h" - -// Yay! These work now! (air) ... almost (air) -#define ARITHMETICIMM_RECOMPILE -#define ARITHMETIC_RECOMPILE -#define MULTDIV_RECOMPILE -#define SHIFT_RECOMPILE -#define BRANCH_RECOMPILE -#define JUMP_RECOMPILE -#define LOADSTORE_RECOMPILE -#define MOVE_RECOMPILE -#define MMI_RECOMPILE -#define MMI0_RECOMPILE -#define MMI1_RECOMPILE -#define MMI2_RECOMPILE -#define MMI3_RECOMPILE -#define FPU_RECOMPILE -#define CP0_RECOMPILE -#define CP2_RECOMPILE - -#define EE_CONST_PROP // rec2 - enables constant propagation (faster) +#include "Pcsx2Config.h" #define PC_GETBLOCK(x) PC_GETBLOCK_(x, recLUT) From 7394e99ef6ddccc1a512a1a7aac17b1b25bdb0d4 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Fri, 24 Apr 2009 11:25:10 +0000 Subject: [PATCH 138/143] Ok, I lied -- more work on the emitter. I forgot to fix up the Jump/Call api yesterday and, along the way, did lots of major cleanups. :) (and broke linux, probably!) 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1054 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 8 +- pcsx2/x86/ix86-32/iR5900-32.cpp | 2 +- pcsx2/x86/ix86/implement/bittest.h | 70 -------- pcsx2/x86/ix86/implement/dwshift.h | 109 ++---------- pcsx2/x86/ix86/implement/group1.h | 42 ++--- pcsx2/x86/ix86/implement/group2.h | 22 +-- pcsx2/x86/ix86/implement/group3.h | 133 ++++---------- pcsx2/x86/ix86/implement/incdec.h | 65 ++----- pcsx2/x86/ix86/implement/jmpcall.h | 52 ++---- pcsx2/x86/ix86/implement/movs.h | 160 +++++------------ pcsx2/x86/ix86/implement/test.h | 107 ++++++----- pcsx2/x86/ix86/implement/xmm/arithmetic.h | 10 +- pcsx2/x86/ix86/implement/xmm/basehelpers.h | 104 ----------- pcsx2/x86/ix86/implement/xmm/comparisons.h | 6 +- pcsx2/x86/ix86/implement/xmm/moremovs.h | 12 +- pcsx2/x86/ix86/implement/xmm/shufflepack.h | 6 +- pcsx2/x86/ix86/ix86.cpp | 138 ++++++++++----- pcsx2/x86/ix86/ix86_3dnow.cpp | 118 ++++++------ pcsx2/x86/ix86/ix86_fpu.cpp | 144 +++++++-------- pcsx2/x86/ix86/ix86_inlines.inl | 14 +- pcsx2/x86/ix86/ix86_instructions.h | 103 +++++++---- pcsx2/x86/ix86/ix86_jmp.cpp | 36 ++-- pcsx2/x86/ix86/ix86_legacy.cpp | 85 ++++----- pcsx2/x86/ix86/ix86_legacy_instructions.h | 11 -- pcsx2/x86/ix86/ix86_legacy_internal.h | 32 +--- pcsx2/x86/ix86/ix86_simd.cpp | 42 +++-- pcsx2/x86/ix86/ix86_types.h | 197 ++++++++++++++------- 27 files changed, 769 insertions(+), 1059 deletions(-) delete mode 100644 pcsx2/x86/ix86/implement/bittest.h diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 5b03403ef2..d8a1aa780e 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2989,13 +2989,13 @@ RelativePath="..\..\x86\ix86\ix86_types.h" > + + - - diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index ac20ffd0a1..43ec86d7e2 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -1644,7 +1644,7 @@ StartRecomp: } if (startpc != 0x81fc0) { xADD(ptr16[&manual_page[inpage_ptr >> 12]], 1); - iJccKnownTarget(Jcc_Carry, dyna_block_reset); + xJC( dyna_block_reset ); } DbgCon::WriteLn("Manual block @ %08X : %08X %d %d %d %d", params diff --git a/pcsx2/x86/ix86/implement/bittest.h b/pcsx2/x86/ix86/implement/bittest.h deleted file mode 100644 index a3829a66af..0000000000 --- a/pcsx2/x86/ix86/implement/bittest.h +++ /dev/null @@ -1,70 +0,0 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#pragma once - -// Implementations found here: BTS/BT/BTC/BTR plus BSF/BSR! -// Note: This header is meant to be included from within the x86Emitter::Internal namespace. 
- -// These instructions are in the 'Group8' as per Intel's manual, but since they all have -// a unified purpose, I've named them for that instead. - -enum G8Type -{ - G8Type_BT = 4, - G8Type_BTS, - G8Type_BTR, - G8Type_BTC, -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// BSF / BSR -- 16/32 operands supported only. -// -// 0xbc [fwd] / 0xbd [rev] -// -template< u16 Opcode > -class BitScanImpl -{ -public: - BitScanImpl() {} - - __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { xOpWrite0F( Opcode, to, from ); } - __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); } - __forceinline void operator()( const xRegister32& to, const void* src ) const { xOpWrite0F( Opcode, to, src ); } - __forceinline void operator()( const xRegister16& to, const void* src ) const { xOpWrite0F( 0x66, Opcode, to, src ); } - __forceinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { xOpWrite0F( Opcode, to, sibsrc ); } - __forceinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { xOpWrite0F( 0x66, Opcode, to, sibsrc ); } -}; - -////////////////////////////////////////////////////////////////////////////////////////// -// Bit Test Instructions - Valid on 16/32 bit instructions only. -// -template< G8Type InstType > -class Group8Impl : public BitScanImpl<0xa3 | (InstType << 2)> -{ -public: - using BitScanImpl<0xa3 | (InstType << 2)>::operator(); - - __forceinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase ); xWrite( bitoffset ); } - __forceinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase ); xWrite( bitoffset ); } - void operator()( const xRegister& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase ); xWrite( bitoffset ); } - void operator()( const xRegister& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase ); xWrite( bitoffset ); } - - Group8Impl() {} -}; - diff --git a/pcsx2/x86/ix86/implement/dwshift.h b/pcsx2/x86/ix86/implement/dwshift.h index 261f1d3ed8..b0499f46e0 100644 --- a/pcsx2/x86/ix86/implement/dwshift.h +++ b/pcsx2/x86/ix86/implement/dwshift.h @@ -21,109 +21,34 @@ // Implementations here cover SHLD and SHRD. // Note: This header is meant to be included from within the x86Emitter::Internal namespace. - -// ------------------------------------------------------------------- -// Optimization Note: Imm shifts by 0 are ignore (no code generated). This is a safe optimization -// because shifts by 0 do *not* affect flags status. - -template< typename ImmType, bool isShiftRight > -class DwordShiftImpl -{ -protected: - static const uint OperandSize = sizeof(ImmType); - - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - - static void basesibform( bool isCL ) - { - prefix16(); - write8( 0x0f ); - write8( (isCL ? 0xa5 : 0xa4) | (isShiftRight ? 0x8 : 0) ); - } - -public: - DwordShiftImpl() {} // because GCC doesn't like static classes - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const xRegister& from ) - { - prefix16(); - write16( 0xa50f | (isShiftRight ? 
0x800 : 0) ); - ModRM_Direct( from.Id, to.Id ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const xRegister& from, u8 shiftcnt ) - { - if( shiftcnt == 0 ) return; - prefix16(); - write16( 0xa40f | (isShiftRight ? 0x800 : 0) ); - ModRM_Direct( from.Id, to.Id ); - write8( shiftcnt ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibBase& sibdest, const xRegister& from, __unused const xRegisterCL& clreg ) - { - basesibform(); - EmitSibMagic( from.Id, sibdest ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const ModSibBase& sibdest, const xRegister& from, u8 shiftcnt ) - { - basesibform(); - EmitSibMagic( from.Id, sibdest ); - write8( shiftcnt ); - } - - // ------------------------------------------------------------------------ - // dest data type is inferred from the 'from' register, so we can do void* resolution :) - static __emitinline void Emit( void* dest, const xRegister& from, __unused const xRegisterCL& clreg ) - { - basesibform(); - xWriteDisp( from.Id, dest ); - } - - // ------------------------------------------------------------------------ - // dest data type is inferred from the 'from' register, so we can do void* resolution :) - static __emitinline void Emit( void* dest, const xRegister& from, u8 shiftcnt ) - { - basesibform(); - xWriteDisp( from.Id, dest ); - write8( shiftcnt ); - } -}; - - -// ------------------------------------------------------------------- +////////////////////////////////////////////////////////////////////////////////////////// // I use explicit method declarations here instead of templates, in order to provide // *only* 32 and 16 bit register operand forms (8 bit registers are not valid in SHLD/SHRD). // +// Optimization Note: Imm shifts by 0 are ignore (no code generated). This is a safe optimization +// because shifts by 0 do *not* affect flags status. +// template< bool isShiftRight > class DwordShiftImplAll { -protected: - typedef DwordShiftImpl m_32; - typedef DwordShiftImpl m_16; + static const u8 m_shiftop = isShiftRight ? 
0x8 : 0; public: // ---------- 32 Bit Interface ----------- - __forceinline void operator()( const xRegister32& to, const xRegister32& from, __unused const xRegisterCL& clreg ) const { m_32::Emit( to, from ); } - __forceinline void operator()( void* dest, const xRegister32& from, __unused const xRegisterCL& clreg ) const { m_32::Emit( dest, from ); } - __noinline void operator()( const ModSibBase& sibdest, const xRegister32& from, __unused const xRegisterCL& clreg ) const { m_32::Emit( sibdest, from ); } - __forceinline void operator()( const xRegister32& to, const xRegister32& from, u8 shiftcnt ) const { m_32::Emit( to, from, shiftcnt ); } - __forceinline void operator()( void* dest, const xRegister32& from, u8 shiftcnt ) const { m_32::Emit( dest, from, shiftcnt ); } - __noinline void operator()( const ModSibBase& sibdest, const xRegister32& from, u8 shiftcnt ) const { m_32::Emit( sibdest, shiftcnt ); } + __forceinline void operator()( const xRegister32& to, const xRegister32& from, __unused const xRegisterCL& clreg ) const { xOpWrite0F( 0xa5 | m_shiftop, to, from ); } + __forceinline void operator()( void* dest, const xRegister32& from, __unused const xRegisterCL& clreg ) const { xOpWrite0F( 0xa5 | m_shiftop, from, dest ); } + __forceinline void operator()( const ModSibBase& dest, const xRegister32& from, __unused const xRegisterCL& clreg ) const { xOpWrite0F( 0xa5 | m_shiftop, from, dest ); } + __forceinline void operator()( const xRegister32& to, const xRegister32& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0xa4 | m_shiftop, to, from ); } + __forceinline void operator()( void* dest, const xRegister32& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0xa4 | m_shiftop, from, dest, shiftcnt ); } + __forceinline void operator()( const ModSibBase& dest, const xRegister32& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0xa4 | m_shiftop, from, dest, shiftcnt ); } // ---------- 16 Bit Interface ----------- - __forceinline void operator()( const xRegister16& to, const xRegister16& from, __unused const xRegisterCL& clreg ) const { m_16::Emit( to, from ); } - __forceinline void operator()( void* dest, const xRegister16& from, __unused const xRegisterCL& clreg ) const { m_16::Emit( dest, from ); } - __noinline void operator()( const ModSibBase& sibdest, const xRegister16& from, __unused const xRegisterCL& clreg ) const { m_16::Emit( sibdest, from ); } - __forceinline void operator()( const xRegister16& to, const xRegister16& from, u8 shiftcnt ) const { m_16::Emit( to, from, shiftcnt ); } - __forceinline void operator()( void* dest, const xRegister16& from, u8 shiftcnt ) const { m_16::Emit( dest, from, shiftcnt ); } - __noinline void operator()( const ModSibBase& sibdest, const xRegister16& from, u8 shiftcnt ) const { m_16::Emit( sibdest, shiftcnt ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from, __unused const xRegisterCL& clreg ) const { xOpWrite0F( 0x66, 0xa5 | m_shiftop, to, from ); } + __forceinline void operator()( void* dest, const xRegister16& from, __unused const xRegisterCL& clreg ) const { xOpWrite0F( 0x66, 0xa5 | m_shiftop, from, dest ); } + __forceinline void operator()( const ModSibBase& dest, const xRegister16& from, __unused const xRegisterCL& clreg ) const { xOpWrite0F( 0x66, 0xa5 | m_shiftop, from, dest ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0x66, 0xa4 | m_shiftop, to, from ); } + __forceinline void 
operator()( void* dest, const xRegister16& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0x66, 0xa4 | m_shiftop, from, dest, shiftcnt ); } + __forceinline void operator()( const ModSibBase& dest, const xRegister16& from, u8 shiftcnt ) const { if( shiftcnt != 0 ) xOpWrite0F( 0x66, 0xa4 | m_shiftop, from, dest, shiftcnt ); } DwordShiftImplAll() {} // Why does GCC need these? }; diff --git a/pcsx2/x86/ix86/implement/group1.h b/pcsx2/x86/ix86/implement/group1.h index 424ec212dc..2afdc40303 100644 --- a/pcsx2/x86/ix86/implement/group1.h +++ b/pcsx2/x86/ix86/implement/group1.h @@ -43,40 +43,40 @@ public: template< typename T > __forceinline void operator()( const xRegister& to, const xRegister& from ) const { prefix16(); - xWrite( (Is8BitOp() ? 0 : 1) | (InstType<<3) ); - ModRM_Direct( from.Id, to.Id ); + xWrite8( (Is8BitOp() ? 0 : 1) | (InstType<<3) ); + EmitSibMagic( from, to ); } // ------------------------------------------------------------------------ template< typename T > __forceinline void operator()( const xRegister& to, const void* src ) const { prefix16(); - xWrite( (Is8BitOp() ? 2 : 3) | (InstType<<3) ); - xWriteDisp( to.Id, src ); + xWrite8( (Is8BitOp() ? 2 : 3) | (InstType<<3) ); + EmitSibMagic( to, src ); } // ------------------------------------------------------------------------ template< typename T > __forceinline void operator()( void* dest, const xRegister& from ) const { prefix16(); - xWrite( (Is8BitOp() ? 0 : 1) | (InstType<<3) ); - xWriteDisp( from.Id, dest ); + xWrite8( (Is8BitOp() ? 0 : 1) | (InstType<<3) ); + EmitSibMagic( from, dest ); } // ------------------------------------------------------------------------ template< typename T > __noinline void operator()( const ModSibBase& sibdest, const xRegister& from ) const { prefix16(); - xWrite( (Is8BitOp() ? 0 : 1) | (InstType<<3) ); - EmitSibMagic( from.Id, sibdest ); + xWrite8( (Is8BitOp() ? 0 : 1) | (InstType<<3) ); + EmitSibMagic( from, sibdest ); } // ------------------------------------------------------------------------ template< typename T > __noinline void operator()( const xRegister& to, const ModSibBase& sibsrc ) const { prefix16(); - xWrite( (Is8BitOp() ? 2 : 3) | (InstType<<3) ); - EmitSibMagic( to.Id, sibsrc ); + xWrite8( (Is8BitOp() ? 2 : 3) | (InstType<<3) ); + EmitSibMagic( to, sibsrc ); } // ------------------------------------------------------------------------ @@ -88,14 +88,14 @@ public: { if( Is8BitOp() ) { - xWrite( 0x80 ); + xWrite8( 0x80 ); EmitSibMagic( InstType, sibdest ); xWrite( imm ); } else { prefix16(); - xWrite( is_s8( imm ) ? 0x83 : 0x81 ); + xWrite8( is_s8( imm ) ? 0x83 : 0x81 ); EmitSibMagic( InstType, sibdest ); if( is_s8( imm ) ) xWrite( imm ); @@ -110,18 +110,18 @@ public: prefix16(); if( !Is8BitOp() && is_s8( imm ) ) { - xWrite( 0x83 ); - ModRM_Direct( InstType, to.Id ); + xWrite8( 0x83 ); + EmitSibMagic( InstType, to ); xWrite( imm ); } else { if( to.IsAccumulator() ) - xWrite( (Is8BitOp() ? 4 : 5) | (InstType<<3) ); + xWrite8( (Is8BitOp() ? 4 : 5) | (InstType<<3) ); else { - xWrite( Is8BitOp() ? 0x80 : 0x81 ); - ModRM_Direct( InstType, to.Id ); + xWrite8( Is8BitOp() ? 
0x80 : 0x81 ); + EmitSibMagic( InstType, to ); } xWrite( imm ); } @@ -167,14 +167,14 @@ class xImpl_G1Compare : xImpl_Group1< G1Type_CMP > protected: template< u8 Prefix > struct Woot { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType cmptype ) const{ xOpWrite0F( Prefix, 0xc2, to, from, (u8)cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from, (u8)cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, SSE2_ComparisonType cmptype ) const { xOpWrite0F( Prefix, 0xc2, to, from, (u8)cmptype ); } Woot() {} }; public: - using xImpl_Group1< G1Type_CMP >::operator(); + using xImpl_Group1::operator(); const Woot<0x00> PS; const Woot<0x66> PD; diff --git a/pcsx2/x86/ix86/implement/group2.h b/pcsx2/x86/ix86/implement/group2.h index fecef3c6ba..eab5bb2605 100644 --- a/pcsx2/x86/ix86/implement/group2.h +++ b/pcsx2/x86/ix86/implement/group2.h @@ -45,14 +45,14 @@ public: template< typename T > __forceinline void operator()( const xRegister& to, __unused const xRegisterCL& from ) const { prefix16(); - xWrite( Is8BitOp() ? 0xd2 : 0xd3 ); - ModRM_Direct( InstType, to.Id ); + xWrite8( Is8BitOp() ? 0xd2 : 0xd3 ); + EmitSibMagic( InstType, to ); } template< typename T > __noinline void operator()( const ModSibStrict& sibdest, __unused const xRegisterCL& from ) const { prefix16(); - xWrite( Is8BitOp() ? 0xd2 : 0xd3 ); + xWrite8( Is8BitOp() ? 0xd2 : 0xd3 ); EmitSibMagic( InstType, sibdest ); } @@ -64,14 +64,14 @@ public: if( imm == 1 ) { // special encoding of 1's - xWrite( Is8BitOp() ? 0xd0 : 0xd1 ); + xWrite8( Is8BitOp() ? 0xd0 : 0xd1 ); EmitSibMagic( InstType, sibdest ); } else { - xWrite( Is8BitOp() ? 0xc0 : 0xc1 ); + xWrite8( Is8BitOp() ? 0xc0 : 0xc1 ); EmitSibMagic( InstType, sibdest ); - xWrite( imm ); + xWrite8( imm ); } } @@ -83,14 +83,14 @@ public: if( imm == 1 ) { // special encoding of 1's - xWrite( Is8BitOp() ? 0xd0 : 0xd1 ); - ModRM_Direct( InstType, to.Id ); + xWrite8( Is8BitOp() ? 0xd0 : 0xd1 ); + EmitSibMagic( InstType, to ); } else { - xWrite( Is8BitOp() ? 0xc0 : 0xc1 ); - ModRM_Direct( InstType, to.Id ); - xWrite( imm ); + xWrite8( Is8BitOp() ? 
0xc0 : 0xc1 ); + EmitSibMagic( InstType, to ); + xWrite8( imm ); } } diff --git a/pcsx2/x86/ix86/implement/group3.h b/pcsx2/x86/ix86/implement/group3.h index f5d72b3611..dc9bb169c2 100644 --- a/pcsx2/x86/ix86/implement/group3.h +++ b/pcsx2/x86/ix86/implement/group3.h @@ -34,136 +34,79 @@ enum G3Type ////////////////////////////////////////////////////////////////////////////////////////// // template< G3Type InstType > -class Group3ImplAll +class xImpl_Group3 { public: // ------------------------------------------------------------------------ - template< typename T > __forceinline void operator()( const xRegister& from ) const + template< typename T > __emitinline void operator()( const xRegister& from ) const { prefix16(); - xWrite(Is8BitOp() ? 0xf6 : 0xf7 ); - ModRM_Direct( InstType, from.Id ); + xWrite8(Is8BitOp() ? 0xf6 : 0xf7 ); + EmitSibMagic( InstType, from ); } // ------------------------------------------------------------------------ - template< typename T > __noinline void operator()( const ModSibStrict& from ) const + template< typename T > __emitinline void operator()( const ModSibStrict& from ) const { prefix16(); - xWrite( Is8BitOp() ? 0xf6 : 0xf7 ); + xWrite8( Is8BitOp() ? 0xf6 : 0xf7 ); EmitSibMagic( InstType, from ); } - Group3ImplAll() {} + xImpl_Group3() {} }; // ------------------------------------------------------------------------ // This class combines x86 and SSE/SSE2 instructions for iMUL and iDIV. // template< G3Type InstType, u16 OpcodeSSE > -class xImpl_Group3 : public Group3ImplAll +class ImplMulDivBase : public xImpl_Group3 { public: + ImplMulDivBase() {} const SimdImpl_DestRegSSE<0x00,OpcodeSSE> PS; const SimdImpl_DestRegSSE<0x66,OpcodeSSE> PD; const SimdImpl_DestRegSSE<0xf3,OpcodeSSE> SS; const SimdImpl_DestRegSSE<0xf2,OpcodeSSE> SD; +}; - xImpl_Group3() {} +////////////////////////////////////////////////////////////////////////////////////////// +// +class xImpl_iDiv : public ImplMulDivBase +{ +public: + using ImplMulDivBase::operator(); }; ////////////////////////////////////////////////////////////////////////////////////////// // The following iMul-specific forms are valid for 16 and 32 bit register operands only! - -template< typename ImmType > -class iMulImpl +// +class xImpl_iMul : public ImplMulDivBase { -protected: - static const uint OperandSize = sizeof(ImmType); - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } + template< typename T1, typename T2, typename ImmType > + static __forceinline void ImmStyle( const T1& param1, const T2& param2, ImmType imm8 ) + { + xOpWrite0F( (sizeof(ImmType) == 2) ? 0x66 : 0, is_s8( imm8 ) ? 
0x6b : 0x69, param1, param2 ); + if( is_s8( imm8 ) ) + xWrite8( imm8 ); + else + xWrite( imm8 ); + } public: - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const xRegister& from ) - { - prefix16(); - write16( 0xaf0f ); - ModRM_Direct( to.Id, from.Id ); - } + using ImplMulDivBase::operator(); - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const void* src ) - { - prefix16(); - write16( 0xaf0f ); - xWriteDisp( to.Id, src ); - } + __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { xOpWrite0F( 0xaf, to, from ); } + __forceinline void operator()( const xRegister32& to, const void* src ) const { xOpWrite0F( 0xaf, to, src ); } + __forceinline void operator()( const xRegister32& to, const ModSibBase& src ) const { xOpWrite0F( 0xaf, to, src ); } + __forceinline void operator()( const xRegister32& to, const xRegister32& from, s32 imm ) const{ ImmStyle( to, from, imm ); } + __forceinline void operator()( const xRegister32& to, const ModSibBase& from, s32 imm ) const { ImmStyle( to, from, imm ); } - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const ModSibBase& src ) - { - prefix16(); - write16( 0xaf0f ); - EmitSibMagic( to.Id, src ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const xRegister& from, ImmType imm ) - { - prefix16(); - write16( is_s8( imm ) ? 0x6b : 0x69 ); - ModRM_Direct( to.Id, from.Id ); - if( is_s8( imm ) ) - write8( imm ); - else - xWrite( imm ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const void* src, ImmType imm ) - { - prefix16(); - write16( is_s8( imm ) ? 0x6b : 0x69 ); - xWriteDisp( to.Id, src ); - if( is_s8( imm ) ) - write8( imm ); - else - xWrite( imm ); - } - - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const ModSibBase& src, ImmType imm ) - { - prefix16(); - write16( is_s8( imm ) ? 
0x6b : 0x69 ); - EmitSibMagic( to.Id, src ); - if( is_s8( imm ) ) - write8( imm ); - else - xWrite( imm ); - } -}; - -// ------------------------------------------------------------------------ -class xImpl_iMul : public xImpl_Group3 -{ -protected: - typedef iMulImpl iMUL32; - typedef iMulImpl iMUL16; - -public: - using xImpl_Group3::operator(); - - __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { iMUL32::Emit( to, from ); } - __forceinline void operator()( const xRegister32& to, const void* src ) const { iMUL32::Emit( to, src ); } - __forceinline void operator()( const xRegister32& to, const xRegister32& from, s32 imm ) const{ iMUL32::Emit( to, from, imm ); } - __noinline void operator()( const xRegister32& to, const ModSibBase& src ) const { iMUL32::Emit( to, src ); } - __noinline void operator()( const xRegister32& to, const ModSibBase& from, s32 imm ) const { iMUL32::Emit( to, from, imm ); } - - __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { iMUL16::Emit( to, from ); } - __forceinline void operator()( const xRegister16& to, const void* src ) const { iMUL16::Emit( to, src ); } - __forceinline void operator()( const xRegister16& to, const xRegister16& from, s16 imm ) const{ iMUL16::Emit( to, from, imm ); } - __noinline void operator()( const xRegister16& to, const ModSibBase& src ) const { iMUL16::Emit( to, src ); } - __noinline void operator()( const xRegister16& to, const ModSibBase& from, s16 imm ) const { iMUL16::Emit( to, from, imm ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { xOpWrite0F( 0x66, 0xaf, to, from ); } + __forceinline void operator()( const xRegister16& to, const void* src ) const { xOpWrite0F( 0x66, 0xaf, to, src ); } + __forceinline void operator()( const xRegister16& to, const ModSibBase& src ) const { xOpWrite0F( 0x66, 0xaf, to, src ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from, s16 imm ) const{ ImmStyle( to, from, imm ); } + __forceinline void operator()( const xRegister16& to, const ModSibBase& from, s16 imm ) const { ImmStyle( to, from, imm ); } xImpl_iMul() {} }; diff --git a/pcsx2/x86/ix86/implement/incdec.h b/pcsx2/x86/ix86/implement/incdec.h index 76f5a87b9a..f33ed905ed 100644 --- a/pcsx2/x86/ix86/implement/incdec.h +++ b/pcsx2/x86/ix86/implement/incdec.h @@ -21,62 +21,33 @@ // Implementations found here: Increment and Decrement Instructions! // Note: This header is meant to be included from within the x86Emitter::Internal namespace. -// ------------------------------------------------------------------------ -// -template< typename ImmType > -class IncDecImpl + +template< bool isDec > +class xImpl_IncDec { -protected: - static const uint OperandSize = sizeof(ImmType); - - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - -public: - IncDecImpl() {} // For the love of GCC. - - static __emitinline void Emit( bool isDec, const xRegister& to ) +public: + template< typename T > + __forceinline void operator()( const xRegister& to ) const { - // There is no valid 8-bit form of direct register inc/dec, so fall - // back on Mod/RM format instead: - if (Is8BitOperand() ) + if( Is8BitOp() ) { - write8( 0xfe ); - ModRM_Direct( isDec ? 1 : 0, to.Id ); + xWrite8( 0xfe ); + EmitSibMagic( isDec ? 1 : 0, to ); } else { - prefix16(); - write8( (isDec ? 0x48 : 0x40) | to.Id ); + prefix16(); + xWrite8( (isDec ? 
0x48 : 0x40) | to.Id ); } } - static __emitinline void Emit( bool isDec, const ModSibStrict& dest ) + template< typename T > + __forceinline void operator()( const ModSibStrict& sibdest ) const { - prefix16(); - write8( Is8BitOperand() ? 0xfe : 0xff ); - EmitSibMagic( isDec ? 1 : 0, dest ); + prefix16(); + xWrite8( Is8BitOp() ? 0xfe : 0xff ); + EmitSibMagic( isDec ? 1 : 0, sibdest ); } -}; - -// ------------------------------------------------------------------------ -template< bool isDec > -class IncDecImplAll -{ -protected: - typedef IncDecImpl m_32; - typedef IncDecImpl m_16; - typedef IncDecImpl m_8; - -public: - __forceinline void operator()( const xRegister32& to ) const { m_32::Emit( isDec, to ); } - __noinline void operator()( const ModSibStrict& sibdest ) const{ m_32::Emit( isDec, sibdest ); } - - __forceinline void operator()( const xRegister16& to ) const { m_16::Emit( isDec, to ); } - __noinline void operator()( const ModSibStrict& sibdest ) const{ m_16::Emit( isDec, sibdest ); } - - __forceinline void operator()( const xRegister8& to ) const { m_8::Emit( isDec, to ); } - __noinline void operator()( const ModSibStrict& sibdest ) const { m_8::Emit( isDec, sibdest ); } - - IncDecImplAll() {} // don't ask. + + xImpl_IncDec() {} // don't ask. }; diff --git a/pcsx2/x86/ix86/implement/jmpcall.h b/pcsx2/x86/ix86/implement/jmpcall.h index 7858b07a92..59cae03538 100644 --- a/pcsx2/x86/ix86/implement/jmpcall.h +++ b/pcsx2/x86/ix86/implement/jmpcall.h @@ -21,48 +21,18 @@ // Implementations found here: CALL and JMP! (unconditional only) // Note: This header is meant to be included from within the x86Emitter::Internal namespace. -template< typename ImmType > -class JmpCallImpl -{ -protected: - static const uint OperandSize = sizeof(ImmType); - - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - -public: - JmpCallImpl() {} // For the love of GCC. - - static __emitinline void Emit( bool isJmp, const xRegister& absreg ) - { - prefix16(); - xWrite( 0xff ); - ModRM_Direct( isJmp ? 4 : 2, absreg.Id ); - } - - static __emitinline void Emit( bool isJmp, const ModSibStrict& src ) - { - prefix16(); - xWrite( 0xff ); - EmitSibMagic( isJmp ? 4 : 2, src ); - } -}; - // ------------------------------------------------------------------------ template< bool isJmp > -class JmpCallImplAll +class xImpl_JmpCall { -protected: - typedef JmpCallImpl m_32; - typedef JmpCallImpl m_16; - public: - JmpCallImplAll() {} + xImpl_JmpCall() {} - __forceinline void operator()( const xRegister32& absreg ) const { m_32::Emit( isJmp, absreg ); } - __forceinline void operator()( const ModSibStrict& src ) const { m_32::Emit( isJmp, src ); } + __forceinline void operator()( const xRegister32& absreg ) const { xOpWrite( 0x00, 0xff, isJmp ? 4 : 2, absreg ); } + __forceinline void operator()( const ModSibStrict& src ) const { xOpWrite( 0x00, 0xff, isJmp ? 4 : 2, src ); } - __forceinline void operator()( const xRegister16& absreg ) const { m_16::Emit( isJmp, absreg ); } - __forceinline void operator()( const ModSibStrict& src ) const { m_16::Emit( isJmp, src ); } + __forceinline void operator()( const xRegister16& absreg ) const { xOpWrite( 0x66, 0xff, isJmp ? 4 : 2, absreg ); } + __forceinline void operator()( const ModSibStrict& src ) const { xOpWrite( 0x66, 0xff, isJmp ? 4 : 2, src ); } // Special form for calling functions. This form automatically resolves the // correct displacement based on the size of the instruction being generated. 
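The "correct displacement" mentioned in that comment is plain pointer arithmetic: a near CALL (opcode 0xE8 plus a rel32) is always five bytes, and its operand is measured from the end of the instruction, which is exactly what the hunk below computes with (sptr)func - ((sptr)xGetPtr() + 5). A standalone sketch of the same math, using made-up addresses:

#include <cstdint>
#include <cstdio>

int main()
{
    uintptr_t writePos = 0x00500000;   // hypothetical address the emitter is writing at
    uintptr_t target   = 0x00480010;   // hypothetical function being called

    // dest = func - (current + 5), as in jmpcall.h
    int32_t rel32 = (int32_t)(target - (writePos + 5));

    // For these addresses rel32 is negative (a backward call), encoded as E8 0B 00 F8 FF.
    printf("E8 rel32 = %d (0x%08X)\n", rel32, (uint32_t)rel32);
    return 0;
}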
@@ -70,16 +40,16 @@ public: __forceinline void operator()( const T* func ) const { if( isJmp ) - iJccKnownTarget( Jcc_Unconditional, (void*)(uptr)func ); + iJcc( Jcc_Unconditional, (void*)(uptr)func ); // double cast to/from (uptr) needed to appease GCC else { // calls are relative to the instruction after this one, and length is // always 5 bytes (16 bit calls are bad mojo, so no bother to do special logic). - sptr dest = (sptr)func - ((sptr)iGetPtr() + 5); - xWrite( 0xe8 ); - xWrite( dest ); + sptr dest = (sptr)func - ((sptr)xGetPtr() + 5); + xWrite8( 0xe8 ); + xWrite32( dest ); } } - }; + diff --git a/pcsx2/x86/ix86/implement/movs.h b/pcsx2/x86/ix86/implement/movs.h index 577df29be3..b8b5323db1 100644 --- a/pcsx2/x86/ix86/implement/movs.h +++ b/pcsx2/x86/ix86/implement/movs.h @@ -32,7 +32,7 @@ class MovImpl protected: static const uint OperandSize = sizeof(ImmType); static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } + static void prefix16() { if( OperandSize == 2 ) xWrite8( 0x66 ); } public: MovImpl() {} @@ -43,8 +43,8 @@ public: if( to == from ) return; // ignore redundant MOVs. prefix16(); - xWrite( Is8BitOperand() ? 0x88 : 0x89 ); - ModRM_Direct( from.Id, to.Id ); + xWrite8( Is8BitOperand() ? 0x88 : 0x89 ); + EmitSibMagic( from, to ); } // ------------------------------------------------------------------------ @@ -57,12 +57,12 @@ public: if( from.IsAccumulator() && dest.Index.IsEmpty() && dest.Base.IsEmpty() ) { - xWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); - xWrite( dest.Displacement ); + xWrite8( Is8BitOperand() ? 0xa2 : 0xa3 ); + xWrite32( dest.Displacement ); } else { - xWrite( Is8BitOperand() ? 0x88 : 0x89 ); + xWrite8( Is8BitOperand() ? 0x88 : 0x89 ); EmitSibMagic( from.Id, dest ); } } @@ -77,13 +77,13 @@ public: if( to.IsAccumulator() && src.Index.IsEmpty() && src.Base.IsEmpty() ) { - xWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); - xWrite( src.Displacement ); + xWrite8( Is8BitOperand() ? 0xa0 : 0xa1 ); + xWrite32( src.Displacement ); } else { - xWrite( Is8BitOperand() ? 0x8a : 0x8b ); - EmitSibMagic( to.Id, src ); + xWrite8( Is8BitOperand() ? 0x8a : 0x8b ); + EmitSibMagic( to, src ); } } @@ -96,13 +96,13 @@ public: if( from.IsAccumulator() ) { - xWrite( Is8BitOperand() ? 0xa2 : 0xa3 ); + xWrite8( Is8BitOperand() ? 0xa2 : 0xa3 ); xWrite( (s32)dest ); } else { - xWrite( Is8BitOperand() ? 0x88 : 0x89 ); - xWriteDisp( from.Id, dest ); + xWrite8( Is8BitOperand() ? 0x88 : 0x89 ); + EmitSibMagic( from, dest ); } } @@ -115,13 +115,13 @@ public: if( to.IsAccumulator() ) { - xWrite( Is8BitOperand() ? 0xa0 : 0xa1 ); + xWrite8( Is8BitOperand() ? 0xa0 : 0xa1 ); xWrite( (s32)src ); } else { - xWrite( Is8BitOperand() ? 0x8a : 0x8b ); - xWriteDisp( to.Id, src ); + xWrite8( Is8BitOperand() ? 0x8a : 0x8b ); + EmitSibMagic( to, src ); } } @@ -131,7 +131,7 @@ public: // Note: MOV does not have (reg16/32,imm8) forms. prefix16(); - xWrite( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); + xWrite8( (Is8BitOperand() ? 0xb0 : 0xb8) | to.Id ); xWrite( imm ); } @@ -139,7 +139,7 @@ public: static __emitinline void Emit( ModSibStrict dest, ImmType imm ) { prefix16(); - xWrite( Is8BitOperand() ? 0xc6 : 0xc7 ); + xWrite8( Is8BitOperand() ? 0xc6 : 0xc7 ); EmitSibMagic( 0, dest ); xWrite( imm ); } @@ -178,6 +178,7 @@ public: MovImplAll() {} // Satisfy GCC's whims. }; +#define ccSane() jASSUME( ccType >= 0 && ccType <= 0x0f ) ////////////////////////////////////////////////////////////////////////////////////////// // CMOV !! 
[in all of it's disappointing lack-of glory] .. and .. @@ -186,85 +187,19 @@ public: // CMOV Disclaimer: Caution! This instruction can look exciting and cool, until you // realize that it cannot load immediate values into registers. -_- // -template< typename ImmType, int InstBaseVal > -class CMovSetImpl -{ -protected: - static const uint OperandSize = sizeof(ImmType); - - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - - static __forceinline void emit_base( JccComparisonType cc ) - { - jASSUME( cc >= 0 && cc <= 0x0f ); - prefix16(); - write8( 0x0f ); - write8( InstBaseVal | cc ); - } - -public: - CMovSetImpl() {} - - static __emitinline void Emit( JccComparisonType cc, const xRegister& to, const xRegister& from ) - { - if( to == from ) return; - emit_base( cc ); - ModRM_Direct( to.Id, from.Id ); - } - - static __emitinline void Emit( JccComparisonType cc, const xRegister& to, const void* src ) - { - emit_base( cc ); - xWriteDisp( to.Id, src ); - } - - static __emitinline void Emit( JccComparisonType cc, const xRegister& to, const ModSibBase& sibsrc ) - { - emit_base( cc ); - EmitSibMagic( to.Id, sibsrc ); - } - - // This form is provided for SETcc only (not available in CMOV) - static __emitinline void EmitSet( JccComparisonType cc, const xRegister& to ) - { - emit_base( cc ); - ModRM_Direct( 0, to.Id ); - } - - // This form is provided for SETcc only (not available in CMOV) - static __emitinline void EmitSet( JccComparisonType cc, const void* src ) - { - emit_base( cc ); - xWriteDisp( 0, src ); - } - - // This form is provided for SETcc only (not available in CMOV) - static __emitinline void EmitSet( JccComparisonType cc, const ModSibStrict& sibsrc ) - { - emit_base( cc ); - EmitSibMagic( 0, sibsrc ); - } -}; - -// ------------------------------------------------------------------------ // I use explicit method declarations here instead of templates, in order to provide // *only* 32 and 16 bit register operand forms (8 bit registers are not valid in CMOV). 
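// ------------------------------------------------------------------------
// Both CMOVcc and SETcc fold the 0..15 condition-code nibble into a 0F-prefixed
// base byte (0x40 for CMOV, 0x90 for SETcc), which is all the ccSane() macro
// above needs to guard.  Rough sketch of the two encodings, reusing the
// hypothetical emit8() stand-in from the jmpcall.h sketch earlier:

// CMOVcc r32, r/m32 is 0F 40+cc /r, destination in the ModRM reg field;
// SETcc r/m8 is 0F 90+cc with the reg field left as 0.
static void emitCMovRegReg(unsigned cc, unsigned toId, unsigned fromId, bool is16bit)
{
    if (is16bit) emit8(0x66);                            // 16-bit CMOV takes the prefix
    emit8(0x0F);
    emit8(0x40 | (cc & 0x0F));
    emit8((3u << 6) | ((toId & 7) << 3) | (fromId & 7));
}

static void emitSetccReg(unsigned cc, unsigned reg8Id)
{
    emit8(0x0F);
    emit8(0x90 | (cc & 0x0F));
    emit8((3u << 6) | (0u << 3) | (reg8Id & 7));
}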
// class CMovImplGeneric { -protected: - typedef CMovSetImpl m_32; // 0x40 is the cmov base instruction id - typedef CMovSetImpl m_16; // 0x40 is the cmov base instruction id - public: - __forceinline void operator()( JccComparisonType ccType, const xRegister32& to, const xRegister32& from ) const { m_32::Emit( ccType, to, from ); } - __forceinline void operator()( JccComparisonType ccType, const xRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); } - __noinline void operator()( JccComparisonType ccType, const xRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); } + __forceinline void operator()( JccComparisonType ccType, const xRegister32& to, const xRegister32& from ) const { ccSane(); xOpWrite0F( 0x40 | ccType, to, from ); } + __forceinline void operator()( JccComparisonType ccType, const xRegister32& to, const void* src ) const { ccSane(); xOpWrite0F( 0x40 | ccType, to, src ); } + __forceinline void operator()( JccComparisonType ccType, const xRegister32& to, const ModSibBase& sibsrc ) const { ccSane(); xOpWrite0F( 0x40 | ccType, to, sibsrc ); } - __forceinline void operator()( JccComparisonType ccType, const xRegister16& to, const xRegister16& from ) const { m_16::Emit( ccType, to, from ); } - __forceinline void operator()( JccComparisonType ccType, const xRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); } - __noinline void operator()( JccComparisonType ccType, const xRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); } + __forceinline void operator()( JccComparisonType ccType, const xRegister16& to, const xRegister16& from ) const { ccSane(); xOpWrite0F( 0x66, 0x40 | ccType, to, from ); } + __forceinline void operator()( JccComparisonType ccType, const xRegister16& to, const void* src ) const { ccSane(); xOpWrite0F( 0x66, 0x40 | ccType, to, src ); } + __forceinline void operator()( JccComparisonType ccType, const xRegister16& to, const ModSibBase& sibsrc ) const { ccSane(); xOpWrite0F( 0x66, 0x40 | ccType, to, sibsrc ); } CMovImplGeneric() {} // don't ask. 
}; @@ -273,18 +208,16 @@ public: template< JccComparisonType ccType > class CMovImplAll { -protected: - typedef CMovSetImpl m_32; - typedef CMovSetImpl m_16; + static const u16 Opcode = 0x40 | ccType; public: - __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { m_32::Emit( ccType, to, from ); } - __forceinline void operator()( const xRegister32& to, const void* src ) const { m_32::Emit( ccType, to, src ); } - __noinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { m_32::Emit( ccType, to, sibsrc ); } + __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { ccSane(); xOpWrite0F( Opcode, to, from ); } + __forceinline void operator()( const xRegister32& to, const void* src ) const { ccSane(); xOpWrite0F( Opcode, to, src ); } + __forceinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { ccSane(); xOpWrite0F( Opcode, to, sibsrc ); } - __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { m_16::Emit( ccType, to, from ); } - __forceinline void operator()( const xRegister16& to, const void* src ) const { m_16::Emit( ccType, to, src ); } - __noinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { m_16::Emit( ccType, to, sibsrc ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { ccSane(); xOpWrite0F( 0x66, Opcode, to, from ); } + __forceinline void operator()( const xRegister16& to, const void* src ) const { ccSane(); xOpWrite0F( 0x66, Opcode, to, src ); } + __forceinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { ccSane(); xOpWrite0F( 0x66, Opcode, to, sibsrc ); } CMovImplAll() {} // don't ask. }; @@ -292,13 +225,11 @@ public: // ------------------------------------------------------------------------ class SetImplGeneric { -protected: - typedef CMovSetImpl Impl; // 0x90 is the SETcc base instruction id - + // note: SETcc are 0x90, with 0 in the Reg field of ModRM. public: - __forceinline void operator()( JccComparisonType cc, const xRegister8& to ) const { Impl::EmitSet( cc, to ); } - __forceinline void operator()( JccComparisonType cc, void* dest ) const { Impl::EmitSet( cc, dest ); } - __noinline void operator()( JccComparisonType cc, const ModSibStrict& dest ) const { Impl::EmitSet( cc, dest ); } + __forceinline void operator()( JccComparisonType ccType, const xRegister8& to ) const { ccSane(); xOpWrite0F( 0x90 | ccType, 0, to ); } + __forceinline void operator()( JccComparisonType ccType, void* dest ) const { ccSane(); xOpWrite0F( 0x90 | ccType, 0, dest ); } + __noinline void operator()( JccComparisonType ccType, const ModSibStrict& dest ) const { ccSane(); xOpWrite0F( 0x90 | ccType, 0, dest ); } SetImplGeneric() {} // if you do, ask GCC. }; @@ -307,13 +238,12 @@ public: template< JccComparisonType ccType > class SetImplAll { -protected: - typedef CMovSetImpl Impl; // 0x90 is the SETcc base instruction id + static const u16 Opcode = 0x90 | ccType; // SETcc are 0x90 base opcode, with 0 in the Reg field of ModRM. 
public: - __forceinline void operator()( const xRegister8& to ) const { Impl::EmitSet( ccType, to ); } - __forceinline void operator()( void* dest ) const { Impl::EmitSet( ccType, dest ); } - __noinline void operator()( const ModSibStrict& dest ) const { Impl::EmitSet( ccType, dest ); } + __forceinline void operator()( const xRegister8& to ) const { ccSane(); xOpWrite0F( Opcode, 0, to ); } + __forceinline void operator()( void* dest ) const { ccSane(); xOpWrite0F( Opcode, 0, dest ); } + __noinline void operator()( const ModSibStrict& dest ) const { ccSane(); xOpWrite0F( Opcode, 0, dest ); } SetImplAll() {} // if you do, ask GCC. }; @@ -330,12 +260,12 @@ protected: static const uint SrcOperandSize = sizeof( SrcImmType ); static bool Is8BitOperand() { return SrcOperandSize == 1; } - static void prefix16() { if( DestOperandSize == 2 ) xWrite( 0x66 ); } + static void prefix16() { if( DestOperandSize == 2 ) xWrite8( 0x66 ); } static __forceinline void emit_base( bool SignExtend ) { prefix16(); - xWrite( 0x0f ); - xWrite( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) ); + xWrite8( 0x0f ); + xWrite8( 0xb6 | (Is8BitOperand() ? 0 : 1) | (SignExtend ? 8 : 0 ) ); } public: @@ -344,13 +274,13 @@ public: static __emitinline void Emit( const xRegister& to, const xRegister& from, bool SignExtend ) { emit_base( SignExtend ); - ModRM_Direct( to.Id, from.Id ); + EmitSibMagic( to, from ); } static __emitinline void Emit( const xRegister& to, const ModSibStrict& sibsrc, bool SignExtend ) { emit_base( SignExtend ); - EmitSibMagic( to.Id, sibsrc ); + EmitSibMagic( to, sibsrc ); } }; diff --git a/pcsx2/x86/ix86/implement/test.h b/pcsx2/x86/ix86/implement/test.h index 0c66b0203f..196c3cbba0 100644 --- a/pcsx2/x86/ix86/implement/test.h +++ b/pcsx2/x86/ix86/implement/test.h @@ -18,66 +18,93 @@ #pragma once +// Implementations found here: TEST + BTS/BT/BTC/BTR + BSF/BSR! (for lack of better location) +// Note: This header is meant to be included from within the x86Emitter::Internal namespace. + ////////////////////////////////////////////////////////////////////////////////////////// // TEST instruction Implementation - -template< typename ImmType > -class TestImpl +// +class xImpl_Test { -protected: - static const uint OperandSize = sizeof(ImmType); - static bool Is8BitOperand() { return OperandSize == 1; } - static void prefix16() { if( OperandSize == 2 ) xWrite( 0x66 ); } - public: - TestImpl() {} - // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, const xRegister& from ) + template< typename T > __forceinline + void operator()( const xRegister& to, const xRegister& from ) const { - prefix16(); - xWrite( Is8BitOperand() ? 0x84 : 0x85 ); - ModRM_Direct( from.Id, to.Id ); + prefix16(); + xWrite8( Is8BitOp() ? 0x84 : 0x85 ); + EmitSibMagic( from, to ); } - + // ------------------------------------------------------------------------ - static __emitinline void Emit( const xRegister& to, ImmType imm ) + template< typename T > __forceinline + void operator()( const ModSibStrict& dest, int imm ) const { - prefix16(); - + prefix16(); + xWrite8( Is8BitOp() ? 0xf6 : 0xf7 ); + EmitSibMagic( 0, dest ); + xWrite( imm ); + } + + // ------------------------------------------------------------------------ + template< typename T > __forceinline + void operator()( const xRegister& to, int imm ) const + { + prefix16(); + if( to.IsAccumulator() ) - xWrite( Is8BitOperand() ? 0xa8 : 0xa9 ); + xWrite8( Is8BitOp() ? 
0xa8 : 0xa9 ); else { - xWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); - ModRM_Direct( 0, to.Id ); + xWrite8( Is8BitOp() ? 0xf6 : 0xf7 ); + EmitSibMagic( 0, to ); } - xWrite( imm ); + xWrite( imm ); } - // ------------------------------------------------------------------------ - static __emitinline void Emit( ModSibStrict dest, ImmType imm ) - { - prefix16(); - xWrite( Is8BitOperand() ? 0xf6 : 0xf7 ); - EmitSibMagic( 0, dest ); - xWrite( imm ); - } + xImpl_Test() {} // Why does GCC need these? }; -// ------------------------------------------------------------------- +enum G8Type +{ + G8Type_BT = 4, + G8Type_BTS, + G8Type_BTR, + G8Type_BTC, +}; + +////////////////////////////////////////////////////////////////////////////////////////// +// BSF / BSR -- 16/32 operands supported only. // -class TestImplAll +// 0xbc [fwd] / 0xbd [rev] +// +template< u16 Opcode > +class xImpl_BitScan { public: - template< typename T > - __forceinline void operator()( const xRegister& to, const xRegister& from ) const { TestImpl::Emit( to, from ); } + xImpl_BitScan() {} - template< typename T > - __noinline void operator()( const ModSibStrict& sibdest, T imm ) const { TestImpl::Emit( sibdest, imm ); } - template< typename T > - void operator()( const xRegister& to, T imm ) const { TestImpl::Emit( to, imm ); } - - TestImplAll() {} // Why does GCC need these? + __forceinline void operator()( const xRegister32& to, const xRegister32& from ) const { xOpWrite0F( Opcode, to, from ); } + __forceinline void operator()( const xRegister16& to, const xRegister16& from ) const { xOpWrite0F( 0x66, Opcode, to, from ); } + __forceinline void operator()( const xRegister32& to, const void* src ) const { xOpWrite0F( Opcode, to, src ); } + __forceinline void operator()( const xRegister16& to, const void* src ) const { xOpWrite0F( 0x66, Opcode, to, src ); } + __forceinline void operator()( const xRegister32& to, const ModSibBase& sibsrc ) const { xOpWrite0F( Opcode, to, sibsrc ); } + __forceinline void operator()( const xRegister16& to, const ModSibBase& sibsrc ) const { xOpWrite0F( 0x66, Opcode, to, sibsrc ); } }; +////////////////////////////////////////////////////////////////////////////////////////// +// Bit Test Instructions - Valid on 16/32 bit instructions only. 
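// ------------------------------------------------------------------------
// For the bit-test group declared below, the immediate form is 0F BA with the
// ModRM reg field acting as the opcode extension, the same /4../7 digits the
// G8Type enum assigns (BT=4, BTS=5, BTR=6, BTC=7), followed by the imm8 bit
// offset; BSF and BSR stay at plain 0F BC / 0F BD as noted above.  Small sketch
// of the immediate form, again with the hypothetical emit8() writer:

// BT r/m,imm8 = 0F BA /4 ib; BTS = /5, BTR = /6, BTC = /7.
static void emitBtGroupImm(unsigned g8type, unsigned regId, uint8_t bitoffset, bool is16bit)
{
    if (is16bit) emit8(0x66);
    emit8(0x0F);
    emit8(0xBA);
    emit8((3u << 6) | ((g8type & 7) << 3) | (regId & 7));
    emit8(bitoffset);
}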
+// +template< G8Type InstType > +class xImpl_Group8 : public xImpl_BitScan<0xa3 | (InstType << 2)> +{ +public: + using xImpl_BitScan<0xa3 | (InstType << 2)>::operator(); + + __forceinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase, bitoffset ); } + __forceinline void operator()( const ModSibStrict& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase, bitoffset ); } + void operator()( const xRegister& bitbase, u8 bitoffset ) const { xOpWrite0F( 0xba, InstType, bitbase, bitoffset ); } + void operator()( const xRegister& bitbase, u8 bitoffset ) const { xOpWrite0F( 0x66, 0xba, InstType, bitbase, bitoffset ); } + + xImpl_Group8() {} +}; diff --git a/pcsx2/x86/ix86/implement/xmm/arithmetic.h b/pcsx2/x86/ix86/implement/xmm/arithmetic.h index 0b25bd827d..6afd1d27a9 100644 --- a/pcsx2/x86/ix86/implement/xmm/arithmetic.h +++ b/pcsx2/x86/ix86/implement/xmm/arithmetic.h @@ -40,15 +40,15 @@ public: __emitinline void operator()( const xRegisterSSE& to, u8 imm8 ) const { SimdPrefix( 0x66, OpcodeImm ); - ModRM( 3, (int)Modcode, to.Id ); - xWrite( imm8 ); + EmitSibMagic( (int)Modcode, to ); + xWrite8( imm8 ); } __emitinline void operator()( const xRegisterMMX& to, u8 imm8 ) const { SimdPrefix( 0x00, OpcodeImm ); - ModRM( 3, (int)Modcode, to.Id ); - xWrite( imm8 ); + EmitSibMagic( (int)Modcode, to ); + xWrite8( imm8 ); } }; @@ -78,7 +78,7 @@ public: { SimdPrefix( 0x66, 0x73 ); ModRM( 3, (int)Modcode+1, to.Id ); - xWrite( imm8 ); + xWrite8( imm8 ); } SimdImpl_Shift() {} diff --git a/pcsx2/x86/ix86/implement/xmm/basehelpers.h b/pcsx2/x86/ix86/implement/xmm/basehelpers.h index 0cce6b2369..05ef74d5a7 100644 --- a/pcsx2/x86/ix86/implement/xmm/basehelpers.h +++ b/pcsx2/x86/ix86/implement/xmm/basehelpers.h @@ -21,110 +21,6 @@ ////////////////////////////////////////////////////////////////////////////////////////// // MMX / SSE Helper Functions! 
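// ------------------------------------------------------------------------
// The overloads removed below all wrapped the same byte sequence: an optional
// SIMD prefix (0x66, 0xF2, or 0xF3), the 0x0F escape, the opcode, then whatever
// ModRM/SIB form the operands need.  Rough sketch of the register-to-register
// case for a single-byte opcode, with the hypothetical emit8() stand-in:

static void emitPrefixed0F(uint8_t prefix, uint8_t opcode, unsigned toId, unsigned fromId)
{
    if (prefix != 0) emit8(prefix);                      // 66 / F2 / F3, or none
    emit8(0x0F);                                         // two-byte opcode escape
    emit8(opcode);
    emit8((3u << 6) | ((toId & 7) << 3) | (fromId & 7)); // direct-register ModRM
}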
-extern void SimdPrefix( u8 prefix, u16 opcode ); - -extern void xOpWrite0F( u8 prefix, u16 opcode, int instId, const ModSibBase& sib ); -extern void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data ); -extern void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib ); -extern void xOpWrite0F( u16 opcode, int instId, const void* data ); - -template< typename T2 > __emitinline -void xOpWrite0F( u8 prefix, u16 opcode, int instId, const xRegister& from ) -{ - SimdPrefix( prefix, opcode ); - ModRM_Direct( instId, from.Id ); -} - -template< typename T2 > __emitinline -void xOpWrite0F( u16 opcode, int instId, const xRegister& from ) -{ - xOpWrite0F( 0, opcode, instId, from ); -} - -template< typename T, typename T2 > __emitinline -void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, bool forcePrefix=false ) -{ - xOpWrite0F( prefix, opcode, to.Id, from ); -} - -template< typename T > __noinline -void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, bool forcePrefix=false ) -{ - xOpWrite0F( prefix, opcode, reg.Id, sib ); -} - -template< typename T > __emitinline -void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const void* data, bool forcePrefix=false ) -{ - xOpWrite0F( prefix, opcode, reg.Id, data ); -} - -// ------------------------------------------------------------------------ -// -template< typename T, typename T2 > __emitinline -void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& to, const xRegister& from, u8 imm8 ) -{ - xOpWrite0F( prefix, opcode, to, from ); - xWrite( imm8 ); -} - -template< typename T > __noinline -void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const ModSibBase& sib, u8 imm8 ) -{ - xOpWrite0F( prefix, opcode, reg, sib ); - xWrite( imm8 ); -} - -template< typename T > __emitinline -void xOpWrite0F( u8 prefix, u16 opcode, const xRegister& reg, const void* data, u8 imm8 ) -{ - xOpWrite0F( prefix, opcode, reg, data ); - xWrite( imm8 ); -} - -// ------------------------------------------------------------------------ - -template< typename T, typename T2 > __emitinline -void xOpWrite0F( u16 opcode, const xRegister& to, const xRegister& from ) -{ - xOpWrite0F( 0, opcode, to, from ); -} - -template< typename T > __noinline -void xOpWrite0F( u16 opcode, const xRegister& reg, const ModSibBase& sib ) -{ - xOpWrite0F( 0, opcode, reg, sib ); -} - -template< typename T > __emitinline -void xOpWrite0F( u16 opcode, const xRegister& reg, const void* data ) -{ - xOpWrite0F( 0, opcode, reg, data ); -} - -// ------------------------------------------------------------------------ - -template< typename T, typename T2 > __emitinline -void xOpWrite0F( u16 opcode, const xRegister& to, const xRegister& from, u8 imm8 ) -{ - xOpWrite0F( opcode, to, from ); - xWrite( imm8 ); -} - -template< typename T > __noinline -void xOpWrite0F( u16 opcode, const xRegister& reg, const ModSibBase& sib, u8 imm8 ) -{ - xOpWrite0F( opcode, reg, sib ); - xWrite( imm8 ); -} - -template< typename T > __emitinline -void xOpWrite0F( u16 opcode, const xRegister& reg, const void* data, u8 imm8 ) -{ - xOpWrite0F( opcode, reg, data ); - xWrite( imm8 ); -} - // ------------------------------------------------------------------------ // For implementing SSE-only logic operations that have xmmreg,xmmreg/rm forms only, // like ANDPS/ANDPD diff --git a/pcsx2/x86/ix86/implement/xmm/comparisons.h b/pcsx2/x86/ix86/implement/xmm/comparisons.h index a7e3197038..e74d25b1f9 100644 --- 
a/pcsx2/x86/ix86/implement/xmm/comparisons.h +++ b/pcsx2/x86/ix86/implement/xmm/comparisons.h @@ -41,9 +41,9 @@ class SimdImpl_Compare protected: template< u8 Prefix > struct Woot { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( CType ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( CType ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite8( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite8( CType ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { xOpWrite0F( Prefix, 0xc2, to, from ); xWrite8( CType ); } Woot() {} }; diff --git a/pcsx2/x86/ix86/implement/xmm/moremovs.h b/pcsx2/x86/ix86/implement/xmm/moremovs.h index aeb95130a1..98cfafc230 100644 --- a/pcsx2/x86/ix86/implement/xmm/moremovs.h +++ b/pcsx2/x86/ix86/implement/xmm/moremovs.h @@ -122,17 +122,17 @@ public: { if( to != from ) xOpWrite0F( PrefixA, Opcode, to, from ); } -#ifndef __LINUX__ // Ifdef till Jake fixes; you can't use & on a const void*! + __forceinline void operator()( const xRegisterSSE& to, const void* from ) const { - xOpWrite0F( (isAligned || (from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode, to, from ); + xOpWrite0F( (isAligned || ((uptr)from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode, to, from ); } __forceinline void operator()( const void* to, const xRegisterSSE& from ) const { - xOpWrite0F( (isAligned || (from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode_Alt, to, from ); + xOpWrite0F( (isAligned || ((uptr)from & 0x0f) == 0) ? PrefixA : PrefixU, Opcode_Alt, to, from ); } -#endif + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from ) const { // ModSib form is aligned if it's displacement-only and the displacement is aligned: @@ -140,14 +140,12 @@ public: xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode, to, from ); } -#ifndef __LINUX__ // II'll ifdef this one, too. xOpWrite0F doesn't take ModSibBase & xRegisterSSE in that order. __forceinline void operator()( const ModSibBase& to, const xRegisterSSE& from ) const { // ModSib form is aligned if it's displacement-only and the displacement is aligned: bool isReallyAligned = isAligned || ( (to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty() ); - xOpWrite0F( isReallyAligned ? PrefixA : PrefixU, Opcode_Alt, to, from ); + xOpWrite0F( isReallyAligned ? 
PrefixA : PrefixU, Opcode_Alt, from, to ); } -#endif }; diff --git a/pcsx2/x86/ix86/implement/xmm/shufflepack.h b/pcsx2/x86/ix86/implement/xmm/shufflepack.h index ee306e51d4..f251393056 100644 --- a/pcsx2/x86/ix86/implement/xmm/shufflepack.h +++ b/pcsx2/x86/ix86/implement/xmm/shufflepack.h @@ -26,9 +26,9 @@ class SimdImpl_Shuffle protected: template< u8 Prefix > struct Woot { - __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } - __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const xRegisterSSE& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite8( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const void* from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite8( cmptype ); } + __forceinline void operator()( const xRegisterSSE& to, const ModSibBase& from, u8 cmptype ) const { xOpWrite0F( Prefix, OpcodeSSE, to, from ); xWrite8( cmptype ); } Woot() {} }; diff --git a/pcsx2/x86/ix86/ix86.cpp b/pcsx2/x86/ix86/ix86.cpp index 4d272fd154..e04a66ddd3 100644 --- a/pcsx2/x86/ix86/ix86.cpp +++ b/pcsx2/x86/ix86/ix86.cpp @@ -66,6 +66,26 @@ __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM] = { XMMT_INT }; namespace x86Emitter { +__forceinline void xWrite8( u8 val ) +{ + xWrite( val ); +} + +__forceinline void xWrite16( u16 val ) +{ + xWrite( val ); +} + +__forceinline void xWrite32( u32 val ) +{ + xWrite( val ); +} + +__forceinline void xWrite64( u64 val ) +{ + xWrite( val ); +} + const xAddressIndexerBase ptr; const xAddressIndexer ptr128; const xAddressIndexer ptr64; @@ -75,7 +95,7 @@ const xAddressIndexer ptr8; // ------------------------------------------------------------------------ -template< typename OperandType > const xRegister xRegister::Empty; +template< typename OperandType > const xRegisterBase xRegisterBase::Empty; const xAddressReg xAddressReg::Empty; const xRegisterSSE @@ -110,8 +130,11 @@ const xRegister8 const xRegisterCL cl; +////////////////////////////////////////////////////////////////////////////////////////// + namespace Internal { + ////////////////////////////////////////////////////////////////////////////////////////// // Performance note: VC++ wants to use byte/word register form for the following // ModRM/SibSB constructors when we use xWrite, and furthermore unrolls the // the shift using a series of ADDs for the following results: @@ -135,32 +158,22 @@ namespace Internal // (btw, I know this isn't a critical performance item by any means, but it's // annoying simply because it *should* be an easy thing to optimize) - __forceinline void ModRM( uint mod, uint reg, uint rm ) + static __forceinline void ModRM( uint mod, uint reg, uint rm ) { - xWrite( (mod << 6) | (reg << 3) | rm ); + xWrite8( (mod << 6) | (reg << 3) | rm ); } - __forceinline void ModRM_Direct( uint reg, uint rm ) + static __forceinline void SibSB( u32 ss, u32 index, u32 base ) { - ModRM( Mod_Direct, reg, rm ); + xWrite8( (ss << 6) | (index << 3) | base ); } - __forceinline void SibSB( u32 ss, u32 index, u32 base ) - { - xWrite( (ss << 6) | (index << 3) | base ); - } - - __forceinline void 
xWriteDisp( int regfield, s32 displacement ) + __forceinline void EmitSibMagic( uint regfield, const void* address ) { ModRM( 0, regfield, ModRm_UseDisp32 ); - xWrite( displacement ); + xWrite( (s32)address ); } - __forceinline void xWriteDisp( int regfield, const void* address ) - { - xWriteDisp( regfield, (s32)address ); - } - ////////////////////////////////////////////////////////////////////////////////////////// // emitter helpers for xmm instruction with prefixes, most of which are using // the basic opcode format (items inside braces denote optional or conditional @@ -180,7 +193,7 @@ namespace Internal __emitinline void xOpWrite0F( u8 prefix, u16 opcode, int instId, const void* data ) { SimdPrefix( prefix, opcode ); - xWriteDisp( instId, data ); + EmitSibMagic( instId, data ); } __emitinline void xOpWrite0F( u16 opcode, int instId, const ModSibBase& sib ) @@ -219,7 +232,7 @@ namespace Internal // regfield - register field to be written to the ModRm. This is either a register specifier // or an opcode extension. In either case, the instruction determines the value for us. // - void EmitSibMagic( uint regfield, const ModSibBase& info ) + __noinline void EmitSibMagic( uint regfield, const ModSibBase& info ) { jASSUME( regfield < 8 ); @@ -235,7 +248,7 @@ namespace Internal if( info.Index.IsEmpty() ) { - xWriteDisp( regfield, info.Displacement ); + EmitSibMagic( regfield, (void*)info.Displacement ); return; } else @@ -284,7 +297,7 @@ namespace Internal using namespace Internal; const MovImplAll xMOV; -const TestImplAll xTEST; +const xImpl_Test xTEST; const xImpl_G1Logic xAND; const xImpl_G1Logic xOR; @@ -305,15 +318,15 @@ const Group2ImplAll xSHL; const Group2ImplAll xSHR; const Group2ImplAll xSAR; -const Group3ImplAll xNOT; -const Group3ImplAll xNEG; -const Group3ImplAll xUMUL; -const Group3ImplAll xUDIV; -const xImpl_Group3 xDIV; +const xImpl_Group3 xNOT; +const xImpl_Group3 xNEG; +const xImpl_Group3 xUMUL; +const xImpl_Group3 xUDIV; +const xImpl_iDiv xDIV; const xImpl_iMul xMUL; -const IncDecImplAll xINC; -const IncDecImplAll xDEC; +const xImpl_IncDec xINC; +const xImpl_IncDec xDEC; const MovExtendImplAll xMOVZX; const MovExtendImplAll xMOVSX; @@ -321,13 +334,13 @@ const MovExtendImplAll xMOVSX; const DwordShiftImplAll xSHLD; const DwordShiftImplAll xSHRD; -const Group8Impl xBT; -const Group8Impl xBTR; -const Group8Impl xBTS; -const Group8Impl xBTC; +const xImpl_Group8 xBT; +const xImpl_Group8 xBTR; +const xImpl_Group8 xBTS; +const xImpl_Group8 xBTC; -const BitScanImpl<0xbc> xBSF; -const BitScanImpl<0xbd> xBSR; +const xImpl_BitScan<0xbc> xBSF; +const xImpl_BitScan<0xbd> xBSR; // ------------------------------------------------------------------------ const CMovImplGeneric xCMOV; @@ -390,7 +403,7 @@ const SetImplAll xSETPO; // Assigns the current emitter buffer target address. // This is provided instead of using x86Ptr directly, since we may in the future find // a need to change the storage class system for the x86Ptr 'under the hood.' -__emitinline void iSetPtr( void* ptr ) +__emitinline void xSetPtr( void* ptr ) { x86Ptr = (u8*)ptr; } @@ -399,26 +412,26 @@ __emitinline void iSetPtr( void* ptr ) // Retrieves the current emitter buffer target address. // This is provided instead of using x86Ptr directly, since we may in the future find // a need to change the storage class system for the x86Ptr 'under the hood.' 
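// ------------------------------------------------------------------------
// Taken together with xSetPtr above, the accessor below supports the usual
// capture, emit, measure pattern.  The calls shown exist in this emitter, but
// the snippet is illustrative only and assumes the buffer setup is this simple:

u8 codeBuffer[64];
xSetPtr(codeBuffer);                        // point the emitter at our buffer
u8* start = xGetPtr();
xNOP();                                     // 0x90
xRET();                                     // 0xC3
int emittedLen = (int)(xGetPtr() - start);  // == 2 for this sequence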
-__emitinline u8* iGetPtr() +__emitinline u8* xGetPtr() { return x86Ptr; } // ------------------------------------------------------------------------ -__emitinline void iAlignPtr( uint bytes ) +__emitinline void xAlignPtr( uint bytes ) { // forward align x86Ptr = (u8*)( ( (uptr)x86Ptr + bytes - 1) & ~(bytes - 1) ); } // ------------------------------------------------------------------------ -__emitinline void iAdvancePtr( uint bytes ) +__emitinline void xAdvancePtr( uint bytes ) { if( IsDevBuild ) { // common debugger courtesy: advance with INT3 as filler. for( uint i=0; i( 0xcc ); + xWrite8( 0xcc ); } else x86Ptr += bytes; @@ -553,7 +566,7 @@ static void EmitLeaMagic( xRegister to, const ModSibBase& src, bool // note: no need to do ebp+0 check since we encode all 0 displacements as // register assignments above (via MOV) - xWrite( 0x8d ); + xWrite8( 0x8d ); ModRM( displacement_size, to.Id, src.Index.Id ); } } @@ -575,10 +588,10 @@ static void EmitLeaMagic( xRegister to, const ModSibBase& src, bool xSHL( to, src.Scale ); return; } - xWrite( 0x8d ); + xWrite8( 0x8d ); ModRM( 0, to.Id, ModRm_UseSib ); SibSB( src.Scale, src.Index.Id, ModRm_UseDisp32 ); - xWrite( src.Displacement ); + xWrite32( src.Displacement ); return; } else @@ -614,7 +627,7 @@ static void EmitLeaMagic( xRegister to, const ModSibBase& src, bool if( src.Base == ebp && displacement_size == 0 ) displacement_size = 1; // forces [ebp] to be encoded as [ebp+0]! - xWrite( 0x8d ); + xWrite8( 0x8d ); ModRM( displacement_size, to.Id, ModRm_UseSib ); SibSB( src.Scale, src.Index.Id, src.Base.Id ); } @@ -637,7 +650,7 @@ __emitinline void xLEA( xRegister32 to, const ModSibBase& src, bool preserve_fla __emitinline void xLEA( xRegister16 to, const ModSibBase& src, bool preserve_flags ) { - write8( 0x66 ); + xWrite8( 0x66 ); EmitLeaMagic( to, src, preserve_flags ); } @@ -650,22 +663,51 @@ __emitinline void xLEA( xRegister16 to, const ModSibBase& src, bool preserve_fla __emitinline void xPOP( const ModSibBase& from ) { - xWrite( 0x8f ); + xWrite8( 0x8f ); EmitSibMagic( 0, from ); } __emitinline void xPUSH( const ModSibBase& from ) { - xWrite( 0xff ); + xWrite8( 0xff ); EmitSibMagic( 6, from ); } +__forceinline void xPOP( xRegister32 from ) { xWrite8( 0x58 | from.Id ); } +__forceinline void xPOP( void* from ) { xPOP( ptr[from] ); } + +__forceinline void xPUSH( u32 imm ) { xWrite8( 0x68 ); xWrite32( imm ); } +__forceinline void xPUSH( xRegister32 from ) { xWrite8( 0x50 | from.Id ); } +__forceinline void xPUSH( void* from ) { xPUSH( ptr[from] ); } + +// pushes the EFLAGS register onto the stack +__forceinline void xPUSHFD() { xWrite8( 0x9C ); } +// pops the EFLAGS register from the stack +__forceinline void xPOPFD() { xWrite8( 0x9D ); } + + ////////////////////////////////////////////////////////////////////////////////////////// // + +__forceinline void xRET() { xWrite8( 0xC3 ); } +__forceinline void xCBW() { xWrite16( 0x9866 ); } +__forceinline void xCWD() { xWrite8( 0x98 ); } +__forceinline void xCDQ() { xWrite8( 0x99 ); } +__forceinline void xCWDE() { xWrite8( 0x98 ); } + +__forceinline void xLAHF() { xWrite8( 0x9f ); } +__forceinline void xSAHF() { xWrite8( 0x9e ); } + +__forceinline void xSTC() { xWrite8( 0xF9 ); } +__forceinline void xCLC() { xWrite8( 0xF8 ); } + +// NOP 1-byte +__forceinline void xNOP() { xWrite8(0x90); } + __emitinline void xBSWAP( const xRegister32& to ) { - write8( 0x0F ); - write8( 0xC8 | to.Id ); + xWrite8( 0x0F ); + xWrite8( 0xC8 | to.Id ); } } diff --git a/pcsx2/x86/ix86/ix86_3dnow.cpp 
b/pcsx2/x86/ix86/ix86_3dnow.cpp index 4f053ff37c..0dc600d1be 100644 --- a/pcsx2/x86/ix86/ix86_3dnow.cpp +++ b/pcsx2/x86/ix86/ix86_3dnow.cpp @@ -20,183 +20,183 @@ #include "ix86_legacy_internal.h" //------------------------------------------------------------------ -// 3DNOW instructions +// 3DNOW instructions [Anyone caught dead using these will be re-killed] //------------------------------------------------------------------ /* femms */ emitterT void FEMMS( void ) { - write16( 0x0E0F ); + xWrite16( 0x0E0F ); } emitterT void PFCMPEQMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xB0 ); + xWrite32( from ); + xWrite8( 0xB0 ); } emitterT void PFCMPGTMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xA0 ); + xWrite32( from ); + xWrite8( 0xA0 ); } emitterT void PFCMPGEMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x90 ); + xWrite32( from ); + xWrite8( 0x90 ); } emitterT void PFADDMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x9E ); + xWrite32( from ); + xWrite8( 0x9E ); } emitterT void PFADDRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0x9E ); + xWrite8( 0x9E ); } emitterT void PFSUBMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x9A ); + xWrite32( from ); + xWrite8( 0x9A ); } emitterT void PFSUBRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0x9A ); + xWrite8( 0x9A ); } emitterT void PFMULMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xB4 ); + xWrite32( from ); + xWrite8( 0xB4 ); } emitterT void PFMULRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0xB4 ); + xWrite8( 0xB4 ); } emitterT void PFRCPMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x96 ); + xWrite32( from ); + xWrite8( 0x96 ); } emitterT void PFRCPRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0x96 ); + xWrite8( 0x96 ); } emitterT void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0xA6 ); + xWrite8( 0xA6 ); } emitterT void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0xB6 ); + xWrite8( 0xB6 ); } emitterT void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0x97 ); + xWrite8( 0x97 ); } emitterT void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0xA7 ); + xWrite8( 0xA7 ); } emitterT void PF2IDMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x1D ); + xWrite32( from ); + xWrite8( 0x1D ); } emitterT void PF2IDRtoR( x86IntRegType to, 
x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0x1D ); + xWrite8( 0x1D ); } emitterT void PI2FDMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x0D ); + xWrite32( from ); + xWrite8( 0x0D ); } emitterT void PI2FDRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0x0D ); + xWrite8( 0x0D ); } emitterT void PFMAXMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0xA4 ); + xWrite32( from ); + xWrite8( 0xA4 ); } emitterT void PFMAXRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0xA4 ); + xWrite8( 0xA4 ); } emitterT void PFMINMtoR( x86IntRegType to, uptr from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 0, to, DISP32 ); - write32( from ); - write8( 0x94 ); + xWrite32( from ); + xWrite8( 0x94 ); } emitterT void PFMINRtoR( x86IntRegType to, x86IntRegType from ) { - write16( 0x0F0F ); + xWrite16( 0x0F0F ); ModRM( 3, to, from ); - write8( 0x94 ); + xWrite8( 0x94 ); } diff --git a/pcsx2/x86/ix86/ix86_fpu.cpp b/pcsx2/x86/ix86/ix86_fpu.cpp index f3f9631e01..9394fc2156 100644 --- a/pcsx2/x86/ix86/ix86_fpu.cpp +++ b/pcsx2/x86/ix86/ix86_fpu.cpp @@ -26,244 +26,244 @@ /* fild m32 to fpu reg stack */ emitterT void FILD32( u32 from ) { - write8( 0xDB ); + xWrite8( 0xDB ); ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); + xWrite32( MEMADDR(from, 4) ); } /* fistp m32 from fpu reg stack */ emitterT void FISTP32( u32 from ) { - write8( 0xDB ); + xWrite8( 0xDB ); ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); + xWrite32( MEMADDR(from, 4) ); } /* fld m32 to fpu reg stack */ emitterT void FLD32( u32 from ) { - write8( 0xD9 ); + xWrite8( 0xD9 ); ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); + xWrite32( MEMADDR(from, 4) ); } // fld st(i) -emitterT void FLD(int st) { write16(0xc0d9+(st<<8)); } -emitterT void FLD1() { write16(0xe8d9); } -emitterT void FLDL2E() { write16(0xead9); } +emitterT void FLD(int st) { xWrite16(0xc0d9+(st<<8)); } +emitterT void FLD1() { xWrite16(0xe8d9); } +emitterT void FLDL2E() { xWrite16(0xead9); } /* fst m32 from fpu reg stack */ emitterT void FST32( u32 to ) { - write8( 0xD9 ); + xWrite8( 0xD9 ); ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(to, 4) ); + xWrite32( MEMADDR(to, 4) ); } /* fstp m32 from fpu reg stack */ emitterT void FSTP32( u32 to ) { - write8( 0xD9 ); + xWrite8( 0xD9 ); ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(to, 4) ); + xWrite32( MEMADDR(to, 4) ); } // fstp st(i) -emitterT void FSTP(int st) { write16(0xd8dd+(st<<8)); } +emitterT void FSTP(int st) { xWrite16(0xd8dd+(st<<8)); } /* fldcw fpu control word from m16 */ emitterT void FLDCW( u32 from ) { - write8( 0xD9 ); + xWrite8( 0xD9 ); ModRM( 0, 0x5, DISP32 ); - write32( MEMADDR(from, 4) ); + xWrite32( MEMADDR(from, 4) ); } /* fnstcw fpu control word to m16 */ emitterT void FNSTCW( u32 to ) { - write8( 0xD9 ); + xWrite8( 0xD9 ); ModRM( 0, 0x7, DISP32 ); - write32( MEMADDR(to, 4) ); + xWrite32( MEMADDR(to, 4) ); } -emitterT void FNSTSWtoAX() { write16(0xE0DF); } -emitterT void FXAM() { write16(0xe5d9); } -emitterT void FDECSTP() { write16(0xf6d9); } -emitterT void FRNDINT() { write16(0xfcd9); } -emitterT void FXCH(int st) { write16(0xc8d9+(st<<8)); } -emitterT void F2XM1() { write16(0xf0d9); } -emitterT void FSCALE() { write16(0xfdd9); 
} -emitterT void FPATAN(void) { write16(0xf3d9); } -emitterT void FSIN(void) { write16(0xfed9); } +emitterT void FNSTSWtoAX() { xWrite16(0xE0DF); } +emitterT void FXAM() { xWrite16(0xe5d9); } +emitterT void FDECSTP() { xWrite16(0xf6d9); } +emitterT void FRNDINT() { xWrite16(0xfcd9); } +emitterT void FXCH(int st) { xWrite16(0xc8d9+(st<<8)); } +emitterT void F2XM1() { xWrite16(0xf0d9); } +emitterT void FSCALE() { xWrite16(0xfdd9); } +emitterT void FPATAN(void) { xWrite16(0xf3d9); } +emitterT void FSIN(void) { xWrite16(0xfed9); } /* fadd ST(src) to fpu reg stack ST(0) */ emitterT void FADD32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xC0 + src ); + xWrite8( 0xD8 ); + xWrite8( 0xC0 + src ); } /* fadd ST(0) to fpu reg stack ST(src) */ emitterT void FADD320toR( x86IntRegType src ) { - write8( 0xDC ); - write8( 0xC0 + src ); + xWrite8( 0xDC ); + xWrite8( 0xC0 + src ); } /* fsub ST(src) to fpu reg stack ST(0) */ emitterT void FSUB32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xE0 + src ); + xWrite8( 0xD8 ); + xWrite8( 0xE0 + src ); } /* fsub ST(0) to fpu reg stack ST(src) */ emitterT void FSUB320toR( x86IntRegType src ) { - write8( 0xDC ); - write8( 0xE8 + src ); + xWrite8( 0xDC ); + xWrite8( 0xE8 + src ); } /* fsubp -> substract ST(0) from ST(1), store in ST(1) and POP stack */ emitterT void FSUBP( void ) { - write8( 0xDE ); - write8( 0xE9 ); + xWrite8( 0xDE ); + xWrite8( 0xE9 ); } /* fmul ST(src) to fpu reg stack ST(0) */ emitterT void FMUL32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xC8 + src ); + xWrite8( 0xD8 ); + xWrite8( 0xC8 + src ); } /* fmul ST(0) to fpu reg stack ST(src) */ emitterT void FMUL320toR( x86IntRegType src ) { - write8( 0xDC ); - write8( 0xC8 + src ); + xWrite8( 0xDC ); + xWrite8( 0xC8 + src ); } /* fdiv ST(src) to fpu reg stack ST(0) */ emitterT void FDIV32Rto0( x86IntRegType src ) { - write8( 0xD8 ); - write8( 0xF0 + src ); + xWrite8( 0xD8 ); + xWrite8( 0xF0 + src ); } /* fdiv ST(0) to fpu reg stack ST(src) */ emitterT void FDIV320toR( x86IntRegType src ) { - write8( 0xDC ); - write8( 0xF8 + src ); + xWrite8( 0xDC ); + xWrite8( 0xF8 + src ); } emitterT void FDIV320toRP( x86IntRegType src ) { - write8( 0xDE ); - write8( 0xF8 + src ); + xWrite8( 0xDE ); + xWrite8( 0xF8 + src ); } /* fadd m32 to fpu reg stack */ emitterT void FADD32( u32 from ) { - write8( 0xD8 ); + xWrite8( 0xD8 ); ModRM( 0, 0x0, DISP32 ); - write32( MEMADDR(from, 4) ); + xWrite32( MEMADDR(from, 4) ); } /* fsub m32 to fpu reg stack */ emitterT void FSUB32( u32 from ) { - write8( 0xD8 ); + xWrite8( 0xD8 ); ModRM( 0, 0x4, DISP32 ); - write32( MEMADDR(from, 4) ); + xWrite32( MEMADDR(from, 4) ); } /* fmul m32 to fpu reg stack */ emitterT void FMUL32( u32 from ) { - write8( 0xD8 ); + xWrite8( 0xD8 ); ModRM( 0, 0x1, DISP32 ); - write32( MEMADDR(from, 4) ); + xWrite32( MEMADDR(from, 4) ); } /* fdiv m32 to fpu reg stack */ emitterT void FDIV32( u32 from ) { - write8( 0xD8 ); + xWrite8( 0xD8 ); ModRM( 0, 0x6, DISP32 ); - write32( MEMADDR(from, 4) ); + xWrite32( MEMADDR(from, 4) ); } /* fabs fpu reg stack */ emitterT void FABS( void ) { - write16( 0xE1D9 ); + xWrite16( 0xE1D9 ); } /* fsqrt fpu reg stack */ emitterT void FSQRT( void ) { - write16( 0xFAD9 ); + xWrite16( 0xFAD9 ); } /* fchs fpu reg stack */ emitterT void FCHS( void ) { - write16( 0xE0D9 ); + xWrite16( 0xE0D9 ); } /* fcomi st, st(i) */ emitterT void FCOMI( x86IntRegType src ) { - write8( 0xDB ); - write8( 0xF0 + src ); + xWrite8( 0xDB ); + xWrite8( 0xF0 + src ); } /* fcomip st, st(i) */ emitterT void FCOMIP( 
x86IntRegType src ) { - write8( 0xDF ); - write8( 0xF0 + src ); + xWrite8( 0xDF ); + xWrite8( 0xF0 + src ); } /* fucomi st, st(i) */ emitterT void FUCOMI( x86IntRegType src ) { - write8( 0xDB ); - write8( 0xE8 + src ); + xWrite8( 0xDB ); + xWrite8( 0xE8 + src ); } /* fucomip st, st(i) */ emitterT void FUCOMIP( x86IntRegType src ) { - write8( 0xDF ); - write8( 0xE8 + src ); + xWrite8( 0xDF ); + xWrite8( 0xE8 + src ); } /* fcom m32 to fpu reg stack */ emitterT void FCOM32( u32 from ) { - write8( 0xD8 ); + xWrite8( 0xD8 ); ModRM( 0, 0x2, DISP32 ); - write32( MEMADDR(from, 4) ); + xWrite32( MEMADDR(from, 4) ); } /* fcomp m32 to fpu reg stack */ emitterT void FCOMP32( u32 from ) { - write8( 0xD8 ); + xWrite8( 0xD8 ); ModRM( 0, 0x3, DISP32 ); - write32( MEMADDR(from, 4) ); + xWrite32( MEMADDR(from, 4) ); } #define FCMOV32( low, high ) \ { \ - write8( low ); \ - write8( high + from ); \ + xWrite8( low ); \ + xWrite8( high + from ); \ } emitterT void FCMOVB32( x86IntRegType from ) { FCMOV32( 0xDA, 0xC0 ); } diff --git a/pcsx2/x86/ix86/ix86_inlines.inl b/pcsx2/x86/ix86/ix86_inlines.inl index c2b54cfcb0..9eedcb0b1a 100644 --- a/pcsx2/x86/ix86/ix86_inlines.inl +++ b/pcsx2/x86/ix86/ix86_inlines.inl @@ -175,7 +175,7 @@ namespace x86Emitter // ------------------------------------------------------------------------ template< typename OperandType > xForwardJump::xForwardJump( JccComparisonType cctype ) : - BasePtr( (s8*)iGetPtr() + + BasePtr( (s8*)xGetPtr() + ((OperandSize == 1) ? 2 : // j8's are always 2 bytes. ((cctype==Jcc_Unconditional) ? 5 : 6 )) // j32's are either 5 or 6 bytes ) @@ -184,19 +184,19 @@ namespace x86Emitter jASSUME( OperandSize == 1 || OperandSize == 4 ); if( OperandSize == 1 ) - xWrite( (cctype == Jcc_Unconditional) ? 0xeb : (0x70 | cctype) ); + xWrite8( (cctype == Jcc_Unconditional) ? 0xeb : (0x70 | cctype) ); else { if( cctype == Jcc_Unconditional ) - xWrite( 0xe9 ); + xWrite8( 0xe9 ); else { - xWrite( 0x0f ); - xWrite( 0x80 | cctype ); + xWrite8( 0x0f ); + xWrite8( 0x80 | cctype ); } } - iAdvancePtr( OperandSize ); + xAdvancePtr( OperandSize ); } // ------------------------------------------------------------------------ @@ -205,7 +205,7 @@ namespace x86Emitter { jASSUME( BasePtr != NULL ); - sptr displacement = (sptr)iGetPtr() - (sptr)BasePtr; + sptr displacement = (sptr)xGetPtr() - (sptr)BasePtr; if( OperandSize == 1 ) { if( !is_s8( displacement ) ) diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index 32d210edbc..f02d006e4d 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -57,7 +57,7 @@ namespace x86Emitter // flags. 
extern const Internal::MovImplAll xMOV; - extern const Internal::TestImplAll xTEST; + extern const Internal::xImpl_Test xTEST; extern const Internal::Group2ImplAll xROL; extern const Internal::Group2ImplAll xROR; @@ -70,15 +70,15 @@ namespace x86Emitter // ------------------------------------------------------------------------ // Group 3 Instruction Class - extern const Internal::Group3ImplAll xNOT; - extern const Internal::Group3ImplAll xNEG; - extern const Internal::Group3ImplAll xUMUL; - extern const Internal::Group3ImplAll xUDIV; - extern const Internal::xImpl_Group3 xDIV; + extern const Internal::xImpl_Group3 xNOT; + extern const Internal::xImpl_Group3 xNEG; + extern const Internal::xImpl_Group3 xUMUL; + extern const Internal::xImpl_Group3 xUDIV; + extern const Internal::xImpl_iDiv xDIV; extern const Internal::xImpl_iMul xMUL; - extern const Internal::IncDecImplAll xINC; - extern const Internal::IncDecImplAll xDEC; + extern const Internal::xImpl_IncDec xINC; + extern const Internal::xImpl_IncDec xDEC; extern const Internal::MovExtendImplAll xMOVZX; extern const Internal::MovExtendImplAll xMOVSX; @@ -86,16 +86,16 @@ namespace x86Emitter extern const Internal::DwordShiftImplAll xSHLD; extern const Internal::DwordShiftImplAll xSHRD; - extern const Internal::Group8Impl xBT; - extern const Internal::Group8Impl xBTR; - extern const Internal::Group8Impl xBTS; - extern const Internal::Group8Impl xBTC; + extern const Internal::xImpl_Group8 xBT; + extern const Internal::xImpl_Group8 xBTR; + extern const Internal::xImpl_Group8 xBTS; + extern const Internal::xImpl_Group8 xBTC; - extern const Internal::JmpCallImplAll xJMP; - extern const Internal::JmpCallImplAll xCALL; + extern const Internal::xImpl_JmpCall xJMP; + extern const Internal::xImpl_JmpCall xCALL; - extern const Internal::BitScanImpl<0xbc> xBSF; - extern const Internal::BitScanImpl<0xbd> xBSR; + extern const Internal::xImpl_BitScan<0xbc> xBSF; + extern const Internal::xImpl_BitScan<0xbd> xBSR; // ------------------------------------------------------------------------ extern const Internal::CMovImplGeneric xCMOV; @@ -175,41 +175,76 @@ namespace x86Emitter extern void xPOP( const ModSibBase& from ); extern void xPUSH( const ModSibBase& from ); - static __forceinline void xPOP( xRegister32 from ) { write8( 0x58 | from.Id ); } - static __forceinline void xPOP( void* from ) { xPOP( ptr[from] ); } + extern void xPOP( xRegister32 from ); + extern void xPOP( void* from ); - static __forceinline void xPUSH( u32 imm ) { write8( 0x68 ); write32( imm ); } - static __forceinline void xPUSH( xRegister32 from ) { write8( 0x50 | from.Id ); } - static __forceinline void xPUSH( void* from ) { xPUSH( ptr[from] ); } + extern void xPUSH( u32 imm ); + extern void xPUSH( xRegister32 from ); + extern void xPUSH( void* from ); // pushes the EFLAGS register onto the stack - static __forceinline void xPUSHFD() { write8( 0x9C ); } + extern void xPUSHFD(); // pops the EFLAGS register from the stack - static __forceinline void xPOPFD() { write8( 0x9D ); } + extern void xPOPFD(); // ----- Miscellaneous Instructions ----- // Various Instructions with no parameter and no special encoding logic. 
- __forceinline void xRET() { write8( 0xC3 ); } - __forceinline void xCBW() { write16( 0x9866 ); } - __forceinline void xCWD() { write8( 0x98 ); } - __forceinline void xCDQ() { write8( 0x99 ); } - __forceinline void xCWDE() { write8( 0x98 ); } + extern void xRET(); + extern void xCBW(); + extern void xCWD(); + extern void xCDQ(); + extern void xCWDE(); - __forceinline void xLAHF() { write8( 0x9f ); } - __forceinline void xSAHF() { write8( 0x9e ); } + extern void xLAHF(); + extern void xSAHF(); - __forceinline void xSTC() { write8( 0xF9 ); } - __forceinline void xCLC() { write8( 0xF8 ); } + extern void xSTC(); + extern void xCLC(); // NOP 1-byte - __forceinline void xNOP() { write8(0x90); } + extern void xNOP(); ////////////////////////////////////////////////////////////////////////////////////////// // JMP / Jcc Instructions! - extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward=false ); + extern void iJcc( JccComparisonType comparison, void* target ); + // ------------------------------------------------------------------------ + // Conditional jumps to fixed targets. + // Jumps accept any pointer as a valid target (function or data), and will generate either + // 8 or 32 bit displacement versions of the jump, depending on relative displacement of + // the target (efficient!) + // + + template< typename T > __forceinline void xJE( const T* func ) { iJcc( Jcc_Equal, (void*)(uptr)func ); } + template< typename T > __forceinline void xJZ( const T* func ) { iJcc( Jcc_Zero, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNE( const T* func ) { iJcc( Jcc_NotEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNZ( const T* func ) { iJcc( Jcc_NotZero, (void*)(uptr)func ); } + + template< typename T > __forceinline void xJO( const T* func ) { iJcc( Jcc_Overflow, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNO( const T* func ) { iJcc( Jcc_NotOverflow, (void*)(uptr)func ); } + template< typename T > __forceinline void xJC( const T* func ) { iJcc( Jcc_Carry, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNC( const T* func ) { iJcc( Jcc_NotCarry, (void*)(uptr)func ); } + template< typename T > __forceinline void xJS( const T* func ) { iJcc( Jcc_Signed, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNS( const T* func ) { iJcc( Jcc_Unsigned, (void*)(uptr)func ); } + + template< typename T > __forceinline void xJPE( const T* func ) { iJcc( Jcc_ParityEven, (void*)(uptr)func ); } + template< typename T > __forceinline void xJPO( const T* func ) { iJcc( Jcc_ParityOdd, (void*)(uptr)func ); } + + template< typename T > __forceinline void xJL( const T* func ) { iJcc( Jcc_Less, (void*)(uptr)func ); } + template< typename T > __forceinline void xJLE( const T* func ) { iJcc( Jcc_LessOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJG( const T* func ) { iJcc( Jcc_Greater, (void*)(uptr)func ); } + template< typename T > __forceinline void xJGE( const T* func ) { iJcc( Jcc_GreaterOrEqual, (void*)(uptr)func ); } + + template< typename T > __forceinline void xJB( const T* func ) { iJcc( Jcc_Below, (void*)(uptr)func ); } + template< typename T > __forceinline void xJBE( const T* func ) { iJcc( Jcc_BelowOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJA( const T* func ) { iJcc( Jcc_Above, (void*)(uptr)func ); } + template< typename T > __forceinline void xJAE( const T* func ) { iJcc( Jcc_AboveOrEqual, 
(void*)(uptr)func ); } + + // ------------------------------------------------------------------------ + // Forward Jump Helpers (act as labels!) + #define DEFINE_FORWARD_JUMP( label, cond ) \ template< typename OperandType > \ class xForward##label : public xForwardJump \ diff --git a/pcsx2/x86/ix86/ix86_jmp.cpp b/pcsx2/x86/ix86/ix86_jmp.cpp index 15f3fc68f9..b9ea2efde1 100644 --- a/pcsx2/x86/ix86/ix86_jmp.cpp +++ b/pcsx2/x86/ix86/ix86_jmp.cpp @@ -40,30 +40,30 @@ namespace x86Emitter { using namespace Internal; -const JmpCallImplAll xJMP; -const JmpCallImplAll xCALL; +const xImpl_JmpCall xJMP; +const xImpl_JmpCall xCALL; // ------------------------------------------------------------------------ void xSmartJump::SetTarget() { - u8* target = iGetPtr(); + u8* target = xGetPtr(); if( m_baseptr == NULL ) return; - iSetPtr( m_baseptr ); + xSetPtr( m_baseptr ); u8* const saveme = m_baseptr + GetMaxInstructionSize(); iJccKnownTarget( m_cc, target, true ); // Copy recompiled data inward if the jump instruction didn't fill the // alloted buffer (means that we optimized things to a j8!) - const int spacer = (sptr)saveme - (sptr)iGetPtr(); + const int spacer = (sptr)saveme - (sptr)xGetPtr(); if( spacer != 0 ) { - u8* destpos = iGetPtr(); + u8* destpos = xGetPtr(); const int copylen = (sptr)target - (sptr)saveme; memcpy_fast( destpos, saveme, copylen ); - iSetPtr( target - spacer ); + xSetPtr( target - spacer ); } } @@ -81,11 +81,10 @@ xSmartJump::~xSmartJump() // slideForward - used internally by xSmartJump to indicate that the jump target is going // to slide forward in the event of an 8 bit displacement. // -// Using this -__emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ) +__emitinline void Internal::iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ) { // Calculate the potential j8 displacement first, assuming an instruction length of 2: - sptr displacement8 = (sptr)target - ((sptr)iGetPtr() + 2); + sptr displacement8 = (sptr)target - ((sptr)xGetPtr() + 2); const int slideVal = slideForward ? ((comparison == Jcc_Unconditional) ? 3 : 4) : 0; displacement8 -= slideVal; @@ -96,7 +95,7 @@ __emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, b if( is_s8( displacement8 ) ) { - xWrite( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) ); + xWrite8( (comparison == Jcc_Unconditional) ? 0xeb : (0x70 | comparison) ); xWrite( displacement8 ); } else @@ -104,14 +103,21 @@ __emitinline void iJccKnownTarget( JccComparisonType comparison, void* target, b // Perform a 32 bit jump instead. :( if( comparison == Jcc_Unconditional ) - xWrite( 0xe9 ); + xWrite8( 0xe9 ); else { - xWrite( 0x0f ); - xWrite( 0x80 | comparison ); + xWrite8( 0x0f ); + xWrite8( 0x80 | comparison ); } - xWrite( (sptr)target - ((sptr)iGetPtr() + 4) ); + xWrite( (sptr)target - ((sptr)xGetPtr() + 4) ); } } +// Low-level jump instruction! Specify a comparison type and a target in void* form, and +// a jump (either 8 or 32 bit) is generated. 
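// ------------------------------------------------------------------------
// The size selection iJccKnownTarget performs above is the standard one: if the
// target lies within a signed 8-bit range of the byte after a two-byte jump,
// emit the short form (EB, or 70+cc, plus rel8); otherwise fall back to E9, or
// 0F 80+cc, plus a rel32 measured from the end of that longer instruction.
// Self-contained sketch of the same decision; cur, emit8() and emit32() are
// hypothetical stand-ins for the real buffer cursor and writers:

#include <cstdint>
#include <cstring>

static uint8_t* cur;                                     // stand-in for x86Ptr
static void emit8(uint8_t b)   { *cur++ = b; }
static void emit32(uint32_t v) { std::memcpy(cur, &v, 4); cur += 4; }

static void emitJcc(unsigned cc, void* target, bool unconditional)
{
    intptr_t rel8 = (intptr_t)target - ((intptr_t)cur + 2); // short form is 2 bytes
    if (rel8 >= -128 && rel8 <= 127)
    {
        emit8(unconditional ? 0xEB : (0x70 | cc));
        emit8((uint8_t)rel8);
    }
    else
    {
        if (unconditional) emit8(0xE9);
        else { emit8(0x0F); emit8(0x80 | cc); }
        emit32((uint32_t)((intptr_t)target - ((intptr_t)cur + 4)));
    }
}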
+__emitinline void iJcc( JccComparisonType comparison, void* target ) +{ + iJccKnownTarget( comparison, target, false ); +} + } \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_legacy.cpp b/pcsx2/x86/ix86/ix86_legacy.cpp index 8f07278443..8bf4bd3c4a 100644 --- a/pcsx2/x86/ix86/ix86_legacy.cpp +++ b/pcsx2/x86/ix86/ix86_legacy.cpp @@ -32,6 +32,30 @@ #include "System.h" #include "ix86_legacy_internal.h" +////////////////////////////////////////////////////////////////////////////////////////// +// +emitterT void ModRM( uint mod, uint reg, uint rm ) +{ + // Note: Following ASSUMEs are for legacy support only. + // The new emitter performs these sanity checks during operand construction, so these + // assertions can probably be removed once all legacy emitter code has been removed. + jASSUME( mod < 4 ); + jASSUME( reg < 8 ); + jASSUME( rm < 8 ); + xWrite8( (mod << 6) | (reg << 3) | rm ); +} + +emitterT void SibSB( uint ss, uint index, uint base ) +{ + // Note: Following ASSUMEs are for legacy support only. + // The new emitter performs these sanity checks during operand construction, so these + // assertions can probably be removed once all legacy emitter code has been removed. + jASSUME( ss < 4 ); + jASSUME( index < 8 ); + jASSUME( base < 8 ); + xWrite8( (ss << 6) | (index << 3) | base ); +} + using namespace x86Emitter; template< typename ImmType > @@ -310,63 +334,26 @@ emitterT void SAHF() { xSAHF(); } ////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////// -// Note: the 'to' field can either be a register or a special opcode extension specifier -// depending on the opcode's encoding. 
- -emitterT void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset) -{ - if ((from&7) == ESP) { - if( offset == 0 ) { - ModRM( 0, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - } - else if( is_s8( offset ) ) { - ModRM( 1, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - write8(offset); - } - else { - ModRM( 2, to, 0x4 ); - SibSB( 0, 0x4, 0x4 ); - write32(offset); - } - } - else { - if( offset == 0 ) { - ModRM( 0, to, from ); - } - else if( is_s8( offset ) ) { - ModRM( 1, to, from ); - write8(offset); - } - else { - ModRM( 2, to, from ); - write32(offset); - } - } -} - emitterT u8* J8Rel( int cc, int to ) { - write8( cc ); - write8( to ); + xWrite8( cc ); + xWrite8( to ); return (u8*)(x86Ptr - 1); } emitterT u16* J16Rel( int cc, u32 to ) { - write16( 0x0F66 ); - write8( cc ); - write16( to ); + xWrite16( 0x0F66 ); + xWrite8( cc ); + xWrite16( to ); return (u16*)( x86Ptr - 2 ); } emitterT u32* J32Rel( int cc, u32 to ) { - write8( 0x0F ); - write8( cc ); - write32( to ); + xWrite8( 0x0F ); + xWrite8( cc ); + xWrite32( to ); return (u32*)( x86Ptr - 4 ); } @@ -448,8 +435,8 @@ emitterT void NOP( void ) { xNOP(); } /* jmp rel8 */ emitterT u8* JMP8( u8 to ) { - write8( 0xEB ); - write8( to ); + xWrite8( 0xEB ); + xWrite8( to ); return x86Ptr - 1; } @@ -457,8 +444,8 @@ emitterT u8* JMP8( u8 to ) emitterT u32* JMP32( uptr to ) { assert( (sptr)to <= 0x7fffffff && (sptr)to >= -0x7fffffff ); - write8( 0xE9 ); - write32( to ); + xWrite8( 0xE9 ); + xWrite32( to ); return (u32*)(x86Ptr - 4 ); } diff --git a/pcsx2/x86/ix86/ix86_legacy_instructions.h b/pcsx2/x86/ix86/ix86_legacy_instructions.h index 7f3e2ea740..8e41e58f4c 100644 --- a/pcsx2/x86/ix86/ix86_legacy_instructions.h +++ b/pcsx2/x86/ix86/ix86_legacy_instructions.h @@ -18,17 +18,6 @@ #pragma once -//------------------------------------------------------------------ -// legacy write functions (depreciated, use x86Emitter::EmitPtrCache instead) -//------------------------------------------------------------------ -#define emitterT __forceinline - -using x86Emitter::write8; -using x86Emitter::write16; -using x86Emitter::write24; -using x86Emitter::write32; -using x86Emitter::write64; - //------------------------------------------------------------------ // legacy jump/align functions //------------------------------------------------------------------ diff --git a/pcsx2/x86/ix86/ix86_legacy_internal.h b/pcsx2/x86/ix86/ix86_legacy_internal.h index e823f88d92..90ba4aef49 100644 --- a/pcsx2/x86/ix86/ix86_legacy_internal.h +++ b/pcsx2/x86/ix86/ix86_legacy_internal.h @@ -24,6 +24,13 @@ // Legacy Helper Macros and Functions (depreciated) //------------------------------------------------------------------ +#define emitterT __forceinline + +using x86Emitter::xWrite8; +using x86Emitter::xWrite16; +using x86Emitter::xWrite32; +using x86Emitter::xWrite64; + #include "ix86_legacy_types.h" #include "ix86_legacy_instructions.h" @@ -37,7 +44,6 @@ #define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) -extern void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); extern void ModRM( uint mod, uint reg, uint rm ); extern void SibSB( uint ss, uint index, uint base ); extern void SET8R( int cc, int to ); @@ -45,27 +51,3 @@ extern u8* J8Rel( int cc, int to ); extern u32* J32Rel( int cc, u32 to ); extern u64 GetCPUTick( void ); - -////////////////////////////////////////////////////////////////////////////////////////// -// -emitterT void ModRM( uint mod, uint reg, uint rm ) -{ - // Note: Following ASSUMEs 
are for legacy support only. - // The new emitter performs these sanity checks during operand construction, so these - // assertions can probably be removed once all legacy emitter code has been removed. - jASSUME( mod < 4 ); - jASSUME( reg < 8 ); - jASSUME( rm < 8 ); - write8( (mod << 6) | (reg << 3) | rm ); -} - -emitterT void SibSB( uint ss, uint index, uint base ) -{ - // Note: Following ASSUMEs are for legacy support only. - // The new emitter performs these sanity checks during operand construction, so these - // assertions can probably be removed once all legacy emitter code has been removed. - jASSUME( ss < 4 ); - jASSUME( index < 8 ); - jASSUME( base < 8 ); - write8( (ss << 6) | (index << 3) | base ); -} diff --git a/pcsx2/x86/ix86/ix86_simd.cpp b/pcsx2/x86/ix86/ix86_simd.cpp index 42754cce18..860e8ffc0a 100644 --- a/pcsx2/x86/ix86/ix86_simd.cpp +++ b/pcsx2/x86/ix86/ix86_simd.cpp @@ -36,29 +36,29 @@ __emitinline void Internal::SimdPrefix( u8 prefix, u16 opcode ) { const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a); - // If the lower byte is not a valid previx and the upper byte is non-zero it + // If the lower byte is not a valid prefix and the upper byte is non-zero it // means we made a mistake! if( !is16BitOpcode ) jASSUME( (opcode >> 8) == 0 ); if( prefix != 0 ) { if( is16BitOpcode ) - xWrite( (opcode<<16) | 0x0f00 | prefix ); + xWrite32( (opcode<<16) | 0x0f00 | prefix ); else { - xWrite( 0x0f00 | prefix ); - xWrite( opcode ); + xWrite16( 0x0f00 | prefix ); + xWrite8( opcode ); } } else { if( is16BitOpcode ) { - xWrite( 0x0f ); - xWrite( opcode ); + xWrite8( 0x0f ); + xWrite16( opcode ); } else - xWrite( (opcode<<8) | 0x0f ); + xWrite16( (opcode<<8) | 0x0f ); } } @@ -199,23 +199,31 @@ const SimdImpl_PMove xPMOVZX; ////////////////////////////////////////////////////////////////////////////////////////// // -__emitinline void xEMMS() -{ - xWrite( 0x770F ); -} +// Converts from MMX register mode to FPU register mode. The cpu enters MMX register mode +// when ever MMX instructions are run, and if FPU instructions are run without using EMMS, +// the FPU results will be invalid. +__forceinline void xEMMS() { xWrite16( 0x770F ); } + +// [3DNow] Same as EMMS, but an AMD special version which may (or may not) leave MMX regs +// in an undefined state (which is fine, since presumably you're done using them anyway). +// This instruction is thus faster than EMMS on K8s, but all newer AMD cpus use the same +// logic for either EMMS or FEMMS. +// Conclusion: Obsolete. Just use EMMS instead. +__forceinline void xFEMMS() { xWrite16( 0x0E0F ); } + // Store Streaming SIMD Extension Control/Status to Mem32. __emitinline void xSTMXCSR( u32* dest ) { SimdPrefix( 0, 0xae ); - xWriteDisp( 3, dest ); + EmitSibMagic( 3, dest ); } // Load Streaming SIMD Extension Control/Status from Mem32. 
__emitinline void xLDMXCSR( const u32* src ) { SimdPrefix( 0, 0xae ); - xWriteDisp( 2, src ); + EmitSibMagic( 2, src ); } ////////////////////////////////////////////////////////////////////////////////////////// @@ -279,7 +287,7 @@ __forceinline void xMOVQ( const xRegisterMMX& to, const xRegisterSSE& from ) // that breaks the template inference of writeXMMop(); SimdPrefix( 0xf2, 0xd6 ); - ModRM_Direct( to.Id, from.Id ); + EmitSibMagic( to, from ); } ////////////////////////////////////////////////////////////////////////////////////////// @@ -301,13 +309,13 @@ IMPLEMENT_xMOVS( SD, 0xf2 ) __forceinline void xMOVNTDQA( const xRegisterSSE& to, const void* from ) { - xWrite( 0x2A380f66 ); - xWriteDisp( to.Id, from ); + xWrite32( 0x2A380f66 ); + EmitSibMagic( to.Id, from ); } __forceinline void xMOVNTDQA( const xRegisterSSE& to, const ModSibBase& from ) { - xWrite( 0x2A380f66 ); + xWrite32( 0x2A380f66 ); EmitSibMagic( to.Id, from ); } diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 6ecd400e98..39aa9b0839 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -21,7 +21,8 @@ extern void cpudetectInit();//this is all that needs to be called and will fill up the below structs //cpu capabilities structure -struct CAPABILITIES { +struct CAPABILITIES +{ u32 hasFloatingPointUnit; u32 hasVirtual8086ModeEnhancements; u32 hasDebuggingExtensions; @@ -63,8 +64,8 @@ struct CAPABILITIES { extern CAPABILITIES cpucaps; -struct CPUINFO{ - +struct CPUINFO +{ u32 x86Family; // Processor Family u32 x86Model; // Processor Model u32 x86PType; // Processor Type @@ -109,20 +110,26 @@ extern __threadlocal u32 *j32Ptr[32]; // depreciated item. use local u32* vars extern __threadlocal XMMSSEType g_xmmtypes[iREGCNT_XMM]; +namespace x86Emitter +{ + +extern void xWrite8( u8 val ); +extern void xWrite16( u16 val ); +extern void xWrite32( u32 val ); +extern void xWrite64( u64 val ); + //------------------------------------------------------------------ // templated version of is_s8 is required, so that u16's get correct sign extension treatment. template< typename T > static __forceinline bool is_s8( T imm ) { return (s8)imm == (s32)imm; } template< typename T > -static __forceinline void xWrite( T val ) +__forceinline void xWrite( T val ) { *(T*)x86Ptr = val; x86Ptr += sizeof(T); } -namespace x86Emitter -{ ////////////////////////////////////////////////////////////////////////////////////////// // ALWAYS_USE_MOVAPS [define] / AlwaysUseMovaps [const] @@ -178,54 +185,27 @@ namespace x86Emitter class xAddressInfo; class ModSibBase; - extern void iSetPtr( void* ptr ); - extern u8* iGetPtr(); - extern void iAlignPtr( uint bytes ); - extern void iAdvancePtr( uint bytes ); - - - static __forceinline void write8( u8 val ) - { - xWrite( val ); - } - - static __forceinline void write16( u16 val ) - { - xWrite( val ); - } - - static __forceinline void write24( u32 val ) - { - *(u32*)x86Ptr = val; - x86Ptr += 3; - } - - static __forceinline void write32( u32 val ) - { - xWrite( val ); - } - - static __forceinline void write64( u64 val ) - { - xWrite( val ); - } + extern void xSetPtr( void* ptr ); + extern u8* xGetPtr(); + extern void xAlignPtr( uint bytes ); + extern void xAdvancePtr( uint bytes ); ////////////////////////////////////////////////////////////////////////////////////////// - // xRegister + // xRegisterBase // Unless templating some fancy stuff, use the friendly xRegister32/16/8 typedefs instead. 
// template< typename OperandType > - class xRegister + class xRegisterBase { public: static const uint OperandSize = sizeof( OperandType ); - static const xRegister Empty; // defined as an empty/unused value (-1) + static const xRegisterBase Empty; // defined as an empty/unused value (-1) int Id; - xRegister( const xRegister& src ) : Id( src.Id ) {} - xRegister(): Id( -1 ) {} - explicit xRegister( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } + xRegisterBase( const xRegisterBase& src ) : Id( src.Id ) {} + xRegisterBase(): Id( -1 ) {} + explicit xRegisterBase( int regId ) : Id( regId ) { jASSUME( Id >= -1 && Id < 8 ); } bool IsEmpty() const { return Id < 0; } @@ -235,19 +215,60 @@ namespace x86Emitter // returns true if the register is a valid MMX or XMM register. bool IsSIMD() const { return OperandSize == 8 || OperandSize == 16; } - bool operator==( const xRegister& src ) const - { - return (Id == src.Id); - } + bool operator==( const xRegisterBase& src ) const { return (Id == src.Id); } + bool operator!=( const xRegisterBase& src ) const { return (Id != src.Id); } - bool operator!=( const xRegister& src ) const + xRegisterBase& operator=( const xRegisterBase& src ) { - return (Id != src.Id); + Id = src.Id; + return *this; } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + template< typename OperandType > + class xRegister : public xRegisterBase + { + public: + static const xRegister Empty; // defined as an empty/unused value (-1) + + public: + xRegister(): xRegisterBase() {} + xRegister( const xRegister& src ) : xRegisterBase( src.Id ) {} + explicit xRegister( const xRegisterBase& src ) : xRegisterBase( src ) {} + explicit xRegister( int regId ) : xRegisterBase( regId ) {} + + bool operator==( const xRegister& src ) const { return Id == src.Id; } + bool operator!=( const xRegister& src ) const { return Id != src.Id; } xRegister& operator=( const xRegister& src ) { - Id = src.Id; + xRegisterBase::Id = src.Id; + return *this; + } + }; + + ////////////////////////////////////////////////////////////////////////////////////////// + // + template< typename OperandType > + class xRegisterSIMD : public xRegisterBase + { + public: + static const xRegisterSIMD Empty; // defined as an empty/unused value (-1) + + public: + xRegisterSIMD(): xRegisterBase() {} + xRegisterSIMD( const xRegisterSIMD& src ) : xRegisterBase( src.Id ) {} + explicit xRegisterSIMD( const xRegisterBase& src ) : xRegisterBase( src ) {} + explicit xRegisterSIMD( int regId ) : xRegisterBase( regId ) {} + + bool operator==( const xRegisterSIMD& src ) const { return Id == src.Id; } + bool operator!=( const xRegisterSIMD& src ) const { return Id != src.Id; } + + xRegisterSIMD& operator=( const xRegisterSIMD& src ) + { + xRegisterBase::Id = src.Id; return *this; } }; @@ -260,8 +281,8 @@ namespace x86Emitter // all about the the templated code in haphazard fashion. Yay.. 
>_< // - typedef xRegister xRegisterSSE; - typedef xRegister xRegisterMMX; + typedef xRegisterSIMD xRegisterSSE; + typedef xRegisterSIMD xRegisterMMX; typedef xRegister xRegister32; typedef xRegister xRegister16; typedef xRegister xRegister8; @@ -624,9 +645,9 @@ namespace x86Emitter xSmartJump( JccComparisonType ccType ) { jASSUME( ccType != Jcc_Unknown ); - m_baseptr = iGetPtr(); + m_baseptr = xGetPtr(); m_cc = ccType; - iAdvancePtr( GetMaxInstructionSize() ); + xAdvancePtr( GetMaxInstructionSize() ); } protected: @@ -662,18 +683,69 @@ namespace x86Emitter // namespace Internal { - extern void ModRM( uint mod, uint reg, uint rm ); - extern void ModRM_Direct( uint reg, uint rm ); - extern void SibSB( u32 ss, u32 index, u32 base ); - extern void xWriteDisp( int regfield, s32 displacement ); - extern void xWriteDisp( int regfield, const void* address ); - + extern void SimdPrefix( u8 prefix, u16 opcode ); + extern void EmitSibMagic( uint regfield, const void* address ); extern void EmitSibMagic( uint regfield, const ModSibBase& info ); + extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ); + + + // Writes a ModRM byte for "Direct" register access forms, which is used for all + // instructions taking a form of [reg,reg]. + template< typename T > __emitinline + void EmitSibMagic( uint reg1, const xRegisterBase& reg2 ) + { + xWrite8( (Mod_Direct << 6) | (reg1 << 3) | reg2.Id ); + } + + template< typename T1, typename T2 > __emitinline + void EmitSibMagic( const xRegisterBase reg1, const xRegisterBase& reg2 ) + { + xWrite8( (Mod_Direct << 6) | (reg1.Id << 3) | reg2.Id ); + } + + template< typename T1 > __emitinline + void EmitSibMagic( const xRegisterBase reg1, const void* src ) { EmitSibMagic( reg1.Id, src ); } + + template< typename T1 > __emitinline + void EmitSibMagic( const xRegisterBase reg1, const ModSibBase& sib ) { EmitSibMagic( reg1.Id, sib ); } // ------------------------------------------------------------------------ - + template< typename T1, typename T2 > __emitinline + void xOpWrite( u8 prefix, u8 opcode, const T1& param1, const T2& param2 ) + { + if( prefix != 0 ) + xWrite16( (opcode<<8) | prefix ); + else + xWrite8( opcode ); + + EmitSibMagic( param1, param2 ); + } + + // ------------------------------------------------------------------------ + template< typename T1, typename T2 > __emitinline + void xOpWrite0F( u8 prefix, u16 opcode, const T1& param1, const T2& param2 ) + { + SimdPrefix( prefix, opcode ); + EmitSibMagic( param1, param2 ); + } + + template< typename T1, typename T2 > __emitinline + void xOpWrite0F( u8 prefix, u16 opcode, const T1& param1, const T2& param2, u8 imm8 ) + { + xOpWrite0F( prefix, opcode, param1, param2 ); + xWrite8( imm8 ); + } + + template< typename T1, typename T2 > __emitinline + void xOpWrite0F( u16 opcode, const T1& param1, const T2& param2 ) { xOpWrite0F( 0, opcode, param1, param2 ); } + + template< typename T1, typename T2 > __emitinline + void xOpWrite0F( u16 opcode, const T1& param1, const T2& param2, u8 imm8 ) { xOpWrite0F( 0, opcode, param1, param2, imm8 ); } + + // ------------------------------------------------------------------------ + template< typename T > bool Is8BitOp() { return sizeof(T) == 1; } - template< typename T > void prefix16() { if( sizeof(T) == 2 ) xWrite( 0x66 ); } + template< typename T > void prefix16() { if( sizeof(T) == 2 ) xWrite8( 0x66 ); } #include "implement/xmm/basehelpers.h" #include "implement/xmm/moremovs.h" @@ -686,7 +758,6 @@ namespace x86Emitter #include 
"implement/movs.h" // cmov and movsx/zx #include "implement/dwshift.h" // doubleword shifts! #include "implement/incdec.h" - #include "implement/bittest.h" #include "implement/test.h" #include "implement/jmpcall.h" } From dff8cf5ac1c6917c8ccdcc426b63a006e21354a2 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Fri, 24 Apr 2009 11:44:07 +0000 Subject: [PATCH 139/143] Save linux from the dastardly deeds of my previous commit, and fix one last remnant 'i' from the apple-like naming schemes of yesterweek. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1055 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/implement/jmpcall.h | 2 +- pcsx2/x86/ix86/implement/xmm/arithmetic.h | 7 +--- pcsx2/x86/ix86/ix86_instructions.h | 42 +++++++++++------------ pcsx2/x86/ix86/ix86_jmp.cpp | 8 ++--- pcsx2/x86/ix86/ix86_types.h | 14 ++++---- 5 files changed, 34 insertions(+), 39 deletions(-) diff --git a/pcsx2/x86/ix86/implement/jmpcall.h b/pcsx2/x86/ix86/implement/jmpcall.h index 59cae03538..4c9d266c87 100644 --- a/pcsx2/x86/ix86/implement/jmpcall.h +++ b/pcsx2/x86/ix86/implement/jmpcall.h @@ -40,7 +40,7 @@ public: __forceinline void operator()( const T* func ) const { if( isJmp ) - iJcc( Jcc_Unconditional, (void*)(uptr)func ); // double cast to/from (uptr) needed to appease GCC + xJccKnownTarget( Jcc_Unconditional, (void*)(uptr)func, false ); // double cast to/from (uptr) needed to appease GCC else { // calls are relative to the instruction after this one, and length is diff --git a/pcsx2/x86/ix86/implement/xmm/arithmetic.h b/pcsx2/x86/ix86/implement/xmm/arithmetic.h index 6afd1d27a9..328070ce2d 100644 --- a/pcsx2/x86/ix86/implement/xmm/arithmetic.h +++ b/pcsx2/x86/ix86/implement/xmm/arithmetic.h @@ -74,12 +74,7 @@ class SimdImpl_Shift : public SimdImpl_ShiftWithoutQ public: const _SimdShiftHelper Q; - void DQ( const xRegisterSSE& to, u8 imm8 ) const - { - SimdPrefix( 0x66, 0x73 ); - ModRM( 3, (int)Modcode+1, to.Id ); - xWrite8( imm8 ); - } + __forceinline void DQ( const xRegisterSSE& to, u8 imm8 ) const { xOpWrite0F( 0x66, 0x73, (int)Modcode+1, to, imm8 ); } SimdImpl_Shift() {} }; diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index f02d006e4d..f69af460e7 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -208,7 +208,7 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // JMP / Jcc Instructions! - extern void iJcc( JccComparisonType comparison, void* target ); + extern void xJcc( JccComparisonType comparison, void* target ); // ------------------------------------------------------------------------ // Conditional jumps to fixed targets. @@ -217,30 +217,30 @@ namespace x86Emitter // the target (efficient!) 
// - template< typename T > __forceinline void xJE( const T* func ) { iJcc( Jcc_Equal, (void*)(uptr)func ); } - template< typename T > __forceinline void xJZ( const T* func ) { iJcc( Jcc_Zero, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNE( const T* func ) { iJcc( Jcc_NotEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNZ( const T* func ) { iJcc( Jcc_NotZero, (void*)(uptr)func ); } + template< typename T > __forceinline void xJE( const T* func ) { xJcc( Jcc_Equal, (void*)(uptr)func ); } + template< typename T > __forceinline void xJZ( const T* func ) { xJcc( Jcc_Zero, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNE( const T* func ) { xJcc( Jcc_NotEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNZ( const T* func ) { xJcc( Jcc_NotZero, (void*)(uptr)func ); } - template< typename T > __forceinline void xJO( const T* func ) { iJcc( Jcc_Overflow, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNO( const T* func ) { iJcc( Jcc_NotOverflow, (void*)(uptr)func ); } - template< typename T > __forceinline void xJC( const T* func ) { iJcc( Jcc_Carry, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNC( const T* func ) { iJcc( Jcc_NotCarry, (void*)(uptr)func ); } - template< typename T > __forceinline void xJS( const T* func ) { iJcc( Jcc_Signed, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNS( const T* func ) { iJcc( Jcc_Unsigned, (void*)(uptr)func ); } + template< typename T > __forceinline void xJO( const T* func ) { xJcc( Jcc_Overflow, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNO( const T* func ) { xJcc( Jcc_NotOverflow, (void*)(uptr)func ); } + template< typename T > __forceinline void xJC( const T* func ) { xJcc( Jcc_Carry, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNC( const T* func ) { xJcc( Jcc_NotCarry, (void*)(uptr)func ); } + template< typename T > __forceinline void xJS( const T* func ) { xJcc( Jcc_Signed, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNS( const T* func ) { xJcc( Jcc_Unsigned, (void*)(uptr)func ); } - template< typename T > __forceinline void xJPE( const T* func ) { iJcc( Jcc_ParityEven, (void*)(uptr)func ); } - template< typename T > __forceinline void xJPO( const T* func ) { iJcc( Jcc_ParityOdd, (void*)(uptr)func ); } + template< typename T > __forceinline void xJPE( const T* func ) { xJcc( Jcc_ParityEven, (void*)(uptr)func ); } + template< typename T > __forceinline void xJPO( const T* func ) { xJcc( Jcc_ParityOdd, (void*)(uptr)func ); } - template< typename T > __forceinline void xJL( const T* func ) { iJcc( Jcc_Less, (void*)(uptr)func ); } - template< typename T > __forceinline void xJLE( const T* func ) { iJcc( Jcc_LessOrEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJG( const T* func ) { iJcc( Jcc_Greater, (void*)(uptr)func ); } - template< typename T > __forceinline void xJGE( const T* func ) { iJcc( Jcc_GreaterOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJL( const T* func ) { xJcc( Jcc_Less, (void*)(uptr)func ); } + template< typename T > __forceinline void xJLE( const T* func ) { xJcc( Jcc_LessOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJG( const T* func ) { xJcc( Jcc_Greater, (void*)(uptr)func ); } + template< typename T > __forceinline void xJGE( const T* func ) { xJcc( Jcc_GreaterOrEqual, (void*)(uptr)func ); } - template< 
typename T > __forceinline void xJB( const T* func ) { iJcc( Jcc_Below, (void*)(uptr)func ); } - template< typename T > __forceinline void xJBE( const T* func ) { iJcc( Jcc_BelowOrEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJA( const T* func ) { iJcc( Jcc_Above, (void*)(uptr)func ); } - template< typename T > __forceinline void xJAE( const T* func ) { iJcc( Jcc_AboveOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJB( const T* func ) { xJcc( Jcc_Below, (void*)(uptr)func ); } + template< typename T > __forceinline void xJBE( const T* func ) { xJcc( Jcc_BelowOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJA( const T* func ) { xJcc( Jcc_Above, (void*)(uptr)func ); } + template< typename T > __forceinline void xJAE( const T* func ) { xJcc( Jcc_AboveOrEqual, (void*)(uptr)func ); } // ------------------------------------------------------------------------ // Forward Jump Helpers (act as labels!) diff --git a/pcsx2/x86/ix86/ix86_jmp.cpp b/pcsx2/x86/ix86/ix86_jmp.cpp index b9ea2efde1..0954794f8b 100644 --- a/pcsx2/x86/ix86/ix86_jmp.cpp +++ b/pcsx2/x86/ix86/ix86_jmp.cpp @@ -51,7 +51,7 @@ void xSmartJump::SetTarget() xSetPtr( m_baseptr ); u8* const saveme = m_baseptr + GetMaxInstructionSize(); - iJccKnownTarget( m_cc, target, true ); + xJccKnownTarget( m_cc, target, true ); // Copy recompiled data inward if the jump instruction didn't fill the // alloted buffer (means that we optimized things to a j8!) @@ -81,7 +81,7 @@ xSmartJump::~xSmartJump() // slideForward - used internally by xSmartJump to indicate that the jump target is going // to slide forward in the event of an 8 bit displacement. // -__emitinline void Internal::iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ) +__emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ) { // Calculate the potential j8 displacement first, assuming an instruction length of 2: sptr displacement8 = (sptr)target - ((sptr)xGetPtr() + 2); @@ -115,9 +115,9 @@ __emitinline void Internal::iJccKnownTarget( JccComparisonType comparison, void* // Low-level jump instruction! Specify a comparison type and a target in void* form, and // a jump (either 8 or 32 bit) is generated. 
-__emitinline void iJcc( JccComparisonType comparison, void* target ) +__emitinline void xJcc( JccComparisonType comparison, void* target ) { - iJccKnownTarget( comparison, target, false ); + xJccKnownTarget( comparison, target, false ); } } \ No newline at end of file diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 39aa9b0839..53b664ceb2 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -239,12 +239,12 @@ __forceinline void xWrite( T val ) explicit xRegister( const xRegisterBase& src ) : xRegisterBase( src ) {} explicit xRegister( int regId ) : xRegisterBase( regId ) {} - bool operator==( const xRegister& src ) const { return Id == src.Id; } - bool operator!=( const xRegister& src ) const { return Id != src.Id; } + bool operator==( const xRegister& src ) const { return this->Id == src.Id; } + bool operator!=( const xRegister& src ) const { return this->Id != src.Id; } xRegister& operator=( const xRegister& src ) { - xRegisterBase::Id = src.Id; + this->Id = src.Id; return *this; } }; @@ -263,12 +263,12 @@ __forceinline void xWrite( T val ) explicit xRegisterSIMD( const xRegisterBase& src ) : xRegisterBase( src ) {} explicit xRegisterSIMD( int regId ) : xRegisterBase( regId ) {} - bool operator==( const xRegisterSIMD& src ) const { return Id == src.Id; } - bool operator!=( const xRegisterSIMD& src ) const { return Id != src.Id; } + bool operator==( const xRegisterSIMD& src ) const { return this->Id == src.Id; } + bool operator!=( const xRegisterSIMD& src ) const { return this->Id != src.Id; } xRegisterSIMD& operator=( const xRegisterSIMD& src ) { - xRegisterBase::Id = src.Id; + this->Id = src.Id; return *this; } }; @@ -686,7 +686,7 @@ __forceinline void xWrite( T val ) extern void SimdPrefix( u8 prefix, u16 opcode ); extern void EmitSibMagic( uint regfield, const void* address ); extern void EmitSibMagic( uint regfield, const ModSibBase& info ); - extern void iJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ); + extern void xJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ); // Writes a ModRM byte for "Direct" register access forms, which is used for all From e3b217c57a90a58182c2fc170caceebe7101a1da Mon Sep 17 00:00:00 2001 From: arcum42 Date: Fri, 24 Apr 2009 13:38:16 +0000 Subject: [PATCH 140/143] Remove some consts to appease the gcc 4.3.x gods. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1056 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/ix86_instructions.h | 40 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index f69af460e7..af7ee03622 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -217,30 +217,30 @@ namespace x86Emitter // the target (efficient!) 
// - template< typename T > __forceinline void xJE( const T* func ) { xJcc( Jcc_Equal, (void*)(uptr)func ); } - template< typename T > __forceinline void xJZ( const T* func ) { xJcc( Jcc_Zero, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNE( const T* func ) { xJcc( Jcc_NotEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNZ( const T* func ) { xJcc( Jcc_NotZero, (void*)(uptr)func ); } + template< typename T > __forceinline void xJE(T* func ) { xJcc( Jcc_Equal, (void*)(uptr)func ); } + template< typename T > __forceinline void xJZ(T* func ) { xJcc( Jcc_Zero, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNE(T* func ) { xJcc( Jcc_NotEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNZ(T* func ) { xJcc( Jcc_NotZero, (void*)(uptr)func ); } - template< typename T > __forceinline void xJO( const T* func ) { xJcc( Jcc_Overflow, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNO( const T* func ) { xJcc( Jcc_NotOverflow, (void*)(uptr)func ); } - template< typename T > __forceinline void xJC( const T* func ) { xJcc( Jcc_Carry, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNC( const T* func ) { xJcc( Jcc_NotCarry, (void*)(uptr)func ); } - template< typename T > __forceinline void xJS( const T* func ) { xJcc( Jcc_Signed, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNS( const T* func ) { xJcc( Jcc_Unsigned, (void*)(uptr)func ); } + template< typename T > __forceinline void xJO(T* func ) { xJcc( Jcc_Overflow, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNO(T* func ) { xJcc( Jcc_NotOverflow, (void*)(uptr)func ); } + template< typename T > __forceinline void xJC(T* func ) { xJcc( Jcc_Carry, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNC(T* func ) { xJcc( Jcc_NotCarry, (void*)(uptr)func ); } + template< typename T > __forceinline void xJS(T* func ) { xJcc( Jcc_Signed, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNS(T* func ) { xJcc( Jcc_Unsigned, (void*)(uptr)func ); } - template< typename T > __forceinline void xJPE( const T* func ) { xJcc( Jcc_ParityEven, (void*)(uptr)func ); } - template< typename T > __forceinline void xJPO( const T* func ) { xJcc( Jcc_ParityOdd, (void*)(uptr)func ); } + template< typename T > __forceinline void xJPE(T* func ) { xJcc( Jcc_ParityEven, (void*)(uptr)func ); } + template< typename T > __forceinline void xJPO(T* func ) { xJcc( Jcc_ParityOdd, (void*)(uptr)func ); } - template< typename T > __forceinline void xJL( const T* func ) { xJcc( Jcc_Less, (void*)(uptr)func ); } - template< typename T > __forceinline void xJLE( const T* func ) { xJcc( Jcc_LessOrEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJG( const T* func ) { xJcc( Jcc_Greater, (void*)(uptr)func ); } - template< typename T > __forceinline void xJGE( const T* func ) { xJcc( Jcc_GreaterOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJL(T* func ) { xJcc( Jcc_Less, (void*)(uptr)func ); } + template< typename T > __forceinline void xJLE(T* func ) { xJcc( Jcc_LessOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJG(T* func ) { xJcc( Jcc_Greater, (void*)(uptr)func ); } + template< typename T > __forceinline void xJGE(T* func ) { xJcc( Jcc_GreaterOrEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJB( const T* func ) { xJcc( Jcc_Below, (void*)(uptr)func ); } - template< typename T > 
__forceinline void xJBE( const T* func ) { xJcc( Jcc_BelowOrEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJA( const T* func ) { xJcc( Jcc_Above, (void*)(uptr)func ); } - template< typename T > __forceinline void xJAE( const T* func ) { xJcc( Jcc_AboveOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJB(T* func ) { xJcc( Jcc_Below, (void*)(uptr)func ); } + template< typename T > __forceinline void xJBE(T* func ) { xJcc( Jcc_BelowOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJA(T* func ) { xJcc( Jcc_Above, (void*)(uptr)func ); } + template< typename T > __forceinline void xJAE(T* func ) { xJcc( Jcc_AboveOrEqual, (void*)(uptr)func ); } // ------------------------------------------------------------------------ // Forward Jump Helpers (act as labels!) From 4f08dc6bad2ed9f6bf5b2d8201d9cfec1763c768 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Fri, 24 Apr 2009 13:49:00 +0000 Subject: [PATCH 141/143] Linux: A more complete rendition of the GCC compilation fix. Note: GCC requires non-const qualifiers on function pointers and function pointer-templated parameters, but not on void* (didn't know they were that different, did you?). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1057 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/NakedAsm.h | 3 ++- pcsx2/SaveState.h | 3 ++- pcsx2/x86/ix86/implement/jmpcall.h | 2 +- pcsx2/x86/ix86/ix86_instructions.h | 42 +++++++++++++++--------------- pcsx2/x86/ix86/ix86_jmp.cpp | 7 ++--- pcsx2/x86/ix86/ix86_tools.cpp | 3 ++- pcsx2/x86/ix86/ix86_types.h | 2 +- 7 files changed, 33 insertions(+), 29 deletions(-) diff --git a/pcsx2/NakedAsm.h b/pcsx2/NakedAsm.h index 359f95183f..589bdc2957 100644 --- a/pcsx2/NakedAsm.h +++ b/pcsx2/NakedAsm.h @@ -63,4 +63,5 @@ void DispatcherReg(); } #endif -#endif \ No newline at end of file +#endif + diff --git a/pcsx2/SaveState.h b/pcsx2/SaveState.h index d884755a6d..bfe35d415e 100644 --- a/pcsx2/SaveState.h +++ b/pcsx2/SaveState.h @@ -217,4 +217,5 @@ namespace StateRecovery extern void Clear(); } -#endif \ No newline at end of file +#endif + diff --git a/pcsx2/x86/ix86/implement/jmpcall.h b/pcsx2/x86/ix86/implement/jmpcall.h index 4c9d266c87..2986410b10 100644 --- a/pcsx2/x86/ix86/implement/jmpcall.h +++ b/pcsx2/x86/ix86/implement/jmpcall.h @@ -37,7 +37,7 @@ public: // Special form for calling functions. This form automatically resolves the // correct displacement based on the size of the instruction being generated. template< typename T > - __forceinline void operator()( const T* func ) const + __forceinline void operator()( T* func ) const { if( isJmp ) xJccKnownTarget( Jcc_Unconditional, (void*)(uptr)func, false ); // double cast to/from (uptr) needed to appease GCC diff --git a/pcsx2/x86/ix86/ix86_instructions.h b/pcsx2/x86/ix86/ix86_instructions.h index af7ee03622..783adc6435 100644 --- a/pcsx2/x86/ix86/ix86_instructions.h +++ b/pcsx2/x86/ix86/ix86_instructions.h @@ -208,7 +208,7 @@ namespace x86Emitter ////////////////////////////////////////////////////////////////////////////////////////// // JMP / Jcc Instructions! - extern void xJcc( JccComparisonType comparison, void* target ); + extern void xJcc( JccComparisonType comparison, const void* target ); // ------------------------------------------------------------------------ // Conditional jumps to fixed targets. @@ -217,30 +217,30 @@ namespace x86Emitter // the target (efficient!) 
// - template< typename T > __forceinline void xJE(T* func ) { xJcc( Jcc_Equal, (void*)(uptr)func ); } - template< typename T > __forceinline void xJZ(T* func ) { xJcc( Jcc_Zero, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNE(T* func ) { xJcc( Jcc_NotEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNZ(T* func ) { xJcc( Jcc_NotZero, (void*)(uptr)func ); } + template< typename T > __forceinline void xJE( T* func ) { xJcc( Jcc_Equal, (void*)(uptr)func ); } + template< typename T > __forceinline void xJZ( T* func ) { xJcc( Jcc_Zero, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNE( T* func ) { xJcc( Jcc_NotEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNZ( T* func ) { xJcc( Jcc_NotZero, (void*)(uptr)func ); } - template< typename T > __forceinline void xJO(T* func ) { xJcc( Jcc_Overflow, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNO(T* func ) { xJcc( Jcc_NotOverflow, (void*)(uptr)func ); } - template< typename T > __forceinline void xJC(T* func ) { xJcc( Jcc_Carry, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNC(T* func ) { xJcc( Jcc_NotCarry, (void*)(uptr)func ); } - template< typename T > __forceinline void xJS(T* func ) { xJcc( Jcc_Signed, (void*)(uptr)func ); } - template< typename T > __forceinline void xJNS(T* func ) { xJcc( Jcc_Unsigned, (void*)(uptr)func ); } + template< typename T > __forceinline void xJO( T* func ) { xJcc( Jcc_Overflow, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNO( T* func ) { xJcc( Jcc_NotOverflow, (void*)(uptr)func ); } + template< typename T > __forceinline void xJC( T* func ) { xJcc( Jcc_Carry, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNC( T* func ) { xJcc( Jcc_NotCarry, (void*)(uptr)func ); } + template< typename T > __forceinline void xJS( T* func ) { xJcc( Jcc_Signed, (void*)(uptr)func ); } + template< typename T > __forceinline void xJNS( T* func ) { xJcc( Jcc_Unsigned, (void*)(uptr)func ); } - template< typename T > __forceinline void xJPE(T* func ) { xJcc( Jcc_ParityEven, (void*)(uptr)func ); } - template< typename T > __forceinline void xJPO(T* func ) { xJcc( Jcc_ParityOdd, (void*)(uptr)func ); } + template< typename T > __forceinline void xJPE( T* func ) { xJcc( Jcc_ParityEven, (void*)(uptr)func ); } + template< typename T > __forceinline void xJPO( T* func ) { xJcc( Jcc_ParityOdd, (void*)(uptr)func ); } - template< typename T > __forceinline void xJL(T* func ) { xJcc( Jcc_Less, (void*)(uptr)func ); } - template< typename T > __forceinline void xJLE(T* func ) { xJcc( Jcc_LessOrEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJG(T* func ) { xJcc( Jcc_Greater, (void*)(uptr)func ); } - template< typename T > __forceinline void xJGE(T* func ) { xJcc( Jcc_GreaterOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJL( T* func ) { xJcc( Jcc_Less, (void*)(uptr)func ); } + template< typename T > __forceinline void xJLE( T* func ) { xJcc( Jcc_LessOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJG( T* func ) { xJcc( Jcc_Greater, (void*)(uptr)func ); } + template< typename T > __forceinline void xJGE( T* func ) { xJcc( Jcc_GreaterOrEqual, (void*)(uptr)func ); } - template< typename T > __forceinline void xJB(T* func ) { xJcc( Jcc_Below, (void*)(uptr)func ); } - template< typename T > __forceinline void xJBE(T* func ) { xJcc( Jcc_BelowOrEqual, (void*)(uptr)func ); } - template< 
typename T > __forceinline void xJA(T* func ) { xJcc( Jcc_Above, (void*)(uptr)func ); } - template< typename T > __forceinline void xJAE(T* func ) { xJcc( Jcc_AboveOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJB( T* func ) { xJcc( Jcc_Below, (void*)(uptr)func ); } + template< typename T > __forceinline void xJBE( T* func ) { xJcc( Jcc_BelowOrEqual, (void*)(uptr)func ); } + template< typename T > __forceinline void xJA( T* func ) { xJcc( Jcc_Above, (void*)(uptr)func ); } + template< typename T > __forceinline void xJAE( T* func ) { xJcc( Jcc_AboveOrEqual, (void*)(uptr)func ); } // ------------------------------------------------------------------------ // Forward Jump Helpers (act as labels!) diff --git a/pcsx2/x86/ix86/ix86_jmp.cpp b/pcsx2/x86/ix86/ix86_jmp.cpp index 0954794f8b..ab5611ce9e 100644 --- a/pcsx2/x86/ix86/ix86_jmp.cpp +++ b/pcsx2/x86/ix86/ix86_jmp.cpp @@ -81,7 +81,7 @@ xSmartJump::~xSmartJump() // slideForward - used internally by xSmartJump to indicate that the jump target is going // to slide forward in the event of an 8 bit displacement. // -__emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ) +__emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, const void* target, bool slideForward ) { // Calculate the potential j8 displacement first, assuming an instruction length of 2: sptr displacement8 = (sptr)target - ((sptr)xGetPtr() + 2); @@ -115,9 +115,10 @@ __emitinline void Internal::xJccKnownTarget( JccComparisonType comparison, void* // Low-level jump instruction! Specify a comparison type and a target in void* form, and // a jump (either 8 or 32 bit) is generated. -__emitinline void xJcc( JccComparisonType comparison, void* target ) +__emitinline void xJcc( JccComparisonType comparison, const void* target ) { xJccKnownTarget( comparison, target, false ); } -} \ No newline at end of file +} + diff --git a/pcsx2/x86/ix86/ix86_tools.cpp b/pcsx2/x86/ix86/ix86_tools.cpp index 59463ef84e..91a37d1333 100644 --- a/pcsx2/x86/ix86/ix86_tools.cpp +++ b/pcsx2/x86/ix86/ix86_tools.cpp @@ -253,4 +253,5 @@ __forceinline void FreezeXMMRegs_(int save) } #ifndef __INTEL_COMPILER } -#endif \ No newline at end of file +#endif + diff --git a/pcsx2/x86/ix86/ix86_types.h b/pcsx2/x86/ix86/ix86_types.h index 53b664ceb2..78cd2538a5 100644 --- a/pcsx2/x86/ix86/ix86_types.h +++ b/pcsx2/x86/ix86/ix86_types.h @@ -686,7 +686,7 @@ __forceinline void xWrite( T val ) extern void SimdPrefix( u8 prefix, u16 opcode ); extern void EmitSibMagic( uint regfield, const void* address ); extern void EmitSibMagic( uint regfield, const ModSibBase& info ); - extern void xJccKnownTarget( JccComparisonType comparison, void* target, bool slideForward ); + extern void xJccKnownTarget( JccComparisonType comparison, const void* target, bool slideForward ); // Writes a ModRM byte for "Direct" register access forms, which is used for all From 7f096c63ad955510689e5748c498e3e196ae3ddb Mon Sep 17 00:00:00 2001 From: sudonim1 Date: Sat, 25 Apr 2009 00:39:07 +0000 Subject: [PATCH 142/143] Break up very long recompiled blocks. Perform a branch test before jumping to the next block. 
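Roughly, the change is two small pieces; the sketch below just restates the diff
that follows, using the recompiler's own names (as I read it, recPtr marks where
the current block's emitted code starts and s_nEndBlock is the guest PC at which
the block stops compiling):

    // recompileNextInstruction(): once the emitted x86 code for this block has
    // grown past 0x1000 bytes, force the block to end at the current guest pc,
    // so oversized blocks get split instead of growing without bound.
    if (!delayslot && x86Ptr - recPtr > 0x1000)
        s_nEndBlock = pc;

    // Block epilogue: end with a branch test (presumably the usual cycle/event
    // check) rather than a plain iBranch() straight into the next block.
    iFlushCall(FLUSH_EVERYTHING);
    iBranchTest(pc);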
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1058 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86-32/iR5900-32.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 43ec86d7e2..36bbfb7bfc 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -1219,6 +1219,9 @@ void recompileNextInstruction(int delayslot) // _freeMMXregs(); // _flushCachedRegs(); // g_cpuHasConstReg = 1; + + if (!delayslot && x86Ptr - recPtr > 0x1000) + s_nEndBlock = pc; } extern u32 psxdump; @@ -1724,7 +1727,7 @@ StartRecomp: if( willbranch3 || !branch) { iFlushCall(FLUSH_EVERYTHING); - iBranch(pc, 0); + iBranchTest(pc); } } From a6cbab677f256c416405ba0cddbf06a2654ed41f Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Sat, 25 Apr 2009 06:39:16 +0000 Subject: [PATCH 143/143] microVU: -fixed problems with branches jumping to incorrect addresses -finished implementing clip flag instances -started adding operand info to logging (also logs branch jump-to addresses) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1059 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iVU0micro.cpp | 2 +- pcsx2/x86/iVU1micro.cpp | 2 +- pcsx2/x86/microVU.cpp | 13 +++++--- pcsx2/x86/microVU.h | 4 +-- pcsx2/x86/microVU_Alloc.h | 3 +- pcsx2/x86/microVU_Analyze.inl | 1 + pcsx2/x86/microVU_Compile.inl | 37 +++++++++++++-------- pcsx2/x86/microVU_Execute.inl | 3 -- pcsx2/x86/microVU_Log.inl | 5 ++- pcsx2/x86/microVU_Lower.inl | 61 +++++++++++++++-------------------- pcsx2/x86/microVU_Misc.h | 10 ++++-- pcsx2/x86/microVU_Tables.inl | 3 +- pcsx2/x86/microVU_Upper.inl | 2 +- 13 files changed, 78 insertions(+), 68 deletions(-) diff --git a/pcsx2/x86/iVU0micro.cpp b/pcsx2/x86/iVU0micro.cpp index bbc9b64cc9..1a2ee09ea7 100644 --- a/pcsx2/x86/iVU0micro.cpp +++ b/pcsx2/x86/iVU0micro.cpp @@ -84,7 +84,7 @@ namespace VU0micro FreezeXMMRegs(1); FreezeMMXRegs(1); - runVUrec(VU0.VI[REG_TPC].UL, 0x20000, 0); + runVUrec(VU0.VI[REG_TPC].UL, 5000, 0); FreezeXMMRegs(0); FreezeMMXRegs(0); } diff --git a/pcsx2/x86/iVU1micro.cpp b/pcsx2/x86/iVU1micro.cpp index 93c43b36af..d282778f4c 100644 --- a/pcsx2/x86/iVU1micro.cpp +++ b/pcsx2/x86/iVU1micro.cpp @@ -143,7 +143,7 @@ namespace VU1micro FreezeXMMRegs(1); FreezeMMXRegs(1); - runVUrec(VU1.VI[REG_TPC].UL, 20000, 1); + runVUrec(VU1.VI[REG_TPC].UL, 5000, 1); FreezeXMMRegs(0); FreezeMMXRegs(0); } diff --git a/pcsx2/x86/microVU.cpp b/pcsx2/x86/microVU.cpp index ae11ad3b1a..daa5f01ad3 100644 --- a/pcsx2/x86/microVU.cpp +++ b/pcsx2/x86/microVU.cpp @@ -69,12 +69,8 @@ microVUt(void) mVUreset() { mVUdispatcherA(); mVUdispatcherB(); - // Program Variables + // Clear All Program Data memset(&mVU->prog, 0, sizeof(mVU->prog)); - mVU->prog.finished = 1; - mVU->prog.cleared = 1; - mVU->prog.cur = -1; - mVU->prog.total = -1; // Create Block Managers for (int i = 0; i <= mVU->prog.max; i++) { @@ -83,6 +79,13 @@ microVUt(void) mVUreset() { } } + // Program Variables + mVU->prog.finished = 1; + mVU->prog.cleared = 1; + mVU->prog.cur = -1; + mVU->prog.total = -1; + //mVU->prog.lpState = &mVU->prog.prog[15].allocInfo.block.pState; // Blank Pipeline State (ToDo: finish implementation) + // Setup Dynarec Cache Limits for Each Program u8* z = (mVU->cache + 512); // Dispatcher Code is in first 512 bytes for (int i = 0; i <= mVU->prog.max; i++) { diff --git a/pcsx2/x86/microVU.h b/pcsx2/x86/microVU.h index 0447c68676..78cd364220 100644 --- a/pcsx2/x86/microVU.h +++ b/pcsx2/x86/microVU.h @@ -18,7 +18,7 @@ #pragma 
once #define mVUdebug // Prints Extra Info to Console -//#define mVUlogProg // Dumps MicroPrograms into microVU0.txt/microVU1.txt +#define mVUlogProg // Dumps MicroPrograms into microVU0.txt/microVU1.txt #include "Common.h" #include "VU.h" #include "GS.h" @@ -51,7 +51,7 @@ public: if (listSize < 0) return NULL; //if (blockList[0].pState.needExactMatch) { // Needs Detailed Search (Exact Match of Pipeline State) for (int i = 0; i <= listSize; i++) { - if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo))) return &blockList[i]; + if (!memcmp(pState, &blockList[i].pState, sizeof(microRegInfo) - 1)) return &blockList[i]; } /*} else { // Can do Simple Search (Only Matches the Important Pipeline Stuff) diff --git a/pcsx2/x86/microVU_Alloc.h b/pcsx2/x86/microVU_Alloc.h index b1f1bbdc24..a16c3f4396 100644 --- a/pcsx2/x86/microVU_Alloc.h +++ b/pcsx2/x86/microVU_Alloc.h @@ -36,7 +36,8 @@ struct microRegInfo { u8 p; u8 r; u8 xgkick; - u8 needExactMatch; // If set, block needs an exact match of pipeline state + u8 clip; + u8 needExactMatch; // If set, block needs an exact match of pipeline state (needs to be last byte in struct) }; struct microTempRegInfo { diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 0605559741..7a1992570f 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -101,6 +101,7 @@ microVUt(void) mVUanalyzeFMAC3(int Fd, int Fs, int Ft) { microVUt(void) mVUanalyzeFMAC4(int Fs, int Ft) { microVU* mVU = mVUx; + mVUinfo |= doClip; analyzeReg1(Fs); analyzeReg4(Ft); } diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 09a81815f5..aa60df2037 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -28,9 +28,9 @@ mVUprint("mVUcompile branchCase"); \ CMP16ItoM((uptr)&mVU->branch, 0); \ incPC2(1); \ - pBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \ + bBlock = mVUblocks[iPC/2]->search((microRegInfo*)&mVUregs); \ incPC2(-1); \ - if (pBlock) { nJMPcc((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 6)); } \ + if (bBlock) { nJMPcc((uptr)pBlock->x86ptrStart - ((uptr)x86Ptr + 6)); } \ else { ajmp = JMPcc((uptr)0); } \ break @@ -94,8 +94,9 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { } // Status/Mac Flags Setup Code - int xStatus = 8, xMac = 8, xClip = 8; // Flag Instances start at #0 on every block ((8&3) == 0) - int pStatus = 3, pMac = 3, pClip = 3; + int xStatus = 8, xMac = 8; // Flag Instances start at #0 on every block ((8&3) == 0) + int pStatus = 3, pMac = 3; + int xClip = mVUregs.clip + 8, pClip = mVUregs.clip + 7; // Clip Instance starts from where it left off int xS = 0, yS = 1, zS = 0; int xM = 0, yM = 1, zM = 0; int xC = 0, yC = 1, zC = 0; @@ -122,6 +123,7 @@ microVUt(void) mVUsetFlags(int* bStatus, int* bMac) { incPC2(2); } mVUcount = xCount; // Restore count + mVUregs.clip = xClip&3; // Note: Clip timing isn't cycle-accurate between block linking; but hopefully doesn't matter // Setup Last 4 instances of Status/Mac flags (needed for accurate block linking) iPC = endPC; @@ -238,13 +240,15 @@ microVUt(void) mVUdivSet() { microVUt(void) mVUendProgram() { microVU* mVU = mVUx; - incCycles(55); // Ensures Valid P/Q instances - mVUcycles -= 55; + incCycles(100); // Ensures Valid P/Q instances (And sets all cycle data to 0) + mVUcycles -= 100; if (mVU->q) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe5); } SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVU->p ? 
3 : 2); SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ); + //memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); + //MOV32ItoM((uptr)&mVU->prog.lpState, (int)&mVUblock.pState); // Save pipeline state (clipflag instance) AND32ItoM((uptr)µVU0.regs->VI[REG_VPU_STAT].UL, (vuIndex ? ~0x100 : ~0x001)); // VBS0/VBS1 flag AND32ItoM((uptr)&mVU->regs->vifRegs->stat, ~0x4); // Clear VU 'is busy' signal for vif MOV32ItoM((uptr)&mVU->regs->VI[REG_TPC].UL, xPC); @@ -290,6 +294,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { for (int branch = 0;; ) { incPC(1); + mVUinfo = 0; incCycles(1); startLoop(); mVUopU(); @@ -326,6 +331,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { if (!isBdelay) { incPC(1); } else { + microBlock* bBlock = NULL; u32* ajmp = 0; switch (mVUbranch) { case 3: branchCase(JZ32, JNZ32); // IBEQ @@ -365,7 +371,7 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { } // Conditional Branches mVUprint("mVUcompile conditional branch"); - if (pBlock) { // Branch non-taken has already been compiled + if (bBlock) { // Branch non-taken has already been compiled incPC(-3); // Go back to branch opcode (to get branch imm addr) // Check if branch-block has already been compiled pBlock = mVUblocks[branchAddr/8]->search((microRegInfo*)&mVUregs); @@ -375,12 +381,17 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { } else { uptr jumpAddr; - incPC(1); // Get PC for branch not-taken + u32 bPC = iPC; // mVUcompile can modify iPC and mVUregs, so back them up + memcpy_fast(&pBlock->pStateEnd, &mVUregs, sizeof(microRegInfo)); + + incPC2(1); // Get PC for branch not-taken if (!vuIndex) mVUcompileVU0(xPC, (uptr)&mVUregs); else mVUcompileVU1(xPC, (uptr)&mVUregs); - incPC(-4); // Go back to branch opcode (to get branch imm addr) - if (!vuIndex) jumpAddr = (uptr)mVUcompileVU0(branchAddr, (uptr)&mVUregs); - else jumpAddr = (uptr)mVUcompileVU1(branchAddr, (uptr)&mVUregs); + + iPC = bPC; + incPC(-3); // Go back to branch opcode (to get branch imm addr) + if (!vuIndex) jumpAddr = (uptr)mVUcompileVU0(branchAddr, (uptr)&pBlock->pStateEnd); + else jumpAddr = (uptr)mVUcompileVU1(branchAddr, (uptr)&pBlock->pStateEnd); *ajmp = (jumpAddr - ((uptr)ajmp + 4)); } return thisPtr; @@ -391,8 +402,8 @@ microVUt(void*) __fastcall mVUcompile(u32 startPC, uptr pState) { // Do E-bit end stuff here mVUendProgram(); - //ToDo: Save pipeline state? - return thisPtr; + + return thisPtr; //ToDo: Save pipeline state? } void* __fastcall mVUcompileVU0(u32 startPC, uptr pState) { return mVUcompile<0>(startPC, pState); } diff --git a/pcsx2/x86/microVU_Execute.inl b/pcsx2/x86/microVU_Execute.inl index ebb5b3c633..ff892ab0ca 100644 --- a/pcsx2/x86/microVU_Execute.inl +++ b/pcsx2/x86/microVU_Execute.inl @@ -103,9 +103,6 @@ microVUt(void) mVUdispatcherB() { } SSE_MOVAPS_XMM_to_M128((uptr)&mVU->regs->ACC.UL[0], xmmACC); - //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_Q].UL, xmmPQ); // ToDo: Ensure Correct Q/P instances - //SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0); // wzyx = PPPP - //SSE_MOVSS_XMM_to_M32((uptr)&mVU->regs->VI[REG_P].UL, xmmPQ); // Restore cpu state POP32R(EDI); diff --git a/pcsx2/x86/microVU_Log.inl b/pcsx2/x86/microVU_Log.inl index c04ea798a2..4aaf139880 100644 --- a/pcsx2/x86/microVU_Log.inl +++ b/pcsx2/x86/microVU_Log.inl @@ -47,6 +47,7 @@ microVUx(void) __mVULog(const char* fmt, ...) 
 {
 microVUt(void) __mVUdumpProgram(int progIndex) {
 microVU* mVU = mVUx;
 bool bitX[7];
+ //mVU->prog.cur = progIndex; // Needed in order to set iPC
 mVUlog("*********************\n", progIndex);
 mVUlog("* Micro-Program #%02d *\n", progIndex);
 mVUlog("*********************\n\n", progIndex);
@@ -68,7 +69,8 @@ microVUt(void) __mVUdumpProgram(int progIndex) {
 if (mVU->code & _Mbit_) {bitX[2] = 1; bitX[5] = 1;}
 if (mVU->code & _Dbit_) {bitX[3] = 1; bitX[5] = 1;}
 if (mVU->code & _Tbit_) {bitX[4] = 1; bitX[5] = 1;}
-
+
+ iPC = (i+1)/4;
 mVUopU();
 if (bitX[5]) {
@@ -81,6 +83,7 @@ microVUt(void) __mVUdumpProgram(int progIndex) {
 mVUlog(")");
 }
+ iPC = i/4;
 mVU->code = mVU->prog.prog[progIndex].data[i];
 mVUlog("\n[%04x] (%08x) ", i*4, mVU->code);
 mVUopL();
diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl
index 0ae4c5011b..2522ef83a0 100644
--- a/pcsx2/x86/microVU_Lower.inl
+++ b/pcsx2/x86/microVU_Lower.inl
@@ -431,7 +431,6 @@ microVUf(void) mVU_ESUM() {
 microVUf(void) mVU_FCAND() {
 microVU* mVU = mVUx;
- pass1 { mVUprint("clip broken"); }
 pass2 {
 mVUallocCFLAGa(gprT1, fvcInstance);
 AND32ItoR(gprT1, _Imm24_);
@@ -444,7 +443,6 @@ microVUf(void) mVU_FCAND() {
 microVUf(void) mVU_FCEQ() {
 microVU* mVU = mVUx;
- pass1 { mVUprint("clip broken"); }
 pass2 {
 mVUallocCFLAGa(gprT1, fvcInstance);
 XOR32ItoR(gprT1, _Imm24_);
@@ -457,7 +455,6 @@ microVUf(void) mVU_FCEQ() {
 microVUf(void) mVU_FCGET() {
 microVU* mVU = mVUx;
- pass1 { mVUprint("clip broken"); }
 pass2 {
 mVUallocCFLAGa(gprT1, fvcInstance);
 AND32ItoR(gprT1, 0xfff);
@@ -468,7 +465,6 @@ microVUf(void) mVU_FCGET() {
 microVUf(void) mVU_FCOR() {
 microVU* mVU = mVUx;
- pass1 { mVUprint("clip broken"); }
 pass2 {
 mVUallocCFLAGa(gprT1, fvcInstance);
 OR32ItoR(gprT1, _Imm24_);
@@ -481,7 +477,7 @@ microVUf(void) mVU_FCOR() {
 microVUf(void) mVU_FCSET() {
 microVU* mVU = mVUx;
- pass1 { mVUprint("clip broken"); }
+ pass1 { mVUinfo |= doClip; }
 pass2 {
 MOV32ItoR(gprT1, _Imm24_);
 mVUallocCFLAGb(gprT1, fcInstance);
@@ -660,6 +656,7 @@ microVUf(void) mVU_ISUB() {
 mVUallocVIa(gprT1, _Fs_);
 mVUallocVIa(gprT2, _Ft_);
 SUB16RtoR(gprT1, gprT2);
+ mVUallocVIb(gprT1, _Fd_);
 }
 else if (!isMMX(_Fd_)) {
 XOR32RtoR(gprT1, gprT1);
@@ -856,7 +853,7 @@ microVUf(void) mVU_LQ() {
 mVUsaveReg(xmmFt, (uptr)&mVU->regs->VF[_Ft_].UL[0], _X_Y_Z_W, 1);
 }
 }
- pass3 { mVUlog("LQ"); }
+ pass3 { mVUlog("LQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Ft_, _Fs_, _Imm11_); }
 }
 microVUf(void) mVU_LQD() {
@@ -878,7 +875,7 @@ microVUf(void) mVU_LQD() {
 }
 }
 }
- pass3 { mVUlog("LQD"); }
+ pass3 { mVUlog("LQD.%s vf%02d, --vi%02d", _XYZW_String, _Ft_, _Fs_); }
 }
 microVUf(void) mVU_LQI() {
@@ -901,7 +898,7 @@ microVUf(void) mVU_LQI() {
 mVUallocVIb(gprT2, _Fs_);
 }
 }
- pass3 { mVUlog("LQI"); }
+ pass3 { mVUlog("LQI.%s vf%02d, vi%02d++", _XYZW_String, _Ft_, _Fs_); }
 }
 //------------------------------------------------------------------
@@ -924,7 +921,7 @@ microVUf(void) mVU_SQ() {
 mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
 }
 }
- pass3 { mVUlog("SQ"); }
+ pass3 { mVUlog("SQ.%s vf%02d, vi%02d + %d", _XYZW_String, _Fs_, _Ft_, _Imm11_); }
 }
 microVUf(void) mVU_SQD() {
@@ -944,7 +941,7 @@ microVUf(void) mVU_SQD() {
 mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
 }
 }
- pass3 { mVUlog("SQD"); }
+ pass3 { mVUlog("SQD.%s vf%02d, --vi%02d", _XYZW_String, _Fs_, _Ft_); }
 }
 microVUf(void) mVU_SQI() {
@@ -962,10 +959,10 @@ microVUf(void) mVU_SQI() {
 getReg7(xmmFs, _Fs_);
 mVUsaveReg2(xmmFs, gprT1, (uptr)mVU->regs->Mem, _X_Y_Z_W);
 ADD16ItoR(gprT2, 1);
- mVUallocVIb(gprT2, _Ft_); // ToDo: Backup to memory check.
+ mVUallocVIb(gprT2, _Ft_);
 }
 }
- pass3 { mVUlog("SQI"); }
+ pass3 { mVUlog("SQI.%s vf%02d, vi%02d++", _XYZW_String, _Fs_, _Ft_); }
 }
 //------------------------------------------------------------------
@@ -983,7 +980,7 @@ microVUf(void) mVU_RINIT() {
 }
 else MOV32ItoR(gprR, 0x3f800000);
 }
- pass3 { mVUlog("RINIT"); }
+ pass3 { mVUlog("RINIT R, vf%02d%s", _Fs_, _Fsf_String); }
 }
 microVUt(void) mVU_RGET_() {
@@ -1000,7 +997,7 @@ microVUf(void) mVU_RGET() {
 microVU* mVU = mVUx;
 pass1 { mVUanalyzeR2(_Ft_, 1); }
 pass2 { mVU_RGET_(); }
- pass3 { mVUlog("RGET"); }
+ pass3 { mVUlog("RGET.%s vf%02d, R", _XYZW_String, _Ft_); }
 }
 microVUf(void) mVU_RNEXT() {
@@ -1023,7 +1020,7 @@ microVUf(void) mVU_RNEXT() {
 OR32ItoR (gprR, 0x3f800000);
 mVU_RGET_();
 }
- pass3 { mVUlog("RNEXT"); }
+ pass3 { mVUlog("RNEXT.%s vf%02d, R", _XYZW_String, _Ft_); }
 }
 microVUf(void) mVU_RXOR() {
@@ -1036,7 +1033,7 @@ microVUf(void) mVU_RXOR() {
 XOR32RtoR(gprR, gprT1);
 }
 }
- pass3 { mVUlog("RXOR"); }
+ pass3 { mVUlog("RXOR R, vf%02d%s", _Fs_, _Fsf_String); }
 }
 //------------------------------------------------------------------
@@ -1066,7 +1063,7 @@ microVUf(void) mVU_XTOP() {
 MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->top);
 mVUallocVIb(gprT1, _Ft_);
 }
- pass3 { mVUlog("XTOP"); }
+ pass3 { mVUlog("XTOP vi%02d", _Ft_); }
 }
 microVUf(void) mVU_XITOP() {
@@ -1076,7 +1073,7 @@ microVUf(void) mVU_XITOP() {
 MOVZX32M16toR(gprT1, (uptr)&mVU->regs->vifRegs->itop);
 mVUallocVIb(gprT1, _Ft_);
 }
- pass3 { mVUlog("XITOP"); }
+ pass3 { mVUlog("XITOP vi%02d", _Ft_); }
 }
 //------------------------------------------------------------------
@@ -1101,7 +1098,7 @@ microVUf(void) mVU_XGKICK() {
 CALLFunc((uptr)mVU_XGKICK_);
 POP32R(gprR); // Restore
 }
- pass3 { mVUlog("XGKICK"); }
+ pass3 { mVUlog("XGKICK vi%02d", _Fs_); }
 }
 //------------------------------------------------------------------
@@ -1111,7 +1108,7 @@ microVUf(void) mVU_XGKICK() {
 microVUf(void) mVU_B() {
 microVU* mVU = mVUx;
 mVUbranch = 1;
- pass3 { mVUlog("B"); }
+ pass3 { mVUlog("B [%04x]", branchAddr); }
 }
 microVUf(void) mVU_BAL() {
 microVU* mVU = mVUx;
@@ -1120,11 +1117,8 @@ microVUf(void) mVU_BAL() {
 pass2 {
 MOV32ItoR(gprT1, bSaveAddr);
 mVUallocVIb(gprT1, _Ft_);
- // Note: Not sure if the lower instruction in the branch-delay slot
- // should read the previous VI-value or the VI-value resulting from this branch.
- // This code does the latter...
 }
- pass3 { mVUlog("BAL"); }
+ pass3 { mVUlog("BAL vi%02d [%04x]", _Ft_, branchAddr); }
 }
 microVUf(void) mVU_IBEQ() {
 microVU* mVU = mVUx;
@@ -1137,7 +1131,7 @@ microVUf(void) mVU_IBEQ() {
 else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); }
 MOV32RtoM((uptr)&mVU->branch, gprT1);
 }
- pass3 { mVUlog("IBEQ"); }
+ pass3 { mVUlog("IBEQ vi%02d, vi%02d [%04x]", _Ft_, _Fs_, branchAddr); }
 }
 microVUf(void) mVU_IBGEZ() {
 microVU* mVU = mVUx;
@@ -1148,7 +1142,7 @@ microVUf(void) mVU_IBGEZ() {
 else mVUallocVIa(gprT1, _Fs_);
 MOV32RtoM((uptr)&mVU->branch, gprT1);
 }
- pass3 { mVUlog("IBGEZ"); }
+ pass3 { mVUlog("IBGEZ vi%02d [%04x]", _Fs_, branchAddr); }
 }
 microVUf(void) mVU_IBGTZ() {
 microVU* mVU = mVUx;
@@ -1159,7 +1153,7 @@ microVUf(void) mVU_IBGTZ() {
 else mVUallocVIa(gprT1, _Fs_);
 MOV32RtoM((uptr)&mVU->branch, gprT1);
 }
- pass3 { mVUlog("IBGTZ"); }
+ pass3 { mVUlog("IBGTZ vi%02d [%04x]", _Fs_, branchAddr); }
 }
 microVUf(void) mVU_IBLEZ() {
 microVU* mVU = mVUx;
@@ -1170,7 +1164,7 @@ microVUf(void) mVU_IBLEZ() {
 else mVUallocVIa(gprT1, _Fs_);
 MOV32RtoM((uptr)&mVU->branch, gprT1);
 }
- pass3 { mVUlog("IBLEZ"); }
+ pass3 { mVUlog("IBLEZ vi%02d [%04x]", _Fs_, branchAddr); }
 }
 microVUf(void) mVU_IBLTZ() {
 microVU* mVU = mVUx;
@@ -1181,7 +1175,7 @@ microVUf(void) mVU_IBLTZ() {
 else mVUallocVIa(gprT1, _Fs_);
 MOV32RtoM((uptr)&mVU->branch, gprT1);
 }
- pass3 { mVUlog("IBLTZ"); }
+ pass3 { mVUlog("IBLTZ vi%02d [%04x]", _Fs_, branchAddr); }
 }
 microVUf(void) mVU_IBNE() {
 microVU* mVU = mVUx;
@@ -1194,7 +1188,7 @@ microVUf(void) mVU_IBNE() {
 else { mVUallocVIa(gprT2, _Ft_); XOR32RtoR(gprT1, gprT2); }
 MOV32RtoM((uptr)&mVU->branch, gprT1);
 }
- pass3 { mVUlog("IBNE"); }
+ pass3 { mVUlog("IBNE vi%02d, vi%02d [%04x]", _Ft_, _Fs_, branchAddr); }
 }
 microVUf(void) mVU_JR() {
 microVU* mVU = mVUx;
@@ -1205,7 +1199,7 @@ microVUf(void) mVU_JR() {
 else mVUallocVIa(gprT1, _Fs_);
 MOV32RtoM((uptr)&mVU->branch, gprT1);
 }
- pass3 { mVUlog("JR"); }
+ pass3 { mVUlog("JR [vi%02d]", _Fs_); }
 }
 microVUf(void) mVU_JALR() {
 microVU* mVU = mVUx;
@@ -1217,11 +1211,8 @@ microVUf(void) mVU_JALR() {
 MOV32RtoM((uptr)&mVU->branch, gprT1);
 MOV32ItoR(gprT1, bSaveAddr);
 mVUallocVIb(gprT1, _Ft_);
- // Note: Not sure if the lower instruction in the branch-delay slot
- // should read the previous VI-value or the VI-value resulting from this branch.
- // This code does the latter...
 }
- pass3 { mVUlog("JALR"); }
+ pass3 { mVUlog("JALR vi%02d, [vi%02d]", _Ft_, _Fs_); }
 }
 #endif //PCSX2_MICROVU
diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h
index 5046494b53..fb2eb21fec 100644
--- a/pcsx2/x86/microVU_Misc.h
+++ b/pcsx2/x86/microVU_Misc.h
@@ -89,8 +89,8 @@ declareAllVariables
 #define _Ftf_ ((mVU->code >> 23) & 0x03)
 #define _Imm5_ (((mVU->code & 0x400) ? 0xfff0 : 0) | ((mVU->code >> 6) & 0xf))
-#define _Imm11_ (s32)(mVU->code & 0x400 ? 0xfffffc00 | (mVU->code & 0x3ff) : mVU->code & 0x3ff)
-#define _Imm12_ (((mVU->code >> 21 ) & 0x1) << 11) | (mVU->code & 0x7ff)
+#define _Imm11_ (s32)((mVU->code & 0x400) ? (0xfffffc00 | (mVU->code & 0x3ff)) : mVU->code & 0x3ff)
+#define _Imm12_ (((mVU->code >> 21) & 0x1) << 11) | (mVU->code & 0x7ff)
 #define _Imm15_ (((mVU->code >> 10) & 0x7800) | (mVU->code & 0x7ff))
 #define _Imm24_ (u32)(mVU->code & 0xffffff)
@@ -162,8 +162,12 @@ declareAllVariables
 #define incPC2(x) { iPC = ((iPC + x) & (mVU->progSize-1)); }
 #define incCycles(x) { mVUincCycles(x); }
 #define bSaveAddr ((xPC + (2 * 8)) & ((vuIndex) ? 0x3ff8:0xff8))
-#define branchAddr ((xPC + (_Imm11_ * 8)) & ((vuIndex) ? 0x3ff8:0xff8))
+#define branchAddr ((xPC + 8 + (_Imm11_ * 8)) & ((vuIndex) ? 0x3ff8:0xff8))
 #define shufflePQ (((mVU->q) ? 0xb0 : 0xe0) | ((mVU->q) ? 0x01 : 0x04))
+#define _Fsf_String ((_Fsf_ == 3) ? "w" : ((_Fsf_ == 2) ? "z" : ((_Fsf_ == 1) ? "y" : "x")))
+#define _Ftf_String ((_Ftf_ == 3) ? "w" : ((_Ftf_ == 2) ? "z" : ((_Ftf_ == 1) ? "y" : "x")))
+#define xyzwStr(x,s) (_X_Y_Z_W == x) ? s :
+#define _XYZW_String (xyzwStr(1, "w") (xyzwStr(2, "z") (xyzwStr(3, "zw") (xyzwStr(4, "y") (xyzwStr(5, "yw") (xyzwStr(6, "yz") (xyzwStr(7, "yzw") (xyzwStr(8, "x") (xyzwStr(9, "xw") (xyzwStr(10, "xz") (xyzwStr(11, "xzw") (xyzwStr(12, "xy") (xyzwStr(13, "xyw") (xyzwStr(14, "xyz") "xyzw"))))))))))))))
 #define _isNOP (1<<0) // Skip Lower Instruction
diff --git a/pcsx2/x86/microVU_Tables.inl b/pcsx2/x86/microVU_Tables.inl
index 41b8b9a1c7..26dc6ea8cd 100644
--- a/pcsx2/x86/microVU_Tables.inl
+++ b/pcsx2/x86/microVU_Tables.inl
@@ -1077,8 +1077,7 @@ microVUf(void) mVULowerOP_T3_11() { doTableStuff(mVULowerOP_T3_11_OPCODE, ((mVUg
 microVUf(void) mVUopU() { doTableStuff(mVU_UPPER_OPCODE, (mVUgetCode & 0x3f)); } // Gets Upper Opcode
 microVUf(void) mVUopL() { doTableStuff(mVULOWER_OPCODE, (mVUgetCode >> 25)); } // Gets Lower Opcode
 microVUf(void) mVUunknown() {
- SysPrintf("mVUunknown<%d,%d> : Unknown Micro VU opcode called (%x)\n", vuIndex, recPass, mVUgetCode);
- //pass2 { write8(0xcc); }
+ pass2 { SysPrintf("microVU%d: Unknown Micro VU opcode called (%x)\n", vuIndex, mVUgetCode); }
 pass3 { mVUlog("Unknown", mVUgetCode); }
 }
diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl
index 51a2a325d6..2d45895a63 100644
--- a/pcsx2/x86/microVU_Upper.inl
+++ b/pcsx2/x86/microVU_Upper.inl
@@ -603,7 +603,7 @@ microVUf(void) mVU_ITOF12() { mVU_ITOFx((uptr)mVU_ITOF_12); p
 microVUf(void) mVU_ITOF15() { mVU_ITOFx((uptr)mVU_ITOF_15); pass3 { mVUlog("ITOF15"); } }
 microVUf(void) mVU_CLIP() {
 microVU* mVU = mVUx;
- pass1 { mVUanalyzeFMAC4(_Fs_, _Ft_); mVUprint("clip broken"); }
+ pass1 { mVUanalyzeFMAC4(_Fs_, _Ft_); }
 pass2 {
 int Fs, Ft;
 mVUallocFMAC17a(Fs, Ft);