From 4286c20dda76bdddfed11fc0a90fce573eda6c25 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Tue, 3 Mar 2009 09:27:58 +0000 Subject: [PATCH] Nneeve coded a new FPU clamp mode "Full" that can help some games which wouldn't work with the other modes. This fixes the Digimon menu for example (Gamefix will stay a while longer though, until we confirm ingame is fine as well). He also updated the advanced dialog a bit. Remember that this mode can break games if VU clamp is below "Extra + preserve sign" git-svn-id: http://pcsx2.googlecode.com/svn/trunk@665 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Misc.h | 1 + pcsx2/windows/AdvancedDlg.cpp | 10 +- pcsx2/windows/VCprojects/pcsx2_2008.vcproj | 4 + pcsx2/windows/pcsx2.rc | 7 +- pcsx2/windows/resource.h | 1 + pcsx2/x86/iFPU.cpp | 27 +++++ pcsx2/x86/iR5900.h | 6 +- pcsx2/x86/ix86/ix86_macros.h | 45 +++++++++ pcsx2/x86/ix86/ix86_sse.inl | 112 ++++++++++++++++++++- 9 files changed, 201 insertions(+), 12 deletions(-) diff --git a/pcsx2/Misc.h b/pcsx2/Misc.h index 601f51cadc..325f6bb69b 100644 --- a/pcsx2/Misc.h +++ b/pcsx2/Misc.h @@ -99,6 +99,7 @@ extern SessionOverrideFlags g_Session; #define CHECK_FPU_OVERFLOW (Config.eeOptions & 0x1) #define CHECK_FPU_EXTRA_OVERFLOW (Config.eeOptions & 0x2) // If enabled, Operands are checked for infinities before being used in the FPU recs #define CHECK_FPU_EXTRA_FLAGS 1 // Always enabled now // Sets D/I flags on FPU instructions +#define CHECK_FPU_FULL (Config.eeOptions & 0x4) #define DEFAULT_eeOptions 0x01 #define DEFAULT_vuOptions 0x01 //------------ DEFAULT sseMXCSR VALUES!!! 
--------------- diff --git a/pcsx2/windows/AdvancedDlg.cpp b/pcsx2/windows/AdvancedDlg.cpp index 4e6b33a02b..16a6f0d3aa 100644 --- a/pcsx2/windows/AdvancedDlg.cpp +++ b/pcsx2/windows/AdvancedDlg.cpp @@ -29,13 +29,17 @@ static void InitRoundClampModes( HWND hDlg, u32 new_eeopt, u32 new_vuopt ) { CheckRadioButton(hDlg, IDC_EE_ROUNDMODE0, IDC_EE_ROUNDMODE3, IDC_EE_ROUNDMODE0 + ((Config.sseMXCSR & 0x6000) >> 13)); CheckRadioButton(hDlg, IDC_VU_ROUNDMODE0, IDC_VU_ROUNDMODE3, IDC_VU_ROUNDMODE0 + ((Config.sseVUMXCSR & 0x6000) >> 13)); - CheckRadioButton(hDlg, IDC_EE_CLAMPMODE0, IDC_EE_CLAMPMODE2, IDC_EE_CLAMPMODE0 + ((new_eeopt & 0x2) ? 2 : (new_eeopt & 0x1))); if (new_vuopt & 0x4) CheckRadioButton(hDlg, IDC_VU_CLAMPMODE0, IDC_VU_CLAMPMODE3, IDC_VU_CLAMPMODE0 + 3); else if (new_vuopt & 0x2) CheckRadioButton(hDlg, IDC_VU_CLAMPMODE0, IDC_VU_CLAMPMODE3, IDC_VU_CLAMPMODE0 + 2); else if (new_vuopt & 0x1) CheckRadioButton(hDlg, IDC_VU_CLAMPMODE0, IDC_VU_CLAMPMODE3, IDC_VU_CLAMPMODE0 + 1); else CheckRadioButton(hDlg, IDC_VU_CLAMPMODE0, IDC_VU_CLAMPMODE3, IDC_VU_CLAMPMODE0 + 0); + if (new_eeopt & 0x4) CheckRadioButton(hDlg, IDC_EE_CLAMPMODE0, IDC_EE_CLAMPMODE3, IDC_EE_CLAMPMODE0 + 3); + else if (new_eeopt & 0x2) CheckRadioButton(hDlg, IDC_EE_CLAMPMODE0, IDC_EE_CLAMPMODE3, IDC_EE_CLAMPMODE0 + 2); + else if (new_eeopt & 0x1) CheckRadioButton(hDlg, IDC_EE_CLAMPMODE0, IDC_EE_CLAMPMODE3, IDC_EE_CLAMPMODE0 + 1); + else CheckRadioButton(hDlg, IDC_EE_CLAMPMODE0, IDC_EE_CLAMPMODE3, IDC_EE_CLAMPMODE0 + 0); + if (Config.sseMXCSR & 0x8000) CheckDlgButton(hDlg, IDC_EE_CHECK1, TRUE); if (Config.sseVUMXCSR & 0x8000) CheckDlgButton(hDlg, IDC_VU_CHECK1, TRUE); } @@ -88,6 +92,7 @@ BOOL APIENTRY AdvancedOptionsProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM new_eeopt |= IsDlgButtonChecked(hDlg, IDC_EE_CLAMPMODE0) ? 0x0 : 0; new_eeopt |= IsDlgButtonChecked(hDlg, IDC_EE_CLAMPMODE1) ? 0x1 : 0; new_eeopt |= IsDlgButtonChecked(hDlg, IDC_EE_CLAMPMODE2) ? 
0x3 : 0; + new_eeopt |= IsDlgButtonChecked(hDlg, IDC_EE_CLAMPMODE3) ? 0x7 : 0; new_vuopt |= IsDlgButtonChecked(hDlg, IDC_VU_CLAMPMODE0) ? 0x0 : 0; new_vuopt |= IsDlgButtonChecked(hDlg, IDC_VU_CLAMPMODE1) ? 0x1 : 0; @@ -161,8 +166,9 @@ BOOL APIENTRY AdvancedOptionsProc(HWND hDlg, UINT message, WPARAM wParam, LPARAM case IDC_EE_CLAMPMODE0: case IDC_EE_CLAMPMODE1: case IDC_EE_CLAMPMODE2: + case IDC_EE_CLAMPMODE3: - CheckRadioButton(hDlg, IDC_EE_CLAMPMODE0, IDC_EE_CLAMPMODE2, IDC_EE_CLAMPMODE0 + ( LOWORD(wParam) % IDC_EE_CLAMPMODE0 ) ); + CheckRadioButton(hDlg, IDC_EE_CLAMPMODE0, IDC_EE_CLAMPMODE3, IDC_EE_CLAMPMODE0 + ( LOWORD(wParam) % IDC_EE_CLAMPMODE0 ) ); break; case IDC_VU_CLAMPMODE0: diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index f6e834f868..60d23af4db 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -1938,6 +1938,10 @@ RelativePath="..\..\x86\iFPU.h" > + + diff --git a/pcsx2/windows/pcsx2.rc b/pcsx2/windows/pcsx2.rc index e5c27a4ec3..5acaed1080 100644 --- a/pcsx2/windows/pcsx2.rc +++ b/pcsx2/windows/pcsx2.rc @@ -227,13 +227,14 @@ BEGIN RADIOBUTTON "None",IDC_EE_CLAMPMODE0,20,76,44,16 RADIOBUTTON "Normal",IDC_EE_CLAMPMODE1,64,76,47,16 RADIOBUTTON "Extra + Preserve Sign",IDC_EE_CLAMPMODE2,111,76,91,16 + RADIOBUTTON "Full",IDC_EE_CLAMPMODE3,202,76,44,16 LTEXT "These options specify how your CPU rounds floating point values.\n\nTry changing the roundmode for EE if your game hangs, it could make it work again.",IDC_STATIC,287,33,216,35 GROUPBOX "Round Mode",IDC_STATIC,281,22,235,51,BS_LEFT GROUPBOX "Clamp Mode",IDC_STATIC,281,80,236,84,BS_LEFT - GROUPBOX "Other Options",IDC_STATIC,280,172,237,67,BS_LEFT + GROUPBOX "EE-only Clamp Mode",IDC_STATIC,280,172,237,67,BS_LEFT LTEXT "These options specify how PCSX2's recompilers will clamp Infinities and NaN (Not a Number) values in the opcode instructions.",IDC_STATIC,286,94,224,19 - LTEXT "Flush to Zero - 
Your CPU makes Floating Point Underflows become Zero, so it does less work. (Speed Up)\n\nDenormals are Zero - Your CPU makes Floating Point Denormals become Zero, so it does less work. (Speed Up)",IDC_STATIC,286,186,224,46 - LTEXT "None - No clamping. (Fastest Mode)\nNormal - Clamps the result.\nExtra - Clamps the operands, the result, and anywhere in between.\nExtra + Preserve Sign - Same as ""Extra"", except preserves NaN's sign when clamping the operands. (Slowest Mode)",IDC_STATIC,286,114,224,45 + LTEXT "None - No clamping. (Fastest Mode)\nNormal - Clamps the result.\nExtra - Clamps the operands, the result, and anywhere in between.\nExtra + Preserve Sign - Same as ""Extra"", except preserves NaN's sign when clamping the operands.",IDC_STATIC,286,114,224,45 + LTEXT "Full - Emulates large numbers correctly. Not fully compatible with VU clamp options aside from ""Extra + Preserve Sign"" (but still works for most games even with ""Normal"")",IDC_STATIC,286,186,214,46 END IDD_CONF_MEMCARD DIALOGEX 0, 0, 451, 215 diff --git a/pcsx2/windows/resource.h b/pcsx2/windows/resource.h index 410dd10e6d..6714dd1b09 100644 --- a/pcsx2/windows/resource.h +++ b/pcsx2/windows/resource.h @@ -265,6 +265,7 @@ #define IDC_MCD_BROWSE1 1320 #define IDC_EE_CLAMPMODE2 1321 #define IDC_MCD_BROWSE2 1321 +#define IDC_EE_CLAMPMODE3 1322 #define IDC_MCD_FILE1 1322 #define IDC_MCD_FILE2 1323 #define IDC_MCD_LABEL1 1324 diff --git a/pcsx2/x86/iFPU.cpp b/pcsx2/x86/iFPU.cpp index d4c3b38317..d48167be46 100644 --- a/pcsx2/x86/iFPU.cpp +++ b/pcsx2/x86/iFPU.cpp @@ -62,6 +62,33 @@ namespace Dynarec { namespace OpcodeImpl { namespace COP1 { +namespace DOUBLE { + +void recABS_S_xmm(int info); +void recADD_S_xmm(int info); +void recADDA_S_xmm(int info); +void recC_EQ_xmm(int info); +void recC_LE_xmm(int info); +void recC_LT_xmm(int info); +void recCVT_S_xmm(int info); +void recDIV_S_xmm(int info); +void recMADD_S_xmm(int info); +void recMADDA_S_xmm(int info); +void recMAX_S_xmm(int info); +void 
recMIN_S_xmm(int info); +void recMOV_S_xmm(int info); +void recMSUB_S_xmm(int info); +void recMSUBA_S_xmm(int info); +void recMUL_S_xmm(int info); +void recMULA_S_xmm(int info); +void recNEG_S_xmm(int info); +void recSUB_S_xmm(int info); +void recSUBA_S_xmm(int info); +void recSQRT_S_xmm(int info); +void recRSQRT_S_xmm(int info); + +}; + //------------------------------------------------------------------ // Helper Macros //------------------------------------------------------------------ diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 7413b09f5a..409536c6b8 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -22,7 +22,6 @@ #define _EmitterId_ EmitterId_R5900 #include "ix86/ix86.h" #include "ix86/ix86_sse_helpers.h" - #include "R5900.h" #include "VU.h" #include "iCore.h" @@ -263,7 +262,10 @@ void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode #define FPURECOMPILE_CONSTCODE(fn, xmminfo) \ void rec##fn(void) \ { \ - eeFPURecompileCode(rec##fn##_xmm, R5900::Interpreter::OpcodeImpl::COP1::fn, xmminfo); \ + if (CHECK_FPU_FULL) \ + eeFPURecompileCode(DOUBLE::rec##fn##_xmm, R5900::Interpreter::OpcodeImpl::COP1::fn, xmminfo); \ + else \ + eeFPURecompileCode(rec##fn##_xmm, R5900::Interpreter::OpcodeImpl::COP1::fn, xmminfo); \ } // rd = rs op rt (all regs need to be in xmm) diff --git a/pcsx2/x86/ix86/ix86_macros.h b/pcsx2/x86/ix86/ix86_macros.h index 381dfe775c..60609eaa30 100644 --- a/pcsx2/x86/ix86/ix86_macros.h +++ b/pcsx2/x86/ix86/ix86_macros.h @@ -701,6 +701,7 @@ //------------------------------------------------------------------ // *SSE 2 Instructions* //------------------------------------------------------------------ + #define SSE2_MOVDQA_M128_to_XMM eSSE2_MOVDQA_M128_to_XMM<_EmitterId_> #define SSE2_MOVDQA_XMM_to_M128 eSSE2_MOVDQA_XMM_to_M128<_EmitterId_> #define SSE2_MOVDQA_XMM_to_XMM eSSE2_MOVDQA_XMM_to_XMM<_EmitterId_> @@ -797,6 +798,50 @@ #define SSE2_PSHUFHW_M128_to_XMM eSSE2_PSHUFHW_M128_to_XMM<_EmitterId_> 
#define SSE2_SHUFPD_XMM_to_XMM eSSE2_SHUFPD_XMM_to_XMM<_EmitterId_> #define SSE2_SHUFPD_M128_to_XMM eSSE2_SHUFPD_M128_to_XMM<_EmitterId_> +#define SSE2_ORPD_M128_to_XMM eSSE2_ORPD_M128_to_XMM<_EmitterId_> +#define SSE2_ORPD_XMM_to_XMM eSSE2_ORPD_XMM_to_XMM<_EmitterId_> +#define SSE2_XORPD_M128_to_XMM eSSE2_XORPD_M128_to_XMM<_EmitterId_> +#define SSE2_XORPD_XMM_to_XMM eSSE2_XORPD_XMM_to_XMM<_EmitterId_> +#define SSE2_ANDPD_M128_to_XMM eSSE2_ANDPD_M128_to_XMM<_EmitterId_> +#define SSE2_ANDPD_XMM_to_XMM eSSE2_ANDPD_XMM_to_XMM<_EmitterId_> +#define SSE2_ANDNPD_M128_to_XMM eSSE2_ANDNPD_M128_to_XMM<_EmitterId_> +#define SSE2_ANDNPD_XMM_to_XMM eSSE2_ANDNPD_XMM_to_XMM<_EmitterId_> +#define SSE2_ADDSD_M64_to_XMM eSSE2_ADDSD_M64_to_XMM<_EmitterId_> +#define SSE2_ADDSD_XMM_to_XMM eSSE2_ADDSD_XMM_to_XMM<_EmitterId_> +#define SSE2_SUBSD_M64_to_XMM eSSE2_SUBSD_M64_to_XMM<_EmitterId_> +#define SSE2_SUBSD_XMM_to_XMM eSSE2_SUBSD_XMM_to_XMM<_EmitterId_> +#define SSE2_MULSD_M64_to_XMM eSSE2_MULSD_M64_to_XMM<_EmitterId_> +#define SSE2_MULSD_XMM_to_XMM eSSE2_MULSD_XMM_to_XMM<_EmitterId_> +#define SSE2_CMPEQSD_M64_to_XMM eSSE2_CMPEQSD_M64_to_XMM<_EmitterId_> +#define SSE2_CMPEQSD_XMM_to_XMM eSSE2_CMPEQSD_XMM_to_XMM<_EmitterId_> +#define SSE2_CMPLTSD_M64_to_XMM eSSE2_CMPLTSD_M64_to_XMM<_EmitterId_> +#define SSE2_CMPLTSD_XMM_to_XMM eSSE2_CMPLTSD_XMM_to_XMM<_EmitterId_> +#define SSE2_CMPLESD_M64_to_XMM eSSE2_CMPLESD_M64_to_XMM<_EmitterId_> +#define SSE2_CMPLESD_XMM_to_XMM eSSE2_CMPLESD_XMM_to_XMM<_EmitterId_> +#define SSE2_CMPUNORDSD_M64_to_XMM eSSE2_CMPUNORDSD_M64_to_XMM<_EmitterId_> +#define SSE2_CMPUNORDSD_XMM_to_XMM eSSE2_CMPUNORDSD_XMM_to_XMM<_EmitterId_> +#define SSE2_CMPNESD_M64_to_XMM eSSE2_CMPNESD_M64_to_XMM<_EmitterId_> +#define SSE2_CMPNESD_XMM_to_XMM eSSE2_CMPNESD_XMM_to_XMM<_EmitterId_> +#define SSE2_CMPNLTSD_M64_to_XMM eSSE2_CMPNLTSD_M64_to_XMM<_EmitterId_> +#define SSE2_CMPNLTSD_XMM_to_XMM eSSE2_CMPNLTSD_XMM_to_XMM<_EmitterId_> +#define SSE2_CMPNLESD_M64_to_XMM eSSE2_CMPNLESD_M64_to_XMM<_EmitterId_> +#define SSE2_CMPNLESD_XMM_to_XMM eSSE2_CMPNLESD_XMM_to_XMM<_EmitterId_> +#define SSE2_CMPORDSD_M64_to_XMM eSSE2_CMPORDSD_M64_to_XMM<_EmitterId_> +#define SSE2_CMPORDSD_XMM_to_XMM eSSE2_CMPORDSD_XMM_to_XMM<_EmitterId_> +#define SSE2_UCOMISD_M64_to_XMM 
eSSE2_UCOMISD_M64_to_XMM<_EmitterId_> +#define SSE2_UCOMISD_XMM_to_XMM eSSE2_UCOMISD_XMM_to_XMM<_EmitterId_> +#define SSE2_CVTSS2SD_M32_to_XMM eSSE2_CVTSS2SD_M32_to_XMM<_EmitterId_> +#define SSE2_CVTSS2SD_XMM_to_XMM eSSE2_CVTSS2SD_XMM_to_XMM<_EmitterId_> +#define SSE2_CVTSD2SS_M64_to_XMM eSSE2_CVTSD2SS_M64_to_XMM<_EmitterId_> +#define SSE2_CVTSD2SS_XMM_to_XMM eSSE2_CVTSD2SS_XMM_to_XMM<_EmitterId_> +#define SSE2_MAXSD_M64_to_XMM eSSE2_MAXSD_M64_to_XMM<_EmitterId_> +#define SSE2_MAXSD_XMM_to_XMM eSSE2_MAXSD_XMM_to_XMM<_EmitterId_> +#define SSE2_MINSD_M64_to_XMM eSSE2_MINSD_M64_to_XMM<_EmitterId_> +#define SSE2_MINSD_XMM_to_XMM eSSE2_MINSD_XMM_to_XMM<_EmitterId_> +#define SSE2_SQRTSD_M64_to_XMM eSSE2_SQRTSD_M64_to_XMM<_EmitterId_> +#define SSE2_SQRTSD_XMM_to_XMM eSSE2_SQRTSD_XMM_to_XMM<_EmitterId_> +#define SSE2_DIVSD_M64_to_XMM eSSE2_DIVSD_M64_to_XMM<_EmitterId_> +#define SSE2_DIVSD_XMM_to_XMM eSSE2_DIVSD_XMM_to_XMM<_EmitterId_> //------------------------------------------------------------------ // PACKSSWB,PACKSSDW: Pack Saturate Signed Word //------------------------------------------------------------------ diff --git a/pcsx2/x86/ix86/ix86_sse.inl b/pcsx2/x86/ix86/ix86_sse.inl index f9a277d43b..4f27e03d27 100644 --- a/pcsx2/x86/ix86/ix86_sse.inl +++ b/pcsx2/x86/ix86/ix86_sse.inl @@ -92,6 +92,29 @@ static const bool AlwaysUseMovaps = true; write16( code ), \ ModRM( 3, to, from ) +#define SSE_SD_MtoR( code, overb ) \ + assert( to < XMMREGS ) , \ + write8( 0xf2 ), \ + RexR(0, to), \ + write16( code ), \ + ModRM( 0, to, DISP32 ), \ + write32( MEMADDR(from, 4 + overb) ) \ + +#define SSE_SD_RtoM( code, overb ) \ + assert( from < XMMREGS) , \ + write8( 0xf2 ), \ + RexR(0, from), \ + write16( code ), \ + ModRM( 0, from, DISP32 ), \ + write32( MEMADDR(to, 4 + overb) ) \ + +#define SSE_SD_RtoR( code ) \ + assert( to < XMMREGS && from < XMMREGS) , \ + write8( 0xf2 ), \ + RexRB(0, to, from), \ + write16( code ), \ + ModRM( 3, to, from ) + #define CMPPSMtoR( op ) \ SSEMtoR( 0xc20f, 1 ), \ write8( op ) @@ -108,6 +131,14 @@ static const bool AlwaysUseMovaps = true; SSE_SS_RtoR( 0xc20f ), \ write8( op ) +#define 
CMPSDMtoR( op ) \ + SSE_SD_MtoR( 0xc20f, 1 ), \ + write8( op ) + +#define CMPSDRtoR( op ) \ + SSE_SD_RtoR( 0xc20f ), \ + write8( op ) + /* movups [r32][r32*scale] to xmm1 */ emitterT void eSSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { @@ -262,11 +293,9 @@ emitterT void eSSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { emitterT void eSSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x100f, 0 ); } emitterT void eSSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ) { SSERtoM( 0x110f, 0 ); } -emitterT void eSSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) -{ - write8(0xf2); - SSERtoR( 0x100f); -} +emitterT void eSSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x100f); } +emitterT void eSSE2_MOVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x100f, 0); } +emitterT void eSSE2_MOVSD_XMM_to_M64( uptr to, x86SSERegType from ) { SSE_SD_RtoM( 0x110f, 0); } emitterT void eSSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ) { @@ -386,6 +415,9 @@ emitterT void eSSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { emitterT void eSSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x540f, 0 ); } emitterT void eSSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x540f ); } +emitterT void eSSE2_ANDPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x540f ); } +emitterT void eSSE2_ANDPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x540f ); } + /////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ANDNPS : Logical Bit-wise AND NOT of Single-precision FP values * @@ -393,6 +425,9 @@ emitterT void eSSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SS emitterT void eSSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x550f, 0 ); } emitterT 
void eSSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR( 0x550f ); } +emitterT void eSSE2_ANDNPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x550f ); } +emitterT void eSSE2_ANDNPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSERtoR66( 0x550f ); } + ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //RCPPS : Packed Single-Precision FP Reciprocal * @@ -410,6 +445,9 @@ emitterT void eSSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_Mt emitterT void eSSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x560f, 0 ); } emitterT void eSSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x560f ); } +emitterT void eSSE2_ORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x560f ); } +emitterT void eSSE2_ORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x560f ); } + ///////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //XORPS : Bitwise Logical XOR of Single-Precision FP Values * @@ -417,6 +455,9 @@ emitterT void eSSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SS emitterT void eSSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); } emitterT void eSSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); } +emitterT void eSSE2_XORPD_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR66( 0x570f ); } +emitterT void eSSE2_XORPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR66( 0x570f ); } + /////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //ADDPS : ADD Packed Single-Precision FP Values * @@ -431,6 +472,9 @@ emitterT void 
eSSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S emitterT void eSSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x580f, 0 ); } emitterT void eSSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x580f ); } +emitterT void eSSE2_ADDSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x580f, 0 ); } +emitterT void eSSE2_ADDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x580f ); } + ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //SUBPS: Packed Single-Precision FP Subtract * @@ -445,6 +489,9 @@ emitterT void eSSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S emitterT void eSSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5c0f, 0 ); } emitterT void eSSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5c0f ); } +emitterT void eSSE2_SUBSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5c0f, 0 ); } +emitterT void eSSE2_SUBSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5c0f ); } + ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MULPS : Packed Single-Precision FP Multiply * @@ -459,6 +506,9 @@ emitterT void eSSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S emitterT void eSSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x590f, 0 ); } emitterT void eSSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x590f ); } +emitterT void eSSE2_MULSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x590f, 0 ); } +emitterT void eSSE2_MULSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x590f ); } + 
//////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //Packed Single-Precission FP compare (CMPccPS) * @@ -507,6 +557,23 @@ emitterT void eSSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) emitterT void eSSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ) { CMPSSMtoR( 7 ); } emitterT void eSSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSSRtoR( 7 ); } +emitterT void eSSE2_CMPEQSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 0 ); } +emitterT void eSSE2_CMPEQSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 0 ); } +emitterT void eSSE2_CMPLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 1 ); } +emitterT void eSSE2_CMPLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 1 ); } +emitterT void eSSE2_CMPLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 2 ); } +emitterT void eSSE2_CMPLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 2 ); } +emitterT void eSSE2_CMPUNORDSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 3 ); } +emitterT void eSSE2_CMPUNORDSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 3 ); } +emitterT void eSSE2_CMPNESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 4 ); } +emitterT void eSSE2_CMPNESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 4 ); } +emitterT void eSSE2_CMPNLTSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 5 ); } +emitterT void eSSE2_CMPNLTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 5 ); } +emitterT void eSSE2_CMPNLESD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 6 ); } +emitterT void eSSE2_CMPNLESD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 6 ); } +emitterT void eSSE2_CMPORDSD_M64_to_XMM( x86SSERegType to, uptr from ) { CMPSDMtoR( 7 ); } +emitterT void eSSE2_CMPORDSD_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ) { CMPSDRtoR( 7 ); } + emitterT void eSSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ) { RexR(0, to); @@ -522,6 +589,23 @@ emitterT void eSSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) ModRM( 3, to, from ); } +emitterT void eSSE2_UCOMISD_M64_to_XMM( x86SSERegType to, uptr from ) +{ + write8(0x66); + RexR(0, to); + write16( 0x2e0f ); + ModRM( 0, to, DISP32 ); + write32( MEMADDR(from, 4) ); +} + +emitterT void eSSE2_UCOMISD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) +{ + write8(0x66); + RexRB(0, to, from); + write16( 0x2e0f ); + ModRM( 3, to, from ); +} + ////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //RSQRTPS : Packed Single-Precision FP Square Root Reciprocal * @@ -550,6 +634,9 @@ emitterT void eSSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S emitterT void eSSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x510f, 0 ); } emitterT void eSSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x510f ); } +emitterT void eSSE2_SQRTSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x510f, 0 ); } +emitterT void eSSE2_SQRTSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ){ SSE_SD_RtoR( 0x510f ); } + //////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //MAXPS: Return Packed Single-Precision FP Maximum * @@ -567,6 +654,9 @@ emitterT void eSSE2_MAXPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S emitterT void eSSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5f0f, 0 ); } emitterT void eSSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5f0f ); } +emitterT void eSSE2_MAXSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5f0f, 
0 ); } +emitterT void eSSE2_MAXSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5f0f ); } + ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //CVTPI2PS: Packed Signed INT32 to Packed Single FP Conversion * @@ -599,6 +689,12 @@ emitterT void eSSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from) ModRM(3, to, from); } +emitterT void eSSE2_CVTSS2SD_M32_to_XMM( x86SSERegType to, uptr from) { SSE_SS_MtoR(0x5a0f, 0); } +emitterT void eSSE2_CVTSS2SD_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SS_RtoR(0x5a0f); } + +emitterT void eSSE2_CVTSD2SS_M64_to_XMM( x86SSERegType to, uptr from) { SSE_SD_MtoR(0x5a0f, 0); } +emitterT void eSSE2_CVTSD2SS_XMM_to_XMM( x86SSERegType to, x86SSERegType from) { SSE_SD_RtoR(0x5a0f); } + /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //CVTDQ2PS: Packed Signed INT32 to Packed Single Precision FP Conversion * @@ -630,6 +726,9 @@ emitterT void eSSE2_MINPD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S emitterT void eSSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5d0f, 0 ); } emitterT void eSSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5d0f ); } +emitterT void eSSE2_MINSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5d0f, 0 ); } +emitterT void eSSE2_MINSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5d0f ); } + /////////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //PMAXSW: Packed Signed Integer Word Maximum * @@ -716,6 +815,9 @@ emitterT void eSSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { S emitterT void 
eSSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ) { SSE_SS_MtoR( 0x5e0F, 0 ); } emitterT void eSSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SS_RtoR( 0x5e0F ); } +emitterT void eSSE2_DIVSD_M64_to_XMM( x86SSERegType to, uptr from ) { SSE_SD_MtoR( 0x5e0F, 0 ); } +emitterT void eSSE2_DIVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSE_SD_RtoR( 0x5e0F ); } + ///////////////////////////////////////////////////////////////////////////////////////// //**********************************************************************************/ //STMXCSR : Store Streaming SIMD Extension Control/Status *