mirror of https://github.com/PCSX2/pcsx2.git
core: purge sse2
This commit is contained in:
parent
5509bfc003
commit
8a9ec4c706
|
@ -46,13 +46,13 @@ void Pcsx2App::DetectCpuAndUserMode()
|
|||
x86caps.CountCores();
|
||||
x86caps.SIMD_EstablishMXCSRmask();
|
||||
|
||||
if (!x86caps.hasStreamingSIMD2Extensions)
|
||||
if (!x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
// This code will probably never run if the binary was correctly compiled for SSE2
|
||||
// SSE2 is required for any decent speed and is supported by more than decade old x86 CPUs
|
||||
// This code will probably never run if the binary was correctly compiled for SSE4
|
||||
// SSE4 is required for any decent speed and is supported by more than decade old x86 CPUs
|
||||
throw Exception::HardwareDeficiency()
|
||||
.SetDiagMsg(L"Critical Failure: SSE2 Extensions not available.")
|
||||
.SetUserMsg(_("SSE2 extensions are not available. PCSX2 requires a cpu that supports the SSE2 instruction set."));
|
||||
.SetDiagMsg(L"Critical Failure: SSE4 Extensions not available.")
|
||||
.SetUserMsg(_("SSE4 extensions are not available. PCSX2 requires a cpu that supports the SSE4 instruction set."));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -684,7 +684,6 @@ void Panels::PluginSelectorPanel::OnEnumComplete( wxCommandEvent& evt )
|
|||
|
||||
int index_avx2 = -1;
|
||||
int index_sse4 = -1;
|
||||
int index_sse2 = -1;
|
||||
|
||||
for( int i = 0; i < count; i++ )
|
||||
{
|
||||
|
@ -692,12 +691,10 @@ void Panels::PluginSelectorPanel::OnEnumComplete( wxCommandEvent& evt )
|
|||
|
||||
if( x86caps.hasAVX2 && str.Contains("AVX2") ) index_avx2 = i;
|
||||
if( x86caps.hasStreamingSIMD4Extensions && str.Contains("SSE4") ) index_sse4 = i;
|
||||
if( str.Contains("SSE2") ) index_sse2 = i;
|
||||
}
|
||||
|
||||
if( index_avx2 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_avx2 );
|
||||
else if( index_sse4 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_sse4 );
|
||||
else if( index_sse2 >= 0 ) m_ComponentBoxes->Get(pid).SetSelection( index_sse2 );
|
||||
else m_ComponentBoxes->Get(pid).SetSelection( 0 );
|
||||
}
|
||||
else
|
||||
|
|
|
@ -215,17 +215,9 @@ void recPMTHL()
|
|||
|
||||
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READLO|XMMINFO_READHI|XMMINFO_WRITELO|XMMINFO_WRITEHI );
|
||||
|
||||
if ( x86caps.hasStreamingSIMD4Extensions ) {
|
||||
xBLEND.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x5);
|
||||
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd);
|
||||
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72);
|
||||
}
|
||||
else {
|
||||
xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S), 0x8d);
|
||||
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S), 0xdd);
|
||||
xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0x72);
|
||||
xSHUF.PS(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_HI), 0x72);
|
||||
}
|
||||
|
||||
_clearNeededXMMregs();
|
||||
}
|
||||
|
@ -400,7 +392,6 @@ void recPMAXW()
|
|||
EE::Profiler.EmitOp(eeOpcode::PMAXW);
|
||||
|
||||
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
|
||||
if ( x86caps.hasStreamingSIMD4Extensions ) {
|
||||
if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
else if( EEREC_D == EEREC_S ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
else if ( EEREC_D == EEREC_T ) xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
|
@ -408,40 +399,6 @@ void recPMAXW()
|
|||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPMAX.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
}
|
||||
}
|
||||
else {
|
||||
int t0reg;
|
||||
|
||||
if( EEREC_S == EEREC_T ) {
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
}
|
||||
else {
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
|
||||
if( EEREC_D == EEREC_S ) {
|
||||
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
}
|
||||
else if( EEREC_D == EEREC_T ) {
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
||||
_freeXMMreg(t1reg);
|
||||
}
|
||||
else {
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
}
|
||||
|
||||
xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
}
|
||||
_clearNeededXMMregs();
|
||||
}
|
||||
|
||||
|
@ -1173,18 +1130,7 @@ void recPABSW() //needs clamping
|
|||
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
|
||||
xPSLL.D(xRegisterSSE(t0reg), 31);
|
||||
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffffffff if equal to 0x80000000
|
||||
if( x86caps.hasSupplementalStreamingSIMD3Extensions ) {
|
||||
xPABS.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x80000000 -> 0x80000000
|
||||
}
|
||||
else {
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
xPSRA.D(xRegisterSSE(t1reg), 31);
|
||||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
|
||||
xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); //0x80000000 -> 0x80000000
|
||||
_freeXMMreg(t1reg);
|
||||
}
|
||||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); //0x80000000 -> 0x7fffffff
|
||||
_freeXMMreg(t0reg);
|
||||
_clearNeededXMMregs();
|
||||
|
@ -1203,18 +1149,7 @@ void recPABSH()
|
|||
xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
|
||||
xPSLL.W(xRegisterSSE(t0reg), 15);
|
||||
xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffff if equal to 0x8000
|
||||
if( x86caps.hasSupplementalStreamingSIMD3Extensions ) {
|
||||
xPABS.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); //0x8000 -> 0x8000
|
||||
}
|
||||
else {
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
xPSRA.W(xRegisterSSE(t1reg), 15);
|
||||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
|
||||
xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg)); //0x8000 -> 0x8000
|
||||
_freeXMMreg(t1reg);
|
||||
}
|
||||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); //0x8000 -> 0x7fff
|
||||
_freeXMMreg(t0reg);
|
||||
_clearNeededXMMregs();
|
||||
|
@ -1228,7 +1163,6 @@ void recPMINW()
|
|||
EE::Profiler.EmitOp(eeOpcode::PMINW);
|
||||
|
||||
int info = eeRecompileCodeXMM( XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED );
|
||||
if ( x86caps.hasStreamingSIMD4Extensions ) {
|
||||
if( EEREC_S == EEREC_T ) xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
else if( EEREC_D == EEREC_S ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
else if ( EEREC_D == EEREC_T ) xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
|
@ -1236,40 +1170,6 @@ void recPMINW()
|
|||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPMIN.SD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
}
|
||||
}
|
||||
else {
|
||||
int t0reg;
|
||||
|
||||
if( EEREC_S == EEREC_T ) {
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
}
|
||||
else {
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
|
||||
if( EEREC_D == EEREC_S ) {
|
||||
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
}
|
||||
else if( EEREC_D == EEREC_T ) {
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
||||
_freeXMMreg(t1reg);
|
||||
}
|
||||
else {
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPAND(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
xPANDN(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
}
|
||||
|
||||
xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
}
|
||||
_clearNeededXMMregs();
|
||||
}
|
||||
|
||||
|
@ -1718,12 +1618,6 @@ void recPMADDW()
|
|||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::PMADDW);
|
||||
|
||||
if( !x86caps.hasStreamingSIMD4Extensions ) {
|
||||
_deleteEEreg(_Rd_, 0);
|
||||
recCall(Interp::PMADDW);
|
||||
return;
|
||||
}
|
||||
|
||||
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI );
|
||||
xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88);
|
||||
xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
|
||||
|
@ -1775,19 +1669,9 @@ void recPSLLVW()
|
|||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
}
|
||||
else {
|
||||
if ( x86caps.hasStreamingSIMD4Extensions ) {
|
||||
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
|
||||
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
|
||||
xPSRA.D(xRegisterSSE(t0reg), 31);
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( _Rt_ == 0 ) {
|
||||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
|
@ -1813,16 +1697,8 @@ void recPSLLVW()
|
|||
xPSLL.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg));
|
||||
|
||||
// merge & sign extend
|
||||
if ( x86caps.hasStreamingSIMD4Extensions ) {
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
|
||||
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
}
|
||||
else {
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
|
||||
xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
}
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
_freeXMMreg(t1reg);
|
||||
|
@ -1843,19 +1719,9 @@ void recPSRLVW()
|
|||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
}
|
||||
else {
|
||||
if ( x86caps.hasStreamingSIMD4Extensions ) {
|
||||
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
|
||||
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
|
||||
xPSRA.D(xRegisterSSE(t0reg), 31);
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( _Rt_ == 0 ) {
|
||||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
|
@ -1881,16 +1747,8 @@ void recPSRLVW()
|
|||
xPSRL.D(xRegisterSSE(t1reg), xRegisterSSE(t0reg));
|
||||
|
||||
// merge & sign extend
|
||||
if ( x86caps.hasStreamingSIMD4Extensions ) {
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
|
||||
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
}
|
||||
else {
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t1reg));
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
|
||||
xPSRA.D(xRegisterSSE(t0reg), 31); // get the signs
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
}
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
_freeXMMreg(t1reg);
|
||||
|
@ -1903,11 +1761,6 @@ void recPMSUBW()
|
|||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::PMSUBW);
|
||||
|
||||
if( !x86caps.hasStreamingSIMD4Extensions ) {
|
||||
_deleteEEreg(_Rd_, 0);
|
||||
recCall(Interp::PMSUBW);
|
||||
return;
|
||||
}
|
||||
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI|XMMINFO_READLO|XMMINFO_READHI );
|
||||
xSHUF.PS(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_HI), 0x88);
|
||||
xPSHUF.D(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO), 0xd8); // LO = {LO[0], HI[0], LO[2], HI[2]}
|
||||
|
@ -1957,11 +1810,6 @@ void recPMULTW()
|
|||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::PMULTW);
|
||||
|
||||
if( !x86caps.hasStreamingSIMD4Extensions ) {
|
||||
_deleteEEreg(_Rd_, 0);
|
||||
recCall(Interp::PMULTW);
|
||||
return;
|
||||
}
|
||||
int info = eeRecompileCodeXMM( (((_Rs_)&&(_Rt_))?XMMINFO_READS:0)|(((_Rs_)&&(_Rt_))?XMMINFO_READT:0)|(_Rd_?XMMINFO_WRITED:0)|XMMINFO_WRITELO|XMMINFO_WRITEHI );
|
||||
if( !_Rs_ || !_Rt_ ) {
|
||||
if( _Rd_ ) xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
|
@ -2455,19 +2303,9 @@ void recPSRAVW()
|
|||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
}
|
||||
else {
|
||||
if ( x86caps.hasStreamingSIMD4Extensions ) {
|
||||
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
|
||||
xPMOVSX.DQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
}
|
||||
else {
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_D));
|
||||
xPSRA.D(xRegisterSSE(t0reg), 31);
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( _Rt_ == 0 ) {
|
||||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
|
|
|
@ -508,8 +508,8 @@ static void recReserve()
|
|||
{
|
||||
// Hardware Requirements Check...
|
||||
|
||||
if ( !x86caps.hasStreamingSIMD2Extensions )
|
||||
recThrowHardwareDeficiency( L"SSE2" );
|
||||
if ( !x86caps.hasStreamingSIMD4Extensions )
|
||||
recThrowHardwareDeficiency( L"SSE4" );
|
||||
|
||||
recReserveCache();
|
||||
}
|
||||
|
|
|
@ -47,7 +47,7 @@ void mVUreserveCache(microVU& mVU) {
|
|||
// Only run this once per VU! ;)
|
||||
void mVUinit(microVU& mVU, uint vuIndex) {
|
||||
|
||||
if(!x86caps.hasStreamingSIMD2Extensions) mVUthrowHardwareDeficiency( L"SSE2", vuIndex );
|
||||
if(!x86caps.hasStreamingSIMD4Extensions) mVUthrowHardwareDeficiency( L"SSE4", vuIndex );
|
||||
|
||||
memzero(mVU.prog);
|
||||
|
||||
|
|
|
@ -166,13 +166,7 @@ __fi void getQreg(const xmm& reg, int qInstance)
|
|||
|
||||
__ri void writeQreg(const xmm& reg, int qInstance)
|
||||
{
|
||||
if (qInstance) {
|
||||
if (!x86caps.hasStreamingSIMD4Extensions) {
|
||||
xPSHUF.D(xmmPQ, xmmPQ, 0xe1);
|
||||
xMOVSS(xmmPQ, reg);
|
||||
xPSHUF.D(xmmPQ, xmmPQ, 0xe1);
|
||||
}
|
||||
else xINSERTPS(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0));
|
||||
}
|
||||
if (qInstance)
|
||||
xINSERTPS(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0));
|
||||
else xMOVSS(xmmPQ, reg);
|
||||
}
|
||||
|
|
|
@ -56,34 +56,11 @@ void mVUclamp1(const xmm& reg, const xmm& regT1, int xyzw, bool bClampE = 0) {
|
|||
// so we just use a temporary mem location for our backup for now... (non-sse4 version only)
|
||||
void mVUclamp2(microVU& mVU, const xmm& reg, const xmm& regT1in, int xyzw, bool bClampE = 0) {
|
||||
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0: 1;
|
||||
xPMIN.SD(reg, ptr128[&sse4_maxvals[i][0]]);
|
||||
xPMIN.UD(reg, ptr128[&sse4_minvals[i][0]]);
|
||||
return;
|
||||
}
|
||||
//const xmm& regT1 = regT1b ? mVU.regAlloc->allocReg() : regT1in;
|
||||
const xmm& regT1 = regT1in.IsEmpty() ? xmm((reg.Id + 1) % 8) : regT1in;
|
||||
if (regT1 != regT1in) xMOVAPS(ptr128[mVU.xmmCTemp], regT1);
|
||||
switch (xyzw) {
|
||||
case 1: case 2: case 4: case 8:
|
||||
xMOVAPS(regT1, reg);
|
||||
xAND.PS(regT1, ptr128[mVUglob.signbit]);
|
||||
xMIN.SS(reg, ptr128[mVUglob.maxvals]);
|
||||
xMAX.SS(reg, ptr128[mVUglob.minvals]);
|
||||
xOR.PS (reg, regT1);
|
||||
break;
|
||||
default:
|
||||
xMOVAPS(regT1, reg);
|
||||
xAND.PS(regT1, ptr128[mVUglob.signbit]);
|
||||
xMIN.PS(reg, ptr128[mVUglob.maxvals]);
|
||||
xMAX.PS(reg, ptr128[mVUglob.minvals]);
|
||||
xOR.PS (reg, regT1);
|
||||
break;
|
||||
}
|
||||
//if (regT1 != regT1in) mVU.regAlloc->clearNeeded(regT1);
|
||||
if (regT1 != regT1in) xMOVAPS(regT1, ptr128[mVU.xmmCTemp]);
|
||||
}
|
||||
else mVUclamp1(reg, regT1in, xyzw, bClampE);
|
||||
}
|
||||
|
||||
|
|
|
@ -28,11 +28,7 @@ static __fi void testZero(const xmm& xmmReg, const xmm& xmmTemp, const x32& gprT
|
|||
{
|
||||
xXOR.PS(xmmTemp, xmmTemp);
|
||||
xCMPEQ.SS(xmmTemp, xmmReg);
|
||||
if (!x86caps.hasStreamingSIMD4Extensions) {
|
||||
xMOVMSKPS(gprTemp, xmmTemp);
|
||||
xTEST(gprTemp, 1);
|
||||
}
|
||||
else xPTEST(xmmTemp, xmmTemp);
|
||||
xPTEST(xmmTemp, xmmTemp);
|
||||
}
|
||||
|
||||
// Test if Vector is Negative (Set Flags and Makes Positive)
|
||||
|
@ -298,19 +294,9 @@ mVUop(mVU_EEXP) {
|
|||
|
||||
// sumXYZ(): PQ.x = x ^ 2 + y ^ 2 + z ^ 2
|
||||
static __fi void mVU_sumXYZ(mV, const xmm& PQ, const xmm& Fs) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xDP.PS(Fs, Fs, 0x71);
|
||||
xMOVSS(PQ, Fs);
|
||||
}
|
||||
else {
|
||||
SSE_MULPS(mVU, Fs, Fs); // wzyx ^ 2
|
||||
xMOVSS (PQ, Fs); // x ^ 2
|
||||
xPSHUF.D (Fs, Fs, 0xe1); // wzyx -> wzxy
|
||||
SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2
|
||||
xPSHUF.D (Fs, Fs, 0xd2); // wzxy -> wxyz
|
||||
SSE_ADDSS(mVU, PQ, Fs); // x ^ 2 + y ^ 2 + z ^ 2
|
||||
}
|
||||
}
|
||||
|
||||
mVUop(mVU_ELENG) {
|
||||
pass1 { mVUanalyzeEFU2(mVU, _Fs_, 18); }
|
||||
|
|
|
@ -59,72 +59,29 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW)
|
|||
return;*/
|
||||
|
||||
switch ( xyzw ) {
|
||||
case 5: if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xEXTRACTPS(ptr32[ptr+4], reg, 1);
|
||||
case 5: xEXTRACTPS(ptr32[ptr+4], reg, 1);
|
||||
xEXTRACTPS(ptr32[ptr+12], reg, 3);
|
||||
}
|
||||
else {
|
||||
xPSHUF.D(reg, reg, 0xe1); //WZXY
|
||||
xMOVSS(ptr32[ptr+4], reg);
|
||||
xPSHUF.D(reg, reg, 0xff); //WWWW
|
||||
xMOVSS(ptr32[ptr+12], reg);
|
||||
}
|
||||
break; // YW
|
||||
case 6: xPSHUF.D(reg, reg, 0xc9);
|
||||
xMOVL.PS(ptr64[ptr+4], reg);
|
||||
break; // YZ
|
||||
case 7: if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xMOVH.PS(ptr64[ptr+8], reg);
|
||||
case 7: xMOVH.PS(ptr64[ptr+8], reg);
|
||||
xEXTRACTPS(ptr32[ptr+4], reg, 1);
|
||||
}
|
||||
else {
|
||||
xPSHUF.D(reg, reg, 0x93); //ZYXW
|
||||
xMOVH.PS(ptr64[ptr+4], reg);
|
||||
xMOVSS(ptr32[ptr+12], reg);
|
||||
}
|
||||
break; // YZW
|
||||
case 9: if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xMOVSS(ptr32[ptr], reg);
|
||||
case 9: xMOVSS(ptr32[ptr], reg);
|
||||
xEXTRACTPS(ptr32[ptr+12], reg, 3);
|
||||
}
|
||||
else {
|
||||
xMOVSS(ptr32[ptr], reg);
|
||||
xPSHUF.D(reg, reg, 0xff); //WWWW
|
||||
xMOVSS(ptr32[ptr+12], reg);
|
||||
}
|
||||
break; // XW
|
||||
case 10: if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xMOVSS(ptr32[ptr], reg);
|
||||
case 10: xMOVSS(ptr32[ptr], reg);
|
||||
xEXTRACTPS(ptr32[ptr+8], reg, 2);
|
||||
}
|
||||
else {
|
||||
xMOVSS(ptr32[ptr], reg);
|
||||
xMOVHL.PS(reg, reg);
|
||||
xMOVSS(ptr32[ptr+8], reg);
|
||||
}
|
||||
break; //XZ
|
||||
case 11: xMOVSS(ptr32[ptr], reg);
|
||||
xMOVH.PS(ptr64[ptr+8], reg);
|
||||
break; //XZW
|
||||
case 13: if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xMOVL.PS(ptr64[ptr], reg);
|
||||
case 13: xMOVL.PS(ptr64[ptr], reg);
|
||||
xEXTRACTPS(ptr32[ptr+12], reg, 3);
|
||||
}
|
||||
else {
|
||||
xPSHUF.D(reg, reg, 0x4b); //YXZW
|
||||
xMOVH.PS(ptr64[ptr], reg);
|
||||
xMOVSS(ptr32[ptr+12], reg);
|
||||
}
|
||||
break; // XYW
|
||||
case 14: if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xMOVL.PS(ptr64[ptr], reg);
|
||||
case 14: xMOVL.PS(ptr64[ptr], reg);
|
||||
xEXTRACTPS(ptr32[ptr+8], reg, 2);
|
||||
}
|
||||
else {
|
||||
xMOVL.PS(ptr64[ptr], reg);
|
||||
xMOVHL.PS(reg, reg);
|
||||
xMOVSS(ptr32[ptr+8], reg);
|
||||
}
|
||||
break; // XYZ
|
||||
case 4: if (!modXYZW) mVUunpack_xyzw(reg, reg, 1);
|
||||
xMOVSS(ptr32[ptr+4], reg);
|
||||
|
@ -146,8 +103,14 @@ void mVUsaveReg(const xmm& reg, xAddressVoid ptr, int xyzw, bool modXYZW)
|
|||
void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
|
||||
{
|
||||
xyzw &= 0xf;
|
||||
if ( (dest != src) && (xyzw != 0) ) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf)) {
|
||||
if ( (dest != src) && (xyzw != 0) )
|
||||
{
|
||||
if (xyzw == 0x8)
|
||||
xMOVSS(dest, src);
|
||||
else if (xyzw == 0xf)
|
||||
xMOVAPS(dest, src);
|
||||
else
|
||||
{
|
||||
if (modXYZW) {
|
||||
if (xyzw == 1) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 3, 0)); return; }
|
||||
else if (xyzw == 2) { xINSERTPS(dest, src, _MM_MK_INSERTPS_NDX(0, 2, 0)); return; }
|
||||
|
@ -156,56 +119,6 @@ void mVUmergeRegs(const xmm& dest, const xmm& src, int xyzw, bool modXYZW)
|
|||
xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3);
|
||||
xBLEND.PS(dest, src, xyzw);
|
||||
}
|
||||
else {
|
||||
switch (xyzw) {
|
||||
case 1: if (modXYZW) mVUunpack_xyzw(src, src, 0);
|
||||
xMOVHL.PS(src, dest); // src = Sw Sz Dw Dz
|
||||
xSHUF.PS(dest, src, 0xc4); // 11 00 01 00
|
||||
break;
|
||||
case 2: if (modXYZW) mVUunpack_xyzw(src, src, 0);
|
||||
xMOVHL.PS(src, dest);
|
||||
xSHUF.PS(dest, src, 0x64);
|
||||
break;
|
||||
case 3: xSHUF.PS(dest, src, 0xe4);
|
||||
break;
|
||||
case 4: if (modXYZW) mVUunpack_xyzw(src, src, 0);
|
||||
xMOVSS(src, dest);
|
||||
xMOVSD(dest, src);
|
||||
break;
|
||||
case 5: xSHUF.PS(dest, src, 0xd8);
|
||||
xPSHUF.D(dest, dest, 0xd8);
|
||||
break;
|
||||
case 6: xSHUF.PS(dest, src, 0x9c);
|
||||
xPSHUF.D(dest, dest, 0x78);
|
||||
break;
|
||||
case 7: xMOVSS(src, dest);
|
||||
xMOVAPS(dest, src);
|
||||
break;
|
||||
case 8: xMOVSS(dest, src);
|
||||
break;
|
||||
case 9: xSHUF.PS(dest, src, 0xc9);
|
||||
xPSHUF.D(dest, dest, 0xd2);
|
||||
break;
|
||||
case 10: xSHUF.PS(dest, src, 0x8d);
|
||||
xPSHUF.D(dest, dest, 0x72);
|
||||
break;
|
||||
case 11: xMOVSS(dest, src);
|
||||
xSHUF.PS(dest, src, 0xe4);
|
||||
break;
|
||||
case 12: xMOVSD(dest, src);
|
||||
break;
|
||||
case 13: xMOVHL.PS(dest, src);
|
||||
xSHUF.PS(src, dest, 0x64);
|
||||
xMOVAPS(dest, src);
|
||||
break;
|
||||
case 14: xMOVHL.PS(dest, src);
|
||||
xSHUF.PS(src, dest, 0xc4);
|
||||
xMOVAPS(dest, src);
|
||||
break;
|
||||
default: xMOVAPS(dest, src);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -35,24 +35,8 @@ static RecompiledCodeReserve* nVifUpkExec = NULL;
|
|||
|
||||
// Merges xmm vectors without modifying source reg
|
||||
void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xyzw) {
|
||||
if (x86caps.hasStreamingSIMD4Extensions || (xyzw==15)
|
||||
|| (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3)) {
|
||||
mVUmergeRegs(dest, src, xyzw);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(temp != src) xMOVAPS(temp, src); //Sometimes we don't care if the source is modified and is temp reg.
|
||||
if(dest == temp)
|
||||
{
|
||||
//VIF can sent the temp directory as the source and destination, just need to clear the ones we dont want in which case.
|
||||
if(!(xyzw & 0x1)) xAND.PS( dest, ptr128[SSEXYZWMask[0]]);
|
||||
if(!(xyzw & 0x2)) xAND.PS( dest, ptr128[SSEXYZWMask[1]]);
|
||||
if(!(xyzw & 0x4)) xAND.PS( dest, ptr128[SSEXYZWMask[2]]);
|
||||
if(!(xyzw & 0x8)) xAND.PS( dest, ptr128[SSEXYZWMask[3]]);
|
||||
}
|
||||
else mVUmergeRegs(dest, temp, xyzw);
|
||||
}
|
||||
}
|
||||
|
||||
// =====================================================================================================
|
||||
// VifUnpackSSE_Base Section
|
||||
|
@ -113,16 +97,6 @@ void VifUnpackSSE_Base::xUPK_S_32() const {
|
|||
|
||||
void VifUnpackSSE_Base::xUPK_S_16() const {
|
||||
|
||||
if (!x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xMOV16 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 16);
|
||||
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
return;
|
||||
}
|
||||
|
||||
switch(UnpkLoopIteration)
|
||||
{
|
||||
case 0:
|
||||
|
@ -144,17 +118,6 @@ void VifUnpackSSE_Base::xUPK_S_16() const {
|
|||
|
||||
void VifUnpackSSE_Base::xUPK_S_8() const {
|
||||
|
||||
if (!x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xMOV8 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(workReg, workReg);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 24);
|
||||
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
return;
|
||||
}
|
||||
|
||||
switch(UnpkLoopIteration)
|
||||
{
|
||||
case 0:
|
||||
|
@ -199,18 +162,8 @@ void VifUnpackSSE_Base::xUPK_V2_32() const {
|
|||
void VifUnpackSSE_Base::xUPK_V2_16() const {
|
||||
|
||||
if(UnpkLoopIteration == 0)
|
||||
{
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (workReg);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV64 (workReg, ptr64[srcIndirect]);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 16);
|
||||
}
|
||||
xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
|
||||
}
|
||||
else
|
||||
|
@ -223,19 +176,9 @@ void VifUnpackSSE_Base::xUPK_V2_16() const {
|
|||
|
||||
void VifUnpackSSE_Base::xUPK_V2_8() const {
|
||||
|
||||
if(UnpkLoopIteration == 0 || !x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
if(UnpkLoopIteration == 0)
|
||||
{
|
||||
xPMOVXX8 (workReg);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV16 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(workReg, workReg);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 24);
|
||||
}
|
||||
xPSHUF.D (destReg, workReg, 0x44); //v1v0v1v0
|
||||
}
|
||||
else
|
||||
|
@ -254,16 +197,7 @@ void VifUnpackSSE_Base::xUPK_V3_32() const {
|
|||
|
||||
void VifUnpackSSE_Base::xUPK_V3_16() const {
|
||||
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (destReg);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV64 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 16);
|
||||
}
|
||||
|
||||
//With V3-16, it takes the first vector from the next position as the W vector
|
||||
//However - IF the end of this iteration of the unpack falls on a quadword boundary, W becomes 0
|
||||
|
@ -278,17 +212,7 @@ void VifUnpackSSE_Base::xUPK_V3_16() const {
|
|||
|
||||
void VifUnpackSSE_Base::xUPK_V3_8() const {
|
||||
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX8 (destReg);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV32 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(destReg, destReg);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 24);
|
||||
}
|
||||
if (UnpkLoopIteration != IsAligned)
|
||||
xAND.PS(destReg, ptr128[SSEXYZWMask[0]]);
|
||||
}
|
||||
|
@ -300,32 +224,13 @@ void VifUnpackSSE_Base::xUPK_V4_32() const {
|
|||
|
||||
void VifUnpackSSE_Base::xUPK_V4_16() const {
|
||||
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX16 (destReg);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV64 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 16);
|
||||
}
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_8() const {
|
||||
|
||||
if (x86caps.hasStreamingSIMD4Extensions)
|
||||
{
|
||||
xPMOVXX8 (destReg);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV32 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(destReg, destReg);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 24);
|
||||
}
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_5() const {
|
||||
|
||||
|
|
Loading…
Reference in New Issue