mirror of https://github.com/PCSX2/pcsx2.git
Let's try that again...
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1202 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
15821b465a
commit
9d38252d0e
|
@ -19,7 +19,7 @@
|
||||||
#include "PrecompiledHeader.h"
|
#include "PrecompiledHeader.h"
|
||||||
|
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
#include "CDVDisodrv.h"
|
#include "CDVD/CDVDisodrv.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
|
@ -23,8 +23,8 @@
|
||||||
#include "R3000A.h"
|
#include "R3000A.h"
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
|
|
||||||
#include "CdRom.h"
|
#include "CDVD/CdRom.h"
|
||||||
#include "CDVD.h"
|
#include "CDVD/CDVD.h"
|
||||||
|
|
||||||
#include "Sio.h"
|
#include "Sio.h"
|
||||||
#include "Sif.h"
|
#include "Sif.h"
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
#include "IopCommon.h"
|
#include "IopCommon.h"
|
||||||
#include "HostGui.h"
|
#include "HostGui.h"
|
||||||
|
|
||||||
#include "CDVDisodrv.h"
|
#include "CDVD/CDVDisodrv.h"
|
||||||
#include "VUmicro.h"
|
#include "VUmicro.h"
|
||||||
#include "VU.h"
|
#include "VU.h"
|
||||||
#include "iCore.h"
|
#include "iCore.h"
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
#include "IopCommon.h"
|
#include "IopCommon.h"
|
||||||
#include "SaveState.h"
|
#include "SaveState.h"
|
||||||
|
|
||||||
#include "CDVDisodrv.h"
|
#include "CDVD/CDVDisodrv.h"
|
||||||
#include "VUmicro.h"
|
#include "VUmicro.h"
|
||||||
#include "VU.h"
|
#include "VU.h"
|
||||||
#include "iCore.h"
|
#include "iCore.h"
|
||||||
|
|
|
@ -134,4 +134,3 @@ echo " local plugin inis? $localinis"
|
||||||
echo " custom cflags? $customcflags"
|
echo " custom cflags? $customcflags"
|
||||||
echo " memcpy_fast? $memcpyfast"
|
echo " memcpy_fast? $memcpyfast"
|
||||||
#echo " microVU? $microVU"
|
#echo " microVU? $microVU"
|
||||||
echo " pcsx2_dir = '$pcsx2_dir'"
|
|
||||||
|
|
|
@ -17,7 +17,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "Win32.h"
|
#include "Win32.h"
|
||||||
#include "cdvd.h"
|
#include "CDVD/CDVD.h"
|
||||||
|
|
||||||
static LARGE_INTEGER lfreq;
|
static LARGE_INTEGER lfreq;
|
||||||
|
|
||||||
|
|
|
@ -25,37 +25,40 @@
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
|
|
||||||
|
//#define USE_OLD_IVIF_CODE
|
||||||
|
|
||||||
// sse2 highly optimized vif (~200 separate functions are built) zerofrog(@gmail.com)
|
// sse2 highly optimized vif (~200 separate functions are built) zerofrog(@gmail.com)
|
||||||
extern u32 g_vif1Masks[48], g_vif0Masks[48];
|
extern u32 g_vif1Masks[48], g_vif0Masks[48];
|
||||||
extern u32 g_vif1HasMask3[4], g_vif0HasMask3[4];
|
extern u32 g_vif1HasMask3[4], g_vif0HasMask3[4];
|
||||||
|
|
||||||
// arranged in writearr, rowarr, colarr, updatearr
|
// arranged in writearr, rowarr, colarr, updatearr
|
||||||
static PCSX2_ALIGNED16(u32 s_maskarr[16][4]) = {
|
static PCSX2_ALIGNED16(u32 s_maskarr[16][4]) = {
|
||||||
0xffffffff, 0x00000000, 0x00000000, 0xffffffff,
|
{0xffffffff, 0x00000000, 0x00000000, 0xffffffff},
|
||||||
0xffff0000, 0x0000ffff, 0x00000000, 0xffffffff,
|
{0xffff0000, 0x0000ffff, 0x00000000, 0xffffffff},
|
||||||
0xffff0000, 0x00000000, 0x0000ffff, 0xffffffff,
|
{0xffff0000, 0x00000000, 0x0000ffff, 0xffffffff},
|
||||||
0xffff0000, 0x00000000, 0x00000000, 0xffff0000,
|
{0xffff0000, 0x00000000, 0x00000000, 0xffff0000},
|
||||||
0x0000ffff, 0xffff0000, 0x00000000, 0xffffffff,
|
{0x0000ffff, 0xffff0000, 0x00000000, 0xffffffff},
|
||||||
0x00000000, 0xffffffff, 0x00000000, 0xffffffff,
|
{0x00000000, 0xffffffff, 0x00000000, 0xffffffff},
|
||||||
0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff,
|
{0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff},
|
||||||
0x00000000, 0xffff0000, 0x00000000, 0xffff0000,
|
{0x00000000, 0xffff0000, 0x00000000, 0xffff0000},
|
||||||
0x0000ffff, 0x00000000, 0xffff0000, 0xffffffff,
|
{0x0000ffff, 0x00000000, 0xffff0000, 0xffffffff},
|
||||||
0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff,
|
{0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff},
|
||||||
0x00000000, 0x00000000, 0xffffffff, 0xffffffff,
|
{0x00000000, 0x00000000, 0xffffffff, 0xffffffff},
|
||||||
0x00000000, 0x00000000, 0xffff0000, 0xffff0000,
|
{0x00000000, 0x00000000, 0xffff0000, 0xffff0000},
|
||||||
0x0000ffff, 0x00000000, 0x00000000, 0x0000ffff,
|
{0x0000ffff, 0x00000000, 0x00000000, 0x0000ffff},
|
||||||
0x00000000, 0x0000ffff, 0x00000000, 0x0000ffff,
|
{0x00000000, 0x0000ffff, 0x00000000, 0x0000ffff},
|
||||||
0x00000000, 0x00000000, 0x0000ffff, 0x0000ffff,
|
{0x00000000, 0x00000000, 0x0000ffff, 0x0000ffff},
|
||||||
0x00000000, 0x00000000, 0x00000000, 0x00000000
|
{0x00000000, 0x00000000, 0x00000000, 0x00000000}
|
||||||
};
|
};
|
||||||
|
|
||||||
extern u8 s_maskwrite[256];
|
extern u8 s_maskwrite[256];
|
||||||
|
|
||||||
extern "C" PCSX2_ALIGNED16(u32 s_TempDecompress[4]) = {0};
|
extern "C" PCSX2_ALIGNED16(u32 s_TempDecompress[4]) = {0};
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) || !defined(USE_OLD_IVIF_CODE)
|
||||||
void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
|
void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
|
||||||
{
|
{
|
||||||
u32 i;
|
u32 i;
|
||||||
u32 prev = 0;
|
u32 prev = 0;
|
||||||
FreezeXMMRegs(1);
|
FreezeXMMRegs(1);
|
||||||
for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {
|
for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {
|
||||||
|
@ -86,3 +89,48 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
|
||||||
}
|
}
|
||||||
FreezeXMMRegs(0);
|
FreezeXMMRegs(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else // gcc
|
||||||
|
// After some experimentation, I'm putting the old code back in for now for testing purposes, as the
|
||||||
|
// other version reliably SegFaults when loading YuGiOh: Duelist of the Roses on Linux (when setting
|
||||||
|
// r0 to _mm_load_si128, when vif1masks=0x846f670, hasmask=0x846f454, mask=0, and oldmask=5).
|
||||||
|
// Seems to work everywhere else. It'll stay disabled for now, but it's easier for me to fiddle with if it's in here. -arcum42
|
||||||
|
// Re-expands the VIF mask tables for every mask byte that differs from the previous mask
// (GCC inline-assembly variant).
//
// vif1masks - destination table. For each changed mask byte, 64 bytes (16 u32) are written;
//             the pointer advances by 16 u32 per mask byte whether or not it was rewritten.
// hasmask   - output array of 4 entries; entry i receives the OR of s_maskwrite[] over mask
//             bytes 0..i.
// mask      - new mask value, consumed one byte at a time starting with the lowest byte.
// oldmask   - previous mask value; a byte identical to its counterpart in `mask` is skipped.
void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
{
	u32 i;
	u32 prev = 0;

	// NOTE(review): presumably tells the recompiler to preserve its XMM register state while
	// this routine uses xmm0-xmm3 -- confirm against FreezeXMMRegs' definition.
	FreezeXMMRegs(1);

	for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {

		prev |= s_maskwrite[mask&0xff];//((mask&3)==3)||((mask&0xc)==0xc)||((mask&0x30)==0x30)||((mask&0xc0)==0xc0);
		hasmask[i] = prev;

		if( (mask&0xff) != (oldmask&0xff) ) {
			// The low and high nibble of the mask byte each select one 16-byte row of s_maskarr.
			// Both rows are loaded with movaps, so they must be 16-byte aligned.
			u8* p0 = (u8*)&s_maskarr[mask&15][0];
			u8* p1 = (u8*)&s_maskarr[(mask>>4)&15][0];

			// Every 16-bit word of each row is duplicated (punpck{l,h}wd reg,reg), then the
			// 8-byte halves of the two expanded rows are interleaved into the destination.
			// The clobber list tells the compiler that xmm0-xmm3 are overwritten and that
			// memory is read and written behind its back (the stores through %2); without
			// it the compiler may keep live values in those registers or cache/reorder
			// memory accesses across the asm statement.
			__asm__(".intel_syntax noprefix\n"
				"movaps xmm0, [%0]\n"
				"movaps xmm1, [%1]\n"
				"movaps xmm2, xmm0\n"
				"punpcklwd xmm0, xmm0\n"
				"punpckhwd xmm2, xmm2\n"
				"movaps xmm3, xmm1\n"
				"punpcklwd xmm1, xmm1\n"
				"punpckhwd xmm3, xmm3\n"
				"movq [%2], xmm0\n"
				"movq [%2+8], xmm1\n"
				"movhps [%2+16], xmm0\n"
				"movhps [%2+24], xmm1\n"
				"movq [%2+32], xmm2\n"
				"movq [%2+40], xmm3\n"
				"movhps [%2+48], xmm2\n"
				"movhps [%2+56], xmm3\n"
				".att_syntax\n"
				:
				: "r"(p0), "r"(p1), "r"(vif1masks)
				: "xmm0", "xmm1", "xmm2", "xmm3", "memory" );
		}
	}

	FreezeXMMRegs(0);
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue