Let's try that again...

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1202 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
arcum42 2009-05-16 07:24:31 +00:00
parent 15821b465a
commit 9d38252d0e
7 changed files with 71 additions and 24 deletions

View File

@ -19,7 +19,7 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "CDVDisodrv.h"
#include "CDVD/CDVDisodrv.h"
using namespace std;

View File

@ -23,8 +23,8 @@
#include "R3000A.h"
#include "Common.h"
#include "CdRom.h"
#include "CDVD.h"
#include "CDVD/CdRom.h"
#include "CDVD/CDVD.h"
#include "Sio.h"
#include "Sif.h"

View File

@ -29,7 +29,7 @@
#include "IopCommon.h"
#include "HostGui.h"
#include "CDVDisodrv.h"
#include "CDVD/CDVDisodrv.h"
#include "VUmicro.h"
#include "VU.h"
#include "iCore.h"

View File

@ -21,7 +21,7 @@
#include "IopCommon.h"
#include "SaveState.h"
#include "CDVDisodrv.h"
#include "CDVD/CDVDisodrv.h"
#include "VUmicro.h"
#include "VU.h"
#include "iCore.h"

View File

@ -134,4 +134,3 @@ echo " local plugin inis? $localinis"
echo " custom cflags? $customcflags"
echo " memcpy_fast? $memcpyfast"
#echo " microVU? $microVU"
echo " pcsx2_dir = '$pcsx2_dir'"

View File

@ -17,7 +17,7 @@
*/
#include "Win32.h"
#include "cdvd.h"
#include "CDVD/CDVD.h"
static LARGE_INTEGER lfreq;

View File

@ -25,37 +25,40 @@
#include <xmmintrin.h>
#include <emmintrin.h>
//#define USE_OLD_IVIF_CODE
// sse2 highly optimized vif (~200 separate functions are built) zerofrog(@gmail.com)
extern u32 g_vif1Masks[48], g_vif0Masks[48];
extern u32 g_vif1HasMask3[4], g_vif0HasMask3[4];
// arranged in writearr, rowarr, colarr, updatearr
// One row of four u32 words per 4-bit index: SetNewMask selects rows with
// (mask & 15) and ((mask >> 4) & 15), i.e. one row per nibble of a mask byte.
// NOTE(review): PCSX2_ALIGNED16 presumably forces 16-byte alignment, which the
// movaps loads in the gcc SetNewMask rely on -- confirm against the macro definition.
static PCSX2_ALIGNED16(u32 s_maskarr[16][4]) = {
0xffffffff, 0x00000000, 0x00000000, 0xffffffff,
0xffff0000, 0x0000ffff, 0x00000000, 0xffffffff,
0xffff0000, 0x00000000, 0x0000ffff, 0xffffffff,
0xffff0000, 0x00000000, 0x00000000, 0xffff0000,
0x0000ffff, 0xffff0000, 0x00000000, 0xffffffff,
0x00000000, 0xffffffff, 0x00000000, 0xffffffff,
0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff,
0x00000000, 0xffff0000, 0x00000000, 0xffff0000,
0x0000ffff, 0x00000000, 0xffff0000, 0xffffffff,
0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff,
0x00000000, 0x00000000, 0xffffffff, 0xffffffff,
0x00000000, 0x00000000, 0xffff0000, 0xffff0000,
0x0000ffff, 0x00000000, 0x00000000, 0x0000ffff,
0x00000000, 0x0000ffff, 0x00000000, 0x0000ffff,
0x00000000, 0x00000000, 0x0000ffff, 0x0000ffff,
0x00000000, 0x00000000, 0x00000000, 0x00000000
{0xffffffff, 0x00000000, 0x00000000, 0xffffffff},
{0xffff0000, 0x0000ffff, 0x00000000, 0xffffffff},
{0xffff0000, 0x00000000, 0x0000ffff, 0xffffffff},
{0xffff0000, 0x00000000, 0x00000000, 0xffff0000},
{0x0000ffff, 0xffff0000, 0x00000000, 0xffffffff},
{0x00000000, 0xffffffff, 0x00000000, 0xffffffff},
{0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff},
{0x00000000, 0xffff0000, 0x00000000, 0xffff0000},
{0x0000ffff, 0x00000000, 0xffff0000, 0xffffffff},
{0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff},
{0x00000000, 0x00000000, 0xffffffff, 0xffffffff},
{0x00000000, 0x00000000, 0xffff0000, 0xffff0000},
{0x0000ffff, 0x00000000, 0x00000000, 0x0000ffff},
{0x00000000, 0x0000ffff, 0x00000000, 0x0000ffff},
{0x00000000, 0x00000000, 0x0000ffff, 0x0000ffff},
{0x00000000, 0x00000000, 0x00000000, 0x00000000}
};
extern u8 s_maskwrite[256];
extern "C" PCSX2_ALIGNED16(u32 s_TempDecompress[4]) = {0};
#if defined(_MSC_VER) || !defined(USE_OLD_IVIF_CODE)
void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
{
u32 i;
u32 i;
u32 prev = 0;
FreezeXMMRegs(1);
for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {
@ -86,3 +89,48 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
}
FreezeXMMRegs(0);
}
#else // gcc
// After some experimentation, I'm putting the old code back in for now for testing purposes, as the
// other version reliably SegFaults when loading YuGiOh: Duelist of the Roses on Linux (when setting
// r0 to _mm_load_si128, when vif1masks=0x846f670, hasmask=0x846f454, mask=0, and oldmask=5).
// Seems to work everywhere else. It'll stay disabled for now, but it's easier for me to fiddle with if it's in here. -arcum42
// Rebuilds the expanded mask tables for each of the four bytes of a mask value
// (gcc inline-assembly variant).
//
//   vif1masks - destination table; advanced by 16 u32s (64 bytes) per mask byte.
//   hasmask   - receives, for byte i, the running OR of s_maskwrite[] over bytes 0..i.
//   mask      - new mask value, consumed one byte at a time starting from the low byte.
//   oldmask   - previous mask value; a byte identical to its old value is not re-expanded.
//
// For every changed byte, the two s_maskarr rows selected by its low and high nibble
// are widened (each 16-bit half duplicated into a full 32-bit word) and stored as four
// 16-byte groups in s_maskarr column order. Within each group the first 8 bytes come
// from the low-nibble row and the last 8 bytes from the high-nibble row.
void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
{
	u32 i;
	u32 prev = 0;

	FreezeXMMRegs(1);

	for(i = 0; i < 4; ++i, mask >>= 8, oldmask >>= 8, vif1masks += 16) {

		//((mask&3)==3)||((mask&0xc)==0xc)||((mask&0x30)==0x30)||((mask&0xc0)==0xc0);
		prev |= s_maskwrite[mask&0xff];
		hasmask[i] = prev;

		if( (mask&0xff) != (oldmask&0xff) ) {
			u8* p0 = (u8*)&s_maskarr[mask&15][0];
			u8* p1 = (u8*)&s_maskarr[(mask>>4)&15][0];

			// The movaps loads require p0/p1 to be 16-byte aligned; the movq/movhps
			// stores have no alignment requirement.
			// The clobber list tells the compiler that xmm0-xmm3 are overwritten and
			// that memory (the 64 bytes at vif1masks) is written behind its back, so
			// it must not keep stale copies of that memory or of those registers.
			__asm__ __volatile__(".intel_syntax noprefix\n"
				"movaps xmm0, [%0]\n"
				"movaps xmm1, [%1]\n"
				"movaps xmm2, xmm0\n"
				"punpcklwd xmm0, xmm0\n"
				"punpckhwd xmm2, xmm2\n"
				"movaps xmm3, xmm1\n"
				"punpcklwd xmm1, xmm1\n"
				"punpckhwd xmm3, xmm3\n"
				"movq [%2], xmm0\n"
				"movq [%2+8], xmm1\n"
				"movhps [%2+16], xmm0\n"
				"movhps [%2+24], xmm1\n"
				"movq [%2+32], xmm2\n"
				"movq [%2+40], xmm3\n"
				"movhps [%2+48], xmm2\n"
				"movhps [%2+56], xmm3\n"
				".att_syntax\n"
				: /* no outputs */
				: "r"(p0), "r"(p1), "r"(vif1masks)
				: "xmm0", "xmm1", "xmm2", "xmm3", "memory" );
		}
	}

	FreezeXMMRegs(0);
}
#endif