mirror of https://github.com/PCSX2/pcsx2.git
Linux: Straighten up or remove a few Windows/Linux differences in the code (experimental), remove some dead code, fix a mistake in the Linux version of memcpy_amd_ (still broken, though), and change optimization levels due to gcc optimization-induced errors.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@718 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
804050aaef
commit
25df8958b2
|
@ -264,44 +264,16 @@ void mpeg2_idct_mmx_init (void);
|
|||
|
||||
void mpeg2_idct_init()
|
||||
{
|
||||
#if !defined(_MSC_VER) || _MSC_VER < 1400 // ignore vc2005 and beyond
|
||||
int i, j;
|
||||
int i, j;
|
||||
|
||||
/* if(hasMultimediaExtensions == 1)
|
||||
{
|
||||
mpeg2_idct_copy = mpeg2_idct_copy_mmx;
|
||||
mpeg2_idct_add = mpeg2_idct_add_mmx;
|
||||
mpeg2_idct_mmx_init ();
|
||||
}else if(hasMultimediaExtensionsExt == 1)
|
||||
{
|
||||
mpeg2_idct_copy = mpeg2_idct_copy_mmxext;
|
||||
mpeg2_idct_add = mpeg2_idct_add_mmxext;
|
||||
mpeg2_idct_mmx_init ();
|
||||
}else*/
|
||||
{
|
||||
mpeg2_idct_copy = mpeg2_idct_copy_c;
|
||||
mpeg2_idct_add = mpeg2_idct_add_c;
|
||||
for (i = -384; i < 640; i++)
|
||||
clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i);
|
||||
for (i = 0; i < 64; i++) {
|
||||
j = mpeg2_scan_norm[i];
|
||||
mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
|
||||
j = mpeg2_scan_alt[i];
|
||||
mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
|
||||
}
|
||||
mpeg2_idct_copy = mpeg2_idct_copy_c;
|
||||
mpeg2_idct_add = mpeg2_idct_add_c;
|
||||
for (i = -384; i < 640; i++)
|
||||
clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i);
|
||||
for (i = 0; i < 64; i++) {
|
||||
j = mpeg2_scan_norm[i];
|
||||
mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
|
||||
j = mpeg2_scan_alt[i];
|
||||
mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
|
||||
}
|
||||
|
||||
#else //blah vcnet2005 idiocity :D
|
||||
int i,j;
|
||||
mpeg2_idct_copy = mpeg2_idct_copy_c;
|
||||
mpeg2_idct_add = mpeg2_idct_add_c;
|
||||
for (i = -384; i < 640; i++)
|
||||
clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i);
|
||||
for (i = 0; i < 64; i++) {
|
||||
j = mpeg2_scan_norm[i];
|
||||
mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
|
||||
j = mpeg2_scan_alt[i];
|
||||
mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -187,9 +187,11 @@ void mpeg2_idct_init ();
|
|||
#ifdef _MSC_VER
|
||||
#define BigEndian(out, in) out = _byteswap_ulong(in)
|
||||
#else
|
||||
#define BigEndian(out, in) \
|
||||
out = (((((in) >> 24) & 0xFF) << 0) + ((((in) >> 16) & 0xFF) << 8) + \
|
||||
((((in) >> 8) & 0xFF) << 16) + ((((in) >> 0) & 0xFF) << 24));
|
||||
#define BigEndian(out, in) out = __builtin_bswap32(in) // or we could use the asm function bswap...
|
||||
// No need to reimplement something already in the compiler.
|
||||
//#define BigEndian(out, in) \
|
||||
// out = (((((in) >> 24) & 0xFF) << 0) + ((((in) >> 16) & 0xFF) << 8) + \
|
||||
// ((((in) >> 8) & 0xFF) << 16) + ((((in) >> 0) & 0xFF) << 24));
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -100,12 +100,6 @@ extern PatchTextTable cpuCore[];
|
|||
extern IniPatch patch[ MAX_PATCH ];
|
||||
extern int patchnumber;
|
||||
|
||||
#ifdef __LINUX__
|
||||
// Nasty, currently neccessary hack
|
||||
extern u32 LinuxsseMXCSR;
|
||||
extern u32 LinuxsseVUMXCSR;
|
||||
#endif
|
||||
|
||||
void applypatch( int place );
|
||||
void inifile_read( const char * name );
|
||||
void inifile_command( char * cmd );
|
||||
|
|
|
@ -25,10 +25,10 @@
|
|||
|
||||
#include "VifDma.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
//#ifdef _MSC_VER
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
using namespace std; // for min / max
|
||||
|
||||
|
@ -330,9 +330,9 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
|
|||
u32 memsize = VIFdmanum ? 0x4000 : 0x1000;
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
//#ifdef _MSC_VER
|
||||
_mm_prefetch((char*)data, _MM_HINT_NTA);
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
if (VIFdmanum == 0)
|
||||
{
|
||||
|
@ -381,9 +381,9 @@ static void VIFunpack(u32 *data, vifCode *v, int size, const unsigned int VIFdma
|
|||
VIFUNPACK_LOG("*PCSX2*: Unpack %x with size 0!! v->size = %d cl = %d, wl = %d, mode %d mask %x\n", v->cmd, v->size, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mode, vifRegs->mask);
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
//#ifdef _MSC_VER
|
||||
_mm_prefetch((char*)data+128, _MM_HINT_NTA);
|
||||
#endif
|
||||
//#endif
|
||||
_vifRegs = (VIFregisters*)vifRegs;
|
||||
_vifMaskRegs = VIFdmanum ? g_vif1Masks : g_vif0Masks;
|
||||
_vif = vif;
|
||||
|
|
|
@ -36,11 +36,11 @@ DEBUG_FLAGS=" -O0 -g "
|
|||
fi
|
||||
|
||||
WARNING_FLAGS="-Wall -Wno-format -Wno-unused-value"
|
||||
NORMAL_FLAGS=" -pipe -msse -O3 "
|
||||
NORMAL_FLAGS=" -pipe -msse -msse2 -O2 "
|
||||
# These optimizations seem to cause issues with GCC 4.3.3, so we'll turn them off.
|
||||
NORMAL_FLAGS+=" -fno-guess-branch-probability -fno-dse -fno-tree-dse "
|
||||
|
||||
DEBUG_FLAGS+=" -g -msse ${WARNING_FLAGS} "
|
||||
DEBUG_FLAGS+=" -g -msse -msse2 ${WARNING_FLAGS} "
|
||||
|
||||
dnl Check for debug build
|
||||
AC_MSG_CHECKING(debug build)
|
||||
|
|
|
@ -19,9 +19,9 @@
|
|||
|
||||
#define TINY_BLOCK_COPY 64
|
||||
#define IN_CACHE_COPY 2 * 1024
|
||||
#define UNCACHED_COPY 4 * 1024 /
|
||||
#define UNCACHED_COPY 4 * 1024
|
||||
#define BLOCK_PREFETCH_COPY infinity // no limit for movq/movntq w/block prefetch
|
||||
#define CACHEBLOCK 80h
|
||||
#define CACHEBLOCK 80
|
||||
|
||||
// Fast assembly routines for x86-64
|
||||
// zerofrog(@gmail.com)
|
||||
|
@ -40,7 +40,7 @@
|
|||
#define MEMCMP_SRC2 esi
|
||||
#define MEMCMP_SIZE ecx
|
||||
|
||||
.globl memcmp_mmx
|
||||
.global memcmp_mmx
|
||||
memcmp_mmx:
|
||||
// make sure mmx regs are stored
|
||||
// FreezeMMXRegs(1);
|
||||
|
@ -225,7 +225,7 @@ memcmp_End:
|
|||
#define MEMXOR_SRC2 esi
|
||||
#define MEMXOR_SIZE ecx
|
||||
|
||||
.globl memxor_mmx
|
||||
.global memxor_mmx
|
||||
memxor_mmx:
|
||||
// make sure mmx regs are stored
|
||||
// FreezeMMXRegs(1);
|
||||
|
@ -338,7 +338,7 @@ memxor_End:
|
|||
ret
|
||||
|
||||
// void __fastcall memcpy_amd_(void *dest, const void *src, size_t n)
|
||||
.globl memcpy_amd_
|
||||
.global memcpy_amd_
|
||||
memcpy_amd_:
|
||||
push edi
|
||||
push esi
|
||||
|
@ -356,10 +356,11 @@ memcpy_amd_:
|
|||
jbe $memcpy_do_align // it appears to be slower
|
||||
cmp eax, 64*1024
|
||||
jbe $memcpy_align_done
|
||||
|
||||
$memcpy_do_align:
|
||||
mov eax, 8 // a trick that's faster than rep movsb...
|
||||
sub eax, edi // align destination to qword
|
||||
andb eax, 111 // get the low bits
|
||||
and eax, 0b111 // get the low bits
|
||||
sub ecx, eax // update copy count
|
||||
neg eax // set up to jump into the array
|
||||
add eax, offset $memcpy_align_done
|
||||
|
@ -427,7 +428,7 @@ $memcpy_ic_2:
|
|||
mov eax, ecx // has valid low 6 bits of the byte count
|
||||
$memcpy_ic_3:
|
||||
shr eax, 2 // dword count
|
||||
andb eax, 1111 // only look at the "remainder" bits
|
||||
and eax, 0b1111 // only look at the "remainder" bits
|
||||
neg eax // set up to jump into the array
|
||||
add eax, offset $memcpy_last_few
|
||||
jmp eax // jump to array of movsd's
|
||||
|
@ -512,7 +513,7 @@ $memcpy_uc_1: // 64-byte blocks, uncached copy
|
|||
|
||||
$memcpy_last_few: // dword aligned from before movsd's
|
||||
mov eax, ecx // has valid low 2 bits of the byte count
|
||||
andb eax, 11 // the last few cows must come home
|
||||
and eax, 0b11 // the last few cows must come home
|
||||
jz $memcpy_final // no more, let's leave
|
||||
rep movsb // the last 1, 2, or 3 bytes
|
||||
|
||||
|
|
|
@ -22,6 +22,9 @@
|
|||
#include "Vif.h"
|
||||
#include "VUmicro.h"
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
// sse2 highly optimized vif (~200 separate functions are built) zerofrog(@gmail.com)
|
||||
extern u32 g_vif1Masks[48], g_vif0Masks[48];
|
||||
extern u32 g_vif1HasMask3[4], g_vif0HasMask3[4];
|
||||
|
@ -55,10 +58,7 @@ extern u8 s_maskwrite[256];
|
|||
|
||||
extern "C" PCSX2_ALIGNED16(u32 s_TempDecompress[4]) = {0};
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
//#if defined(_MSC_VER)
|
||||
|
||||
void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
|
||||
{
|
||||
|
@ -95,7 +95,7 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
|
|||
}
|
||||
|
||||
|
||||
#else // gcc
|
||||
/*#else // gcc
|
||||
// Is this really supposed to be assembly for gcc and C for Windows?
|
||||
void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
|
||||
{
|
||||
|
@ -135,4 +135,4 @@ void __fastcall SetNewMask(u32* vif1masks, u32* hasmask, u32 mask, u32 oldmask)
|
|||
FreezeXMMRegs(0);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif*/
|
||||
|
|
Loading…
Reference in New Issue