mirror of https://github.com/PCSX2/pcsx2.git
Many small bugfixes and optimizations:
* Fixed Memcard init so that Memcard1 isn't default in both slots (oops!)
* Fixed Memcard path logic so that cards outside your pcsx2 folder can be browsed/selected.
* Fixed CDVD-to-BIOS time sync (I simply forgot a function call!)
* Optimized yuv2rgb_sse2 by using Mod/RM form instructions.
* Win32: Same optimization applied to FreezeXMMRegs and FreezeMMXRegs (Linux already had this optimization).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@719 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
25df8958b2
commit
71e2dc2fb4
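The "Mod/RM form" optimization referenced above is an x86 encoding detail: a memory operand written against a global symbol, like [table + 16], embeds a full 32-bit displacement in every instruction, while loading the table's address into a register once lets the same operand be encoded as base register plus a signed 8-bit displacement. A minimal sketch of the difference in MSVC inline-asm style (symbol names illustrative, byte counts approximate):

    movaps xmm0, xmmword ptr [g_someTable + 0x10]  ; absolute form: disp32 in every instruction

    mov    ecx, offset g_someTable                 ; pay for the base load once...
    movaps xmm0, xmmword ptr [ecx + 0x10]          ; ...then each access uses disp8, ~3 bytes smaller

Smaller encodings mean more of the hot loop fits in the instruction cache, which is where the win comes from in yuv2rgb_sse2 and the register-freeze helpers below.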
@@ -31,13 +31,7 @@
#define ArraySize(x) (sizeof(x)/sizeof((x)[0]))
#endif

#ifdef __LINUX__
#define CALLBACK
#else
#define CALLBACK __stdcall
#endif


//////////////////////////////////////////////////////////////////////////////////////////
// jASSUME - give hints to the optimizer
// This is primarily useful for the default case switch optimizer, which enables VC to
// generate more compact switches.

@@ -68,8 +62,9 @@ default: \
break; \
}

//////////////////////////////////////////////////////////////////////////////////////////
// Basic Atomic Types

// Basic types
#if defined(_MSC_VER)

typedef __int8 s8;

@@ -92,6 +87,7 @@ typedef unsigned int uint;
#define PCSX2_ALIGNED16_EXTERN(x) extern __declspec(align(16)) x

#define __naked __declspec(naked)
#define CALLBACK __stdcall

#else // _MSC_VER

@@ -140,6 +136,7 @@ typedef union _LARGE_INTEGER
#define _inline __inline__ __attribute__((unused))
#define __forceinline __attribute__((always_inline,unused))
#define __naked // GCC lacks the naked specifier
#define CALLBACK // CALLBACK is win32-specific mess

#endif // __LINUX__

@@ -164,6 +161,7 @@ typedef s32 sptr;
#endif
#endif

//////////////////////////////////////////////////////////////////////////////////////////
// A rough-and-ready cross platform 128-bit datatype, Non-SSE style.
#ifdef __cplusplus
struct u128
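The jASSUME macro mentioned in the first hunk is only documented here, not defined. As a sketch (assuming the conventional MSVC definition via __assume, with a no-op fallback; the real definition lives outside this hunk), it and the switch-default idiom it enables look roughly like:

    // Sketch only; assumed definition, not shown in this diff.
    #ifdef _MSC_VER
    #	define jASSUME(exp) __assume(exp)   // optimizer hint; generates no code
    #else
    #	define jASSUME(exp) ((void)0)       // portable no-op fallback
    #endif

    switch( mode )
    {
    	case 0: /* ... */ break;
    	case 1: /* ... */ break;
    	default:
    		jASSUME(0);  // declares the default unreachable, so VC can omit the range check
    		break;
    }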
@@ -692,7 +692,7 @@ __forceinline void cdvdGetDiskType()
// gets value for start lsn of layer1
// returns: 1 if on dual layer disc
//          0 if not on dual layer disc
s32 cdvdReadDvdDualInfo(s32* dualType, u32* layer1Start)
static s32 cdvdReadDvdDualInfo(s32* dualType, u32* layer1Start)
{
u8 toc[2064];
*dualType = 0;

@@ -754,6 +754,8 @@ void cdvdReset()
cdvd.RTC.day = 25;
cdvd.RTC.month = 5;
cdvd.RTC.year = 7; //2007

cdvdSetSystemTime( cdvd );
}

struct Freeze_v10Compat
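The second hunk above is the "I simply forgot a function call" fix from the changelog: cdvdReset() seeds the RTC with fixed defaults but never propagated them, so the BIOS-visible clock stayed unsynced until the added cdvdSetSystemTime(cdvd) call. A minimal sketch of the fixed flow (names as in the hunk; the elided reset logic and the year base are assumptions):

    void cdvdReset()
    {
    	// ... remaining drive state cleared above ...
    	cdvd.RTC.day   = 25;
    	cdvd.RTC.month = 5;
    	cdvd.RTC.year  = 7;          // 2007, presumably stored relative to 2000
    	cdvdSetSystemTime( cdvd );   // the formerly missing call: push RTC defaults to the BIOS clock
    }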
@@ -21,35 +21,74 @@

#include "PrecompiledHeader.h"

#include "System.h"
#include "Misc.h"
#include "IPU.h"
#include "yuv2rgb.h"

// Everything below is bit accurate to the IPU specification (except maybe rounding).
// Know the specification before you touch it.
PCSX2_ALIGNED16(u16 C_bias[8]) = {0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000};
PCSX2_ALIGNED16(u8 Y_bias[16]) = {16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16};
#define SSE_COEFFICIENTS(name, x) \
PCSX2_ALIGNED16(u16 name[8]) = {x<<2,x<<2,x<<2,x<<2,x<<2,x<<2,x<<2,x<<2};
SSE_COEFFICIENTS(Y_coefficients, 0x95); // 1.1640625
SSE_COEFFICIENTS(RCr_coefficients, 0xcc); // 1.59375
SSE_COEFFICIENTS(GCr_coefficients, (-0x68)); // -0.8125
SSE_COEFFICIENTS(GCb_coefficients, (-0x32)); // -0.390625
SSE_COEFFICIENTS(BCb_coefficients, 0x102); // 2.015625
PCSX2_ALIGNED16(u16 Y_mask[8]) = {0xff00,0xff00,0xff00,0xff00,0xff00,0xff00,0xff00,0xff00};
// Specifying round off instead of round down as everywhere else
// implies that this is right
PCSX2_ALIGNED16(u16 round_1bit[8]) = {1,1,1,1,1,1,1,1};
PCSX2_ALIGNED16(u16 yuv2rgb_temp[3][8]);
#define SSE_COEFFICIENTS(x) \
{(x)<<2,(x)<<2,(x)<<2,(x)<<2,(x)<<2,(x)<<2,(x)<<2,(x)<<2}

struct SSE2_Tables
{
u16 C_bias[8]; // offset -64
u8 Y_bias[16]; // offset -48
u16 Y_mask[8]; // offset -32
u16 round_1bit[8]; // offset -16

u16 Y_coefficients[8]; // offset 0
u16 GCr_coefficients[8];// offset 16
u16 GCb_coefficients[8];// offset 32
u16 RCr_coefficients[8];// offset 48
u16 BCb_coefficients[8];// offset 64
};

#define C_BIAS (-64)
#define Y_BIAS (-48)
#define Y_MASK (-32)
#define ROUND_1BIT (-16)

#define Y_COEFF 0
#define GCr_COEFF 16
#define GCb_COEFF 32
#define RCr_COEFF 48
#define BCb_COEFF 64

static PCSX2_ALIGNED16(const SSE2_Tables sse2_tables) =
{
{0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000}, // c_bias
{16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16}, // y_bias
{0xff00,0xff00,0xff00,0xff00,0xff00,0xff00,0xff00,0xff00}, // y_mask

// Specifying round off instead of round down as everywhere else
// implies that this is right
{1,1,1,1,1,1,1,1}, // round_1bit

SSE_COEFFICIENTS(0x95), // 1.1640625 [Y_coefficients]
SSE_COEFFICIENTS(-0x68), // -0.8125 [GCr_coefficients]
SSE_COEFFICIENTS(-0x32), // -0.390625 [GCb_coefficients]
SSE_COEFFICIENTS(0xcc), // 1.59375 [RCr_coefficients]
SSE_COEFFICIENTS(0x102), // 2.015625 [BCb_coefficients]
};

static PCSX2_ALIGNED16(u16 yuv2rgb_temp[3][8]);

// This could potentially be improved for SSE4
void yuv2rgb_sse2(void)
__releaseinline void yuv2rgb_sse2(void)
{
FreezeXMMRegs(1);

#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
__asm {
mov eax, 1
mov esi, 0
mov edi, 0
xor esi, esi
xor edi, edi

// Use ecx and edx as base pointers, to allow for Mod/RM form on memOps.
// This saves 2-3 bytes per instruction where these are used. :)
mov ecx, offset yuv2rgb_temp
mov edx, offset sse2_tables+64;

align 16
tworows:
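Folding the formerly separate arrays into one SSE2_Tables struct is what makes the base-pointer trick pay off: with edx aimed 64 bytes into the struct, every table falls inside the signed 8-bit displacement range, so each memory operand needs only a one-byte offset. The layout can be checked against the #defines at compile time; a sketch using C++11 static_assert/offsetof, which this 2008-era codebase would not actually have had:

    #include <cstddef>

    // Offsets measured from (sse2_tables + 64), i.e. from the value loaded into edx.
    static_assert( offsetof(SSE2_Tables, C_bias)           - 64 == C_BIAS,     "C_bias" );
    static_assert( offsetof(SSE2_Tables, Y_bias)           - 64 == Y_BIAS,     "Y_bias" );
    static_assert( offsetof(SSE2_Tables, Y_mask)           - 64 == Y_MASK,     "Y_mask" );
    static_assert( offsetof(SSE2_Tables, round_1bit)       - 64 == ROUND_1BIT, "round_1bit" );
    static_assert( offsetof(SSE2_Tables, Y_coefficients)   - 64 == Y_COEFF,    "Y_coefficients" );
    static_assert( offsetof(SSE2_Tables, GCr_coefficients) - 64 == GCr_COEFF,  "GCr_coefficients" );
    static_assert( offsetof(SSE2_Tables, GCb_coefficients) - 64 == GCb_COEFF,  "GCb_coefficients" );
    static_assert( offsetof(SSE2_Tables, RCr_coefficients) - 64 == RCr_COEFF,  "RCr_coefficients" );
    static_assert( offsetof(SSE2_Tables, BCb_coefficients) - 64 == BCb_COEFF,  "BCb_coefficients" );

All nine displacements land in [-64, +64], comfortably within the disp8 range of -128..+127.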
@@ -65,29 +104,29 @@
// unfortunately I don't think this will matter despite being
// technically potentially a little faster, but this is
// equivalent to an add or sub
pxor xmm2, xmmword ptr [C_bias] // xmm2 <-- 8 x (Cb - 128) << 8
pxor xmm0, xmmword ptr [C_bias] // xmm0 <-- 8 x (Cr - 128) << 8
pxor xmm2, xmmword ptr [edx+C_BIAS] // xmm2 <-- 8 x (Cb - 128) << 8
pxor xmm0, xmmword ptr [edx+C_BIAS] // xmm0 <-- 8 x (Cr - 128) << 8

movaps xmm1, xmm0
movaps xmm3, xmm2
pmulhw xmm1, xmmword ptr [GCr_coefficients]
pmulhw xmm3, xmmword ptr [GCb_coefficients]
pmulhw xmm0, xmmword ptr [RCr_coefficients]
pmulhw xmm2, xmmword ptr [BCb_coefficients]
pmulhw xmm1, xmmword ptr [edx+GCr_COEFF]
pmulhw xmm3, xmmword ptr [edx+GCb_COEFF]
pmulhw xmm0, xmmword ptr [edx+RCr_COEFF]
pmulhw xmm2, xmmword ptr [edx+BCb_COEFF]
paddsw xmm1, xmm3
// store for the next line; looking at the code above
// compared to the code below, I have to wonder whether
// this was worth the hassle
movaps xmmword ptr [yuv2rgb_temp], xmm0
movaps xmmword ptr [yuv2rgb_temp+16], xmm1
movaps xmmword ptr [yuv2rgb_temp+32], xmm2
movaps xmmword ptr [ecx], xmm0
movaps xmmword ptr [ecx+16], xmm1
movaps xmmword ptr [ecx+32], xmm2
jmp ihatemsvc

align 16
onerow:
movaps xmm0, xmmword ptr [yuv2rgb_temp]
movaps xmm1, xmmword ptr [yuv2rgb_temp+16]
movaps xmm2, xmmword ptr [yuv2rgb_temp+32]
movaps xmm0, xmmword ptr [ecx]
movaps xmm1, xmmword ptr [ecx+16]
movaps xmm2, xmmword ptr [ecx+32]

// If masm directives worked properly in inline asm, I'd be using them,
// but I'm not inclined to write ~70 line #defines to simulate them.
@@ -100,13 +139,13 @@ ihatemsvc:
movaps xmm5, xmm2

movaps xmm6, xmmword ptr [mb8+edi]
psubusb xmm6, xmmword ptr [Y_bias]
psubusb xmm6, xmmword ptr [edx+Y_BIAS]
movaps xmm7, xmm6
psllw xmm6, 8 // xmm6 <- Y << 8 for pixels 0,2,4,6,8,10,12,14
pand xmm7, xmmword ptr [Y_mask] // xmm7 <- Y << 8 for pixels 1,3,5,7,9,11,13,15
pand xmm7, xmmword ptr [edx+Y_MASK] // xmm7 <- Y << 8 for pixels 1,3,5,7,9,11,13,15

pmulhuw xmm6, xmmword ptr [Y_coefficients]
pmulhuw xmm7, xmmword ptr [Y_coefficients]
pmulhuw xmm6, xmmword ptr [edx+Y_COEFF]
pmulhuw xmm7, xmmword ptr [edx+Y_COEFF]

paddsw xmm0, xmm6
paddsw xmm3, xmm7
@@ -116,7 +155,7 @@ ihatemsvc:
paddsw xmm5, xmm7

// round
movaps xmm6, xmmword ptr [round_1bit]
movaps xmm6, xmmword ptr [edx+ROUND_1BIT]
paddw xmm0, xmm6
paddw xmm1, xmm6
paddw xmm2, xmm6
@@ -176,8 +215,13 @@ ihatemsvc:
asm(
".intel_syntax noprefix\n"
"mov eax, 1\n"
"mov esi, 0\n"
"mov edi, 0\n"
"xor esi, esi\n"
"xor edi, edi\n"

// Use ecx and edx as base pointers, to allow for Mod/RM form on memOps.
// This saves 2-3 bytes per instruction where these are used. :)
"mov ecx, offset yuv2rgb_temp\n"
"mov edx, offset sse2_tables+64\n"

".align 16\n"
"tworows:\n"
@@ -193,29 +237,29 @@ ihatemsvc:
// unfortunately I don't think this will matter despite being
// technically potentially a little faster, but this is
// equivalent to an add or sub
"pxor xmm2, xmmword ptr [C_bias]\n" // xmm2 <-- 8 x (Cb - 128) << 8
"pxor xmm0, xmmword ptr [C_bias]\n" // xmm0 <-- 8 x (Cr - 128) << 8
"pxor xmm2, xmmword ptr [edx+C_BIAS]\n" // xmm2 <-- 8 x (Cb - 128) << 8
"pxor xmm0, xmmword ptr [edx+C_BIAS]\n" // xmm0 <-- 8 x (Cr - 128) << 8

"movaps xmm1, xmm0\n"
"movaps xmm3, xmm2\n"
"pmulhw xmm1, xmmword ptr [GCr_coefficients]\n"
"pmulhw xmm3, xmmword ptr [GCb_coefficients]\n"
"pmulhw xmm0, xmmword ptr [RCr_coefficients]\n"
"pmulhw xmm2, xmmword ptr [BCb_coefficients]\n"
"pmulhw xmm1, xmmword ptr [edx+GCr_COEFF]\n"
"pmulhw xmm3, xmmword ptr [edx+GCb_COEFF]\n"
"pmulhw xmm0, xmmword ptr [edx+RCr_COEFF]\n"
"pmulhw xmm2, xmmword ptr [edx+BCb_COEFF]\n"
"paddsw xmm1, xmm3\n"
// store for the next line; looking at the code above
// compared to the code below, I have to wonder whether
// this was worth the hassle
"movaps xmmword ptr [yuv2rgb_temp], xmm0\n"
"movaps xmmword ptr [yuv2rgb_temp+16], xmm1\n"
"movaps xmmword ptr [yuv2rgb_temp+32], xmm2\n"
"movaps xmmword ptr [ecx], xmm0\n"
"movaps xmmword ptr [ecx+16], xmm1\n"
"movaps xmmword ptr [ecx+32], xmm2\n"
"jmp ihategcctoo\n"

".align 16\n"
"onerow:\n"
"movaps xmm0, xmmword ptr [yuv2rgb_temp]\n"
"movaps xmm1, xmmword ptr [yuv2rgb_temp+16]\n"
"movaps xmm2, xmmword ptr [yuv2rgb_temp+32]\n"
"movaps xmm0, xmmword ptr [ecx]\n"
"movaps xmm1, xmmword ptr [ecx+16]\n"
"movaps xmm2, xmmword ptr [ecx+32]\n"

"ihategcctoo:\n"
"movaps xmm3, xmm0\n"
@@ -223,13 +267,13 @@ ihatemsvc:
"movaps xmm5, xmm2\n"

"movaps xmm6, xmmword ptr [mb8+edi]\n"
"psubusb xmm6, xmmword ptr [Y_bias]\n"
"psubusb xmm6, xmmword ptr [edx+Y_BIAS]\n"
"movaps xmm7, xmm6\n"
"psllw xmm6, 8\n" // xmm6 <- Y << 8 for pixels 0,2,4,6,8,10,12,14
"pand xmm7, xmmword ptr [Y_mask]\n" // xmm7 <- Y << 8 for pixels 1,3,5,7,9,11,13,15
"pand xmm7, xmmword ptr [edx+Y_MASK]\n" // xmm7 <- Y << 8 for pixels 1,3,5,7,9,11,13,15

"pmulhuw xmm6, xmmword ptr [Y_coefficients]\n"
"pmulhuw xmm7, xmmword ptr [Y_coefficients]\n"
"pmulhuw xmm6, xmmword ptr [edx+Y_COEFF]\n"
"pmulhuw xmm7, xmmword ptr [edx+Y_COEFF]\n"

"paddsw xmm0, xmm6\n"
"paddsw xmm3, xmm7\n"
@@ -239,7 +283,7 @@ ihatemsvc:
"paddsw xmm5, xmm7\n"

// round
"movaps xmm6, xmmword ptr [round_1bit]\n"
"movaps xmm6, xmmword ptr [edx+ROUND_1BIT]\n"
"paddw xmm0, xmm6\n"
"paddw xmm1, xmm6\n"
"paddw xmm2, xmm6\n"
@@ -299,6 +343,8 @@ ihatemsvc:
#else
#error Unsupported compiler
#endif

FreezeXMMRegs(0);
}

void yuv2rgb_init(void)
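Note the bracketing in yuv2rgb_sse2 above: the routine clobbers xmm0-xmm7, which the EE recompiler may hold live data in (hence the g_EEFreezeRegs assert seen later), so the raw SSE2 body is wrapped in a save/restore pair. A sketch of the same pattern for any hand-written SSE block in this codebase (FreezeXMMRegs is the real helper, shown in full near the end of this commit; the body is a placeholder):

    __releaseinline void some_sse2_routine(void)
    {
    	FreezeXMMRegs(1);   // spill xmm0-xmm7 if the recompiler has live values there
    	// ... raw SSE2 assembly, free to clobber xmm0-xmm7 ...
    	FreezeXMMRegs(0);   // reload the spilled registers
    }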
@@ -18,5 +18,5 @@

#pragma once

void yuv2rgb_sse2(void);
void yuv2rgb_init(void);
extern void yuv2rgb_sse2(void);
extern void yuv2rgb_init(void);
@@ -115,10 +115,24 @@ static __forceinline u32 timeGetTime()
# define __unused
#endif

//////////////////////////////////////////////////////////////////////////////////////////
// Forceinline macro that is enabled for RELEASE/PUBLIC builds ONLY. (non-inline in devel)
// This is useful because forceinline can make certain types of debugging problematic since
// functions that look like they should be called won't breakpoint since their code is inlined.
// Henceforth, use release_inline for things which we want inlined on public/release builds but
// *not* in devel builds.

#ifdef PCSX2_DEVBUILD
# define __releaseinline
#else
# define __releaseinline __forceinline
#endif

//////////////////////////////////////////////////////////////////////////////////////////
// Emitter Instance Identifiers. If you add a new emitter, do it here also.
// Note: Currently most of the instances map back to 0, since existing dynarec code all
// shares iCore and must therefore all share the same emitter instance.
// (note: these don't really belong here per-se, but it's an easy spot to use for now)
enum
{
EmitterId_R5900 = 0,
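The new __releaseinline macro centralizes a pattern that previously lived in individual files (the VifDma hunks below delete their local __pub_inline copy in its favor). Its effect on a definition, as a sketch (the function name and body here are illustrative only):

    // Devel build (PCSX2_DEVBUILD defined): __releaseinline expands to nothing,
    // so this stays a real out-of-line function and breakpoints on it work.
    // Release/public build: it expands to __forceinline and the call disappears.
    static __releaseinline void writeExample( u32* dest, u32 data )
    {
    	*dest = data;   // placeholder body
    }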
@@ -357,7 +357,7 @@ void SysExecute()
catch( R5900Exception::BaseExcept& ex )
{
Console::Error( ex.cMessage() );
Console::Error( fmt_string( "(EE) PC: 0x%8.8x \tCycle:0x8.8x", ex.cpuState.pc, ex.cpuState.cycle ).c_str() );
Console::Error( fmt_string( "(EE) PC: 0x%8.8x \tCycle: 0x%8.8x", ex.cpuState.pc, ex.cpuState.cycle ).c_str() );
}
}
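The fix above is a one-character format-string bug: "Cycle:0x8.8x" contains no '%' conversion, so the second argument was silently dropped and the literal text "0x8.8x" was printed instead. In plain printf terms:

    printf( "Cycle:0x8.8x\n",   cycle );  // bug: prints "Cycle:0x8.8x", cycle is never consumed
    printf( "Cycle: 0x%8.8x\n", cycle );  // fixed: prints the cycle count, zero-padded to 8 hex digits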
@@ -57,16 +57,7 @@ __forceinline static int _limit( int a, int max )
_vifRegs->offset++; \
}

// Forceinline macro that is enabled for RELEASE/PUBLIC builds ONLY.
// This is useful because forceinline can make certain types of debugging problematic since
// functions that look like they should be called won't breakpoint since their code is inlined.
#ifdef PCSX2_DEVBUILD
# define __pub_inline
#else
# define __pub_inline __forceinline
#endif

static __pub_inline void writeX( u32 *dest, u32 data ) {
static __releaseinline void writeX( u32 *dest, u32 data ) {
if (_vifRegs->code & 0x10000000) {
switch ( _vif->cl ) {
case 0: n = (_vifRegs->mask) & 0x3; break;

@@ -105,7 +96,7 @@ static __pub_inline void writeX( u32 *dest, u32 data ) {
// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x\n", *dest,_vifRegs->mode,_vifRegs->r0,data);
}

static __pub_inline void writeY( u32 *dest, u32 data ) {
static __releaseinline void writeY( u32 *dest, u32 data ) {
if (_vifRegs->code & 0x10000000) {
switch ( _vif->cl ) {
case 0: n = (_vifRegs->mask >> 2) & 0x3; break;

@@ -144,7 +135,7 @@ static __pub_inline void writeY( u32 *dest, u32 data ) {
// VIF_LOG("writeY %8.8x : Mode %d, r1 = %x, data %8.8x\n", *dest,_vifRegs->mode,_vifRegs->r1,data);
}

static __pub_inline void writeZ( u32 *dest, u32 data ) {
static __releaseinline void writeZ( u32 *dest, u32 data ) {
if (_vifRegs->code & 0x10000000) {
switch ( _vif->cl ) {
case 0: n = (_vifRegs->mask >> 4) & 0x3; break;

@@ -183,7 +174,7 @@ static __pub_inline void writeZ( u32 *dest, u32 data ) {
// VIF_LOG("writeZ %8.8x : Mode %d, r2 = %x, data %8.8x\n", *dest,_vifRegs->mode,_vifRegs->r2,data);
}

static __pub_inline void writeW( u32 *dest, u32 data ) {
static __releaseinline void writeW( u32 *dest, u32 data ) {
if (_vifRegs->code & 0x10000000) {
switch ( _vif->cl ) {
case 0: n = (_vifRegs->mask >> 6) & 0x3; break;
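Aside from swapping the local __pub_inline for the shared __releaseinline, these four hunks show how the VIF write helpers differ only in which 2-bit field of the mask register they decode: bits 0-1 for X, 2-3 for Y, 4-5 for Z, 6-7 for W (the case 0 lines visible above). The shared shape, generalized as a sketch (only the shift-and-mask comes from the hunks; everything else is an assumption):

    // field: 0..3 for the X..W vector components; returns the 2-bit write mode
    // selected by the VIF mask register for the first write cycle (cl == 0).
    static __releaseinline u32 maskModeForField( u32 mask, int field )
    {
    	return ( mask >> (field * 2) ) & 0x3;
    }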
@@ -54,6 +54,8 @@ void DlgItem_GetText( HWND hwnd, int dlgId, string& dest )
}
}

// strips path information so that absolute paths are reduced to relative paths
// where appropriate.
static const char* _stripPathInfo( const char* src )
{
const char* retval = src;

@@ -65,7 +67,9 @@ static const char* _stripPathInfo( const char* src )
workingfold++;
}

if( *retval == 0 ) return src;
// If a difference is found before we reach the end of our pcsx2 working folder, it
// means we need to use the fully absolute path form the user.
if( *workingfold != 0 ) return src;

while( (*retval != 0) && (*retval == '\\') ) retval++;

@@ -246,7 +250,7 @@ void IniFile::MemcardSettings( PcsxConfig& conf )
Path::Combine( g_WorkingFolder, m_Default_MemcardsDir[0] ) );

Entry( "Slot2_Path", conf.Mcd[1].Filename,
Path::Combine( g_WorkingFolder, m_Default_MemcardsDir[0] ) );
Path::Combine( g_WorkingFolder, m_Default_MemcardsDir[1] ) );

Entry( "Slot1_Enabled", conf.Mcd[0].Enabled, true );
Entry( "Slot2_Enabled", conf.Mcd[1].Enabled, true );
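These hunks carry two of the changelog's memcard fixes. The last one is the "Memcard1 in both slots" bug: Slot2_Path defaulted to m_Default_MemcardsDir[0] instead of [1], so both slots pointed at the same card file. The _stripPathInfo change is the path-logic fix: the old *retval check bailed out too early, while the new *workingfold test only forces the absolute form when the path genuinely diverges from the pcsx2 working folder. A sketch of the implied comparison (names from the hunks; the loop details and any case handling are assumptions):

    static const char* stripPathSketch( const char* src, const char* workingfold )
    {
    	const char* retval = src;
    	// Walk both strings while they agree.
    	while( (*workingfold != 0) && (*retval != 0) && (*workingfold == *retval) )
    	{
    		retval++;
    		workingfold++;
    	}
    	// Mismatch before the working folder ended: the card lives outside
    	// the pcsx2 folder, so keep the user's absolute path.
    	if( *workingfold != 0 ) return src;
    	// Otherwise drop the shared prefix plus any leading backslashes,
    	// leaving a relative path for the ini file.
    	while( (*retval != 0) && (*retval == '\\') ) retval++;
    	return retval;
    }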
@@ -67,7 +67,7 @@ void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR)
}

/////////////////////////////////////////////////////////////////////
//
// MMX Register Freezing
#ifndef __INTEL_COMPILER
extern "C"
{
@@ -86,28 +86,31 @@ __forceinline void FreezeMMXRegs_(int save)

#ifdef _MSC_VER
__asm {
movntq mmword ptr [g_globalMMXData + 0], mm0
movntq mmword ptr [g_globalMMXData + 8], mm1
movntq mmword ptr [g_globalMMXData + 16], mm2
movntq mmword ptr [g_globalMMXData + 24], mm3
movntq mmword ptr [g_globalMMXData + 32], mm4
movntq mmword ptr [g_globalMMXData + 40], mm5
movntq mmword ptr [g_globalMMXData + 48], mm6
movntq mmword ptr [g_globalMMXData + 56], mm7
mov ecx, offset g_globalMMXData
movntq mmword ptr [ecx+0], mm0
movntq mmword ptr [ecx+8], mm1
movntq mmword ptr [ecx+16], mm2
movntq mmword ptr [ecx+24], mm3
movntq mmword ptr [ecx+32], mm4
movntq mmword ptr [ecx+40], mm5
movntq mmword ptr [ecx+48], mm6
movntq mmword ptr [ecx+56], mm7
emms
}
#else
__asm__(".intel_syntax noprefix\n"
"movq [%0+0x00], mm0\n"
"movq [%0+0x08], mm1\n"
"movq [%0+0x10], mm2\n"
"movq [%0+0x18], mm3\n"
"movq [%0+0x20], mm4\n"
"movq [%0+0x28], mm5\n"
"movq [%0+0x30], mm6\n"
"movq [%0+0x38], mm7\n"
"emms\n"
".att_syntax\n" : : "r"(g_globalMMXData) );
__asm__(
".intel_syntax noprefix\n"
"movq [%0+0x00], mm0\n"
"movq [%0+0x08], mm1\n"
"movq [%0+0x10], mm2\n"
"movq [%0+0x18], mm3\n"
"movq [%0+0x20], mm4\n"
"movq [%0+0x28], mm5\n"
"movq [%0+0x30], mm6\n"
"movq [%0+0x38], mm7\n"
"emms\n"
".att_syntax\n" : : "r"(g_globalMMXData)
);
#endif

}
@@ -123,40 +126,44 @@ __forceinline void FreezeMMXRegs_(int save)

#ifdef _MSC_VER
__asm {
movq mm0, mmword ptr [g_globalMMXData + 0]
movq mm1, mmword ptr [g_globalMMXData + 8]
movq mm2, mmword ptr [g_globalMMXData + 16]
movq mm3, mmword ptr [g_globalMMXData + 24]
movq mm4, mmword ptr [g_globalMMXData + 32]
movq mm5, mmword ptr [g_globalMMXData + 40]
movq mm6, mmword ptr [g_globalMMXData + 48]
movq mm7, mmword ptr [g_globalMMXData + 56]
mov ecx, offset g_globalMMXData
movq mm0, mmword ptr [ecx+0]
movq mm1, mmword ptr [ecx+8]
movq mm2, mmword ptr [ecx+16]
movq mm3, mmword ptr [ecx+24]
movq mm4, mmword ptr [ecx+32]
movq mm5, mmword ptr [ecx+40]
movq mm6, mmword ptr [ecx+48]
movq mm7, mmword ptr [ecx+56]
emms
}
#else
__asm__(".intel_syntax noprefix\n"
"movq mm0, [%0+0x00]\n"
"movq mm1, [%0+0x08]\n"
"movq mm2, [%0+0x10]\n"
"movq mm3, [%0+0x18]\n"
"movq mm4, [%0+0x20]\n"
"movq mm5, [%0+0x28]\n"
"movq mm6, [%0+0x30]\n"
"movq mm7, [%0+0x38]\n"
"emms\n"
".att_syntax\n" : : "r"(g_globalMMXData) );
__asm__(
".intel_syntax noprefix\n"
"movq mm0, [%0+0x00]\n"
"movq mm1, [%0+0x08]\n"
"movq mm2, [%0+0x10]\n"
"movq mm3, [%0+0x18]\n"
"movq mm4, [%0+0x20]\n"
"movq mm5, [%0+0x28]\n"
"movq mm6, [%0+0x30]\n"
"movq mm7, [%0+0x38]\n"
"emms\n"
".att_syntax\n" : : "r"(g_globalMMXData)
);
#endif
}
}

//////////////////////////////////////////////////////////////////////

// XMM Register Freezing
__forceinline void FreezeXMMRegs_(int save)
{
//SysPrintf("FreezeXMMRegs_(%d); [%d]\n", save, g_globalXMMSaved);
assert( g_EEFreezeRegs );

if( save ) {
if( save )
{
g_globalXMMSaved++;
if( g_globalXMMSaved > 1 ){
//SysPrintf("XMM Already saved\n");
@@ -166,31 +173,35 @@ __forceinline void FreezeXMMRegs_(int save)

#ifdef _MSC_VER
__asm {
movaps xmmword ptr [g_globalXMMData + 0x00], xmm0
movaps xmmword ptr [g_globalXMMData + 0x10], xmm1
movaps xmmword ptr [g_globalXMMData + 0x20], xmm2
movaps xmmword ptr [g_globalXMMData + 0x30], xmm3
movaps xmmword ptr [g_globalXMMData + 0x40], xmm4
movaps xmmword ptr [g_globalXMMData + 0x50], xmm5
movaps xmmword ptr [g_globalXMMData + 0x60], xmm6
movaps xmmword ptr [g_globalXMMData + 0x70], xmm7
mov ecx, offset g_globalXMMData
movaps xmmword ptr [ecx+0x00], xmm0
movaps xmmword ptr [ecx+0x10], xmm1
movaps xmmword ptr [ecx+0x20], xmm2
movaps xmmword ptr [ecx+0x30], xmm3
movaps xmmword ptr [ecx+0x40], xmm4
movaps xmmword ptr [ecx+0x50], xmm5
movaps xmmword ptr [ecx+0x60], xmm6
movaps xmmword ptr [ecx+0x70], xmm7
}

#else
__asm__(".intel_syntax noprefix\n"
"movaps [%0+0x00], xmm0\n"
"movaps [%0+0x10], xmm1\n"
"movaps [%0+0x20], xmm2\n"
"movaps [%0+0x30], xmm3\n"
"movaps [%0+0x40], xmm4\n"
"movaps [%0+0x50], xmm5\n"
"movaps [%0+0x60], xmm6\n"
"movaps [%0+0x70], xmm7\n"
".att_syntax\n" : : "r"(g_globalXMMData) );
__asm__(
".intel_syntax noprefix\n"
"movaps [%0+0x00], xmm0\n"
"movaps [%0+0x10], xmm1\n"
"movaps [%0+0x20], xmm2\n"
"movaps [%0+0x30], xmm3\n"
"movaps [%0+0x40], xmm4\n"
"movaps [%0+0x50], xmm5\n"
"movaps [%0+0x60], xmm6\n"
"movaps [%0+0x70], xmm7\n"
".att_syntax\n" : : "r"(g_globalXMMData)
);

#endif // _MSC_VER
}
else {
else
{
if( g_globalXMMSaved==0 )
{
//SysPrintf("XMM Regs not saved!\n");
@@ -202,28 +213,32 @@ __forceinline void FreezeXMMRegs_(int save)
if( g_globalXMMSaved > 0 ) return;

#ifdef _MSC_VER
__asm {
movaps xmm0, xmmword ptr [g_globalXMMData + 0x00]
movaps xmm1, xmmword ptr [g_globalXMMData + 0x10]
movaps xmm2, xmmword ptr [g_globalXMMData + 0x20]
movaps xmm3, xmmword ptr [g_globalXMMData + 0x30]
movaps xmm4, xmmword ptr [g_globalXMMData + 0x40]
movaps xmm5, xmmword ptr [g_globalXMMData + 0x50]
movaps xmm6, xmmword ptr [g_globalXMMData + 0x60]
movaps xmm7, xmmword ptr [g_globalXMMData + 0x70]
__asm
{
mov ecx, offset g_globalXMMData
movaps xmm0, xmmword ptr [ecx+0x00]
movaps xmm1, xmmword ptr [ecx+0x10]
movaps xmm2, xmmword ptr [ecx+0x20]
movaps xmm3, xmmword ptr [ecx+0x30]
movaps xmm4, xmmword ptr [ecx+0x40]
movaps xmm5, xmmword ptr [ecx+0x50]
movaps xmm6, xmmword ptr [ecx+0x60]
movaps xmm7, xmmword ptr [ecx+0x70]
}

#else
__asm__(".intel_syntax noprefix\n"
"movaps xmm0, [%0+0x00]\n"
"movaps xmm1, [%0+0x10]\n"
"movaps xmm2, [%0+0x20]\n"
"movaps xmm3, [%0+0x30]\n"
"movaps xmm4, [%0+0x40]\n"
"movaps xmm5, [%0+0x50]\n"
"movaps xmm6, [%0+0x60]\n"
"movaps xmm7, [%0+0x70]\n"
".att_syntax\n" : : "r"(g_globalXMMData) );
__asm__(
".intel_syntax noprefix\n"
"movaps xmm0, [%0+0x00]\n"
"movaps xmm1, [%0+0x10]\n"
"movaps xmm2, [%0+0x20]\n"
"movaps xmm3, [%0+0x30]\n"
"movaps xmm4, [%0+0x40]\n"
"movaps xmm5, [%0+0x50]\n"
"movaps xmm6, [%0+0x60]\n"
"movaps xmm7, [%0+0x70]\n"
".att_syntax\n" : : "r"(g_globalXMMData) );
);

#endif // _MSC_VER
}
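Tying the FreezeXMMRegs_ hunks together: the function is nesting-counted, so only the outermost save actually spills xmm0-xmm7 and only the restore that brings the counter back to zero reloads them; everything in between is a cheap increment/decrement. The overall shape, reconstructed from the fragments above with the asm bodies and elided lines as assumptions:

    __forceinline void FreezeXMMRegs_(int save)
    {
    	assert( g_EEFreezeRegs );
    	if( save )
    	{
    		g_globalXMMSaved++;
    		if( g_globalXMMSaved > 1 ) return;   // an outer caller already saved the registers
    		// ... movaps xmm0-xmm7 out to g_globalXMMData (via the new ecx base) ...
    	}
    	else
    	{
    		if( g_globalXMMSaved == 0 ) return;  // nothing to restore (noted in devel builds)
    		g_globalXMMSaved--;
    		if( g_globalXMMSaved > 0 ) return;   // still inside a nested save; defer the reload
    		// ... movaps xmm0-xmm7 back from g_globalXMMData (via the new ecx base) ...
    	}
    }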