mirror of https://github.com/PCSX2/pcsx2.git
microVU:
- Added microVU_Clamp.inl to the project file; it holds all of mVU's clamping routines.
- Added tmmk's optimized clamping method for SSE4 (preserve-sign clamp mode only) <-- untested.
- No longer using regalloc for the preserve-sign non-SSE4 code, since it seems to be bugged (thanks to nneeve for pointing it out).
- Extra mode was previously using the preserve-sign code; it has now been changed to never preserve the NaN sign.
- Fixed a bug in the extra clamp modes where all 4 vectors were being clamped on SS SSE instructions, destroying the upper 3 vectors.
After these changes, the compatibility of the extra / preserve-sign clamp modes has gone up.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2315 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
e01cfedd2b
commit
316e7ffccc
|
@ -651,6 +651,10 @@
|
|||
RelativePath="..\..\x86\microVU_Branch.inl"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\microVU_Clamp.inl"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\microVU_Compile.inl"
|
||||
>
|
||||
|
|
|
@ -144,6 +144,7 @@ struct microVU {
|
|||
__aligned16 u32 macFlag[4]; // 4 instances of mac flag (used in execution)
|
||||
__aligned16 u32 clipFlag[4]; // 4 instances of clip flag (used in execution)
|
||||
__aligned16 u32 xmmPQb[4]; // Backup for xmmPQ
|
||||
__aligned16 u32 xmmCTemp[4]; // Backup used in mVUclamp2()
|
||||
|
||||
u32 index; // VU Index (VU0 or VU1)
|
||||
u32 cop2; // VU is in COP2 mode? (No/Yes)
|
||||
|
@ -208,6 +209,7 @@ typedef void (__fastcall *mVUrecCall)(u32, u32);
|
|||
|
||||
|
||||
// Include all the *.inl files (Needed because C++ sucks with templates and *.cpp files)
|
||||
#include "microVU_Clamp.inl"
|
||||
#include "microVU_Misc.inl"
|
||||
#include "microVU_Log.inl"
|
||||
#include "microVU_Analyze.inl"
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU - Clamp Functions
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Bounds for the SSE4 path of mVUclamp2(), which clamps with packed integer
// minimums rather than float min/max:
//   - sse4_maxvals is the memory operand of PMINSD (signed dword minimum)
//   - sse4_minvals is the memory operand of PMINUD (unsigned dword minimum)
// 0x7f7fffff / 0xff7fffff are the bit patterns of +FLT_MAX / -FLT_MAX.
// Row 0 ("1000") bounds only the first element: the other three elements
// hold the largest signed (0x7fffffff) or unsigned (0xffffffff) dword, so the
// minimum leaves those lanes untouched. Row 1 ("1111") bounds all four.
const __aligned16 u32 sse4_minvals[2][4] = {
	{ 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff }, // row 0: 1000
	{ 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, // row 1: 1111
};

const __aligned16 u32 sse4_maxvals[2][4] = {
	{ 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, // row 0: 1000
	{ 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, // row 1: 1111
};
|
||||
|
||||
// Used for Result Clamping
// Note: This function will not preserve the sign of NaN values.
// The theory behind this is that when we compute a result and get a
// NaN value, something went wrong, and the NaN's sign is not to be
// trusted. Games usually like positive values better, and it's
// faster... so just always make NaNs into positive infinity.
|
||||
void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) {
|
||||
if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
|
||||
switch (xyzw) {
|
||||
case 1: case 2: case 4: case 8:
|
||||
SSE_MINSS_M32_to_XMM(reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXSS_M32_to_XMM(reg, (uptr)mVUglob.minvals);
|
||||
break;
|
||||
default:
|
||||
SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Used for Operand Clamping
|
||||
// Note 1: If 'preserve sign' mode is on, it will preserve the sign of NaN values.
|
||||
// Note 2: Using regalloc here seems to contaminate some regs in certain games.
|
||||
// Must be some specific case I've overlooked (or I used regalloc improperly on an opcode)
|
||||
// so we just use a temporary mem location for our backup for now... (non-sse4 version only)
|
||||
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) {
	// Operand clamp.
	//   mVU     - recompiler state; only its xmmCTemp backup slot is used here
	//   reg     - xmm register index holding the value to clamp (clamped in place)
	//   regT1   - scratch xmm register index, or < 0 if the caller has none to offer
	//   xyzw    - component mask; 1/2/4/8 (a single component) selects the scalar forms
	//   bClampE - caller opts in to clamping while the extra clamp mode (clampE) is on
	// The sign-preserving code is only emitted when CHECK_VU_SIGN_OVERFLOW is set;
	// in every other case the request is forwarded to mVUclamp1().
	if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) {
		if (x86caps.hasStreamingSIMD4Extensions) {
			// sse4_maxvals / sse4_minvals have exactly two rows: row 0 bounds only
			// the first lane, row 1 bounds all four. The index must therefore be
			// 0 or 1 (the previous value of 0xf read far past the end of the tables).
			const int row = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0 : 1;
			// Signed min against the upper bound, then unsigned min against the
			// lower bound; no scratch register is needed on this path.
			SSE4_PMINSD_M128_to_XMM(reg, (uptr)&sse4_maxvals[row][0]);
			SSE4_PMINUD_M128_to_XMM(reg, (uptr)&sse4_minvals[row][0]);
			return;
		}

		// No scratch register supplied: borrow the neighbouring xmm register and
		// back its contents up to memory so it can be restored afterwards.
		int regT1b = 0;
		if (regT1 < 0) {
			regT1b = 1;
			regT1  = (reg + 1) % 8;
			SSE_MOVAPS_XMM_to_M128((uptr)mVU->xmmCTemp, regT1);
			//regT1 = mVU->regAlloc->allocReg();
		}

		// Pattern for both cases: copy the value, mask the copy with
		// mVUglob.signbit (presumably leaving only the sign bits), clamp the
		// original, then OR the saved bits back in.
		switch (xyzw) {
			case 1: case 2: case 4: case 8:
				// NOTE(review): MOVSS reg-to-reg only writes the low lane, so the
				// upper lanes of regT1 keep their previous contents; after the
				// 128-bit AND their sign bits get ORed into reg's upper lanes.
				// Confirm whether callers rely on those lanes being untouched.
				SSE_MOVSS_XMM_to_XMM (regT1, reg);
				SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
				SSE_MINSS_M32_to_XMM (reg,   (uptr)mVUglob.maxvals);
				SSE_MAXSS_M32_to_XMM (reg,   (uptr)mVUglob.minvals);
				SSE_ORPS_XMM_to_XMM  (reg,   regT1);
				break;
			default:
				SSE_MOVAPS_XMM_to_XMM(regT1, reg);
				SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
				SSE_MINPS_M128_to_XMM(reg,   (uptr)mVUglob.maxvals);
				SSE_MAXPS_M128_to_XMM(reg,   (uptr)mVUglob.minvals);
				SSE_ORPS_XMM_to_XMM  (reg,   regT1);
				break;
		}

		//if (regT1b) mVU->regAlloc->clearNeeded(regT1);
		// Restore the borrowed register from its memory backup.
		if (regT1b) SSE_MOVAPS_M128_to_XMM(regT1, (uptr)mVU->xmmCTemp);
	}
	else mVUclamp1(reg, regT1, xyzw, bClampE);
}
|
||||
|
||||
// Used for operand clamping on every SSE instruction (add/sub/mul/div)
|
||||
void mVUclamp3(microVU* mVU, int reg, int regT1, int xyzw) {
	// Only active in the extra clamp mode; forwards to the operand clamp with
	// the extra-mode opt-in flag set.
	if (!clampE) return;
	mVUclamp2(mVU, reg, regT1, xyzw, true);
}
|
||||
|
||||
// Used for result clamping on every SSE instruction (add/sub/mul/div)
|
||||
// Note: Disabled in "preserve sign" mode because in certain cases it
// generates too much code, and you get jump8 overflows in certain
// emulated opcodes (causing crashes). Since we're clamping the operands
// with mVUclamp3, we should almost never get a NaN result; this clamp
// is just a precaution.
|
||||
void mVUclamp4(int reg, int regT1, int xyzw) {
|
||||
if (clampE && !CHECK_VU_SIGN_OVERFLOW) mVUclamp1(reg, regT1, xyzw, 1);
|
||||
}
|
|
@ -15,60 +15,6 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU - Clamp Functions
|
||||
//------------------------------------------------------------------
|
||||
|
||||
// Used for Result Clamping
|
||||
void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) {
|
||||
if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
|
||||
switch (xyzw) {
|
||||
case 1: case 2: case 4: case 8:
|
||||
SSE_MINSS_M32_to_XMM(reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXSS_M32_to_XMM(reg, (uptr)mVUglob.minvals);
|
||||
break;
|
||||
default:
|
||||
SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Used for Operand Clamping
|
||||
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) {
|
||||
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE)) {
|
||||
int regT1b = 0;
|
||||
if (regT1 < 0) { regT1 = mVU->regAlloc->allocReg(); regT1b = 1; }
|
||||
switch (xyzw) {
|
||||
case 1: case 2: case 4: case 8:
|
||||
SSE_MOVSS_XMM_to_XMM (regT1, reg);
|
||||
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
|
||||
SSE_MINSS_M32_to_XMM (reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXSS_M32_to_XMM (reg, (uptr)mVUglob.minvals);
|
||||
SSE_ORPS_XMM_to_XMM (reg, regT1);
|
||||
break;
|
||||
default:
|
||||
SSE_MOVAPS_XMM_to_XMM(regT1, reg);
|
||||
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
|
||||
SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
|
||||
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
|
||||
SSE_ORPS_XMM_to_XMM (reg, regT1);
|
||||
break;
|
||||
}
|
||||
if (regT1b) mVU->regAlloc->clearNeeded(regT1);
|
||||
}
|
||||
else mVUclamp1(reg, regT1, xyzw, bClampE);
|
||||
}
|
||||
|
||||
void mVUclamp3(microVU* mVU, int reg, int regT1, int xyzw) {
|
||||
if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1);
|
||||
}
|
||||
|
||||
void mVUclamp4(int reg, int regT1, int xyzw) {
|
||||
if (clampE) mVUclamp1(reg, regT1, xyzw, 1);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
// Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging...
|
||||
//------------------------------------------------------------------
|
||||
|
@ -455,11 +401,11 @@ void ADD_SS(microVU* mVU, int to, int from, int t1, int t2) {
|
|||
if (t2b) mVU->regAlloc->clearNeeded(t2);
|
||||
}
|
||||
|
||||
#define clampOp(opX) { \
|
||||
mVUclamp3(mVU, to, t1, 0xf); \
|
||||
mVUclamp3(mVU, from, t1, 0xf); \
|
||||
opX(to, from); \
|
||||
mVUclamp4(to, t1, 0xf); \
|
||||
#define clampOp(opX, isPS) { \
|
||||
mVUclamp3(mVU, to, t1, (isPS)?0xf:0x8); \
|
||||
mVUclamp3(mVU, from, t1, (isPS)?0xf:0x8); \
|
||||
opX(to, from); \
|
||||
mVUclamp4(to, t1, (isPS)?0xf:0x8); \
|
||||
}
|
||||
|
||||
void SSE_MAXPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
|
@ -479,36 +425,36 @@ void SSE_MINSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
|||
else { MIN_MAX_SS(mVU, to, from, t1, 1); }
|
||||
}
|
||||
void SSE_ADD2SS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
if (!CHECK_VUADDSUBHACK) { clampOp(SSE_ADDSS_XMM_to_XMM); }
|
||||
if (!CHECK_VUADDSUBHACK) { clampOp(SSE_ADDSS_XMM_to_XMM, 0); }
|
||||
else { ADD_SS(mVU, to, from, t1, t2); }
|
||||
}
|
||||
|
||||
void SSE_ADD2PS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_ADDPS_XMM_to_XMM);
|
||||
clampOp(SSE_ADDPS_XMM_to_XMM, 1);
|
||||
}
|
||||
void SSE_ADDPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_ADDPS_XMM_to_XMM);
|
||||
clampOp(SSE_ADDPS_XMM_to_XMM, 1);
|
||||
}
|
||||
void SSE_ADDSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_ADDSS_XMM_to_XMM);
|
||||
clampOp(SSE_ADDSS_XMM_to_XMM, 0);
|
||||
}
|
||||
void SSE_SUBPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_SUBPS_XMM_to_XMM);
|
||||
clampOp(SSE_SUBPS_XMM_to_XMM, 1);
|
||||
}
|
||||
void SSE_SUBSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_SUBSS_XMM_to_XMM);
|
||||
clampOp(SSE_SUBSS_XMM_to_XMM, 0);
|
||||
}
|
||||
void SSE_MULPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_MULPS_XMM_to_XMM);
|
||||
clampOp(SSE_MULPS_XMM_to_XMM, 1);
|
||||
}
|
||||
void SSE_MULSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_MULSS_XMM_to_XMM);
|
||||
clampOp(SSE_MULSS_XMM_to_XMM, 0);
|
||||
}
|
||||
void SSE_DIVPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_DIVPS_XMM_to_XMM);
|
||||
clampOp(SSE_DIVPS_XMM_to_XMM, 1);
|
||||
}
|
||||
void SSE_DIVSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
|
||||
clampOp(SSE_DIVSS_XMM_to_XMM);
|
||||
clampOp(SSE_DIVSS_XMM_to_XMM, 0);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
|
Loading…
Reference in New Issue