- Added microVU_Clamp.inl to the project file which holds all of mVU's clamping routines.
- Added tmmk's optimized clamping method for sse4 (preserve sign clamp mode only) <-- untested
- Not using regalloc for preserved-sign non-sse4 code anymore since it seems to be bugged (thanks to nneeve for pointing it out)
- Extra mode was using preserve-sign code before; it has now been changed so that it never preserves the NaN sign...
- Fixed a bug in extra clamp modes where it was clamping all 4 vectors on SS SSE instructions, destroying upper 3 vectors...

After these changes, the compatibility of the extra / preserve-sign clamp modes has gone up.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2315 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-12-06 01:49:40 +00:00
parent e01cfedd2b
commit 316e7ffccc
4 changed files with 127 additions and 69 deletions

View File

@ -651,6 +651,10 @@
RelativePath="..\..\x86\microVU_Branch.inl"
>
</File>
<File
RelativePath="..\..\x86\microVU_Clamp.inl"
>
</File>
<File
RelativePath="..\..\x86\microVU_Compile.inl"
>

View File

@ -144,6 +144,7 @@ struct microVU {
__aligned16 u32 macFlag[4]; // 4 instances of mac flag (used in execution)
__aligned16 u32 clipFlag[4]; // 4 instances of clip flag (used in execution)
__aligned16 u32 xmmPQb[4]; // Backup for xmmPQ
__aligned16 u32 xmmCTemp[4]; // Backup used in mVUclamp2()
u32 index; // VU Index (VU0 or VU1)
u32 cop2; // VU is in COP2 mode? (No/Yes)
@ -208,6 +209,7 @@ typedef void (__fastcall *mVUrecCall)(u32, u32);
// Include all the *.inl files (Needed because C++ sucks with templates and *.cpp files)
#include "microVU_Clamp.inl"
#include "microVU_Misc.inl"
#include "microVU_Log.inl"
#include "microVU_Analyze.inl"

106
pcsx2/x86/microVU_Clamp.inl Normal file
View File

@ -0,0 +1,106 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
//------------------------------------------------------------------
// Micro VU - Clamp Functions
//------------------------------------------------------------------
// Clamp bound tables for the SSE4 sign-preserving clamp (see mVUclamp2).
// Row [0] clamps only the lower (x) component, leaving y/z/w untouched
// (0x7fffffff / 0xffffffff are no-op bounds for PMINSD / PMINUD);
// row [1] clamps all four components.
// 0x7f7fffff is the bit pattern of +FLT_MAX; 0xff7fffff is -FLT_MAX.
const __aligned16 u32 sse4_minvals[2][4] = {
{ 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff }, //1000
{ 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, //1111
};
const __aligned16 u32 sse4_maxvals[2][4] = {
{ 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1000
{ 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, //1111
};
// Used for Result Clamping
// Note: This function will not preserve NaN values' sign.
// The theory behind this is that when we compute a result, and we've
// gotten a NaN value, then something went wrong; and the NaN's sign
// is not to be trusted. Games like positive values better usually,
// and its faster... so just always make NaNs into positive infinity.
// Result clamping: emits code that clamps the value(s) in xmm reg to +/-FLT_MAX.
// NaN sign is not preserved (see comment block above). Active when VU overflow
// checking is on, or when 'extra' clamp mode (clampE) requests it via bClampE.
// regT1 is unused here; kept for signature symmetry with mVUclamp2.
void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) {
	if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
		const bool singleComp = (xyzw == 1) || (xyzw == 2) || (xyzw == 4) || (xyzw == 8);
		if (singleComp) {
			// Single-component op: clamp only the lower float
			SSE_MINSS_M32_to_XMM(reg, (uptr)mVUglob.maxvals);
			SSE_MAXSS_M32_to_XMM(reg, (uptr)mVUglob.minvals);
		}
		else {
			// Clamp all four components
			SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
			SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
		}
	}
}
// Used for Operand Clamping
// Note 1: If 'preserve sign' mode is on, it will preserve the sign of NaN values.
// Note 2: Using regalloc here seems to contaminate some regs in certain games.
// Must be some specific case I've overlooked (or I used regalloc improperly on an opcode)
// so we just use a temporary mem location for our backup for now... (non-sse4 version only)
// Operand clamping with NaN-sign preservation (when sign-overflow mode is on);
// otherwise defers to mVUclamp1.
// SSE4 path (tmmk's method): PMINSD against the +FLT_MAX table clamps positive
// overflows/NaNs, then PMINUD against the sign-extended table clamps negative
// ones — the sign bit is preserved throughout.
// Non-SSE4 path: saves the sign bits, clamps magnitudes, then ORs the signs back.
// Note: regalloc is deliberately NOT used for the temp reg here (it appeared to
// contaminate regs in some games), so a memory backup (xmmCTemp) is used instead.
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) {
	if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE && CHECK_VU_SIGN_OVERFLOW)) {
		if (x86caps.hasStreamingSIMD4Extensions) {
			// Table row 0 clamps only the x component; row 1 clamps all four.
			// (was '? 0 : 0xf', which indexed past the end of the [2][4] tables)
			int i = (xyzw==1||xyzw==2||xyzw==4||xyzw==8) ? 0 : 1;
			SSE4_PMINSD_M128_to_XMM(reg, (uptr)&sse4_maxvals[i][0]);
			SSE4_PMINUD_M128_to_XMM(reg, (uptr)&sse4_minvals[i][0]);
			return;
		}
		int regT1b = 0;
		if (regT1 < 0) {
			// No temp reg supplied: borrow the neighboring xmm reg and back it up to memory
			regT1b = 1; regT1 = (reg+1)%8;
			SSE_MOVAPS_XMM_to_M128((uptr)mVU->xmmCTemp, regT1);
			//regT1 = mVU->regAlloc->allocReg();
		}
		switch (xyzw) {
			case 1: case 2: case 4: case 8: // single-component clamp
				SSE_MOVSS_XMM_to_XMM (regT1, reg);
				SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit); // save sign bits
				SSE_MINSS_M32_to_XMM (reg,   (uptr)mVUglob.maxvals);
				SSE_MAXSS_M32_to_XMM (reg,   (uptr)mVUglob.minvals);
				SSE_ORPS_XMM_to_XMM  (reg, regT1); // restore sign bits
				break;
			default: // full-vector clamp
				SSE_MOVAPS_XMM_to_XMM(regT1, reg);
				SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
				SSE_MINPS_M128_to_XMM(reg,   (uptr)mVUglob.maxvals);
				SSE_MAXPS_M128_to_XMM(reg,   (uptr)mVUglob.minvals);
				SSE_ORPS_XMM_to_XMM  (reg, regT1);
				break;
		}
		//if (regT1b) mVU->regAlloc->clearNeeded(regT1);
		if (regT1b) SSE_MOVAPS_M128_to_XMM(regT1, (uptr)mVU->xmmCTemp); // restore borrowed reg
	}
	else mVUclamp1(reg, regT1, xyzw, bClampE);
}
// Used for operand clamping on every SSE instruction (add/sub/mul/div)
// Operand clamp emitted before every wrapped SSE arithmetic op;
// only active in 'extra' clamp mode (clampE).
void mVUclamp3(microVU* mVU, int reg, int regT1, int xyzw) {
	if (!clampE) return;
	mVUclamp2(mVU, reg, regT1, xyzw, 1);
}
// Used for result clamping on every SSE instruction (add/sub/mul/div)
// Note: Disabled in "preserve sign" mode because in certain cases it
// makes too much code-gen, and you get jump8-overflows in certain
// emulated opcodes (causing crashes). Since we're clamping the operands
// with mVUclamp3, we should almost never be getting a NaN result,
// but this clamp is just a precaution just-in-case.
// Precautionary result clamp after every wrapped SSE op (see comment above);
// skipped in sign-preserve mode to keep code-gen small.
void mVUclamp4(int reg, int regT1, int xyzw) {
	const bool doClamp = clampE && !CHECK_VU_SIGN_OVERFLOW;
	if (doClamp)
		mVUclamp1(reg, regT1, xyzw, 1);
}

View File

@ -15,60 +15,6 @@
#pragma once
//------------------------------------------------------------------
// Micro VU - Clamp Functions
//------------------------------------------------------------------
// Used for Result Clamping
// Result clamping: clamps the value(s) in xmm reg to +/-FLT_MAX.
// Active when VU overflow checking is on, or when 'extra' clamp mode
// (clampE) requests it via bClampE.
// regT1 is unused here; kept for signature symmetry with mVUclamp2.
void mVUclamp1(int reg, int regT1, int xyzw, bool bClampE = 0) {
if ((!clampE && CHECK_VU_OVERFLOW) || (clampE && bClampE)) {
switch (xyzw) {
case 1: case 2: case 4: case 8:
// single-component op: clamp only the lower float
SSE_MINSS_M32_to_XMM(reg, (uptr)mVUglob.maxvals);
SSE_MAXSS_M32_to_XMM(reg, (uptr)mVUglob.minvals);
break;
default:
// clamp all four components
SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
break;
}
}
}
// Used for Operand Clamping
// Operand clamping with NaN-sign preservation (when sign-overflow mode is on);
// otherwise defers to mVUclamp1. Saves the sign bits of reg into a temp reg,
// clamps the magnitudes, then ORs the signs back in.
// NOTE(review): allocReg() here was reported to contaminate registers in some
// games — verify against the regalloc implementation before relying on it.
void mVUclamp2(microVU* mVU, int reg, int regT1, int xyzw, bool bClampE = 0) {
if ((!clampE && CHECK_VU_SIGN_OVERFLOW) || (clampE && bClampE)) {
int regT1b = 0;
// allocate a scratch xmm reg if the caller didn't supply one
if (regT1 < 0) { regT1 = mVU->regAlloc->allocReg(); regT1b = 1; }
switch (xyzw) {
case 1: case 2: case 4: case 8:
// single-component clamp
SSE_MOVSS_XMM_to_XMM (regT1, reg);
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit); // save sign bits
SSE_MINSS_M32_to_XMM (reg, (uptr)mVUglob.maxvals);
SSE_MAXSS_M32_to_XMM (reg, (uptr)mVUglob.minvals);
SSE_ORPS_XMM_to_XMM (reg, regT1); // restore sign bits
break;
default:
// full-vector clamp
SSE_MOVAPS_XMM_to_XMM(regT1, reg);
SSE_ANDPS_M128_to_XMM(regT1, (uptr)mVUglob.signbit);
SSE_MINPS_M128_to_XMM(reg, (uptr)mVUglob.maxvals);
SSE_MAXPS_M128_to_XMM(reg, (uptr)mVUglob.minvals);
SSE_ORPS_XMM_to_XMM (reg, regT1);
break;
}
if (regT1b) mVU->regAlloc->clearNeeded(regT1);
}
else mVUclamp1(reg, regT1, xyzw, bClampE);
}
// Operand clamp for wrapped SSE ops; active only in 'extra' clamp mode (clampE).
void mVUclamp3(microVU* mVU, int reg, int regT1, int xyzw) {
if (clampE) mVUclamp2(mVU, reg, regT1, xyzw, 1);
}
// Result clamp for wrapped SSE ops; active only in 'extra' clamp mode (clampE).
void mVUclamp4(int reg, int regT1, int xyzw) {
if (clampE) mVUclamp1(reg, regT1, xyzw, 1);
}
//------------------------------------------------------------------
// Micro VU - Reg Loading/Saving/Shuffling/Unpacking/Merging...
//------------------------------------------------------------------
@ -455,11 +401,11 @@ void ADD_SS(microVU* mVU, int to, int from, int t1, int t2) {
if (t2b) mVU->regAlloc->clearNeeded(t2);
}
#define clampOp(opX) { \
mVUclamp3(mVU, to, t1, 0xf); \
mVUclamp3(mVU, from, t1, 0xf); \
opX(to, from); \
mVUclamp4(to, t1, 0xf); \
// Clamps both operands, emits the op, then clamps the result.
// isPS: nonzero = packed op (clamp all 4 components, mask 0xf);
// zero = scalar (SS) op (clamp only a single component, mask 0x8,
// so the upper 3 components are not destroyed).
// Expects 'mVU', 'to', 'from', and 't1' in the expansion scope.
#define clampOp(opX, isPS) { \
mVUclamp3(mVU, to, t1, (isPS)?0xf:0x8); \
mVUclamp3(mVU, from, t1, (isPS)?0xf:0x8); \
opX(to, from); \
mVUclamp4(to, t1, (isPS)?0xf:0x8); \
}
void SSE_MAXPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
@ -479,36 +425,36 @@ void SSE_MINSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
else { MIN_MAX_SS(mVU, to, from, t1, 1); }
}
// Scalar ADD wrapper: clamped ADDSS, unless the VU add/sub accuracy hack
// routes it through ADD_SS. (Removed the stale pre-diff clampOp line that
// used the old single-argument macro form.)
void SSE_ADD2SS(mV, int to, int from, int t1 = -1, int t2 = -1) {
	if (!CHECK_VUADDSUBHACK) { clampOp(SSE_ADDSS_XMM_to_XMM, 0); }
	else { ADD_SS(mVU, to, from, t1, t2); }
}
// Packed ADD wrapper (operands and result clamped; isPS=1 clamps all 4 components).
void SSE_ADD2PS(mV, int to, int from, int t1 = -1, int t2 = -1) {
	clampOp(SSE_ADDPS_XMM_to_XMM, 1);
}
// Packed ADD wrapper (operands and result clamped; isPS=1 clamps all 4 components).
void SSE_ADDPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
	clampOp(SSE_ADDPS_XMM_to_XMM, 1);
}
// Scalar ADD wrapper (isPS=0: clamps only the lower component, preserving y/z/w).
void SSE_ADDSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
	clampOp(SSE_ADDSS_XMM_to_XMM, 0);
}
// Packed SUB wrapper (operands and result clamped; isPS=1 clamps all 4 components).
void SSE_SUBPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
	clampOp(SSE_SUBPS_XMM_to_XMM, 1);
}
// Scalar SUB wrapper (isPS=0: clamps only the lower component, preserving y/z/w).
void SSE_SUBSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
	clampOp(SSE_SUBSS_XMM_to_XMM, 0);
}
// Packed MUL wrapper (operands and result clamped; isPS=1 clamps all 4 components).
void SSE_MULPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
	clampOp(SSE_MULPS_XMM_to_XMM, 1);
}
// Scalar MUL wrapper (isPS=0: clamps only the lower component, preserving y/z/w).
void SSE_MULSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
	clampOp(SSE_MULSS_XMM_to_XMM, 0);
}
// Packed DIV wrapper (operands and result clamped; isPS=1 clamps all 4 components).
void SSE_DIVPS(mV, int to, int from, int t1 = -1, int t2 = -1) {
	clampOp(SSE_DIVPS_XMM_to_XMM, 1);
}
// Scalar DIV wrapper (isPS=0: clamps only the lower component, preserving y/z/w).
void SSE_DIVSS(mV, int to, int from, int t1 = -1, int t2 = -1) {
	clampOp(SSE_DIVSS_XMM_to_XMM, 0);
}
//------------------------------------------------------------------