Fixed/added/optimized some VU stuff. I plan to go over most (all) VU micro instructions over the next few days/weeks, if I don't get bored of it, that is ;)

3 down, 100+ more to go! :D

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@495 a6443dda-0b58-4228-96e9-037be469359c
cottonvibes 2008-12-25 18:39:06 +00:00 committed by Gregory Hainaut
parent 8aa7f3c3d1
commit f93e8c1beb
6 changed files with 255 additions and 182 deletions

View File

@@ -1516,7 +1516,7 @@ void ipu_vq(struct macroblock_rgb16 *rgb16, u8* indx4){
SysPrintf("IPU: VQ not implemented");
}
void ipu_copy(struct macroblock_8 *mb8, struct macroblock_16 *mb16){
void ipu_copy(struct macroblock_8 *mb8, struct macroblock_16 *mb16) {
unsigned char *s=(unsigned char*)mb8;
signed short *d=(signed short*)mb16;
int i;
@@ -1749,7 +1749,7 @@ int IPU1dma()
break;
default:
SysPrintf("IPU ERROR: different transfer mode!, Please report to PCSX2 Team\n");
Console::Error("IPU ERROR: different transfer mode!, Please report to PCSX2 Team\n");
break;
}

View File

@@ -724,26 +724,17 @@ void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw)
void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw)
{
if( cpucaps.hasStreamingSIMD4Extensions ) {
switch (xyzw) {
case 0: if( dstreg != srcreg ) {
SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(0, 0, 0));} break;
case 1: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(1, 0, 0)); break;
case 2: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(2, 0, 0)); break;
case 3: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(3, 0, 0)); break;
}
}
else {
switch (xyzw) {
case 0: if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg); break;
case 1: if( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg);
else { if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg); SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0x55); }
break;
case 2: SSE_MOVHLPS_XMM_to_XMM(dstreg, srcreg); break;
case 3: if( cpucaps.hasStreamingSIMD3Extensions && dstreg != srcreg ) { SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg); SSE_MOVHLPS_XMM_to_XMM(dstreg, dstreg); }
else { if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg); SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0xff); }
break;
}
switch (xyzw) {
case 0: if( dstreg != srcreg ) SSE_MOVSS_XMM_to_XMM(dstreg, srcreg); break;
case 1: if ( cpucaps.hasStreamingSIMD4Extensions ) SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(1, 0, 0));
else if ( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg);
else { if( dstreg != srcreg ) { SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg); } SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0x55); }
break;
case 2: SSE_MOVHLPS_XMM_to_XMM(dstreg, srcreg); break;
case 3: if ( cpucaps.hasStreamingSIMD4Extensions ) SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(3, 0, 0));
else if ( cpucaps.hasStreamingSIMD3Extensions && dstreg != srcreg ) { SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg); SSE_MOVHLPS_XMM_to_XMM(dstreg, dstreg); }
else { if( dstreg != srcreg ) { SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg); } SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0xff); }
break;
}
}
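For reference, every path above computes the same thing: component xyzw of srcreg ends up in the low (x) slot of dstreg, with the upper lanes left as don't-care; SSE4's INSERTPS and SSE3's MOVSHDUP are just cheaper routes to the same result. A plain-C sketch of that contract (vec4 and unpackVFSS_ref are illustrative names, not part of the source):

typedef struct { float f[4]; } vec4;          /* illustrative only */
void unpackVFSS_ref(vec4* dst, const vec4* src, int xyzw)
{
    dst->f[0] = src->f[xyzw & 3];             /* e.g. xyzw == 2 puts src z into dst x */
}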
@@ -765,65 +756,168 @@ void _vuMoveSS(VURegs * VU, int dstreg, int srcreg)
}
// 1 - src, 0 - dest wzyx
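To make that convention concrete: the routine index is the _X_Y_Z_W field (x in bit 3, w in bit 0), each routine's binary comment spells the same bits in wzyx order, and a trailing 's' marks the variants that leave the source register unmodified (see VU_MERGE_REGS_SAFE below). A plain-C sketch of the merge contract (vec4 and vu_merge_ref are illustrative names, not part of the source):

typedef struct { float x, y, z, w; } vec4;    /* illustrative only */
void vu_merge_ref(vec4* dest, const vec4* src, int mask)
{
    if (mask & 8) dest->x = src->x;           /* index 8 == comment "0001" */
    if (mask & 4) dest->y = src->y;
    if (mask & 2) dest->z = src->z;
    if (mask & 1) dest->w = src->w;
}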
void VU_MERGE0(int dest, int src) { // 0000
void VU_MERGE0(int dest, int src) { // 0000s
}
void VU_MERGE1(int dest, int src) { // 1000
SSE_MOVHLPS_XMM_to_XMM(src, dest);
SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc4);
}
void VU_MERGE1b(int dest, int src) { // 1000s
SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
}
void VU_MERGE2(int dest, int src) { // 0100
SSE_MOVHLPS_XMM_to_XMM(src, dest);
SSE_SHUFPS_XMM_to_XMM(dest, src, 0x64);
}
void VU_MERGE3(int dest, int src) { // 1100
void VU_MERGE2b(int dest, int src) { // 0100s
SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
}
void VU_MERGE3(int dest, int src) { // 1100s
SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4);
}
void VU_MERGE4(int dest, int src) { // 0010s
void VU_MERGE4(int dest, int src) { // 0010
SSE_MOVSS_XMM_to_XMM(src, dest);
SSE_SHUFPS_XMM_to_XMM(src, dest, 0xe4);
SSE_MOVAPS_XMM_to_XMM(dest, src);
}
void VU_MERGE4b(int dest, int src) { // 0010s
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
}
void VU_MERGE5(int dest, int src) { // 1010
SSE_SHUFPS_XMM_to_XMM(dest, src, 0xd8);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd8);
}
void VU_MERGE5b(int dest, int src) { // 1010s
SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
}
void VU_MERGE6(int dest, int src) { // 0110
SSE_SHUFPS_XMM_to_XMM(dest, src, 0x9c);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x78);
}
void VU_MERGE7(int dest, int src) { // 1110s
void VU_MERGE6b(int dest, int src) { // 0110s
SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
}
void VU_MERGE7(int dest, int src) { // 1110
SSE_MOVSS_XMM_to_XMM(src, dest);
SSE_MOVAPS_XMM_to_XMM(dest, src);
}
void VU_MERGE8(int dest, int src) { // 0001
void VU_MERGE7b(int dest, int src) { // 1110s
SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
}
void VU_MERGE8(int dest, int src) { // 0001s
SSE_MOVSS_XMM_to_XMM(dest, src);
}
void VU_MERGE9(int dest, int src) { // 1001
SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc9);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd2);
}
void VU_MERGE9b(int dest, int src) { // 1001s
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
}
void VU_MERGE10(int dest, int src) { // 0101
SSE_SHUFPS_XMM_to_XMM(dest, src, 0x8d);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x72);
}
void VU_MERGE11(int dest, int src) { // 1101
void VU_MERGE10b(int dest, int src) { // 0101s
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
}
void VU_MERGE11(int dest, int src) { // 1101s
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4);
}
void VU_MERGE12(int dest, int src) { // 0011s
void VU_MERGE12(int dest, int src) { // 0011
SSE_SHUFPS_XMM_to_XMM(src, dest, 0xe4);
SSE_MOVAPS_XMM_to_XMM(dest, src);
}
void VU_MERGE13(int dest, int src) { // 1011s
void VU_MERGE12b(int dest, int src) { // 0011s
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
}
void VU_MERGE13(int dest, int src) { // 1011
SSE_MOVHLPS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, dest, 0x64);
SSE_MOVAPS_XMM_to_XMM(dest, src);
}
void VU_MERGE14(int dest, int src) { // 0111s
void VU_MERGE13b(int dest, int src) { // 1011s
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
}
void VU_MERGE14(int dest, int src) { // 0111
SSE_MOVHLPS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, dest, 0xc4);
SSE_MOVAPS_XMM_to_XMM(dest, src);
}
void VU_MERGE14b(int dest, int src) { // 0111s
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
SSE_MOVSS_XMM_to_XMM(dest, src);
SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
}
void VU_MERGE15(int dest, int src) { // 1111s
SSE_MOVAPS_XMM_to_XMM(dest, src);
}
@@ -836,18 +930,34 @@ static VUMERGEFN s_VuMerge[16] = {
VU_MERGE8, VU_MERGE9, VU_MERGE10, VU_MERGE11,
VU_MERGE12, VU_MERGE13, VU_MERGE14, VU_MERGE15 };
void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw)
{
xyzw &= 0xf;
static VUMERGEFN s_VuMerge2[16] = {
VU_MERGE0, VU_MERGE1b, VU_MERGE2b, VU_MERGE3,
VU_MERGE4b, VU_MERGE5b, VU_MERGE6b, VU_MERGE7b,
VU_MERGE8, VU_MERGE9b, VU_MERGE10b, VU_MERGE11,
VU_MERGE12b, VU_MERGE13b, VU_MERGE14b, VU_MERGE15 };
if(dest != src && xyzw != 0) {
if(cpucaps.hasStreamingSIMD4Extensions) {
// Modifies the Source Reg!
void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw) {
xyzw &= 0xf;
if ( (dest != src) && (xyzw != 0) ) {
if ( cpucaps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf) ) {
xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3);
SSE4_BLENDPS_XMM_to_XMM(dest, src, xyzw);
}
else s_VuMerge[xyzw](dest, src);
}
}
// Doesn't Modify the Source Reg! (ToDo: some of the s_VuMerge2 routines have room for optimization)
void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw) {
xyzw &= 0xf;
if ( (dest != src) && (xyzw != 0) ) {
if ( cpucaps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf) ) {
xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3);
SSE4_BLENDPS_XMM_to_XMM(dest, src, xyzw);
}
else s_VuMerge2[xyzw](dest, src);
}
}
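Both dispatchers convert the mask before SSE4_BLENDPS because the BLENDPS immediate expects element 0 (x) in bit 0, while _X_Y_Z_W keeps x in bit 3, so the expression is a plain 4-bit reversal; 0x8 and 0xf skip BLENDPS, presumably because the table already handles them with a single MOVSS/MOVAPS. A worked sketch of the conversion (blendps_imm is an illustrative helper mirroring the inline expression):

int blendps_imm(int xyzw)
{
    return ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3);
}
/* blendps_imm(0xC) == 0x3: x,y selected -> BLENDPS writes elements 0 and 1 */
/* blendps_imm(0x8) == 0x1: x only      -> BLENDPS writes element 0        */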
//------------------------------------------------------------------

View File

@@ -112,6 +112,7 @@ void _vuMoveSS(VURegs * VU, int dstreg, int srcreg);
void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw);
void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw);
void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw);
void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw);
#define VU_MERGE_REGS(dest, src) { \
VU_MERGE_REGS_CUSTOM(dest, src, _X_Y_Z_W); \
}

View File

@@ -835,11 +835,10 @@ void recVUMI_LQI(VURegs *VU, int info)
//------------------------------------------------------------------
// _saveEAX()
// _saveEAX() ToDo: Needs Checking/Fixing! (cottonvibes)
//------------------------------------------------------------------
void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
{
int t1reg;
assert( offset < 0x80000000 );
if( _Fs_ == 0 ) {
@@ -996,90 +995,51 @@ void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info)
}
}
break;
default:
//SysPrintf("SAVEEAX Default %d\n", _X_Y_Z_W);
default: // ToDo: Needs checking! (cottonvibes)
SysPrintf("SAVEEAX Default %d\n", _X_Y_Z_W);
// EEREC_D is a temp reg
// find the first nonwrite reg
/*
t1reg = _vuGetTempXMMreg(info);
if( t1reg < 0 ) {
for(t1reg = 0; t1reg < XMMREGS; ++t1reg) {
if( xmmregs[t1reg].inuse && !(xmmregs[t1reg].mode&MODE_WRITE) ) break;
}
if( t1reg == XMMREGS ) t1reg = -1;
else {
if( t1reg != EEREC_S ) _allocTempXMMreg(XMMT_FPS, t1reg);
}
else if( t1reg != EEREC_S ) _allocTempXMMreg(XMMT_FPS, t1reg);
}
*/
// do it with one reg
//SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S);
if( t1reg >= 0 ) {
// found a temp reg
if( VU == &VU1 ) {
if( x86reg >= 0 ) SSE_MOVAPSRmtoROffset(EEREC_TEMP, x86reg, offset);
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset);
}
else {
if( x86reg >= 0 ) SSE_MOVUPSRmtoROffset(EEREC_TEMP, x86reg, offset);
else {
if( offset & 15 ) SSE_MOVUPS_M128_to_XMM(EEREC_TEMP, offset);
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset);
}
}
if( t1reg != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S);
VU_MERGE_REGS(EEREC_TEMP, t1reg);
if( VU == &VU1 ) {
if( x86reg >= 0 ) SSE_MOVAPSRtoRmOffset(x86reg, EEREC_TEMP, offset);
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_TEMP);
}
else {
if( x86reg >= 0 ) SSE_MOVUPSRtoRmOffset(x86reg, EEREC_TEMP, offset);
else SSE_MOVUPS_XMM_to_M128(offset, EEREC_TEMP);
}
if( t1reg != EEREC_S ) _freeXMMreg(t1reg);
else {
// read back the data
SSE_MOVAPS_M128_to_XMM(EEREC_S, (uptr)&VU->VF[_Fs_]);
}
if( VU == &VU1 ) {
if( x86reg >= 0 ) SSE_MOVAPSRmtoROffset(EEREC_TEMP, x86reg, offset);
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset);
}
else {
// do it with one reg
SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S);
if( VU == &VU1 ) {
if( x86reg >= 0 ) SSE_MOVAPSRmtoROffset(EEREC_TEMP, x86reg, offset);
if( x86reg >= 0 ) SSE_MOVUPSRmtoROffset(EEREC_TEMP, x86reg, offset);
else {
if( offset & 15 ) SSE_MOVUPS_M128_to_XMM(EEREC_TEMP, offset);
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset);
}
else {
if( x86reg >= 0 ) SSE_MOVUPSRmtoROffset(EEREC_TEMP, x86reg, offset);
else {
if( offset & 15 ) SSE_MOVUPS_M128_to_XMM(EEREC_TEMP, offset);
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset);
}
}
VU_MERGE_REGS(EEREC_TEMP, EEREC_S);
if( VU == &VU1 ) {
if( x86reg >= 0 ) SSE_MOVAPSRtoRmOffset(x86reg, EEREC_TEMP, offset);
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_TEMP);
}
else {
if( x86reg >= 0 ) SSE_MOVUPSRtoRmOffset(x86reg, EEREC_TEMP, offset);
else {
if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, EEREC_TEMP);
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_TEMP);
}
}
// read back the data
SSE_MOVAPS_M128_to_XMM(EEREC_S, (uptr)&VU->VF[_Fs_]);
}
VU_MERGE_REGS_SAFE(EEREC_TEMP, EEREC_S, _X_Y_Z_W);
if( VU == &VU1 ) {
if( x86reg >= 0 ) SSE_MOVAPSRtoRmOffset(x86reg, EEREC_TEMP, offset);
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_TEMP);
}
else {
if( x86reg >= 0 ) SSE_MOVUPSRtoRmOffset(x86reg, EEREC_TEMP, offset);
else {
if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, EEREC_TEMP);
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_TEMP);
}
}
// read back the data
//SSE_MOVAPS_M128_to_XMM(EEREC_S, (uptr)&VU->VF[_Fs_]);
break;
}
}
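The repeated MOVAPS/MOVUPS selection above comes down to alignment: a 128-bit MOVAPS store faults unless the address is 16-byte aligned, so the aligned form is only used for VU1 memory (which this code treats as always aligned) or when the offset passes the check. A minimal sketch of the test (can_use_movaps is a hypothetical helper for illustration):

int can_use_movaps(uptr offset)
{
    return (offset & 15) == 0;   /* low four address bits clear == 16-byte aligned */
}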
@@ -1094,9 +1054,7 @@ void recVUMI_SQ(VURegs *VU, int info)
s16 imm;
imm = ( VU->code & 0x400) ? ( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff);
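// The line above sign-extends the 11-bit immediate field: bit 10 (0x400) is
// the sign bit, and OR-ing 0xfc00 fills bits 10..15 with ones. For example,
// (VU->code & 0x7ff) == 0x7ff yields imm == -1, while 0x3ff yields +1023.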
if ( _Ft_ == 0 ) {
_saveEAX(VU, -1, (uptr)GET_VU_MEM(VU, (int)imm * 16), info);
}
if ( _Ft_ == 0 ) _saveEAX(VU, -1, (uptr)GET_VU_MEM(VU, (int)imm * 16), info);
else {
int ftreg = ALLOCVI(_Ft_, MODE_READ);
_saveEAX(VU, recVUTransformAddr(ftreg, VU, _Ft_, imm), (uptr)VU->Mem, info);
@@ -1110,9 +1068,8 @@ void recVUMI_SQ(VURegs *VU, int info)
//------------------------------------------------------------------
void recVUMI_SQD(VURegs *VU, int info)
{
if (_Ft_ == 0) {
_saveEAX(VU, -1, (uptr)VU->Mem, info);
} else {
if (_Ft_ == 0) _saveEAX(VU, -1, (uptr)VU->Mem, info);
else {
int ftreg = ALLOCVI(_Ft_, MODE_READ|MODE_WRITE);
SUB16ItoR( ftreg, 1 );
_saveEAX(VU, recVUTransformAddr(ftreg, VU, _Ft_, 0), (uptr)VU->Mem, info);
@@ -1126,9 +1083,8 @@ void recVUMI_SQD(VURegs *VU, int info)
//------------------------------------------------------------------
void recVUMI_SQI(VURegs *VU, int info)
{
if (_Ft_ == 0) {
_saveEAX(VU, -1, (uptr)VU->Mem, info);
} else {
if (_Ft_ == 0) _saveEAX(VU, -1, (uptr)VU->Mem, info);
else {
int ftreg = ALLOCVI(_Ft_, MODE_READ|MODE_WRITE);
_saveEAX(VU, recVUTransformAddr(ftreg, VU, _Ft_, 0), (uptr)VU->Mem, info);

View File

@@ -103,22 +103,22 @@ static const PCSX2_ALIGNED16(int SSEmovMask[ 16 ][ 4 ]) =
static const PCSX2_ALIGNED16(u32 const_abs_table[16][4]) =
{
{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
{ 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff },
{ 0xffffffff, 0xffffffff, 0x7fffffff, 0xffffffff },
{ 0xffffffff, 0xffffffff, 0x7fffffff, 0x7fffffff },
{ 0xffffffff, 0x7fffffff, 0xffffffff, 0xffffffff },
{ 0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff },
{ 0xffffffff, 0x7fffffff, 0x7fffffff, 0xffffffff },
{ 0xffffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff },
{ 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff },
{ 0x7fffffff, 0xffffffff, 0xffffffff, 0x7fffffff },
{ 0x7fffffff, 0xffffffff, 0x7fffffff, 0xffffffff },
{ 0x7fffffff, 0xffffffff, 0x7fffffff, 0x7fffffff },
{ 0x7fffffff, 0x7fffffff, 0xffffffff, 0xffffffff },
{ 0x7fffffff, 0x7fffffff, 0xffffffff, 0x7fffffff },
{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0xffffffff },
{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff },
{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //0000
{ 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff }, //0001
{ 0xffffffff, 0xffffffff, 0x7fffffff, 0xffffffff }, //0010
{ 0xffffffff, 0xffffffff, 0x7fffffff, 0x7fffffff }, //0011
{ 0xffffffff, 0x7fffffff, 0xffffffff, 0xffffffff }, //0100
{ 0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff }, //0101
{ 0xffffffff, 0x7fffffff, 0x7fffffff, 0xffffffff }, //0110
{ 0xffffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //0111
{ 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //1000
{ 0x7fffffff, 0xffffffff, 0xffffffff, 0x7fffffff }, //1001
{ 0x7fffffff, 0xffffffff, 0x7fffffff, 0xffffffff }, //1010
{ 0x7fffffff, 0xffffffff, 0x7fffffff, 0x7fffffff }, //1011
{ 0x7fffffff, 0x7fffffff, 0xffffffff, 0xffffffff }, //1100
{ 0x7fffffff, 0x7fffffff, 0xffffffff, 0x7fffffff }, //1101
{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0xffffffff }, //1110
{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1111
};
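Each row of the table is an ANDPS mask: lanes whose _X_Y_Z_W bit is set get 0x7fffffff (IEEE-754 sign bit cleared, i.e. fabsf), the rest get 0xffffffff and pass through untouched; element 0 of a row is x, matching the binary comments with x leftmost. What ANDPS does per selected lane, as a scalar sketch (abs_lane is an illustrative helper):

#include <string.h>
static float abs_lane(float v)
{
    unsigned bits;
    memcpy(&bits, &v, sizeof bits);
    bits &= 0x7fffffffu;             /* clear the sign bit */
    memcpy(&v, &bits, sizeof v);
    return v;                        /* abs_lane(-3.5f) == 3.5f */
}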
static const PCSX2_ALIGNED16(float recMult_float_to_int4[4]) = { 16.0, 16.0, 16.0, 16.0 };
@@ -236,7 +236,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
//-------------------------Optional Code: Denormals Are Zero------------------------------
if (CHECK_VU_UNDERFLOW) { // Sets underflow/denormals to zero
SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg (t1reg = denormals are positive zero)
VU_MERGE_REGS_CUSTOM(t1reg, reg, (15 - flipMask[_X_Y_Z_W])); // Send t1reg the vectors that shouldn't be modified (since reg was flipped, we need a mask to get the unmodified vectors)
VU_MERGE_REGS_SAFE(t1reg, reg, (15 - flipMask[_X_Y_Z_W])); // Send t1reg the vectors that shouldn't be modified (since reg was flipped, we need a mask to get the unmodified vectors)
// Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account
SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]); // Only keep the sign bit for each vector
SSE_ORPS_XMM_to_XMM(reg, t1reg); // Denormals are Signed Zero, and unmodified vectors stay the same!
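A note on the merge mask above, assuming flipMask is the usual 4-bit reversal table (reg's lanes were flipped earlier in this routine, as the comment says): for a 4-bit value m, 15 - m == (~m) & 0xf, so 15 - flipMask[_X_Y_Z_W] selects exactly the lanes the instruction does not write, in the flipped ordering. For example, _X_Y_Z_W == 0x8 (x only) gives a flipped mask of 0x1 and a complement of 0xE, the three untouched lanes.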
@@ -305,40 +305,45 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
//------------------------------------------------------------------
// *VU Upper Instructions!*
//
// Note: * = Checked for errors by cottonvibes
//------------------------------------------------------------------
//------------------------------------------------------------------
// ABS
// ABS*
//------------------------------------------------------------------
void recVUMI_ABS(VURegs *VU, int info)
void recVUMI_ABS(VURegs *VU, int info)
{
if ( _Ft_ == 0 ) return;
//SysPrintf("recVUMI_ABS()\n");
if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return;
if (_X_Y_Z_W != 0xf) { // here we use a temp reg because not all xyzw are being modified
if ((_X_Y_Z_W == 0x8) || (_X_Y_Z_W == 0xf)) {
VU_MERGE_REGS(EEREC_T, EEREC_S);
SSE_ANDPS_M128_to_XMM(EEREC_T, (uptr)&const_abs_table[ _X_Y_Z_W ][ 0 ] );
}
else { // Use a temp reg because VU_MERGE_REGS() modifies source reg!
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_abs_table[ _X_Y_Z_W ][ 0 ] );
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
}
else { // all xyzw are being modified, so no need to use temp reg
if( EEREC_T != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
SSE_ANDPS_M128_to_XMM(EEREC_T, (uptr)&const_abs_table[ _X_Y_Z_W ][ 0 ] );
}
}
//------------------------------------------------------------------
//------------------------------------------------------------------
// ADD
// ADD*, ADD_iq, ADD_xyzw*
//------------------------------------------------------------------
PCSX2_ALIGNED16(float s_two[4]) = {0,0,0,2};
void recVUMI_ADD(VURegs *VU, int info)
{
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
//SysPrintf("recVUMI_ADD()\n");
if ( _X_Y_Z_W == 0 ) goto flagUpdate;
if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP);
if( _Fs_ == 0 && _Ft_ == 0 ) { // if adding VF00 with VF00, then the result is always 0,0,0,2
if( _X_Y_Z_W != 0xf ) {
if ( _Fs_ == 0 && _Ft_ == 0 ) { // if adding VF00 with VF00, then the result is always 0,0,0,2 (VF00 is hardwired to 0,0,0,1)
if ( _X_Y_Z_W == 0x8 ) SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)s_two);
else if ( _X_Y_Z_W != 0xf ) {
SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)s_two);
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
}
@@ -346,8 +351,8 @@ void recVUMI_ADD(VURegs *VU, int info)
}
else {
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat( info, EEREC_S, _X_Y_Z_W);
vuFloat( info, EEREC_T, _X_Y_Z_W);
vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
vuFloat5( EEREC_T, EEREC_TEMP, _X_Y_Z_W);
}
if( _X_Y_Z_W == 8 ) { // If only adding x, then we can do a Scalar Add
if (EEREC_D == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T);
@@ -360,7 +365,6 @@ void recVUMI_ADD(VURegs *VU, int info)
else if (_X_Y_Z_W != 0xf) { // If xyzw != 1111, then we have to use a temp reg
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
}
else { // All xyzw being modified (xyzw == 1111)
@@ -372,7 +376,7 @@ void recVUMI_ADD(VURegs *VU, int info)
}
}
}
flagUpdate:
recUpdateFlags(VU, EEREC_D, info);
}
@@ -433,30 +437,31 @@ void recVUMI_ADD_iq(VURegs *VU, uptr addr, int info)
}
}
}
recUpdateFlags(VU, EEREC_D, info);
}
void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info)
{
//SysPrintf("recVUMI_ADD_xyzw()\n");
if ( _X_Y_Z_W == 0 ) goto flagUpdate;
if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP);
if (CHECK_VU_EXTRA_OVERFLOW) {
vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
vuFloat5( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
if (_Fs_) vuFloat5( EEREC_S, EEREC_TEMP, _X_Y_Z_W);
if (_Ft_) vuFloat5( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) );
}
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
if( _Ft_ == 0 && xyzw < 3 ) {
// just move
if( _X_Y_Z_W != 0xf ) {
if ( _Ft_ == 0 && xyzw < 3 ) { // just move since adding zero (only the w component of VF00 is nonzero)
if ( _X_Y_Z_W == 0x8 ) { VU_MERGE_REGS(EEREC_D, EEREC_S); }
else if ( _X_Y_Z_W != 0xf ) {
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
}
else if( EEREC_D != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
else if ( EEREC_D != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
}
else if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP) ) {
if( xyzw == 0 ) {
if( EEREC_D == EEREC_T ) {
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_S);
}
else if ( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP) ) {
if ( xyzw == 0 ) {
if ( EEREC_D == EEREC_T ) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_S);
else {
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T);
@@ -464,38 +469,27 @@ void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info)
}
else {
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
if ( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
}
}
else if( _Fs_ == 0 && !_W ) { // ToDo: Check this! (cottonvibes)
// just move
else if( _Fs_ == 0 && !_W ) { // just move
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
// SSE_MOVAPS_XMM_to_M128((u32)s_tempmem, EEREC_TEMP);
// SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip);
// SSE_CMPNLTPS_M128_to_XMM(EEREC_TEMP, (u32)s_FloatMinMax);
// SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)s_tempmem);
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
}
else {
if( _X_Y_Z_W != 0xf || EEREC_D == EEREC_S || EEREC_D == EEREC_TEMP)
if ( _X_Y_Z_W != 0xf ) {
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
if (_X_Y_Z_W != 0xf) {
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
}
else {
if( EEREC_D == EEREC_TEMP ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
else if( EEREC_D == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
else {
_unpackVF_xyzw(EEREC_D, EEREC_T, xyzw);
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
}
if( EEREC_D == EEREC_TEMP ) { _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); }
else if( EEREC_D == EEREC_S ) { _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); }
else { _unpackVF_xyzw(EEREC_D, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); }
}
}
flagUpdate:
recUpdateFlags(VU, EEREC_D, info);
}
@@ -2502,7 +2496,6 @@ void recVUMI_ITOF0( VURegs *VU, int info )
if (_X_Y_Z_W != 0xf) {
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
vuFloat( info, EEREC_TEMP, 15); // Clamp infinities
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
xmmregs[EEREC_T].mode |= MODE_WRITE;
@@ -2519,12 +2512,12 @@ void recVUMI_ITOFX(VURegs *VU, int addr, int info)
if (_X_Y_Z_W != 0xf) {
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
vuFloat( info, EEREC_TEMP, 15); // Clamp infinities
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
xmmregs[EEREC_T].mode |= MODE_WRITE;
} else {
}
else {
SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S);
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities

View File

@@ -27,6 +27,19 @@
CALLFunc((uptr)VU##MI_##f); \
}
#define REC_VUOPs(VU, f) { \
_freeXMMregs(); \
X86_32CODE(_freeMMXregs(); SetFPUstate();) \
if (VU==&VU1) { \
MOV32ItoM((uptr)&VU1.code, (u32)VU1.code); \
CALLFunc((uptr)VU1MI_##f); \
} \
else { \
MOV32ItoM((uptr)&VU0.code, (u32)VU0.code); \
CALLFunc((uptr)VU0MI_##f); \
} \
}
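A usage sketch for the new fallback macro, under the assumption that each instruction has matching VU0MI_/VU1MI_ interpreter handlers (recVUMI_ABS_fallback is a hypothetical wrapper name): the macro flushes the recompiler's XMM (and, on 32-bit, MMX/FPU) state, stores the raw opcode where the handler expects it, and emits a call to the correct unit's interpreter routine.

void recVUMI_ABS_fallback(VURegs* VU, int info) /* hypothetical wrapper */
{
    REC_VUOPs(VU, ABS);  /* emits a call to VU0MI_ABS or VU1MI_ABS */
}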
#define REC_VUOPFLAGS(VU, f) { \
_freeXMMregs(/*&VU*/); \
X86_32CODE(_freeMMXregs(); SetFPUstate();) \