The FPU's RSQRT now has pre-opcode clamping when the Extra Overflow Speedhack is enabled; also fixed some issues for the Linux GCC compiler. Thanks, Shanoah!

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@67 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
cottonvibes 2008-08-29 05:21:56 +00:00 committed by Gregory Hainaut
parent 12d63f137a
commit 88ba4e0b4a
3 changed files with 48 additions and 27 deletions

View File

@ -19,6 +19,11 @@
#ifndef __GS_H__
#define __GS_H__
// GCC needs these includes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct
{
u32 SIGID;

View File

@ -2049,7 +2049,7 @@ int VIF1transfer(u32 *data, int size, int istag) {
if (vif1.irq && vif1.tag.size == 0) {
vif1.vifstalled = 1;
if(((vif1Regs->code >> 24) & 0x7f) != 0x7)vif1Regs->stat|= VIF1_STAT_VIS;
if(((vif1Regs->code >> 24) & 0x7f) != 0x7)vif1Regs->stat|= VIF1_STAT_VIS; // Note: commenting this out fixes WALL-E
//else SysPrintf("Stall on Vif1 MARK\n");
// spiderman doesn't break on qw boundaries
vif1.irqoffset = transferred%4; // cannot lose the offset

View File

@ -1036,43 +1036,59 @@ void recRSQRT_S_xmm(int info)
switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) {
case PROCESS_EE_S:
if( EEREC_D == EEREC_S ) {
SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
if (CHECK_EXTRA_OVERFLOW) {
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
ClampValues(t0reg);
ClampValues(EEREC_D);
SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg);
}
else { SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); }
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
}
else {
SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
if (CHECK_EXTRA_OVERFLOW) {
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
ClampValues(t0reg);
ClampValues(EEREC_D);
SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg);
}
else { SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); }
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
}
break;
case PROCESS_EE_T:
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
case PROCESS_EE_T:
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
if (CHECK_EXTRA_OVERFLOW) { ClampValues(EEREC_T); ClampValues(EEREC_D); }
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
break;
default:
if( (info & PROCESS_EE_T) && (info & PROCESS_EE_S) ) {
if( EEREC_D == EEREC_T ){
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
}
else if( EEREC_D == EEREC_S ){
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
} else {
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
case (PROCESS_EE_S | PROCESS_EE_T):
if( EEREC_D == EEREC_S ) {
if (CHECK_EXTRA_OVERFLOW) { ClampValues(EEREC_T); ClampValues(EEREC_D); }
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
}
else {
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
}
}else{
SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
if (CHECK_EXTRA_OVERFLOW) { ClampValues(EEREC_T); ClampValues(EEREC_D); }
SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T);
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
}
break;
default:
SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
if (CHECK_EXTRA_OVERFLOW) {
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
ClampValues(t0reg);
ClampValues(EEREC_D);
SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg);
}
else { SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); }
SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg);
break;
}
_freeXMMreg(t0reg);