diff --git a/pcsx2/MMI.cpp b/pcsx2/MMI.cpp
index f4948bc151..d5a9784844 100644
--- a/pcsx2/MMI.cpp
+++ b/pcsx2/MMI.cpp
@@ -1486,8 +1486,8 @@ void PMULTUW() {
 __forceinline void  _PDIVUW(int dd, int ss) 
 {
 	if (cpuRegs.GPR.r[_Rt_].UL[ss] != 0) { 
-		cpuRegs.LO.UD[dd] = (u64)cpuRegs.GPR.r[_Rs_].UL[ss] / (u64)cpuRegs.GPR.r[_Rt_].UL[ss]; 
-		cpuRegs.HI.UD[dd] = (u64)cpuRegs.GPR.r[_Rs_].UL[ss] % (u64)cpuRegs.GPR.r[_Rt_].UL[ss]; 
+		cpuRegs.LO.SD[dd] = (s32)(cpuRegs.GPR.r[_Rs_].UL[ss] / cpuRegs.GPR.r[_Rt_].UL[ss]); 
+		cpuRegs.HI.SD[dd] = (s32)(cpuRegs.GPR.r[_Rs_].UL[ss] % cpuRegs.GPR.r[_Rt_].UL[ss]); 
 	}
 }
 
diff --git a/pcsx2/x86/iFPUd.cpp b/pcsx2/x86/iFPUd.cpp
index d02bf18a18..701d59ae09 100644
--- a/pcsx2/x86/iFPUd.cpp
+++ b/pcsx2/x86/iFPUd.cpp
@@ -191,7 +191,7 @@ void ToDouble(int reg)
 // converts really large normal numbers to PS2 signed max
 // converts really small normal numbers to zero (flush)
 // doesn't handle inf/nan/denormal
-void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc)
+void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc, bool addsub)
 {
 	if (flags)
 		AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO | FPUflagU));
@@ -229,6 +229,7 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc)
 	u8 *end3 = JMP8(0);
  
 	x86SetJ8(to_underflow);
+	u8 *end4;
 	if (flags && FPU_FLAGS_UNDERFLOW) //set underflow flags if not zero
 	{
 		SSE2_XORPD_XMM_to_XMM(absreg, absreg);
@@ -236,6 +237,19 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc)
 		u8 *is_zero = JE8(0);
  
 		OR32ItoM((uptr)&fpuRegs.fprc[31], (FPUflagU | FPUflagSU));
+		if (addsub)
+		{
+			//On ADD/SUB, the PS2 simply leaves the mantissa bits as they are (after normalization)
+			//IEEE either clears them (FtZ) or returns the denormalized result.
+			//not thoroughly tested : other operations such as MUL and DIV seem to clear all mantissa bits?
+			SSE_MOVAPS_XMM_to_XMM(absreg, reg);
+			SSE2_PSLLQ_I8_to_XMM(reg, 12); //mantissa bits
+			SSE2_PSRLQ_I8_to_XMM(reg, 41);
+			SSE2_PSRLQ_I8_to_XMM(absreg, 63); //sign bit
+			SSE2_PSLLQ_I8_to_XMM(absreg, 31);
+			SSE2_POR_XMM_to_XMM(reg, absreg);
+			end4 = JMP8(0);
+		}
  
 		x86SetJ8(is_zero);
 	}
@@ -245,13 +259,15 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc)
 	x86SetJ8(end);
 	x86SetJ8(end2);
 	x86SetJ8(end3);
+	if (flags && FPU_FLAGS_UNDERFLOW && addsub)
+		x86SetJ8(end4);
 }
  
 //mustn't use EAX/ECX/EDX/x86regs (MUL)
-void ToPS2FPU(int reg, bool flags, int absreg, bool acc)
+void ToPS2FPU(int reg, bool flags, int absreg, bool acc, bool addsub = false)
 {
 	if (FPU_RESULT)
-		ToPS2FPU_Full(reg, flags, absreg, acc);
+		ToPS2FPU_Full(reg, flags, absreg, acc, addsub);
 	else
 	{
 		SSE2_CVTSD2SS_XMM_to_XMM(reg, reg); //clamp
@@ -415,24 +431,24 @@ void FPU_MUL(int info, int regd, int sreg, int treg, bool acc)
 }
 
 //------------------------------------------------------------------
-// CommutativeOp XMM (used for ADD, MUL, MAX, MIN and SUB opcodes)
+// CommutativeOp XMM (used for ADD and SUB opcodes. that's it.)
 //------------------------------------------------------------------
 static void (*recFPUOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = {
-	SSE2_ADDSD_XMM_to_XMM, NULL, NULL, NULL, SSE2_SUBSD_XMM_to_XMM };
+	SSE2_ADDSD_XMM_to_XMM, SSE2_SUBSD_XMM_to_XMM };
  
 void recFPUOp(int info, int regd, int op, bool acc) 
 {
 	int sreg, treg;
 	ALLOC_S(sreg); ALLOC_T(treg);
  
-	if (FPU_ADD_SUB_HACK && (op == 0 || op == 4)) //ADD or SUB
+	if (FPU_ADD_SUB_HACK) //ADD or SUB
 		FPU_ADD_SUB(sreg, treg);
  
 	ToDouble(sreg); ToDouble(treg);
  
 	recFPUOpXMM_to_XMM[op](sreg, treg);
  
-	ToPS2FPU(sreg, true, treg, acc);
+	ToPS2FPU(sreg, true, treg, acc, true);
 	SSE_MOVSS_XMM_to_XMM(regd, sreg);
  
 	_freeXMMreg(sreg); _freeXMMreg(treg);
@@ -715,7 +731,7 @@ void recMaddsub(int info, int regd, int op, bool acc)
 	else
 		SSE2_ADDSD_XMM_to_XMM(treg, sreg);
 
-	ToPS2FPU(treg, true, sreg, acc);
+	ToPS2FPU(treg, true, sreg, acc, true);
 	x86SetJ32(skipall);
 
 	SSE_MOVSS_XMM_to_XMM(regd, treg);
@@ -865,7 +881,7 @@ FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS);
  
 void recSUB_S_xmm(int info)
 {
-	recFPUOp(info, EEREC_D, 4, false);
+	recFPUOp(info, EEREC_D, 1, false);
 }
  
 FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
@@ -873,7 +889,7 @@ FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
  
 void recSUBA_S_xmm(int info) 
 { 
-	recFPUOp(info, EEREC_ACC, 4, true);
+	recFPUOp(info, EEREC_ACC, 1, true);
 }
  
 FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT);