spu/interp: optimize floating point exception checking

Read the MXCSR register once and use its value to check for all exceptions.
This commit is contained in:
elad 2018-02-28 18:28:38 +02:00 committed by Ivan
parent 72e54e8b60
commit 6e8ccbcf17
1 changed file with 21 additions and 18 deletions

View File

@ -1614,7 +1614,7 @@ inline bool isdenormal(double x)
void spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op) void spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op)
{ {
SetHostRoundingMode(FPSCR_RN_ZERO); fesetround(FE_TOWARDZERO);
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
const float a = spu.gpr[op.ra]._f[i]; const float a = spu.gpr[op.ra]._f[i];
@ -1634,7 +1634,7 @@ void spu_interpreter_precise::FREST(SPUThread& spu, spu_opcode_t op)
void spu_interpreter_precise::FRSQEST(SPUThread& spu, spu_opcode_t op) void spu_interpreter_precise::FRSQEST(SPUThread& spu, spu_opcode_t op)
{ {
SetHostRoundingMode(FPSCR_RN_ZERO); fesetround(FE_TOWARDZERO);
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
const float a = spu.gpr[op.ra]._f[i]; const float a = spu.gpr[op.ra]._f[i];
@ -1677,7 +1677,7 @@ void spu_interpreter_precise::FCGT(SPUThread& spu, spu_opcode_t op)
static void FA_FS(SPUThread& spu, spu_opcode_t op, bool sub) static void FA_FS(SPUThread& spu, spu_opcode_t op, bool sub)
{ {
SetHostRoundingMode(FPSCR_RN_ZERO); fesetround(FE_TOWARDZERO);
for (int w = 0; w < 4; w++) for (int w = 0; w < 4; w++)
{ {
const float a = spu.gpr[op.ra]._f[w]; const float a = spu.gpr[op.ra]._f[w];
@ -1766,7 +1766,7 @@ void spu_interpreter_precise::FS(SPUThread& spu, spu_opcode_t op) { FA_FS(spu, o
void spu_interpreter_precise::FM(SPUThread& spu, spu_opcode_t op) void spu_interpreter_precise::FM(SPUThread& spu, spu_opcode_t op)
{ {
SetHostRoundingMode(FPSCR_RN_ZERO); fesetround(FE_TOWARDZERO);
for (int w = 0; w < 4; w++) for (int w = 0; w < 4; w++)
{ {
const float a = spu.gpr[op.ra]._f[w]; const float a = spu.gpr[op.ra]._f[w];
@ -1899,18 +1899,19 @@ static void DFASM(SPUThread& spu, spu_opcode_t op, DoubleOp operation)
case DFASM_S: result = a - b; break; case DFASM_S: result = a - b; break;
case DFASM_M: result = a * b; break; case DFASM_M: result = a * b; break;
} }
if (fetestexcept(FE_INVALID)) const u32 e = _mm_getcsr();
if (e & _MM_MASK_INVALID)
{ {
spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINV); spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINV);
result = DOUBLE_NAN; result = DOUBLE_NAN;
} }
else else
{ {
if (fetestexcept(FE_OVERFLOW)) if (e & _MM_MASK_OVERFLOW)
spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF); spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF);
if (fetestexcept(FE_UNDERFLOW)) if (e & _MM_MASK_UNDERFLOW)
spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF); spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF);
if (fetestexcept(FE_INEXACT)) if (e & _MM_MASK_INEXACT)
spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINX); spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINX);
} }
} }
@ -1959,18 +1960,19 @@ static void DFMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub)
SetHostRoundingMode(spu.fpscr.checkSliceRounding(i)); SetHostRoundingMode(spu.fpscr.checkSliceRounding(i));
feclearexcept(FE_ALL_EXCEPT); feclearexcept(FE_ALL_EXCEPT);
result = fma(a, b, sub ? -c : c); result = fma(a, b, sub ? -c : c);
if (fetestexcept(FE_INVALID)) const u32 e = _mm_getcsr();
if (e & _MM_MASK_INVALID)
{ {
spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINV); spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINV);
result = DOUBLE_NAN; result = DOUBLE_NAN;
} }
else else
{ {
if (fetestexcept(FE_OVERFLOW)) if (e & _MM_MASK_OVERFLOW)
spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF); spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF);
if (fetestexcept(FE_UNDERFLOW)) if (e & _MM_MASK_UNDERFLOW)
spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF); spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF);
if (fetestexcept(FE_INEXACT)) if (e & _MM_MASK_INEXACT)
spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINX); spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINX);
if (neg) result = -result; if (neg) result = -result;
} }
@ -2033,11 +2035,12 @@ void spu_interpreter_precise::FRDS(SPUThread& spu, spu_opcode_t op)
{ {
feclearexcept(FE_ALL_EXCEPT); feclearexcept(FE_ALL_EXCEPT);
spu.gpr[op.rt]._f[i * 2 + 1] = (float)a; spu.gpr[op.rt]._f[i * 2 + 1] = (float)a;
if (fetestexcept(FE_OVERFLOW)) const u32 e = _mm_getcsr();
if (e & _MM_MASK_OVERFLOW)
spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF); spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DOVF);
if (fetestexcept(FE_UNDERFLOW)) if (e & _MM_MASK_UNDERFLOW)
spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF); spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DUNF);
if (fetestexcept(FE_INEXACT)) if (e & _MM_MASK_INEXACT)
spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINX); spu.fpscr.setDoublePrecisionExceptionFlags(i, FPSCR_DINX);
} }
spu.gpr[op.rt]._u32[i * 2] = 0; spu.gpr[op.rt]._u32[i * 2] = 0;
@ -2131,7 +2134,7 @@ void spu_interpreter_precise::CFLTU(SPUThread& spu, spu_opcode_t op)
void spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op) void spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op)
{ {
SetHostRoundingMode(FPSCR_RN_ZERO); fesetround(FE_TOWARDZERO);
const int scale = 155 - (op.i8 & 0xff); //unsigned immediate const int scale = 155 - (op.i8 & 0xff); //unsigned immediate
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
@ -2154,7 +2157,7 @@ void spu_interpreter_precise::CSFLT(SPUThread& spu, spu_opcode_t op)
void spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op) void spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op)
{ {
SetHostRoundingMode(FPSCR_RN_ZERO); fesetround(FE_TOWARDZERO);
const int scale = 155 - (op.i8 & 0xff); //unsigned immediate const int scale = 155 - (op.i8 & 0xff); //unsigned immediate
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
@ -2177,7 +2180,7 @@ void spu_interpreter_precise::CUFLT(SPUThread& spu, spu_opcode_t op)
static void FMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub) static void FMA(SPUThread& spu, spu_opcode_t op, bool neg, bool sub)
{ {
SetHostRoundingMode(FPSCR_RN_ZERO); fesetround(FE_TOWARDZERO);
for (int w = 0; w < 4; w++) for (int w = 0; w < 4; w++)
{ {
float a = spu.gpr[op.ra]._f[w]; float a = spu.gpr[op.ra]._f[w];