PPCAnalyst: Rework the store-safe logic
The output of instructions like fabsx and ps_sel is store-safe if and only if the relevant inputs are. The old code was always marking the output as store-safe if the output was a single, and never otherwise. Also, the old code was treating the output of psq_l/psq_lu as store-safe, which seems incorrect (if dequantization is disabled).
This commit is contained in:
parent
e2b5026652
commit
1845c5948d
|
@ -97,16 +97,16 @@ static std::array<GekkoOPTemplate, 13> table4 =
|
||||||
{{ //SUBOP10
|
{{ //SUBOP10
|
||||||
{0, Interpreter::ps_cmpu0, {"ps_cmpu0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
|
{0, Interpreter::ps_cmpu0, {"ps_cmpu0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{32, Interpreter::ps_cmpo0, {"ps_cmpo0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
|
{32, Interpreter::ps_cmpo0, {"ps_cmpo0", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{40, Interpreter::ps_neg, {"ps_neg", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{40, Interpreter::ps_neg, {"ps_neg", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{136, Interpreter::ps_nabs, {"ps_nabs", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{136, Interpreter::ps_nabs, {"ps_nabs", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{264, Interpreter::ps_abs, {"ps_abs", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{264, Interpreter::ps_abs, {"ps_abs", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{64, Interpreter::ps_cmpu1, {"ps_cmpu1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
|
{64, Interpreter::ps_cmpu1, {"ps_cmpu1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{72, Interpreter::ps_mr, {"ps_mr", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{72, Interpreter::ps_mr, {"ps_mr", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{96, Interpreter::ps_cmpo1, {"ps_cmpo1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
|
{96, Interpreter::ps_cmpo1, {"ps_cmpo1", OpType::PS, FL_IN_FLOAT_AB | FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{528, Interpreter::ps_merge00, {"ps_merge00", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{528, Interpreter::ps_merge00, {"ps_merge00", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{560, Interpreter::ps_merge01, {"ps_merge01", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{560, Interpreter::ps_merge01, {"ps_merge01", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{592, Interpreter::ps_merge10, {"ps_merge10", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{592, Interpreter::ps_merge10, {"ps_merge10", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{624, Interpreter::ps_merge11, {"ps_merge11", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{624, Interpreter::ps_merge11, {"ps_merge11", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_IN_FLOAT_AB_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
|
|
||||||
{1014, Interpreter::dcbz_l, {"dcbz_l", OpType::System, FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}},
|
{1014, Interpreter::dcbz_l, {"dcbz_l", OpType::System, FL_IN_A0B | FL_LOADSTORE, 1, 0, 0, 0}},
|
||||||
}};
|
}};
|
||||||
|
@ -122,7 +122,7 @@ static std::array<GekkoOPTemplate, 17> table4_2 =
|
||||||
{18, Interpreter::ps_div, {"ps_div", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}},
|
{18, Interpreter::ps_div, {"ps_div", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 17, 0, 0, 0}},
|
||||||
{20, Interpreter::ps_sub, {"ps_sub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
{20, Interpreter::ps_sub, {"ps_sub", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{21, Interpreter::ps_add, {"ps_add", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
{21, Interpreter::ps_add, {"ps_add", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{23, Interpreter::ps_sel, {"ps_sel", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{23, Interpreter::ps_sel, {"ps_sel", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_IN_FLOAT_BC_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{24, Interpreter::ps_res, {"ps_res", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
{24, Interpreter::ps_res, {"ps_res", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{25, Interpreter::ps_mul, {"ps_mul", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
{25, Interpreter::ps_mul, {"ps_mul", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{26, Interpreter::ps_rsqrte, {"ps_rsqrte", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 2, 0, 0, 0}},
|
{26, Interpreter::ps_rsqrte, {"ps_rsqrte", OpType::PS, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 2, 0, 0, 0}},
|
||||||
|
@ -318,7 +318,7 @@ static std::array<GekkoOPTemplate, 9> table59 =
|
||||||
|
|
||||||
static std::array<GekkoOPTemplate, 15> table63 =
|
static std::array<GekkoOPTemplate, 15> table63 =
|
||||||
{{
|
{{
|
||||||
{264, Interpreter::fabsx, {"fabsx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{264, Interpreter::fabsx, {"fabsx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
|
|
||||||
// FIXME: fcmp modifies the FPRF flags, but if the flags are clobbered later,
|
// FIXME: fcmp modifies the FPRF flags, but if the flags are clobbered later,
|
||||||
// we don't actually need to calculate or store them here. So FL_READ_FPRF and FL_SET_FPRF is not
|
// we don't actually need to calculate or store them here. So FL_READ_FPRF and FL_SET_FPRF is not
|
||||||
|
@ -329,9 +329,9 @@ static std::array<GekkoOPTemplate, 15> table63 =
|
||||||
|
|
||||||
{14, Interpreter::fctiwx, {"fctiwx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{14, Interpreter::fctiwx, {"fctiwx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{15, Interpreter::fctiwzx, {"fctiwzx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{15, Interpreter::fctiwzx, {"fctiwzx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{72, Interpreter::fmrx, {"fmrx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{72, Interpreter::fmrx, {"fmrx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_IN_FLOAT_B_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{136, Interpreter::fnabsx, {"fnabsx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{136, Interpreter::fnabsx, {"fnabsx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_IN_FLOAT_B_BITEXACT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{40, Interpreter::fnegx, {"fnegx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{40, Interpreter::fnegx, {"fnegx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_IN_FLOAT_B_BITEXACT | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{12, Interpreter::frspx, {"frspx", OpType::DoubleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
{12, Interpreter::frspx, {"frspx", OpType::DoubleFP, FL_OUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
|
|
||||||
{64, Interpreter::mcrfs, {"mcrfs", OpType::SystemFP, FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}},
|
{64, Interpreter::mcrfs, {"mcrfs", OpType::SystemFP, FL_SET_CRn | FL_USE_FPU | FL_READ_FPRF, 1, 0, 0, 0}},
|
||||||
|
@ -347,7 +347,7 @@ static std::array<GekkoOPTemplate, 10> table63_2 =
|
||||||
{18, Interpreter::fdivx, {"fdivx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 31, 0, 0, 0}},
|
{18, Interpreter::fdivx, {"fdivx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 31, 0, 0, 0}},
|
||||||
{20, Interpreter::fsubx, {"fsubx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
{20, Interpreter::fsubx, {"fsubx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{21, Interpreter::faddx, {"faddx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
{21, Interpreter::faddx, {"faddx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AB | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{23, Interpreter::fselx, {"fselx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
{23, Interpreter::fselx, {"fselx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_IN_FLOAT_BC_BITEXACT | FL_RC_BIT_F | FL_USE_FPU, 1, 0, 0, 0}},
|
||||||
{25, Interpreter::fmulx, {"fmulx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
{25, Interpreter::fmulx, {"fmulx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_AC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{26, Interpreter::frsqrtex, {"frsqrtex", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
{26, Interpreter::frsqrtex, {"frsqrtex", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_B | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
{28, Interpreter::fmsubx, {"fmsubx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
{28, Interpreter::fmsubx, {"fmsubx", OpType::DoubleFP, FL_INOUT_FLOAT_D | FL_IN_FLOAT_ABC | FL_RC_BIT_F | FL_USE_FPU | FL_SET_FPRF, 1, 0, 0, 0}},
|
||||||
|
|
|
@ -971,37 +971,63 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock* block, CodeBuffer* buffer, std:
|
||||||
op.fprIsStoreSafe = fprIsStoreSafe;
|
op.fprIsStoreSafe = fprIsStoreSafe;
|
||||||
if (op.fregOut >= 0)
|
if (op.fregOut >= 0)
|
||||||
{
|
{
|
||||||
fprIsSingle[op.fregOut] = false;
|
|
||||||
fprIsDuplicated[op.fregOut] = false;
|
|
||||||
fprIsStoreSafe[op.fregOut] = false;
|
|
||||||
// Single, duplicated, and doesn't need PPC_FP.
|
|
||||||
if (op.opinfo->type == OpType::SingleFP)
|
if (op.opinfo->type == OpType::SingleFP)
|
||||||
{
|
{
|
||||||
fprIsSingle[op.fregOut] = true;
|
fprIsSingle[op.fregOut] = true;
|
||||||
fprIsDuplicated[op.fregOut] = true;
|
fprIsDuplicated[op.fregOut] = true;
|
||||||
fprIsStoreSafe[op.fregOut] = true;
|
|
||||||
}
|
}
|
||||||
// Single and duplicated, but might be a denormal (not safe to skip PPC_FP).
|
else if (!strncmp(op.opinfo->opname, "lfs", 3))
|
||||||
// TODO: if we go directly from a load to store, skip conversion entirely?
|
|
||||||
// TODO: if we go directly from a load to a float instruction, and the value isn't used
|
|
||||||
// for anything else, we can skip PPC_FP on a load too.
|
|
||||||
if (!strncmp(op.opinfo->opname, "lfs", 3))
|
|
||||||
{
|
{
|
||||||
fprIsSingle[op.fregOut] = true;
|
fprIsSingle[op.fregOut] = true;
|
||||||
fprIsDuplicated[op.fregOut] = true;
|
fprIsDuplicated[op.fregOut] = true;
|
||||||
}
|
}
|
||||||
// Paired are still floats, but the top/bottom halves may differ.
|
else if (op.opinfo->type == OpType::PS || op.opinfo->type == OpType::LoadPS)
|
||||||
if (op.opinfo->type == OpType::PS || op.opinfo->type == OpType::LoadPS)
|
|
||||||
{
|
{
|
||||||
fprIsSingle[op.fregOut] = true;
|
fprIsSingle[op.fregOut] = true;
|
||||||
fprIsStoreSafe[op.fregOut] = true;
|
fprIsDuplicated[op.fregOut] = false;
|
||||||
}
|
}
|
||||||
// Careful: changing the float mode in a block breaks this optimization, since
|
else
|
||||||
// a previous float op might have had had FTZ off while the later store has FTZ
|
{
|
||||||
// on. So, discard all information we have.
|
fprIsSingle[op.fregOut] = false;
|
||||||
|
fprIsDuplicated[op.fregOut] = false;
|
||||||
|
}
|
||||||
|
|
||||||
if (!strncmp(op.opinfo->opname, "mtfs", 4))
|
if (!strncmp(op.opinfo->opname, "mtfs", 4))
|
||||||
|
{
|
||||||
|
// Careful: changing the float mode in a block breaks the store-safe optimization,
|
||||||
|
// since a previous float op might have had FTZ off while the later store has FTZ on.
|
||||||
|
// So, discard all information we have.
|
||||||
fprIsStoreSafe = BitSet32(0);
|
fprIsStoreSafe = BitSet32(0);
|
||||||
}
|
}
|
||||||
|
else if (op.opinfo->flags &
|
||||||
|
(FL_IN_FLOAT_A_BITEXACT | FL_IN_FLOAT_B_BITEXACT | FL_IN_FLOAT_C_BITEXACT))
|
||||||
|
{
|
||||||
|
// If the instruction copies bits between registers (without flushing denormals to zero
|
||||||
|
// or turning SNaN into QNaN), the output is store-safe if the inputs are.
|
||||||
|
|
||||||
|
BitSet32 bitexact_inputs;
|
||||||
|
if (op.opinfo->flags & FL_IN_FLOAT_A_BITEXACT)
|
||||||
|
bitexact_inputs[code->inst.FA] = true;
|
||||||
|
if (op.opinfo->flags & FL_IN_FLOAT_B_BITEXACT)
|
||||||
|
bitexact_inputs[code->inst.FB] = true;
|
||||||
|
if (op.opinfo->flags & FL_IN_FLOAT_C_BITEXACT)
|
||||||
|
bitexact_inputs[code->inst.FC] = true;
|
||||||
|
|
||||||
|
fprIsStoreSafe[op.fregOut] = (fprIsStoreSafe & bitexact_inputs) == bitexact_inputs;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Other FPU instructions are store-safe if they perform a single-precision
|
||||||
|
// arithmetic operation.
|
||||||
|
|
||||||
|
// TODO: if we go directly from a load to store, skip conversion entirely?
|
||||||
|
// TODO: if we go directly from a load to a float instruction, and the value isn't used
|
||||||
|
// for anything else, we can use fast single -> double conversion after the load.
|
||||||
|
|
||||||
|
fprIsStoreSafe[op.fregOut] =
|
||||||
|
(op.opinfo->type == OpType::SingleFP || op.opinfo->type == OpType::PS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (op.opinfo->type == OpType::StorePS || op.opinfo->type == OpType::LoadPS)
|
if (op.opinfo->type == OpType::StorePS || op.opinfo->type == OpType::LoadPS)
|
||||||
{
|
{
|
||||||
|
|
|
@ -61,8 +61,9 @@ struct CodeOp // 16B
|
||||||
// instruction)
|
// instruction)
|
||||||
BitSet32 fprIsDuplicated;
|
BitSet32 fprIsDuplicated;
|
||||||
// whether an fpr is the output of a single-precision arithmetic instruction, i.e. whether we can
|
// whether an fpr is the output of a single-precision arithmetic instruction, i.e. whether we can
|
||||||
// safely
|
// convert between single and double formats by just using the host machine's instruction for it.
|
||||||
// skip PPC_FP.
|
// (The reason why we can't always do this is because some games rely on the exact bits of
|
||||||
|
// denormals and SNaNs being preserved as long as no arithmetic operation is performed on them.)
|
||||||
BitSet32 fprIsStoreSafe;
|
BitSet32 fprIsStoreSafe;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -59,6 +59,11 @@ enum
|
||||||
FL_OUT_FLOAT_D = (1 << 28), // frD is used as a destination.
|
FL_OUT_FLOAT_D = (1 << 28), // frD is used as a destination.
|
||||||
// Used in the case of double ops (they don't modify the top half of the output)
|
// Used in the case of double ops (they don't modify the top half of the output)
|
||||||
FL_INOUT_FLOAT_D = FL_IN_FLOAT_D | FL_OUT_FLOAT_D,
|
FL_INOUT_FLOAT_D = FL_IN_FLOAT_D | FL_OUT_FLOAT_D,
|
||||||
|
FL_IN_FLOAT_A_BITEXACT = (1 << 29), // The output is based on the exact bits in frA.
|
||||||
|
FL_IN_FLOAT_B_BITEXACT = (1 << 30), // The output is based on the exact bits in frB.
|
||||||
|
FL_IN_FLOAT_C_BITEXACT = (1 << 31), // The output is based on the exact bits in frC.
|
||||||
|
FL_IN_FLOAT_AB_BITEXACT = FL_IN_FLOAT_A_BITEXACT | FL_IN_FLOAT_B_BITEXACT,
|
||||||
|
FL_IN_FLOAT_BC_BITEXACT = FL_IN_FLOAT_B_BITEXACT | FL_IN_FLOAT_C_BITEXACT,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class OpType
|
enum class OpType
|
||||||
|
|
Loading…
Reference in New Issue