Add separate VMX/fpu mxcsr
Add support for constant operands for most FPU instructions. Remove constant folding for most FPU code. Half float: add optional extended-range half-precision emulation (use_extended_range_half cvar).
This commit is contained in:
parent
3185b0ac9c
commit
968f656d96
|
@ -692,6 +692,12 @@ void X64Backend::InitializeBackendContext(void* ctx) {
|
|||
X64BackendContext* bctx = reinterpret_cast<X64BackendContext*>(
|
||||
reinterpret_cast<intptr_t>(ctx) - sizeof(X64BackendContext));
|
||||
bctx->ResolveFunction_Ptr = reinterpret_cast<void*>(&ResolveFunction);
|
||||
bctx->mxcsr_fpu =
|
||||
DEFAULT_FPU_MXCSR; // idk if this is right, check on rgh what the
|
||||
// rounding on ppc is at startup
|
||||
bctx->mxcsr_vmx = DEFAULT_VMX_MXCSR;
|
||||
bctx->flags = 0;
|
||||
// https://media.discordapp.net/attachments/440280035056943104/1000765256643125308/unknown.png
|
||||
bctx->Ox1000 = 0x1000;
|
||||
}
|
||||
} // namespace x64
|
||||
|
|
|
@ -37,9 +37,17 @@ typedef void (*ResolveFunctionThunk)();
|
|||
// negatively index the membase reg)
|
||||
// Backend-private state. InitializeBackendContext locates it at
// (ctx - sizeof(X64BackendContext)), i.e. immediately below the guest context
// pointer, and generated code reaches the fields through GetBackendCtxPtr.
// Field order/size is part of that addressing scheme; do not reorder casually.
struct X64BackendContext {
|
||||
void* ResolveFunction_Ptr; // cached pointer to ResolveFunction
|
||||
unsigned int mxcsr_fpu; // MXCSR image loaded by LoadFpuMxcsrDirect; initialized to DEFAULT_FPU_MXCSR
|
||||
unsigned int mxcsr_vmx; // MXCSR image loaded by LoadVmxMxcsrDirect; initialized to DEFAULT_VMX_MXCSR and rewritten by SET_NJM
|
||||
unsigned int flags; // bit 0: 0 = hardware MXCSR currently holds the FPU image, 1 = it holds the VMX image
|
||||
unsigned int Ox1000; // holds the constant 0x1000 so emitted code can add it from memory;
|
||||
// original author's note: this shrinks each tail-emitted add by 2 bytes
|
||||
};
|
||||
// MXCSR image used for VMX (vector) code by default: every SSE floating-point
// exception masked, with both denormal-handling bits enabled.
constexpr unsigned int DEFAULT_VMX_MXCSR =
    (_MM_MASK_MASK) |  // mask all floating-point exceptions
    0x0040 |           // DAZ: denormal inputs are treated as zero
    0x8000;            // FTZ: denormal results are flushed to zero

// MXCSR image used for scalar FPU code by default: exceptions masked,
// no DAZ/FTZ, rounding-control bits left at zero.
constexpr unsigned int DEFAULT_FPU_MXCSR = 0x1F80;
|
||||
|
||||
class X64Backend : public Backend {
|
||||
public:
|
||||
|
|
|
@ -320,6 +320,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
|
|||
// Body.
|
||||
auto block = builder->first_block();
|
||||
while (block) {
|
||||
ForgetMxcsrMode(); // at start of block, mxcsr mode is undefined
|
||||
|
||||
// Mark block labels.
|
||||
auto label = block->label_head;
|
||||
while (label) {
|
||||
|
@ -490,6 +492,7 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address) {
|
|||
|
||||
void X64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
|
||||
assert_not_null(function);
|
||||
ForgetMxcsrMode();
|
||||
auto fn = static_cast<X64Function*>(function);
|
||||
// Resolve address to the function to call and store in rax.
|
||||
|
||||
|
@ -564,6 +567,7 @@ void X64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
|
|||
|
||||
void X64Emitter::CallIndirect(const hir::Instr* instr,
|
||||
const Xbyak::Reg64& reg) {
|
||||
ForgetMxcsrMode();
|
||||
// Check if return.
|
||||
if (instr->flags & hir::CALL_POSSIBLE_RETURN) {
|
||||
cmp(reg.cvt32(), dword[rsp + StackLayout::GUEST_RET_ADDR]);
|
||||
|
@ -617,6 +621,7 @@ uint64_t UndefinedCallExtern(void* raw_context, uint64_t function_ptr) {
|
|||
return 0;
|
||||
}
|
||||
void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
|
||||
ForgetMxcsrMode();
|
||||
bool undefined = true;
|
||||
if (function->behavior() == Function::Behavior::kBuiltin) {
|
||||
auto builtin_function = static_cast<const BuiltinFunction*>(function);
|
||||
|
@ -696,11 +701,13 @@ Xbyak::Reg64 X64Emitter::GetNativeParam(uint32_t param) {
|
|||
}
|
||||
|
||||
// Important: If you change these, you must update the thunks in x64_backend.cc!
|
||||
Xbyak::Reg64 X64Emitter::GetContextReg() { return rsi; }
|
||||
Xbyak::Reg64 X64Emitter::GetMembaseReg() { return rdi; }
|
||||
Xbyak::Reg64 X64Emitter::GetContextReg() const { return rsi; }
|
||||
Xbyak::Reg64 X64Emitter::GetMembaseReg() const { return rdi; }
|
||||
|
||||
void X64Emitter::ReloadMembase() {
|
||||
mov(GetMembaseReg(), qword[GetContextReg() + 8]); // membase
|
||||
mov(GetMembaseReg(),
|
||||
qword[GetContextReg() +
|
||||
offsetof(ppc::PPCContext, virtual_membase)]); // membase
|
||||
}
|
||||
|
||||
// Len Assembly Byte Sequence
|
||||
|
@ -917,7 +924,7 @@ static const vec128_t xmm_consts[] = {
|
|||
/* XMMQNaN */ vec128i(0x7FC00000u),
|
||||
/* XMMInt127 */ vec128i(0x7Fu),
|
||||
/* XMM2To32 */ vec128f(0x1.0p32f),
|
||||
/* xmminf */ vec128i(0x7f800000),
|
||||
/* XMMFloatInf */ vec128i(0x7f800000),
|
||||
|
||||
/* XMMIntsToBytes*/
|
||||
v128_setr_bytes(0, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
|
||||
|
@ -938,9 +945,7 @@ static const vec128_t xmm_consts[] = {
|
|||
/*XMMVSRShlByteshuf*/
|
||||
v128_setr_bytes(13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 0x80),
|
||||
// XMMVSRMask
|
||||
vec128b(1)
|
||||
|
||||
};
|
||||
vec128b(1)};
|
||||
|
||||
void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) {
|
||||
for (auto& vec : xmm_consts) {
|
||||
|
@ -1347,7 +1352,7 @@ SimdDomain X64Emitter::DeduceSimdDomain(const hir::Value* for_value) {
|
|||
|
||||
return SimdDomain::DONTCARE;
|
||||
}
|
||||
Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) {
|
||||
Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const {
|
||||
/*
|
||||
index context ptr negatively to get to backend ctx field
|
||||
*/
|
||||
|
@ -1368,6 +1373,93 @@ Xbyak::Label& X64Emitter::NewCachedLabel() {
|
|||
label_cache_.push_back(tmp);
|
||||
return *tmp;
|
||||
}
|
||||
|
||||
template<bool switching_to_fpu>
|
||||
static void ChangeMxcsrModeDynamicHelper(X64Emitter& e) {
|
||||
auto flags = e.GetBackendFlagsPtr();
|
||||
if (switching_to_fpu) {
|
||||
e.btr(flags, 0); // bit 0 set to 0 = is fpu mode
|
||||
} else {
|
||||
e.bts(flags, 0); // bit 0 set to 1 = is vmx mode
|
||||
}
|
||||
Xbyak::Label& come_back = e.NewCachedLabel();
|
||||
|
||||
Xbyak::Label& reload_bailout =
|
||||
e.AddToTail([&come_back](X64Emitter& e, Xbyak::Label& thislabel) {
|
||||
e.L(thislabel);
|
||||
if (switching_to_fpu) {
|
||||
e.LoadFpuMxcsrDirect();
|
||||
} else {
|
||||
e.LoadVmxMxcsrDirect();
|
||||
}
|
||||
e.jmp(come_back, X64Emitter::T_NEAR);
|
||||
});
|
||||
if (switching_to_fpu) {
|
||||
e.jc(reload_bailout,
|
||||
X64Emitter::T_NEAR); // if carry flag was set, we were VMX mxcsr mode.
|
||||
} else {
|
||||
e.jnc(reload_bailout,
|
||||
X64Emitter::T_NEAR); // if carry flag was set, we were VMX mxcsr mode.
|
||||
}
|
||||
e.L(come_back);
|
||||
}
|
||||
|
||||
// Switches the MXCSR mode the emitter assumes for subsequently emitted code,
// emitting whatever is needed to make the hardware state match.
//
// new_mode:    mode to enter; must not be MXCSRMode::Unknown.
// already_set: the caller has already emitted the vldmxcsr for new_mode, so
//              only the bookkeeping (mxcsr_mode_ and backend flags bit 0) is
//              updated here.
//
// Returns true only when an unconditional MXCSR load was emitted (previous
// mode statically known and different). Returns false when nothing changed,
// when the caller already loaded MXCSR, or when the switch was emitted as a
// runtime-checked (conditional) reload.
//
// Note: the btr/bts used to update the flags modify EFLAGS.
bool X64Emitter::ChangeMxcsrMode(MXCSRMode new_mode, bool already_set) {
  if (new_mode == mxcsr_mode_) {
    return false;
  }
  assert_true(new_mode != MXCSRMode::Unknown);

  const bool previous_mode_known = mxcsr_mode_ != MXCSRMode::Unknown;
  mxcsr_mode_ = new_mode;

  if (already_set) {
    // Even if MXCSR is already loaded, the backend flags must still be
    // updated to reflect the new mode. Previously this was only done when the
    // old mode was Unknown; with a known old mode the flag went stale, and a
    // later dynamic check could skip a required reload.
    if (new_mode == MXCSRMode::Fpu) {
      btr(GetBackendFlagsPtr(), 0);
    } else if (new_mode == MXCSRMode::Vmx) {
      bts(GetBackendFlagsPtr(), 0);
    } else {
      assert_unhandled_case(new_mode);
    }
    return false;
  }

  if (!previous_mode_known) {
    // Current mode unknown at emit time: check the flag at runtime and reload
    // only if the other mode was active.
    if (new_mode == MXCSRMode::Fpu) {
      ChangeMxcsrModeDynamicHelper<true>(*this);
    } else if (new_mode == MXCSRMode::Vmx) {
      ChangeMxcsrModeDynamicHelper<false>(*this);
    } else {
      assert_unhandled_case(new_mode);
    }
    return false;
  }

  // Previous mode is known and differs: load unconditionally.
  if (new_mode == MXCSRMode::Fpu) {
    LoadFpuMxcsrDirect();
    btr(GetBackendFlagsPtr(), 0);
    return true;
  } else if (new_mode == MXCSRMode::Vmx) {
    LoadVmxMxcsrDirect();
    bts(GetBackendFlagsPtr(), 0);
    return true;
  } else {
    assert_unhandled_case(new_mode);
  }
  return false;
}
|
||||
void X64Emitter::LoadFpuMxcsrDirect() {
|
||||
vldmxcsr(GetBackendCtxPtr(offsetof(X64BackendContext, mxcsr_fpu)));
|
||||
}
|
||||
void X64Emitter::LoadVmxMxcsrDirect() {
|
||||
vldmxcsr(GetBackendCtxPtr(offsetof(X64BackendContext, mxcsr_vmx)));
|
||||
}
|
||||
// Returns a memory operand addressing X64BackendContext::flags, with the
// operand size set to 32 bits so it can be used directly with btr/bts.
Xbyak::Address X64Emitter::GetBackendFlagsPtr() const {
  auto flags_operand = GetBackendCtxPtr(offsetof(X64BackendContext, flags));
  flags_operand.setBit(32);
  return flags_operand;
}
|
||||
} // namespace x64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
|
|
|
@ -65,6 +65,12 @@ enum class SimdDomain : uint32_t {
|
|||
// CONFLICTING means its used in multiple domains)
|
||||
};
|
||||
|
||||
// Which MXCSR image the emitter assumes is loaded at the current point in the
// code being generated.
enum class MXCSRMode : uint32_t {
  Unknown = 0,  // not known at emit time; a switch must check at runtime
  Fpu = 1,      // the mxcsr_fpu image is loaded
  Vmx = 2       // the mxcsr_vmx image is loaded
};
|
||||
|
||||
static SimdDomain PickDomain2(SimdDomain dom1, SimdDomain dom2) {
|
||||
if (dom1 == dom2) {
|
||||
return dom1;
|
||||
|
@ -283,8 +289,8 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
|
||||
Xbyak::Reg64 GetNativeParam(uint32_t param);
|
||||
|
||||
Xbyak::Reg64 GetContextReg();
|
||||
Xbyak::Reg64 GetMembaseReg();
|
||||
Xbyak::Reg64 GetContextReg() const;
|
||||
Xbyak::Reg64 GetMembaseReg() const;
|
||||
bool CanUseMembaseLow32As0() const { return may_use_membase32_as_zero_reg_; }
|
||||
void ReloadMembase();
|
||||
|
||||
|
@ -295,7 +301,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
void MovMem64(const Xbyak::RegExp& addr, uint64_t v);
|
||||
|
||||
Xbyak::Address GetXmmConstPtr(XmmConst id);
|
||||
Xbyak::Address GetBackendCtxPtr(int offset_in_x64backendctx);
|
||||
Xbyak::Address GetBackendCtxPtr(int offset_in_x64backendctx) const;
|
||||
|
||||
void LoadConstantXmm(Xbyak::Xmm dest, float v);
|
||||
void LoadConstantXmm(Xbyak::Xmm dest, double v);
|
||||
|
@ -304,6 +310,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
Xbyak::Address StashConstantXmm(int index, float v);
|
||||
Xbyak::Address StashConstantXmm(int index, double v);
|
||||
Xbyak::Address StashConstantXmm(int index, const vec128_t& v);
|
||||
Xbyak::Address GetBackendFlagsPtr() const;
|
||||
void* FindByteConstantOffset(unsigned bytevalue);
|
||||
void* FindWordConstantOffset(unsigned wordvalue);
|
||||
void* FindDwordConstantOffset(unsigned bytevalue);
|
||||
|
@ -319,6 +326,16 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
size_t stack_size() const { return stack_size_; }
|
||||
SimdDomain DeduceSimdDomain(const hir::Value* for_value);
|
||||
|
||||
// Discards the emitter's static knowledge of the active MXCSR mode, so the
// next ChangeMxcsrMode has to determine it at runtime. Emits no code.
void ForgetMxcsrMode() { mxcsr_mode_ = MXCSRMode::Unknown; }
|
||||
/*
|
||||
returns true if had to load mxcsr. DOT_PRODUCT can use this to skip clearing the overflow flag, as it will never be set in the vmx fpscr
|
||||
*/
|
||||
bool ChangeMxcsrMode(MXCSRMode new_mode, bool already_set=false);//already_set means that the caller already did vldmxcsr, used for SET_ROUNDING_MODE
|
||||
|
||||
void LoadFpuMxcsrDirect(); //unsafe, does not change mxcsr_mode_
|
||||
void LoadVmxMxcsrDirect(); //unsafe, does not change mxcsr_mode_
|
||||
protected:
|
||||
void* Emplace(const EmitFunctionInfo& func_info,
|
||||
GuestFunction* function = nullptr);
|
||||
|
@ -359,6 +376,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
std::vector<Xbyak::Label*>
|
||||
label_cache_; // for creating labels that need to be referenced much
|
||||
// later by tail emitters
|
||||
MXCSRMode mxcsr_mode_ = MXCSRMode::Unknown;
|
||||
};
|
||||
|
||||
} // namespace x64
|
||||
|
|
|
@ -616,7 +616,31 @@ struct Sequence {
|
|||
}
|
||||
}
|
||||
};
|
||||
template <typename T>
|
||||
static Xmm GetInputRegOrConstant(X64Emitter& e, const T& input,
|
||||
Xmm xmm_to_use_if_const) {
|
||||
if (input.is_constant) {
|
||||
using constant_type = std::remove_reference_t<decltype(input.constant())>;
|
||||
|
||||
if constexpr (std::is_integral_v<constant_type>) {
|
||||
vec128_t input_constant = vec128b(0);
|
||||
if constexpr (sizeof(constant_type) == 4) {
|
||||
input_constant.i32[0] = input.constant();
|
||||
|
||||
} else if constexpr (sizeof(constant_type) == 8) {
|
||||
input_constant.low = input.constant();
|
||||
} else {
|
||||
assert_unhandled_case(sizeof(constant_type));
|
||||
}
|
||||
e.LoadConstantXmm(xmm_to_use_if_const, input_constant);
|
||||
} else {
|
||||
e.LoadConstantXmm(xmm_to_use_if_const, input.constant());
|
||||
}
|
||||
return xmm_to_use_if_const;
|
||||
} else {
|
||||
return input;
|
||||
}
|
||||
}
|
||||
} // namespace x64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
|
|
|
@ -257,6 +257,7 @@ struct CALL_TRUE_I8
|
|||
e.jz(skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.L(skip);
|
||||
e.ForgetMxcsrMode();
|
||||
}
|
||||
};
|
||||
struct CALL_TRUE_I16
|
||||
|
@ -268,6 +269,7 @@ struct CALL_TRUE_I16
|
|||
e.jz(skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.L(skip);
|
||||
e.ForgetMxcsrMode();
|
||||
}
|
||||
};
|
||||
struct CALL_TRUE_I32
|
||||
|
@ -279,6 +281,7 @@ struct CALL_TRUE_I32
|
|||
e.jz(skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.L(skip);
|
||||
e.ForgetMxcsrMode();
|
||||
}
|
||||
};
|
||||
struct CALL_TRUE_I64
|
||||
|
@ -290,6 +293,7 @@ struct CALL_TRUE_I64
|
|||
e.jz(skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.L(skip);
|
||||
e.ForgetMxcsrMode();
|
||||
}
|
||||
};
|
||||
struct CALL_TRUE_F32
|
||||
|
@ -301,6 +305,7 @@ struct CALL_TRUE_F32
|
|||
e.jz(skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.L(skip);
|
||||
e.ForgetMxcsrMode();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -313,6 +318,7 @@ struct CALL_TRUE_F64
|
|||
e.jz(skip);
|
||||
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
|
||||
e.L(skip);
|
||||
e.ForgetMxcsrMode();
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_CALL_TRUE, CALL_TRUE_I8, CALL_TRUE_I16,
|
||||
|
@ -326,6 +332,7 @@ struct CALL_INDIRECT
|
|||
: Sequence<CALL_INDIRECT, I<OPCODE_CALL_INDIRECT, VoidOp, I64Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.CallIndirect(i.instr, i.src1);
|
||||
e.ForgetMxcsrMode();
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_CALL_INDIRECT, CALL_INDIRECT);
|
||||
|
|
|
@ -16,7 +16,13 @@
|
|||
|
||||
// For OPCODE_PACK/OPCODE_UNPACK
|
||||
#include "third_party/half/include/half.hpp"
|
||||
#include "xenia/base/cvar.h"
|
||||
#include "xenia/cpu/backend/x64/x64_stack_layout.h"
|
||||
|
||||
DEFINE_bool(use_extended_range_half, true,
|
||||
"Emulate extended range half-precision, may be slower on games "
|
||||
"that use it heavily",
|
||||
"CPU");
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace backend {
|
||||
|
@ -31,6 +37,8 @@ struct VECTOR_CONVERT_I2F
|
|||
: Sequence<VECTOR_CONVERT_I2F,
|
||||
I<OPCODE_VECTOR_CONVERT_I2F, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm3);
|
||||
// flags = ARITHMETIC_UNSIGNED
|
||||
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||
// Round manually to (1.stored mantissa bits * 2^31) or to 2^32 to the
|
||||
|
@ -46,8 +54,8 @@ struct VECTOR_CONVERT_I2F
|
|||
// be 4294967296.0f.
|
||||
// xmm0 = src + 0b01111111 + ((src >> 8) & 1)
|
||||
// (xmm1 also used to launch reg + mem early and to require it late)
|
||||
e.vpaddd(e.xmm1, i.src1, e.GetXmmConstPtr(XMMInt127));
|
||||
e.vpslld(e.xmm0, i.src1, 31 - 8);
|
||||
e.vpaddd(e.xmm1, src1, e.GetXmmConstPtr(XMMInt127));
|
||||
e.vpslld(e.xmm0, src1, 31 - 8);
|
||||
e.vpsrld(e.xmm0, e.xmm0, 31);
|
||||
e.vpaddd(e.xmm0, e.xmm0, e.xmm1);
|
||||
// xmm0 = (0xFF800000 | 23 explicit mantissa bits), or 0 if overflowed
|
||||
|
@ -63,13 +71,13 @@ struct VECTOR_CONVERT_I2F
|
|||
|
||||
// Convert from signed integer to float.
|
||||
// xmm1 = [0x00000000, 0x7FFFFFFF] case result
|
||||
e.vcvtdq2ps(e.xmm1, i.src1);
|
||||
e.vcvtdq2ps(e.xmm1, src1);
|
||||
|
||||
// Merge the two ways depending on whether the number is >= 0x80000000
|
||||
// (has high bit set).
|
||||
e.vblendvps(i.dest, e.xmm1, e.xmm0, i.src1);
|
||||
e.vblendvps(i.dest, e.xmm1, e.xmm0, src1);
|
||||
} else {
|
||||
e.vcvtdq2ps(i.dest, i.src1);
|
||||
e.vcvtdq2ps(i.dest, src1);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -82,9 +90,11 @@ struct VECTOR_CONVERT_F2I
|
|||
: Sequence<VECTOR_CONVERT_F2I,
|
||||
I<OPCODE_VECTOR_CONVERT_F2I, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm3);
|
||||
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
||||
// clamp to min 0
|
||||
e.vmaxps(e.xmm0, i.src1, e.GetXmmConstPtr(XMMZero));
|
||||
e.vmaxps(e.xmm0, src1, e.GetXmmConstPtr(XMMZero));
|
||||
|
||||
// xmm1 = mask of values >= (unsigned)INT_MIN
|
||||
e.vcmpgeps(e.xmm1, e.xmm0, e.GetXmmConstPtr(XMMPosIntMinPS));
|
||||
|
@ -108,14 +118,14 @@ struct VECTOR_CONVERT_F2I
|
|||
e.vpor(i.dest, i.dest, e.xmm0);
|
||||
} else {
|
||||
// xmm2 = NaN mask
|
||||
e.vcmpunordps(e.xmm2, i.src1, i.src1);
|
||||
e.vcmpunordps(e.xmm2, src1, src1);
|
||||
|
||||
// convert packed floats to packed dwords
|
||||
e.vcvttps2dq(e.xmm0, i.src1);
|
||||
e.vcvttps2dq(e.xmm0, src1);
|
||||
|
||||
// (high bit) xmm1 = dest is indeterminate and i.src1 >= 0
|
||||
e.vpcmpeqd(e.xmm1, e.xmm0, e.GetXmmConstPtr(XMMIntMin));
|
||||
e.vpandn(e.xmm1, i.src1, e.xmm1);
|
||||
e.vpandn(e.xmm1, src1, e.xmm1);
|
||||
|
||||
// saturate positive values
|
||||
e.vblendvps(i.dest, e.xmm0, e.GetXmmConstPtr(XMMIntMax), e.xmm1);
|
||||
|
@ -131,6 +141,7 @@ struct VECTOR_DENORMFLUSH
|
|||
: Sequence<VECTOR_DENORMFLUSH,
|
||||
I<OPCODE_VECTOR_DENORMFLUSH, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
e.vxorps(e.xmm1, e.xmm1, e.xmm1); // 0.25 P0123
|
||||
|
||||
e.vandps(e.xmm0, i.src1,
|
||||
|
@ -352,6 +363,7 @@ struct VECTOR_COMPARE_EQ_V128
|
|||
e.vpcmpeqd(dest, src1, src2);
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
e.vcmpeqps(dest, src1, src2);
|
||||
break;
|
||||
}
|
||||
|
@ -380,6 +392,7 @@ struct VECTOR_COMPARE_SGT_V128
|
|||
e.vpcmpgtd(dest, src1, src2);
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
e.vcmpgtps(dest, src1, src2);
|
||||
break;
|
||||
}
|
||||
|
@ -414,6 +427,7 @@ struct VECTOR_COMPARE_SGE_V128
|
|||
e.vpor(dest, e.xmm0);
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
e.vcmpgeps(dest, src1, src2);
|
||||
break;
|
||||
}
|
||||
|
@ -441,6 +455,7 @@ struct VECTOR_COMPARE_UGT_V128
|
|||
sign_addr = e.GetXmmConstPtr(XMMSignMaskI32);
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
sign_addr = e.GetXmmConstPtr(XMMSignMaskF32);
|
||||
break;
|
||||
default:
|
||||
|
@ -498,6 +513,7 @@ struct VECTOR_COMPARE_UGE_V128
|
|||
sign_addr = e.GetXmmConstPtr(XMMSignMaskI32);
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
sign_addr = e.GetXmmConstPtr(XMMSignMaskF32);
|
||||
break;
|
||||
}
|
||||
|
@ -620,6 +636,7 @@ struct VECTOR_ADD
|
|||
case FLOAT32_TYPE:
|
||||
assert_false(is_unsigned);
|
||||
assert_false(saturate);
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
e.vaddps(dest, src1, src2);
|
||||
break;
|
||||
default:
|
||||
|
@ -711,6 +728,7 @@ struct VECTOR_SUB
|
|||
}
|
||||
break;
|
||||
case FLOAT32_TYPE:
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
e.vsubps(dest, src1, src2);
|
||||
break;
|
||||
default:
|
||||
|
@ -2003,6 +2021,7 @@ EMITTER_OPCODE_TABLE(OPCODE_SWIZZLE, SWIZZLE);
|
|||
// ============================================================================
|
||||
struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
switch (i.instr->flags & PACK_TYPE_MODE) {
|
||||
case PACK_TYPE_D3DCOLOR:
|
||||
EmitD3DCOLOR(e, i);
|
||||
|
@ -2062,9 +2081,14 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
|||
alignas(16) uint16_t b[8];
|
||||
_mm_store_ps(a, src1);
|
||||
std::memset(b, 0, sizeof(b));
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
b[7 - i] = half_float::detail::float2half<std::round_toward_zero>(a[i]);
|
||||
if (!cvars::use_extended_range_half) {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
b[7 - i] = half_float::detail::float2half<std::round_toward_zero>(a[i]);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
b[7 - i] = float_to_xenos_half(a[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return _mm_load_si128(reinterpret_cast<__m128i*>(b));
|
||||
|
@ -2074,7 +2098,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
|||
// http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx
|
||||
// dest = [(src1.x | src1.y), 0, 0, 0]
|
||||
|
||||
if (e.IsFeatureEnabled(kX64EmitF16C)) {
|
||||
if (e.IsFeatureEnabled(kX64EmitF16C) && !cvars::use_extended_range_half) {
|
||||
Xmm src;
|
||||
if (i.src1.is_constant) {
|
||||
src = i.dest;
|
||||
|
@ -2101,10 +2125,15 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
|||
alignas(16) uint16_t b[8];
|
||||
_mm_store_ps(a, src1);
|
||||
std::memset(b, 0, sizeof(b));
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
b[7 - (i ^ 2)] =
|
||||
half_float::detail::float2half<std::round_toward_zero>(a[i]);
|
||||
if (!cvars::use_extended_range_half) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
b[7 - (i ^ 2)] =
|
||||
half_float::detail::float2half<std::round_toward_zero>(a[i]);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
b[7 - (i ^ 2)] = float_to_xenos_half(a[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return _mm_load_si128(reinterpret_cast<__m128i*>(b));
|
||||
|
@ -2113,7 +2142,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
|
|||
assert_true(i.src2.value->IsConstantZero());
|
||||
// dest = [(src1.z | src1.w), (src1.x | src1.y), 0, 0]
|
||||
|
||||
if (e.IsFeatureEnabled(kX64EmitF16C)) {
|
||||
if (e.IsFeatureEnabled(kX64EmitF16C) && !cvars::use_extended_range_half) {
|
||||
Xmm src;
|
||||
if (i.src1.is_constant) {
|
||||
src = i.dest;
|
||||
|
@ -2420,6 +2449,7 @@ EMITTER_OPCODE_TABLE(OPCODE_PACK, PACK);
|
|||
// ============================================================================
|
||||
struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
e.ChangeMxcsrMode(MXCSRMode::Vmx);
|
||||
switch (i.instr->flags & PACK_TYPE_MODE) {
|
||||
case PACK_TYPE_D3DCOLOR:
|
||||
EmitD3DCOLOR(e, i);
|
||||
|
@ -2478,10 +2508,15 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
|||
alignas(16) float b[4];
|
||||
_mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
b[i] = half_float::detail::half2float(a[VEC128_W(6 + i)]);
|
||||
if (!cvars::use_extended_range_half) {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
b[i] = half_float::detail::half2float(a[VEC128_W(6 + i)]);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
b[i] = xenos_half_to_float(a[VEC128_W(6 + i)]);
|
||||
}
|
||||
}
|
||||
|
||||
// Constants, or something
|
||||
b[2] = 0.f;
|
||||
b[3] = 1.f;
|
||||
|
@ -2501,7 +2536,9 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
|||
// Also zero out the high end.
|
||||
// TODO(benvanik): special case constant unpacks that just get 0/1/etc.
|
||||
|
||||
if (e.IsFeatureEnabled(kX64EmitF16C)) {
|
||||
if (e.IsFeatureEnabled(kX64EmitF16C) &&
|
||||
!cvars::use_extended_range_half) { // todo: can use cvtph and bit logic
|
||||
// to implement
|
||||
Xmm src;
|
||||
if (i.src1.is_constant) {
|
||||
src = i.dest;
|
||||
|
@ -2534,16 +2571,21 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
|||
alignas(16) uint16_t a[8];
|
||||
alignas(16) float b[4];
|
||||
_mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
b[i] = half_float::detail::half2float(a[VEC128_W(4 + i)]);
|
||||
if (!cvars::use_extended_range_half) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
b[i] = half_float::detail::half2float(a[VEC128_W(4 + i)]);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
b[i] = xenos_half_to_float(a[VEC128_W(4 + i)]);
|
||||
}
|
||||
}
|
||||
|
||||
return _mm_load_ps(b);
|
||||
}
|
||||
static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) {
|
||||
// src = [(dest.x | dest.y), (dest.z | dest.w), 0, 0]
|
||||
if (e.IsFeatureEnabled(kX64EmitF16C)) {
|
||||
if (e.IsFeatureEnabled(kX64EmitF16C) && !cvars::use_extended_range_half) {
|
||||
Xmm src;
|
||||
if (i.src1.is_constant) {
|
||||
src = i.dest;
|
||||
|
@ -2805,6 +2847,32 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
|
|||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_UNPACK, UNPACK);
|
||||
|
||||
// OPCODE_SET_NJM: rewrites the stored VMX MXCSR image (mxcsr_vmx) according to
// src1 and makes it the active hardware MXCSR.
//   src1 == 0 -> _MM_MASK_MASK (exceptions masked, DAZ/FTZ off)
//   src1 != 0 -> DEFAULT_VMX_MXCSR (exceptions masked, DAZ/FTZ on)
// The non-constant path clobbers eax and edx.
struct SET_NJM_I8 : Sequence<SET_NJM_I8, I<OPCODE_SET_NJM, VoidOp, I8Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    auto addr_vmx = e.GetBackendCtxPtr(offsetof(X64BackendContext, mxcsr_vmx));

    addr_vmx.setBit(32);
    if (i.src1.is_constant) {
      if (i.src1.constant() == 0) {
        // turn off daz/flush2z
        e.mov(addr_vmx, _MM_MASK_MASK);
      } else {
        e.mov(addr_vmx, DEFAULT_VMX_MXCSR);
      }
    } else {
      // edx = (src1 == 0) ? _MM_MASK_MASK : DEFAULT_VMX_MXCSR
      // (mov does not modify EFLAGS, so the test result survives to cmove).
      e.test(i.src1, i.src1);
      e.mov(e.edx, DEFAULT_VMX_MXCSR);
      e.mov(e.eax, _MM_MASK_MASK);

      e.cmove(e.edx, e.eax);
      e.mov(addr_vmx, e.edx);
    }

    // The stored image just changed, so the hardware MXCSR must be reloaded
    // unconditionally. A plain ChangeMxcsrMode(Vmx) emits no load when VMX
    // mode is already active, which would leave the old DAZ/FTZ setting in
    // effect. Forget the static mode first so the already_set path below
    // always updates the backend flags.
    e.ForgetMxcsrMode();
    e.LoadVmxMxcsrDirect();
    e.ChangeMxcsrMode(MXCSRMode::Vmx, true);
  }
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_SET_NJM, SET_NJM_I8);
|
||||
} // namespace x64
|
||||
} // namespace backend
|
||||
} // namespace cpu
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -20,6 +20,9 @@
|
|||
DEFINE_bool(inline_mmio_access, true, "Inline constant MMIO loads and stores.",
|
||||
"CPU");
|
||||
|
||||
DEFINE_bool(permit_float_constant_evaluation, false, "Allow float constant evaluation, may produce incorrect results and break games math",
|
||||
"CPU");
|
||||
|
||||
namespace xe {
|
||||
namespace cpu {
|
||||
namespace compiler {
|
||||
|
@ -68,8 +71,24 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
|
|||
result = false;
|
||||
auto block = builder->first_block();
|
||||
while (block) {
|
||||
auto i = block->instr_head;
|
||||
while (i) {
|
||||
for (auto i = block->instr_head; i; i = i->next) {
|
||||
if (((i->opcode->flags & OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING) != 0) &&
|
||||
!cvars::permit_float_constant_evaluation) {
|
||||
continue;
|
||||
}
|
||||
bool might_be_floatop = false;
|
||||
|
||||
i->VisitValueOperands(
|
||||
[&might_be_floatop](Value* current_opnd, uint32_t opnd_index) {
|
||||
might_be_floatop |= current_opnd->MaybeFloaty();
|
||||
});
|
||||
if (i->dest) {
|
||||
might_be_floatop |= i->dest->MaybeFloaty();
|
||||
}
|
||||
|
||||
bool should_skip_because_of_float =
|
||||
might_be_floatop && !cvars::permit_float_constant_evaluation;
|
||||
|
||||
auto v = i->dest;
|
||||
switch (i->opcode->num) {
|
||||
case OPCODE_DEBUG_BREAK_TRUE:
|
||||
|
@ -452,7 +471,8 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
|
|||
break;
|
||||
|
||||
case OPCODE_ADD:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant() &&
|
||||
!should_skip_because_of_float) {
|
||||
v->set_from(i->src1.value);
|
||||
v->Add(i->src2.value);
|
||||
i->Remove();
|
||||
|
@ -481,7 +501,8 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
|
|||
}
|
||||
break;
|
||||
case OPCODE_SUB:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant() &&
|
||||
!should_skip_because_of_float) {
|
||||
v->set_from(i->src1.value);
|
||||
v->Sub(i->src2.value);
|
||||
i->Remove();
|
||||
|
@ -489,32 +510,34 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
|
|||
}
|
||||
break;
|
||||
case OPCODE_MUL:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
v->set_from(i->src1.value);
|
||||
v->Mul(i->src2.value);
|
||||
i->Remove();
|
||||
result = true;
|
||||
} else if (i->src1.value->IsConstant() ||
|
||||
i->src2.value->IsConstant()) {
|
||||
// Reorder the sources to make things simpler.
|
||||
// s1 = non-const, s2 = const
|
||||
auto s1 =
|
||||
i->src1.value->IsConstant() ? i->src2.value : i->src1.value;
|
||||
auto s2 =
|
||||
i->src1.value->IsConstant() ? i->src1.value : i->src2.value;
|
||||
|
||||
// Multiplication by one = no-op
|
||||
if (s2->type != VEC128_TYPE && s2->IsConstantOne()) {
|
||||
i->Replace(&OPCODE_ASSIGN_info, 0);
|
||||
i->set_src1(s1);
|
||||
if (!should_skip_because_of_float) {
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
v->set_from(i->src1.value);
|
||||
v->Mul(i->src2.value);
|
||||
i->Remove();
|
||||
result = true;
|
||||
} else if (s2->type == VEC128_TYPE) {
|
||||
auto& c = s2->constant;
|
||||
if (c.v128.f32[0] == 1.f && c.v128.f32[1] == 1.f &&
|
||||
c.v128.f32[2] == 1.f && c.v128.f32[3] == 1.f) {
|
||||
} else if (i->src1.value->IsConstant() ||
|
||||
i->src2.value->IsConstant()) {
|
||||
// Reorder the sources to make things simpler.
|
||||
// s1 = non-const, s2 = const
|
||||
auto s1 =
|
||||
i->src1.value->IsConstant() ? i->src2.value : i->src1.value;
|
||||
auto s2 =
|
||||
i->src1.value->IsConstant() ? i->src1.value : i->src2.value;
|
||||
|
||||
// Multiplication by one = no-op
|
||||
if (s2->type != VEC128_TYPE && s2->IsConstantOne()) {
|
||||
i->Replace(&OPCODE_ASSIGN_info, 0);
|
||||
i->set_src1(s1);
|
||||
result = true;
|
||||
} else if (s2->type == VEC128_TYPE) {
|
||||
auto& c = s2->constant;
|
||||
if (c.v128.f32[0] == 1.f && c.v128.f32[1] == 1.f &&
|
||||
c.v128.f32[2] == 1.f && c.v128.f32[3] == 1.f) {
|
||||
i->Replace(&OPCODE_ASSIGN_info, 0);
|
||||
i->set_src1(s1);
|
||||
result = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -528,75 +551,32 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
|
|||
}
|
||||
break;
|
||||
case OPCODE_DIV:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
v->set_from(i->src1.value);
|
||||
v->Div(i->src2.value, (i->flags & ARITHMETIC_UNSIGNED) != 0);
|
||||
i->Remove();
|
||||
result = true;
|
||||
} else if (i->src2.value->IsConstant()) {
|
||||
// Division by one = no-op.
|
||||
Value* src1 = i->src1.value;
|
||||
if (i->src2.value->type != VEC128_TYPE &&
|
||||
i->src2.value->IsConstantOne()) {
|
||||
i->Replace(&OPCODE_ASSIGN_info, 0);
|
||||
i->set_src1(src1);
|
||||
if (!should_skip_because_of_float) {
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
v->set_from(i->src1.value);
|
||||
v->Div(i->src2.value, (i->flags & ARITHMETIC_UNSIGNED) != 0);
|
||||
i->Remove();
|
||||
result = true;
|
||||
} else if (i->src2.value->type == VEC128_TYPE) {
|
||||
auto& c = i->src2.value->constant;
|
||||
if (c.v128.f32[0] == 1.f && c.v128.f32[1] == 1.f &&
|
||||
c.v128.f32[2] == 1.f && c.v128.f32[3] == 1.f) {
|
||||
} else if (i->src2.value->IsConstant()) {
|
||||
// Division by one = no-op.
|
||||
Value* src1 = i->src1.value;
|
||||
if (i->src2.value->type != VEC128_TYPE &&
|
||||
i->src2.value->IsConstantOne()) {
|
||||
i->Replace(&OPCODE_ASSIGN_info, 0);
|
||||
i->set_src1(src1);
|
||||
result = true;
|
||||
} else if (i->src2.value->type == VEC128_TYPE) {
|
||||
auto& c = i->src2.value->constant;
|
||||
if (c.v128.f32[0] == 1.f && c.v128.f32[1] == 1.f &&
|
||||
c.v128.f32[2] == 1.f && c.v128.f32[3] == 1.f) {
|
||||
i->Replace(&OPCODE_ASSIGN_info, 0);
|
||||
i->set_src1(src1);
|
||||
result = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case OPCODE_MUL_ADD:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
if (i->src3.value->IsConstant()) {
|
||||
v->set_from(i->src1.value);
|
||||
Value::MulAdd(v, i->src1.value, i->src2.value, i->src3.value);
|
||||
i->Remove();
|
||||
result = true;
|
||||
} else {
|
||||
// Multiply part is constant.
|
||||
Value* mul = builder->AllocValue();
|
||||
mul->set_from(i->src1.value);
|
||||
mul->Mul(i->src2.value);
|
||||
|
||||
Value* add = i->src3.value;
|
||||
i->Replace(&OPCODE_ADD_info, 0);
|
||||
i->set_src1(mul);
|
||||
i->set_src2(add);
|
||||
|
||||
result = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case OPCODE_MUL_SUB:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
// Multiply part is constant.
|
||||
if (i->src3.value->IsConstant()) {
|
||||
v->set_from(i->src1.value);
|
||||
Value::MulSub(v, i->src1.value, i->src2.value, i->src3.value);
|
||||
i->Remove();
|
||||
result = true;
|
||||
} else {
|
||||
// Multiply part is constant.
|
||||
Value* mul = builder->AllocValue();
|
||||
mul->set_from(i->src1.value);
|
||||
mul->Mul(i->src2.value);
|
||||
|
||||
Value* add = i->src3.value;
|
||||
i->Replace(&OPCODE_SUB_info, 0);
|
||||
i->set_src1(mul);
|
||||
i->set_src2(add);
|
||||
|
||||
result = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case OPCODE_MAX:
|
||||
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
|
||||
v->set_from(i->src1.value);
|
||||
|
@ -925,7 +905,8 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
|
|||
result = true;
|
||||
}
|
||||
break;
|
||||
case OPCODE_VECTOR_DENORMFLUSH:
|
||||
case OPCODE_VECTOR_DENORMFLUSH: // this one is okay to constant
|
||||
// evaluate, since it is just bit math
|
||||
if (i->src1.value->IsConstant()) {
|
||||
v->set_from(i->src1.value);
|
||||
v->DenormalFlush();
|
||||
|
@ -933,19 +914,10 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
|
|||
result = true;
|
||||
}
|
||||
break;
|
||||
case OPCODE_TO_SINGLE:
|
||||
if (i->src1.value->IsConstant()) {
|
||||
v->set_from(i->src1.value);
|
||||
v->ToSingle();
|
||||
i->Remove();
|
||||
result = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// Ignored.
|
||||
break;
|
||||
}
|
||||
i = i->next;
|
||||
}
|
||||
|
||||
block = block->next;
|
||||
|
|
|
@ -1287,7 +1287,11 @@ void HIRBuilder::SetRoundingMode(Value* value) {
|
|||
Instr* i = AppendInstr(OPCODE_SET_ROUNDING_MODE_info, 0);
|
||||
i->set_src1(value);
|
||||
}
|
||||
|
||||
// Appends a SET_NJM instruction with `value` as its only source operand.
// The instruction is appended without a destination value. `value` must be
// integer-typed (checked by ASSERT_INTEGER_TYPE).
void HIRBuilder::SetNJM(Value* value) {
  ASSERT_INTEGER_TYPE(value);
  Instr* const njm_instr = AppendInstr(OPCODE_SET_NJM_info, 0);
  njm_instr->set_src1(value);
}
|
||||
Value* HIRBuilder::Max(Value* value1, Value* value2) {
|
||||
ASSERT_TYPES_EQUAL(value1, value2);
|
||||
|
||||
|
@ -1632,7 +1636,7 @@ Value* HIRBuilder::Div(Value* value1, Value* value2,
|
|||
Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) {
|
||||
ASSERT_TYPES_EQUAL(value1, value2);
|
||||
ASSERT_TYPES_EQUAL(value1, value3);
|
||||
|
||||
#if 0
|
||||
bool c1 = value1->IsConstant();
|
||||
bool c2 = value2->IsConstant();
|
||||
if (c1 && c2) {
|
||||
|
@ -1640,7 +1644,7 @@ Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) {
|
|||
dest->Mul(value2);
|
||||
return Add(dest, value3);
|
||||
}
|
||||
|
||||
#endif
|
||||
Instr* i = AppendInstr(OPCODE_MUL_ADD_info, 0, AllocValue(value1->type));
|
||||
i->set_src1(value1);
|
||||
i->set_src2(value2);
|
||||
|
@ -1651,7 +1655,7 @@ Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) {
|
|||
Value* HIRBuilder::MulSub(Value* value1, Value* value2, Value* value3) {
|
||||
ASSERT_TYPES_EQUAL(value1, value2);
|
||||
ASSERT_TYPES_EQUAL(value1, value3);
|
||||
|
||||
#if 0
|
||||
bool c1 = value1->IsConstant();
|
||||
bool c2 = value2->IsConstant();
|
||||
if (c1 && c2) {
|
||||
|
@ -1659,7 +1663,7 @@ Value* HIRBuilder::MulSub(Value* value1, Value* value2, Value* value3) {
|
|||
dest->Mul(value2);
|
||||
return Sub(dest, value3);
|
||||
}
|
||||
|
||||
#endif
|
||||
Instr* i = AppendInstr(OPCODE_MUL_SUB_info, 0, AllocValue(value1->type));
|
||||
i->set_src1(value1);
|
||||
i->set_src2(value2);
|
||||
|
|
|
@ -264,7 +264,7 @@ class HIRBuilder {
|
|||
Value* new_value);
|
||||
Value* AtomicAdd(Value* address, Value* value);
|
||||
Value* AtomicSub(Value* address, Value* value);
|
||||
|
||||
void SetNJM(Value* value);
|
||||
protected:
|
||||
void DumpValue(StringBuffer* str, Value* value);
|
||||
void DumpOp(StringBuffer* str, OpcodeSignatureType sig_type, Instr::Op* op);
|
||||
|
|
|
@ -284,6 +284,7 @@ enum Opcode {
|
|||
OPCODE_TO_SINGLE, // i could not find a decent name to assign to this opcode,
|
||||
// as we already have OPCODE_ROUND. round double to float (
|
||||
// ppc "single" fpu instruction result rounding behavior )
|
||||
OPCODE_SET_NJM,
|
||||
__OPCODE_MAX_VALUE, // Keep at end.
|
||||
};
|
||||
|
||||
|
@ -295,6 +296,7 @@ enum OpcodeFlags {
|
|||
OPCODE_FLAG_IGNORE = (1 << 5),
|
||||
OPCODE_FLAG_HIDE = (1 << 6),
|
||||
OPCODE_FLAG_PAIRED_PREV = (1 << 7),
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING = (1 << 8)
|
||||
};
|
||||
|
||||
enum OpcodeSignatureType {
|
||||
|
|
|
@ -151,25 +151,25 @@ DEFINE_OPCODE(
|
|||
OPCODE_CONVERT,
|
||||
"convert",
|
||||
OPCODE_SIG_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_ROUND,
|
||||
"round",
|
||||
OPCODE_SIG_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_CONVERT_I2F,
|
||||
"vector_convert_i2f",
|
||||
OPCODE_SIG_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_VECTOR_CONVERT_F2I,
|
||||
"vector_convert_f2i",
|
||||
OPCODE_SIG_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_LOAD_VECTOR_SHL,
|
||||
|
@ -456,13 +456,13 @@ DEFINE_OPCODE(
|
|||
OPCODE_MUL_ADD,
|
||||
"mul_add",
|
||||
OPCODE_SIG_V_V_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_MUL_SUB,
|
||||
"mul_sub",
|
||||
OPCODE_SIG_V_V_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_NEG,
|
||||
|
@ -480,43 +480,43 @@ DEFINE_OPCODE(
|
|||
OPCODE_SQRT,
|
||||
"sqrt",
|
||||
OPCODE_SIG_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_RSQRT,
|
||||
"rsqrt",
|
||||
OPCODE_SIG_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_RECIP,
|
||||
"recip",
|
||||
OPCODE_SIG_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_POW2,
|
||||
"pow2",
|
||||
OPCODE_SIG_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_LOG2,
|
||||
"log2",
|
||||
OPCODE_SIG_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_DOT_PRODUCT_3,
|
||||
"dot_product_3",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_DOT_PRODUCT_4,
|
||||
"dot_product_4",
|
||||
OPCODE_SIG_V_V_V,
|
||||
0)
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_AND,
|
||||
|
@ -685,5 +685,11 @@ DEFINE_OPCODE(
|
|||
OPCODE_TO_SINGLE,
|
||||
"to_single",
|
||||
OPCODE_SIG_V_V,
|
||||
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING
|
||||
)
|
||||
// Opcode table entry for SET_NJM ("set_njm" in HIR dumps), registered with
// no opcode flags. HIRBuilder::SetNJM appends it with a single integer source
// and no destination.
// NOTE(review): OPCODE_SIG_X_V presumably means "no result, one value
// operand" - confirm against the OpcodeSignatureType definitions.
DEFINE_OPCODE(
|
||||
OPCODE_SET_NJM,
|
||||
"set_njm",
|
||||
OPCODE_SIG_X_V,
|
||||
0
|
||||
)
|
|
@ -199,7 +199,7 @@ void Value::Truncate(TypeName target_type) {
|
|||
return;
|
||||
}
|
||||
}
|
||||
|
||||
//WARNING: this does not handle rounding flags at all!
|
||||
void Value::Convert(TypeName target_type, RoundMode round_mode) {
|
||||
switch (type) {
|
||||
case FLOAT32_TYPE:
|
||||
|
@ -401,7 +401,7 @@ void Value::MulHi(Value* other, bool is_unsigned) {
|
|||
32);
|
||||
}
|
||||
break;
|
||||
case INT64_TYPE:
|
||||
case INT64_TYPE: {
|
||||
#if XE_COMPILER_MSVC
|
||||
if (is_unsigned) {
|
||||
constant.i64 = __umulh(constant.i64, other->constant.i64);
|
||||
|
@ -409,17 +409,19 @@ void Value::MulHi(Value* other, bool is_unsigned) {
|
|||
constant.i64 = __mulh(constant.i64, other->constant.i64);
|
||||
}
|
||||
#else
|
||||
unsigned __int128 product;
|
||||
if (is_unsigned) {
|
||||
constant.i64 = static_cast<uint64_t>(
|
||||
static_cast<unsigned __int128>(constant.i64) *
|
||||
static_cast<unsigned __int128>(other->constant.i64));
|
||||
product = static_cast<unsigned __int128>(constant.i64) *
|
||||
static_cast<unsigned __int128>(other->constant.i64);
|
||||
} else {
|
||||
constant.i64 =
|
||||
static_cast<uint64_t>(static_cast<__int128>(constant.i64) *
|
||||
static_cast<__int128>(other->constant.i64));
|
||||
product = static_cast<unsigned __int128>(
|
||||
static_cast<__int128>(constant.i64) *
|
||||
static_cast<__int128>(other->constant.i64));
|
||||
}
|
||||
constant.i64 = static_cast<int64_t>(product >> 64);
|
||||
#endif // XE_COMPILER_MSVC
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert_unhandled_case(type);
|
||||
break;
|
||||
|
@ -495,52 +497,6 @@ void Value::Max(Value* other) {
|
|||
}
|
||||
}
|
||||
|
||||
// Constant-evaluates dest = (value1 * value2) + value3 on the host.
// The representation is chosen by dest->type: four f32 lanes for VEC128,
// a single f32 for FLOAT32, a single f64 for FLOAT64. Any other type hits
// assert_unhandled_case and leaves dest untouched.
void Value::MulAdd(Value* dest, Value* value1, Value* value2, Value* value3) {
  if (dest->type == VEC128_TYPE) {
    // Each of the four float lanes is computed independently.
    for (int lane = 0; lane < 4; ++lane) {
      dest->constant.v128.f32[lane] =
          (value1->constant.v128.f32[lane] * value2->constant.v128.f32[lane]) +
          value3->constant.v128.f32[lane];
    }
  } else if (dest->type == FLOAT32_TYPE) {
    dest->constant.f32 =
        (value1->constant.f32 * value2->constant.f32) + value3->constant.f32;
  } else if (dest->type == FLOAT64_TYPE) {
    dest->constant.f64 =
        (value1->constant.f64 * value2->constant.f64) + value3->constant.f64;
  } else {
    assert_unhandled_case(dest->type);
  }
}
|
||||
|
||||
// Constant-evaluates dest = (value1 * value2) - value3 on the host.
// The representation is chosen by dest->type: four f32 lanes for VEC128,
// a single f32 for FLOAT32, a single f64 for FLOAT64. Any other type hits
// assert_unhandled_case and leaves dest untouched.
void Value::MulSub(Value* dest, Value* value1, Value* value2, Value* value3) {
  if (dest->type == VEC128_TYPE) {
    // Each of the four float lanes is computed independently.
    for (int lane = 0; lane < 4; ++lane) {
      dest->constant.v128.f32[lane] =
          (value1->constant.v128.f32[lane] * value2->constant.v128.f32[lane]) -
          value3->constant.v128.f32[lane];
    }
  } else if (dest->type == FLOAT32_TYPE) {
    dest->constant.f32 =
        (value1->constant.f32 * value2->constant.f32) - value3->constant.f32;
  } else if (dest->type == FLOAT64_TYPE) {
    dest->constant.f64 =
        (value1->constant.f64 * value2->constant.f64) - value3->constant.f64;
  } else {
    assert_unhandled_case(dest->type);
  }
}
|
||||
|
||||
void Value::Neg() {
|
||||
switch (type) {
|
||||
case INT8_TYPE:
|
||||
|
@ -1643,11 +1599,7 @@ void Value::DenormalFlush() {
|
|||
constant.v128.u32[i] = current_element;
|
||||
}
|
||||
}
|
||||
// Rounds this FLOAT64 constant to float precision in place: the value is
// narrowed to float and then widened back to double using the host's
// conversions. Asserts that the value is FLOAT64_TYPE.
void Value::ToSingle() {
  assert_true(type == FLOAT64_TYPE);

  const float narrowed = static_cast<float>(constant.f64);
  constant.f64 = static_cast<double>(narrowed);
}
|
||||
void Value::CountLeadingZeros(const Value* other) {
|
||||
switch (other->type) {
|
||||
case INT8_TYPE:
|
||||
|
|
|
@ -563,8 +563,7 @@ class Value {
|
|||
void MulHi(Value* other, bool is_unsigned);
|
||||
void Div(Value* other, bool is_unsigned);
|
||||
void Max(Value* other);
|
||||
static void MulAdd(Value* dest, Value* value1, Value* value2, Value* value3);
|
||||
static void MulSub(Value* dest, Value* value1, Value* value2, Value* value3);
|
||||
|
||||
void Neg();
|
||||
void Abs();
|
||||
void Sqrt();
|
||||
|
@ -603,7 +602,6 @@ class Value {
|
|||
bool saturate);
|
||||
void ByteSwap();
|
||||
void DenormalFlush();
|
||||
void ToSingle();
|
||||
void CountLeadingZeros(const Value* other);
|
||||
bool Compare(Opcode opcode, Value* other);
|
||||
hir::Instr* GetDefSkipAssigns();
|
||||
|
@ -615,7 +613,10 @@ class Value {
|
|||
// returns true if every single use is as an operand to a single instruction
|
||||
// (add var2, var1, var1)
|
||||
bool AllUsesByOneInsn() const;
|
||||
|
||||
//the maybe is here because this includes vec128, which is untyped data that can be treated as float or int depending on the context
|
||||
bool MaybeFloaty() const {
|
||||
return type == FLOAT32_TYPE || type == FLOAT64_TYPE || type == VEC128_TYPE;
|
||||
}
|
||||
private:
|
||||
static bool CompareInt8(Opcode opcode, Value* a, Value* b);
|
||||
static bool CompareInt16(Opcode opcode, Value* a, Value* b);
|
||||
|
|
|
@ -364,7 +364,16 @@ int InstrEmit_mfvscr(PPCHIRBuilder& f, const InstrData& i) {
|
|||
|
||||
int InstrEmit_mtvscr(PPCHIRBuilder& f, const InstrData& i) {
|
||||
// is this the right format?
|
||||
//todo: what mtvscr does with the unused bits is implementation defined, figure out what it does
|
||||
|
||||
|
||||
Value* v = f.LoadVR(i.VX128_1.RB);
|
||||
|
||||
|
||||
Value* has_njm_value = f.Extract(v, (uint8_t)3, INT32_TYPE);
|
||||
|
||||
f.SetNJM(f.IsTrue(f.And(has_njm_value, f.LoadConstantInt32(65536))));
|
||||
|
||||
f.StoreContext(offsetof(PPCContext, vscr_vec), v);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -382,7 +382,6 @@ int InstrEmit_mtfsfx(PPCHIRBuilder& f, const InstrData& i) {
|
|||
return 1;
|
||||
} else {
|
||||
assert_zero(i.XFL.W);
|
||||
|
||||
// Store under control of mask.
|
||||
// Expand the mask from 8 bits -> 32 bits.
|
||||
uint32_t mask = 0;
|
||||
|
@ -402,7 +401,7 @@ int InstrEmit_mtfsfx(PPCHIRBuilder& f, const InstrData& i) {
|
|||
|
||||
// Update the system rounding mode.
|
||||
if (mask & 0x7) {
|
||||
f.SetRoundingMode(v);
|
||||
f.SetRoundingMode(f.And(v, f.LoadConstantInt32(7)));
|
||||
}
|
||||
}
|
||||
if (i.XFL.Rc) {
|
||||
|
@ -425,7 +424,7 @@ int InstrEmit_mtfsfix(PPCHIRBuilder& f, const InstrData& i) {
|
|||
|
||||
// Update the system rounding mode.
|
||||
if (mask & 0x7) {
|
||||
f.SetRoundingMode(fpscr);
|
||||
f.SetRoundingMode(f.And(fpscr, f.LoadConstantInt32(7)));
|
||||
}
|
||||
|
||||
if (i.X.Rc) {
|
||||
|
|
|
@ -64,9 +64,13 @@ DEFINE_string(
|
|||
"or the module specified by the game. Leave blank to launch the default "
|
||||
"module.",
|
||||
"General");
|
||||
// Developer-only cvar, off by default. Emulator::CreateVfsDeviceBasedOnPath
// passes its negation as the third vfs::HostPathDevice constructor argument
// when mounting the parent directory of a .xex/.elf/.exe.
// NOTE(review): that argument is presumably a read-only flag - confirm
// against the HostPathDevice constructor.
DEFINE_bool(allow_game_relative_writes, false,
|
||||
"Not useful to non-developers. Allows code to write to paths "
|
||||
"relative to game://. Used for "
|
||||
"generating test data to compare with original hardware. ",
|
||||
"General");
|
||||
|
||||
namespace xe {
|
||||
|
||||
using namespace xe::literals;
|
||||
|
||||
Emulator::GameConfigLoadCallback::GameConfigLoadCallback(Emulator& emulator)
|
||||
|
@ -282,7 +286,8 @@ const std::unique_ptr<vfs::Device> Emulator::CreateVfsDeviceBasedOnPath(
|
|||
auto extension = xe::utf8::lower_ascii(xe::path_to_utf8(path.extension()));
|
||||
if (extension == ".xex" || extension == ".elf" || extension == ".exe") {
|
||||
auto parent_path = path.parent_path();
|
||||
return std::make_unique<vfs::HostPathDevice>(mount_path, parent_path, true);
|
||||
return std::make_unique<vfs::HostPathDevice>(
|
||||
mount_path, parent_path, !cvars::allow_game_relative_writes);
|
||||
} else {
|
||||
return std::make_unique<vfs::DiscImageDevice>(mount_path, path);
|
||||
}
|
||||
|
@ -653,8 +658,8 @@ bool Emulator::ExceptionCallback(Exception* ex) {
|
|||
// debugger.
|
||||
return false;
|
||||
} else if (processor()->is_debugger_attached()) {
|
||||
// Let the debugger handle this exception. It may decide to continue past it
|
||||
// (if it was a stepping breakpoint, etc).
|
||||
// Let the debugger handle this exception. It may decide to continue past
|
||||
// it (if it was a stepping breakpoint, etc).
|
||||
return processor()->OnUnhandledException(ex);
|
||||
}
|
||||
|
||||
|
@ -823,8 +828,8 @@ static std::string format_version(xex2_version version) {
|
|||
|
||||
X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
|
||||
const std::string_view module_path) {
|
||||
// Making changes to the UI (setting the icon) and executing game config load
|
||||
// callbacks which expect to be called from the UI thread.
|
||||
// Making changes to the UI (setting the icon) and executing game config
|
||||
// load callbacks which expect to be called from the UI thread.
|
||||
assert_true(display_window_->app_context().IsInUIThread());
|
||||
|
||||
// Setup NullDevices for raw HDD partition accesses
|
||||
|
@ -832,12 +837,12 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
|
|||
// By using a NullDevice that just returns success to all IO requests it
|
||||
// should allow games to believe cache/raw disk was accessed successfully
|
||||
|
||||
// NOTE: this should probably be moved to xenia_main.cc, but right now we need
|
||||
// to register the \Device\Harddisk0\ NullDevice _after_ the
|
||||
// NOTE: this should probably be moved to xenia_main.cc, but right now we
|
||||
// need to register the \Device\Harddisk0\ NullDevice _after_ the
|
||||
// \Device\Harddisk0\Partition1 HostPathDevice, otherwise requests to
|
||||
// Partition1 will go to this. Registering during CompleteLaunch allows us to
|
||||
// make sure any HostPathDevices are ready beforehand.
|
||||
// (see comment above cache:\ device registration for more info about why)
|
||||
// Partition1 will go to this. Registering during CompleteLaunch allows us
|
||||
// to make sure any HostPathDevices are ready beforehand. (see comment above
|
||||
// cache:\ device registration for more info about why)
|
||||
auto null_paths = {std::string("\\Partition0"), std::string("\\Cache0"),
|
||||
std::string("\\Cache1")};
|
||||
auto null_device =
|
||||
|
@ -900,8 +905,8 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
|
|||
if (module->title_id()) {
|
||||
auto title_id = fmt::format("{:08X}", module->title_id());
|
||||
|
||||
// Load the per-game configuration file and make sure updates are handled by
|
||||
// the callbacks.
|
||||
// Load the per-game configuration file and make sure updates are handled
|
||||
// by the callbacks.
|
||||
config::LoadGameConfig(title_id);
|
||||
assert_true(game_config_load_callback_loop_next_index_ == SIZE_MAX);
|
||||
game_config_load_callback_loop_next_index_ = 0;
|
||||
|
@ -934,10 +939,10 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
|
|||
}
|
||||
}
|
||||
|
||||
// Initializing the shader storage in a blocking way so the user doesn't miss
|
||||
// the initial seconds - for instance, sound from an intro video may start
|
||||
// playing before the video can be seen if doing this in parallel with the
|
||||
// main thread.
|
||||
// Initializing the shader storage in a blocking way so the user doesn't
|
||||
// miss the initial seconds - for instance, sound from an intro video may
|
||||
// start playing before the video can be seen if doing this in parallel with
|
||||
// the main thread.
|
||||
on_shader_storage_initialization(true);
|
||||
graphics_system_->InitializeShaderStorage(cache_root_, title_id_.value(),
|
||||
true);
|
||||
|
|
Loading…
Reference in New Issue