Add separate VMX/fpu mxcsr

Add support for constant operands for most fpu instructions
Remove constant folding for most fpu code
Add optional extended-range half float emulation for PACK/UNPACK (use_extended_range_half cvar)
This commit is contained in:
chss95cs@gmail.com 2022-07-31 08:56:36 -07:00
parent 3185b0ac9c
commit 968f656d96
18 changed files with 687 additions and 611 deletions

View File

@ -692,6 +692,12 @@ void X64Backend::InitializeBackendContext(void* ctx) {
// The backend context sits directly below the guest context in memory, so
// step back sizeof(X64BackendContext) bytes from ctx to reach it.
X64BackendContext* bctx = reinterpret_cast<X64BackendContext*>(
reinterpret_cast<intptr_t>(ctx) - sizeof(X64BackendContext));
bctx->ResolveFunction_Ptr = reinterpret_cast<void*>(&ResolveFunction);
// TODO: unverified default. Check on real hardware (RGH console) which
// rounding mode the PPC FPU actually starts with.
bctx->mxcsr_fpu =
DEFAULT_FPU_MXCSR;
bctx->mxcsr_vmx = DEFAULT_VMX_MXCSR;
// Bit 0 clear: the FPU MXCSR is treated as the one currently loaded.
bctx->flags = 0;
// https://media.discordapp.net/attachments/440280035056943104/1000765256643125308/unknown.png
bctx->Ox1000 = 0x1000;
}
} // namespace x64

View File

@ -37,9 +37,17 @@ typedef void (*ResolveFunctionThunk)();
// negatively index the membase reg)
struct X64BackendContext {
void* ResolveFunction_Ptr; // cached pointer to ResolveFunction
// MXCSR value loaded before scalar FPU code runs (see LoadFpuMxcsrDirect).
// NOTE (original author): currently, the way rounding mode is implemented
// affects both vmx and the fpu.
unsigned int mxcsr_fpu;
// MXCSR value loaded before VMX code runs (see LoadVmxMxcsrDirect);
// rewritten by the SET_NJM sequence.
unsigned int mxcsr_vmx;
// bit 0: 0 = the FPU MXCSR is currently loaded, 1 = the VMX MXCSR is.
unsigned int flags;
unsigned int Ox1000; // constant 0x1000 kept in memory so each tail-emitted
// add of it is 2 bytes shorter
};
// Initial MXCSR image for VMX code: flush-to-zero and denormals-are-zero on,
// every SSE exception masked. The rounding-control bits (13-14) are left at
// zero, which is round-to-nearest.
constexpr unsigned int DEFAULT_VMX_MXCSR =
0x8000 | // flush to zero (FTZ)
0x0040 | (_MM_MASK_MASK); // denormals are zero (DAZ) | all exceptions masked
// Initial MXCSR image for scalar FPU code: only the exception mask bits are
// set (no FTZ/DAZ, round-to-nearest).
constexpr unsigned int DEFAULT_FPU_MXCSR = 0x1F80;
class X64Backend : public Backend {
public:

View File

@ -320,6 +320,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
// Body.
auto block = builder->first_block();
while (block) {
ForgetMxcsrMode(); // at start of block, mxcsr mode is undefined
// Mark block labels.
auto label = block->label_head;
while (label) {
@ -490,6 +492,7 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address) {
void X64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
assert_not_null(function);
ForgetMxcsrMode();
auto fn = static_cast<X64Function*>(function);
// Resolve address to the function to call and store in rax.
@ -564,6 +567,7 @@ void X64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
void X64Emitter::CallIndirect(const hir::Instr* instr,
const Xbyak::Reg64& reg) {
ForgetMxcsrMode();
// Check if return.
if (instr->flags & hir::CALL_POSSIBLE_RETURN) {
cmp(reg.cvt32(), dword[rsp + StackLayout::GUEST_RET_ADDR]);
@ -617,6 +621,7 @@ uint64_t UndefinedCallExtern(void* raw_context, uint64_t function_ptr) {
return 0;
}
void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
ForgetMxcsrMode();
bool undefined = true;
if (function->behavior() == Function::Behavior::kBuiltin) {
auto builtin_function = static_cast<const BuiltinFunction*>(function);
@ -696,11 +701,13 @@ Xbyak::Reg64 X64Emitter::GetNativeParam(uint32_t param) {
}
// Important: If you change these, you must update the thunks in x64_backend.cc!
Xbyak::Reg64 X64Emitter::GetContextReg() { return rsi; }
Xbyak::Reg64 X64Emitter::GetMembaseReg() { return rdi; }
Xbyak::Reg64 X64Emitter::GetContextReg() const { return rsi; }
Xbyak::Reg64 X64Emitter::GetMembaseReg() const { return rdi; }
void X64Emitter::ReloadMembase() {
mov(GetMembaseReg(), qword[GetContextReg() + 8]); // membase
mov(GetMembaseReg(),
qword[GetContextReg() +
offsetof(ppc::PPCContext, virtual_membase)]); // membase
}
// Len Assembly Byte Sequence
@ -917,7 +924,7 @@ static const vec128_t xmm_consts[] = {
/* XMMQNaN */ vec128i(0x7FC00000u),
/* XMMInt127 */ vec128i(0x7Fu),
/* XMM2To32 */ vec128f(0x1.0p32f),
/* xmminf */ vec128i(0x7f800000),
/* XMMFloatInf */ vec128i(0x7f800000),
/* XMMIntsToBytes*/
v128_setr_bytes(0, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
@ -938,9 +945,7 @@ static const vec128_t xmm_consts[] = {
/*XMMVSRShlByteshuf*/
v128_setr_bytes(13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 0x80),
// XMMVSRMask
vec128b(1)
};
vec128b(1)};
void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) {
for (auto& vec : xmm_consts) {
@ -1347,7 +1352,7 @@ SimdDomain X64Emitter::DeduceSimdDomain(const hir::Value* for_value) {
return SimdDomain::DONTCARE;
}
Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) {
Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const {
/*
index context ptr negatively to get to backend ctx field
*/
@ -1368,6 +1373,93 @@ Xbyak::Label& X64Emitter::NewCachedLabel() {
label_cache_.push_back(tmp);
return *tmp;
}
// Emits the runtime mode switch used when the emitter does not know which
// MXCSR is loaded. The flags bit is updated with btr/bts, which also leaves
// the bit's previous value in the carry flag; only if that previous value
// shows the other mode was active do we branch to a stub that reloads MXCSR.
// switching_to_fpu selects the direction (true: -> FPU, false: -> VMX).
template<bool switching_to_fpu>
static void ChangeMxcsrModeDynamicHelper(X64Emitter& e) {
auto flags = e.GetBackendFlagsPtr();
if (switching_to_fpu) {
e.btr(flags, 0); // clear bit 0 (= fpu mode); CF = previous bit value
} else {
e.bts(flags, 0); // set bit 0 (= vmx mode); CF = previous bit value
}
// Label the reload stub jumps back to. It comes from the label cache because
// the stub is registered now but refers to it later.
Xbyak::Label& come_back = e.NewCachedLabel();
// NOTE(review): AddToTail presumably defers this code to the end of the
// function so the reload stays off the main path - confirm.
Xbyak::Label& reload_bailout =
e.AddToTail([&come_back](X64Emitter& e, Xbyak::Label& thislabel) {
e.L(thislabel);
if (switching_to_fpu) {
e.LoadFpuMxcsrDirect();
} else {
e.LoadVmxMxcsrDirect();
}
e.jmp(come_back, X64Emitter::T_NEAR);
});
// No flag-modifying instruction may be emitted between the btr/bts above and
// the conditional jump below.
if (switching_to_fpu) {
e.jc(reload_bailout,
X64Emitter::T_NEAR); // carry set: bit was 1, we were in VMX mxcsr mode.
} else {
e.jnc(reload_bailout,
X64Emitter::T_NEAR); // carry clear: bit was 0, we were in FPU mxcsr mode.
}
e.L(come_back);
}
// Makes new_mode the MXCSR mode tracked by the emitter and emits whatever is
// needed so that, at runtime, the matching MXCSR image is loaded and bit 0 of
// the backend flags (0 = FPU, 1 = VMX) agrees with it.
//
// new_mode:    the mode the following code requires; must not be Unknown.
// already_set: the caller has already emitted the vldmxcsr itself, so only
//              the bookkeeping is emitted here.
//
// Returns true only when an unconditional MXCSR load was emitted, i.e. the
// previous mode was known and different. On the dynamic path (previous mode
// Unknown) a load may still happen at runtime, but false is returned.
bool X64Emitter::ChangeMxcsrMode(MXCSRMode new_mode, bool already_set) {
  if (new_mode == mxcsr_mode_) {
    return false;
  }
  assert_true(new_mode != MXCSRMode::Unknown);

  const MXCSRMode previous_mode = mxcsr_mode_;
  mxcsr_mode_ = new_mode;

  if (already_set) {
    // The caller loaded MXCSR, but the flags bit must still be brought in
    // line. The dynamic check emitted after a ForgetMxcsrMode() trusts that
    // bit, so leaving it stale (as happened before when the previous mode was
    // known) could make a later switch skip a required reload.
    if (new_mode == MXCSRMode::Fpu) {
      btr(GetBackendFlagsPtr(), 0);
    } else if (new_mode == MXCSRMode::Vmx) {
      bts(GetBackendFlagsPtr(), 0);
    } else {
      assert_unhandled_case(new_mode);
    }
    return false;
  }

  if (previous_mode == MXCSRMode::Unknown) {
    // The loaded mode is not known at emit time: test the flags bit at
    // runtime and reload only if needed.
    if (new_mode == MXCSRMode::Fpu) {
      ChangeMxcsrModeDynamicHelper<true>(*this);
    } else if (new_mode == MXCSRMode::Vmx) {
      ChangeMxcsrModeDynamicHelper<false>(*this);
    } else {
      assert_unhandled_case(new_mode);
    }
    return false;
  }

  // Previous mode known and different: load unconditionally.
  if (new_mode == MXCSRMode::Fpu) {
    LoadFpuMxcsrDirect();
    btr(GetBackendFlagsPtr(), 0);
    return true;
  } else if (new_mode == MXCSRMode::Vmx) {
    LoadVmxMxcsrDirect();
    bts(GetBackendFlagsPtr(), 0);
    return true;
  } else {
    assert_unhandled_case(new_mode);
  }
  return false;
}
void X64Emitter::LoadFpuMxcsrDirect() {
vldmxcsr(GetBackendCtxPtr(offsetof(X64BackendContext, mxcsr_fpu)));
}
void X64Emitter::LoadVmxMxcsrDirect() {
vldmxcsr(GetBackendCtxPtr(offsetof(X64BackendContext, mxcsr_vmx)));
}
// Returns a 32-bit memory operand addressing X64BackendContext::flags.
Xbyak::Address X64Emitter::GetBackendFlagsPtr() const {
  auto flags_operand = GetBackendCtxPtr(offsetof(X64BackendContext, flags));
  // Give the operand an explicit dword size so it can be used with btr/bts.
  flags_operand.setBit(32);
  return flags_operand;
}
} // namespace x64
} // namespace backend
} // namespace cpu

View File

@ -65,6 +65,12 @@ enum class SimdDomain : uint32_t {
// CONFLICTING means its used in multiple domains)
};
// Which MXCSR image the emitter assumes is loaded at the current point of
// code generation.
enum class MXCSRMode : uint32_t {
  Unknown = 0,  // not known at emit time; must be checked at runtime
  Fpu = 1,      // the scalar FPU MXCSR is loaded
  Vmx = 2,      // the VMX MXCSR is loaded
};
static SimdDomain PickDomain2(SimdDomain dom1, SimdDomain dom2) {
if (dom1 == dom2) {
return dom1;
@ -283,8 +289,8 @@ class X64Emitter : public Xbyak::CodeGenerator {
Xbyak::Reg64 GetNativeParam(uint32_t param);
Xbyak::Reg64 GetContextReg();
Xbyak::Reg64 GetMembaseReg();
Xbyak::Reg64 GetContextReg() const;
Xbyak::Reg64 GetMembaseReg() const;
bool CanUseMembaseLow32As0() const { return may_use_membase32_as_zero_reg_; }
void ReloadMembase();
@ -295,7 +301,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
void MovMem64(const Xbyak::RegExp& addr, uint64_t v);
Xbyak::Address GetXmmConstPtr(XmmConst id);
Xbyak::Address GetBackendCtxPtr(int offset_in_x64backendctx);
Xbyak::Address GetBackendCtxPtr(int offset_in_x64backendctx) const;
void LoadConstantXmm(Xbyak::Xmm dest, float v);
void LoadConstantXmm(Xbyak::Xmm dest, double v);
@ -304,6 +310,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
Xbyak::Address StashConstantXmm(int index, float v);
Xbyak::Address StashConstantXmm(int index, double v);
Xbyak::Address StashConstantXmm(int index, const vec128_t& v);
Xbyak::Address GetBackendFlagsPtr() const;
void* FindByteConstantOffset(unsigned bytevalue);
void* FindWordConstantOffset(unsigned wordvalue);
void* FindDwordConstantOffset(unsigned bytevalue);
@ -319,6 +326,16 @@ class X64Emitter : public Xbyak::CodeGenerator {
size_t stack_size() const { return stack_size_; }
SimdDomain DeduceSimdDomain(const hir::Value* for_value);
// Drops the emitter's knowledge of which MXCSR is loaded. The next
// ChangeMxcsrMode() call then emits a runtime check of the backend flags
// instead of relying on emit-time tracking.
void ForgetMxcsrMode() {
mxcsr_mode_ = MXCSRMode::Unknown;
}
/*
Makes new_mode the tracked MXCSR mode and emits the code needed to switch.
Returns true only when an unconditional MXCSR load was emitted (previous mode
known and different); returns false when nothing or only a runtime-checked
switch was emitted. Per the original author, DOT_PRODUCT can use the result to
skip clearing the overflow flag, as it will never be set in the vmx mxcsr.
already_set means the caller already did vldmxcsr itself (used for
SET_ROUNDING_MODE).
*/
bool ChangeMxcsrMode(MXCSRMode new_mode, bool already_set=false);
void LoadFpuMxcsrDirect(); // unsafe: emits the load only, does not change mxcsr_mode_
void LoadVmxMxcsrDirect(); // unsafe: emits the load only, does not change mxcsr_mode_
protected:
void* Emplace(const EmitFunctionInfo& func_info,
GuestFunction* function = nullptr);
@ -359,6 +376,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
std::vector<Xbyak::Label*>
label_cache_; // for creating labels that need to be referenced much
// later by tail emitters
MXCSRMode mxcsr_mode_ = MXCSRMode::Unknown;
};
} // namespace x64

View File

@ -616,7 +616,31 @@ struct Sequence {
}
}
};
// Returns an Xmm holding the value of `input`.
// A non-constant operand is returned as-is (its own register). A constant
// operand is materialized into xmm_to_use_if_const, which is then returned.
// Integral constants of 4 or 8 bytes are placed in the low lane of an
// otherwise zeroed vector; other integral sizes hit assert_unhandled_case.
template <typename T>
static Xmm GetInputRegOrConstant(X64Emitter& e, const T& input,
                                 Xmm xmm_to_use_if_const) {
  if (!input.is_constant) {
    return input;
  }
  using constant_type = std::remove_reference_t<decltype(input.constant())>;
  if constexpr (!std::is_integral_v<constant_type>) {
    // Float/double/vector constants have a direct LoadConstantXmm overload.
    e.LoadConstantXmm(xmm_to_use_if_const, input.constant());
  } else {
    vec128_t widened = vec128b(0);
    if constexpr (sizeof(constant_type) == 4) {
      widened.i32[0] = input.constant();
    } else if constexpr (sizeof(constant_type) == 8) {
      widened.low = input.constant();
    } else {
      assert_unhandled_case(sizeof(constant_type));
    }
    e.LoadConstantXmm(xmm_to_use_if_const, widened);
  }
  return xmm_to_use_if_const;
}
} // namespace x64
} // namespace backend
} // namespace cpu

View File

@ -257,6 +257,7 @@ struct CALL_TRUE_I8
e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip);
e.ForgetMxcsrMode();
}
};
struct CALL_TRUE_I16
@ -268,6 +269,7 @@ struct CALL_TRUE_I16
e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip);
e.ForgetMxcsrMode();
}
};
struct CALL_TRUE_I32
@ -279,6 +281,7 @@ struct CALL_TRUE_I32
e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip);
e.ForgetMxcsrMode();
}
};
struct CALL_TRUE_I64
@ -290,6 +293,7 @@ struct CALL_TRUE_I64
e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip);
e.ForgetMxcsrMode();
}
};
struct CALL_TRUE_F32
@ -301,6 +305,7 @@ struct CALL_TRUE_F32
e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip);
e.ForgetMxcsrMode();
}
};
@ -313,6 +318,7 @@ struct CALL_TRUE_F64
e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip);
e.ForgetMxcsrMode();
}
};
EMITTER_OPCODE_TABLE(OPCODE_CALL_TRUE, CALL_TRUE_I8, CALL_TRUE_I16,
@ -326,6 +332,7 @@ struct CALL_INDIRECT
: Sequence<CALL_INDIRECT, I<OPCODE_CALL_INDIRECT, VoidOp, I64Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.CallIndirect(i.instr, i.src1);
e.ForgetMxcsrMode();
}
};
EMITTER_OPCODE_TABLE(OPCODE_CALL_INDIRECT, CALL_INDIRECT);

View File

@ -16,7 +16,13 @@
// For OPCODE_PACK/OPCODE_UNPACK
#include "third_party/half/include/half.hpp"
#include "xenia/base/cvar.h"
#include "xenia/cpu/backend/x64/x64_stack_layout.h"
// When set, the FLOAT16 PACK/UNPACK sequences in this file skip the F16C fast
// path and convert through float_to_xenos_half / xenos_half_to_float instead
// of the half_float library conversions.
DEFINE_bool(use_extended_range_half, true,
"Emulate extended range half-precision, may be slower on games "
"that use it heavily",
"CPU");
namespace xe {
namespace cpu {
namespace backend {
@ -31,6 +37,8 @@ struct VECTOR_CONVERT_I2F
: Sequence<VECTOR_CONVERT_I2F,
I<OPCODE_VECTOR_CONVERT_I2F, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.ChangeMxcsrMode(MXCSRMode::Vmx);
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm3);
// flags = ARITHMETIC_UNSIGNED
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
// Round manually to (1.stored mantissa bits * 2^31) or to 2^32 to the
@ -46,8 +54,8 @@ struct VECTOR_CONVERT_I2F
// be 4294967296.0f.
// xmm0 = src + 0b01111111 + ((src >> 8) & 1)
// (xmm1 also used to launch reg + mem early and to require it late)
e.vpaddd(e.xmm1, i.src1, e.GetXmmConstPtr(XMMInt127));
e.vpslld(e.xmm0, i.src1, 31 - 8);
e.vpaddd(e.xmm1, src1, e.GetXmmConstPtr(XMMInt127));
e.vpslld(e.xmm0, src1, 31 - 8);
e.vpsrld(e.xmm0, e.xmm0, 31);
e.vpaddd(e.xmm0, e.xmm0, e.xmm1);
// xmm0 = (0xFF800000 | 23 explicit mantissa bits), or 0 if overflowed
@ -63,13 +71,13 @@ struct VECTOR_CONVERT_I2F
// Convert from signed integer to float.
// xmm1 = [0x00000000, 0x7FFFFFFF] case result
e.vcvtdq2ps(e.xmm1, i.src1);
e.vcvtdq2ps(e.xmm1, src1);
// Merge the two ways depending on whether the number is >= 0x80000000
// (has high bit set).
e.vblendvps(i.dest, e.xmm1, e.xmm0, i.src1);
e.vblendvps(i.dest, e.xmm1, e.xmm0, src1);
} else {
e.vcvtdq2ps(i.dest, i.src1);
e.vcvtdq2ps(i.dest, src1);
}
}
};
@ -82,9 +90,11 @@ struct VECTOR_CONVERT_F2I
: Sequence<VECTOR_CONVERT_F2I,
I<OPCODE_VECTOR_CONVERT_F2I, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.ChangeMxcsrMode(MXCSRMode::Vmx);
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm3);
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
// clamp to min 0
e.vmaxps(e.xmm0, i.src1, e.GetXmmConstPtr(XMMZero));
e.vmaxps(e.xmm0, src1, e.GetXmmConstPtr(XMMZero));
// xmm1 = mask of values >= (unsigned)INT_MIN
e.vcmpgeps(e.xmm1, e.xmm0, e.GetXmmConstPtr(XMMPosIntMinPS));
@ -108,14 +118,14 @@ struct VECTOR_CONVERT_F2I
e.vpor(i.dest, i.dest, e.xmm0);
} else {
// xmm2 = NaN mask
e.vcmpunordps(e.xmm2, i.src1, i.src1);
e.vcmpunordps(e.xmm2, src1, src1);
// convert packed floats to packed dwords
e.vcvttps2dq(e.xmm0, i.src1);
e.vcvttps2dq(e.xmm0, src1);
// (high bit) xmm1 = dest is indeterminate and i.src1 >= 0
e.vpcmpeqd(e.xmm1, e.xmm0, e.GetXmmConstPtr(XMMIntMin));
e.vpandn(e.xmm1, i.src1, e.xmm1);
e.vpandn(e.xmm1, src1, e.xmm1);
// saturate positive values
e.vblendvps(i.dest, e.xmm0, e.GetXmmConstPtr(XMMIntMax), e.xmm1);
@ -131,6 +141,7 @@ struct VECTOR_DENORMFLUSH
: Sequence<VECTOR_DENORMFLUSH,
I<OPCODE_VECTOR_DENORMFLUSH, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vxorps(e.xmm1, e.xmm1, e.xmm1); // 0.25 P0123
e.vandps(e.xmm0, i.src1,
@ -352,6 +363,7 @@ struct VECTOR_COMPARE_EQ_V128
e.vpcmpeqd(dest, src1, src2);
break;
case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vcmpeqps(dest, src1, src2);
break;
}
@ -380,6 +392,7 @@ struct VECTOR_COMPARE_SGT_V128
e.vpcmpgtd(dest, src1, src2);
break;
case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vcmpgtps(dest, src1, src2);
break;
}
@ -414,6 +427,7 @@ struct VECTOR_COMPARE_SGE_V128
e.vpor(dest, e.xmm0);
break;
case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vcmpgeps(dest, src1, src2);
break;
}
@ -441,6 +455,7 @@ struct VECTOR_COMPARE_UGT_V128
sign_addr = e.GetXmmConstPtr(XMMSignMaskI32);
break;
case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
sign_addr = e.GetXmmConstPtr(XMMSignMaskF32);
break;
default:
@ -498,6 +513,7 @@ struct VECTOR_COMPARE_UGE_V128
sign_addr = e.GetXmmConstPtr(XMMSignMaskI32);
break;
case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
sign_addr = e.GetXmmConstPtr(XMMSignMaskF32);
break;
}
@ -620,6 +636,7 @@ struct VECTOR_ADD
case FLOAT32_TYPE:
assert_false(is_unsigned);
assert_false(saturate);
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vaddps(dest, src1, src2);
break;
default:
@ -711,6 +728,7 @@ struct VECTOR_SUB
}
break;
case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vsubps(dest, src1, src2);
break;
default:
@ -2003,6 +2021,7 @@ EMITTER_OPCODE_TABLE(OPCODE_SWIZZLE, SWIZZLE);
// ============================================================================
struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.ChangeMxcsrMode(MXCSRMode::Vmx);
switch (i.instr->flags & PACK_TYPE_MODE) {
case PACK_TYPE_D3DCOLOR:
EmitD3DCOLOR(e, i);
@ -2062,9 +2081,14 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
alignas(16) uint16_t b[8];
_mm_store_ps(a, src1);
std::memset(b, 0, sizeof(b));
for (int i = 0; i < 2; i++) {
b[7 - i] = half_float::detail::float2half<std::round_toward_zero>(a[i]);
if (!cvars::use_extended_range_half) {
for (int i = 0; i < 2; i++) {
b[7 - i] = half_float::detail::float2half<std::round_toward_zero>(a[i]);
}
} else {
for (int i = 0; i < 2; i++) {
b[7 - i] = float_to_xenos_half(a[i]);
}
}
return _mm_load_si128(reinterpret_cast<__m128i*>(b));
@ -2074,7 +2098,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
// http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx
// dest = [(src1.x | src1.y), 0, 0, 0]
if (e.IsFeatureEnabled(kX64EmitF16C)) {
if (e.IsFeatureEnabled(kX64EmitF16C) && !cvars::use_extended_range_half) {
Xmm src;
if (i.src1.is_constant) {
src = i.dest;
@ -2101,10 +2125,15 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
alignas(16) uint16_t b[8];
_mm_store_ps(a, src1);
std::memset(b, 0, sizeof(b));
for (int i = 0; i < 4; i++) {
b[7 - (i ^ 2)] =
half_float::detail::float2half<std::round_toward_zero>(a[i]);
if (!cvars::use_extended_range_half) {
for (int i = 0; i < 4; i++) {
b[7 - (i ^ 2)] =
half_float::detail::float2half<std::round_toward_zero>(a[i]);
}
} else {
for (int i = 0; i < 4; i++) {
b[7 - (i ^ 2)] = float_to_xenos_half(a[i]);
}
}
return _mm_load_si128(reinterpret_cast<__m128i*>(b));
@ -2113,7 +2142,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
assert_true(i.src2.value->IsConstantZero());
// dest = [(src1.z | src1.w), (src1.x | src1.y), 0, 0]
if (e.IsFeatureEnabled(kX64EmitF16C)) {
if (e.IsFeatureEnabled(kX64EmitF16C) && !cvars::use_extended_range_half) {
Xmm src;
if (i.src1.is_constant) {
src = i.dest;
@ -2420,6 +2449,7 @@ EMITTER_OPCODE_TABLE(OPCODE_PACK, PACK);
// ============================================================================
struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
e.ChangeMxcsrMode(MXCSRMode::Vmx);
switch (i.instr->flags & PACK_TYPE_MODE) {
case PACK_TYPE_D3DCOLOR:
EmitD3DCOLOR(e, i);
@ -2478,10 +2508,15 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
alignas(16) float b[4];
_mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
for (int i = 0; i < 2; i++) {
b[i] = half_float::detail::half2float(a[VEC128_W(6 + i)]);
if (!cvars::use_extended_range_half) {
for (int i = 0; i < 2; i++) {
b[i] = half_float::detail::half2float(a[VEC128_W(6 + i)]);
}
} else {
for (int i = 0; i < 2; i++) {
b[i] = xenos_half_to_float(a[VEC128_W(6 + i)]);
}
}
// Constants, or something
b[2] = 0.f;
b[3] = 1.f;
@ -2501,7 +2536,9 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
// Also zero out the high end.
// TODO(benvanik): special case constant unpacks that just get 0/1/etc.
if (e.IsFeatureEnabled(kX64EmitF16C)) {
if (e.IsFeatureEnabled(kX64EmitF16C) &&
!cvars::use_extended_range_half) { // todo: can use cvtph and bit logic
// to implement
Xmm src;
if (i.src1.is_constant) {
src = i.dest;
@ -2534,16 +2571,21 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
alignas(16) uint16_t a[8];
alignas(16) float b[4];
_mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
for (int i = 0; i < 4; i++) {
b[i] = half_float::detail::half2float(a[VEC128_W(4 + i)]);
if (!cvars::use_extended_range_half) {
for (int i = 0; i < 4; i++) {
b[i] = half_float::detail::half2float(a[VEC128_W(4 + i)]);
}
} else {
for (int i = 0; i < 4; i++) {
b[i] = xenos_half_to_float(a[VEC128_W(4 + i)]);
}
}
return _mm_load_ps(b);
}
static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) {
// src = [(dest.x | dest.y), (dest.z | dest.w), 0, 0]
if (e.IsFeatureEnabled(kX64EmitF16C)) {
if (e.IsFeatureEnabled(kX64EmitF16C) && !cvars::use_extended_range_half) {
Xmm src;
if (i.src1.is_constant) {
src = i.dest;
@ -2805,6 +2847,32 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
};
EMITTER_OPCODE_TABLE(OPCODE_UNPACK, UNPACK);
// OPCODE_SET_NJM: rewrites the stored VMX MXCSR image according to src1 and
// makes it the live MXCSR.
//   src1 == 0 : exceptions masked only (DAZ / flush-to-zero turned off)
//   src1 != 0 : DEFAULT_VMX_MXCSR (DAZ / flush-to-zero turned on)
struct SET_NJM_I8 : Sequence<SET_NJM_I8, I<OPCODE_SET_NJM, VoidOp, I8Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    auto addr_vmx = e.GetBackendCtxPtr(offsetof(X64BackendContext, mxcsr_vmx));
    addr_vmx.setBit(32);  // store as a dword
    if (i.src1.is_constant) {
      if (i.src1.constant() == 0) {
        // turn off daz/flush2z
        e.mov(addr_vmx, _MM_MASK_MASK);
      } else {
        e.mov(addr_vmx, DEFAULT_VMX_MXCSR);
      }
    } else {
      // edx = (src1 == 0) ? _MM_MASK_MASK : DEFAULT_VMX_MXCSR
      // The movs do not touch the flags produced by test.
      e.test(i.src1, i.src1);
      e.mov(e.edx, DEFAULT_VMX_MXCSR);
      e.mov(e.eax, _MM_MASK_MASK);
      e.cmove(e.edx, e.eax);
      e.mov(addr_vmx, e.edx);
    }
    // The stored image just changed, so it has to be reloaded even if VMX
    // mode is already active. ChangeMxcsrMode(Vmx) alone would emit nothing
    // (mode already Vmx) or skip the reload at runtime (flags bit already
    // set), leaving the old DAZ/FTZ setting live. Load explicitly, then
    // re-register the mode as "already set" so the flags bit and the tracked
    // mode both end up as VMX.
    e.LoadVmxMxcsrDirect();
    e.ForgetMxcsrMode();
    e.ChangeMxcsrMode(MXCSRMode::Vmx, true);
  }
};
EMITTER_OPCODE_TABLE(OPCODE_SET_NJM, SET_NJM_I8);
} // namespace x64
} // namespace backend
} // namespace cpu

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,9 @@
DEFINE_bool(inline_mmio_access, true, "Inline constant MMIO loads and stores.",
"CPU");
// Off by default. While off, ConstantPropagationPass::Run skips opcodes
// flagged OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING and does not fold
// ADD/SUB/MUL/DIV when any operand or the destination MaybeFloaty().
DEFINE_bool(permit_float_constant_evaluation, false, "Allow float constant evaluation, may produce incorrect results and break games math",
"CPU");
namespace xe {
namespace cpu {
namespace compiler {
@ -68,8 +71,24 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
result = false;
auto block = builder->first_block();
while (block) {
auto i = block->instr_head;
while (i) {
for (auto i = block->instr_head; i; i = i->next) {
if (((i->opcode->flags & OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING) != 0) &&
!cvars::permit_float_constant_evaluation) {
continue;
}
bool might_be_floatop = false;
i->VisitValueOperands(
[&might_be_floatop](Value* current_opnd, uint32_t opnd_index) {
might_be_floatop |= current_opnd->MaybeFloaty();
});
if (i->dest) {
might_be_floatop |= i->dest->MaybeFloaty();
}
bool should_skip_because_of_float =
might_be_floatop && !cvars::permit_float_constant_evaluation;
auto v = i->dest;
switch (i->opcode->num) {
case OPCODE_DEBUG_BREAK_TRUE:
@ -452,7 +471,8 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
break;
case OPCODE_ADD:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
if (i->src1.value->IsConstant() && i->src2.value->IsConstant() &&
!should_skip_because_of_float) {
v->set_from(i->src1.value);
v->Add(i->src2.value);
i->Remove();
@ -481,7 +501,8 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
}
break;
case OPCODE_SUB:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
if (i->src1.value->IsConstant() && i->src2.value->IsConstant() &&
!should_skip_because_of_float) {
v->set_from(i->src1.value);
v->Sub(i->src2.value);
i->Remove();
@ -489,32 +510,34 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
}
break;
case OPCODE_MUL:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
v->Mul(i->src2.value);
i->Remove();
result = true;
} else if (i->src1.value->IsConstant() ||
i->src2.value->IsConstant()) {
// Reorder the sources to make things simpler.
// s1 = non-const, s2 = const
auto s1 =
i->src1.value->IsConstant() ? i->src2.value : i->src1.value;
auto s2 =
i->src1.value->IsConstant() ? i->src1.value : i->src2.value;
// Multiplication by one = no-op
if (s2->type != VEC128_TYPE && s2->IsConstantOne()) {
i->Replace(&OPCODE_ASSIGN_info, 0);
i->set_src1(s1);
if (!should_skip_because_of_float) {
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
v->Mul(i->src2.value);
i->Remove();
result = true;
} else if (s2->type == VEC128_TYPE) {
auto& c = s2->constant;
if (c.v128.f32[0] == 1.f && c.v128.f32[1] == 1.f &&
c.v128.f32[2] == 1.f && c.v128.f32[3] == 1.f) {
} else if (i->src1.value->IsConstant() ||
i->src2.value->IsConstant()) {
// Reorder the sources to make things simpler.
// s1 = non-const, s2 = const
auto s1 =
i->src1.value->IsConstant() ? i->src2.value : i->src1.value;
auto s2 =
i->src1.value->IsConstant() ? i->src1.value : i->src2.value;
// Multiplication by one = no-op
if (s2->type != VEC128_TYPE && s2->IsConstantOne()) {
i->Replace(&OPCODE_ASSIGN_info, 0);
i->set_src1(s1);
result = true;
} else if (s2->type == VEC128_TYPE) {
auto& c = s2->constant;
if (c.v128.f32[0] == 1.f && c.v128.f32[1] == 1.f &&
c.v128.f32[2] == 1.f && c.v128.f32[3] == 1.f) {
i->Replace(&OPCODE_ASSIGN_info, 0);
i->set_src1(s1);
result = true;
}
}
}
}
@ -528,75 +551,32 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
}
break;
case OPCODE_DIV:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
v->Div(i->src2.value, (i->flags & ARITHMETIC_UNSIGNED) != 0);
i->Remove();
result = true;
} else if (i->src2.value->IsConstant()) {
// Division by one = no-op.
Value* src1 = i->src1.value;
if (i->src2.value->type != VEC128_TYPE &&
i->src2.value->IsConstantOne()) {
i->Replace(&OPCODE_ASSIGN_info, 0);
i->set_src1(src1);
if (!should_skip_because_of_float) {
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
v->Div(i->src2.value, (i->flags & ARITHMETIC_UNSIGNED) != 0);
i->Remove();
result = true;
} else if (i->src2.value->type == VEC128_TYPE) {
auto& c = i->src2.value->constant;
if (c.v128.f32[0] == 1.f && c.v128.f32[1] == 1.f &&
c.v128.f32[2] == 1.f && c.v128.f32[3] == 1.f) {
} else if (i->src2.value->IsConstant()) {
// Division by one = no-op.
Value* src1 = i->src1.value;
if (i->src2.value->type != VEC128_TYPE &&
i->src2.value->IsConstantOne()) {
i->Replace(&OPCODE_ASSIGN_info, 0);
i->set_src1(src1);
result = true;
} else if (i->src2.value->type == VEC128_TYPE) {
auto& c = i->src2.value->constant;
if (c.v128.f32[0] == 1.f && c.v128.f32[1] == 1.f &&
c.v128.f32[2] == 1.f && c.v128.f32[3] == 1.f) {
i->Replace(&OPCODE_ASSIGN_info, 0);
i->set_src1(src1);
result = true;
}
}
}
}
break;
case OPCODE_MUL_ADD:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
if (i->src3.value->IsConstant()) {
v->set_from(i->src1.value);
Value::MulAdd(v, i->src1.value, i->src2.value, i->src3.value);
i->Remove();
result = true;
} else {
// Multiply part is constant.
Value* mul = builder->AllocValue();
mul->set_from(i->src1.value);
mul->Mul(i->src2.value);
Value* add = i->src3.value;
i->Replace(&OPCODE_ADD_info, 0);
i->set_src1(mul);
i->set_src2(add);
result = true;
}
}
break;
case OPCODE_MUL_SUB:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
// Multiply part is constant.
if (i->src3.value->IsConstant()) {
v->set_from(i->src1.value);
Value::MulSub(v, i->src1.value, i->src2.value, i->src3.value);
i->Remove();
result = true;
} else {
// Multiply part is constant.
Value* mul = builder->AllocValue();
mul->set_from(i->src1.value);
mul->Mul(i->src2.value);
Value* add = i->src3.value;
i->Replace(&OPCODE_SUB_info, 0);
i->set_src1(mul);
i->set_src2(add);
result = true;
}
}
break;
case OPCODE_MAX:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value);
@ -925,7 +905,8 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
result = true;
}
break;
case OPCODE_VECTOR_DENORMFLUSH:
case OPCODE_VECTOR_DENORMFLUSH: // this one is okay to constant
// evaluate, since it is just bit math
if (i->src1.value->IsConstant()) {
v->set_from(i->src1.value);
v->DenormalFlush();
@ -933,19 +914,10 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
result = true;
}
break;
case OPCODE_TO_SINGLE:
if (i->src1.value->IsConstant()) {
v->set_from(i->src1.value);
v->ToSingle();
i->Remove();
result = true;
}
break;
default:
// Ignored.
break;
}
i = i->next;
}
block = block->next;

View File

@ -1287,7 +1287,11 @@ void HIRBuilder::SetRoundingMode(Value* value) {
Instr* i = AppendInstr(OPCODE_SET_ROUNDING_MODE_info, 0);
i->set_src1(value);
}
// Appends an OPCODE_SET_NJM instruction (no destination) whose single source
// operand is `value`, which must be of an integer type.
// NOTE(review): the x64 opcode table shown for OPCODE_SET_NJM only registers
// an I8 sequence - confirm callers always pass an 8-bit value.
void HIRBuilder::SetNJM(Value* value) {
  ASSERT_INTEGER_TYPE(value);
  AppendInstr(OPCODE_SET_NJM_info, 0)->set_src1(value);
}
Value* HIRBuilder::Max(Value* value1, Value* value2) {
ASSERT_TYPES_EQUAL(value1, value2);
@ -1632,7 +1636,7 @@ Value* HIRBuilder::Div(Value* value1, Value* value2,
Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) {
ASSERT_TYPES_EQUAL(value1, value2);
ASSERT_TYPES_EQUAL(value1, value3);
#if 0
bool c1 = value1->IsConstant();
bool c2 = value2->IsConstant();
if (c1 && c2) {
@ -1640,7 +1644,7 @@ Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) {
dest->Mul(value2);
return Add(dest, value3);
}
#endif
Instr* i = AppendInstr(OPCODE_MUL_ADD_info, 0, AllocValue(value1->type));
i->set_src1(value1);
i->set_src2(value2);
@ -1651,7 +1655,7 @@ Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) {
Value* HIRBuilder::MulSub(Value* value1, Value* value2, Value* value3) {
ASSERT_TYPES_EQUAL(value1, value2);
ASSERT_TYPES_EQUAL(value1, value3);
#if 0
bool c1 = value1->IsConstant();
bool c2 = value2->IsConstant();
if (c1 && c2) {
@ -1659,7 +1663,7 @@ Value* HIRBuilder::MulSub(Value* value1, Value* value2, Value* value3) {
dest->Mul(value2);
return Sub(dest, value3);
}
#endif
Instr* i = AppendInstr(OPCODE_MUL_SUB_info, 0, AllocValue(value1->type));
i->set_src1(value1);
i->set_src2(value2);

View File

@ -264,7 +264,7 @@ class HIRBuilder {
Value* new_value);
Value* AtomicAdd(Value* address, Value* value);
Value* AtomicSub(Value* address, Value* value);
void SetNJM(Value* value);
protected:
void DumpValue(StringBuffer* str, Value* value);
void DumpOp(StringBuffer* str, OpcodeSignatureType sig_type, Instr::Op* op);

View File

@ -284,6 +284,7 @@ enum Opcode {
OPCODE_TO_SINGLE, // i could not find a decent name to assign to this opcode,
// as we already have OPCODE_ROUND. round double to float (
// ppc "single" fpu instruction result rounding behavior )
OPCODE_SET_NJM,
__OPCODE_MAX_VALUE, // Keep at end.
};
@ -295,6 +296,7 @@ enum OpcodeFlags {
OPCODE_FLAG_IGNORE = (1 << 5),
OPCODE_FLAG_HIDE = (1 << 6),
OPCODE_FLAG_PAIRED_PREV = (1 << 7),
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING = (1 << 8)
};
enum OpcodeSignatureType {

View File

@ -151,25 +151,25 @@ DEFINE_OPCODE(
OPCODE_CONVERT,
"convert",
OPCODE_SIG_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_ROUND,
"round",
OPCODE_SIG_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_VECTOR_CONVERT_I2F,
"vector_convert_i2f",
OPCODE_SIG_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_VECTOR_CONVERT_F2I,
"vector_convert_f2i",
OPCODE_SIG_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_LOAD_VECTOR_SHL,
@ -456,13 +456,13 @@ DEFINE_OPCODE(
OPCODE_MUL_ADD,
"mul_add",
OPCODE_SIG_V_V_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_MUL_SUB,
"mul_sub",
OPCODE_SIG_V_V_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_NEG,
@ -480,43 +480,43 @@ DEFINE_OPCODE(
OPCODE_SQRT,
"sqrt",
OPCODE_SIG_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_RSQRT,
"rsqrt",
OPCODE_SIG_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_RECIP,
"recip",
OPCODE_SIG_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_POW2,
"pow2",
OPCODE_SIG_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_LOG2,
"log2",
OPCODE_SIG_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_DOT_PRODUCT_3,
"dot_product_3",
OPCODE_SIG_V_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_DOT_PRODUCT_4,
"dot_product_4",
OPCODE_SIG_V_V_V,
0)
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE(
OPCODE_AND,
@ -685,5 +685,11 @@ DEFINE_OPCODE(
OPCODE_TO_SINGLE,
"to_single",
OPCODE_SIG_V_V,
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING
)
// Opcode table entry for OPCODE_SET_NJM, printed as "set_njm". It uses the
// OPCODE_SIG_X_V signature and carries no opcode flags.
// NOTE(review): presumably X_V means "no result, one value operand" - confirm
// against the OPCODE_SIG_* definitions.
DEFINE_OPCODE(
OPCODE_SET_NJM,
"set_njm",
OPCODE_SIG_X_V,
0
)

View File

@ -199,7 +199,7 @@ void Value::Truncate(TypeName target_type) {
return;
}
}
// WARNING: Convert() does not take the rounding flags into account at all!
void Value::Convert(TypeName target_type, RoundMode round_mode) {
switch (type) {
case FLOAT32_TYPE:
@ -401,7 +401,7 @@ void Value::MulHi(Value* other, bool is_unsigned) {
32);
}
break;
case INT64_TYPE:
case INT64_TYPE: {
#if XE_COMPILER_MSVC
if (is_unsigned) {
constant.i64 = __umulh(constant.i64, other->constant.i64);
@ -409,17 +409,19 @@ void Value::MulHi(Value* other, bool is_unsigned) {
constant.i64 = __mulh(constant.i64, other->constant.i64);
}
#else
unsigned __int128 product;
if (is_unsigned) {
constant.i64 = static_cast<uint64_t>(
static_cast<unsigned __int128>(constant.i64) *
static_cast<unsigned __int128>(other->constant.i64));
product = static_cast<unsigned __int128>(constant.i64) *
static_cast<unsigned __int128>(other->constant.i64);
} else {
constant.i64 =
static_cast<uint64_t>(static_cast<__int128>(constant.i64) *
static_cast<__int128>(other->constant.i64));
product = static_cast<unsigned __int128>(
static_cast<__int128>(constant.i64) *
static_cast<__int128>(other->constant.i64));
}
constant.i64 = static_cast<int64_t>(product >> 64);
#endif // XE_COMPILER_MSVC
break;
}
default:
assert_unhandled_case(type);
break;
@ -495,52 +497,6 @@ void Value::Max(Value* other) {
}
}
// Evaluates dest = (value1 * value2) + value3 on the operands' constant
// payloads. dest->type selects the interpretation: VEC128_TYPE is processed as
// four f32 lanes, FLOAT32_TYPE / FLOAT64_TYPE as scalars. Any other type hits
// assert_unhandled_case. The multiply and the add are written as one
// expression per element, exactly as a plain C++ "a * b + c".
void Value::MulAdd(Value* dest, Value* value1, Value* value2, Value* value3) {
  const auto& a = value1->constant;
  const auto& b = value2->constant;
  const auto& c = value3->constant;
  auto& out = dest->constant;

  if (dest->type == VEC128_TYPE) {
    for (int lane = 0; lane < 4; ++lane) {
      out.v128.f32[lane] =
          (a.v128.f32[lane] * b.v128.f32[lane]) + c.v128.f32[lane];
    }
  } else if (dest->type == FLOAT32_TYPE) {
    out.f32 = (a.f32 * b.f32) + c.f32;
  } else if (dest->type == FLOAT64_TYPE) {
    out.f64 = (a.f64 * b.f64) + c.f64;
  } else {
    assert_unhandled_case(dest->type);
  }
}
// Evaluates dest = (value1 * value2) - value3 on the operands' constant
// payloads. dest->type selects the interpretation: VEC128_TYPE is processed as
// four f32 lanes, FLOAT32_TYPE / FLOAT64_TYPE as scalars. Any other type hits
// assert_unhandled_case. The multiply and the subtract are written as one
// expression per element, exactly as a plain C++ "a * b - c".
void Value::MulSub(Value* dest, Value* value1, Value* value2, Value* value3) {
  const auto& a = value1->constant;
  const auto& b = value2->constant;
  const auto& c = value3->constant;
  auto& out = dest->constant;

  if (dest->type == VEC128_TYPE) {
    for (int lane = 0; lane < 4; ++lane) {
      out.v128.f32[lane] =
          (a.v128.f32[lane] * b.v128.f32[lane]) - c.v128.f32[lane];
    }
  } else if (dest->type == FLOAT32_TYPE) {
    out.f32 = (a.f32 * b.f32) - c.f32;
  } else if (dest->type == FLOAT64_TYPE) {
    out.f64 = (a.f64 * b.f64) - c.f64;
  } else {
    assert_unhandled_case(dest->type);
  }
}
void Value::Neg() {
switch (type) {
case INT8_TYPE:
@ -1643,11 +1599,7 @@ void Value::DenormalFlush() {
constant.v128.u32[i] = current_element;
}
}
// Rounds the FLOAT64 constant to single precision and stores the result back
// as a double. The value must already be of FLOAT64_TYPE (asserted).
void Value::ToSingle() {
  assert_true(type == FLOAT64_TYPE);
  // Narrow to float first so the host performs the double -> float rounding,
  // then widen again (widening is exact).
  const float narrowed = static_cast<float>(constant.f64);
  constant.f64 = static_cast<double>(narrowed);
}
void Value::CountLeadingZeros(const Value* other) {
switch (other->type) {
case INT8_TYPE:

View File

@ -563,8 +563,7 @@ class Value {
void MulHi(Value* other, bool is_unsigned);
void Div(Value* other, bool is_unsigned);
void Max(Value* other);
static void MulAdd(Value* dest, Value* value1, Value* value2, Value* value3);
static void MulSub(Value* dest, Value* value1, Value* value2, Value* value3);
void Neg();
void Abs();
void Sqrt();
@ -603,7 +602,6 @@ class Value {
bool saturate);
void ByteSwap();
void DenormalFlush();
void ToSingle();
void CountLeadingZeros(const Value* other);
bool Compare(Opcode opcode, Value* other);
hir::Instr* GetDefSkipAssigns();
@ -615,7 +613,10 @@ class Value {
// returns true if every single use is as an operand to a single instruction
// (add var2, var1, var1)
bool AllUsesByOneInsn() const;
// Returns true if this value may hold floating-point data. It is only a
// "maybe" because VEC128_TYPE is included: a vec128 is untyped data that can
// be treated as float or int depending on the context.
bool MaybeFloaty() const {
  switch (type) {
    case FLOAT32_TYPE:
    case FLOAT64_TYPE:
    case VEC128_TYPE:
      return true;
    default:
      return false;
  }
}
private:
static bool CompareInt8(Opcode opcode, Value* a, Value* b);
static bool CompareInt16(Opcode opcode, Value* a, Value* b);

View File

@ -364,7 +364,16 @@ int InstrEmit_mfvscr(PPCHIRBuilder& f, const InstrData& i) {
// mtvscr: loads the source vector register, updates the NJM state from it and
// stores the whole vector into PPCContext::vscr_vec. Always returns 0.
int InstrEmit_mtvscr(PPCHIRBuilder& f, const InstrData& i) {
  // NOTE(review): is this the right format?
  // TODO: what mtvscr does with the unused bits is implementation defined;
  // figure out what it actually does.
  Value* const vscr_source = f.LoadVR(i.VX128_1.RB);

  // Take 32-bit element 3 of the vector and test bit 16 (0x10000 == 65536);
  // the truth of that bit is what gets handed to SetNJM.
  Value* const vscr_word =
      f.Extract(vscr_source, static_cast<uint8_t>(3), INT32_TYPE);
  Value* const njm_mask = f.LoadConstantInt32(0x10000);
  Value* const njm_enabled = f.IsTrue(f.And(vscr_word, njm_mask));
  f.SetNJM(njm_enabled);

  f.StoreContext(offsetof(PPCContext, vscr_vec), vscr_source);
  return 0;
}

View File

@ -382,7 +382,6 @@ int InstrEmit_mtfsfx(PPCHIRBuilder& f, const InstrData& i) {
return 1;
} else {
assert_zero(i.XFL.W);
// Store under control of mask.
// Expand the mask from 8 bits -> 32 bits.
uint32_t mask = 0;
@ -402,7 +401,7 @@ int InstrEmit_mtfsfx(PPCHIRBuilder& f, const InstrData& i) {
// Update the system rounding mode.
if (mask & 0x7) {
f.SetRoundingMode(v);
f.SetRoundingMode(f.And(v, f.LoadConstantInt32(7)));
}
}
if (i.XFL.Rc) {
@ -425,7 +424,7 @@ int InstrEmit_mtfsfix(PPCHIRBuilder& f, const InstrData& i) {
// Update the system rounding mode.
if (mask & 0x7) {
f.SetRoundingMode(fpscr);
f.SetRoundingMode(f.And(fpscr, f.LoadConstantInt32(7)));
}
if (i.X.Rc) {

View File

@ -64,9 +64,13 @@ DEFINE_string(
"or the module specified by the game. Leave blank to launch the default "
"module.",
"General");
// Developer-only switch in the "General" category, off by default. Its
// negation is passed as the third constructor argument of vfs::HostPathDevice
// when a .xex/.elf/.exe is mounted in CreateVfsDeviceBasedOnPath.
// NOTE(review): presumably that argument is the device's read-only flag -
// confirm against the HostPathDevice constructor.
DEFINE_bool(allow_game_relative_writes, false,
"Not useful to non-developers. Allows code to write to paths "
"relative to game://. Used for "
"generating test data to compare with original hardware. ",
"General");
namespace xe {
using namespace xe::literals;
Emulator::GameConfigLoadCallback::GameConfigLoadCallback(Emulator& emulator)
@ -282,7 +286,8 @@ const std::unique_ptr<vfs::Device> Emulator::CreateVfsDeviceBasedOnPath(
auto extension = xe::utf8::lower_ascii(xe::path_to_utf8(path.extension()));
if (extension == ".xex" || extension == ".elf" || extension == ".exe") {
auto parent_path = path.parent_path();
return std::make_unique<vfs::HostPathDevice>(mount_path, parent_path, true);
return std::make_unique<vfs::HostPathDevice>(
mount_path, parent_path, !cvars::allow_game_relative_writes);
} else {
return std::make_unique<vfs::DiscImageDevice>(mount_path, path);
}
@ -653,8 +658,8 @@ bool Emulator::ExceptionCallback(Exception* ex) {
// debugger.
return false;
} else if (processor()->is_debugger_attached()) {
// Let the debugger handle this exception. It may decide to continue past it
// (if it was a stepping breakpoint, etc).
// Let the debugger handle this exception. It may decide to continue past
// it (if it was a stepping breakpoint, etc).
return processor()->OnUnhandledException(ex);
}
@ -823,8 +828,8 @@ static std::string format_version(xex2_version version) {
X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
const std::string_view module_path) {
// Making changes to the UI (setting the icon) and executing game config load
// callbacks which expect to be called from the UI thread.
// Making changes to the UI (setting the icon) and executing game config
// load callbacks which expect to be called from the UI thread.
assert_true(display_window_->app_context().IsInUIThread());
// Setup NullDevices for raw HDD partition accesses
@ -832,12 +837,12 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
// By using a NullDevice that just returns success to all IO requests it
// should allow games to believe cache/raw disk was accessed successfully
// NOTE: this should probably be moved to xenia_main.cc, but right now we need
// to register the \Device\Harddisk0\ NullDevice _after_ the
// NOTE: this should probably be moved to xenia_main.cc, but right now we
// need to register the \Device\Harddisk0\ NullDevice _after_ the
// \Device\Harddisk0\Partition1 HostPathDevice, otherwise requests to
// Partition1 will go to this. Registering during CompleteLaunch allows us to
// make sure any HostPathDevices are ready beforehand.
// (see comment above cache:\ device registration for more info about why)
// Partition1 will go to this. Registering during CompleteLaunch allows us
// to make sure any HostPathDevices are ready beforehand. (see comment above
// cache:\ device registration for more info about why)
auto null_paths = {std::string("\\Partition0"), std::string("\\Cache0"),
std::string("\\Cache1")};
auto null_device =
@ -900,8 +905,8 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
if (module->title_id()) {
auto title_id = fmt::format("{:08X}", module->title_id());
// Load the per-game configuration file and make sure updates are handled by
// the callbacks.
// Load the per-game configuration file and make sure updates are handled
// by the callbacks.
config::LoadGameConfig(title_id);
assert_true(game_config_load_callback_loop_next_index_ == SIZE_MAX);
game_config_load_callback_loop_next_index_ = 0;
@ -934,10 +939,10 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
}
}
// Initializing the shader storage in a blocking way so the user doesn't miss
// the initial seconds - for instance, sound from an intro video may start
// playing before the video can be seen if doing this in parallel with the
// main thread.
// Initializing the shader storage in a blocking way so the user doesn't
// miss the initial seconds - for instance, sound from an intro video may
// start playing before the video can be seen if doing this in parallel with
// the main thread.
on_shader_storage_initialization(true);
graphics_system_->InitializeShaderStorage(cache_root_, title_id_.value(),
true);