Merge pull request #57 from chrisps/canary_experimental

Add separate VMX/fpu mxcsr
This commit is contained in:
Radosław Gliński 2022-07-31 18:43:30 +02:00 committed by GitHub
commit 332f69f36b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 687 additions and 611 deletions

View File

@ -692,6 +692,12 @@ void X64Backend::InitializeBackendContext(void* ctx) {
X64BackendContext* bctx = reinterpret_cast<X64BackendContext*>( X64BackendContext* bctx = reinterpret_cast<X64BackendContext*>(
reinterpret_cast<intptr_t>(ctx) - sizeof(X64BackendContext)); reinterpret_cast<intptr_t>(ctx) - sizeof(X64BackendContext));
bctx->ResolveFunction_Ptr = reinterpret_cast<void*>(&ResolveFunction); bctx->ResolveFunction_Ptr = reinterpret_cast<void*>(&ResolveFunction);
bctx->mxcsr_fpu =
DEFAULT_FPU_MXCSR; // idk if this is right, check on rgh what the
// rounding on ppc is at startup
bctx->mxcsr_vmx = DEFAULT_VMX_MXCSR;
bctx->flags = 0;
// https://media.discordapp.net/attachments/440280035056943104/1000765256643125308/unknown.png
bctx->Ox1000 = 0x1000; bctx->Ox1000 = 0x1000;
} }
} // namespace x64 } // namespace x64

View File

@ -37,9 +37,17 @@ typedef void (*ResolveFunctionThunk)();
// negatively index the membase reg) // negatively index the membase reg)
struct X64BackendContext { struct X64BackendContext {
void* ResolveFunction_Ptr; // cached pointer to resolvefunction void* ResolveFunction_Ptr; // cached pointer to resolvefunction
unsigned int mxcsr_fpu; //currently, the way we implement rounding mode affects both vmx and the fpu
unsigned int mxcsr_vmx;
unsigned int flags; //bit 0 = 0 if mxcsr is fpu, else it is vmx
unsigned int Ox1000; // constant 0x1000 so we can shrink each tail emitted unsigned int Ox1000; // constant 0x1000 so we can shrink each tail emitted
// add of it by... 2 bytes lol // add of it by... 2 bytes lol
}; };
// Initial MXCSR image stored in X64BackendContext::mxcsr_vmx:
//   _MM_MASK_MASK - every SSE floating-point exception masked
//   0x0040        - denormals-are-zero
//   0x8000        - flush-to-zero
// The rounding-control field is left at zero (the default rounding mode used
// for vmx).
constexpr unsigned int DEFAULT_VMX_MXCSR = (_MM_MASK_MASK) | 0x0040 | 0x8000;
// Initial MXCSR image stored in X64BackendContext::mxcsr_fpu: exceptions
// masked, no flush-to-zero / denormals-are-zero bits set.
constexpr unsigned int DEFAULT_FPU_MXCSR = 0x1F80;
class X64Backend : public Backend { class X64Backend : public Backend {
public: public:

View File

@ -320,6 +320,8 @@ bool X64Emitter::Emit(HIRBuilder* builder, EmitFunctionInfo& func_info) {
// Body. // Body.
auto block = builder->first_block(); auto block = builder->first_block();
while (block) { while (block) {
ForgetMxcsrMode(); // at start of block, mxcsr mode is undefined
// Mark block labels. // Mark block labels.
auto label = block->label_head; auto label = block->label_head;
while (label) { while (label) {
@ -490,6 +492,7 @@ uint64_t ResolveFunction(void* raw_context, uint64_t target_address) {
void X64Emitter::Call(const hir::Instr* instr, GuestFunction* function) { void X64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
assert_not_null(function); assert_not_null(function);
ForgetMxcsrMode();
auto fn = static_cast<X64Function*>(function); auto fn = static_cast<X64Function*>(function);
// Resolve address to the function to call and store in rax. // Resolve address to the function to call and store in rax.
@ -564,6 +567,7 @@ void X64Emitter::Call(const hir::Instr* instr, GuestFunction* function) {
void X64Emitter::CallIndirect(const hir::Instr* instr, void X64Emitter::CallIndirect(const hir::Instr* instr,
const Xbyak::Reg64& reg) { const Xbyak::Reg64& reg) {
ForgetMxcsrMode();
// Check if return. // Check if return.
if (instr->flags & hir::CALL_POSSIBLE_RETURN) { if (instr->flags & hir::CALL_POSSIBLE_RETURN) {
cmp(reg.cvt32(), dword[rsp + StackLayout::GUEST_RET_ADDR]); cmp(reg.cvt32(), dword[rsp + StackLayout::GUEST_RET_ADDR]);
@ -617,6 +621,7 @@ uint64_t UndefinedCallExtern(void* raw_context, uint64_t function_ptr) {
return 0; return 0;
} }
void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) { void X64Emitter::CallExtern(const hir::Instr* instr, const Function* function) {
ForgetMxcsrMode();
bool undefined = true; bool undefined = true;
if (function->behavior() == Function::Behavior::kBuiltin) { if (function->behavior() == Function::Behavior::kBuiltin) {
auto builtin_function = static_cast<const BuiltinFunction*>(function); auto builtin_function = static_cast<const BuiltinFunction*>(function);
@ -696,11 +701,13 @@ Xbyak::Reg64 X64Emitter::GetNativeParam(uint32_t param) {
} }
// Important: If you change these, you must update the thunks in x64_backend.cc! // Important: If you change these, you must update the thunks in x64_backend.cc!
Xbyak::Reg64 X64Emitter::GetContextReg() { return rsi; } Xbyak::Reg64 X64Emitter::GetContextReg() const { return rsi; }
Xbyak::Reg64 X64Emitter::GetMembaseReg() { return rdi; } Xbyak::Reg64 X64Emitter::GetMembaseReg() const { return rdi; }
void X64Emitter::ReloadMembase() { void X64Emitter::ReloadMembase() {
mov(GetMembaseReg(), qword[GetContextReg() + 8]); // membase mov(GetMembaseReg(),
qword[GetContextReg() +
offsetof(ppc::PPCContext, virtual_membase)]); // membase
} }
// Len Assembly Byte Sequence // Len Assembly Byte Sequence
@ -917,7 +924,7 @@ static const vec128_t xmm_consts[] = {
/* XMMQNaN */ vec128i(0x7FC00000u), /* XMMQNaN */ vec128i(0x7FC00000u),
/* XMMInt127 */ vec128i(0x7Fu), /* XMMInt127 */ vec128i(0x7Fu),
/* XMM2To32 */ vec128f(0x1.0p32f), /* XMM2To32 */ vec128f(0x1.0p32f),
/* xmminf */ vec128i(0x7f800000), /* XMMFloatInf */ vec128i(0x7f800000),
/* XMMIntsToBytes*/ /* XMMIntsToBytes*/
v128_setr_bytes(0, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, v128_setr_bytes(0, 4, 8, 12, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
@ -938,9 +945,7 @@ static const vec128_t xmm_consts[] = {
/*XMMVSRShlByteshuf*/ /*XMMVSRShlByteshuf*/
v128_setr_bytes(13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 0x80), v128_setr_bytes(13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 0x80),
// XMMVSRMask // XMMVSRMask
vec128b(1) vec128b(1)};
};
void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) { void* X64Emitter::FindByteConstantOffset(unsigned bytevalue) {
for (auto& vec : xmm_consts) { for (auto& vec : xmm_consts) {
@ -1347,7 +1352,7 @@ SimdDomain X64Emitter::DeduceSimdDomain(const hir::Value* for_value) {
return SimdDomain::DONTCARE; return SimdDomain::DONTCARE;
} }
Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) { Xbyak::Address X64Emitter::GetBackendCtxPtr(int offset_in_x64backendctx) const {
/* /*
index context ptr negatively to get to backend ctx field index context ptr negatively to get to backend ctx field
*/ */
@ -1368,6 +1373,93 @@ Xbyak::Label& X64Emitter::NewCachedLabel() {
label_cache_.push_back(tmp); label_cache_.push_back(tmp);
return *tmp; return *tmp;
} }
// Emits a switch to the requested MXCSR mode for the case where the mode that
// is currently loaded is not known while compiling. Bit 0 of the backend flags
// (0 = fpu, 1 = vmx) is updated with btr/bts, which also leaves the previous
// value of the bit in the carry flag; the MXCSR is only reloaded when that
// previous value differs from the requested mode. The reload itself is
// registered as a tail emitter and jumps back to the label placed right after
// the conditional branch.
template <bool switching_to_fpu>
static void ChangeMxcsrModeDynamicHelper(X64Emitter& e) {
  auto mode_flags = e.GetBackendFlagsPtr();
  if constexpr (switching_to_fpu) {
    e.btr(mode_flags, 0);  // bit 0 set to 0 = is fpu mode
  } else {
    e.bts(mode_flags, 0);  // bit 0 set to 1 = is vmx mode
  }

  Xbyak::Label& resume_point = e.NewCachedLabel();
  Xbyak::Label& reload_mxcsr = e.AddToTail(
      [&resume_point](X64Emitter& tail, Xbyak::Label& entry) {
        tail.L(entry);
        if constexpr (switching_to_fpu) {
          tail.LoadFpuMxcsrDirect();
        } else {
          tail.LoadVmxMxcsrDirect();
        }
        tail.jmp(resume_point, X64Emitter::T_NEAR);
      });

  if constexpr (switching_to_fpu) {
    // Carry set: the flag said vmx mode, so the fpu mxcsr must be loaded.
    e.jc(reload_mxcsr, X64Emitter::T_NEAR);
  } else {
    // Carry clear: the flag said fpu mode, so the vmx mxcsr must be loaded.
    e.jnc(reload_mxcsr, X64Emitter::T_NEAR);
  }
  e.L(resume_point);
}
// Records new_mode as the MXCSR mode known at this point of the generated code
// and emits whatever is needed to make the hardware MXCSR and bit 0 of the
// backend flags (0 = fpu, 1 = vmx) agree with it.
//
// new_mode:    Fpu or Vmx; Unknown is not a valid request.
// already_set: the caller has already emitted the vldmxcsr for new_mode, so
//              only the bookkeeping (tracked mode + flag bit) is updated.
//
// Returns true only when a direct MXCSR load was emitted here, i.e. the
// previous mode was statically known, differed from new_mode and already_set
// was false. Returns false in every other case, including the dynamic
// (previous mode Unknown) path where a load may or may not happen at runtime.
bool X64Emitter::ChangeMxcsrMode(MXCSRMode new_mode, bool already_set) {
  if (new_mode == mxcsr_mode_) {
    return false;
  }
  assert_true(new_mode != MXCSRMode::Unknown);

  const MXCSRMode previous_mode = mxcsr_mode_;
  mxcsr_mode_ = new_mode;

  const bool to_fpu = new_mode == MXCSRMode::Fpu;
  if (!to_fpu && new_mode != MXCSRMode::Vmx) {
    assert_unhandled_case(new_mode);
    return false;
  }

  if (already_set) {
    // The caller loaded the MXCSR itself, but the flag bit must still follow
    // the mode. This has to happen whether or not the previous mode was
    // known: a later dynamic check relies on the bit to decide if a reload is
    // needed, and a stale bit would make it skip a required reload.
    if (to_fpu) {
      btr(GetBackendFlagsPtr(), 0);
    } else {
      bts(GetBackendFlagsPtr(), 0);
    }
    return false;
  }

  if (previous_mode == MXCSRMode::Unknown) {
    // Mode not known at compile time: test and update the flag at runtime and
    // reload only if it disagrees with the requested mode.
    if (to_fpu) {
      ChangeMxcsrModeDynamicHelper<true>(*this);
    } else {
      ChangeMxcsrModeDynamicHelper<false>(*this);
    }
    return false;
  }

  // Previous mode is known and different: load unconditionally.
  if (to_fpu) {
    LoadFpuMxcsrDirect();
    btr(GetBackendFlagsPtr(), 0);
  } else {
    LoadVmxMxcsrDirect();
    bts(GetBackendFlagsPtr(), 0);
  }
  return true;
}
void X64Emitter::LoadFpuMxcsrDirect() {
vldmxcsr(GetBackendCtxPtr(offsetof(X64BackendContext, mxcsr_fpu)));
}
void X64Emitter::LoadVmxMxcsrDirect() {
vldmxcsr(GetBackendCtxPtr(offsetof(X64BackendContext, mxcsr_vmx)));
}
// Returns a 32-bit memory operand addressing X64BackendContext::flags, for
// use with instructions such as btr/bts that need an explicit operand size.
Xbyak::Address X64Emitter::GetBackendFlagsPtr() const {
  Xbyak::Address flags_operand =
      GetBackendCtxPtr(offsetof(X64BackendContext, flags));
  flags_operand.setBit(32);
  return flags_operand;
}
} // namespace x64 } // namespace x64
} // namespace backend } // namespace backend
} // namespace cpu } // namespace cpu

View File

@ -65,6 +65,12 @@ enum class SimdDomain : uint32_t {
// CONFLICTING means its used in multiple domains) // CONFLICTING means its used in multiple domains)
}; };
// Which MXCSR image the emitter believes is loaded at the current point of the
// code being generated (tracked in X64Emitter::mxcsr_mode_).
enum class MXCSRMode : uint32_t {
// Not known at compile time; this is the state after ForgetMxcsrMode().
Unknown,
// The mxcsr_fpu image from the backend context is loaded.
Fpu,
// The mxcsr_vmx image from the backend context is loaded.
Vmx
};
static SimdDomain PickDomain2(SimdDomain dom1, SimdDomain dom2) { static SimdDomain PickDomain2(SimdDomain dom1, SimdDomain dom2) {
if (dom1 == dom2) { if (dom1 == dom2) {
return dom1; return dom1;
@ -283,8 +289,8 @@ class X64Emitter : public Xbyak::CodeGenerator {
Xbyak::Reg64 GetNativeParam(uint32_t param); Xbyak::Reg64 GetNativeParam(uint32_t param);
Xbyak::Reg64 GetContextReg(); Xbyak::Reg64 GetContextReg() const;
Xbyak::Reg64 GetMembaseReg(); Xbyak::Reg64 GetMembaseReg() const;
bool CanUseMembaseLow32As0() const { return may_use_membase32_as_zero_reg_; } bool CanUseMembaseLow32As0() const { return may_use_membase32_as_zero_reg_; }
void ReloadMembase(); void ReloadMembase();
@ -295,7 +301,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
void MovMem64(const Xbyak::RegExp& addr, uint64_t v); void MovMem64(const Xbyak::RegExp& addr, uint64_t v);
Xbyak::Address GetXmmConstPtr(XmmConst id); Xbyak::Address GetXmmConstPtr(XmmConst id);
Xbyak::Address GetBackendCtxPtr(int offset_in_x64backendctx); Xbyak::Address GetBackendCtxPtr(int offset_in_x64backendctx) const;
void LoadConstantXmm(Xbyak::Xmm dest, float v); void LoadConstantXmm(Xbyak::Xmm dest, float v);
void LoadConstantXmm(Xbyak::Xmm dest, double v); void LoadConstantXmm(Xbyak::Xmm dest, double v);
@ -304,6 +310,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
Xbyak::Address StashConstantXmm(int index, float v); Xbyak::Address StashConstantXmm(int index, float v);
Xbyak::Address StashConstantXmm(int index, double v); Xbyak::Address StashConstantXmm(int index, double v);
Xbyak::Address StashConstantXmm(int index, const vec128_t& v); Xbyak::Address StashConstantXmm(int index, const vec128_t& v);
Xbyak::Address GetBackendFlagsPtr() const;
void* FindByteConstantOffset(unsigned bytevalue); void* FindByteConstantOffset(unsigned bytevalue);
void* FindWordConstantOffset(unsigned wordvalue); void* FindWordConstantOffset(unsigned wordvalue);
void* FindDwordConstantOffset(unsigned bytevalue); void* FindDwordConstantOffset(unsigned bytevalue);
@ -319,6 +326,16 @@ class X64Emitter : public Xbyak::CodeGenerator {
size_t stack_size() const { return stack_size_; } size_t stack_size() const { return stack_size_; }
SimdDomain DeduceSimdDomain(const hir::Value* for_value); SimdDomain DeduceSimdDomain(const hir::Value* for_value);
// Drops the compile-time knowledge of which MXCSR is loaded; the next
// ChangeMxcsrMode() call will then have to check the mode at runtime.
void ForgetMxcsrMode() { mxcsr_mode_ = MXCSRMode::Unknown; }
/*
Switches the tracked MXCSR mode to new_mode, emitting an MXCSR load and/or a
backend-flags update as needed.

already_set means that the caller already did vldmxcsr, used for
SET_ROUNDING_MODE.

Returns true if a direct mxcsr load was emitted (previous mode known at
compile time and different from new_mode, already_set false); returns false
otherwise, including when the previous mode was Unknown and a runtime check
was emitted instead. DOT_PRODUCT can use this to skip clearing the overflow
flag, as it will never be set in the vmx fpscr.
*/
bool ChangeMxcsrMode(MXCSRMode new_mode, bool already_set=false);
// Emits vldmxcsr of the fpu image. Unsafe: does not change mxcsr_mode_.
void LoadFpuMxcsrDirect();
// Emits vldmxcsr of the vmx image. Unsafe: does not change mxcsr_mode_.
void LoadVmxMxcsrDirect();
protected: protected:
void* Emplace(const EmitFunctionInfo& func_info, void* Emplace(const EmitFunctionInfo& func_info,
GuestFunction* function = nullptr); GuestFunction* function = nullptr);
@ -359,6 +376,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
std::vector<Xbyak::Label*> std::vector<Xbyak::Label*>
label_cache_; // for creating labels that need to be referenced much label_cache_; // for creating labels that need to be referenced much
// later by tail emitters // later by tail emitters
MXCSRMode mxcsr_mode_ = MXCSRMode::Unknown;
}; };
} // namespace x64 } // namespace x64

View File

@ -616,7 +616,31 @@ struct Sequence {
} }
} }
}; };
// Returns an Xmm register holding the value of `input`.
// A non-constant operand is returned as-is (via its conversion to Xmm). A
// constant operand is materialized into xmm_to_use_if_const, which is then
// returned: 4- and 8-byte integer constants are placed in the low lanes of an
// otherwise zeroed vector, any other constant type is forwarded directly to
// LoadConstantXmm.
template <typename T>
static Xmm GetInputRegOrConstant(X64Emitter& e, const T& input,
                                 Xmm xmm_to_use_if_const) {
  if (!input.is_constant) {
    return input;
  }

  using constant_type = std::remove_reference_t<decltype(input.constant())>;
  if constexpr (std::is_integral_v<constant_type>) {
    vec128_t widened = vec128b(0);
    if constexpr (sizeof(constant_type) == 4) {
      widened.i32[0] = input.constant();
    } else if constexpr (sizeof(constant_type) == 8) {
      widened.low = input.constant();
    } else {
      // Other integer widths are not expected here.
      assert_unhandled_case(sizeof(constant_type));
    }
    e.LoadConstantXmm(xmm_to_use_if_const, widened);
  } else {
    e.LoadConstantXmm(xmm_to_use_if_const, input.constant());
  }
  return xmm_to_use_if_const;
}
} // namespace x64 } // namespace x64
} // namespace backend } // namespace backend
} // namespace cpu } // namespace cpu

View File

@ -257,6 +257,7 @@ struct CALL_TRUE_I8
e.jz(skip); e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value)); e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip); e.L(skip);
e.ForgetMxcsrMode();
} }
}; };
struct CALL_TRUE_I16 struct CALL_TRUE_I16
@ -268,6 +269,7 @@ struct CALL_TRUE_I16
e.jz(skip); e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value)); e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip); e.L(skip);
e.ForgetMxcsrMode();
} }
}; };
struct CALL_TRUE_I32 struct CALL_TRUE_I32
@ -279,6 +281,7 @@ struct CALL_TRUE_I32
e.jz(skip); e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value)); e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip); e.L(skip);
e.ForgetMxcsrMode();
} }
}; };
struct CALL_TRUE_I64 struct CALL_TRUE_I64
@ -290,6 +293,7 @@ struct CALL_TRUE_I64
e.jz(skip); e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value)); e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip); e.L(skip);
e.ForgetMxcsrMode();
} }
}; };
struct CALL_TRUE_F32 struct CALL_TRUE_F32
@ -301,6 +305,7 @@ struct CALL_TRUE_F32
e.jz(skip); e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value)); e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip); e.L(skip);
e.ForgetMxcsrMode();
} }
}; };
@ -313,6 +318,7 @@ struct CALL_TRUE_F64
e.jz(skip); e.jz(skip);
e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value)); e.Call(i.instr, static_cast<GuestFunction*>(i.src2.value));
e.L(skip); e.L(skip);
e.ForgetMxcsrMode();
} }
}; };
EMITTER_OPCODE_TABLE(OPCODE_CALL_TRUE, CALL_TRUE_I8, CALL_TRUE_I16, EMITTER_OPCODE_TABLE(OPCODE_CALL_TRUE, CALL_TRUE_I8, CALL_TRUE_I16,
@ -326,6 +332,7 @@ struct CALL_INDIRECT
: Sequence<CALL_INDIRECT, I<OPCODE_CALL_INDIRECT, VoidOp, I64Op>> { : Sequence<CALL_INDIRECT, I<OPCODE_CALL_INDIRECT, VoidOp, I64Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.CallIndirect(i.instr, i.src1); e.CallIndirect(i.instr, i.src1);
e.ForgetMxcsrMode();
} }
}; };
EMITTER_OPCODE_TABLE(OPCODE_CALL_INDIRECT, CALL_INDIRECT); EMITTER_OPCODE_TABLE(OPCODE_CALL_INDIRECT, CALL_INDIRECT);

View File

@ -16,7 +16,13 @@
// For OPCODE_PACK/OPCODE_UNPACK // For OPCODE_PACK/OPCODE_UNPACK
#include "third_party/half/include/half.hpp" #include "third_party/half/include/half.hpp"
#include "xenia/base/cvar.h"
#include "xenia/cpu/backend/x64/x64_stack_layout.h"
// When set, the FLOAT16 PACK/UNPACK sequences in this file do not take the
// F16C fast path, and their fallback helpers convert with
// float_to_xenos_half / xenos_half_to_float instead of the half_float library.
DEFINE_bool(use_extended_range_half, true,
"Emulate extended range half-precision, may be slower on games "
"that use it heavily",
"CPU");
namespace xe { namespace xe {
namespace cpu { namespace cpu {
namespace backend { namespace backend {
@ -31,6 +37,8 @@ struct VECTOR_CONVERT_I2F
: Sequence<VECTOR_CONVERT_I2F, : Sequence<VECTOR_CONVERT_I2F,
I<OPCODE_VECTOR_CONVERT_I2F, V128Op, V128Op>> { I<OPCODE_VECTOR_CONVERT_I2F, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.ChangeMxcsrMode(MXCSRMode::Vmx);
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm3);
// flags = ARITHMETIC_UNSIGNED // flags = ARITHMETIC_UNSIGNED
if (i.instr->flags & ARITHMETIC_UNSIGNED) { if (i.instr->flags & ARITHMETIC_UNSIGNED) {
// Round manually to (1.stored mantissa bits * 2^31) or to 2^32 to the // Round manually to (1.stored mantissa bits * 2^31) or to 2^32 to the
@ -46,8 +54,8 @@ struct VECTOR_CONVERT_I2F
// be 4294967296.0f. // be 4294967296.0f.
// xmm0 = src + 0b01111111 + ((src >> 8) & 1) // xmm0 = src + 0b01111111 + ((src >> 8) & 1)
// (xmm1 also used to launch reg + mem early and to require it late) // (xmm1 also used to launch reg + mem early and to require it late)
e.vpaddd(e.xmm1, i.src1, e.GetXmmConstPtr(XMMInt127)); e.vpaddd(e.xmm1, src1, e.GetXmmConstPtr(XMMInt127));
e.vpslld(e.xmm0, i.src1, 31 - 8); e.vpslld(e.xmm0, src1, 31 - 8);
e.vpsrld(e.xmm0, e.xmm0, 31); e.vpsrld(e.xmm0, e.xmm0, 31);
e.vpaddd(e.xmm0, e.xmm0, e.xmm1); e.vpaddd(e.xmm0, e.xmm0, e.xmm1);
// xmm0 = (0xFF800000 | 23 explicit mantissa bits), or 0 if overflowed // xmm0 = (0xFF800000 | 23 explicit mantissa bits), or 0 if overflowed
@ -63,13 +71,13 @@ struct VECTOR_CONVERT_I2F
// Convert from signed integer to float. // Convert from signed integer to float.
// xmm1 = [0x00000000, 0x7FFFFFFF] case result // xmm1 = [0x00000000, 0x7FFFFFFF] case result
e.vcvtdq2ps(e.xmm1, i.src1); e.vcvtdq2ps(e.xmm1, src1);
// Merge the two ways depending on whether the number is >= 0x80000000 // Merge the two ways depending on whether the number is >= 0x80000000
// (has high bit set). // (has high bit set).
e.vblendvps(i.dest, e.xmm1, e.xmm0, i.src1); e.vblendvps(i.dest, e.xmm1, e.xmm0, src1);
} else { } else {
e.vcvtdq2ps(i.dest, i.src1); e.vcvtdq2ps(i.dest, src1);
} }
} }
}; };
@ -82,9 +90,11 @@ struct VECTOR_CONVERT_F2I
: Sequence<VECTOR_CONVERT_F2I, : Sequence<VECTOR_CONVERT_F2I,
I<OPCODE_VECTOR_CONVERT_F2I, V128Op, V128Op>> { I<OPCODE_VECTOR_CONVERT_F2I, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.ChangeMxcsrMode(MXCSRMode::Vmx);
Xmm src1 = GetInputRegOrConstant(e, i.src1, e.xmm3);
if (i.instr->flags & ARITHMETIC_UNSIGNED) { if (i.instr->flags & ARITHMETIC_UNSIGNED) {
// clamp to min 0 // clamp to min 0
e.vmaxps(e.xmm0, i.src1, e.GetXmmConstPtr(XMMZero)); e.vmaxps(e.xmm0, src1, e.GetXmmConstPtr(XMMZero));
// xmm1 = mask of values >= (unsigned)INT_MIN // xmm1 = mask of values >= (unsigned)INT_MIN
e.vcmpgeps(e.xmm1, e.xmm0, e.GetXmmConstPtr(XMMPosIntMinPS)); e.vcmpgeps(e.xmm1, e.xmm0, e.GetXmmConstPtr(XMMPosIntMinPS));
@ -108,14 +118,14 @@ struct VECTOR_CONVERT_F2I
e.vpor(i.dest, i.dest, e.xmm0); e.vpor(i.dest, i.dest, e.xmm0);
} else { } else {
// xmm2 = NaN mask // xmm2 = NaN mask
e.vcmpunordps(e.xmm2, i.src1, i.src1); e.vcmpunordps(e.xmm2, src1, src1);
// convert packed floats to packed dwords // convert packed floats to packed dwords
e.vcvttps2dq(e.xmm0, i.src1); e.vcvttps2dq(e.xmm0, src1);
// (high bit) xmm1 = dest is indeterminate and i.src1 >= 0 // (high bit) xmm1 = dest is indeterminate and i.src1 >= 0
e.vpcmpeqd(e.xmm1, e.xmm0, e.GetXmmConstPtr(XMMIntMin)); e.vpcmpeqd(e.xmm1, e.xmm0, e.GetXmmConstPtr(XMMIntMin));
e.vpandn(e.xmm1, i.src1, e.xmm1); e.vpandn(e.xmm1, src1, e.xmm1);
// saturate positive values // saturate positive values
e.vblendvps(i.dest, e.xmm0, e.GetXmmConstPtr(XMMIntMax), e.xmm1); e.vblendvps(i.dest, e.xmm0, e.GetXmmConstPtr(XMMIntMax), e.xmm1);
@ -131,6 +141,7 @@ struct VECTOR_DENORMFLUSH
: Sequence<VECTOR_DENORMFLUSH, : Sequence<VECTOR_DENORMFLUSH,
I<OPCODE_VECTOR_DENORMFLUSH, V128Op, V128Op>> { I<OPCODE_VECTOR_DENORMFLUSH, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vxorps(e.xmm1, e.xmm1, e.xmm1); // 0.25 P0123 e.vxorps(e.xmm1, e.xmm1, e.xmm1); // 0.25 P0123
e.vandps(e.xmm0, i.src1, e.vandps(e.xmm0, i.src1,
@ -352,6 +363,7 @@ struct VECTOR_COMPARE_EQ_V128
e.vpcmpeqd(dest, src1, src2); e.vpcmpeqd(dest, src1, src2);
break; break;
case FLOAT32_TYPE: case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vcmpeqps(dest, src1, src2); e.vcmpeqps(dest, src1, src2);
break; break;
} }
@ -380,6 +392,7 @@ struct VECTOR_COMPARE_SGT_V128
e.vpcmpgtd(dest, src1, src2); e.vpcmpgtd(dest, src1, src2);
break; break;
case FLOAT32_TYPE: case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vcmpgtps(dest, src1, src2); e.vcmpgtps(dest, src1, src2);
break; break;
} }
@ -414,6 +427,7 @@ struct VECTOR_COMPARE_SGE_V128
e.vpor(dest, e.xmm0); e.vpor(dest, e.xmm0);
break; break;
case FLOAT32_TYPE: case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vcmpgeps(dest, src1, src2); e.vcmpgeps(dest, src1, src2);
break; break;
} }
@ -441,6 +455,7 @@ struct VECTOR_COMPARE_UGT_V128
sign_addr = e.GetXmmConstPtr(XMMSignMaskI32); sign_addr = e.GetXmmConstPtr(XMMSignMaskI32);
break; break;
case FLOAT32_TYPE: case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
sign_addr = e.GetXmmConstPtr(XMMSignMaskF32); sign_addr = e.GetXmmConstPtr(XMMSignMaskF32);
break; break;
default: default:
@ -498,6 +513,7 @@ struct VECTOR_COMPARE_UGE_V128
sign_addr = e.GetXmmConstPtr(XMMSignMaskI32); sign_addr = e.GetXmmConstPtr(XMMSignMaskI32);
break; break;
case FLOAT32_TYPE: case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
sign_addr = e.GetXmmConstPtr(XMMSignMaskF32); sign_addr = e.GetXmmConstPtr(XMMSignMaskF32);
break; break;
} }
@ -620,6 +636,7 @@ struct VECTOR_ADD
case FLOAT32_TYPE: case FLOAT32_TYPE:
assert_false(is_unsigned); assert_false(is_unsigned);
assert_false(saturate); assert_false(saturate);
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vaddps(dest, src1, src2); e.vaddps(dest, src1, src2);
break; break;
default: default:
@ -711,6 +728,7 @@ struct VECTOR_SUB
} }
break; break;
case FLOAT32_TYPE: case FLOAT32_TYPE:
e.ChangeMxcsrMode(MXCSRMode::Vmx);
e.vsubps(dest, src1, src2); e.vsubps(dest, src1, src2);
break; break;
default: default:
@ -2003,6 +2021,7 @@ EMITTER_OPCODE_TABLE(OPCODE_SWIZZLE, SWIZZLE);
// ============================================================================ // ============================================================================
struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> { struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.ChangeMxcsrMode(MXCSRMode::Vmx);
switch (i.instr->flags & PACK_TYPE_MODE) { switch (i.instr->flags & PACK_TYPE_MODE) {
case PACK_TYPE_D3DCOLOR: case PACK_TYPE_D3DCOLOR:
EmitD3DCOLOR(e, i); EmitD3DCOLOR(e, i);
@ -2062,10 +2081,15 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
alignas(16) uint16_t b[8]; alignas(16) uint16_t b[8];
_mm_store_ps(a, src1); _mm_store_ps(a, src1);
std::memset(b, 0, sizeof(b)); std::memset(b, 0, sizeof(b));
if (!cvars::use_extended_range_half) {
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
b[7 - i] = half_float::detail::float2half<std::round_toward_zero>(a[i]); b[7 - i] = half_float::detail::float2half<std::round_toward_zero>(a[i]);
} }
} else {
for (int i = 0; i < 2; i++) {
b[7 - i] = float_to_xenos_half(a[i]);
}
}
return _mm_load_si128(reinterpret_cast<__m128i*>(b)); return _mm_load_si128(reinterpret_cast<__m128i*>(b));
} }
@ -2074,7 +2098,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
// http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx // http://blogs.msdn.com/b/chuckw/archive/2012/09/11/directxmath-f16c-and-fma.aspx
// dest = [(src1.x | src1.y), 0, 0, 0] // dest = [(src1.x | src1.y), 0, 0, 0]
if (e.IsFeatureEnabled(kX64EmitF16C)) { if (e.IsFeatureEnabled(kX64EmitF16C) && !cvars::use_extended_range_half) {
Xmm src; Xmm src;
if (i.src1.is_constant) { if (i.src1.is_constant) {
src = i.dest; src = i.dest;
@ -2101,11 +2125,16 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
alignas(16) uint16_t b[8]; alignas(16) uint16_t b[8];
_mm_store_ps(a, src1); _mm_store_ps(a, src1);
std::memset(b, 0, sizeof(b)); std::memset(b, 0, sizeof(b));
if (!cvars::use_extended_range_half) {
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
b[7 - (i ^ 2)] = b[7 - (i ^ 2)] =
half_float::detail::float2half<std::round_toward_zero>(a[i]); half_float::detail::float2half<std::round_toward_zero>(a[i]);
} }
} else {
for (int i = 0; i < 4; i++) {
b[7 - (i ^ 2)] = float_to_xenos_half(a[i]);
}
}
return _mm_load_si128(reinterpret_cast<__m128i*>(b)); return _mm_load_si128(reinterpret_cast<__m128i*>(b));
} }
@ -2113,7 +2142,7 @@ struct PACK : Sequence<PACK, I<OPCODE_PACK, V128Op, V128Op, V128Op>> {
assert_true(i.src2.value->IsConstantZero()); assert_true(i.src2.value->IsConstantZero());
// dest = [(src1.z | src1.w), (src1.x | src1.y), 0, 0] // dest = [(src1.z | src1.w), (src1.x | src1.y), 0, 0]
if (e.IsFeatureEnabled(kX64EmitF16C)) { if (e.IsFeatureEnabled(kX64EmitF16C) && !cvars::use_extended_range_half) {
Xmm src; Xmm src;
if (i.src1.is_constant) { if (i.src1.is_constant) {
src = i.dest; src = i.dest;
@ -2420,6 +2449,7 @@ EMITTER_OPCODE_TABLE(OPCODE_PACK, PACK);
// ============================================================================ // ============================================================================
struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> { struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
e.ChangeMxcsrMode(MXCSRMode::Vmx);
switch (i.instr->flags & PACK_TYPE_MODE) { switch (i.instr->flags & PACK_TYPE_MODE) {
case PACK_TYPE_D3DCOLOR: case PACK_TYPE_D3DCOLOR:
EmitD3DCOLOR(e, i); EmitD3DCOLOR(e, i);
@ -2478,10 +2508,15 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
alignas(16) float b[4]; alignas(16) float b[4];
_mm_store_si128(reinterpret_cast<__m128i*>(a), src1); _mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
if (!cvars::use_extended_range_half) {
for (int i = 0; i < 2; i++) { for (int i = 0; i < 2; i++) {
b[i] = half_float::detail::half2float(a[VEC128_W(6 + i)]); b[i] = half_float::detail::half2float(a[VEC128_W(6 + i)]);
} }
} else {
for (int i = 0; i < 2; i++) {
b[i] = xenos_half_to_float(a[VEC128_W(6 + i)]);
}
}
// Constants, or something // Constants, or something
b[2] = 0.f; b[2] = 0.f;
b[3] = 1.f; b[3] = 1.f;
@ -2501,7 +2536,9 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
// Also zero out the high end. // Also zero out the high end.
// TODO(benvanik): special case constant unpacks that just get 0/1/etc. // TODO(benvanik): special case constant unpacks that just get 0/1/etc.
if (e.IsFeatureEnabled(kX64EmitF16C)) { if (e.IsFeatureEnabled(kX64EmitF16C) &&
!cvars::use_extended_range_half) { // todo: can use cvtph and bit logic
// to implement
Xmm src; Xmm src;
if (i.src1.is_constant) { if (i.src1.is_constant) {
src = i.dest; src = i.dest;
@ -2534,16 +2571,21 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
alignas(16) uint16_t a[8]; alignas(16) uint16_t a[8];
alignas(16) float b[4]; alignas(16) float b[4];
_mm_store_si128(reinterpret_cast<__m128i*>(a), src1); _mm_store_si128(reinterpret_cast<__m128i*>(a), src1);
if (!cvars::use_extended_range_half) {
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
b[i] = half_float::detail::half2float(a[VEC128_W(4 + i)]); b[i] = half_float::detail::half2float(a[VEC128_W(4 + i)]);
} }
} else {
for (int i = 0; i < 4; i++) {
b[i] = xenos_half_to_float(a[VEC128_W(4 + i)]);
}
}
return _mm_load_ps(b); return _mm_load_ps(b);
} }
static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) { static void EmitFLOAT16_4(X64Emitter& e, const EmitArgType& i) {
// src = [(dest.x | dest.y), (dest.z | dest.w), 0, 0] // src = [(dest.x | dest.y), (dest.z | dest.w), 0, 0]
if (e.IsFeatureEnabled(kX64EmitF16C)) { if (e.IsFeatureEnabled(kX64EmitF16C) && !cvars::use_extended_range_half) {
Xmm src; Xmm src;
if (i.src1.is_constant) { if (i.src1.is_constant) {
src = i.dest; src = i.dest;
@ -2805,6 +2847,32 @@ struct UNPACK : Sequence<UNPACK, I<OPCODE_UNPACK, V128Op, V128Op>> {
}; };
EMITTER_OPCODE_TABLE(OPCODE_UNPACK, UNPACK); EMITTER_OPCODE_TABLE(OPCODE_UNPACK, UNPACK);
// OPCODE_SET_NJM: rewrites the VMX MXCSR image kept in the backend context
// according to src1 and makes it the active MXCSR.
//   src1 == 0 -> only the exception masks (_MM_MASK_MASK); daz/flush-to-zero
//                are turned off
//   src1 != 0 -> DEFAULT_VMX_MXCSR
struct SET_NJM_I8 : Sequence<SET_NJM_I8, I<OPCODE_SET_NJM, VoidOp, I8Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    auto addr_vmx = e.GetBackendCtxPtr(offsetof(X64BackendContext, mxcsr_vmx));
    addr_vmx.setBit(32);
    if (i.src1.is_constant) {
      if (i.src1.constant() == 0) {
        // turn off daz/flush2z
        e.mov(addr_vmx, _MM_MASK_MASK);
      } else {
        e.mov(addr_vmx, DEFAULT_VMX_MXCSR);
      }
    } else {
      // Select the image at runtime: ZF from the test survives the two movs,
      // so cmove picks the masks-only image when src1 is zero.
      e.test(i.src1, i.src1);
      e.mov(e.edx, DEFAULT_VMX_MXCSR);
      e.mov(e.eax, _MM_MASK_MASK);
      e.cmove(e.edx, e.eax);
      e.mov(addr_vmx, e.edx);
    }
    // The stored image has just changed, so the hardware MXCSR must be
    // reloaded even if VMX mode is already active; a plain
    // ChangeMxcsrMode(Vmx) would emit no load in that case and the new
    // setting would not take effect. Forget the tracked mode, load the image
    // directly, then let ChangeMxcsrMode record the mode and update the
    // backend flag for an already-performed load.
    e.ForgetMxcsrMode();
    e.LoadVmxMxcsrDirect();
    e.ChangeMxcsrMode(MXCSRMode::Vmx, true);
  }
};
EMITTER_OPCODE_TABLE(OPCODE_SET_NJM, SET_NJM_I8);
} // namespace x64 } // namespace x64
} // namespace backend } // namespace backend
} // namespace cpu } // namespace cpu

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,9 @@
DEFINE_bool(inline_mmio_access, true, "Inline constant MMIO loads and stores.", DEFINE_bool(inline_mmio_access, true, "Inline constant MMIO loads and stores.",
"CPU"); "CPU");
// Unless set, ConstantPropagationPass::Run skips instructions whose opcode
// carries OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING and does not fold
// ADD/SUB/MUL/DIV when any operand or the destination may be floating point.
DEFINE_bool(permit_float_constant_evaluation, false, "Allow float constant evaluation, may produce incorrect results and break games math",
"CPU");
namespace xe { namespace xe {
namespace cpu { namespace cpu {
namespace compiler { namespace compiler {
@ -68,8 +71,24 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
result = false; result = false;
auto block = builder->first_block(); auto block = builder->first_block();
while (block) { while (block) {
auto i = block->instr_head; for (auto i = block->instr_head; i; i = i->next) {
while (i) { if (((i->opcode->flags & OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING) != 0) &&
!cvars::permit_float_constant_evaluation) {
continue;
}
bool might_be_floatop = false;
i->VisitValueOperands(
[&might_be_floatop](Value* current_opnd, uint32_t opnd_index) {
might_be_floatop |= current_opnd->MaybeFloaty();
});
if (i->dest) {
might_be_floatop |= i->dest->MaybeFloaty();
}
bool should_skip_because_of_float =
might_be_floatop && !cvars::permit_float_constant_evaluation;
auto v = i->dest; auto v = i->dest;
switch (i->opcode->num) { switch (i->opcode->num) {
case OPCODE_DEBUG_BREAK_TRUE: case OPCODE_DEBUG_BREAK_TRUE:
@ -452,7 +471,8 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
break; break;
case OPCODE_ADD: case OPCODE_ADD:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) { if (i->src1.value->IsConstant() && i->src2.value->IsConstant() &&
!should_skip_because_of_float) {
v->set_from(i->src1.value); v->set_from(i->src1.value);
v->Add(i->src2.value); v->Add(i->src2.value);
i->Remove(); i->Remove();
@ -481,7 +501,8 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
} }
break; break;
case OPCODE_SUB: case OPCODE_SUB:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) { if (i->src1.value->IsConstant() && i->src2.value->IsConstant() &&
!should_skip_because_of_float) {
v->set_from(i->src1.value); v->set_from(i->src1.value);
v->Sub(i->src2.value); v->Sub(i->src2.value);
i->Remove(); i->Remove();
@ -489,6 +510,7 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
} }
break; break;
case OPCODE_MUL: case OPCODE_MUL:
if (!should_skip_because_of_float) {
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) { if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value); v->set_from(i->src1.value);
v->Mul(i->src2.value); v->Mul(i->src2.value);
@ -518,6 +540,7 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
} }
} }
} }
}
break; break;
case OPCODE_MUL_HI: case OPCODE_MUL_HI:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) { if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
@ -528,6 +551,7 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
} }
break; break;
case OPCODE_DIV: case OPCODE_DIV:
if (!should_skip_because_of_float) {
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) { if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
v->set_from(i->src1.value); v->set_from(i->src1.value);
v->Div(i->src2.value, (i->flags & ARITHMETIC_UNSIGNED) != 0); v->Div(i->src2.value, (i->flags & ARITHMETIC_UNSIGNED) != 0);
@ -551,50 +575,6 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
} }
} }
} }
break;
case OPCODE_MUL_ADD:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
if (i->src3.value->IsConstant()) {
v->set_from(i->src1.value);
Value::MulAdd(v, i->src1.value, i->src2.value, i->src3.value);
i->Remove();
result = true;
} else {
// Multiply part is constant.
Value* mul = builder->AllocValue();
mul->set_from(i->src1.value);
mul->Mul(i->src2.value);
Value* add = i->src3.value;
i->Replace(&OPCODE_ADD_info, 0);
i->set_src1(mul);
i->set_src2(add);
result = true;
}
}
break;
case OPCODE_MUL_SUB:
if (i->src1.value->IsConstant() && i->src2.value->IsConstant()) {
// Multiply part is constant.
if (i->src3.value->IsConstant()) {
v->set_from(i->src1.value);
Value::MulSub(v, i->src1.value, i->src2.value, i->src3.value);
i->Remove();
result = true;
} else {
// Multiply part is constant.
Value* mul = builder->AllocValue();
mul->set_from(i->src1.value);
mul->Mul(i->src2.value);
Value* add = i->src3.value;
i->Replace(&OPCODE_SUB_info, 0);
i->set_src1(mul);
i->set_src2(add);
result = true;
}
} }
break; break;
case OPCODE_MAX: case OPCODE_MAX:
@ -925,7 +905,8 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
result = true; result = true;
} }
break; break;
case OPCODE_VECTOR_DENORMFLUSH: case OPCODE_VECTOR_DENORMFLUSH: // this one is okay to constant
// evaluate, since it is just bit math
if (i->src1.value->IsConstant()) { if (i->src1.value->IsConstant()) {
v->set_from(i->src1.value); v->set_from(i->src1.value);
v->DenormalFlush(); v->DenormalFlush();
@ -933,19 +914,10 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
result = true; result = true;
} }
break; break;
case OPCODE_TO_SINGLE:
if (i->src1.value->IsConstant()) {
v->set_from(i->src1.value);
v->ToSingle();
i->Remove();
result = true;
}
break;
default: default:
// Ignored. // Ignored.
break; break;
} }
i = i->next;
} }
block = block->next; block = block->next;

View File

@ -1287,7 +1287,11 @@ void HIRBuilder::SetRoundingMode(Value* value) {
Instr* i = AppendInstr(OPCODE_SET_ROUNDING_MODE_info, 0); Instr* i = AppendInstr(OPCODE_SET_ROUNDING_MODE_info, 0);
i->set_src1(value); i->set_src1(value);
} }
// Appends an OPCODE_SET_NJM instruction (no flags, no destination) that takes
// `value` as its only source operand. `value` must be integer-typed.
// NOTE(review): "NJM" presumably refers to the VSCR non-Java-mode bit --
// confirm against the backend implementation of this opcode.
void HIRBuilder::SetNJM(Value* value) {
  ASSERT_INTEGER_TYPE(value);
  Instr* set_njm_instr = AppendInstr(OPCODE_SET_NJM_info, 0);
  set_njm_instr->set_src1(value);
}
Value* HIRBuilder::Max(Value* value1, Value* value2) { Value* HIRBuilder::Max(Value* value1, Value* value2) {
ASSERT_TYPES_EQUAL(value1, value2); ASSERT_TYPES_EQUAL(value1, value2);
@ -1632,7 +1636,7 @@ Value* HIRBuilder::Div(Value* value1, Value* value2,
Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) { Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) {
ASSERT_TYPES_EQUAL(value1, value2); ASSERT_TYPES_EQUAL(value1, value2);
ASSERT_TYPES_EQUAL(value1, value3); ASSERT_TYPES_EQUAL(value1, value3);
#if 0
bool c1 = value1->IsConstant(); bool c1 = value1->IsConstant();
bool c2 = value2->IsConstant(); bool c2 = value2->IsConstant();
if (c1 && c2) { if (c1 && c2) {
@ -1640,7 +1644,7 @@ Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) {
dest->Mul(value2); dest->Mul(value2);
return Add(dest, value3); return Add(dest, value3);
} }
#endif
Instr* i = AppendInstr(OPCODE_MUL_ADD_info, 0, AllocValue(value1->type)); Instr* i = AppendInstr(OPCODE_MUL_ADD_info, 0, AllocValue(value1->type));
i->set_src1(value1); i->set_src1(value1);
i->set_src2(value2); i->set_src2(value2);
@ -1651,7 +1655,7 @@ Value* HIRBuilder::MulAdd(Value* value1, Value* value2, Value* value3) {
Value* HIRBuilder::MulSub(Value* value1, Value* value2, Value* value3) { Value* HIRBuilder::MulSub(Value* value1, Value* value2, Value* value3) {
ASSERT_TYPES_EQUAL(value1, value2); ASSERT_TYPES_EQUAL(value1, value2);
ASSERT_TYPES_EQUAL(value1, value3); ASSERT_TYPES_EQUAL(value1, value3);
#if 0
bool c1 = value1->IsConstant(); bool c1 = value1->IsConstant();
bool c2 = value2->IsConstant(); bool c2 = value2->IsConstant();
if (c1 && c2) { if (c1 && c2) {
@ -1659,7 +1663,7 @@ Value* HIRBuilder::MulSub(Value* value1, Value* value2, Value* value3) {
dest->Mul(value2); dest->Mul(value2);
return Sub(dest, value3); return Sub(dest, value3);
} }
#endif
Instr* i = AppendInstr(OPCODE_MUL_SUB_info, 0, AllocValue(value1->type)); Instr* i = AppendInstr(OPCODE_MUL_SUB_info, 0, AllocValue(value1->type));
i->set_src1(value1); i->set_src1(value1);
i->set_src2(value2); i->set_src2(value2);

View File

@ -264,7 +264,7 @@ class HIRBuilder {
Value* new_value); Value* new_value);
Value* AtomicAdd(Value* address, Value* value); Value* AtomicAdd(Value* address, Value* value);
Value* AtomicSub(Value* address, Value* value); Value* AtomicSub(Value* address, Value* value);
void SetNJM(Value* value);
protected: protected:
void DumpValue(StringBuffer* str, Value* value); void DumpValue(StringBuffer* str, Value* value);
void DumpOp(StringBuffer* str, OpcodeSignatureType sig_type, Instr::Op* op); void DumpOp(StringBuffer* str, OpcodeSignatureType sig_type, Instr::Op* op);

View File

@ -284,6 +284,7 @@ enum Opcode {
OPCODE_TO_SINGLE, // i could not find a decent name to assign to this opcode, OPCODE_TO_SINGLE, // i could not find a decent name to assign to this opcode,
// as we already have OPCODE_ROUND. round double to float ( // as we already have OPCODE_ROUND. round double to float (
// ppc "single" fpu instruction result rounding behavior ) // ppc "single" fpu instruction result rounding behavior )
OPCODE_SET_NJM,
__OPCODE_MAX_VALUE, // Keep at end. __OPCODE_MAX_VALUE, // Keep at end.
}; };
@ -295,6 +296,7 @@ enum OpcodeFlags {
OPCODE_FLAG_IGNORE = (1 << 5), OPCODE_FLAG_IGNORE = (1 << 5),
OPCODE_FLAG_HIDE = (1 << 6), OPCODE_FLAG_HIDE = (1 << 6),
OPCODE_FLAG_PAIRED_PREV = (1 << 7), OPCODE_FLAG_PAIRED_PREV = (1 << 7),
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING = (1 << 8)
}; };
enum OpcodeSignatureType { enum OpcodeSignatureType {

View File

@ -151,25 +151,25 @@ DEFINE_OPCODE(
OPCODE_CONVERT, OPCODE_CONVERT,
"convert", "convert",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ROUND, OPCODE_ROUND,
"round", "round",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_CONVERT_I2F, OPCODE_VECTOR_CONVERT_I2F,
"vector_convert_i2f", "vector_convert_i2f",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_VECTOR_CONVERT_F2I, OPCODE_VECTOR_CONVERT_F2I,
"vector_convert_f2i", "vector_convert_f2i",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOAD_VECTOR_SHL, OPCODE_LOAD_VECTOR_SHL,
@ -456,13 +456,13 @@ DEFINE_OPCODE(
OPCODE_MUL_ADD, OPCODE_MUL_ADD,
"mul_add", "mul_add",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_MUL_SUB, OPCODE_MUL_SUB,
"mul_sub", "mul_sub",
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_NEG, OPCODE_NEG,
@ -480,43 +480,43 @@ DEFINE_OPCODE(
OPCODE_SQRT, OPCODE_SQRT,
"sqrt", "sqrt",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_RSQRT, OPCODE_RSQRT,
"rsqrt", "rsqrt",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_RECIP, OPCODE_RECIP,
"recip", "recip",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_POW2, OPCODE_POW2,
"pow2", "pow2",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_LOG2, OPCODE_LOG2,
"log2", "log2",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DOT_PRODUCT_3, OPCODE_DOT_PRODUCT_3,
"dot_product_3", "dot_product_3",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_DOT_PRODUCT_4, OPCODE_DOT_PRODUCT_4,
"dot_product_4", "dot_product_4",
OPCODE_SIG_V_V_V, OPCODE_SIG_V_V_V,
0) OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING)
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_AND, OPCODE_AND,
@ -685,5 +685,11 @@ DEFINE_OPCODE(
OPCODE_TO_SINGLE, OPCODE_TO_SINGLE,
"to_single", "to_single",
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
OPCODE_FLAG_DISALLOW_CONSTANT_FOLDING
)
DEFINE_OPCODE(
OPCODE_SET_NJM,
"set_njm",
OPCODE_SIG_X_V,
0 0
) )

View File

@ -199,7 +199,7 @@ void Value::Truncate(TypeName target_type) {
return; return;
} }
} }
//WARNING: this does not handle rounding flags at all!
void Value::Convert(TypeName target_type, RoundMode round_mode) { void Value::Convert(TypeName target_type, RoundMode round_mode) {
switch (type) { switch (type) {
case FLOAT32_TYPE: case FLOAT32_TYPE:
@ -401,7 +401,7 @@ void Value::MulHi(Value* other, bool is_unsigned) {
32); 32);
} }
break; break;
case INT64_TYPE: case INT64_TYPE: {
#if XE_COMPILER_MSVC #if XE_COMPILER_MSVC
if (is_unsigned) { if (is_unsigned) {
constant.i64 = __umulh(constant.i64, other->constant.i64); constant.i64 = __umulh(constant.i64, other->constant.i64);
@ -409,17 +409,19 @@ void Value::MulHi(Value* other, bool is_unsigned) {
constant.i64 = __mulh(constant.i64, other->constant.i64); constant.i64 = __mulh(constant.i64, other->constant.i64);
} }
#else #else
unsigned __int128 product;
if (is_unsigned) { if (is_unsigned) {
constant.i64 = static_cast<uint64_t>( product = static_cast<unsigned __int128>(constant.i64) *
static_cast<unsigned __int128>(constant.i64) * static_cast<unsigned __int128>(other->constant.i64);
static_cast<unsigned __int128>(other->constant.i64));
} else { } else {
constant.i64 = product = static_cast<unsigned __int128>(
static_cast<uint64_t>(static_cast<__int128>(constant.i64) * static_cast<__int128>(constant.i64) *
static_cast<__int128>(other->constant.i64)); static_cast<__int128>(other->constant.i64));
} }
constant.i64 = static_cast<int64_t>(product >> 64);
#endif // XE_COMPILER_MSVC #endif // XE_COMPILER_MSVC
break; break;
}
default: default:
assert_unhandled_case(type); assert_unhandled_case(type);
break; break;
@ -495,52 +497,6 @@ void Value::Max(Value* other) {
} }
} }
// Constant-evaluates dest = (value1 * value2) + value3.
// The operation is selected by dest->type: scalar f32, scalar f64, or the
// four f32 lanes of a vec128. It is written as a plain multiply followed by
// an add; no fused multiply-add routine is called. Any other type hits
// assert_unhandled_case and leaves dest untouched.
void Value::MulAdd(Value* dest, Value* value1, Value* value2, Value* value3) {
  // References, not copies: every operand is read at its point of use.
  const auto& multiplicand = value1->constant;
  const auto& multiplier = value2->constant;
  const auto& addend = value3->constant;
  auto& out = dest->constant;

  switch (dest->type) {
    case FLOAT32_TYPE:
      out.f32 = (multiplicand.f32 * multiplier.f32) + addend.f32;
      break;
    case FLOAT64_TYPE:
      out.f64 = (multiplicand.f64 * multiplier.f64) + addend.f64;
      break;
    case VEC128_TYPE:
      for (int lane = 0; lane < 4; lane++) {
        out.v128.f32[lane] =
            (multiplicand.v128.f32[lane] * multiplier.v128.f32[lane]) +
            addend.v128.f32[lane];
      }
      break;
    default:
      assert_unhandled_case(dest->type);
      break;
  }
}
// Constant-evaluates dest = (value1 * value2) - value3.
// The operation is selected by dest->type: scalar f32, scalar f64, or the
// four f32 lanes of a vec128. It is written as a plain multiply followed by
// a subtract; no fused routine is called. Any other type hits
// assert_unhandled_case and leaves dest untouched.
void Value::MulSub(Value* dest, Value* value1, Value* value2, Value* value3) {
  // References, not copies: every operand is read at its point of use.
  const auto& multiplicand = value1->constant;
  const auto& multiplier = value2->constant;
  const auto& subtrahend = value3->constant;
  auto& out = dest->constant;

  switch (dest->type) {
    case FLOAT32_TYPE:
      out.f32 = (multiplicand.f32 * multiplier.f32) - subtrahend.f32;
      break;
    case FLOAT64_TYPE:
      out.f64 = (multiplicand.f64 * multiplier.f64) - subtrahend.f64;
      break;
    case VEC128_TYPE:
      for (int lane = 0; lane < 4; lane++) {
        out.v128.f32[lane] =
            (multiplicand.v128.f32[lane] * multiplier.v128.f32[lane]) -
            subtrahend.v128.f32[lane];
      }
      break;
    default:
      assert_unhandled_case(dest->type);
      break;
  }
}
void Value::Neg() { void Value::Neg() {
switch (type) { switch (type) {
case INT8_TYPE: case INT8_TYPE:
@ -1643,11 +1599,7 @@ void Value::DenormalFlush() {
constant.v128.u32[i] = current_element; constant.v128.u32[i] = current_element;
} }
} }
// Rounds the stored f64 constant to single precision and widens it back, so
// the value stays FLOAT64_TYPE but holds a float-representable number.
// Asserts that this value is FLOAT64_TYPE. The narrowing uses whatever
// rounding the host applies to a double -> float static_cast.
void Value::ToSingle() {
  assert_true(type == FLOAT64_TYPE);
  const float as_single = static_cast<float>(constant.f64);
  constant.f64 = static_cast<double>(as_single);
}
void Value::CountLeadingZeros(const Value* other) { void Value::CountLeadingZeros(const Value* other) {
switch (other->type) { switch (other->type) {
case INT8_TYPE: case INT8_TYPE:

View File

@ -563,8 +563,7 @@ class Value {
void MulHi(Value* other, bool is_unsigned); void MulHi(Value* other, bool is_unsigned);
void Div(Value* other, bool is_unsigned); void Div(Value* other, bool is_unsigned);
void Max(Value* other); void Max(Value* other);
static void MulAdd(Value* dest, Value* value1, Value* value2, Value* value3);
static void MulSub(Value* dest, Value* value1, Value* value2, Value* value3);
void Neg(); void Neg();
void Abs(); void Abs();
void Sqrt(); void Sqrt();
@ -603,7 +602,6 @@ class Value {
bool saturate); bool saturate);
void ByteSwap(); void ByteSwap();
void DenormalFlush(); void DenormalFlush();
void ToSingle();
void CountLeadingZeros(const Value* other); void CountLeadingZeros(const Value* other);
bool Compare(Opcode opcode, Value* other); bool Compare(Opcode opcode, Value* other);
hir::Instr* GetDefSkipAssigns(); hir::Instr* GetDefSkipAssigns();
@ -615,7 +613,10 @@ class Value {
// returns true if every single use is as an operand to a single instruction // returns true if every single use is as an operand to a single instruction
// (add var2, var1, var1) // (add var2, var1, var1)
bool AllUsesByOneInsn() const; bool AllUsesByOneInsn() const;
// Returns true when this value's type could hold floating-point data:
// FLOAT32, FLOAT64 or VEC128. The "maybe" is because VEC128 is untyped data
// that can be treated as float or int depending on the context.
bool MaybeFloaty() const {
  switch (type) {
    case FLOAT32_TYPE:
    case FLOAT64_TYPE:
    case VEC128_TYPE:
      return true;
    default:
      return false;
  }
}
private: private:
static bool CompareInt8(Opcode opcode, Value* a, Value* b); static bool CompareInt8(Opcode opcode, Value* a, Value* b);
static bool CompareInt16(Opcode opcode, Value* a, Value* b); static bool CompareInt16(Opcode opcode, Value* a, Value* b);

View File

@ -364,7 +364,16 @@ int InstrEmit_mfvscr(PPCHIRBuilder& f, const InstrData& i) {
int InstrEmit_mtvscr(PPCHIRBuilder& f, const InstrData& i) { int InstrEmit_mtvscr(PPCHIRBuilder& f, const InstrData& i) {
// is this the right format? // is this the right format?
// TODO: what mtvscr does with the unused bits is implementation-defined; figure out what real hardware does.
Value* v = f.LoadVR(i.VX128_1.RB); Value* v = f.LoadVR(i.VX128_1.RB);
Value* has_njm_value = f.Extract(v, (uint8_t)3, INT32_TYPE);
f.SetNJM(f.IsTrue(f.And(has_njm_value, f.LoadConstantInt32(65536))));
f.StoreContext(offsetof(PPCContext, vscr_vec), v); f.StoreContext(offsetof(PPCContext, vscr_vec), v);
return 0; return 0;
} }

View File

@ -382,7 +382,6 @@ int InstrEmit_mtfsfx(PPCHIRBuilder& f, const InstrData& i) {
return 1; return 1;
} else { } else {
assert_zero(i.XFL.W); assert_zero(i.XFL.W);
// Store under control of mask. // Store under control of mask.
// Expand the mask from 8 bits -> 32 bits. // Expand the mask from 8 bits -> 32 bits.
uint32_t mask = 0; uint32_t mask = 0;
@ -402,7 +401,7 @@ int InstrEmit_mtfsfx(PPCHIRBuilder& f, const InstrData& i) {
// Update the system rounding mode. // Update the system rounding mode.
if (mask & 0x7) { if (mask & 0x7) {
f.SetRoundingMode(v); f.SetRoundingMode(f.And(v, f.LoadConstantInt32(7)));
} }
} }
if (i.XFL.Rc) { if (i.XFL.Rc) {
@ -425,7 +424,7 @@ int InstrEmit_mtfsfix(PPCHIRBuilder& f, const InstrData& i) {
// Update the system rounding mode. // Update the system rounding mode.
if (mask & 0x7) { if (mask & 0x7) {
f.SetRoundingMode(fpscr); f.SetRoundingMode(f.And(fpscr, f.LoadConstantInt32(7)));
} }
if (i.X.Rc) { if (i.X.Rc) {

View File

@ -64,9 +64,13 @@ DEFINE_string(
"or the module specified by the game. Leave blank to launch the default " "or the module specified by the game. Leave blank to launch the default "
"module.", "module.",
"General"); "General");
// Developer-only switch. Emulator::CreateVfsDeviceBasedOnPath passes the
// negation of this cvar as the third argument of the vfs::HostPathDevice it
// creates for .xex/.elf/.exe paths, where the literal `true` was passed
// before.
// NOTE(review): that argument is presumably the device's read-only flag --
// confirm against the vfs::HostPathDevice constructor.
DEFINE_bool(allow_game_relative_writes, false,
"Not useful to non-developers. Allows code to write to paths "
"relative to game://. Used for "
"generating test data to compare with original hardware. ",
"General");
namespace xe { namespace xe {
using namespace xe::literals; using namespace xe::literals;
Emulator::GameConfigLoadCallback::GameConfigLoadCallback(Emulator& emulator) Emulator::GameConfigLoadCallback::GameConfigLoadCallback(Emulator& emulator)
@ -282,7 +286,8 @@ const std::unique_ptr<vfs::Device> Emulator::CreateVfsDeviceBasedOnPath(
auto extension = xe::utf8::lower_ascii(xe::path_to_utf8(path.extension())); auto extension = xe::utf8::lower_ascii(xe::path_to_utf8(path.extension()));
if (extension == ".xex" || extension == ".elf" || extension == ".exe") { if (extension == ".xex" || extension == ".elf" || extension == ".exe") {
auto parent_path = path.parent_path(); auto parent_path = path.parent_path();
return std::make_unique<vfs::HostPathDevice>(mount_path, parent_path, true); return std::make_unique<vfs::HostPathDevice>(
mount_path, parent_path, !cvars::allow_game_relative_writes);
} else { } else {
return std::make_unique<vfs::DiscImageDevice>(mount_path, path); return std::make_unique<vfs::DiscImageDevice>(mount_path, path);
} }
@ -653,8 +658,8 @@ bool Emulator::ExceptionCallback(Exception* ex) {
// debugger. // debugger.
return false; return false;
} else if (processor()->is_debugger_attached()) { } else if (processor()->is_debugger_attached()) {
// Let the debugger handle this exception. It may decide to continue past it // Let the debugger handle this exception. It may decide to continue past
// (if it was a stepping breakpoint, etc). // it (if it was a stepping breakpoint, etc).
return processor()->OnUnhandledException(ex); return processor()->OnUnhandledException(ex);
} }
@ -823,8 +828,8 @@ static std::string format_version(xex2_version version) {
X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path, X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
const std::string_view module_path) { const std::string_view module_path) {
// Making changes to the UI (setting the icon) and executing game config load // Making changes to the UI (setting the icon) and executing game config
// callbacks which expect to be called from the UI thread. // load callbacks which expect to be called from the UI thread.
assert_true(display_window_->app_context().IsInUIThread()); assert_true(display_window_->app_context().IsInUIThread());
// Setup NullDevices for raw HDD partition accesses // Setup NullDevices for raw HDD partition accesses
@ -832,12 +837,12 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
// By using a NullDevice that just returns success to all IO requests it // By using a NullDevice that just returns success to all IO requests it
// should allow games to believe cache/raw disk was accessed successfully // should allow games to believe cache/raw disk was accessed successfully
// NOTE: this should probably be moved to xenia_main.cc, but right now we need // NOTE: this should probably be moved to xenia_main.cc, but right now we
// to register the \Device\Harddisk0\ NullDevice _after_ the // need to register the \Device\Harddisk0\ NullDevice _after_ the
// \Device\Harddisk0\Partition1 HostPathDevice, otherwise requests to // \Device\Harddisk0\Partition1 HostPathDevice, otherwise requests to
// Partition1 will go to this. Registering during CompleteLaunch allows us to // Partition1 will go to this. Registering during CompleteLaunch allows us
// make sure any HostPathDevices are ready beforehand. // to make sure any HostPathDevices are ready beforehand. (see comment above
// (see comment above cache:\ device registration for more info about why) // cache:\ device registration for more info about why)
auto null_paths = {std::string("\\Partition0"), std::string("\\Cache0"), auto null_paths = {std::string("\\Partition0"), std::string("\\Cache0"),
std::string("\\Cache1")}; std::string("\\Cache1")};
auto null_device = auto null_device =
@ -900,8 +905,8 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
if (module->title_id()) { if (module->title_id()) {
auto title_id = fmt::format("{:08X}", module->title_id()); auto title_id = fmt::format("{:08X}", module->title_id());
// Load the per-game configuration file and make sure updates are handled by // Load the per-game configuration file and make sure updates are handled
// the callbacks. // by the callbacks.
config::LoadGameConfig(title_id); config::LoadGameConfig(title_id);
assert_true(game_config_load_callback_loop_next_index_ == SIZE_MAX); assert_true(game_config_load_callback_loop_next_index_ == SIZE_MAX);
game_config_load_callback_loop_next_index_ = 0; game_config_load_callback_loop_next_index_ = 0;
@ -934,10 +939,10 @@ X_STATUS Emulator::CompleteLaunch(const std::filesystem::path& path,
} }
} }
// Initializing the shader storage in a blocking way so the user doesn't miss // Initializing the shader storage in a blocking way so the user doesn't
// the initial seconds - for instance, sound from an intro video may start // miss the initial seconds - for instance, sound from an intro video may
// playing before the video can be seen if doing this in parallel with the // start playing before the video can be seen if doing this in parallel with
// main thread. // the main thread.
on_shader_storage_initialization(true); on_shader_storage_initialization(true);
graphics_system_->InitializeShaderStorage(cache_root_, title_id_.value(), graphics_system_->InitializeShaderStorage(cache_root_, title_id_.value(),
true); true);