diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc
index cdc25f136..839a729e4 100644
--- a/src/alloy/backend/ivm/ivm_intcode.cc
+++ b/src/alloy/backend/ivm/ivm_intcode.cc
@@ -14,9 +14,6 @@
 #include
 #include
 
-// TODO(benvanik): reimplement packing functions
-#include
-
 // TODO(benvanik): make a compile time flag?
 //#define DYNAMIC_REGISTER_ACCESS_CHECK(address) false
 #define DYNAMIC_REGISTER_ACCESS_CHECK(address) \
@@ -38,10 +35,10 @@ using alloy::hir::Value;
 using alloy::runtime::Function;
 using alloy::runtime::FunctionInfo;
 
-#define IPRINT
-#define IFLUSH()
-#define DPRINT
-#define DFLUSH()
+#define IPRINT(...) (void())
+#define IFLUSH() (void())
+#define DPRINT(...) (void())
+#define DFLUSH() (void())
 
 //#define IPRINT if (ics.thread_state->thread_id() == 1) printf
 //#define IFLUSH() fflush(stdout)
@@ -101,7 +98,7 @@ uint32_t AllocConstant(TranslationContext& ctx, Value* value) {
 uint32_t AllocLabel(TranslationContext& ctx, Label* label) {
   // If it's a back-branch to an already tagged label avoid setting up
   // a reference.
-  uint32_t value = (uint32_t)label->tag;
+  uint32_t value = *reinterpret_cast(label->tag);
   if (value & 0x80000000) {
     // Already set.
     return AllocConstant(ctx, value & ~0x80000000);
@@ -124,11 +121,11 @@ uint32_t AllocLabel(TranslationContext& ctx, Label* label) {
 
 uint32_t AllocDynamicRegister(TranslationContext& ctx, Value* value) {
   if (value->flags & VALUE_IS_ALLOCATED) {
-    return (uint32_t)value->tag;
+    return *reinterpret_cast(value->tag);
   } else {
     value->flags |= VALUE_IS_ALLOCATED;
     auto reg = ctx.register_count++;
-    value->tag = (void*)reg;
+    value->tag = reinterpret_cast(reg);
     return (uint32_t)reg;
   }
 }
@@ -207,6 +204,7 @@ int TranslateInvalid(TranslationContext& ctx, Instr* i) {
 
 uint32_t IntCode_COMMENT(IntCodeState& ics, const IntCode* i) {
   char* value = (char*)(i->src1_reg | ((uint64_t)i->src2_reg << 32));
+  (void)(value);
   IPRINT("XE[t] :%d: %s\n", ics.thread_state->thread_id(), value);
   IFLUSH();
   return IA_NEXT;
@@ -1186,12 +1184,7 @@ int Translate_LOAD_VECTOR_SHR(TranslationContext& ctx, Instr* i) {
 }
 
 uint32_t IntCode_LOAD_CLOCK(IntCodeState& ics, const IntCode* i) {
-  LARGE_INTEGER counter;
-  uint64_t time = 0;
-  if (QueryPerformanceCounter(&counter)) {
-    time = counter.QuadPart;
-  }
-  ics.rf[i->dest_reg].i64 = time;
+  ics.rf[i->dest_reg].i64 = poly::threading::ticks();
   return IA_NEXT;
 }
 int Translate_LOAD_CLOCK(TranslationContext& ctx, Instr* i) {
@@ -2664,7 +2657,7 @@ int Translate_MUL(TranslationContext& ctx, Instr* i) {
   }
 }
 
-namespace {
+#if !XE_COMPILER_MSVC
 uint64_t Mul128(uint64_t xi_low, uint64_t xi_high, uint64_t yi_low,
                 uint64_t yi_high) {
   // 128bit multiply, simplified for two input 64bit integers.
@@ -2680,7 +2673,6 @@ uint64_t Mul128(uint64_t xi_low, uint64_t xi_high, uint64_t yi_low,
   uint64_t f = yi_high & LO_WORD;
   uint64_t e = (yi_high & HI_WORD) >> 32LL;
   uint64_t acc = d * h;
-  uint64_t o1 = acc & LO_WORD;
   acc >>= 32LL;
   uint64_t carry = 0;
 
@@ -2692,7 +2684,6 @@ uint64_t Mul128(uint64_t xi_low, uint64_t xi_high, uint64_t yi_low,
   if (acc < ac2) {
     carry++;
   }
-  uint64_t rv2_lo = o1 | (acc << 32LL);
   ac2 = (acc >> 32LL) | (carry << 32LL);
   carry = 0;
 
@@ -2719,7 +2710,7 @@ uint64_t Mul128(uint64_t xi_low, uint64_t xi_high, uint64_t yi_low,
   return rv2_hi;
 }
-}
+#endif  // !XE_COMPILER_MSVC
 
 uint32_t IntCode_MUL_HI_I8_I8(IntCodeState& ics, const IntCode* i) {
   int16_t v =
       (int16_t)ics.rf[i->src1_reg].i8 * (int16_t)ics.rf[i->src2_reg].i8;
@@ -3565,33 +3556,21 @@ int Translate_BYTE_SWAP(TranslationContext& ctx, Instr* i) {
 uint32_t IntCode_CNTLZ_I8(IntCodeState& ics, const IntCode* i) {
   // CHECK
   assert_always();
-  DWORD index;
-  DWORD mask = ics.rf[i->src1_reg].i8;
-  BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
-  ics.rf[i->dest_reg].i8 = is_nonzero ? (int8_t)(index - 24) ^ 0x7 : 8;
+  ics.rf[i->dest_reg].i8 = poly::lzcnt(ics.rf[i->src1_reg].i8);
   return IA_NEXT;
 }
 uint32_t IntCode_CNTLZ_I16(IntCodeState& ics, const IntCode* i) {
   // CHECK
   assert_always();
-  DWORD index;
-  DWORD mask = ics.rf[i->src1_reg].i16;
-  BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
-  ics.rf[i->dest_reg].i8 = is_nonzero ? (int8_t)(index - 16) ^ 0xF : 16;
+  ics.rf[i->dest_reg].i8 = poly::lzcnt(ics.rf[i->src1_reg].i16);
   return IA_NEXT;
 }
 uint32_t IntCode_CNTLZ_I32(IntCodeState& ics, const IntCode* i) {
-  DWORD index;
-  DWORD mask = ics.rf[i->src1_reg].i32;
-  BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
-  ics.rf[i->dest_reg].i8 = is_nonzero ? (int8_t)index ^ 0x1F : 32;
+  ics.rf[i->dest_reg].i8 = poly::lzcnt(ics.rf[i->src1_reg].i32);
   return IA_NEXT;
 }
 uint32_t IntCode_CNTLZ_I64(IntCodeState& ics, const IntCode* i) {
-  DWORD index;
-  DWORD64 mask = ics.rf[i->src1_reg].i64;
-  BOOLEAN is_nonzero = _BitScanReverse64(&index, mask);
-  ics.rf[i->dest_reg].i8 = is_nonzero ? (int8_t)index ^ 0x3F : 64;
+  ics.rf[i->dest_reg].i8 = poly::lzcnt(ics.rf[i->src1_reg].i64);
   return IA_NEXT;
 }
 int Translate_CNTLZ(TranslationContext& ctx, Instr* i) {
@@ -3872,21 +3851,18 @@ uint32_t IntCode_PACK_FLOAT16_2(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
   vec128_t& dest = ics.rf[i->dest_reg].v128;
   dest.ix = dest.iy = dest.iz = 0;
-  dest.iw =
-      ((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.x) << 16) |
-      DirectX::PackedVector::XMConvertFloatToHalf(src1.y);
+  dest.iw = (uint32_t(poly::float_to_half(src1.x)) << 16) |
+            poly::float_to_half(src1.y);
   return IA_NEXT;
 }
 uint32_t IntCode_PACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) {
   const vec128_t& src1 = ics.rf[i->src1_reg].v128;
   vec128_t& dest = ics.rf[i->dest_reg].v128;
   dest.ix = dest.iy = 0;
-  dest.iz =
-      ((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.x) << 16) |
-      DirectX::PackedVector::XMConvertFloatToHalf(src1.y);
-  dest.iw =
-      ((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.z) << 16) |
-      DirectX::PackedVector::XMConvertFloatToHalf(src1.w);
+  dest.iz = (uint32_t(poly::float_to_half(src1.x)) << 16) |
+            poly::float_to_half(src1.y);
+  dest.iw = (uint32_t(poly::float_to_half(src1.z)) << 16) |
+            poly::float_to_half(src1.w);
   return IA_NEXT;
 }
 uint32_t IntCode_PACK_SHORT_2(IntCodeState& ics, const IntCode* i) {
@@ -3932,7 +3908,7 @@ uint32_t IntCode_UNPACK_FLOAT16_2(IntCodeState& ics, const IntCode* i) {
   vec128_t& dest = ics.rf[i->dest_reg].v128;
   uint32_t src = src1.iw;
   for (int n = 0; n < 2; n++) {
-    dest.f4[n] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)src);
+    dest.f4[n] = poly::half_to_float(uint16_t(src));
     src >>= 16;
   }
   dest.f4[2] = 0.0f;
@@ -3944,7 +3920,7 @@ uint32_t IntCode_UNPACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) {
   vec128_t& dest = ics.rf[i->dest_reg].v128;
   uint64_t src = src1.iz | ((uint64_t)src1.iw << 32);
   for (int n = 0; n < 4; n++) {
-    dest.f4[n] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)src);
+    dest.f4[n] = poly::half_to_float(uint16_t(src));
     src >>= 16;
   }
   return IA_NEXT;
diff --git a/src/alloy/frontend/ppc/ppc_instr_tables.h b/src/alloy/frontend/ppc/ppc_instr_tables.h
index 998b31039..336d28c7d 100644
--- a/src/alloy/frontend/ppc/ppc_instr_tables.h
+++ b/src/alloy/frontend/ppc/ppc_instr_tables.h
@@ -10,6 +10,8 @@
 #ifndef ALLOY_FRONTEND_PPC_PPC_INSTR_TABLES_H_
 #define ALLOY_FRONTEND_PPC_PPC_INSTR_TABLES_H_
 
+#include
+
 #include
 
 namespace alloy {
diff --git a/src/alloy/hir/value.cc b/src/alloy/hir/value.cc
index d25b6bea0..770bdfe53 100644
--- a/src/alloy/hir/value.cc
+++ b/src/alloy/hir/value.cc
@@ -9,6 +9,8 @@
 
 #include
 
+#include
+
 namespace alloy {
 namespace hir {
diff --git a/src/alloy/runtime/debugger.h b/src/alloy/runtime/debugger.h
index d9b97a8a9..bacb58f3f 100644
--- a/src/alloy/runtime/debugger.h
+++ b/src/alloy/runtime/debugger.h
@@ -12,6 +12,7 @@
 
 #include
 #include
+#include
 #include
 #include
diff --git a/src/poly/assert.h b/src/poly/assert.h
index 50e083d16..80d1898c4 100644
--- a/src/poly/assert.h
+++ b/src/poly/assert.h
@@ -70,7 +70,7 @@ namespace poly {
   poly_assert((expr) != nullptr || !message)
 
 #define assert_unhandled_case(variable) \
-  assert_always("unhandled switch("## #variable##") case")
+  assert_always("unhandled switch(" #variable ") case")
 
 }  // namespace poly
diff --git a/src/poly/atomic.h b/src/poly/atomic.h
index dab7e39bc..634b2d101 100644
--- a/src/poly/atomic.h
+++ b/src/poly/atomic.h
@@ -15,32 +15,35 @@
 
 #include
 #include
 
+#if XE_LIKE_OSX
+#include
+#endif  // XE_LIKE_OSX
+
 namespace poly {
 
 // These functions are modeled off of the Apple OSAtomic routines
 // http://developer.apple.com/library/mac/#documentation/DriversKernelHardware/Reference/libkern_ref/OSAtomic_h/
 #if XE_LIKE_OSX
-#include
 
 inline int32_t atomic_inc(volatile int32_t* value) {
-  return OSAtomicIncrement32Barrier(reinterpret_cast(value));
+  return OSAtomicIncrement32Barrier(reinterpret_cast(value));
 }
 inline int32_t atomic_dec(volatile int32_t* value) {
-  return OSAtomicDecrement32Barrier(reinterpret_cast(value));
+  return OSAtomicDecrement32Barrier(reinterpret_cast(value));
 }
 inline int32_t atomic_exchange(int32_t new_value, volatile int32_t* value) {
-  //
+  return OSAtomicCompareAndSwap32Barrier(*value, new_value, value);
 }
 inline int64_t atomic_exchange(int64_t new_value, volatile int64_t* value) {
-  //
+  return OSAtomicCompareAndSwap64Barrier(*value, new_value, value);
 }
 inline int32_t atomic_cas(int32_t old_value, int32_t new_value,
                           volatile int32_t* value) {
   return OSAtomicCompareAndSwap32Barrier(
-      old_value, new_value, reinterpret_cast(value));
+      old_value, new_value, reinterpret_cast(value));
 }
 
 #elif XE_LIKE_WIN32
@@ -77,10 +80,10 @@ inline int32_t atomic_dec(volatile int32_t* value) {
 }
 
 inline int32_t atomic_exchange(int32_t new_value, volatile int32_t* value) {
-  //
+  return __sync_val_compare_and_swap(*value, value, new_value);
 }
 inline int64_t atomic_exchange(int64_t new_value, volatile int64_t* value) {
-  //
+  return __sync_val_compare_and_swap(*value, value, new_value);
 }
 inline int32_t atomic_cas(int32_t old_value, int32_t new_value,
diff --git a/src/poly/math.cc b/src/poly/math.cc
new file mode 100644
index 000000000..af7c661f7
--- /dev/null
+++ b/src/poly/math.cc
@@ -0,0 +1,69 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2014 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include
+
+namespace poly {
+
+// TODO(benvanik): replace with alternate implementation.
+// XMConvertFloatToHalf
+// Copyright (c) Microsoft Corporation. All rights reserved.
+uint16_t float_to_half(float value) {
+  uint32_t Result;
+  uint32_t IValue = ((uint32_t *)(&value))[0];
+  uint32_t Sign = (IValue & 0x80000000U) >> 16U;
+  IValue = IValue & 0x7FFFFFFFU;  // Hack off the sign
+  if (IValue > 0x47FFEFFFU) {
+    // The number is too large to be represented as a half. Saturate to
+    // infinity.
+    Result = 0x7FFFU;
+  } else {
+    if (IValue < 0x38800000U) {
+      // The number is too small to be represented as a normalized half.
+      // Convert it to a denormalized value.
+      uint32_t Shift = 113U - (IValue >> 23U);
+      IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
+    } else {
+      // Rebias the exponent to represent the value as a normalized half.
+      IValue += 0xC8000000U;
+    }
+    Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U) & 0x7FFFU;
+  }
+  return (uint16_t)(Result | Sign);
+}
+
+// TODO(benvanik): replace with alternate implementation.
+// XMConvertHalfToFloat
+// Copyright (c) Microsoft Corporation. All rights reserved.
+float half_to_float(uint16_t value) {
+  uint32_t Mantissa = (uint32_t)(value & 0x03FF);
+  uint32_t Exponent;
+  if ((value & 0x7C00) != 0) {
+    // The value is normalized
+    Exponent = (uint32_t)((value >> 10) & 0x1F);
+  } else if (Mantissa != 0) {
+    // The value is denormalized
+    // Normalize the value in the resulting float
+    Exponent = 1;
+    do {
+      Exponent--;
+      Mantissa <<= 1;
+    } while ((Mantissa & 0x0400) == 0);
+    Mantissa &= 0x03FF;
+  } else {
+    // The value is zero
+    Exponent = (uint32_t)-112;
+  }
+  uint32_t Result = ((value & 0x8000) << 16) |  // Sign
+                    ((Exponent + 112) << 23) |  // Exponent
+                    (Mantissa << 13);           // Mantissa
+  return *(float *)&Result;
+}
+
+}  // namespace poly
diff --git a/src/poly/math.h b/src/poly/math.h
index 7a20fade4..57b0190d1 100644
--- a/src/poly/math.h
+++ b/src/poly/math.h
@@ -25,6 +25,7 @@ namespace poly {
 // return value is the size of the input operand (8, 16, 32, or 64). If the most
 // significant bit of value is one, the return value is zero.
 #if XE_COMPILER_MSVC
+#if 1
 inline uint8_t lzcnt(uint8_t v) {
   return static_cast(__lzcnt16(v) - 8);
 }
@@ -32,6 +33,32 @@ inline uint8_t lzcnt(uint16_t v) {
   return static_cast(__lzcnt16(v));
 }
 inline uint8_t lzcnt(uint32_t v) { return static_cast(__lzcnt(v)); }
 inline uint8_t lzcnt(uint64_t v) { return static_cast(__lzcnt64(v)); }
 #else
+inline uint8_t lzcnt(uint8_t v) {
+  DWORD index;
+  DWORD mask = v;
+  BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
+  return static_cast(is_nonzero ? int8_t(index - 24) ^ 0x7 : 8);
+}
+inline uint8_t lzcnt(uint16_t v) {
+  DWORD index;
+  DWORD mask = v;
+  BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
+  return static_cast(is_nonzero ? int8_t(index - 16) ^ 0xF : 16);
+}
+inline uint8_t lzcnt(uint32_t v) {
+  DWORD index;
+  DWORD mask = v;
+  BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
+  return static_cast(is_nonzero ? int8_t(index) ^ 0x1F : 32);
+}
+inline uint8_t lzcnt(uint64_t v) {
+  DWORD index;
+  DWORD64 mask = v;
+  BOOLEAN is_nonzero = _BitScanReverse64(&index, mask);
+  return static_cast(is_nonzero ? int8_t(index) ^ 0x3F : 64);
+}
+#endif  // LZCNT supported
+#else
 inline uint8_t lzcnt(uint8_t v) {
   return static_cast(__builtin_clzs(v) - 8);
 }
@@ -121,6 +148,9 @@ int64_t m128_i64(const __m128& v) {
   return m128_i64(_mm_castps_pd(v));
 }
 
+uint16_t float_to_half(float value);
+float half_to_float(uint16_t value);
+
 }  // namespace poly
 
 #endif  // POLY_MATH_H_
diff --git a/src/poly/sources.gypi b/src/poly/sources.gypi
index c93d66a5d..f9a0fe960 100644
--- a/src/poly/sources.gypi
+++ b/src/poly/sources.gypi
@@ -5,6 +5,7 @@
     'atomic.h',
     'config.h',
     'cxx_compat.h',
+    'math.cc',
     'math.h',
     'platform.h',
     'poly-private.h',
diff --git a/src/poly/threading.h b/src/poly/threading.h
index 6fb792b2d..5059aaf3a 100644
--- a/src/poly/threading.h
+++ b/src/poly/threading.h
@@ -18,6 +18,9 @@
 namespace poly {
 namespace threading {
 
+// Gets the current high-performance tick count.
+uint64_t ticks();
+
 // Gets a stable thread-specific ID, but may not be. Use for informative
 // purposes only.
 uint32_t current_thread_id();
diff --git a/src/poly/threading_mac.cc b/src/poly/threading_mac.cc
index 55b508f35..5ee15b677 100644
--- a/src/poly/threading_mac.cc
+++ b/src/poly/threading_mac.cc
@@ -9,12 +9,16 @@
 
 #include
 
+#include
+#include
 #include
 #include
 
 namespace poly {
 namespace threading {
 
+uint64_t ticks() { return mach_absolute_time(); }
+
 uint32_t current_thread_id() {
   mach_port_t tid = pthread_mach_thread_np(pthread_self());
   return static_cast(tid);
diff --git a/src/poly/threading_win.cc b/src/poly/threading_win.cc
index b95a6c455..27689ab28 100644
--- a/src/poly/threading_win.cc
+++ b/src/poly/threading_win.cc
@@ -14,6 +14,15 @@
 namespace poly {
 namespace threading {
 
+uint64_t ticks() {
+  LARGE_INTEGER counter;
+  uint64_t time = 0;
+  if (QueryPerformanceCounter(&counter)) {
+    time = counter.QuadPart;
+  }
+  return time;
+}
+
 uint32_t current_thread_id() {
   return static_cast(GetCurrentThreadId());
 }
diff --git a/src/xenia/types.h b/src/xenia/types.h
index 78a60de8c..59a07c8da 100644
--- a/src/xenia/types.h
+++ b/src/xenia/types.h
@@ -11,6 +11,7 @@
 #define XENIA_TYPES_H_
 
 #include
+#include
 #include
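
For reference, below is a small standalone sanity check of the poly helpers this change introduces (poly::float_to_half, poly::half_to_float, poly::lzcnt, and poly::threading::ticks). It is an illustrative sketch only, not part of the commit; it assumes the new headers are reachable as poly/math.h and poly/threading.h via the project's include paths, and it only uses inputs that all of the code paths shown above handle (zero inputs are avoided because builtin leading-zero intrinsics commonly leave them undefined).

// Illustrative sanity check for the new poly helpers (not part of this diff).
// Assumes <poly/math.h> and <poly/threading.h> resolve via the project's
// include paths, as added by this change.
#include <cassert>
#include <cstdint>
#include <cstdio>

#include <poly/math.h>
#include <poly/threading.h>

int main() {
  // float <-> half round trip; 1.5f is exactly representable as a float16.
  uint16_t h = poly::float_to_half(1.5f);
  assert(poly::half_to_float(h) == 1.5f);

  // lzcnt returns the number of leading zero bits for the operand width,
  // mirroring the _BitScanReverse-based code removed from ivm_intcode.cc.
  assert(poly::lzcnt(uint32_t(1)) == 31);
  assert(poly::lzcnt(uint32_t(0x80000000u)) == 0);
  assert(poly::lzcnt(uint64_t(1)) == 63);

  // ticks() wraps QueryPerformanceCounter / mach_absolute_time, so two
  // consecutive reads should be non-decreasing.
  uint64_t t0 = poly::threading::ticks();
  uint64_t t1 = poly::threading::ticks();
  assert(t1 >= t0);

  std::printf("poly helpers ok\n");
  return 0;
}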