Fixing a bunch of alloy clang issues.
This commit is contained in:
parent
9b78dd977b
commit
7ee79318e8
|
@ -14,9 +14,6 @@
|
||||||
#include <alloy/runtime/symbol_info.h>
|
#include <alloy/runtime/symbol_info.h>
|
||||||
#include <alloy/runtime/thread_state.h>
|
#include <alloy/runtime/thread_state.h>
|
||||||
|
|
||||||
// TODO(benvanik): reimplement packing functions
|
|
||||||
#include <DirectXPackedVector.h>
|
|
||||||
|
|
||||||
// TODO(benvanik): make a compile time flag?
|
// TODO(benvanik): make a compile time flag?
|
||||||
//#define DYNAMIC_REGISTER_ACCESS_CHECK(address) false
|
//#define DYNAMIC_REGISTER_ACCESS_CHECK(address) false
|
||||||
#define DYNAMIC_REGISTER_ACCESS_CHECK(address) \
|
#define DYNAMIC_REGISTER_ACCESS_CHECK(address) \
|
||||||
|
@ -38,10 +35,10 @@ using alloy::hir::Value;
|
||||||
using alloy::runtime::Function;
|
using alloy::runtime::Function;
|
||||||
using alloy::runtime::FunctionInfo;
|
using alloy::runtime::FunctionInfo;
|
||||||
|
|
||||||
#define IPRINT
|
#define IPRINT(...) (void())
|
||||||
#define IFLUSH()
|
#define IFLUSH() (void())
|
||||||
#define DPRINT
|
#define DPRINT(...) (void())
|
||||||
#define DFLUSH()
|
#define DFLUSH() (void())
|
||||||
|
|
||||||
//#define IPRINT if (ics.thread_state->thread_id() == 1) printf
|
//#define IPRINT if (ics.thread_state->thread_id() == 1) printf
|
||||||
//#define IFLUSH() fflush(stdout)
|
//#define IFLUSH() fflush(stdout)
|
||||||
|
@ -101,7 +98,7 @@ uint32_t AllocConstant(TranslationContext& ctx, Value* value) {
|
||||||
uint32_t AllocLabel(TranslationContext& ctx, Label* label) {
|
uint32_t AllocLabel(TranslationContext& ctx, Label* label) {
|
||||||
// If it's a back-branch to an already tagged label avoid setting up
|
// If it's a back-branch to an already tagged label avoid setting up
|
||||||
// a reference.
|
// a reference.
|
||||||
uint32_t value = (uint32_t)label->tag;
|
uint32_t value = *reinterpret_cast<uint32_t*>(label->tag);
|
||||||
if (value & 0x80000000) {
|
if (value & 0x80000000) {
|
||||||
// Already set.
|
// Already set.
|
||||||
return AllocConstant(ctx, value & ~0x80000000);
|
return AllocConstant(ctx, value & ~0x80000000);
|
||||||
|
@ -124,11 +121,11 @@ uint32_t AllocLabel(TranslationContext& ctx, Label* label) {
|
||||||
|
|
||||||
// Returns the register index assigned to |value|, handing out the next index
// from ctx.register_count the first time a given value is seen. The index is
// remembered in value->tag and VALUE_IS_ALLOCATED marks that it is present.
uint32_t AllocDynamicRegister(TranslationContext& ctx, Value* value) {
  if (value->flags & VALUE_IS_ALLOCATED) {
    // The tag stores the index itself as a pointer-sized integer (see the
    // store below), not the address of one, so convert it back instead of
    // dereferencing it.
    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(value->tag));
  } else {
    value->flags |= VALUE_IS_ALLOCATED;
    auto reg = ctx.register_count++;
    // Widen to pointer size first so the integer->pointer cast is exact on
    // 64-bit targets.
    value->tag = reinterpret_cast<void*>(static_cast<uintptr_t>(reg));
    return (uint32_t)reg;
  }
}
|
||||||
|
@ -207,6 +204,7 @@ int TranslateInvalid(TranslationContext& ctx, Instr* i) {
|
||||||
|
|
||||||
uint32_t IntCode_COMMENT(IntCodeState& ics, const IntCode* i) {
|
uint32_t IntCode_COMMENT(IntCodeState& ics, const IntCode* i) {
|
||||||
char* value = (char*)(i->src1_reg | ((uint64_t)i->src2_reg << 32));
|
char* value = (char*)(i->src1_reg | ((uint64_t)i->src2_reg << 32));
|
||||||
|
(void)(value);
|
||||||
IPRINT("XE[t] :%d: %s\n", ics.thread_state->thread_id(), value);
|
IPRINT("XE[t] :%d: %s\n", ics.thread_state->thread_id(), value);
|
||||||
IFLUSH();
|
IFLUSH();
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
|
@ -1186,12 +1184,7 @@ int Translate_LOAD_VECTOR_SHR(TranslationContext& ctx, Instr* i) {
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t IntCode_LOAD_CLOCK(IntCodeState& ics, const IntCode* i) {
|
uint32_t IntCode_LOAD_CLOCK(IntCodeState& ics, const IntCode* i) {
|
||||||
LARGE_INTEGER counter;
|
ics.rf[i->dest_reg].i64 = poly::threading::ticks();
|
||||||
uint64_t time = 0;
|
|
||||||
if (QueryPerformanceCounter(&counter)) {
|
|
||||||
time = counter.QuadPart;
|
|
||||||
}
|
|
||||||
ics.rf[i->dest_reg].i64 = time;
|
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
}
|
}
|
||||||
int Translate_LOAD_CLOCK(TranslationContext& ctx, Instr* i) {
|
int Translate_LOAD_CLOCK(TranslationContext& ctx, Instr* i) {
|
||||||
|
@ -2664,7 +2657,7 @@ int Translate_MUL(TranslationContext& ctx, Instr* i) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
#if !XE_COMPILER_MSVC
|
||||||
uint64_t Mul128(uint64_t xi_low, uint64_t xi_high, uint64_t yi_low,
|
uint64_t Mul128(uint64_t xi_low, uint64_t xi_high, uint64_t yi_low,
|
||||||
uint64_t yi_high) {
|
uint64_t yi_high) {
|
||||||
// 128bit multiply, simplified for two input 64bit integers.
|
// 128bit multiply, simplified for two input 64bit integers.
|
||||||
|
@ -2680,7 +2673,6 @@ uint64_t Mul128(uint64_t xi_low, uint64_t xi_high, uint64_t yi_low,
|
||||||
uint64_t f = yi_high & LO_WORD;
|
uint64_t f = yi_high & LO_WORD;
|
||||||
uint64_t e = (yi_high & HI_WORD) >> 32LL;
|
uint64_t e = (yi_high & HI_WORD) >> 32LL;
|
||||||
uint64_t acc = d * h;
|
uint64_t acc = d * h;
|
||||||
uint64_t o1 = acc & LO_WORD;
|
|
||||||
acc >>= 32LL;
|
acc >>= 32LL;
|
||||||
uint64_t carry = 0;
|
uint64_t carry = 0;
|
||||||
|
|
||||||
|
@ -2692,7 +2684,6 @@ uint64_t Mul128(uint64_t xi_low, uint64_t xi_high, uint64_t yi_low,
|
||||||
if (acc < ac2) {
|
if (acc < ac2) {
|
||||||
carry++;
|
carry++;
|
||||||
}
|
}
|
||||||
uint64_t rv2_lo = o1 | (acc << 32LL);
|
|
||||||
ac2 = (acc >> 32LL) | (carry << 32LL);
|
ac2 = (acc >> 32LL) | (carry << 32LL);
|
||||||
carry = 0;
|
carry = 0;
|
||||||
|
|
||||||
|
@ -2719,7 +2710,7 @@ uint64_t Mul128(uint64_t xi_low, uint64_t xi_high, uint64_t yi_low,
|
||||||
|
|
||||||
return rv2_hi;
|
return rv2_hi;
|
||||||
}
|
}
|
||||||
}
|
#endif // !XE_COMPILER_MSVC
|
||||||
|
|
||||||
uint32_t IntCode_MUL_HI_I8_I8(IntCodeState& ics, const IntCode* i) {
|
uint32_t IntCode_MUL_HI_I8_I8(IntCodeState& ics, const IntCode* i) {
|
||||||
int16_t v = (int16_t)ics.rf[i->src1_reg].i8 * (int16_t)ics.rf[i->src2_reg].i8;
|
int16_t v = (int16_t)ics.rf[i->src1_reg].i8 * (int16_t)ics.rf[i->src2_reg].i8;
|
||||||
|
@ -3565,33 +3556,21 @@ int Translate_BYTE_SWAP(TranslationContext& ctx, Instr* i) {
|
||||||
uint32_t IntCode_CNTLZ_I8(IntCodeState& ics, const IntCode* i) {
|
uint32_t IntCode_CNTLZ_I8(IntCodeState& ics, const IntCode* i) {
|
||||||
// CHECK
|
// CHECK
|
||||||
assert_always();
|
assert_always();
|
||||||
DWORD index;
|
ics.rf[i->dest_reg].i8 = poly::lzcnt(ics.rf[i->src1_reg].i8);
|
||||||
DWORD mask = ics.rf[i->src1_reg].i8;
|
|
||||||
BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
|
|
||||||
ics.rf[i->dest_reg].i8 = is_nonzero ? (int8_t)(index - 24) ^ 0x7 : 8;
|
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
}
|
}
|
||||||
uint32_t IntCode_CNTLZ_I16(IntCodeState& ics, const IntCode* i) {
|
uint32_t IntCode_CNTLZ_I16(IntCodeState& ics, const IntCode* i) {
|
||||||
// CHECK
|
// CHECK
|
||||||
assert_always();
|
assert_always();
|
||||||
DWORD index;
|
ics.rf[i->dest_reg].i8 = poly::lzcnt(ics.rf[i->src1_reg].i16);
|
||||||
DWORD mask = ics.rf[i->src1_reg].i16;
|
|
||||||
BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
|
|
||||||
ics.rf[i->dest_reg].i8 = is_nonzero ? (int8_t)(index - 16) ^ 0xF : 16;
|
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
}
|
}
|
||||||
uint32_t IntCode_CNTLZ_I32(IntCodeState& ics, const IntCode* i) {
|
uint32_t IntCode_CNTLZ_I32(IntCodeState& ics, const IntCode* i) {
|
||||||
DWORD index;
|
ics.rf[i->dest_reg].i8 = poly::lzcnt(ics.rf[i->src1_reg].i32);
|
||||||
DWORD mask = ics.rf[i->src1_reg].i32;
|
|
||||||
BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
|
|
||||||
ics.rf[i->dest_reg].i8 = is_nonzero ? (int8_t)index ^ 0x1F : 32;
|
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
}
|
}
|
||||||
uint32_t IntCode_CNTLZ_I64(IntCodeState& ics, const IntCode* i) {
|
uint32_t IntCode_CNTLZ_I64(IntCodeState& ics, const IntCode* i) {
|
||||||
DWORD index;
|
ics.rf[i->dest_reg].i8 = poly::lzcnt(ics.rf[i->src1_reg].i64);
|
||||||
DWORD64 mask = ics.rf[i->src1_reg].i64;
|
|
||||||
BOOLEAN is_nonzero = _BitScanReverse64(&index, mask);
|
|
||||||
ics.rf[i->dest_reg].i8 = is_nonzero ? (int8_t)index ^ 0x3F : 64;
|
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
}
|
}
|
||||||
int Translate_CNTLZ(TranslationContext& ctx, Instr* i) {
|
int Translate_CNTLZ(TranslationContext& ctx, Instr* i) {
|
||||||
|
@ -3872,21 +3851,18 @@ uint32_t IntCode_PACK_FLOAT16_2(IntCodeState& ics, const IntCode* i) {
|
||||||
const vec128_t& src1 = ics.rf[i->src1_reg].v128;
|
const vec128_t& src1 = ics.rf[i->src1_reg].v128;
|
||||||
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
||||||
dest.ix = dest.iy = dest.iz = 0;
|
dest.ix = dest.iy = dest.iz = 0;
|
||||||
dest.iw =
|
dest.iw = (uint32_t(poly::float_to_half(src1.x)) << 16) |
|
||||||
((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.x) << 16) |
|
poly::float_to_half(src1.y);
|
||||||
DirectX::PackedVector::XMConvertFloatToHalf(src1.y);
|
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
}
|
}
|
||||||
uint32_t IntCode_PACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) {
|
uint32_t IntCode_PACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) {
|
||||||
const vec128_t& src1 = ics.rf[i->src1_reg].v128;
|
const vec128_t& src1 = ics.rf[i->src1_reg].v128;
|
||||||
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
||||||
dest.ix = dest.iy = 0;
|
dest.ix = dest.iy = 0;
|
||||||
dest.iz =
|
dest.iz = (uint32_t(poly::float_to_half(src1.x)) << 16) |
|
||||||
((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.x) << 16) |
|
poly::float_to_half(src1.y);
|
||||||
DirectX::PackedVector::XMConvertFloatToHalf(src1.y);
|
dest.iw = (uint32_t(poly::float_to_half(src1.z)) << 16) |
|
||||||
dest.iw =
|
poly::float_to_half(src1.w);
|
||||||
((uint32_t)DirectX::PackedVector::XMConvertFloatToHalf(src1.z) << 16) |
|
|
||||||
DirectX::PackedVector::XMConvertFloatToHalf(src1.w);
|
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
}
|
}
|
||||||
uint32_t IntCode_PACK_SHORT_2(IntCodeState& ics, const IntCode* i) {
|
uint32_t IntCode_PACK_SHORT_2(IntCodeState& ics, const IntCode* i) {
|
||||||
|
@ -3932,7 +3908,7 @@ uint32_t IntCode_UNPACK_FLOAT16_2(IntCodeState& ics, const IntCode* i) {
|
||||||
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
||||||
uint32_t src = src1.iw;
|
uint32_t src = src1.iw;
|
||||||
for (int n = 0; n < 2; n++) {
|
for (int n = 0; n < 2; n++) {
|
||||||
dest.f4[n] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)src);
|
dest.f4[n] = poly::half_to_float(uint16_t(src));
|
||||||
src >>= 16;
|
src >>= 16;
|
||||||
}
|
}
|
||||||
dest.f4[2] = 0.0f;
|
dest.f4[2] = 0.0f;
|
||||||
|
@ -3944,7 +3920,7 @@ uint32_t IntCode_UNPACK_FLOAT16_4(IntCodeState& ics, const IntCode* i) {
|
||||||
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
vec128_t& dest = ics.rf[i->dest_reg].v128;
|
||||||
uint64_t src = src1.iz | ((uint64_t)src1.iw << 32);
|
uint64_t src = src1.iz | ((uint64_t)src1.iw << 32);
|
||||||
for (int n = 0; n < 4; n++) {
|
for (int n = 0; n < 4; n++) {
|
||||||
dest.f4[n] = DirectX::PackedVector::XMConvertHalfToFloat((uint16_t)src);
|
dest.f4[n] = poly::half_to_float(uint16_t(src));
|
||||||
src >>= 16;
|
src >>= 16;
|
||||||
}
|
}
|
||||||
return IA_NEXT;
|
return IA_NEXT;
|
||||||
|
|
|
@ -10,6 +10,8 @@
|
||||||
#ifndef ALLOY_FRONTEND_PPC_PPC_INSTR_TABLES_H_
|
#ifndef ALLOY_FRONTEND_PPC_PPC_INSTR_TABLES_H_
|
||||||
#define ALLOY_FRONTEND_PPC_PPC_INSTR_TABLES_H_
|
#define ALLOY_FRONTEND_PPC_PPC_INSTR_TABLES_H_
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
#include <alloy/frontend/ppc/ppc_instr.h>
|
#include <alloy/frontend/ppc/ppc_instr.h>
|
||||||
|
|
||||||
namespace alloy {
|
namespace alloy {
|
||||||
|
|
|
@ -9,6 +9,8 @@
|
||||||
|
|
||||||
#include <alloy/hir/value.h>
|
#include <alloy/hir/value.h>
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
namespace alloy {
|
namespace alloy {
|
||||||
namespace hir {
|
namespace hir {
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
#include <string>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include <alloy/core.h>
|
#include <alloy/core.h>
|
||||||
|
|
|
@ -70,7 +70,7 @@ namespace poly {
|
||||||
poly_assert((expr) != nullptr || !message)
|
poly_assert((expr) != nullptr || !message)
|
||||||
|
|
||||||
#define assert_unhandled_case(variable) \
|
#define assert_unhandled_case(variable) \
|
||||||
assert_always("unhandled switch("## #variable##") case")
|
assert_always("unhandled switch(" #variable ") case")
|
||||||
|
|
||||||
} // namespace poly
|
} // namespace poly
|
||||||
|
|
||||||
|
|
|
@ -15,32 +15,35 @@
|
||||||
#include <poly/config.h>
|
#include <poly/config.h>
|
||||||
#include <poly/platform.h>
|
#include <poly/platform.h>
|
||||||
|
|
||||||
|
#if XE_LIKE_OSX
|
||||||
|
#include <libkern/OSAtomic.h>
|
||||||
|
#endif // XE_LIKE_OSX
|
||||||
|
|
||||||
namespace poly {
|
namespace poly {
|
||||||
|
|
||||||
// These functions are modeled off of the Apple OSAtomic routines
|
// These functions are modeled off of the Apple OSAtomic routines
|
||||||
// http://developer.apple.com/library/mac/#documentation/DriversKernelHardware/Reference/libkern_ref/OSAtomic_h/
|
// http://developer.apple.com/library/mac/#documentation/DriversKernelHardware/Reference/libkern_ref/OSAtomic_h/
|
||||||
|
|
||||||
#if XE_LIKE_OSX
|
#if XE_LIKE_OSX
|
||||||
#include <libkern/OSAtomic.h>
|
|
||||||
|
|
||||||
inline int32_t atomic_inc(volatile int32_t* value) {
|
inline int32_t atomic_inc(volatile int32_t* value) {
|
||||||
return OSAtomicIncrement32Barrier(reinterpret_cast<volatile LONG*>(value));
|
return OSAtomicIncrement32Barrier(reinterpret_cast<volatile int32_t*>(value));
|
||||||
}
|
}
|
||||||
inline int32_t atomic_dec(volatile int32_t* value) {
|
inline int32_t atomic_dec(volatile int32_t* value) {
|
||||||
return OSAtomicDecrement32Barrier(reinterpret_cast<volatile LONG*>(value));
|
return OSAtomicDecrement32Barrier(reinterpret_cast<volatile int32_t*>(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int32_t atomic_exchange(int32_t new_value, volatile int32_t* value) {
|
inline int32_t atomic_exchange(int32_t new_value, volatile int32_t* value) {
|
||||||
//
|
return OSAtomicCompareAndSwap32Barrier(*value, new_value, value);
|
||||||
}
|
}
|
||||||
inline int64_t atomic_exchange(int64_t new_value, volatile int64_t* value) {
|
inline int64_t atomic_exchange(int64_t new_value, volatile int64_t* value) {
|
||||||
//
|
return OSAtomicCompareAndSwap64Barrier(*value, new_value, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int32_t atomic_cas(int32_t old_value, int32_t new_value,
|
inline int32_t atomic_cas(int32_t old_value, int32_t new_value,
|
||||||
volatile int32_t* value) {
|
volatile int32_t* value) {
|
||||||
return OSAtomicCompareAndSwap32Barrier(
|
return OSAtomicCompareAndSwap32Barrier(
|
||||||
old_value, new_value, reinterpret_cast<volatile LONG*>(value));
|
old_value, new_value, reinterpret_cast<volatile int32_t*>(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif XE_LIKE_WIN32
|
#elif XE_LIKE_WIN32
|
||||||
|
@ -77,10 +80,10 @@ inline int32_t atomic_dec(volatile int32_t* value) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Atomically stores new_value into *value and returns the value that was
// held there immediately before the store.
inline int32_t atomic_exchange(int32_t new_value, volatile int32_t* value) {
  // A real exchange builtin is used rather than a compare-and-swap: a CAS
  // against a separately-read *value can fail and leave the old value in
  // place, and the __sync CAS builtin takes the pointer as its first argument.
  return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST);
}
|
||||||
// Atomically stores new_value into *value and returns the value that was
// held there immediately before the store (64-bit variant).
inline int64_t atomic_exchange(int64_t new_value, volatile int64_t* value) {
  // A real exchange builtin is used rather than a compare-and-swap: a CAS
  // against a separately-read *value can fail and leave the old value in
  // place, and the __sync CAS builtin takes the pointer as its first argument.
  return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST);
}
|
||||||
|
|
||||||
inline int32_t atomic_cas(int32_t old_value, int32_t new_value,
|
inline int32_t atomic_cas(int32_t old_value, int32_t new_value,
|
||||||
|
|
|
@ -0,0 +1,69 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <poly/math.h>

#include <cstring>
|
||||||
|
|
||||||
|
namespace poly {
|
||||||
|
|
||||||
|
// TODO(benvanik): replace with alternate implementation.
|
||||||
|
// XMConvertFloatToHalf
|
||||||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
// Converts a 32-bit float to a 16-bit half-precision bit pattern.
//
// The sign bit is carried over unchanged. Magnitudes whose bit pattern is
// above 0x47FFEFFF (everything too large for a half, including float
// infinities and NaNs) all produce 0x7FFF. Magnitudes below the smallest
// normalized half are converted to a denormal half, or to zero when nothing
// of the significand survives. Rounding is to nearest, ties to even.
uint16_t float_to_half(float value) {
  // Read the bit pattern with memcpy; reading it through a casted pointer
  // violates strict aliasing.
  uint32_t IValue;
  std::memcpy(&IValue, &value, sizeof(IValue));
  const uint32_t Sign = (IValue & 0x80000000U) >> 16U;
  IValue = IValue & 0x7FFFFFFFU;  // Hack off the sign

  uint32_t Result;
  if (IValue > 0x47FFEFFFU) {
    // The number is too large to be represented as a half. Saturate to the
    // all-ones magnitude.
    Result = 0x7FFFU;
  } else {
    if (IValue < 0x38800000U) {
      // The number is too small to be represented as a normalized half.
      // Convert it to a denormalized value.
      const uint32_t Shift = 113U - (IValue >> 23U);
      // The significand (with its implicit leading one) is 24 bits wide, so
      // any shift of 24 or more leaves zero. Shift can reach 113 here, and
      // shifting a 32-bit value by 32 or more is undefined, so handle that
      // case explicitly instead of relying on the hardware.
      IValue = Shift < 24U ? (0x800000U | (IValue & 0x7FFFFFU)) >> Shift : 0U;
    } else {
      // Rebias the exponent to represent the value as a normalized half.
      IValue += 0xC8000000U;
    }
    // Drop the 13 low mantissa bits, rounding to nearest-even.
    Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U) & 0x7FFFU;
  }
  return (uint16_t)(Result | Sign);
}
|
||||||
|
|
||||||
|
// TODO(benvanik): replace with alternate implementation.
|
||||||
|
// XMConvertHalfToFloat
|
||||||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
// Expands a 16-bit half-precision bit pattern into a 32-bit float.
//
// Normalized and denormalized halves convert exactly and zero keeps its sign.
// A half exponent field of 31 gets no special handling: it is rebiased like
// any other exponent, so e.g. 0x7C00 yields 65536.0f.
float half_to_float(uint16_t value) {
  uint32_t Mantissa = (uint32_t)(value & 0x03FF);
  uint32_t Exponent;
  if ((value & 0x7C00) != 0) {
    // The value is normalized
    Exponent = (uint32_t)((value >> 10) & 0x1F);
  } else if (Mantissa != 0) {
    // The value is denormalized
    // Normalize the value in the resulting float: shift the mantissa up until
    // its leading one reaches bit 10, lowering the exponent once per shift
    // (unsigned wrap-around below zero is intended and cancels out when 112
    // is added back).
    Exponent = 1;
    do {
      Exponent--;
      Mantissa <<= 1;
    } while ((Mantissa & 0x0400) == 0);
    Mantissa &= 0x03FF;
  } else {
    // The value is zero; -112 cancels the rebias so the exponent field is 0.
    Exponent = (uint32_t)-112;
  }
  // Widen before shifting so the sign bit is moved in unsigned arithmetic.
  const uint32_t Result = ((uint32_t(value) & 0x8000U) << 16) |  // Sign
                          ((Exponent + 112) << 23) |             // Exponent
                          (Mantissa << 13);                      // Mantissa
  // Reinterpret the bits with memcpy; a pointer cast violates strict aliasing.
  float result_float;
  std::memcpy(&result_float, &Result, sizeof(result_float));
  return result_float;
}
|
||||||
|
|
||||||
|
} // namespace poly
|
|
@ -25,6 +25,7 @@ namespace poly {
|
||||||
// return value is the size of the input operand (8, 16, 32, or 64). If the most
|
// return value is the size of the input operand (8, 16, 32, or 64). If the most
|
||||||
// significant bit of value is one, the return value is zero.
|
// significant bit of value is one, the return value is zero.
|
||||||
#if XE_COMPILER_MSVC
|
#if XE_COMPILER_MSVC
|
||||||
|
#if 1
|
||||||
inline uint8_t lzcnt(uint8_t v) {
|
inline uint8_t lzcnt(uint8_t v) {
|
||||||
return static_cast<uint8_t>(__lzcnt16(v) - 8);
|
return static_cast<uint8_t>(__lzcnt16(v) - 8);
|
||||||
}
|
}
|
||||||
|
@ -32,6 +33,32 @@ inline uint8_t lzcnt(uint16_t v) { return static_cast<uint8_t>(__lzcnt16(v)); }
|
||||||
inline uint8_t lzcnt(uint32_t v) { return static_cast<uint8_t>(__lzcnt(v)); }
|
inline uint8_t lzcnt(uint32_t v) { return static_cast<uint8_t>(__lzcnt(v)); }
|
||||||
inline uint8_t lzcnt(uint64_t v) { return static_cast<uint8_t>(__lzcnt64(v)); }
|
inline uint8_t lzcnt(uint64_t v) { return static_cast<uint8_t>(__lzcnt64(v)); }
|
||||||
#else
|
#else
|
||||||
|
inline uint8_t lzcnt(uint8_t v) {
|
||||||
|
DWORD index;
|
||||||
|
DWORD mask = v;
|
||||||
|
BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
|
||||||
|
return static_cast<uint8_t>(is_nonzero ? int8_t(index - 24) ^ 0x7 : 8);
|
||||||
|
}
|
||||||
|
inline uint8_t lzcnt(uint16_t v) {
|
||||||
|
DWORD index;
|
||||||
|
DWORD mask = v;
|
||||||
|
BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
|
||||||
|
return static_cast<uint8_t>(is_nonzero ? int8_t(index - 16) ^ 0xF : 16);
|
||||||
|
}
|
||||||
|
inline uint8_t lzcnt(uint32_t v) {
|
||||||
|
DWORD index;
|
||||||
|
DWORD mask = v;
|
||||||
|
BOOLEAN is_nonzero = _BitScanReverse(&index, mask);
|
||||||
|
return static_cast<uint8_t>(is_nonzero ? int8_t(index) ^ 0x1F : 32);
|
||||||
|
}
|
||||||
|
inline uint8_t lzcnt(uint64_t v) {
|
||||||
|
DWORD index;
|
||||||
|
DWORD64 mask = v;
|
||||||
|
BOOLEAN is_nonzero = _BitScanReverse64(&index, mask);
|
||||||
|
return static_cast<uint8_t>(is_nonzero ? int8_t(index) ^ 0x3F : 64);
|
||||||
|
}
|
||||||
|
#endif // LZCNT supported
|
||||||
|
#else
|
||||||
// Counts the leading zero bits of an 8-bit value. Returns 8 when v is zero
// and 0 when the most significant bit is set.
inline uint8_t lzcnt(uint8_t v) {
  // The count-leading-zeros builtins have an undefined result for a zero
  // input, so zero is answered directly.
  if (v == 0) {
    return 8;
  }
  // __builtin_clz works on unsigned int; discard the leading zeros that come
  // from widening the 8-bit input to that width.
  const int widened_bits = static_cast<int>(sizeof(unsigned int) * 8) - 8;
  return static_cast<uint8_t>(__builtin_clz(static_cast<unsigned int>(v)) -
                              widened_bits);
}
|
||||||
|
@ -121,6 +148,9 @@ int64_t m128_i64(const __m128& v) {
|
||||||
return m128_i64<N>(_mm_castps_pd(v));
|
return m128_i64<N>(_mm_castps_pd(v));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint16_t float_to_half(float value);
|
||||||
|
float half_to_float(uint16_t value);
|
||||||
|
|
||||||
} // namespace poly
|
} // namespace poly
|
||||||
|
|
||||||
#endif // POLY_MATH_H_
|
#endif // POLY_MATH_H_
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
'atomic.h',
|
'atomic.h',
|
||||||
'config.h',
|
'config.h',
|
||||||
'cxx_compat.h',
|
'cxx_compat.h',
|
||||||
|
'math.cc',
|
||||||
'math.h',
|
'math.h',
|
||||||
'platform.h',
|
'platform.h',
|
||||||
'poly-private.h',
|
'poly-private.h',
|
||||||
|
|
|
@ -18,6 +18,9 @@
|
||||||
namespace poly {
|
namespace poly {
|
||||||
namespace threading {
|
namespace threading {
|
||||||
|
|
||||||
|
// Gets the current high-performance tick count.
|
||||||
|
uint64_t ticks();
|
||||||
|
|
||||||
// Gets a stable thread-specific ID, but may not be. Use for informative
|
// Gets a stable thread-specific ID, but may not be. Use for informative
|
||||||
// purposes only.
|
// purposes only.
|
||||||
uint32_t current_thread_id();
|
uint32_t current_thread_id();
|
||||||
|
|
|
@ -9,12 +9,16 @@
|
||||||
|
|
||||||
#include <poly/threading.h>
|
#include <poly/threading.h>
|
||||||
|
|
||||||
|
#include <mach/mach.h>
|
||||||
|
#include <mach/mach_time.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
namespace poly {
|
namespace poly {
|
||||||
namespace threading {
|
namespace threading {
|
||||||
|
|
||||||
|
// Returns the current value of mach_absolute_time(), unmodified.
// NOTE(review): the result is presumably in Mach absolute time units rather
// than a fixed unit such as nanoseconds - confirm before comparing with
// values from other platforms.
uint64_t ticks() { return mach_absolute_time(); }
|
||||||
|
|
||||||
uint32_t current_thread_id() {
|
uint32_t current_thread_id() {
|
||||||
mach_port_t tid = pthread_mach_thread_np(pthread_self());
|
mach_port_t tid = pthread_mach_thread_np(pthread_self());
|
||||||
return static_cast<uint32_t>(tid);
|
return static_cast<uint32_t>(tid);
|
||||||
|
|
|
@ -14,6 +14,15 @@
|
||||||
namespace poly {
|
namespace poly {
|
||||||
namespace threading {
|
namespace threading {
|
||||||
|
|
||||||
|
uint64_t ticks() {
|
||||||
|
LARGE_INTEGER counter;
|
||||||
|
uint64_t time = 0;
|
||||||
|
if (QueryPerformanceCounter(&counter)) {
|
||||||
|
time = counter.QuadPart;
|
||||||
|
}
|
||||||
|
return time;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t current_thread_id() {
|
uint32_t current_thread_id() {
|
||||||
return static_cast<uint32_t>(GetCurrentThreadId());
|
return static_cast<uint32_t>(GetCurrentThreadId());
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#define XENIA_TYPES_H_
|
#define XENIA_TYPES_H_
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
#include <xenia/platform.h>
|
#include <xenia/platform.h>
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue