Moving around some math macros.

2014-08-16 17:18:20 -07:00 · 2014-08-16 17:18:20 -07:00 · 5b83cf5fd1
parent 54ce9db743
commit 5b83cf5fd1
23 changed files with 148 additions and 137 deletions
--- a/src/alloy/backend/ivm/ivm_assembler.cc
+++ b/src/alloy/backend/ivm/ivm_assembler.cc
@ -78,12 +78,12 @@ int IVMAssembler::Assemble(FunctionInfo* symbol_info, HIRBuilder* builder,
    auto slot = *it;
    size_t type_size = GetTypeSize(slot->type);
    // Align to natural size.
-    stack_offset = XEALIGN(stack_offset, type_size);
+    stack_offset = poly::align(stack_offset, type_size);
    slot->set_constant((uint32_t)stack_offset);
    stack_offset += type_size;
  }
  // Ensure 16b alignment.
-  stack_offset = XEALIGN(stack_offset, 16);
+  stack_offset = poly::align(stack_offset, 16ull);
  ctx.stack_size = stack_offset;

  auto block = builder->first_block();
--- a/src/alloy/backend/x64/x64_code_cache_win.cc
+++ b/src/alloy/backend/x64/x64_code_cache_win.cc
@ -63,11 +63,11 @@ void* X64CodeCache::PlaceCode(void* machine_code, size_t code_size,
  size_t alloc_size = code_size;

  // Add unwind info into the allocation size. Keep things 16b aligned.
-  alloc_size += XEROUNDUP(X64CodeChunk::UNWIND_INFO_SIZE, 16);
+  alloc_size += poly::round_up(X64CodeChunk::UNWIND_INFO_SIZE, 16);

  // Always move the code to land on 16b alignment. We do this by rounding up
  // to 16b so that all offsets are aligned.
-  alloc_size = XEROUNDUP(alloc_size, 16);
+  alloc_size = poly::round_up(alloc_size, 16);

  lock_.lock();

@ -106,7 +106,8 @@ X64CodeChunk::X64CodeChunk(size_t chunk_size)
  buffer = (uint8_t*)VirtualAlloc(NULL, capacity, MEM_RESERVE | MEM_COMMIT,
                                  PAGE_EXECUTE_READWRITE);

-  fn_table_capacity = (uint32_t)XEROUNDUP(capacity / ESTIMATED_FN_SIZE, 16);
+  fn_table_capacity =
+      static_cast<uint32_t>(poly::round_up(capacity / ESTIMATED_FN_SIZE, 16));
  size_t table_size = fn_table_capacity * sizeof(RUNTIME_FUNCTION);
  fn_table = (RUNTIME_FUNCTION*)xe_malloc(table_size);
  fn_table_count = 0;
--- a/src/alloy/backend/x64/x64_emitter.cc
+++ b/src/alloy/backend/x64/x64_emitter.cc
@ -121,13 +121,13 @@ int X64Emitter::Emit(HIRBuilder* builder, size_t& out_stack_size) {
    auto slot = *it;
    size_t type_size = GetTypeSize(slot->type);
    // Align to natural size.
-    stack_offset = XEALIGN(stack_offset, type_size);
+    stack_offset = poly::align(stack_offset, type_size);
    slot->set_constant((uint32_t)stack_offset);
    stack_offset += type_size;
  }
  // Ensure 16b alignment.
  stack_offset -= StackLayout::GUEST_STACK_SIZE;
-  stack_offset = XEALIGN(stack_offset, 16);
+  stack_offset = poly::align(stack_offset, 16ull);

  // Function prolog.
  // Must be 16b aligned.
--- a/src/alloy/frontend/ppc/ppc_disasm.cc
+++ b/src/alloy/frontend/ppc/ppc_disasm.cc
@ -287,13 +287,13 @@ void Disasm_bx(InstrData& i, StringBuffer* str) {
 void Disasm_bcx(InstrData& i, StringBuffer* str) {
  const char* s0 = i.B.LK ? "lr, " : "";
  const char* s1;
-  if (!XESELECTBITS(i.B.BO, 2, 2)) {
+  if (!select_bits(i.B.BO, 2, 2)) {
    s1 = "ctr, ";
  } else {
    s1 = "";
  }
  char s2[8] = {0};
-  if (!XESELECTBITS(i.B.BO, 4, 4)) {
+  if (!select_bits(i.B.BO, 4, 4)) {
    xesnprintfa(s2, XECOUNT(s2), "cr%d, ", i.B.BI >> 2);
  }
  uint32_t nia;
@ -309,7 +309,7 @@ void Disasm_bcctrx(InstrData& i, StringBuffer* str) {
  // TODO(benvanik): mnemonics
  const char* s0 = i.XL.LK ? "lr, " : "";
  char s2[8] = {0};
-  if (!XESELECTBITS(i.XL.BO, 4, 4)) {
+  if (!select_bits(i.XL.BO, 4, 4)) {
    xesnprintfa(s2, XECOUNT(s2), "cr%d, ", i.XL.BI >> 2);
  }
  str->Append("%-8s %s%sctr", i.type->name, s0, s2);
@ -321,13 +321,13 @@ void Disasm_bclrx(InstrData& i, StringBuffer* str) {
    name = "blr";
  }
  const char* s1;
-  if (!XESELECTBITS(i.XL.BO, 2, 2)) {
+  if (!select_bits(i.XL.BO, 2, 2)) {
    s1 = "ctr, ";
  } else {
    s1 = "";
  }
  char s2[8] = {0};
-  if (!XESELECTBITS(i.XL.BO, 4, 4)) {
+  if (!select_bits(i.XL.BO, 4, 4)) {
    xesnprintfa(s2, XECOUNT(s2), "cr%d, ", i.XL.BI >> 2);
  }
  str->Append("%-8s %s%s", name, s1, s2);
--- a/src/alloy/frontend/ppc/ppc_emit_control.cc
+++ b/src/alloy/frontend/ppc/ppc_emit_control.cc
@ -176,7 +176,7 @@ XEEMITTER(bcx, 0x40000000, B)(PPCHIRBuilder& f, InstrData& i) {
  // 43210 (real)

  Value* ctr_ok = NULL;
-  if (XESELECTBITS(i.B.BO, 2, 2)) {
+  if (select_bits(i.B.BO, 2, 2)) {
    // Ignore ctr.
  } else {
    // Decrement counter.
@ -187,7 +187,7 @@ XEEMITTER(bcx, 0x40000000, B)(PPCHIRBuilder& f, InstrData& i) {
    ctr = f.Truncate(ctr, INT32_TYPE);
    // TODO(benvanik): could do something similar to cond and avoid the
    // is_true/branch_true pairing.
-    if (XESELECTBITS(i.B.BO, 1, 1)) {
+    if (select_bits(i.B.BO, 1, 1)) {
      ctr_ok = f.IsFalse(ctr);
    } else {
      ctr_ok = f.IsTrue(ctr);
@ -196,12 +196,12 @@ XEEMITTER(bcx, 0x40000000, B)(PPCHIRBuilder& f, InstrData& i) {

  Value* cond_ok = NULL;
  bool not_cond_ok = false;
-  if (XESELECTBITS(i.B.BO, 4, 4)) {
+  if (select_bits(i.B.BO, 4, 4)) {
    // Ignore cond.
  } else {
    Value* cr = f.LoadCRField(i.B.BI >> 2, i.B.BI & 3);
    cond_ok = cr;
-    if (XESELECTBITS(i.B.BO, 3, 3)) {
+    if (select_bits(i.B.BO, 3, 3)) {
      // Expect true.
      not_cond_ok = false;
    } else {
@ -248,12 +248,12 @@ XEEMITTER(bcctrx, 0x4C000420, XL)(PPCHIRBuilder& f, InstrData& i) {

  Value* cond_ok = NULL;
  bool not_cond_ok = false;
-  if (XESELECTBITS(i.XL.BO, 4, 4)) {
+  if (select_bits(i.XL.BO, 4, 4)) {
    // Ignore cond.
  } else {
    Value* cr = f.LoadCRField(i.XL.BI >> 2, i.XL.BI & 3);
    cond_ok = cr;
-    if (XESELECTBITS(i.XL.BO, 3, 3)) {
+    if (select_bits(i.XL.BO, 3, 3)) {
      // Expect true.
      not_cond_ok = false;
    } else {
@ -282,7 +282,7 @@ XEEMITTER(bclrx, 0x4C000020, XL)(PPCHIRBuilder& f, InstrData& i) {
  // 43210 (real)

  Value* ctr_ok = NULL;
-  if (XESELECTBITS(i.XL.BO, 2, 2)) {
+  if (select_bits(i.XL.BO, 2, 2)) {
    // Ignore ctr.
  } else {
    // Decrement counter.
@ -293,7 +293,7 @@ XEEMITTER(bclrx, 0x4C000020, XL)(PPCHIRBuilder& f, InstrData& i) {
    ctr = f.Truncate(ctr, INT32_TYPE);
    // TODO(benvanik): could do something similar to cond and avoid the
    // is_true/branch_true pairing.
-    if (XESELECTBITS(i.XL.BO, 1, 1)) {
+    if (select_bits(i.XL.BO, 1, 1)) {
      ctr_ok = f.IsFalse(ctr);
    } else {
      ctr_ok = f.IsTrue(ctr);
@ -302,12 +302,12 @@ XEEMITTER(bclrx, 0x4C000020, XL)(PPCHIRBuilder& f, InstrData& i) {

  Value* cond_ok = NULL;
  bool not_cond_ok = false;
-  if (XESELECTBITS(i.XL.BO, 4, 4)) {
+  if (select_bits(i.XL.BO, 4, 4)) {
    // Ignore cond.
  } else {
    Value* cr = f.LoadCRField(i.XL.BI >> 2, i.XL.BI & 3);
    cond_ok = cr;
-    if (XESELECTBITS(i.XL.BO, 3, 3)) {
+    if (select_bits(i.XL.BO, 3, 3)) {
      // Expect true.
      not_cond_ok = false;
    } else {
--- a/src/alloy/frontend/ppc/ppc_instr.cc
+++ b/src/alloy/frontend/ppc/ppc_instr.cc
@ -322,48 +322,48 @@ InstrType* GetInstrType(uint32_t code) {
  switch (code >> 26) {
    case 4:
      // Opcode = 4, index = bits 10-0 (10)
-      slot = alloy::frontend::ppc::tables::instr_table_4[XESELECTBITS(code, 0,
-                                                                      10)];
+      slot =
+          alloy::frontend::ppc::tables::instr_table_4[select_bits(code, 0, 10)];
      break;
    case 19:
      // Opcode = 19, index = bits 10-1 (10)
-      slot = alloy::frontend::ppc::tables::instr_table_19[XESELECTBITS(code, 1,
-                                                                       10)];
+      slot = alloy::frontend::ppc::tables::instr_table_19[select_bits(code, 1,
+                                                                      10)];
      break;
    case 30:
      // Opcode = 30, index = bits 4-1 (4)
      // Special cased to an uber instruction.
-      slot = alloy::frontend::ppc::tables::instr_table_30[XESELECTBITS(code, 0,
-                                                                       0)];
+      slot =
+          alloy::frontend::ppc::tables::instr_table_30[select_bits(code, 0, 0)];
      break;
    case 31:
      // Opcode = 31, index = bits 10-1 (10)
-      slot = alloy::frontend::ppc::tables::instr_table_31[XESELECTBITS(code, 1,
-                                                                       10)];
+      slot = alloy::frontend::ppc::tables::instr_table_31[select_bits(code, 1,
+                                                                      10)];
      break;
    case 58:
      // Opcode = 58, index = bits 1-0 (2)
-      slot = alloy::frontend::ppc::tables::instr_table_58[XESELECTBITS(code, 0,
-                                                                       1)];
+      slot =
+          alloy::frontend::ppc::tables::instr_table_58[select_bits(code, 0, 1)];
      break;
    case 59:
      // Opcode = 59, index = bits 5-1 (5)
-      slot = alloy::frontend::ppc::tables::instr_table_59[XESELECTBITS(code, 1,
-                                                                       5)];
+      slot =
+          alloy::frontend::ppc::tables::instr_table_59[select_bits(code, 1, 5)];
      break;
    case 62:
      // Opcode = 62, index = bits 1-0 (2)
-      slot = alloy::frontend::ppc::tables::instr_table_62[XESELECTBITS(code, 0,
-                                                                       1)];
+      slot =
+          alloy::frontend::ppc::tables::instr_table_62[select_bits(code, 0, 1)];
      break;
    case 63:
      // Opcode = 63, index = bits 10-1 (10)
-      slot = alloy::frontend::ppc::tables::instr_table_63[XESELECTBITS(code, 1,
-                                                                       10)];
+      slot = alloy::frontend::ppc::tables::instr_table_63[select_bits(code, 1,
+                                                                      10)];
      break;
    default:
      slot =
-          alloy::frontend::ppc::tables::instr_table[XESELECTBITS(code, 26, 31)];
+          alloy::frontend::ppc::tables::instr_table[select_bits(code, 26, 31)];
      break;
  }
  if (slot && slot->opcode) {
--- a/src/alloy/frontend/ppc/ppc_instr.h
+++ b/src/alloy/frontend/ppc/ppc_instr.h
@ -23,6 +23,14 @@ namespace alloy {
 namespace frontend {
 namespace ppc {

+inline uint32_t make_bitmask(uint32_t a, uint32_t b) {  // Builds a mask with bits a..b set (inclusive, bit 0 = LSB).
+  return (static_cast<uint32_t>(-1) >> (31 - b)) & ~((1u << a) - 1);  // keep bits <= b, then clear bits < a; shift counts assume a, b <= 31
+}
+
+inline uint32_t select_bits(uint32_t value, uint32_t a, uint32_t b) {  // Extracts bits a..b (inclusive) of value as an integer.
+  return (value & make_bitmask(a, b)) >> a;  // isolate the field, then shift it down so bit a becomes bit 0
+}
+
 // TODO(benvanik): rename these
 typedef enum {
  kXEPPCInstrFormatI = 0,
--- a/src/alloy/frontend/ppc/ppc_instr_tables.h
+++ b/src/alloy/frontend/ppc/ppc_instr_tables.h
@ -96,7 +96,7 @@ static InstrType** instr_table_prep(InstrType* unprep, int unprep_count, int a,
  int prep_count = (int)pow(2.0, b - a + 1);
  InstrType** prep = (InstrType**)xe_calloc(prep_count * sizeof(void*));
  for (int n = 0; n < unprep_count; n++) {
-    int ordinal = XESELECTBITS(unprep[n].opcode, a, b);
+    int ordinal = select_bits(unprep[n].opcode, a, b);
    prep[ordinal] = &unprep[n];
  }
  return prep;
@ -108,7 +108,7 @@ static InstrType** instr_table_prep_63(InstrType* unprep, int unprep_count,
  int prep_count = (int)pow(2.0, b - a + 1);
  InstrType** prep = (InstrType**)xe_calloc(prep_count * sizeof(void*));
  for (int n = 0; n < unprep_count; n++) {
-    int ordinal = XESELECTBITS(unprep[n].opcode, a, b);
+    int ordinal = select_bits(unprep[n].opcode, a, b);
    if (unprep[n].format == kXEPPCInstrFormatA) {
      // Must splat this into all of the slots that it could be in.
      for (int m = 0; m < 32; m++) {
--- a/src/alloy/frontend/ppc/ppc_scanner.cc
+++ b/src/alloy/frontend/ppc/ppc_scanner.cc
@ -50,8 +50,8 @@ int PPCScanner::FindExtents(FunctionInfo* symbol_info) {

  XELOGSDB("Analyzing function %.8X...", symbol_info->address());

-  uint32_t start_address = symbol_info->address();
-  uint32_t end_address = symbol_info->end_address();
+  uint32_t start_address = static_cast<uint32_t>(symbol_info->address());
+  uint32_t end_address = static_cast<uint32_t>(symbol_info->end_address());
  uint32_t address = start_address;
  uint32_t furthest_target = start_address;
  size_t blocks_found = 0;
--- a/src/poly/math.h
+++ b/src/poly/math.h
@ -20,6 +20,32 @@

 namespace poly {

+// Rounds value up to a multiple of alignment; the mask form below is only correct when alignment is a power of two.
+template <typename T>
+T align(T value, T alignment) {  // one T is deduced from both arguments, so their types must match exactly
+  return (value + alignment - 1) & ~(alignment - 1);  // bump past the boundary, then clear the low bits
+}
+
+// Rounds value up to the next multiple of `multiple`; a positive value that is already a multiple is returned unchanged.
+template <typename T, typename V>
+T round_up(T value, V multiple) {
+  return value + multiple - 1 - (value - 1) % multiple;  // NOTE(review): for value == 0, (value - 1) wraps (unsigned) or goes negative (signed); the result is 0 only for unsigned T with a power-of-two multiple - confirm callers never depend on other cases
+}
+
+// Gets the smallest power of two that is greater than or equal to the given
+// value, by copying the highest set bit of (value - 1) into all lower bits.
+template <typename T>
+T next_pow2(T value) {
+  value--;  // so an exact power of two maps back to itself
+  value |= value >> 1;  // each step doubles the run of set bits below the top bit
+  value |= value >> 2;
+  value |= value >> 4;
+  value |= value >> 8;
+  value |= value >> 16;  // last step covers 32 bits; NOTE(review): a 64-bit T would also need a >> 32 step
+  value++;  // all ones below the top bit, plus one, is the next power of two
+  return value;
+}
+
 // lzcnt instruction, typed for integers of all sizes.
 // The number of leading zero bits in the value parameter. If value is zero, the
 // return value is the size of the input operand (8, 16, 32, or 64). If the most
--- a/src/xenia/core.h
+++ b/src/xenia/core.h
@ -17,7 +17,6 @@ namespace xe {
  using Memory = alloy::Memory;
 }  // namespace xe

-#include <xenia/core/hash.h>
 #include <xenia/core/mmap.h>
 #include <xenia/core/pal.h>
 #include <xenia/core/ref.h>
--- a/src/xenia/core/hash.cc
+++ b/src/xenia/core/hash.cc
@ -38,19 +38,21 @@

 #include <xenia/core/hash.h>

+#include <string.h>
 #include <algorithm>
-#include <string.h>  // for memcpy and memset
+
+namespace xe {

 namespace {

 typedef std::pair<uint64_t, uint64_t> uint128_t;

-inline uint64_t Uint128Low64(const uint128_t& x) { return x.first; }
-inline uint64_t Uint128High64(const uint128_t& x) { return x.second; }
+inline uint64_t Uint128Low64(const uint128_t &x) { return x.first; }
+inline uint64_t Uint128High64(const uint128_t &x) { return x.second; }

 // Hash 128 input bits down to 64 bits of output.
 // This is intended to be a reasonably good hash function.
-inline uint64_t Hash128to64(const uint128_t& x) {
+inline uint64_t Hash128to64(const uint128_t &x) {
  // Murmur-inspired hashing.
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
@ -61,37 +63,14 @@ inline uint64_t Hash128to64(const uint128_t& x) {
  return b;
 }

-using namespace std;
-
-#if 0
-static uint64_t UNALIGNED_LOAD64(const char *p) {
-  uint64_t result;
-  memcpy(&result, p, sizeof(result));
-  return result;
-}
-static uint32_t UNALIGNED_LOAD32(const char *p) {
-  uint32_t result;
-  memcpy(&result, p, sizeof(result));
-  return result;
-}
-#else
 XEFORCEINLINE uint64_t UNALIGNED_LOAD64(const char *p) {
-  const uint64_t* p64 = (const uint64_t*)p;
+  const uint64_t *p64 = (const uint64_t *)p;
  return *p64;
 }
 XEFORCEINLINE uint32_t UNALIGNED_LOAD32(const char *p) {
-  const uint32_t* p32 = (const uint32_t*)p;
+  const uint32_t *p32 = (const uint32_t *)p;
  return *p32;
 }
-#endif
-
-#if XE_CPU_BIGENDIAN
-#define uint32_t_in_expected_order(x) (poly::byte_swap(x))
-#define uint64_in_expected_order(x) (poly::byte_swap(x))
-#else
-#define uint32_t_in_expected_order(x) (x)
-#define uint64_in_expected_order(x) (x)
-#endif  // XE_CPU_BIGENDIAN

 #if !defined(LIKELY)
 #if HAVE_BUILTIN_EXPECT
@ -101,13 +80,9 @@ XEFORCEINLINE uint32_t UNALIGNED_LOAD32(const char *p) {
 #endif
 #endif

-static uint64_t Fetch64(const char *p) {
-  return uint64_in_expected_order(UNALIGNED_LOAD64(p));
-}
+static uint64_t Fetch64(const char *p) { return UNALIGNED_LOAD64(p); }

-static uint32_t Fetch32(const char *p) {
-  return uint32_t_in_expected_order(UNALIGNED_LOAD32(p));
-}
+static uint32_t Fetch32(const char *p) { return UNALIGNED_LOAD32(p); }

 // Some primes between 2^63 and 2^64 for various uses.
 static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
@ -119,8 +94,7 @@ static const uint32_t c1 = 0xcc9e2d51;
 static const uint32_t c2 = 0x1b873593;

 // A 32-bit to 32-bit integer hash copied from Murmur3.
-static uint32_t fmix(uint32_t h)
-{
+static uint32_t fmix(uint32_t h) {
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
@ -135,7 +109,11 @@ static uint32_t Rotate32(uint32_t val, int shift) {
 }

 #undef PERMUTE3
-#define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0)
+#define PERMUTE3(a, b, c) \
+  do {                    \
+    std::swap(a, b);      \
+    std::swap(a, c);      \
+  } while (0)

 static uint32_t Mur(uint32_t a, uint32_t h) {
  // Helper from Murmur3 for combining two 32-bit values.
@ -180,9 +158,9 @@ static uint32_t Hash32Len5to12(const char *s, size_t len) {

 uint32_t CityHash32(const char *s, size_t len) {
  if (len <= 24) {
-    return len <= 12 ?
-        (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
-        Hash32Len13to24(s, len);
+    return len <= 12
+               ? (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len))
+               : Hash32Len13to24(s, len);
  }

  // len > 24
@ -254,9 +232,7 @@ static uint64_t Rotate(uint64_t val, int shift) {
  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
 }

-static uint64_t ShiftMix(uint64_t val) {
-  return val ^ (val >> 47);
-}
+static uint64_t ShiftMix(uint64_t val) { return val ^ (val >> 47); }

 static uint64_t HashLen16(uint64_t u, uint64_t v) {
  return Hash128to64(uint128_t(u, v));
@ -311,7 +287,7 @@ static uint64_t HashLen17to32(const char *s, size_t len) {

 // Return a 16-byte hash for 48 bytes.  Quick and dirty.
 // Callers do best to use "random-looking" values for a and b.
-static pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
+static std::pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
    uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) {
  a += w;
  b = Rotate(b + a + z, 21);
@ -319,18 +295,15 @@ static pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
  a += x;
  a += y;
  b += Rotate(a, 44);
-  return make_pair(a + z, b + c);
+  return std::make_pair(a + z, b + c);
 }

 // Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
-static pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
-    const char* s, uint64_t a, uint64_t b) {
-  return WeakHashLen32WithSeeds(Fetch64(s),
-                                Fetch64(s + 8),
-                                Fetch64(s + 16),
-                                Fetch64(s + 24),
-                                a,
-                                b);
+static std::pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(const char *s,
+                                                            uint64_t a,
+                                                            uint64_t b) {
+  return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16),
+                                Fetch64(s + 24), a, b);
 }

 // Return an 8-byte hash for 33 to 64 bytes.
@ -371,8 +344,10 @@ uint64_t CityHash64(const char *s, size_t len) {
  uint64_t x = Fetch64(s + len - 40);
  uint64_t y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
  uint64_t z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
-  pair<uint64_t, uint64_t> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
-  pair<uint64_t, uint64_t> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
+  std::pair<uint64_t, uint64_t> v =
+      WeakHashLen32WithSeeds(s + len - 64, len, z);
+  std::pair<uint64_t, uint64_t> w =
+      WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
  x = x * k1 + Fetch64(s);

  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
@ -395,7 +370,8 @@ uint64_t CityHash64(const char *s, size_t len) {

 }  // anonymous namespace

-
-uint64_t xe_hash64(const void* data, size_t length, uint64_t seed) {
-  return HashLen16(CityHash64((const char*)data, length) - k2, seed);
+uint64_t hash64(const void *data, size_t length, uint64_t seed) {
+  return HashLen16(CityHash64((const char *)data, length) - k2, seed);
 }
+
+}  // namespace xe
--- a/src/xenia/core/hash.h
+++ b/src/xenia/core/hash.h
@ -12,8 +12,19 @@

 #include <xenia/common.h>

+namespace xe {

-uint64_t xe_hash64(const void* data, size_t length, uint64_t seed = 0);
+inline size_t hash_combine(size_t seed) { return seed; }  // Single-argument overload: no values to mix in, so the seed is returned as-is.

+template <typename T, typename... Ts>
+size_t hash_combine(size_t seed, const T& v, const Ts&... vs) {  // Folds the hashes of v, vs... into seed, left to right, and returns the result.
+  std::hash<T> hasher;  // requires a std::hash specialization for T
+  seed ^= hasher(v) + 0x9E3779B9 + (seed << 6) + (seed >> 2);  // mix v's hash with a constant and shifted copies of the current seed
+  return hash_combine(seed, vs...);  // recurse on the remaining values; the one-argument overload ends the chain
+}
+
+uint64_t hash64(const void* data, size_t length, uint64_t seed = 0);
+
+}  // namespace xe

 #endif  // XENIA_CORE_HASH_H_
--- a/src/xenia/cpu/xenon_memory.cc
+++ b/src/xenia/cpu/xenon_memory.cc
@ -13,6 +13,7 @@
 #include <mutex>

 #include <gflags/gflags.h>
+#include <poly/math.h>

 using namespace alloy;
 using namespace xe::cpu;
@ -510,7 +511,7 @@ uint64_t XenonMemoryHeap::Alloc(
  size_t heap_guard_size = FLAGS_heap_guard_pages * 4096;
  if (heap_guard_size) {
    alignment = std::max(alignment, static_cast<uint32_t>(heap_guard_size));
-    alloc_size = (uint32_t)XEROUNDUP(size, heap_guard_size);
+    alloc_size = static_cast<uint32_t>(poly::round_up(size, heap_guard_size));
  }
  uint8_t* p = (uint8_t*)mspace_memalign(space_, alignment,
                                         alloc_size + heap_guard_size * 2);
--- a/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc
+++ b/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc
@ -9,6 +9,7 @@

 #include <xenia/gpu/d3d11/d3d11_geometry_shader.h>

+#include <xenia/core/hash.h>
 #include <xenia/gpu/gpu-private.h>
 #include <xenia/gpu/d3d11/d3d11_shader_resource.h>
 #include <xenia/gpu/d3d11/d3d11_shader_translator.h>
@ -95,7 +96,7 @@ ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) {
  if (FLAGS_dump_shaders.size()) {
    base_path = FLAGS_dump_shaders.c_str();
  }
-  uint64_t hash = xe_hash64(shader_source, strlen(shader_source)); // ?
+  uint64_t hash = hash64(shader_source, strlen(shader_source)); // ?
  char file_name[poly::max_path];
  xesnprintfa(file_name, XECOUNT(file_name),
      "%s/gen_%.16llX.gs",
--- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc
+++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc
@ -9,6 +9,7 @@

 #include <xenia/gpu/d3d11/d3d11_graphics_driver.h>

+#include <xenia/core/hash.h>
 #include <xenia/gpu/gpu-private.h>
 #include <xenia/gpu/buffer_resource.h>
 #include <xenia/gpu/shader_resource.h>
--- a/src/xenia/gpu/d3d11/d3d11_shader_resource.cc
+++ b/src/xenia/gpu/d3d11/d3d11_shader_resource.cc
@ -9,6 +9,7 @@

 #include <xenia/gpu/d3d11/d3d11_shader_resource.h>

+#include <xenia/core/hash.h>
 #include <xenia/gpu/gpu-private.h>
 #include <xenia/gpu/d3d11/d3d11_geometry_shader.h>
 #include <xenia/gpu/d3d11/d3d11_resource_cache.h>
@ -46,7 +47,7 @@ ID3D10Blob* D3D11ShaderCompile(XE_GPU_SHADER_TYPE type,
  if (FLAGS_dump_shaders.size()) {
    base_path = FLAGS_dump_shaders.c_str();
  }
-  size_t hash = xe_hash64(disasm_source, strlen(disasm_source)); // ?
+  size_t hash = hash64(disasm_source, strlen(disasm_source)); // ?
  char file_name[poly::max_path];
  xesnprintfa(file_name, XECOUNT(file_name),
      "%s/gen_%.16llX.%s",
--- a/src/xenia/gpu/resource_cache.cc
+++ b/src/xenia/gpu/resource_cache.cc
@ -11,6 +11,7 @@

 #include <algorithm>

+#include <xenia/core/hash.h>

 using namespace std;
 using namespace xe;
@ -108,7 +109,7 @@ VertexBufferResource* ResourceCache::FetchVertexBuffer(

 uint64_t ResourceCache::HashRange(const MemoryRange& memory_range) {
  // We could do something smarter here to potentially early exit.
-  return xe_hash64(memory_range.host_base, memory_range.length);
+  return hash64(memory_range.host_base, memory_range.length);
 }

 void ResourceCache::SyncRange(uint32_t address, int length) {
--- a/src/xenia/gpu/sampler_state_resource.h
+++ b/src/xenia/gpu/sampler_state_resource.h
@ -10,6 +10,7 @@
 #ifndef XENIA_GPU_SAMPLER_STATE_RESOURCE_H_
 #define XENIA_GPU_SAMPLER_STATE_RESOURCE_H_

+#include <xenia/core/hash.h>
 #include <xenia/gpu/resource.h>
 #include <xenia/gpu/xenos/ucode.h>
 #include <xenia/gpu/xenos/xenos.h>
--- a/src/xenia/gpu/texture_resource.cc
+++ b/src/xenia/gpu/texture_resource.cc
@ -9,6 +9,7 @@

 #include <xenia/gpu/texture_resource.h>

+#include <poly/math.h>
 #include <xenia/gpu/xenos/ucode.h>
 #include <xenia/gpu/xenos/xenos.h>

@ -253,16 +254,16 @@ void TextureResource::Info::CalculateTextureSizes2D(
        width_multiple = minimum_multiple;
      }
    }
-    size_2d.input_width = XEROUNDUP(size_2d.logical_width, width_multiple);
-    size_2d.input_height = XEROUNDUP(size_2d.logical_height, 32);
+    size_2d.input_width = poly::round_up(size_2d.logical_width, width_multiple);
+    size_2d.input_height = poly::round_up(size_2d.logical_height, 32);
    size_2d.output_width = size_2d.logical_width;
    size_2d.output_height = size_2d.logical_height;
  } else {
    // must be 128x128
-    size_2d.input_width = XEROUNDUP(size_2d.logical_width, 128);
-    size_2d.input_height = XEROUNDUP(size_2d.logical_height, 128);
-    size_2d.output_width = XENEXTPOW2(size_2d.logical_width);
-    size_2d.output_height = XENEXTPOW2(size_2d.logical_height);
+    size_2d.input_width = poly::round_up(size_2d.logical_width, 128);
+    size_2d.input_height = poly::round_up(size_2d.logical_height, 128);
+    size_2d.output_width = poly::next_pow2(size_2d.logical_width);
+    size_2d.output_height = poly::next_pow2(size_2d.logical_height);
  }

  size_2d.logical_pitch = (size_2d.logical_width / block_size) * texel_pitch;
--- a/src/xenia/kernel/xboxkrnl_io.cc
+++ b/src/xenia/kernel/xboxkrnl_io.cc
@ -9,13 +9,14 @@

 #include <xenia/common.h>
 #include <xenia/core.h>
-#include <xenia/xbox.h>
+#include <xenia/core/hash.h>
 #include <xenia/kernel/async_request.h>
 #include <xenia/kernel/kernel_state.h>
 #include <xenia/kernel/xboxkrnl_private.h>
 #include <xenia/kernel/objects/xevent.h>
 #include <xenia/kernel/objects/xfile.h>
 #include <xenia/kernel/util/shim_utils.h>
+#include <xenia/xbox.h>


 namespace xe {
--- a/src/xenia/kernel/xboxkrnl_memory.cc
+++ b/src/xenia/kernel/xboxkrnl_memory.cc
@ -7,6 +7,7 @@
 ******************************************************************************
 */

+#include <poly/math.h>
 #include <xenia/common.h>
 #include <xenia/core.h>
 #include <xenia/xbox.h>
@ -251,8 +252,8 @@ SHIM_CALL MmAllocatePhysicalMemoryEx_shim(
  }

  // Round up the region size and alignment to the next page.
-  uint32_t adjusted_size = XEROUNDUP(region_size, page_size);
-  uint32_t adjusted_alignment = XEROUNDUP(alignment, page_size);
+  uint32_t adjusted_size = poly::round_up(region_size, page_size);
+  uint32_t adjusted_alignment = poly::round_up(alignment, page_size);

  // Callers can pick an address to allocate with min_addr_range/max_addr_range
  // and the memory must be allocated there. I haven't seen a game do this,
@ -433,7 +434,7 @@ SHIM_CALL ExAllocatePoolTypeWithTag_shim(
  uint32_t alignment = 8;
  uint32_t adjusted_size = size;
  if (adjusted_size < 4 * 1024) {
-    adjusted_size = XEROUNDUP(adjusted_size, 4 * 1024);
+    adjusted_size = poly::round_up(adjusted_size, 4 * 1024);
  } else {
    alignment = 4 * 1024;
  }
--- a/src/xenia/types.h
+++ b/src/xenia/types.h
@ -91,29 +91,10 @@ typedef XECACHEALIGN volatile void xe_aligned_void_t;
 #define XECOUNT(array)          (sizeof(array) / sizeof(array[0]))
 #endif  // MSVC

-XEFORCEINLINE size_t hash_combine(size_t seed) {
-  return seed;
-}
-template <typename T, typename... Ts>
-size_t hash_combine(size_t seed, const T& v, const Ts&... vs) {
-  std::hash<T> hasher;
-  seed ^= hasher(v) + 0x9E3779B9 + (seed << 6) + (seed >> 2);
-  return hash_combine(seed, vs...);
-}
-
 #if XE_PLATFORM_WIN32
 #define XESAFERELEASE(p)        if (p) { p->Release(); }
 #endif  // WIN32

-#define XEBITMASK(a, b) (((unsigned) -1 >> (31 - (b))) & ~((1U << (a)) - 1))
-#define XESELECTBITS(value, a, b) ((value & XEBITMASK(a, b)) >> a)
-
-#define XEROUNDUP(v, multiple)  ((v) + (multiple) - 1 - ((v) - 1) % (multiple))
-static inline uint32_t XENEXTPOW2(uint32_t v) {
-  v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v++; return v;
-}
-#define XEALIGN(value, align) ((value + align - 1) & ~(align - 1))
-
 #define XEFAIL()                goto XECLEANUP
 #define XEEXPECT(expr)          if (!(expr)         ) { goto XECLEANUP; }
 #define XEEXPECTTRUE(expr)      if (!(expr)         ) { goto XECLEANUP; }