diff --git a/src/alloy/backend/x64/x64_sequence.inl b/src/alloy/backend/x64/x64_sequence.inl index 50466b3af..d0f9e539d 100644 --- a/src/alloy/backend/x64/x64_sequence.inl +++ b/src/alloy/backend/x64/x64_sequence.inl @@ -183,33 +183,37 @@ protected: template struct I8 : ValueOp, KEY_TYPE_V_I8, Reg8, int8_t, TAG> { + typedef ValueOp, KEY_TYPE_V_I8, Reg8, int8_t, TAG> BASE; const int8_t constant() const { - assert_true(is_constant); - return value->constant.i8; + assert_true(BASE::is_constant); + return BASE::value->constant.i8; } }; template struct I16 : ValueOp, KEY_TYPE_V_I16, Reg16, int16_t, TAG> { + typedef ValueOp, KEY_TYPE_V_I16, Reg16, int16_t, TAG> BASE; const int16_t constant() const { - assert_true(is_constant); - return value->constant.i16; + assert_true(BASE::is_constant); + return BASE::value->constant.i16; } }; template struct I32 : ValueOp, KEY_TYPE_V_I32, Reg32, int32_t, TAG> { + typedef ValueOp, KEY_TYPE_V_I32, Reg32, int32_t, TAG> BASE; const int32_t constant() const { - assert_true(is_constant); - return value->constant.i32; + assert_true(BASE::is_constant); + return BASE::value->constant.i32; } }; template struct I64 : ValueOp, KEY_TYPE_V_I64, Reg64, int64_t, TAG> { + typedef ValueOp, KEY_TYPE_V_I64, Reg64, int64_t, TAG> BASE; const int64_t constant() const { - assert_true(is_constant); - return value->constant.i64; + assert_true(BASE::is_constant); + return BASE::value->constant.i64; } bool ConstantFitsIn32Reg() const override { - int64_t v = value->constant.i64; + int64_t v = BASE::value->constant.i64; if ((v & ~0x7FFFFFFF) == 0) { // Fits under 31 bits, so just load using normal mov. return true; @@ -222,23 +226,26 @@ struct I64 : ValueOp, KEY_TYPE_V_I64, Reg64, int64_t, TAG> { }; template struct F32 : ValueOp, KEY_TYPE_V_F32, Xmm, float, TAG> { + typedef ValueOp, KEY_TYPE_V_F32, Xmm, float, TAG> BASE; const float constant() const { - assert_true(is_constant); - return value->constant.f32; + assert_true(BASE::is_constant); + return BASE::value->constant.f32; } }; template struct F64 : ValueOp, KEY_TYPE_V_F64, Xmm, double, TAG> { + typedef ValueOp, KEY_TYPE_V_F64, Xmm, double, TAG> BASE; const double constant() const { - assert_true(is_constant); - return value->constant.f64; + assert_true(BASE::is_constant); + return BASE::value->constant.f64; } }; template struct V128 : ValueOp, KEY_TYPE_V_V128, Xmm, vec128_t, TAG> { + typedef ValueOp, KEY_TYPE_V_V128, Xmm, vec128_t, TAG> BASE; const vec128_t& constant() const { - assert_true(is_constant); - return value->constant.v128; + assert_true(BASE::is_constant); + return BASE::value->constant.v128; } }; @@ -308,6 +315,7 @@ template struct I; template struct I : DestField { + typedef DestField BASE; static const hir::Opcode opcode = OPCODE; static const uint32_t key = InstrKey::Construct::value; static const KeyType dest_type = DEST::key_type; @@ -316,7 +324,7 @@ protected: template friend struct SequenceFields; bool Load(const Instr* i, TagTable& tag_table) { if (InstrKey(i).value == key && - LoadDest(i, tag_table)) { + BASE::LoadDest(i, tag_table)) { instr = i; return true; } @@ -325,6 +333,7 @@ protected: }; template struct I : DestField { + typedef DestField BASE; static const hir::Opcode opcode = OPCODE; static const uint32_t key = InstrKey::Construct::value; static const KeyType dest_type = DEST::key_type; @@ -335,7 +344,7 @@ protected: template friend struct SequenceFields; bool Load(const Instr* i, TagTable& tag_table) { if (InstrKey(i).value == key && - LoadDest(i, tag_table) && + BASE::LoadDest(i, tag_table) && 
tag_table.CheckTag(i->src1)) { instr = i; src1.Load(i->src1); @@ -346,6 +355,7 @@ protected: }; template struct I : DestField { + typedef DestField BASE; static const hir::Opcode opcode = OPCODE; static const uint32_t key = InstrKey::Construct::value; static const KeyType dest_type = DEST::key_type; @@ -358,7 +368,7 @@ protected: template friend struct SequenceFields; bool Load(const Instr* i, TagTable& tag_table) { if (InstrKey(i).value == key && - LoadDest(i, tag_table) && + BASE::LoadDest(i, tag_table) && tag_table.CheckTag(i->src1) && tag_table.CheckTag(i->src2)) { instr = i; @@ -371,6 +381,7 @@ protected: }; template struct I : DestField { + typedef DestField BASE; static const hir::Opcode opcode = OPCODE; static const uint32_t key = InstrKey::Construct::value; static const KeyType dest_type = DEST::key_type; @@ -385,7 +396,7 @@ protected: template friend struct SequenceFields; bool Load(const Instr* i, TagTable& tag_table) { if (InstrKey(i).value == key && - LoadDest(i, tag_table) && + BASE::LoadDest(i, tag_table) && tag_table.CheckTag(i->src1) && tag_table.CheckTag(i->src2) && tag_table.CheckTag(i->src3)) { @@ -404,7 +415,6 @@ struct SequenceFields; template struct SequenceFields { I1 i1; - typedef typename I1 I1Type; protected: template friend struct Sequence; bool Check(const Instr* i, TagTable& tag_table, const Instr** new_tail) { @@ -516,9 +526,10 @@ const Reg64 GetTempReg(X64Emitter& e) { template struct SingleSequence : public Sequence, T> { + typedef Sequence, T> BASE; typedef T EmitArgType; static const uint32_t head_key = T::key; - static void Emit(X64Emitter& e, const EmitArgs& _) { + static void Emit(X64Emitter& e, const typename BASE::EmitArgs& _) { SEQ::Emit(e, _.i1); } @@ -547,7 +558,7 @@ struct SingleSequence : public Sequence, T> { if (i.src1.ConstantFitsIn32Reg()) { reg_const_fn(e, i.dest, static_cast(i.src1.constant())); } else { - auto temp = GetTempReg(e); + auto temp = GetTempReg(e); e.mov(temp, i.src1.constant()); reg_reg_fn(e, i.dest, temp); } @@ -560,7 +571,7 @@ struct SingleSequence : public Sequence, T> { if (i.src2.ConstantFitsIn32Reg()) { reg_const_fn(e, i.dest, static_cast(i.src2.constant())); } else { - auto temp = GetTempReg(e); + auto temp = GetTempReg(e); e.mov(temp, i.src2.constant()); reg_reg_fn(e, i.dest, temp); } @@ -586,7 +597,7 @@ struct SingleSequence : public Sequence, T> { if (i.src1.is_constant) { assert_true(!i.src2.is_constant); if (i.dest == i.src2) { - auto temp = GetTempReg(e); + auto temp = GetTempReg(e); e.mov(temp, i.src2); e.mov(i.dest, i.src1.constant()); reg_reg_fn(e, i.dest, temp); @@ -599,7 +610,7 @@ struct SingleSequence : public Sequence, T> { if (i.src2.ConstantFitsIn32Reg()) { reg_const_fn(e, i.dest, static_cast(i.src2.constant())); } else { - auto temp = GetTempReg(e); + auto temp = GetTempReg(e); e.mov(temp, i.src2.constant()); reg_reg_fn(e, i.dest, temp); } @@ -608,7 +619,7 @@ struct SingleSequence : public Sequence, T> { if (i.src2.ConstantFitsIn32Reg()) { reg_const_fn(e, i.dest, static_cast(i.src2.constant())); } else { - auto temp = GetTempReg(e); + auto temp = GetTempReg(e); e.mov(temp, i.src2.constant()); reg_reg_fn(e, i.dest, temp); } @@ -617,7 +628,7 @@ struct SingleSequence : public Sequence, T> { if (i.dest == i.src1) { reg_reg_fn(e, i.dest, i.src2); } else if (i.dest == i.src2) { - auto temp = GetTempReg(e); + auto temp = GetTempReg(e); e.mov(temp, i.src2); e.mov(i.dest, i.src1); reg_reg_fn(e, i.dest, temp); @@ -667,7 +678,7 @@ struct SingleSequence : public Sequence, T> { if (i.src1.ConstantFitsIn32Reg()) { 
reg_const_fn(e, i.src2, static_cast(i.src1.constant())); } else { - auto temp = GetTempReg(e); + auto temp = GetTempReg(e); e.mov(temp, i.src1.constant()); reg_reg_fn(e, i.src2, temp); } @@ -675,7 +686,7 @@ struct SingleSequence : public Sequence, T> { if (i.src2.ConstantFitsIn32Reg()) { reg_const_fn(e, i.src1, static_cast(i.src2.constant())); } else { - auto temp = GetTempReg(e); + auto temp = GetTempReg(e); e.mov(temp, i.src2.constant()); reg_reg_fn(e, i.src1, temp); } @@ -692,7 +703,7 @@ struct SingleSequence : public Sequence, T> { if (i.src1.ConstantFitsIn32Reg()) { reg_const_fn(e, i.dest, i.src2, static_cast(i.src1.constant()), true); } else { - auto temp = GetTempReg(e); + auto temp = GetTempReg(e); e.mov(temp, i.src1.constant()); reg_reg_fn(e, i.dest, i.src2, temp, true); } @@ -700,7 +711,7 @@ struct SingleSequence : public Sequence, T> { if (i.src2.ConstantFitsIn32Reg()) { reg_const_fn(e, i.dest, i.src1, static_cast(i.src2.constant()), false); } else { - auto temp = GetTempReg(e); + auto temp = GetTempReg(e); e.mov(temp, i.src2.constant()); reg_reg_fn(e, i.dest, i.src1, temp, false); } @@ -721,8 +732,6 @@ static const tag_t TAG5 = 5; static const tag_t TAG6 = 6; static const tag_t TAG7 = 7; -typedef bool (*SequenceSelectFn)(X64Emitter&, const Instr*, const Instr**); - template void Register() { sequence_table.insert({ T::head_key, T::Select }); diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc index b89916585..cb6f7d260 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -39,11 +39,12 @@ using namespace Xbyak; using namespace alloy::hir; using namespace alloy::runtime; +typedef bool (*SequenceSelectFn)(X64Emitter&, const Instr*, const Instr**); +std::unordered_multimap sequence_table; + // Utilities/types used only in this file: #include -std::unordered_multimap sequence_table; - // Selects the right byte/word/etc from a vector. We need to flip logical // indices (0,1,2,3,4,5,6,7,...) = (3,2,1,0,7,6,5,4,...) #define VEC128_B(n) ((n) ^ 0x3) @@ -63,7 +64,7 @@ EMITTER(COMMENT, MATCH(I)) { // TODO(benvanik): don't just leak this memory. 
auto str_copy = strdup(str); e.mov(e.rdx, reinterpret_cast(str_copy)); - e.CallNative(TraceString); + e.CallNative(reinterpret_cast(TraceString)); } } }; @@ -1104,12 +1105,7 @@ EMITTER(LOAD_CLOCK, MATCH(I>)) { e.mov(i.dest, e.rax); } static uint64_t LoadClock(void* raw_context) { - LARGE_INTEGER counter; - uint64_t time = 0; - if (QueryPerformanceCounter(&counter)) { - time = counter.QuadPart; - } - return time; + return poly::threading::ticks(); } }; EMITTER_OPCODE_TABLE( @@ -1245,7 +1241,7 @@ EMITTER(LOAD_CONTEXT_I8, MATCH(I, OffsetOp>)) { if (IsTracingData()) { e.mov(e.r8, e.byte[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextLoadI8); + e.CallNative(reinterpret_cast(TraceContextLoadI8)); } } }; @@ -1256,7 +1252,7 @@ EMITTER(LOAD_CONTEXT_I16, MATCH(I, OffsetOp>)) { if (IsTracingData()) { e.mov(e.r8, e.word[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextLoadI16); + e.CallNative(reinterpret_cast(TraceContextLoadI16)); } } }; @@ -1267,7 +1263,7 @@ EMITTER(LOAD_CONTEXT_I32, MATCH(I, OffsetOp>)) { if (IsTracingData()) { e.mov(e.r8, e.dword[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextLoadI32); + e.CallNative(reinterpret_cast(TraceContextLoadI32)); } } }; @@ -1278,7 +1274,7 @@ EMITTER(LOAD_CONTEXT_I64, MATCH(I, OffsetOp>)) { if (IsTracingData()) { e.mov(e.r8, e.qword[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextLoadI64); + e.CallNative(reinterpret_cast(TraceContextLoadI64)); } } }; @@ -1289,7 +1285,7 @@ EMITTER(LOAD_CONTEXT_F32, MATCH(I, OffsetOp>)) { if (IsTracingData()) { e.lea(e.r8, e.dword[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextLoadF32); + e.CallNative(reinterpret_cast(TraceContextLoadF32)); } } }; @@ -1300,7 +1296,7 @@ EMITTER(LOAD_CONTEXT_F64, MATCH(I, OffsetOp>)) { if (IsTracingData()) { e.lea(e.r8, e.qword[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextLoadF64); + e.CallNative(reinterpret_cast(TraceContextLoadF64)); } } }; @@ -1311,7 +1307,7 @@ EMITTER(LOAD_CONTEXT_V128, MATCH(I, OffsetOp>)) { if (IsTracingData()) { e.lea(e.r8, e.ptr[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextLoadV128); + e.CallNative(reinterpret_cast(TraceContextLoadV128)); } } }; @@ -1341,7 +1337,7 @@ EMITTER(STORE_CONTEXT_I8, MATCH(I>) if (IsTracingData()) { e.mov(e.r8, e.byte[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextStoreI8); + e.CallNative(reinterpret_cast(TraceContextStoreI8)); } } }; @@ -1356,7 +1352,7 @@ EMITTER(STORE_CONTEXT_I16, MATCH(I if (IsTracingData()) { e.mov(e.r8, e.word[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextStoreI16); + e.CallNative(reinterpret_cast(TraceContextStoreI16)); } } }; @@ -1371,7 +1367,7 @@ EMITTER(STORE_CONTEXT_I32, MATCH(I if (IsTracingData()) { e.mov(e.r8, e.dword[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextStoreI32); + e.CallNative(reinterpret_cast(TraceContextStoreI32)); } } }; @@ -1386,7 +1382,7 @@ EMITTER(STORE_CONTEXT_I64, MATCH(I if (IsTracingData()) { e.mov(e.r8, e.qword[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextStoreI64); + e.CallNative(reinterpret_cast(TraceContextStoreI64)); } } }; @@ -1401,7 +1397,7 @@ EMITTER(STORE_CONTEXT_F32, MATCH(I if (IsTracingData()) { e.lea(e.r8, e.dword[addr]); e.mov(e.rdx, i.src1.value); - e.CallNative(TraceContextStoreF32); + e.CallNative(reinterpret_cast(TraceContextStoreF32)); } } }; @@ -1416,7 +1412,7 @@ EMITTER(STORE_CONTEXT_F64, MATCH(I if (IsTracingData()) { e.lea(e.r8, e.qword[addr]); e.mov(e.rdx, i.src1.value); - 
e.CallNative(TraceContextStoreF64); + e.CallNative(reinterpret_cast(TraceContextStoreF64)); } } }; @@ -1432,7 +1428,7 @@ EMITTER(STORE_CONTEXT_V128, MATCH(I(TraceContextStoreV128)); } } }; @@ -1473,7 +1469,7 @@ EMITTER(LOAD_I8, MATCH(I, I64<>>)) { if (IsTracingData()) { e.mov(e.r8b, i.dest); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryLoadI8); + e.CallNative(reinterpret_cast(TraceMemoryLoadI8)); } } }; @@ -1484,7 +1480,7 @@ EMITTER(LOAD_I16, MATCH(I, I64<>>)) { if (IsTracingData()) { e.mov(e.r8w, i.dest); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryLoadI16); + e.CallNative(reinterpret_cast(TraceMemoryLoadI16)); } } }; @@ -1495,7 +1491,7 @@ EMITTER(LOAD_I32, MATCH(I, I64<>>)) { if (IsTracingData()) { e.mov(e.r8d, i.dest); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryLoadI32); + e.CallNative(reinterpret_cast(TraceMemoryLoadI32)); } } }; @@ -1506,7 +1502,7 @@ EMITTER(LOAD_I64, MATCH(I, I64<>>)) { if (IsTracingData()) { e.mov(e.r8, i.dest); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryLoadI64); + e.CallNative(reinterpret_cast(TraceMemoryLoadI64)); } } }; @@ -1517,7 +1513,7 @@ EMITTER(LOAD_F32, MATCH(I, I64<>>)) { if (IsTracingData()) { e.lea(e.r8, e.dword[addr]); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryLoadF32); + e.CallNative(reinterpret_cast(TraceMemoryLoadF32)); } } }; @@ -1528,7 +1524,7 @@ EMITTER(LOAD_F64, MATCH(I, I64<>>)) { if (IsTracingData()) { e.lea(e.r8, e.qword[addr]); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryLoadF64); + e.CallNative(reinterpret_cast(TraceMemoryLoadF64)); } } }; @@ -1540,7 +1536,7 @@ EMITTER(LOAD_V128, MATCH(I, I64<>>)) { if (IsTracingData()) { e.lea(e.r8, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryLoadV128); + e.CallNative(reinterpret_cast(TraceMemoryLoadV128)); } } }; @@ -1578,7 +1574,7 @@ EMITTER(STORE_I8, MATCH(I, I8<>>)) { auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8b, e.byte[addr]); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryStoreI8); + e.CallNative(reinterpret_cast(TraceMemoryStoreI8)); } } }; @@ -1595,7 +1591,7 @@ EMITTER(STORE_I16, MATCH(I, I16<>>)) { auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8w, e.word[addr]); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryStoreI16); + e.CallNative(reinterpret_cast(TraceMemoryStoreI16)); } } }; @@ -1612,7 +1608,7 @@ EMITTER(STORE_I32, MATCH(I, I32<>>)) { auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8d, e.dword[addr]); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryStoreI32); + e.CallNative(reinterpret_cast(TraceMemoryStoreI32)); } } }; @@ -1629,7 +1625,7 @@ EMITTER(STORE_I64, MATCH(I, I64<>>)) { auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8, e.qword[addr]); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryStoreI64); + e.CallNative(reinterpret_cast(TraceMemoryStoreI64)); } } }; @@ -1646,7 +1642,7 @@ EMITTER(STORE_F32, MATCH(I, F32<>>)) { auto addr = ComputeMemoryAddress(e, i.src1); e.lea(e.r8, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryStoreF32); + e.CallNative(reinterpret_cast(TraceMemoryStoreF32)); } } }; @@ -1663,7 +1659,7 @@ EMITTER(STORE_F64, MATCH(I, F64<>>)) { auto addr = ComputeMemoryAddress(e, i.src1); e.lea(e.r8, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]); - e.CallNative(TraceMemoryStoreF64); + e.CallNative(reinterpret_cast(TraceMemoryStoreF64)); } } }; @@ -1681,7 +1677,7 @@ EMITTER(STORE_V128, MATCH(I, V128<>>)) { auto addr = ComputeMemoryAddress(e, i.src1); e.lea(e.r8, e.ptr[addr]); e.lea(e.rdx, e.ptr[addr]); - 
e.CallNative(TraceMemoryStoreV128); + e.CallNative(reinterpret_cast(TraceMemoryStoreV128)); } } }; @@ -2099,7 +2095,7 @@ EMITTER_OPCODE_TABLE( // OPCODE_COMPARE_* // ============================================================================ #define EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, type, reg_type) \ - EMITTER(COMPARE_##op##_##type, MATCH(I, type<>, type<>>)) { \ + EMITTER(COMPARE_##op##_##type, MATCH(I, type<>, type<>>)) { \ static void Emit(X64Emitter& e, const EmitArgType& i) { \ EmitAssociativeCompareOp( \ e, i, \ @@ -2119,7 +2115,7 @@ EMITTER_OPCODE_TABLE( EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I32, Reg32); \ EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I64, Reg64); \ EMITTER_OPCODE_TABLE( \ - OPCODE_COMPARE_##op##, \ + OPCODE_COMPARE_##op, \ COMPARE_##op##_I8, \ COMPARE_##op##_I16, \ COMPARE_##op##_I32, \ @@ -2135,13 +2131,13 @@ EMITTER_ASSOCIATIVE_COMPARE_XX(UGE, setae, setb); // http://x86.renejeschke.de/html/file_module_x86_id_288.html #define EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(op, instr) \ - EMITTER(COMPARE_##op##_F32, MATCH(I, F32<>, F32<>>)) { \ + EMITTER(COMPARE_##op##_F32, MATCH(I, F32<>, F32<>>)) { \ static void Emit(X64Emitter& e, const EmitArgType& i) { \ e.vcomiss(i.src1, i.src2); \ e.instr(i.dest); \ } \ }; \ - EMITTER(COMPARE_##op##_F64, MATCH(I, F64<>, F64<>>)) { \ + EMITTER(COMPARE_##op##_F64, MATCH(I, F64<>, F64<>>)) { \ static void Emit(X64Emitter& e, const EmitArgType& i) { \ if (i.src1.is_constant) { \ e.LoadConstantXmm(e.xmm0, i.src1.constant()); \ @@ -3479,39 +3475,44 @@ EMITTER_OPCODE_TABLE( // http://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html EMITTER(POW2_F32, MATCH(I, F32<>>)) { static __m128 EmulatePow2(__m128 src) { - float result = static_cast(pow(2, src.m128_f32[0])); + float src_value; + _mm_store_ss(&src_value, src); + float result = std::pow(2, src_value); return _mm_load_ss(&result); } static void Emit(X64Emitter& e, const EmitArgType& i) { assert_always(); e.lea(e.r8, e.StashXmm(i.src1)); - e.CallNativeSafe(EmulatePow2); + e.CallNativeSafe(reinterpret_cast(EmulatePow2)); e.vmovaps(i.dest, e.xmm0); } }; EMITTER(POW2_F64, MATCH(I, F64<>>)) { static __m128d EmulatePow2(__m128 src) { - double result = pow(2, src.m128_f32[0]); + double src_value; + _mm_store_sd(&src_value, src); + double result = std::pow(2, src_value); return _mm_load_sd(&result); } static void Emit(X64Emitter& e, const EmitArgType& i) { assert_always(); e.lea(e.r8, e.StashXmm(i.src1)); - e.CallNativeSafe(EmulatePow2); + e.CallNativeSafe(reinterpret_cast(EmulatePow2)); e.vmovaps(i.dest, e.xmm0); } }; EMITTER(POW2_V128, MATCH(I, V128<>>)) { static __m128 EmulatePow2(__m128 src) { - __m128 result; + alignas(16) float values[4]; + _mm_store_ps(values, src); for (size_t i = 0; i < 4; ++i) { - result.m128_f32[i] = static_cast(pow(2, src.m128_f32[i])); + values[i] = std::pow(2, values[i]); } - return result; + return _mm_load_ps(values); } static void Emit(X64Emitter& e, const EmitArgType& i) { e.lea(e.r8, e.StashXmm(i.src1)); - e.CallNativeSafe(EmulatePow2); + e.CallNativeSafe(reinterpret_cast(EmulatePow2)); e.vmovaps(i.dest, e.xmm0); } }; @@ -3530,39 +3531,44 @@ EMITTER_OPCODE_TABLE( // TODO(benvanik): this emulated fn destroys all xmm registers! don't do it! 
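The POW2 rewrite above (and the LOG2 rewrite that follows) swaps direct reads of src.m128_f32[...] / src.m128d_f64[...] — union members that only MSVC's __m128/__m128d types expose — for explicit _mm_store_*/_mm_load_* round-trips through plain floats, which clang and GCC also compile. A minimal sketch of the same pattern outside the emitters (LowLane and Pow2Lanes are illustrative names, not part of the patch):

#include <xmmintrin.h>
#include <cmath>
#include <cstddef>

// Portable scalar extraction: copy the low lane out instead of touching the
// MSVC-only m128_f32 union member.
static float LowLane(__m128 v) {
  float x;
  _mm_store_ss(&x, v);
  return x;
}

// Portable per-lane transform: spill all four lanes to an aligned array,
// operate in plain C++, then reload the vector.
static __m128 Pow2Lanes(__m128 v) {
  alignas(16) float lanes[4];
  _mm_store_ps(lanes, v);
  for (size_t i = 0; i < 4; ++i) {
    lanes[i] = std::pow(2.0f, lanes[i]);
  }
  return _mm_load_ps(lanes);
}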
 EMITTER(LOG2_F32, MATCH(I<OPCODE_LOG2, F32<>, F32<>>)) {
   static __m128 EmulateLog2(__m128 src) {
-    float result = log2(src.m128_f32[0]);
+    float src_value;
+    _mm_store_ss(&src_value, src);
+    float result = std::log2(src_value);
     return _mm_load_ss(&result);
   }
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     assert_always();
     e.lea(e.r8, e.StashXmm(i.src1));
-    e.CallNativeSafe(EmulateLog2);
+    e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
     e.vmovaps(i.dest, e.xmm0);
   }
 };
 EMITTER(LOG2_F64, MATCH(I<OPCODE_LOG2, F64<>, F64<>>)) {
   static __m128d EmulateLog2(__m128d src) {
-    double result = log2(src.m128d_f64[0]);
+    double src_value;
+    _mm_store_sd(&src_value, src);
+    double result = std::log2(src_value);
     return _mm_load_sd(&result);
   }
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     assert_always();
     e.lea(e.r8, e.StashXmm(i.src1));
-    e.CallNativeSafe(EmulateLog2);
+    e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
     e.vmovaps(i.dest, e.xmm0);
   }
 };
 EMITTER(LOG2_V128, MATCH(I<OPCODE_LOG2, V128<>, V128<>>)) {
   static __m128 EmulateLog2(__m128 src) {
-    __m128 result;
+    alignas(16) float values[4];
+    _mm_store_ps(values, src);
     for (size_t i = 0; i < 4; ++i) {
-      result.m128_f32[i] = log2(src.m128_f32[i]);
+      values[i] = std::log2(values[i]);
     }
-    return result;
+    return _mm_load_ps(values);
   }
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     e.lea(e.r8, e.StashXmm(i.src1));
-    e.CallNativeSafe(EmulateLog2);
+    e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
     e.vmovaps(i.dest, e.xmm0);
   }
 };
@@ -4996,7 +5002,7 @@ EMITTER_OPCODE_TABLE(
-void alloy::backend::x64::RegisterSequences() {
+void RegisterSequences() {
 #define REGISTER_EMITTER_OPCODE_TABLE(opcode) Register_##opcode()
   REGISTER_EMITTER_OPCODE_TABLE(OPCODE_COMMENT);
   REGISTER_EMITTER_OPCODE_TABLE(OPCODE_NOP);
@@ -5109,7 +5115,7 @@ void alloy::backend::x64::RegisterSequences() {
   //REGISTER_EMITTER_OPCODE_TABLE(OPCODE_ATOMIC_SUB);
 }

-bool alloy::backend::x64::SelectSequence(X64Emitter& e, const Instr* i, const Instr** new_tail) {
+bool SelectSequence(X64Emitter& e, const Instr* i, const Instr** new_tail) {
   const InstrKey key(i);
   const auto its = sequence_table.equal_range(key);
   for (auto it = its.first; it != its.second; ++it) {
diff --git a/src/alloy/compiler/passes/value_reduction_pass.cc b/src/alloy/compiler/passes/value_reduction_pass.cc
index 4c5cf2e28..1d51de912 100644
--- a/src/alloy/compiler/passes/value_reduction_pass.cc
+++ b/src/alloy/compiler/passes/value_reduction_pass.cc
@@ -43,7 +43,7 @@ void ValueReductionPass::ComputeLastUse(Value* value) {
   // Note that this list isn't sorted (unfortunately), so we have to scan
   // them all.
   uint32_t max_ordinal = 0;
-  Value::Use* last_use = NULL;
+  Value::Use* last_use = nullptr;
   auto use = value->use_head;
   while (use) {
     if (!last_use || use->instr->ordinal >= max_ordinal) {
@@ -52,7 +52,7 @@
     }
     use = use->next;
   }
-  value->last_use = last_use->instr;
+  value->last_use = last_use ? last_use->instr : nullptr;
 }

 int ValueReductionPass::Run(HIRBuilder* builder) {
diff --git a/src/alloy/frontend/ppc/ppc_disasm.cc b/src/alloy/frontend/ppc/ppc_disasm.cc
index 8c832ce2f..d7a46b561 100644
--- a/src/alloy/frontend/ppc/ppc_disasm.cc
+++ b/src/alloy/frontend/ppc/ppc_disasm.cc
@@ -248,6 +248,9 @@ void Disasm_dcbf(InstrData& i, StringBuffer* str) {
     case 3:
       name = "dcbflp";
       break;
+    default:
+      name = "dcbf.??";
+      break;
   }
   str->Append("%-8s r%d, r%d", name, i.X.RA, i.X.RB);
 }
diff --git a/src/alloy/hir/value.h b/src/alloy/hir/value.h
index f88f5d6cd..1a3097480 100644
--- a/src/alloy/hir/value.h
+++ b/src/alloy/hir/value.h
@@ -34,7 +34,7 @@ enum TypeName {
   MAX_TYPENAME,
 };

-static size_t GetTypeSize(TypeName type_name) {
+inline size_t GetTypeSize(TypeName type_name) {
   switch (type_name) {
     case INT8_TYPE:
       return 1;
diff --git a/src/poly/cxx_compat.h b/src/poly/cxx_compat.h
index d2d03897f..ba68ca0a8 100644
--- a/src/poly/cxx_compat.h
+++ b/src/poly/cxx_compat.h
@@ -23,6 +23,13 @@
 #define thread_local __thread
 #endif // XE_COMPILER_MSVC

+// C++11 alignas keyword.
+// This will hopefully be coming soon, as most of the alignment spec is in the
+// latest CTP.
+#if XE_COMPILER_MSVC
+#define alignas(N) __declspec(align(N))
+#endif // XE_COMPILER_MSVC
+
 namespace poly {} // namespace poly

 #endif // POLY_CXX_COMPAT_H_
diff --git a/src/poly/threading.h b/src/poly/threading.h
index 5059aaf3a..431dda291 100644
--- a/src/poly/threading.h
+++ b/src/poly/threading.h
@@ -18,7 +18,7 @@ namespace poly {
 namespace threading {

-// Gets the current high-perforance tick count.
+// Gets the current high-performance tick count.
 uint64_t ticks();

 // Gets a stable thread-specific ID, but may not be. Use for informative
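A note on the typedef ... BASE; additions in x64_sequence.inl: with two-phase name lookup, members inherited from a base class that depends on a template parameter are not found by unqualified lookup inside the derived template, so is_constant, value, and LoadDest have to be reached through the base type (or this->). MSVC has historically accepted the unqualified form, which is why the old code built there; clang and GCC reject it. A standalone illustration of the rule (not xenia code):

template <typename T>
struct Base {
  bool is_constant = false;
};

template <typename T>
struct Derived : Base<T> {
  typedef Base<T> BASE;
  bool Check() const {
    // return is_constant;       // clang/GCC: error, name not found in dependent base
    return BASE::is_constant;    // OK: qualified lookup into the dependent base
    // return this->is_constant; // equally valid alternative
  }
};

int main() {
  Derived<int> d;
  return d.Check() ? 1 : 0;
}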
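LOAD_CLOCK now calls poly::threading::ticks() instead of invoking QueryPerformanceCounter inline, keeping the Win32 dependency out of the x64 backend. The implementation of ticks() is not part of this diff; a plausible Windows definition, mirroring the LoadClock body that was removed, might look like the following (file name and exact behavior are assumptions):

// poly/threading_win.cc (sketch only; the real implementation may differ)
#include <cstdint>
#include <windows.h>

namespace poly {
namespace threading {

uint64_t ticks() {
  LARGE_INTEGER counter;
  // Mirror the removed LoadClock helper: return 0 if the query fails.
  if (QueryPerformanceCounter(&counter)) {
    return static_cast<uint64_t>(counter.QuadPart);
  }
  return 0;
}

}  // namespace threading
}  // namespace poly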
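The alignas shim added to cxx_compat.h is what lets the alignas(16) float values[4] declarations in the POW2/LOG2 helpers build on MSVC versions without C++11 alignment support. It is a textual substitute, not the full keyword: it only works in declaration positions where __declspec(align(N)) is accepted and cannot express alignas(type) or dependent expressions. Typical usage (include path assumed):

#include "poly/cxx_compat.h"

// 16-byte alignment so the _mm_store_ps/_mm_load_ps round-trips can use
// aligned accesses. Under MSVC the macro expands to __declspec(align(16));
// on clang/GCC the real C++11 keyword is used untouched.
alignas(16) static float lanes[4] = {1.0f, 2.0f, 3.0f, 4.0f};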