From 93ea56179a5cb9e0d3e41d7f5a16aa510be7fcff Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 4 Jan 2014 00:50:48 -0800 Subject: [PATCH] Moving LoadAcquire/StoreRelease behavior up into HIR. --- src/alloy/backend/ivm/ivm_intcode.cc | 168 +++--------------- .../x64/lowering/lowering_sequences.cc | 26 ++- src/alloy/frontend/ppc/ppc_context.h | 3 + src/alloy/frontend/ppc/ppc_hir_builder.cc | 23 +++ src/alloy/frontend/ppc/ppc_hir_builder.h | 3 + src/alloy/hir/hir_builder.cc | 34 ++-- src/alloy/hir/hir_builder.h | 3 +- src/alloy/hir/opcodes.h | 3 +- src/alloy/hir/opcodes.inl | 18 +- src/xenia/atomic.h | 2 + src/xenia/cpu/xenon_thread_state.cc | 11 +- 11 files changed, 97 insertions(+), 197 deletions(-) diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc index 615ce1771..91a11192d 100644 --- a/src/alloy/backend/ivm/ivm_intcode.cc +++ b/src/alloy/backend/ivm/ivm_intcode.cc @@ -1351,61 +1351,6 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) { return DispatchToC(ctx, i, fns[i->dest->type]); } -uint32_t IntCode_LOAD_ACQUIRE_I8(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - xe_atomic_exchange_32(address, ics.reserve_address); - ics.rf[i->dest_reg].i8 = *((int8_t*)(ics.membase + address)); - return IA_NEXT; -} -uint32_t IntCode_LOAD_ACQUIRE_I16(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - xe_atomic_exchange_32(address, ics.reserve_address); - ics.rf[i->dest_reg].i16 = *((int16_t*)(ics.membase + address)); - return IA_NEXT; -} -uint32_t IntCode_LOAD_ACQUIRE_I32(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - xe_atomic_exchange_32(address, ics.reserve_address); - ics.rf[i->dest_reg].i32 = *((int32_t*)(ics.membase + address)); - return IA_NEXT; -} -uint32_t IntCode_LOAD_ACQUIRE_I64(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - xe_atomic_exchange_32(address, ics.reserve_address); - ics.rf[i->dest_reg].i64 = *((int64_t*)(ics.membase + address)); - return IA_NEXT; -} -uint32_t IntCode_LOAD_ACQUIRE_F32(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - xe_atomic_exchange_32(address, ics.reserve_address); - ics.rf[i->dest_reg].f32 = *((float*)(ics.membase + address)); - return IA_NEXT; -} -uint32_t IntCode_LOAD_ACQUIRE_F64(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - xe_atomic_exchange_32(address, ics.reserve_address); - ics.rf[i->dest_reg].f64 = *((double*)(ics.membase + address)); - return IA_NEXT; -} -uint32_t IntCode_LOAD_ACQUIRE_V128(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - xe_atomic_exchange_32(address, ics.reserve_address); - ics.rf[i->dest_reg].v128 = *((vec128_t*)(ics.membase + (address & ~0xF))); - return IA_NEXT; -} -int Translate_LOAD_ACQUIRE(TranslationContext& ctx, Instr* i) { - static IntCodeFn fns[] = { - IntCode_LOAD_ACQUIRE_I8, - IntCode_LOAD_ACQUIRE_I16, - IntCode_LOAD_ACQUIRE_I32, - IntCode_LOAD_ACQUIRE_I64, - IntCode_LOAD_ACQUIRE_F32, - IntCode_LOAD_ACQUIRE_F64, - IntCode_LOAD_ACQUIRE_V128, - }; - return DispatchToC(ctx, i, fns[i->dest->type]); -} - uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) { uint32_t address = ics.rf[i->src1_reg].u32; if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) { @@ -1502,89 +1447,6 @@ int Translate_STORE(TranslationContext& ctx, Instr* i) { return DispatchToC(ctx, i, fns[i->src2.value->type]); } -uint32_t IntCode_STORE_RELEASE_I8(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - int8_t stored = 0; - if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { - *((int8_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i8; - stored = 1; - } - ics.rf[i->dest_reg].i8 = stored; - return IA_NEXT; -} -uint32_t IntCode_STORE_RELEASE_I16(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - int8_t stored = 0; - if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { - *((int16_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i16; - stored = 1; - } - ics.rf[i->dest_reg].i8 = stored; - return IA_NEXT; -} -uint32_t IntCode_STORE_RELEASE_I32(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - int8_t stored = 0; - if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { - *((int32_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i32; - stored = 1; - } - ics.rf[i->dest_reg].i8 = stored; - return IA_NEXT; -} -uint32_t IntCode_STORE_RELEASE_I64(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - int8_t stored = 0; - if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { - *((int64_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i64; - stored = 1; - } - ics.rf[i->dest_reg].i8 = stored; - return IA_NEXT; -} -uint32_t IntCode_STORE_RELEASE_F32(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - int8_t stored = 0; - if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { - *((float*)(ics.membase + address)) = ics.rf[i->src2_reg].f32; - stored = 1; - } - ics.rf[i->dest_reg].i8 = stored; - return IA_NEXT; -} -uint32_t IntCode_STORE_RELEASE_F64(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - int8_t stored = 0; - if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { - *((double*)(ics.membase + address)) = ics.rf[i->src2_reg].f64; - stored = 1; - } - ics.rf[i->dest_reg].i8 = stored; - return IA_NEXT; -} -uint32_t IntCode_STORE_RELEASE_V128(IntCodeState& ics, const IntCode* i) { - uint32_t address = ics.rf[i->src1_reg].u32; - int8_t stored = 0; - if (xe_atomic_exchange_32(0, ics.reserve_address) == address) { - *((vec128_t*)(ics.membase + (address & ~0xF))) = ics.rf[i->src2_reg].v128; - stored = 1; - } - ics.rf[i->dest_reg].i8 = stored; - return IA_NEXT; -} -int Translate_STORE_RELEASE(TranslationContext& ctx, Instr* i) { - static IntCodeFn fns[] = { - IntCode_STORE_RELEASE_I8, - IntCode_STORE_RELEASE_I16, - IntCode_STORE_RELEASE_I32, - IntCode_STORE_RELEASE_I64, - IntCode_STORE_RELEASE_F32, - IntCode_STORE_RELEASE_F64, - IntCode_STORE_RELEASE_V128, - }; - return DispatchToC(ctx, i, fns[i->src2.value->type]); -} - uint32_t IntCode_PREFETCH(IntCodeState& ics, const IntCode* i) { return IA_NEXT; } @@ -3121,6 +2983,33 @@ int Translate_SWIZZLE(TranslationContext& ctx, Instr* i) { return DispatchToC(ctx, i, fns[i->src1.value->type]); } +uint32_t IntCode_ATOMIC_EXCHANGE_I32(IntCodeState& ics, const IntCode* i) { + auto address = (uint8_t*)ics.rf[i->src1_reg].u64; + auto new_value = ics.rf[i->src2_reg].u32; + auto old_value = xe_atomic_exchange_32(new_value, address); + ics.rf[i->dest_reg].u32 = old_value; + return IA_NEXT; +} +uint32_t IntCode_ATOMIC_EXCHANGE_I64(IntCodeState& ics, const IntCode* i) { + auto address = (uint8_t*)ics.rf[i->src1_reg].u64; + auto new_value = ics.rf[i->src2_reg].u64; + auto old_value = xe_atomic_exchange_64(new_value, address); + ics.rf[i->dest_reg].u64 = old_value; + return IA_NEXT; +} +int Translate_ATOMIC_EXCHANGE(TranslationContext& ctx, Instr* i) { + static IntCodeFn fns[] = { + IntCode_INVALID_TYPE, + IntCode_INVALID_TYPE, + IntCode_ATOMIC_EXCHANGE_I32, + IntCode_ATOMIC_EXCHANGE_I64, + IntCode_INVALID_TYPE, + IntCode_INVALID_TYPE, + IntCode_INVALID_TYPE, + }; + return DispatchToC(ctx, i, fns[i->src2.value->type]); +} + typedef int (*TranslateFn)(TranslationContext& ctx, Instr* i); static const TranslateFn dispatch_table[] = { Translate_COMMENT, @@ -3163,9 +3052,7 @@ static const TranslateFn dispatch_table[] = { Translate_STORE_CONTEXT, Translate_LOAD, - Translate_LOAD_ACQUIRE, Translate_STORE, - Translate_STORE_RELEASE, Translate_PREFETCH, TranslateInvalid, //Translate_MAX, @@ -3224,6 +3111,7 @@ static const TranslateFn dispatch_table[] = { Translate_SWIZZLE, TranslateInvalid, //Translate_COMPARE_EXCHANGE, + Translate_ATOMIC_EXCHANGE, TranslateInvalid, //Translate_ATOMIC_ADD, TranslateInvalid, //Translate_ATOMIC_SUB, }; diff --git a/src/alloy/backend/x64/lowering/lowering_sequences.cc b/src/alloy/backend/x64/lowering/lowering_sequences.cc index 76eb10e74..8f7038f5d 100644 --- a/src/alloy/backend/x64/lowering/lowering_sequences.cc +++ b/src/alloy/backend/x64/lowering/lowering_sequences.cc @@ -257,25 +257,17 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { // -------------------------------------------------------------------------- table->AddSequence(OPCODE_LOAD, [](LIRBuilder& lb, Instr*& instr) { - // TODO - instr = instr->next; - return true; - }); - - table->AddSequence(OPCODE_LOAD_ACQUIRE, [](LIRBuilder& lb, Instr*& instr) { - // TODO + // TODO(benvanik): dynamic register access check + // mov reg, [membase + address.32] + // TODO(benvanik): special for f32/f64/v128 instr = instr->next; return true; }); table->AddSequence(OPCODE_STORE, [](LIRBuilder& lb, Instr*& instr) { - // TODO - instr = instr->next; - return true; - }); - - table->AddSequence(OPCODE_STORE_RELEASE, [](LIRBuilder& lb, Instr*& instr) { - // TODO + // TODO(benvanik): dynamic register access check + // mov [membase + address.32], reg + // TODO(benvanik): special for f32/f64/v128 instr = instr->next; return true; }); @@ -616,6 +608,12 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) { return true; }); + table->AddSequence(OPCODE_ATOMIC_EXCHANGE, [](LIRBuilder& lb, Instr*& instr) { + // TODO + instr = instr->next; + return true; + }); + table->AddSequence(OPCODE_ATOMIC_ADD, [](LIRBuilder& lb, Instr*& instr) { // TODO instr = instr->next; diff --git a/src/alloy/frontend/ppc/ppc_context.h b/src/alloy/frontend/ppc/ppc_context.h index 6e6f2e8c8..a7e984754 100644 --- a/src/alloy/frontend/ppc/ppc_context.h +++ b/src/alloy/frontend/ppc/ppc_context.h @@ -185,6 +185,9 @@ typedef struct XECACHEALIGN64 PPCContext_s { // fpscr.value = (fpscr.value & ~0x000F8000) | v; // } + // Reserve address for load acquire/store release. Shared. + uint32_t* reserve_address; + // Runtime-specific data pointer. Used on callbacks to get access to the // current runtime and its data. uint8_t* membase; diff --git a/src/alloy/frontend/ppc/ppc_hir_builder.cc b/src/alloy/frontend/ppc/ppc_hir_builder.cc index dd7c0d32d..69780ea93 100644 --- a/src/alloy/frontend/ppc/ppc_hir_builder.cc +++ b/src/alloy/frontend/ppc/ppc_hir_builder.cc @@ -141,6 +141,8 @@ int PPCHIRBuilder::Emit(FunctionInfo* symbol_info) { // splits blocks we don't have weird pointers. if (prev_instr && prev_instr->next) { instr_offset_list_[offset] = prev_instr->next; + } else if (prev_instr) { + instr_offset_list_[offset] = prev_instr->block->next->instr_head; } else if (current_block_) { instr_offset_list_[offset] = current_block_->instr_head; } else if (block_tail_) { @@ -326,3 +328,24 @@ void PPCHIRBuilder::StoreVR(uint32_t reg, Value* value) { StoreContext( offsetof(PPCContext, v) + reg * 16, value); } + +Value* PPCHIRBuilder::LoadAcquire( + Value* address, TypeName type, uint32_t load_flags) { + AtomicExchange( + LoadContext(offsetof(PPCContext, reserve_address), INT64_TYPE), + Truncate(address, INT32_TYPE)); + return Load(address, type, load_flags); +} + +Value* PPCHIRBuilder::StoreRelease( + Value* address, Value* value, uint32_t store_flags) { + Value* old_address = AtomicExchange( + LoadContext(offsetof(PPCContext, reserve_address), INT64_TYPE), + LoadZero(INT32_TYPE)); + Value* eq = CompareEQ(Truncate(address, INT32_TYPE), old_address); + auto skip_label = NewLabel(); + BranchFalse(eq, skip_label, BRANCH_UNLIKELY); + Store(address, value, store_flags); + MarkLabel(skip_label); + return eq; +} diff --git a/src/alloy/frontend/ppc/ppc_hir_builder.h b/src/alloy/frontend/ppc/ppc_hir_builder.h index a4dee98cc..6b85b8c1a 100644 --- a/src/alloy/frontend/ppc/ppc_hir_builder.h +++ b/src/alloy/frontend/ppc/ppc_hir_builder.h @@ -63,6 +63,9 @@ public: Value* LoadVR(uint32_t reg); void StoreVR(uint32_t reg, Value* value); + Value* LoadAcquire(Value* address, hir::TypeName type, uint32_t load_flags = 0); + Value* StoreRelease(Value* address, Value* value, uint32_t store_flags = 0); + private: void AnnotateLabel(uint64_t address, Label* label); diff --git a/src/alloy/hir/hir_builder.cc b/src/alloy/hir/hir_builder.cc index a46b3fbfe..c147ae0fc 100644 --- a/src/alloy/hir/hir_builder.cc +++ b/src/alloy/hir/hir_builder.cc @@ -874,17 +874,6 @@ Value* HIRBuilder::Load( return i->dest; } -Value* HIRBuilder::LoadAcquire( - Value* address, TypeName type, uint32_t load_flags) { - ASSERT_ADDRESS_TYPE(address); - Instr* i = AppendInstr( - OPCODE_LOAD_ACQUIRE_info, load_flags, - AllocValue(type)); - i->set_src1(address); - i->src2.value = i->src3.value = NULL; - return i->dest; -} - void HIRBuilder::Store( Value* address, Value* value, uint32_t store_flags) { ASSERT_ADDRESS_TYPE(address); @@ -894,17 +883,6 @@ void HIRBuilder::Store( i->src3.value = NULL; } -Value* HIRBuilder::StoreRelease( - Value* address, Value* value, uint32_t store_flags) { - ASSERT_ADDRESS_TYPE(address); - Instr* i = AppendInstr(OPCODE_STORE_RELEASE_info, store_flags, - AllocValue(INT8_TYPE)); - i->set_src1(address); - i->set_src2(value); - i->src3.value = NULL; - return i->dest; -} - void HIRBuilder::Prefetch( Value* address, size_t length, uint32_t prefetch_flags) { ASSERT_ADDRESS_TYPE(address); @@ -1625,6 +1603,18 @@ Value* HIRBuilder::CompareExchange( return i->dest; } +Value* HIRBuilder::AtomicExchange(Value* address, Value* new_value) { + ASSERT_ADDRESS_TYPE(address); + ASSERT_INTEGER_TYPE(new_value); + Instr* i = AppendInstr( + OPCODE_ATOMIC_EXCHANGE_info, 0, + AllocValue(new_value->type)); + i->set_src1(address); + i->set_src2(new_value); + i->src3.value = NULL; + return i->dest; +} + Value* HIRBuilder::AtomicAdd(Value* address, Value* value) { ASSERT_ADDRESS_TYPE(address); ASSERT_INTEGER_TYPE(value); diff --git a/src/alloy/hir/hir_builder.h b/src/alloy/hir/hir_builder.h index 050d398ec..c2097cc6d 100644 --- a/src/alloy/hir/hir_builder.h +++ b/src/alloy/hir/hir_builder.h @@ -121,9 +121,7 @@ public: void StoreContext(size_t offset, Value* value); Value* Load(Value* address, TypeName type, uint32_t load_flags = 0); - Value* LoadAcquire(Value* address, TypeName type, uint32_t load_flags = 0); void Store(Value* address, Value* value, uint32_t store_flags = 0); - Value* StoreRelease(Value* address, Value* value, uint32_t store_flags = 0); void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0); Value* Max(Value* value1, Value* value2); @@ -195,6 +193,7 @@ public: Value* CompareExchange(Value* address, Value* compare_value, Value* exchange_value); + Value* AtomicExchange(Value* address, Value* new_value); Value* AtomicAdd(Value* address, Value* value); Value* AtomicSub(Value* address, Value* value); diff --git a/src/alloy/hir/opcodes.h b/src/alloy/hir/opcodes.h index 65fa531b9..eca180ff7 100644 --- a/src/alloy/hir/opcodes.h +++ b/src/alloy/hir/opcodes.h @@ -104,9 +104,7 @@ enum Opcode { OPCODE_STORE_CONTEXT, OPCODE_LOAD, - OPCODE_LOAD_ACQUIRE, OPCODE_STORE, - OPCODE_STORE_RELEASE, OPCODE_PREFETCH, OPCODE_MAX, @@ -165,6 +163,7 @@ enum Opcode { OPCODE_SWIZZLE, OPCODE_COMPARE_EXCHANGE, + OPCODE_ATOMIC_EXCHANGE, OPCODE_ATOMIC_ADD, OPCODE_ATOMIC_SUB, diff --git a/src/alloy/hir/opcodes.inl b/src/alloy/hir/opcodes.inl index cecc62bde..120b52c1a 100644 --- a/src/alloy/hir/opcodes.inl +++ b/src/alloy/hir/opcodes.inl @@ -188,24 +188,12 @@ DEFINE_OPCODE( OPCODE_SIG_V_V, OPCODE_FLAG_MEMORY); -DEFINE_OPCODE( - OPCODE_LOAD_ACQUIRE, - "load_acquire", - OPCODE_SIG_V_V, - OPCODE_FLAG_MEMORY | OPCODE_FLAG_VOLATILE); - DEFINE_OPCODE( OPCODE_STORE, "store", OPCODE_SIG_X_V_V, OPCODE_FLAG_MEMORY); -DEFINE_OPCODE( - OPCODE_STORE_RELEASE, - "store_release", - OPCODE_SIG_V_V_V, - OPCODE_FLAG_MEMORY | OPCODE_FLAG_VOLATILE); - DEFINE_OPCODE( OPCODE_PREFETCH, "prefetch", @@ -517,6 +505,12 @@ DEFINE_OPCODE( OPCODE_SIG_V_V_V_V, OPCODE_FLAG_VOLATILE); +DEFINE_OPCODE( + OPCODE_ATOMIC_EXCHANGE, + "atomic_exchange", + OPCODE_SIG_V_V_V, + OPCODE_FLAG_VOLATILE); + DEFINE_OPCODE( OPCODE_ATOMIC_ADD, "atomic_add", diff --git a/src/xenia/atomic.h b/src/xenia/atomic.h index 15705dfb7..d6fd5fb92 100644 --- a/src/xenia/atomic.h +++ b/src/xenia/atomic.h @@ -53,6 +53,8 @@ typedef OSQueueHead xe_atomic_stack_t; ((void)InterlockedExchangeSubtract((volatile unsigned*)value, amount)) #define xe_atomic_exchange_32(newValue, value) \ InterlockedExchange((volatile LONG*)value, newValue) +#define xe_atomic_exchange_64(newValue, value) \ + InterlockedExchange64((volatile LONGLONG*)value, newValue) #define xe_atomic_cas_32(oldValue, newValue, value) \ (InterlockedCompareExchange((volatile LONG*)value, newValue, oldValue) == oldValue) diff --git a/src/xenia/cpu/xenon_thread_state.cc b/src/xenia/cpu/xenon_thread_state.cc index 925388e7b..4653296e1 100644 --- a/src/xenia/cpu/xenon_thread_state.cc +++ b/src/xenia/cpu/xenon_thread_state.cc @@ -36,13 +36,14 @@ XenonThreadState::XenonThreadState( xe_zero_struct(context_, sizeof(PPCContext)); // Stash pointers to common structures that callbacks may need. - context_->membase = memory_->membase(); - context_->runtime = runtime; - context_->thread_state = this; + context_->reserve_address = memory_->reserve_address(); + context_->membase = memory_->membase(); + context_->runtime = runtime; + context_->thread_state = this; // Set initial registers. - context_->r[1] = stack_address_ + stack_size; - context_->r[13] = thread_state_address_; + context_->r[1] = stack_address_ + stack_size; + context_->r[13] = thread_state_address_; raw_context_ = context_;