Moving LoadAcquire/StoreRelease behavior up into HIR.

This commit is contained in:
Ben Vanik 2014-01-04 00:50:48 -08:00
parent 88b631b160
commit 93ea56179a
11 changed files with 97 additions and 197 deletions

View File

@ -1351,61 +1351,6 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->dest->type]);
}
// Load-acquire, int8: writes the source address into *ics.reserve_address
// with an atomic exchange, then reads an int8_t at membase + address into
// the destination register.
uint32_t IntCode_LOAD_ACQUIRE_I8(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_address = ics.rf[i->src1_reg].u32;
  // The reservation is recorded before memory is read.
  xe_atomic_exchange_32(guest_address, ics.reserve_address);
  int8_t* source = (int8_t*)(ics.membase + guest_address);
  ics.rf[i->dest_reg].i8 = *source;
  return IA_NEXT;
}
// Load-acquire, int16: writes the source address into *ics.reserve_address
// with an atomic exchange, then reads an int16_t at membase + address into
// the destination register.
uint32_t IntCode_LOAD_ACQUIRE_I16(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_address = ics.rf[i->src1_reg].u32;
  // The reservation is recorded before memory is read.
  xe_atomic_exchange_32(guest_address, ics.reserve_address);
  int16_t* source = (int16_t*)(ics.membase + guest_address);
  ics.rf[i->dest_reg].i16 = *source;
  return IA_NEXT;
}
// Load-acquire, int32: writes the source address into *ics.reserve_address
// with an atomic exchange, then reads an int32_t at membase + address into
// the destination register.
uint32_t IntCode_LOAD_ACQUIRE_I32(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_address = ics.rf[i->src1_reg].u32;
  // The reservation is recorded before memory is read.
  xe_atomic_exchange_32(guest_address, ics.reserve_address);
  int32_t* source = (int32_t*)(ics.membase + guest_address);
  ics.rf[i->dest_reg].i32 = *source;
  return IA_NEXT;
}
// Load-acquire, int64: writes the source address into *ics.reserve_address
// with an atomic exchange, then reads an int64_t at membase + address into
// the destination register.
uint32_t IntCode_LOAD_ACQUIRE_I64(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_address = ics.rf[i->src1_reg].u32;
  // The reservation is recorded before memory is read.
  xe_atomic_exchange_32(guest_address, ics.reserve_address);
  int64_t* source = (int64_t*)(ics.membase + guest_address);
  ics.rf[i->dest_reg].i64 = *source;
  return IA_NEXT;
}
// Load-acquire, float: writes the source address into *ics.reserve_address
// with an atomic exchange, then reads a float at membase + address into the
// destination register.
uint32_t IntCode_LOAD_ACQUIRE_F32(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_address = ics.rf[i->src1_reg].u32;
  // The reservation is recorded before memory is read.
  xe_atomic_exchange_32(guest_address, ics.reserve_address);
  float* source = (float*)(ics.membase + guest_address);
  ics.rf[i->dest_reg].f32 = *source;
  return IA_NEXT;
}
// Load-acquire, double: writes the source address into *ics.reserve_address
// with an atomic exchange, then reads a double at membase + address into the
// destination register.
uint32_t IntCode_LOAD_ACQUIRE_F64(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_address = ics.rf[i->src1_reg].u32;
  // The reservation is recorded before memory is read.
  xe_atomic_exchange_32(guest_address, ics.reserve_address);
  double* source = (double*)(ics.membase + guest_address);
  ics.rf[i->dest_reg].f64 = *source;
  return IA_NEXT;
}
// Load-acquire, vec128: writes the (unmasked) source address into
// *ics.reserve_address with an atomic exchange, then reads a vec128_t from
// membase + (address with the low four bits cleared) into the destination
// register.
uint32_t IntCode_LOAD_ACQUIRE_V128(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_address = ics.rf[i->src1_reg].u32;
  // The reservation keeps the address exactly as given; only the memory
  // access below uses the 16-byte-masked form.
  xe_atomic_exchange_32(guest_address, ics.reserve_address);
  const uint32_t masked_address = guest_address & ~0xF;
  vec128_t* source = (vec128_t*)(ics.membase + masked_address);
  ics.rf[i->dest_reg].v128 = *source;
  return IA_NEXT;
}
// Selects the load-acquire handler for the instruction's destination value
// type and hands it to DispatchToC.
int Translate_LOAD_ACQUIRE(TranslationContext& ctx, Instr* i) {
  // Indexed by i->dest->type; entries are listed I8, I16, I32, I64, F32,
  // F64, V128.
  static IntCodeFn handlers_by_type[] = {
    IntCode_LOAD_ACQUIRE_I8,
    IntCode_LOAD_ACQUIRE_I16,
    IntCode_LOAD_ACQUIRE_I32,
    IntCode_LOAD_ACQUIRE_I64,
    IntCode_LOAD_ACQUIRE_F32,
    IntCode_LOAD_ACQUIRE_F64,
    IntCode_LOAD_ACQUIRE_V128,
  };
  return DispatchToC(ctx, i, handlers_by_type[i->dest->type]);
}
uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) {
uint32_t address = ics.rf[i->src1_reg].u32;
if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
@ -1502,89 +1447,6 @@ int Translate_STORE(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->src2.value->type]);
}
// Store-release, int8: atomically swaps 0 into *ics.reserve_address and, if
// the previous value equals the target address, writes the int8 source to
// membase + address. The destination register's i8 receives 1 when the
// write happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_I8(IntCodeState& ics, const IntCode* i) {
  const uint32_t target = ics.rf[i->src1_reg].u32;
  // A single exchange both clears the reservation and reports what it held.
  const bool reserved =
      xe_atomic_exchange_32(0, ics.reserve_address) == target;
  if (reserved) {
    *((int8_t*)(ics.membase + target)) = ics.rf[i->src2_reg].i8;
  }
  ics.rf[i->dest_reg].i8 = reserved ? 1 : 0;
  return IA_NEXT;
}
// Store-release, int16: atomically swaps 0 into *ics.reserve_address and, if
// the previous value equals the target address, writes the int16 source to
// membase + address. The destination register's i8 receives 1 when the
// write happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_I16(IntCodeState& ics, const IntCode* i) {
  const uint32_t target = ics.rf[i->src1_reg].u32;
  // A single exchange both clears the reservation and reports what it held.
  const bool reserved =
      xe_atomic_exchange_32(0, ics.reserve_address) == target;
  if (reserved) {
    *((int16_t*)(ics.membase + target)) = ics.rf[i->src2_reg].i16;
  }
  ics.rf[i->dest_reg].i8 = reserved ? 1 : 0;
  return IA_NEXT;
}
// Store-release, int32: atomically swaps 0 into *ics.reserve_address and, if
// the previous value equals the target address, writes the int32 source to
// membase + address. The destination register's i8 receives 1 when the
// write happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_I32(IntCodeState& ics, const IntCode* i) {
  const uint32_t target = ics.rf[i->src1_reg].u32;
  // A single exchange both clears the reservation and reports what it held.
  const bool reserved =
      xe_atomic_exchange_32(0, ics.reserve_address) == target;
  if (reserved) {
    *((int32_t*)(ics.membase + target)) = ics.rf[i->src2_reg].i32;
  }
  ics.rf[i->dest_reg].i8 = reserved ? 1 : 0;
  return IA_NEXT;
}
// Store-release, int64: atomically swaps 0 into *ics.reserve_address and, if
// the previous value equals the target address, writes the int64 source to
// membase + address. The destination register's i8 receives 1 when the
// write happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_I64(IntCodeState& ics, const IntCode* i) {
  const uint32_t target = ics.rf[i->src1_reg].u32;
  // A single exchange both clears the reservation and reports what it held.
  const bool reserved =
      xe_atomic_exchange_32(0, ics.reserve_address) == target;
  if (reserved) {
    *((int64_t*)(ics.membase + target)) = ics.rf[i->src2_reg].i64;
  }
  ics.rf[i->dest_reg].i8 = reserved ? 1 : 0;
  return IA_NEXT;
}
// Store-release, float: atomically swaps 0 into *ics.reserve_address and, if
// the previous value equals the target address, writes the f32 source to
// membase + address. The destination register's i8 receives 1 when the
// write happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_F32(IntCodeState& ics, const IntCode* i) {
  const uint32_t target = ics.rf[i->src1_reg].u32;
  // A single exchange both clears the reservation and reports what it held.
  const bool reserved =
      xe_atomic_exchange_32(0, ics.reserve_address) == target;
  if (reserved) {
    *((float*)(ics.membase + target)) = ics.rf[i->src2_reg].f32;
  }
  ics.rf[i->dest_reg].i8 = reserved ? 1 : 0;
  return IA_NEXT;
}
// Store-release, double: atomically swaps 0 into *ics.reserve_address and,
// if the previous value equals the target address, writes the f64 source to
// membase + address. The destination register's i8 receives 1 when the
// write happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_F64(IntCodeState& ics, const IntCode* i) {
  const uint32_t target = ics.rf[i->src1_reg].u32;
  // A single exchange both clears the reservation and reports what it held.
  const bool reserved =
      xe_atomic_exchange_32(0, ics.reserve_address) == target;
  if (reserved) {
    *((double*)(ics.membase + target)) = ics.rf[i->src2_reg].f64;
  }
  ics.rf[i->dest_reg].i8 = reserved ? 1 : 0;
  return IA_NEXT;
}
// Store-release, vec128: atomically swaps 0 into *ics.reserve_address and,
// if the previous value equals the (unmasked) target address, writes the
// v128 source to membase + (address with the low four bits cleared). The
// destination register's i8 receives 1 when the write happened, else 0.
uint32_t IntCode_STORE_RELEASE_V128(IntCodeState& ics, const IntCode* i) {
  const uint32_t target = ics.rf[i->src1_reg].u32;
  // The reservation comparison uses the address as given; only the memory
  // access uses the 16-byte-masked form.
  const bool reserved =
      xe_atomic_exchange_32(0, ics.reserve_address) == target;
  if (reserved) {
    const uint32_t masked_target = target & ~0xF;
    *((vec128_t*)(ics.membase + masked_target)) = ics.rf[i->src2_reg].v128;
  }
  ics.rf[i->dest_reg].i8 = reserved ? 1 : 0;
  return IA_NEXT;
}
// Selects the store-release handler for the type of the value being stored
// (src2) and hands it to DispatchToC.
int Translate_STORE_RELEASE(TranslationContext& ctx, Instr* i) {
  // Indexed by i->src2.value->type; entries are listed I8, I16, I32, I64,
  // F32, F64, V128.
  static IntCodeFn handlers_by_type[] = {
    IntCode_STORE_RELEASE_I8,
    IntCode_STORE_RELEASE_I16,
    IntCode_STORE_RELEASE_I32,
    IntCode_STORE_RELEASE_I64,
    IntCode_STORE_RELEASE_F32,
    IntCode_STORE_RELEASE_F64,
    IntCode_STORE_RELEASE_V128,
  };
  return DispatchToC(ctx, i, handlers_by_type[i->src2.value->type]);
}
// Prefetch handler: performs no work and continues with the next intcode.
uint32_t IntCode_PREFETCH(IntCodeState& ics, const IntCode* i) {
  // Neither argument is consulted.
  (void)ics;
  (void)i;
  return IA_NEXT;
}
@ -3121,6 +2983,33 @@ int Translate_SWIZZLE(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->src1.value->type]);
}
// Atomic exchange, 32-bit: src1 holds a pointer (read from the register's
// u64 field), src2 holds the new 32-bit value. The value previously stored
// at the pointer is written to the destination register.
uint32_t IntCode_ATOMIC_EXCHANGE_I32(IntCodeState& ics, const IntCode* i) {
  uint8_t* target = (uint8_t*)ics.rf[i->src1_reg].u64;
  auto replacement = ics.rf[i->src2_reg].u32;
  ics.rf[i->dest_reg].u32 = xe_atomic_exchange_32(replacement, target);
  return IA_NEXT;
}
// Atomic exchange, 64-bit: src1 holds a pointer (read from the register's
// u64 field), src2 holds the new 64-bit value. The value previously stored
// at the pointer is written to the destination register.
uint32_t IntCode_ATOMIC_EXCHANGE_I64(IntCodeState& ics, const IntCode* i) {
  uint8_t* target = (uint8_t*)ics.rf[i->src1_reg].u64;
  auto replacement = ics.rf[i->src2_reg].u64;
  ics.rf[i->dest_reg].u64 = xe_atomic_exchange_64(replacement, target);
  return IA_NEXT;
}
// Selects the atomic-exchange handler for the type of the new value (src2)
// and hands it to DispatchToC. Only the third and fourth table slots (the
// I32 and I64 handlers) are implemented; every other slot maps to
// IntCode_INVALID_TYPE.
int Translate_ATOMIC_EXCHANGE(TranslationContext& ctx, Instr* i) {
  // Indexed by i->src2.value->type.
  static IntCodeFn handlers_by_type[] = {
    IntCode_INVALID_TYPE,
    IntCode_INVALID_TYPE,
    IntCode_ATOMIC_EXCHANGE_I32,
    IntCode_ATOMIC_EXCHANGE_I64,
    IntCode_INVALID_TYPE,
    IntCode_INVALID_TYPE,
    IntCode_INVALID_TYPE,
  };
  return DispatchToC(ctx, i, handlers_by_type[i->src2.value->type]);
}
typedef int (*TranslateFn)(TranslationContext& ctx, Instr* i);
static const TranslateFn dispatch_table[] = {
Translate_COMMENT,
@ -3163,9 +3052,7 @@ static const TranslateFn dispatch_table[] = {
Translate_STORE_CONTEXT,
Translate_LOAD,
Translate_LOAD_ACQUIRE,
Translate_STORE,
Translate_STORE_RELEASE,
Translate_PREFETCH,
TranslateInvalid, //Translate_MAX,
@ -3224,6 +3111,7 @@ static const TranslateFn dispatch_table[] = {
Translate_SWIZZLE,
TranslateInvalid, //Translate_COMPARE_EXCHANGE,
Translate_ATOMIC_EXCHANGE,
TranslateInvalid, //Translate_ATOMIC_ADD,
TranslateInvalid, //Translate_ATOMIC_SUB,
};

View File

@ -257,25 +257,17 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) {
// --------------------------------------------------------------------------
table->AddSequence(OPCODE_LOAD, [](LIRBuilder& lb, Instr*& instr) {
// TODO
instr = instr->next;
return true;
});
table->AddSequence(OPCODE_LOAD_ACQUIRE, [](LIRBuilder& lb, Instr*& instr) {
// TODO
// TODO(benvanik): dynamic register access check
// mov reg, [membase + address.32]
// TODO(benvanik): special for f32/f64/v128
instr = instr->next;
return true;
});
table->AddSequence(OPCODE_STORE, [](LIRBuilder& lb, Instr*& instr) {
// TODO
instr = instr->next;
return true;
});
table->AddSequence(OPCODE_STORE_RELEASE, [](LIRBuilder& lb, Instr*& instr) {
// TODO
// TODO(benvanik): dynamic register access check
// mov [membase + address.32], reg
// TODO(benvanik): special for f32/f64/v128
instr = instr->next;
return true;
});
@ -616,6 +608,12 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) {
return true;
});
table->AddSequence(OPCODE_ATOMIC_EXCHANGE, [](LIRBuilder& lb, Instr*& instr) {
// TODO
instr = instr->next;
return true;
});
table->AddSequence(OPCODE_ATOMIC_ADD, [](LIRBuilder& lb, Instr*& instr) {
// TODO
instr = instr->next;

View File

@ -185,6 +185,9 @@ typedef struct XECACHEALIGN64 PPCContext_s {
// fpscr.value = (fpscr.value & ~0x000F8000) | v;
// }
// Reserve address for load acquire/store release. Shared.
uint32_t* reserve_address;
// Runtime-specific data pointer. Used on callbacks to get access to the
// current runtime and its data.
uint8_t* membase;

View File

@ -141,6 +141,8 @@ int PPCHIRBuilder::Emit(FunctionInfo* symbol_info) {
// splits blocks we don't have weird pointers.
if (prev_instr && prev_instr->next) {
instr_offset_list_[offset] = prev_instr->next;
} else if (prev_instr) {
instr_offset_list_[offset] = prev_instr->block->next->instr_head;
} else if (current_block_) {
instr_offset_list_[offset] = current_block_->instr_head;
} else if (block_tail_) {
@ -326,3 +328,24 @@ void PPCHIRBuilder::StoreVR(uint32_t reg, Value* value) {
StoreContext(
offsetof(PPCContext, v) + reg * 16, value);
}
// Emits a load with reservation: the (truncated, 32-bit) address is
// atomically exchanged into the location pointed to by
// PPCContext::reserve_address, then a regular Load of `type` is emitted.
//
// address:    address to load from; also recorded as the reservation.
// type:       type of the loaded value.
// load_flags: passed through to Load.
// Returns the value produced by Load.
Value* PPCHIRBuilder::LoadAcquire(
    Value* address, TypeName type, uint32_t load_flags) {
  // Each builder call appends an instruction. Emitting the operands as
  // separate statements fixes their order; as sibling call arguments the
  // order would be unspecified and could differ between compilers.
  Value* reserve_ptr =
      LoadContext(offsetof(PPCContext, reserve_address), INT64_TYPE);
  Value* reserved_address = Truncate(address, INT32_TYPE);
  // The previous reservation (the exchange result) is intentionally unused.
  AtomicExchange(reserve_ptr, reserved_address);
  return Load(address, type, load_flags);
}
// Emits a conditional store against the reservation: zero is atomically
// exchanged into the location pointed to by PPCContext::reserve_address,
// and the Store is only reached when the previous reservation equals the
// (truncated, 32-bit) target address.
//
// address:     address to store to.
// value:       value to store.
// store_flags: passed through to Store.
// Returns the CompareEQ result (reservation matched the address), i.e.
// whether the store was performed.
Value* PPCHIRBuilder::StoreRelease(
    Value* address, Value* value, uint32_t store_flags) {
  // Each builder call appends an instruction. Emitting the operands as
  // separate statements fixes their order; as sibling call arguments the
  // order would be unspecified and could differ between compilers.
  Value* reserve_ptr =
      LoadContext(offsetof(PPCContext, reserve_address), INT64_TYPE);
  Value* cleared = LoadZero(INT32_TYPE);
  // Clears the reservation and yields what it held in a single step.
  Value* old_address = AtomicExchange(reserve_ptr, cleared);
  Value* eq = CompareEQ(Truncate(address, INT32_TYPE), old_address);
  // Jump over the store when the reservation did not match.
  auto skip_label = NewLabel();
  BranchFalse(eq, skip_label, BRANCH_UNLIKELY);
  Store(address, value, store_flags);
  MarkLabel(skip_label);
  return eq;
}

View File

@ -63,6 +63,9 @@ public:
Value* LoadVR(uint32_t reg);
void StoreVR(uint32_t reg, Value* value);
Value* LoadAcquire(Value* address, hir::TypeName type, uint32_t load_flags = 0);
Value* StoreRelease(Value* address, Value* value, uint32_t store_flags = 0);
private:
void AnnotateLabel(uint64_t address, Label* label);

View File

@ -874,17 +874,6 @@ Value* HIRBuilder::Load(
return i->dest;
}
// Appends an OPCODE_LOAD_ACQUIRE instruction reading a value of `type` from
// `address`. `load_flags` become the instruction flags. Returns the
// instruction's destination value.
Value* HIRBuilder::LoadAcquire(
    Value* address, TypeName type, uint32_t load_flags) {
  ASSERT_ADDRESS_TYPE(address);
  Value* result = AllocValue(type);
  Instr* instr = AppendInstr(OPCODE_LOAD_ACQUIRE_info, load_flags, result);
  instr->set_src1(address);
  // Only src1 is used; the remaining operands are cleared.
  instr->src3.value = NULL;
  instr->src2.value = NULL;
  return instr->dest;
}
void HIRBuilder::Store(
Value* address, Value* value, uint32_t store_flags) {
ASSERT_ADDRESS_TYPE(address);
@ -894,17 +883,6 @@ void HIRBuilder::Store(
i->src3.value = NULL;
}
// Appends an OPCODE_STORE_RELEASE instruction storing `value` to `address`.
// `store_flags` become the instruction flags. Returns the instruction's
// INT8 destination value.
Value* HIRBuilder::StoreRelease(
    Value* address, Value* value, uint32_t store_flags) {
  ASSERT_ADDRESS_TYPE(address);
  Value* result = AllocValue(INT8_TYPE);
  Instr* instr = AppendInstr(OPCODE_STORE_RELEASE_info, store_flags, result);
  instr->set_src1(address);
  instr->set_src2(value);
  // No third operand.
  instr->src3.value = NULL;
  return instr->dest;
}
void HIRBuilder::Prefetch(
Value* address, size_t length, uint32_t prefetch_flags) {
ASSERT_ADDRESS_TYPE(address);
@ -1625,6 +1603,18 @@ Value* HIRBuilder::CompareExchange(
return i->dest;
}
// Appends an OPCODE_ATOMIC_EXCHANGE instruction with `address` as src1 and
// `new_value` as src2. Returns the instruction's destination value, which
// is allocated with the same type as `new_value`.
Value* HIRBuilder::AtomicExchange(Value* address, Value* new_value) {
  ASSERT_ADDRESS_TYPE(address);
  ASSERT_INTEGER_TYPE(new_value);
  Value* result = AllocValue(new_value->type);
  Instr* instr = AppendInstr(OPCODE_ATOMIC_EXCHANGE_info, 0, result);
  instr->set_src1(address);
  instr->set_src2(new_value);
  // No third operand.
  instr->src3.value = NULL;
  return instr->dest;
}
Value* HIRBuilder::AtomicAdd(Value* address, Value* value) {
ASSERT_ADDRESS_TYPE(address);
ASSERT_INTEGER_TYPE(value);

View File

@ -121,9 +121,7 @@ public:
void StoreContext(size_t offset, Value* value);
Value* Load(Value* address, TypeName type, uint32_t load_flags = 0);
Value* LoadAcquire(Value* address, TypeName type, uint32_t load_flags = 0);
void Store(Value* address, Value* value, uint32_t store_flags = 0);
Value* StoreRelease(Value* address, Value* value, uint32_t store_flags = 0);
void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0);
Value* Max(Value* value1, Value* value2);
@ -195,6 +193,7 @@ public:
Value* CompareExchange(Value* address,
Value* compare_value, Value* exchange_value);
Value* AtomicExchange(Value* address, Value* new_value);
Value* AtomicAdd(Value* address, Value* value);
Value* AtomicSub(Value* address, Value* value);

View File

@ -104,9 +104,7 @@ enum Opcode {
OPCODE_STORE_CONTEXT,
OPCODE_LOAD,
OPCODE_LOAD_ACQUIRE,
OPCODE_STORE,
OPCODE_STORE_RELEASE,
OPCODE_PREFETCH,
OPCODE_MAX,
@ -165,6 +163,7 @@ enum Opcode {
OPCODE_SWIZZLE,
OPCODE_COMPARE_EXCHANGE,
OPCODE_ATOMIC_EXCHANGE,
OPCODE_ATOMIC_ADD,
OPCODE_ATOMIC_SUB,

View File

@ -188,24 +188,12 @@ DEFINE_OPCODE(
OPCODE_SIG_V_V,
OPCODE_FLAG_MEMORY);
// load_acquire: flagged as both a memory and a volatile operation.
// NOTE(review): OPCODE_SIG_V_V presumably denotes one value result and one
// value operand (the builder sets dest and src1 only) -- confirm.
DEFINE_OPCODE(
OPCODE_LOAD_ACQUIRE,
"load_acquire",
OPCODE_SIG_V_V,
OPCODE_FLAG_MEMORY | OPCODE_FLAG_VOLATILE);
// store: flagged as a memory operation.
// NOTE(review): OPCODE_SIG_X_V_V presumably denotes no result and two value
// operands (address, value) -- confirm.
DEFINE_OPCODE(
OPCODE_STORE,
"store",
OPCODE_SIG_X_V_V,
OPCODE_FLAG_MEMORY);
// store_release: flagged as both a memory and a volatile operation.
// NOTE(review): OPCODE_SIG_V_V_V presumably denotes one value result and
// two value operands (the builder sets dest, src1 and src2) -- confirm.
DEFINE_OPCODE(
OPCODE_STORE_RELEASE,
"store_release",
OPCODE_SIG_V_V_V,
OPCODE_FLAG_MEMORY | OPCODE_FLAG_VOLATILE);
DEFINE_OPCODE(
OPCODE_PREFETCH,
"prefetch",
@ -517,6 +505,12 @@ DEFINE_OPCODE(
OPCODE_SIG_V_V_V_V,
OPCODE_FLAG_VOLATILE);
// atomic_exchange: flagged volatile only (no OPCODE_FLAG_MEMORY).
// NOTE(review): OPCODE_SIG_V_V_V presumably denotes one value result and
// two value operands (the builder sets dest, src1 = address and
// src2 = new value) -- confirm.
DEFINE_OPCODE(
OPCODE_ATOMIC_EXCHANGE,
"atomic_exchange",
OPCODE_SIG_V_V_V,
OPCODE_FLAG_VOLATILE);
DEFINE_OPCODE(
OPCODE_ATOMIC_ADD,
"atomic_add",

View File

@ -53,6 +53,8 @@ typedef OSQueueHead xe_atomic_stack_t;
((void)InterlockedExchangeSubtract((volatile unsigned*)value, amount))
// Atomically stores newValue into the 32-bit location `value` points to and
// evaluates to the previous contents (InterlockedExchange).
// Both arguments are parenthesized so that compound expressions such as
// `base + offset` are cast and passed as a whole.
#define xe_atomic_exchange_32(newValue, value) \
    InterlockedExchange((volatile LONG*)(value), (newValue))
// Atomically stores newValue into the 64-bit location `value` points to and
// evaluates to the previous contents (InterlockedExchange64).
// Both arguments are parenthesized so that compound expressions such as
// `base + offset` are cast and passed as a whole.
#define xe_atomic_exchange_64(newValue, value) \
    InterlockedExchange64((volatile LONGLONG*)(value), (newValue))
// Atomic compare-and-swap on the 32-bit location `value` points to: stores
// newValue if the location currently holds oldValue. Evaluates to nonzero
// when the swap happened (InterlockedCompareExchange returned oldValue).
// All arguments are parenthesized so compound expressions are cast,
// passed and compared as a whole.
// Note: oldValue is evaluated twice; pass an expression without side
// effects.
#define xe_atomic_cas_32(oldValue, newValue, value) \
    (InterlockedCompareExchange((volatile LONG*)(value), (newValue), (oldValue)) == (oldValue))

View File

@ -36,13 +36,14 @@ XenonThreadState::XenonThreadState(
xe_zero_struct(context_, sizeof(PPCContext));
// Stash pointers to common structures that callbacks may need.
context_->membase = memory_->membase();
context_->runtime = runtime;
context_->thread_state = this;
context_->reserve_address = memory_->reserve_address();
context_->membase = memory_->membase();
context_->runtime = runtime;
context_->thread_state = this;
// Set initial registers.
context_->r[1] = stack_address_ + stack_size;
context_->r[13] = thread_state_address_;
context_->r[1] = stack_address_ + stack_size;
context_->r[13] = thread_state_address_;
raw_context_ = context_;