Moving LoadAcquire/StoreRelease behavior up into HIR.

This commit is contained in:
Ben Vanik 2014-01-04 00:50:48 -08:00
parent 88b631b160
commit 93ea56179a
11 changed files with 97 additions and 197 deletions

View File

@ -1351,61 +1351,6 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->dest->type]); return DispatchToC(ctx, i, fns[i->dest->type]);
} }
// Load-acquire, 8-bit integer: stores the guest address into the word at
// ics.reserve_address, then reads the byte at membase + address into dest.
uint32_t IntCode_LOAD_ACQUIRE_I8(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // Record the reservation before reading memory.
  xe_atomic_exchange_32(guest_addr, ics.reserve_address);
  int8_t* host_ptr = (int8_t*)(ics.membase + guest_addr);
  ics.rf[i->dest_reg].i8 = *host_ptr;
  return IA_NEXT;
}
// Load-acquire, 16-bit integer: stores the guest address into the word at
// ics.reserve_address, then reads an int16_t at membase + address into dest.
uint32_t IntCode_LOAD_ACQUIRE_I16(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // Record the reservation before reading memory.
  xe_atomic_exchange_32(guest_addr, ics.reserve_address);
  int16_t* host_ptr = (int16_t*)(ics.membase + guest_addr);
  ics.rf[i->dest_reg].i16 = *host_ptr;
  return IA_NEXT;
}
// Load-acquire, 32-bit integer: stores the guest address into the word at
// ics.reserve_address, then reads an int32_t at membase + address into dest.
uint32_t IntCode_LOAD_ACQUIRE_I32(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // Record the reservation before reading memory.
  xe_atomic_exchange_32(guest_addr, ics.reserve_address);
  int32_t* host_ptr = (int32_t*)(ics.membase + guest_addr);
  ics.rf[i->dest_reg].i32 = *host_ptr;
  return IA_NEXT;
}
// Load-acquire, 64-bit integer: stores the guest address into the word at
// ics.reserve_address, then reads an int64_t at membase + address into dest.
uint32_t IntCode_LOAD_ACQUIRE_I64(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // Record the reservation before reading memory.
  xe_atomic_exchange_32(guest_addr, ics.reserve_address);
  int64_t* host_ptr = (int64_t*)(ics.membase + guest_addr);
  ics.rf[i->dest_reg].i64 = *host_ptr;
  return IA_NEXT;
}
// Load-acquire, 32-bit float: stores the guest address into the word at
// ics.reserve_address, then reads a float at membase + address into dest.
uint32_t IntCode_LOAD_ACQUIRE_F32(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // Record the reservation before reading memory.
  xe_atomic_exchange_32(guest_addr, ics.reserve_address);
  float* host_ptr = (float*)(ics.membase + guest_addr);
  ics.rf[i->dest_reg].f32 = *host_ptr;
  return IA_NEXT;
}
// Load-acquire, 64-bit float: stores the guest address into the word at
// ics.reserve_address, then reads a double at membase + address into dest.
uint32_t IntCode_LOAD_ACQUIRE_F64(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // Record the reservation before reading memory.
  xe_atomic_exchange_32(guest_addr, ics.reserve_address);
  double* host_ptr = (double*)(ics.membase + guest_addr);
  ics.rf[i->dest_reg].f64 = *host_ptr;
  return IA_NEXT;
}
// Load-acquire, 128-bit vector: stores the (unmasked) guest address into the
// word at ics.reserve_address, then reads a vec128_t from membase + address
// with the low four address bits cleared.
uint32_t IntCode_LOAD_ACQUIRE_V128(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // The reservation keeps the original address; only the load is masked.
  xe_atomic_exchange_32(guest_addr, ics.reserve_address);
  const uint32_t masked_addr = guest_addr & ~0xF;
  vec128_t* host_ptr = (vec128_t*)(ics.membase + masked_addr);
  ics.rf[i->dest_reg].v128 = *host_ptr;
  return IA_NEXT;
}
// Picks the LOAD_ACQUIRE handler matching the destination value's type and
// hands it to DispatchToC. The table is indexed directly by i->dest->type.
int Translate_LOAD_ACQUIRE(TranslationContext& ctx, Instr* i) {
  static IntCodeFn handlers[] = {
    IntCode_LOAD_ACQUIRE_I8,
    IntCode_LOAD_ACQUIRE_I16,
    IntCode_LOAD_ACQUIRE_I32,
    IntCode_LOAD_ACQUIRE_I64,
    IntCode_LOAD_ACQUIRE_F32,
    IntCode_LOAD_ACQUIRE_F64,
    IntCode_LOAD_ACQUIRE_V128,
  };
  IntCodeFn handler = handlers[i->dest->type];
  return DispatchToC(ctx, i, handler);
}
uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) {
uint32_t address = ics.rf[i->src1_reg].u32; uint32_t address = ics.rf[i->src1_reg].u32;
if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) { if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
@ -1502,89 +1447,6 @@ int Translate_STORE(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->src2.value->type]); return DispatchToC(ctx, i, fns[i->src2.value->type]);
} }
// Store-release, 8-bit integer: swaps 0 into the word at ics.reserve_address
// and performs the store only if the previous value equals the target
// address. dest.i8 receives 1 when the store happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_I8(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // The exchange always clears the reservation, whether or not it matched.
  const bool matched =
      xe_atomic_exchange_32(0, ics.reserve_address) == guest_addr;
  if (matched) {
    *((int8_t*)(ics.membase + guest_addr)) = ics.rf[i->src2_reg].i8;
  }
  ics.rf[i->dest_reg].i8 = matched ? 1 : 0;
  return IA_NEXT;
}
// Store-release, 16-bit integer: swaps 0 into the word at ics.reserve_address
// and performs the store only if the previous value equals the target
// address. dest.i8 receives 1 when the store happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_I16(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // The exchange always clears the reservation, whether or not it matched.
  const bool matched =
      xe_atomic_exchange_32(0, ics.reserve_address) == guest_addr;
  if (matched) {
    *((int16_t*)(ics.membase + guest_addr)) = ics.rf[i->src2_reg].i16;
  }
  ics.rf[i->dest_reg].i8 = matched ? 1 : 0;
  return IA_NEXT;
}
// Store-release, 32-bit integer: swaps 0 into the word at ics.reserve_address
// and performs the store only if the previous value equals the target
// address. dest.i8 receives 1 when the store happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_I32(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // The exchange always clears the reservation, whether or not it matched.
  const bool matched =
      xe_atomic_exchange_32(0, ics.reserve_address) == guest_addr;
  if (matched) {
    *((int32_t*)(ics.membase + guest_addr)) = ics.rf[i->src2_reg].i32;
  }
  ics.rf[i->dest_reg].i8 = matched ? 1 : 0;
  return IA_NEXT;
}
// Store-release, 64-bit integer: swaps 0 into the word at ics.reserve_address
// and performs the store only if the previous value equals the target
// address. dest.i8 receives 1 when the store happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_I64(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // The exchange always clears the reservation, whether or not it matched.
  const bool matched =
      xe_atomic_exchange_32(0, ics.reserve_address) == guest_addr;
  if (matched) {
    *((int64_t*)(ics.membase + guest_addr)) = ics.rf[i->src2_reg].i64;
  }
  ics.rf[i->dest_reg].i8 = matched ? 1 : 0;
  return IA_NEXT;
}
// Store-release, 32-bit float: swaps 0 into the word at ics.reserve_address
// and performs the store only if the previous value equals the target
// address. dest.i8 receives 1 when the store happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_F32(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // The exchange always clears the reservation, whether or not it matched.
  const bool matched =
      xe_atomic_exchange_32(0, ics.reserve_address) == guest_addr;
  if (matched) {
    *((float*)(ics.membase + guest_addr)) = ics.rf[i->src2_reg].f32;
  }
  ics.rf[i->dest_reg].i8 = matched ? 1 : 0;
  return IA_NEXT;
}
// Store-release, 64-bit float: swaps 0 into the word at ics.reserve_address
// and performs the store only if the previous value equals the target
// address. dest.i8 receives 1 when the store happened and 0 otherwise.
uint32_t IntCode_STORE_RELEASE_F64(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // The exchange always clears the reservation, whether or not it matched.
  const bool matched =
      xe_atomic_exchange_32(0, ics.reserve_address) == guest_addr;
  if (matched) {
    *((double*)(ics.membase + guest_addr)) = ics.rf[i->src2_reg].f64;
  }
  ics.rf[i->dest_reg].i8 = matched ? 1 : 0;
  return IA_NEXT;
}
// Store-release, 128-bit vector: swaps 0 into the word at ics.reserve_address
// and performs the store only if the previous value equals the (unmasked)
// target address. The write itself goes to membase + address with the low
// four address bits cleared. dest.i8 receives 1 when the store happened and 0
// otherwise.
uint32_t IntCode_STORE_RELEASE_V128(IntCodeState& ics, const IntCode* i) {
  const uint32_t guest_addr = ics.rf[i->src1_reg].u32;
  // The exchange always clears the reservation, whether or not it matched.
  const bool matched =
      xe_atomic_exchange_32(0, ics.reserve_address) == guest_addr;
  if (matched) {
    const uint32_t masked_addr = guest_addr & ~0xF;
    *((vec128_t*)(ics.membase + masked_addr)) = ics.rf[i->src2_reg].v128;
  }
  ics.rf[i->dest_reg].i8 = matched ? 1 : 0;
  return IA_NEXT;
}
// Picks the STORE_RELEASE handler matching the type of the value being stored
// (src2) and hands it to DispatchToC. The table is indexed directly by
// i->src2.value->type.
int Translate_STORE_RELEASE(TranslationContext& ctx, Instr* i) {
  static IntCodeFn handlers[] = {
    IntCode_STORE_RELEASE_I8,
    IntCode_STORE_RELEASE_I16,
    IntCode_STORE_RELEASE_I32,
    IntCode_STORE_RELEASE_I64,
    IntCode_STORE_RELEASE_F32,
    IntCode_STORE_RELEASE_F64,
    IntCode_STORE_RELEASE_V128,
  };
  IntCodeFn handler = handlers[i->src2.value->type];
  return DispatchToC(ctx, i, handler);
}
uint32_t IntCode_PREFETCH(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_PREFETCH(IntCodeState& ics, const IntCode* i) {
return IA_NEXT; return IA_NEXT;
} }
@ -3121,6 +2983,33 @@ int Translate_SWIZZLE(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->src1.value->type]); return DispatchToC(ctx, i, fns[i->src1.value->type]);
} }
// 32-bit atomic exchange. src1 holds a 64-bit value that is used directly as
// a pointer (membase is not added here); src2 is the value to swap in. The
// value previously stored at that location is written to dest.u32.
uint32_t IntCode_ATOMIC_EXCHANGE_I32(IntCodeState& ics, const IntCode* i) {
  uint8_t* target = (uint8_t*)ics.rf[i->src1_reg].u64;
  const auto replacement = ics.rf[i->src2_reg].u32;
  ics.rf[i->dest_reg].u32 = xe_atomic_exchange_32(replacement, target);
  return IA_NEXT;
}
// 64-bit atomic exchange. src1 holds a 64-bit value that is used directly as
// a pointer (membase is not added here); src2 is the value to swap in. The
// value previously stored at that location is written to dest.u64.
uint32_t IntCode_ATOMIC_EXCHANGE_I64(IntCodeState& ics, const IntCode* i) {
  uint8_t* target = (uint8_t*)ics.rf[i->src1_reg].u64;
  const auto replacement = ics.rf[i->src2_reg].u64;
  ics.rf[i->dest_reg].u64 = xe_atomic_exchange_64(replacement, target);
  return IA_NEXT;
}
// Picks the ATOMIC_EXCHANGE handler for the type of the new value (src2) and
// hands it to DispatchToC. Only the third and fourth table slots (the I32 and
// I64 handlers) are implemented; every other slot maps to
// IntCode_INVALID_TYPE.
int Translate_ATOMIC_EXCHANGE(TranslationContext& ctx, Instr* i) {
  static IntCodeFn handlers[] = {
    IntCode_INVALID_TYPE,
    IntCode_INVALID_TYPE,
    IntCode_ATOMIC_EXCHANGE_I32,
    IntCode_ATOMIC_EXCHANGE_I64,
    IntCode_INVALID_TYPE,
    IntCode_INVALID_TYPE,
    IntCode_INVALID_TYPE,
  };
  IntCodeFn handler = handlers[i->src2.value->type];
  return DispatchToC(ctx, i, handler);
}
typedef int (*TranslateFn)(TranslationContext& ctx, Instr* i); typedef int (*TranslateFn)(TranslationContext& ctx, Instr* i);
static const TranslateFn dispatch_table[] = { static const TranslateFn dispatch_table[] = {
Translate_COMMENT, Translate_COMMENT,
@ -3163,9 +3052,7 @@ static const TranslateFn dispatch_table[] = {
Translate_STORE_CONTEXT, Translate_STORE_CONTEXT,
Translate_LOAD, Translate_LOAD,
Translate_LOAD_ACQUIRE,
Translate_STORE, Translate_STORE,
Translate_STORE_RELEASE,
Translate_PREFETCH, Translate_PREFETCH,
TranslateInvalid, //Translate_MAX, TranslateInvalid, //Translate_MAX,
@ -3224,6 +3111,7 @@ static const TranslateFn dispatch_table[] = {
Translate_SWIZZLE, Translate_SWIZZLE,
TranslateInvalid, //Translate_COMPARE_EXCHANGE, TranslateInvalid, //Translate_COMPARE_EXCHANGE,
Translate_ATOMIC_EXCHANGE,
TranslateInvalid, //Translate_ATOMIC_ADD, TranslateInvalid, //Translate_ATOMIC_ADD,
TranslateInvalid, //Translate_ATOMIC_SUB, TranslateInvalid, //Translate_ATOMIC_SUB,
}; };

View File

@ -257,25 +257,17 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) {
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------
table->AddSequence(OPCODE_LOAD, [](LIRBuilder& lb, Instr*& instr) { table->AddSequence(OPCODE_LOAD, [](LIRBuilder& lb, Instr*& instr) {
// TODO // TODO(benvanik): dynamic register access check
instr = instr->next; // mov reg, [membase + address.32]
return true; // TODO(benvanik): special for f32/f64/v128
});
table->AddSequence(OPCODE_LOAD_ACQUIRE, [](LIRBuilder& lb, Instr*& instr) {
// TODO
instr = instr->next; instr = instr->next;
return true; return true;
}); });
table->AddSequence(OPCODE_STORE, [](LIRBuilder& lb, Instr*& instr) { table->AddSequence(OPCODE_STORE, [](LIRBuilder& lb, Instr*& instr) {
// TODO // TODO(benvanik): dynamic register access check
instr = instr->next; // mov [membase + address.32], reg
return true; // TODO(benvanik): special for f32/f64/v128
});
table->AddSequence(OPCODE_STORE_RELEASE, [](LIRBuilder& lb, Instr*& instr) {
// TODO
instr = instr->next; instr = instr->next;
return true; return true;
}); });
@ -616,6 +608,12 @@ void alloy::backend::x64::lowering::RegisterSequences(LoweringTable* table) {
return true; return true;
}); });
// Placeholder sequence for OPCODE_ATOMIC_EXCHANGE: nothing is emitted through
// lb yet; the callback only steps instr to the following instruction and
// returns true.
table->AddSequence(OPCODE_ATOMIC_EXCHANGE, [](LIRBuilder& lb, Instr*& instr) {
// TODO: emit the actual lowering for atomic exchange (not implemented yet).
instr = instr->next;
return true;
});
table->AddSequence(OPCODE_ATOMIC_ADD, [](LIRBuilder& lb, Instr*& instr) { table->AddSequence(OPCODE_ATOMIC_ADD, [](LIRBuilder& lb, Instr*& instr) {
// TODO // TODO
instr = instr->next; instr = instr->next;

View File

@ -185,6 +185,9 @@ typedef struct XECACHEALIGN64 PPCContext_s {
// fpscr.value = (fpscr.value & ~0x000F8000) | v; // fpscr.value = (fpscr.value & ~0x000F8000) | v;
// } // }
// Reserve address for load acquire/store release. Shared.
uint32_t* reserve_address;
// Runtime-specific data pointer. Used on callbacks to get access to the // Runtime-specific data pointer. Used on callbacks to get access to the
// current runtime and its data. // current runtime and its data.
uint8_t* membase; uint8_t* membase;

View File

@ -141,6 +141,8 @@ int PPCHIRBuilder::Emit(FunctionInfo* symbol_info) {
// splits blocks we don't have weird pointers. // splits blocks we don't have weird pointers.
if (prev_instr && prev_instr->next) { if (prev_instr && prev_instr->next) {
instr_offset_list_[offset] = prev_instr->next; instr_offset_list_[offset] = prev_instr->next;
} else if (prev_instr) {
instr_offset_list_[offset] = prev_instr->block->next->instr_head;
} else if (current_block_) { } else if (current_block_) {
instr_offset_list_[offset] = current_block_->instr_head; instr_offset_list_[offset] = current_block_->instr_head;
} else if (block_tail_) { } else if (block_tail_) {
@ -326,3 +328,24 @@ void PPCHIRBuilder::StoreVR(uint32_t reg, Value* value) {
StoreContext( StoreContext(
offsetof(PPCContext, v) + reg * 16, value); offsetof(PPCContext, v) + reg * 16, value);
} }
// Emits HIR for a load-acquire: atomically writes the 32-bit truncated guest
// address into the location whose pointer is read from
// PPCContext::reserve_address, then emits a normal Load of the requested type.
//
// address:    guest address value to load from.
// type:       type of the value to load.
// load_flags: forwarded unchanged to Load().
// Returns the value produced by Load().
Value* PPCHIRBuilder::LoadAcquire(
    Value* address, TypeName type, uint32_t load_flags) {
  // Each builder call appends an instruction. Evaluating both as arguments of
  // a single call would leave their relative order up to the compiler, so
  // sequence them explicitly to keep the emitted HIR deterministic.
  Value* reserve_ptr =
      LoadContext(offsetof(PPCContext, reserve_address), INT64_TYPE);
  Value* address32 = Truncate(address, INT32_TYPE);
  AtomicExchange(reserve_ptr, address32);
  return Load(address, type, load_flags);
}
// Emits HIR for a store-release: atomically swaps zero into the location whose
// pointer is read from PPCContext::reserve_address, compares the previous
// contents with the 32-bit truncated target address, and performs the Store
// only when they match.
//
// address:     guest address value to store to.
// value:       value to store.
// store_flags: forwarded unchanged to Store().
// Returns the comparison result (the CompareEQ value), i.e. whether the
// reservation matched and the store path was taken.
Value* PPCHIRBuilder::StoreRelease(
    Value* address, Value* value, uint32_t store_flags) {
  // Each builder call appends an instruction. Evaluating both as arguments of
  // a single call would leave their relative order up to the compiler, so
  // sequence them explicitly to keep the emitted HIR deterministic.
  Value* reserve_ptr =
      LoadContext(offsetof(PPCContext, reserve_address), INT64_TYPE);
  Value* zero = LoadZero(INT32_TYPE);
  Value* old_address = AtomicExchange(reserve_ptr, zero);
  Value* eq = CompareEQ(Truncate(address, INT32_TYPE), old_address);
  // Jump over the store when the reservation did not match.
  auto skip_label = NewLabel();
  BranchFalse(eq, skip_label, BRANCH_UNLIKELY);
  Store(address, value, store_flags);
  MarkLabel(skip_label);
  return eq;
}

View File

@ -63,6 +63,9 @@ public:
Value* LoadVR(uint32_t reg); Value* LoadVR(uint32_t reg);
void StoreVR(uint32_t reg, Value* value); void StoreVR(uint32_t reg, Value* value);
Value* LoadAcquire(Value* address, hir::TypeName type, uint32_t load_flags = 0);
Value* StoreRelease(Value* address, Value* value, uint32_t store_flags = 0);
private: private:
void AnnotateLabel(uint64_t address, Label* label); void AnnotateLabel(uint64_t address, Label* label);

View File

@ -874,17 +874,6 @@ Value* HIRBuilder::Load(
return i->dest; return i->dest;
} }
// Appends an OPCODE_LOAD_ACQUIRE instruction that reads a value of the given
// type from address. load_flags is passed to AppendInstr as the instruction
// flags. Returns the instruction's destination value.
Value* HIRBuilder::LoadAcquire(
    Value* address, TypeName type, uint32_t load_flags) {
  ASSERT_ADDRESS_TYPE(address);
  Value* result = AllocValue(type);
  Instr* instr = AppendInstr(OPCODE_LOAD_ACQUIRE_info, load_flags, result);
  instr->set_src1(address);
  // Only src1 is used; clear the remaining operands.
  instr->src3.value = NULL;
  instr->src2.value = NULL;
  return instr->dest;
}
void HIRBuilder::Store( void HIRBuilder::Store(
Value* address, Value* value, uint32_t store_flags) { Value* address, Value* value, uint32_t store_flags) {
ASSERT_ADDRESS_TYPE(address); ASSERT_ADDRESS_TYPE(address);
@ -894,17 +883,6 @@ void HIRBuilder::Store(
i->src3.value = NULL; i->src3.value = NULL;
} }
// Appends an OPCODE_STORE_RELEASE instruction that stores value at address.
// store_flags is passed to AppendInstr as the instruction flags. Returns the
// instruction's INT8_TYPE destination value.
Value* HIRBuilder::StoreRelease(
    Value* address, Value* value, uint32_t store_flags) {
  ASSERT_ADDRESS_TYPE(address);
  Value* result = AllocValue(INT8_TYPE);
  Instr* instr = AppendInstr(OPCODE_STORE_RELEASE_info, store_flags, result);
  instr->set_src1(address);
  instr->set_src2(value);
  // No third operand.
  instr->src3.value = NULL;
  return instr->dest;
}
void HIRBuilder::Prefetch( void HIRBuilder::Prefetch(
Value* address, size_t length, uint32_t prefetch_flags) { Value* address, size_t length, uint32_t prefetch_flags) {
ASSERT_ADDRESS_TYPE(address); ASSERT_ADDRESS_TYPE(address);
@ -1625,6 +1603,18 @@ Value* HIRBuilder::CompareExchange(
return i->dest; return i->dest;
} }
// Appends an OPCODE_ATOMIC_EXCHANGE instruction with address as src1 and
// new_value as src2. The destination value is allocated with new_value's type
// and returned.
Value* HIRBuilder::AtomicExchange(Value* address, Value* new_value) {
  ASSERT_ADDRESS_TYPE(address);
  ASSERT_INTEGER_TYPE(new_value);
  Value* result = AllocValue(new_value->type);
  Instr* instr = AppendInstr(OPCODE_ATOMIC_EXCHANGE_info, 0, result);
  instr->set_src1(address);
  instr->set_src2(new_value);
  // No third operand.
  instr->src3.value = NULL;
  return instr->dest;
}
Value* HIRBuilder::AtomicAdd(Value* address, Value* value) { Value* HIRBuilder::AtomicAdd(Value* address, Value* value) {
ASSERT_ADDRESS_TYPE(address); ASSERT_ADDRESS_TYPE(address);
ASSERT_INTEGER_TYPE(value); ASSERT_INTEGER_TYPE(value);

View File

@ -121,9 +121,7 @@ public:
void StoreContext(size_t offset, Value* value); void StoreContext(size_t offset, Value* value);
Value* Load(Value* address, TypeName type, uint32_t load_flags = 0); Value* Load(Value* address, TypeName type, uint32_t load_flags = 0);
Value* LoadAcquire(Value* address, TypeName type, uint32_t load_flags = 0);
void Store(Value* address, Value* value, uint32_t store_flags = 0); void Store(Value* address, Value* value, uint32_t store_flags = 0);
Value* StoreRelease(Value* address, Value* value, uint32_t store_flags = 0);
void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0); void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0);
Value* Max(Value* value1, Value* value2); Value* Max(Value* value1, Value* value2);
@ -195,6 +193,7 @@ public:
Value* CompareExchange(Value* address, Value* CompareExchange(Value* address,
Value* compare_value, Value* exchange_value); Value* compare_value, Value* exchange_value);
Value* AtomicExchange(Value* address, Value* new_value);
Value* AtomicAdd(Value* address, Value* value); Value* AtomicAdd(Value* address, Value* value);
Value* AtomicSub(Value* address, Value* value); Value* AtomicSub(Value* address, Value* value);

View File

@ -104,9 +104,7 @@ enum Opcode {
OPCODE_STORE_CONTEXT, OPCODE_STORE_CONTEXT,
OPCODE_LOAD, OPCODE_LOAD,
OPCODE_LOAD_ACQUIRE,
OPCODE_STORE, OPCODE_STORE,
OPCODE_STORE_RELEASE,
OPCODE_PREFETCH, OPCODE_PREFETCH,
OPCODE_MAX, OPCODE_MAX,
@ -165,6 +163,7 @@ enum Opcode {
OPCODE_SWIZZLE, OPCODE_SWIZZLE,
OPCODE_COMPARE_EXCHANGE, OPCODE_COMPARE_EXCHANGE,
OPCODE_ATOMIC_EXCHANGE,
OPCODE_ATOMIC_ADD, OPCODE_ATOMIC_ADD,
OPCODE_ATOMIC_SUB, OPCODE_ATOMIC_SUB,

View File

@ -188,24 +188,12 @@ DEFINE_OPCODE(
OPCODE_SIG_V_V, OPCODE_SIG_V_V,
OPCODE_FLAG_MEMORY); OPCODE_FLAG_MEMORY);
// Declares OPCODE_LOAD_ACQUIRE under the name "load_acquire", using the V_V
// signature and both the MEMORY and VOLATILE opcode flags.
DEFINE_OPCODE(
OPCODE_LOAD_ACQUIRE,
"load_acquire",
OPCODE_SIG_V_V,
OPCODE_FLAG_MEMORY | OPCODE_FLAG_VOLATILE);
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_STORE, OPCODE_STORE,
"store", "store",
OPCODE_SIG_X_V_V, OPCODE_SIG_X_V_V,
OPCODE_FLAG_MEMORY); OPCODE_FLAG_MEMORY);
// Declares OPCODE_STORE_RELEASE under the name "store_release", using the
// V_V_V signature and both the MEMORY and VOLATILE opcode flags.
DEFINE_OPCODE(
OPCODE_STORE_RELEASE,
"store_release",
OPCODE_SIG_V_V_V,
OPCODE_FLAG_MEMORY | OPCODE_FLAG_VOLATILE);
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_PREFETCH, OPCODE_PREFETCH,
"prefetch", "prefetch",
@ -517,6 +505,12 @@ DEFINE_OPCODE(
OPCODE_SIG_V_V_V_V, OPCODE_SIG_V_V_V_V,
OPCODE_FLAG_VOLATILE); OPCODE_FLAG_VOLATILE);
// Declares OPCODE_ATOMIC_EXCHANGE under the name "atomic_exchange", using the
// V_V_V signature and the VOLATILE opcode flag only.
DEFINE_OPCODE(
OPCODE_ATOMIC_EXCHANGE,
"atomic_exchange",
OPCODE_SIG_V_V_V,
OPCODE_FLAG_VOLATILE);
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_ATOMIC_ADD, OPCODE_ATOMIC_ADD,
"atomic_add", "atomic_add",

View File

@ -53,6 +53,8 @@ typedef OSQueueHead xe_atomic_stack_t;
((void)InterlockedExchangeSubtract((volatile unsigned*)value, amount)) ((void)InterlockedExchangeSubtract((volatile unsigned*)value, amount))
#define xe_atomic_exchange_32(newValue, value) \ #define xe_atomic_exchange_32(newValue, value) \
InterlockedExchange((volatile LONG*)value, newValue) InterlockedExchange((volatile LONG*)value, newValue)
// Atomically stores newValue into the 64-bit location at value and evaluates
// to the previous contents. Both parameters are parenthesized so that an
// expression argument (e.g. `base + 8`) is cast and passed as a whole instead
// of having the cast bind to its first operand only.
#define xe_atomic_exchange_64(newValue, value) \
InterlockedExchange64((volatile LONGLONG*)(value), (newValue))
#define xe_atomic_cas_32(oldValue, newValue, value) \ #define xe_atomic_cas_32(oldValue, newValue, value) \
(InterlockedCompareExchange((volatile LONG*)value, newValue, oldValue) == oldValue) (InterlockedCompareExchange((volatile LONG*)value, newValue, oldValue) == oldValue)

View File

@ -36,6 +36,7 @@ XenonThreadState::XenonThreadState(
xe_zero_struct(context_, sizeof(PPCContext)); xe_zero_struct(context_, sizeof(PPCContext));
// Stash pointers to common structures that callbacks may need. // Stash pointers to common structures that callbacks may need.
context_->reserve_address = memory_->reserve_address();
context_->membase = memory_->membase(); context_->membase = memory_->membase();
context_->runtime = runtime; context_->runtime = runtime;
context_->thread_state = this; context_->thread_state = this;