[CPU] Data cache control instructions

This commit is contained in:
Triang3l 2019-08-18 16:14:51 +03:00
parent c19dc9c7d6
commit bc4b68db45
6 changed files with 105 additions and 32 deletions

View File

@ -1037,15 +1037,87 @@ EMITTER_OPCODE_TABLE(OPCODE_STORE, STORE_I8, STORE_I16, STORE_I32, STORE_I64,
STORE_F32, STORE_F64, STORE_V128); STORE_F32, STORE_F64, STORE_V128);
// ============================================================================ // ============================================================================
// OPCODE_PREFETCH // OPCODE_CACHE_CONTROL
// ============================================================================ // ============================================================================
// x64 emitter sequence for OPCODE_CACHE_CONTROL (introduced here in place of
// the former PREFETCH sequence). Emit() reads the cache control type from the
// instruction flags and generates host prefetcht0 or clflush instructions for
// the guest address given in src1; src2 carries the cache line size.
struct PREFETCH struct CACHE_CONTROL
: Sequence<PREFETCH, I<OPCODE_PREFETCH, VoidOp, I64Op, OffsetOp>> { : Sequence<CACHE_CONTROL,
I<OPCODE_CACHE_CONTROL, VoidOp, I64Op, OffsetOp>> {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
// TODO(benvanik): prefetch addr -> length. bool is_clflush = false, is_prefetch = false;
// Pick the host operation: the two "data touch" types become prefetches, the
// two "data store" types become cache line flushes.
switch (CacheControlType(i.instr->flags)) {
case CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH:
case CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE:
is_prefetch = true;
break;
case CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE:
case CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH:
is_clflush = true;
break;
default:
// Unrecognized type: assert and emit no code at all.
assert_unhandled_case(CacheControlType(i.instr->flags));
return;
}
// The cache line size arrives as the immediate stored in src2.
size_t cache_line_size = i.src2.value;
RegExp addr;
// Only assigned, and only read, when src1 is a constant.
uint32_t address_constant;
if (i.src1.is_constant) {
// TODO(benvanik): figure out how to do this without a temp.
// Since the constant is often 0x8... if we tried to use that as a
// displacement it would be sign extended and mess things up.
address_constant = static_cast<uint32_t>(i.src1.constant());
if (address_constant < 0x80000000) {
// Below 0x80000000 the constant is used directly as a displacement from
// the membase register.
addr = e.GetMembaseReg() + address_constant;
} else {
// Otherwise the constant goes through eax. For 0xE0000000+ the same 4 KB
// offset that the non-constant path below computes at run time is folded
// into the constant here.
if (address_constant >= 0xE0000000 &&
xe::memory::allocation_granularity() > 0x1000) {
e.mov(e.eax, address_constant + 0x1000);
} else {
e.mov(e.eax, address_constant);
}
addr = e.GetMembaseReg() + e.rax;
}
} else {
if (xe::memory::allocation_granularity() > 0x1000) {
// Emulate the 4 KB physical address offset in 0xE0000000+ when it can't be
// done via memory mapping.
// Emitted code: eax = address + (address >= 0xE0000000 ? 0x1000 : 0).
e.cmp(i.src1.reg().cvt32(), 0xE0000000);
e.setae(e.al);
e.movzx(e.eax, e.al);
e.shl(e.eax, 12);
e.add(e.eax, i.src1.reg().cvt32());
} else {
// Clear the top 32 bits, as they are likely garbage.
// TODO(benvanik): find a way to avoid doing this.
e.mov(e.eax, i.src1.reg().cvt32());
}
addr = e.GetMembaseReg() + e.rax;
}
// Emit the operation for the 64 bytes containing the requested address.
if (is_clflush) {
e.clflush(e.ptr[addr]);
}
if (is_prefetch) {
e.prefetcht0(e.ptr[addr]);
}
if (cache_line_size >= 128) {
// Prefetch or flush the other 64 bytes of the 128-byte cache line.
// XOR with 64 flips bit 6 of the address, selecting the other half.
if (i.src1.is_constant && address_constant < 0x80000000) {
addr = e.GetMembaseReg() + (address_constant ^ 64);
} else {
// addr refers to rax in this case, so adjusting eax in the emitted code
// is enough; addr itself does not need to be rebuilt.
e.xor_(e.eax, 64);
}
if (is_clflush) {
e.clflush(e.ptr[addr]);
}
if (is_prefetch) {
e.prefetcht0(e.ptr[addr]);
}
// Only 128-byte lines are expected; larger sizes trip this assert after the
// two halves above have been emitted.
assert_true(cache_line_size == 128);
}
} }
}; };
// NOTE(review): presumably registers CACHE_CONTROL as the emitter for
// OPCODE_CACHE_CONTROL; the macro is defined outside this view - confirm.
EMITTER_OPCODE_TABLE(OPCODE_PREFETCH, PREFETCH); EMITTER_OPCODE_TABLE(OPCODE_CACHE_CONTROL, CACHE_CONTROL);
// ============================================================================ // ============================================================================
// OPCODE_MEMORY_BARRIER // OPCODE_MEMORY_BARRIER

View File

@ -1277,12 +1277,12 @@ void HIRBuilder::Memset(Value* address, Value* value, Value* length) {
i->set_src3(length); i->set_src3(length);
} }
// Appends an OPCODE_CACHE_CONTROL instruction (formerly OPCODE_PREFETCH).
// The cache control type is passed as the instruction's flags, `address`
// becomes src1, and `cache_line_size` is stored as the src2 offset. src3 is
// not used by this opcode and is cleared.
void HIRBuilder::Prefetch(Value* address, size_t length, void HIRBuilder::CacheControl(Value* address, size_t cache_line_size,
uint32_t prefetch_flags) { CacheControlType type) {
ASSERT_ADDRESS_TYPE(address); ASSERT_ADDRESS_TYPE(address);
Instr* i = AppendInstr(OPCODE_PREFETCH_info, prefetch_flags); Instr* i = AppendInstr(OPCODE_CACHE_CONTROL_info, uint32_t(type));
i->set_src1(address); i->set_src1(address);
// src2 is an immediate, not a Value, so it is written directly.
i->src2.offset = length; i->src2.offset = cache_line_size;
i->src3.value = NULL; i->src3.value = NULL;
} }

View File

@ -157,7 +157,8 @@ class HIRBuilder {
Value* Load(Value* address, TypeName type, uint32_t load_flags = 0); Value* Load(Value* address, TypeName type, uint32_t load_flags = 0);
void Store(Value* address, Value* value, uint32_t store_flags = 0); void Store(Value* address, Value* value, uint32_t store_flags = 0);
void Memset(Value* address, Value* value, Value* length); void Memset(Value* address, Value* value, Value* length);
// Appends a cache control instruction of the given CacheControlType for
// `address`; `cache_line_size` is the cache line size in bytes (the PowerPC
// frontend passes 128). Replaces the former Prefetch().
void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0); void CacheControl(Value* address, size_t cache_line_size,
CacheControlType type);
void MemoryBarrier(); void MemoryBarrier();
void SetRoundingMode(Value* value); void SetRoundingMode(Value* value);

View File

@ -39,9 +39,11 @@ enum LoadStoreFlags {
LOAD_STORE_BYTE_SWAP = 1 << 0, LOAD_STORE_BYTE_SWAP = 1 << 0,
}; };
// Kind of data cache operation requested by an OPCODE_CACHE_CONTROL
// instruction; the value is carried in the instruction flags. It replaces the
// former PrefetchFlags bit flags with plain sequential enumerators.
// Mapping used by the PowerPC frontend:
//   dcbt   -> DATA_TOUCH
//   dcbtst -> DATA_TOUCH_FOR_STORE
//   dcbst  -> DATA_STORE
//   dcbf   -> DATA_STORE_AND_FLUSH
// NOTE(review): "CONTOROL" is a misspelling of "CONTROL"; a rename has to
// update every user of these enumerators at the same time.
enum PrefetchFlags { enum CacheControlType {
PREFETCH_LOAD = (1 << 1), CACHE_CONTOROL_TYPE_DATA_TOUCH,
PREFETCH_STORE = (1 << 2), CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE,
CACHE_CONTOROL_TYPE_DATA_STORE,
CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH,
}; };
enum ArithmeticFlags { enum ArithmeticFlags {
@ -158,7 +160,7 @@ enum Opcode {
OPCODE_LOAD, OPCODE_LOAD,
OPCODE_STORE, OPCODE_STORE,
OPCODE_MEMSET, OPCODE_MEMSET,
OPCODE_PREFETCH, OPCODE_CACHE_CONTROL,
OPCODE_MEMORY_BARRIER, OPCODE_MEMORY_BARRIER,
OPCODE_MAX, OPCODE_MAX,
OPCODE_VECTOR_MAX, OPCODE_VECTOR_MAX,

View File

@ -262,8 +262,8 @@ DEFINE_OPCODE(
OPCODE_FLAG_MEMORY) OPCODE_FLAG_MEMORY)
// Opcode table entry for the cache control instruction, renamed from
// "prefetch" to "cache_control"; signature and flags are unchanged.
// NOTE(review): OPCODE_SIG_X_V_O presumably means no destination, a value
// source and an offset source (matching the emitter's VoidOp/I64Op/OffsetOp
// operands) - confirm against the signature definitions.
DEFINE_OPCODE( DEFINE_OPCODE(
OPCODE_PREFETCH, OPCODE_CACHE_CONTROL,
"prefetch", "cache_control",
OPCODE_SIG_X_V_O, OPCODE_SIG_X_V_O,
OPCODE_FLAG_MEMORY) OPCODE_FLAG_MEMORY)

View File

@ -1074,36 +1074,34 @@ int InstrEmit_stfsx(PPCHIRBuilder& f, const InstrData& i) {
} }
// Cache management (A-27) // Cache management (A-27)
// dcbf, dcbst, dcbt, dcbtst work with 128-byte cache lines, not 32-byte cache
// blocks, on the Xenon:
// https://github.com/ValveSoftware/source-sdk-2013/blob/master/mp/src/mathlib/sseconst.cpp#L321
// https://randomascii.wordpress.com/2018/01/07/finding-a-cpu-design-bug-in-the-xbox-360/
// dcbf: obtains the effective address via CalculateEA_0 from the RA and RB
// fields and appends a DATA_STORE_AND_FLUSH cache control instruction for a
// 128-byte cache line. Previously emitted only a no-op. Always returns 0.
int InstrEmit_dcbf(PPCHIRBuilder& f, const InstrData& i) { int InstrEmit_dcbf(PPCHIRBuilder& f, const InstrData& i) {
// No-op for now. Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
// TODO(benvanik): use prefetch f.CacheControl(ea, 128,
// XEINSTRNOTIMPLEMENTED(); CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH);
f.Nop();
return 0; return 0;
} }
// dcbst: obtains the effective address via CalculateEA_0 from the RA and RB
// fields and appends a DATA_STORE cache control instruction for a 128-byte
// cache line. Previously emitted only a no-op. Always returns 0.
int InstrEmit_dcbst(PPCHIRBuilder& f, const InstrData& i) { int InstrEmit_dcbst(PPCHIRBuilder& f, const InstrData& i) {
// No-op for now. Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
// TODO(benvanik): use prefetch f.CacheControl(ea, 128, CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE);
// XEINSTRNOTIMPLEMENTED();
f.Nop();
return 0; return 0;
} }
// dcbt: obtains the effective address via CalculateEA_0 from the RA and RB
// fields and appends a DATA_TOUCH cache control instruction for a 128-byte
// cache line. Previously emitted only a no-op. Always returns 0.
int InstrEmit_dcbt(PPCHIRBuilder& f, const InstrData& i) { int InstrEmit_dcbt(PPCHIRBuilder& f, const InstrData& i) {
// No-op for now. Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
// TODO(benvanik): use prefetch f.CacheControl(ea, 128, CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH);
// XEINSTRNOTIMPLEMENTED();
f.Nop();
return 0; return 0;
} }
// dcbtst: obtains the effective address via CalculateEA_0 from the RA and RB
// fields and appends a DATA_TOUCH_FOR_STORE cache control instruction for a
// 128-byte cache line. Previously emitted only a no-op. Always returns 0.
int InstrEmit_dcbtst(PPCHIRBuilder& f, const InstrData& i) { int InstrEmit_dcbtst(PPCHIRBuilder& f, const InstrData& i) {
// No-op for now. Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
// TODO(benvanik): use prefetch f.CacheControl(ea, 128,
// XEINSTRNOTIMPLEMENTED(); CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE);
f.Nop();
return 0; return 0;
} }