Merge pull request #1441 from Triang3l/dcb-instructions

[CPU] Data cache control instructions
This commit is contained in:
Triang3l 2019-08-18 16:43:46 +03:00 committed by GitHub
commit 56b168216d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 105 additions and 32 deletions

View File

@ -1037,15 +1037,87 @@ EMITTER_OPCODE_TABLE(OPCODE_STORE, STORE_I8, STORE_I16, STORE_I32, STORE_I64,
STORE_F32, STORE_F64, STORE_V128);
// ============================================================================
// OPCODE_PREFETCH
// OPCODE_CACHE_CONTROL
// ============================================================================
struct PREFETCH
: Sequence<PREFETCH, I<OPCODE_PREFETCH, VoidOp, I64Op, OffsetOp>> {
struct CACHE_CONTROL
: Sequence<CACHE_CONTROL,
I<OPCODE_CACHE_CONTROL, VoidOp, I64Op, OffsetOp>> {
static void Emit(X64Emitter& e, const EmitArgType& i) {
  // Choose the host operation for the requested cache control type: both
  // "touch" types are emitted as prefetcht0, both "store" types as clflush.
  // Any other type trips the unhandled-case assert and emits nothing.
  bool use_clflush = false;
  switch (CacheControlType(i.instr->flags)) {
    case CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE:
    case CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH:
      use_clflush = true;
      break;
    case CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH:
    case CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE:
      use_clflush = false;
      break;
    default:
      assert_unhandled_case(CacheControlType(i.instr->flags));
      return;
  }

  // Emits the selected operation against the given host address expression.
  const auto emit_cache_op = [&e, use_clflush](const RegExp& target) {
    if (use_clflush) {
      e.clflush(e.ptr[target]);
    } else {
      e.prefetcht0(e.ptr[target]);
    }
  };

  size_t cache_line_size = i.src2.value;

  RegExp line_addr;
  // Set only when the guest address is a constant below 0x80000000, i.e. when
  // it is used directly as a displacement off the membase register.
  bool addr_is_displacement = false;
  uint32_t guest_constant = 0;
  if (!i.src1.is_constant) {
    const auto guest_reg32 = i.src1.reg().cvt32();
    if (xe::memory::allocation_granularity() > 0x1000) {
      // The 4 KB physical address offset in 0xE0000000+ can't be provided by
      // the memory mapping here, so add (address >= 0xE0000000) << 12.
      e.cmp(guest_reg32, 0xE0000000);
      e.setae(e.al);
      e.movzx(e.eax, e.al);
      e.shl(e.eax, 12);
      e.add(e.eax, guest_reg32);
    } else {
      // A 32-bit move drops the upper 32 bits, which are likely garbage.
      e.mov(e.eax, guest_reg32);
    }
    line_addr = e.GetMembaseReg() + e.rax;
  } else {
    guest_constant = static_cast<uint32_t>(i.src1.constant());
    addr_is_displacement = guest_constant < 0x80000000;
    if (addr_is_displacement) {
      line_addr = e.GetMembaseReg() + guest_constant;
    } else {
      // Constants of 0x80000000 and above would be sign extended if used as
      // a displacement, so they are loaded into eax instead.
      uint32_t host_offset = guest_constant;
      if (guest_constant >= 0xE0000000 &&
          xe::memory::allocation_granularity() > 0x1000) {
        host_offset += 0x1000;
      }
      e.mov(e.eax, host_offset);
      line_addr = e.GetMembaseReg() + e.rax;
    }
  }

  emit_cache_op(line_addr);

  if (cache_line_size < 128) {
    return;
  }

  // Cover the other 64 bytes of the 128-byte cache line by flipping bit 6 of
  // the address. When the address lives in eax, line_addr already refers to
  // rax and picks up the change.
  if (addr_is_displacement) {
    line_addr = e.GetMembaseReg() + (guest_constant ^ 64);
  } else {
    e.xor_(e.eax, 64);
  }
  emit_cache_op(line_addr);
  assert_true(cache_line_size == 128);
}
};
EMITTER_OPCODE_TABLE(OPCODE_PREFETCH, PREFETCH);
EMITTER_OPCODE_TABLE(OPCODE_CACHE_CONTROL, CACHE_CONTROL);
// ============================================================================
// OPCODE_MEMORY_BARRIER

View File

@ -1277,12 +1277,12 @@ void HIRBuilder::Memset(Value* address, Value* value, Value* length) {
i->set_src3(length);
}
void HIRBuilder::Prefetch(Value* address, size_t length,
uint32_t prefetch_flags) {
// Appends an OPCODE_CACHE_CONTROL instruction. The control type is stored as
// the instruction flags, the address goes into src1, the cache line size into
// src2.offset, and src3 is cleared.
// NOTE(review): the Prefetch-named lines in this function sit next to their
// CacheControl counterparts - confirm which set is meant to remain.
void HIRBuilder::CacheControl(Value* address, size_t cache_line_size,
CacheControlType type) {
ASSERT_ADDRESS_TYPE(address);
Instr* i = AppendInstr(OPCODE_PREFETCH_info, prefetch_flags);
// The type is narrowed to uint32_t to be passed as the instruction flags.
Instr* i = AppendInstr(OPCODE_CACHE_CONTROL_info, uint32_t(type));
i->set_src1(address);
i->src2.offset = length;
// src2 carries a plain offset (the line size in bytes), not a Value.
i->src2.offset = cache_line_size;
i->src3.value = NULL;
}

View File

@ -157,7 +157,8 @@ class HIRBuilder {
Value* Load(Value* address, TypeName type, uint32_t load_flags = 0);
void Store(Value* address, Value* value, uint32_t store_flags = 0);
void Memset(Value* address, Value* value, Value* length);
void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0);
void CacheControl(Value* address, size_t cache_line_size,
CacheControlType type);
void MemoryBarrier();
void SetRoundingMode(Value* value);

View File

@ -39,9 +39,11 @@ enum LoadStoreFlags {
LOAD_STORE_BYTE_SWAP = 1 << 0,
};
enum PrefetchFlags {
PREFETCH_LOAD = (1 << 1),
PREFETCH_STORE = (1 << 2),
// Kind of data cache operation carried in the flags of an
// OPCODE_CACHE_CONTROL instruction.
// NOTE(review): "CONTOROL" looks like a misspelling of "CONTROL". The names
// are referenced by the x64 backend and the PPC frontend, so fixing it needs
// a coordinated rename.
enum CacheControlType {
// Requested by dcbt; emitted as prefetcht0 by the x64 backend.
CACHE_CONTOROL_TYPE_DATA_TOUCH,
// Requested by dcbtst; emitted as prefetcht0 by the x64 backend.
CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE,
// Requested by dcbst; emitted as clflush by the x64 backend.
CACHE_CONTOROL_TYPE_DATA_STORE,
// Requested by dcbf; emitted as clflush by the x64 backend.
CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH,
};
enum ArithmeticFlags {
@ -158,7 +160,7 @@ enum Opcode {
OPCODE_LOAD,
OPCODE_STORE,
OPCODE_MEMSET,
OPCODE_PREFETCH,
OPCODE_CACHE_CONTROL,
OPCODE_MEMORY_BARRIER,
OPCODE_MAX,
OPCODE_VECTOR_MAX,

View File

@ -262,8 +262,8 @@ DEFINE_OPCODE(
OPCODE_FLAG_MEMORY)
// Data cache control opcode, flagged as a memory operation.
// NOTE(review): OPCODE_SIG_X_V_O presumably means "no result, value operand,
// offset operand", matching how HIRBuilder fills src1 and src2.offset - verify
// against the signature definitions.
DEFINE_OPCODE(
OPCODE_PREFETCH,
"prefetch",
OPCODE_CACHE_CONTROL,
"cache_control",
OPCODE_SIG_X_V_O,
OPCODE_FLAG_MEMORY)

View File

@ -1074,36 +1074,34 @@ int InstrEmit_stfsx(PPCHIRBuilder& f, const InstrData& i) {
}
// Cache management (A-27)
// dcbf, dcbst, dcbt, dcbtst work with 128-byte cache lines, not 32-byte cache
// blocks, on the Xenon:
// https://github.com/ValveSoftware/source-sdk-2013/blob/master/mp/src/mathlib/sseconst.cpp#L321
// https://randomascii.wordpress.com/2018/01/07/finding-a-cpu-design-bug-in-the-xbox-360/
// dcbf: requests a CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH cache control
// operation on the effective address from CalculateEA_0(RA, RB), with a
// 128-byte cache line size. Always returns 0.
int InstrEmit_dcbf(PPCHIRBuilder& f, const InstrData& i) {
// No-op for now.
// TODO(benvanik): use prefetch
// XEINSTRNOTIMPLEMENTED();
// NOTE(review): the no-op remarks above and the f.Nop() below sit alongside
// the CacheControl call - confirm whether they are still wanted.
f.Nop();
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
f.CacheControl(ea, 128,
CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH);
return 0;
}
// dcbst: requests a CACHE_CONTOROL_TYPE_DATA_STORE cache control operation on
// the effective address from CalculateEA_0(RA, RB), with a 128-byte cache
// line size. Always returns 0.
int InstrEmit_dcbst(PPCHIRBuilder& f, const InstrData& i) {
// No-op for now.
// TODO(benvanik): use prefetch
// XEINSTRNOTIMPLEMENTED();
// NOTE(review): the no-op remarks above and the f.Nop() below sit alongside
// the CacheControl call - confirm whether they are still wanted.
f.Nop();
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
f.CacheControl(ea, 128, CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE);
return 0;
}
// dcbt: requests a CACHE_CONTOROL_TYPE_DATA_TOUCH cache control operation on
// the effective address from CalculateEA_0(RA, RB), with a 128-byte cache
// line size. Always returns 0.
int InstrEmit_dcbt(PPCHIRBuilder& f, const InstrData& i) {
// No-op for now.
// TODO(benvanik): use prefetch
// XEINSTRNOTIMPLEMENTED();
// NOTE(review): the no-op remarks above and the f.Nop() below sit alongside
// the CacheControl call - confirm whether they are still wanted.
f.Nop();
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
f.CacheControl(ea, 128, CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH);
return 0;
}
// dcbtst: requests a CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE cache control
// operation on the effective address from CalculateEA_0(RA, RB), with a
// 128-byte cache line size. Always returns 0.
int InstrEmit_dcbtst(PPCHIRBuilder& f, const InstrData& i) {
// No-op for now.
// TODO(benvanik): use prefetch
// XEINSTRNOTIMPLEMENTED();
// NOTE(review): the no-op remarks above and the f.Nop() below sit alongside
// the CacheControl call - confirm whether they are still wanted.
f.Nop();
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
f.CacheControl(ea, 128,
CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE);
return 0;
}