Merge pull request #1441 from Triang3l/dcb-instructions
[CPU] Data cache control instructions
This commit is contained in:
commit
56b168216d
|
@ -1037,15 +1037,87 @@ EMITTER_OPCODE_TABLE(OPCODE_STORE, STORE_I8, STORE_I16, STORE_I32, STORE_I64,
|
|||
STORE_F32, STORE_F64, STORE_V128);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_PREFETCH
|
||||
// OPCODE_CACHE_CONTROL
|
||||
// ============================================================================
|
||||
struct PREFETCH
|
||||
: Sequence<PREFETCH, I<OPCODE_PREFETCH, VoidOp, I64Op, OffsetOp>> {
|
||||
struct CACHE_CONTROL
|
||||
: Sequence<CACHE_CONTROL,
|
||||
I<OPCODE_CACHE_CONTROL, VoidOp, I64Op, OffsetOp>> {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
// TODO(benvanik): prefetch addr -> length.
|
||||
bool is_clflush = false, is_prefetch = false;
|
||||
switch (CacheControlType(i.instr->flags)) {
|
||||
case CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH:
|
||||
case CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE:
|
||||
is_prefetch = true;
|
||||
break;
|
||||
case CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE:
|
||||
case CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH:
|
||||
is_clflush = true;
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(CacheControlType(i.instr->flags));
|
||||
return;
|
||||
}
|
||||
size_t cache_line_size = i.src2.value;
|
||||
|
||||
RegExp addr;
|
||||
uint32_t address_constant;
|
||||
if (i.src1.is_constant) {
|
||||
// TODO(benvanik): figure out how to do this without a temp.
|
||||
// Since the constant is often 0x8... if we tried to use that as a
|
||||
// displacement it would be sign extended and mess things up.
|
||||
address_constant = static_cast<uint32_t>(i.src1.constant());
|
||||
if (address_constant < 0x80000000) {
|
||||
addr = e.GetMembaseReg() + address_constant;
|
||||
} else {
|
||||
if (address_constant >= 0xE0000000 &&
|
||||
xe::memory::allocation_granularity() > 0x1000) {
|
||||
e.mov(e.eax, address_constant + 0x1000);
|
||||
} else {
|
||||
e.mov(e.eax, address_constant);
|
||||
}
|
||||
addr = e.GetMembaseReg() + e.rax;
|
||||
}
|
||||
} else {
|
||||
if (xe::memory::allocation_granularity() > 0x1000) {
|
||||
// Emulate the 4 KB physical address offset in 0xE0000000+ when it can't
|
||||
// be done via memory mapping.
|
||||
e.cmp(i.src1.reg().cvt32(), 0xE0000000);
|
||||
e.setae(e.al);
|
||||
e.movzx(e.eax, e.al);
|
||||
e.shl(e.eax, 12);
|
||||
e.add(e.eax, i.src1.reg().cvt32());
|
||||
} else {
|
||||
// Clear the top 32 bits, as they are likely garbage.
|
||||
// TODO(benvanik): find a way to avoid doing this.
|
||||
e.mov(e.eax, i.src1.reg().cvt32());
|
||||
}
|
||||
addr = e.GetMembaseReg() + e.rax;
|
||||
}
|
||||
if (is_clflush) {
|
||||
e.clflush(e.ptr[addr]);
|
||||
}
|
||||
if (is_prefetch) {
|
||||
e.prefetcht0(e.ptr[addr]);
|
||||
}
|
||||
|
||||
if (cache_line_size >= 128) {
|
||||
// Repeat the flush/prefetch for the other 64 bytes of the 128-byte line.
|
||||
if (i.src1.is_constant && address_constant < 0x80000000) {
|
||||
addr = e.GetMembaseReg() + (address_constant ^ 64);
|
||||
} else {
|
||||
e.xor_(e.eax, 64);
|
||||
}
|
||||
if (is_clflush) {
|
||||
e.clflush(e.ptr[addr]);
|
||||
}
|
||||
if (is_prefetch) {
|
||||
e.prefetcht0(e.ptr[addr]);
|
||||
}
|
||||
assert_true(cache_line_size == 128);
|
||||
}
|
||||
}
|
||||
};
|
||||
EMITTER_OPCODE_TABLE(OPCODE_PREFETCH, PREFETCH);
|
||||
EMITTER_OPCODE_TABLE(OPCODE_CACHE_CONTROL, CACHE_CONTROL);
|
||||
|
||||
// ============================================================================
|
||||
// OPCODE_MEMORY_BARRIER
|
||||
|
|
|
@ -1277,12 +1277,12 @@ void HIRBuilder::Memset(Value* address, Value* value, Value* length) {
|
|||
i->set_src3(length);
|
||||
}
|
||||
|
||||
void HIRBuilder::Prefetch(Value* address, size_t length,
|
||||
uint32_t prefetch_flags) {
|
||||
void HIRBuilder::CacheControl(Value* address, size_t cache_line_size,
|
||||
CacheControlType type) {
|
||||
ASSERT_ADDRESS_TYPE(address);
|
||||
Instr* i = AppendInstr(OPCODE_PREFETCH_info, prefetch_flags);
|
||||
Instr* i = AppendInstr(OPCODE_CACHE_CONTROL_info, uint32_t(type));
|
||||
i->set_src1(address);
|
||||
i->src2.offset = length;
|
||||
i->src2.offset = cache_line_size;
|
||||
i->src3.value = NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -157,7 +157,8 @@ class HIRBuilder {
|
|||
Value* Load(Value* address, TypeName type, uint32_t load_flags = 0);
|
||||
void Store(Value* address, Value* value, uint32_t store_flags = 0);
|
||||
void Memset(Value* address, Value* value, Value* length);
|
||||
void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0);
|
||||
void CacheControl(Value* address, size_t cache_line_size,
|
||||
CacheControlType type);
|
||||
void MemoryBarrier();
|
||||
|
||||
void SetRoundingMode(Value* value);
|
||||
|
|
|
@ -39,9 +39,11 @@ enum LoadStoreFlags {
|
|||
LOAD_STORE_BYTE_SWAP = 1 << 0,
|
||||
};
|
||||
|
||||
enum PrefetchFlags {
|
||||
PREFETCH_LOAD = (1 << 1),
|
||||
PREFETCH_STORE = (1 << 2),
|
||||
enum CacheControlType {
|
||||
CACHE_CONTOROL_TYPE_DATA_TOUCH,
|
||||
CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE,
|
||||
CACHE_CONTOROL_TYPE_DATA_STORE,
|
||||
CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH,
|
||||
};
|
||||
|
||||
enum ArithmeticFlags {
|
||||
|
@ -158,7 +160,7 @@ enum Opcode {
|
|||
OPCODE_LOAD,
|
||||
OPCODE_STORE,
|
||||
OPCODE_MEMSET,
|
||||
OPCODE_PREFETCH,
|
||||
OPCODE_CACHE_CONTROL,
|
||||
OPCODE_MEMORY_BARRIER,
|
||||
OPCODE_MAX,
|
||||
OPCODE_VECTOR_MAX,
|
||||
|
|
|
@ -262,8 +262,8 @@ DEFINE_OPCODE(
|
|||
OPCODE_FLAG_MEMORY)
|
||||
|
||||
DEFINE_OPCODE(
|
||||
OPCODE_PREFETCH,
|
||||
"prefetch",
|
||||
OPCODE_CACHE_CONTROL,
|
||||
"cache_control",
|
||||
OPCODE_SIG_X_V_O,
|
||||
OPCODE_FLAG_MEMORY)
|
||||
|
||||
|
|
|
@ -1074,36 +1074,34 @@ int InstrEmit_stfsx(PPCHIRBuilder& f, const InstrData& i) {
|
|||
}
|
||||
|
||||
// Cache management (A-27)
|
||||
// dcbf, dcbst, dcbt, dcbtst work with 128-byte cache lines, not 32-byte cache
|
||||
// blocks, on the Xenon:
|
||||
// https://github.com/ValveSoftware/source-sdk-2013/blob/master/mp/src/mathlib/sseconst.cpp#L321
|
||||
// https://randomascii.wordpress.com/2018/01/07/finding-a-cpu-design-bug-in-the-xbox-360/
|
||||
|
||||
int InstrEmit_dcbf(PPCHIRBuilder& f, const InstrData& i) {
|
||||
// No-op for now.
|
||||
// TODO(benvanik): use prefetch
|
||||
// XEINSTRNOTIMPLEMENTED();
|
||||
f.Nop();
|
||||
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
|
||||
f.CacheControl(ea, 128,
|
||||
CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int InstrEmit_dcbst(PPCHIRBuilder& f, const InstrData& i) {
|
||||
// No-op for now.
|
||||
// TODO(benvanik): use prefetch
|
||||
// XEINSTRNOTIMPLEMENTED();
|
||||
f.Nop();
|
||||
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
|
||||
f.CacheControl(ea, 128, CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int InstrEmit_dcbt(PPCHIRBuilder& f, const InstrData& i) {
|
||||
// No-op for now.
|
||||
// TODO(benvanik): use prefetch
|
||||
// XEINSTRNOTIMPLEMENTED();
|
||||
f.Nop();
|
||||
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
|
||||
f.CacheControl(ea, 128, CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int InstrEmit_dcbtst(PPCHIRBuilder& f, const InstrData& i) {
|
||||
// No-op for now.
|
||||
// TODO(benvanik): use prefetch
|
||||
// XEINSTRNOTIMPLEMENTED();
|
||||
f.Nop();
|
||||
Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
|
||||
f.CacheControl(ea, 128,
|
||||
CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue