From bc4b68db4502d9843f975cdc4c2d1a2815b3d825 Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Sun, 18 Aug 2019 16:14:51 +0300
Subject: [PATCH] [CPU] Data cache control instructions

---
Notes (free-form text in this position is ignored by `git am`):

  * Renames the HIR opcode OPCODE_PREFETCH to OPCODE_CACHE_CONTROL and replaces
    the PrefetchFlags bit flags with the CacheControlType enum, which is passed
    through the instruction's flags field.
  * x64 backend: the DATA_TOUCH and DATA_TOUCH_FOR_STORE types emit prefetcht0;
    the DATA_STORE and DATA_STORE_AND_FLUSH types emit clflush. When the cache
    line size operand is >= 128, the same instruction is emitted a second time
    for the address with bit 6 toggled (address ^ 64), i.e. the other 64-byte
    half of the 128-byte guest line.
  * PPC frontend: dcbf, dcbst, dcbt and dcbtst stop emitting Nop and instead
    emit CacheControl on the computed effective address with a cache line size
    of 128.
  * NOTE(review): this file was reconstructed from a whitespace-collapsed copy
    in which angle-bracketed spans had been stripped. The template arguments
    of Sequence<...> and static_cast<uint32_t>, and all leading indentation,
    were re-derived from the hunk line counts and the surrounding code -
    confirm against the upstream commit before applying. The author e-mail
    address is also missing from the From: header.

 src/xenia/cpu/backend/x64/x64_seq_memory.cc | 82 +++++++++++++++++++--
 src/xenia/cpu/hir/hir_builder.cc            |  8 +-
 src/xenia/cpu/hir/hir_builder.h             |  3 +-
 src/xenia/cpu/hir/opcodes.h                 | 10 ++-
 src/xenia/cpu/hir/opcodes.inl               |  4 +-
 src/xenia/cpu/ppc/ppc_emit_memory.cc        | 30 ++++----
 6 files changed, 105 insertions(+), 32 deletions(-)

diff --git a/src/xenia/cpu/backend/x64/x64_seq_memory.cc b/src/xenia/cpu/backend/x64/x64_seq_memory.cc
index d0b344e55..819285567 100644
--- a/src/xenia/cpu/backend/x64/x64_seq_memory.cc
+++ b/src/xenia/cpu/backend/x64/x64_seq_memory.cc
@@ -1037,15 +1037,87 @@ EMITTER_OPCODE_TABLE(OPCODE_STORE, STORE_I8, STORE_I16, STORE_I32, STORE_I64,
                      STORE_F32, STORE_F64, STORE_V128);
 
 // ============================================================================
-// OPCODE_PREFETCH
+// OPCODE_CACHE_CONTROL
 // ============================================================================
-struct PREFETCH
-    : Sequence<PREFETCH, I<OPCODE_PREFETCH, VoidOp, I64Op, OffsetOp>> {
+struct CACHE_CONTROL
+    : Sequence<CACHE_CONTROL,
+               I<OPCODE_CACHE_CONTROL, VoidOp, I64Op, OffsetOp>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
-    // TODO(benvanik): prefetch addr -> length.
+    bool is_clflush = false, is_prefetch = false;
+    switch (CacheControlType(i.instr->flags)) {
+      case CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH:
+      case CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE:
+        is_prefetch = true;
+        break;
+      case CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE:
+      case CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH:
+        is_clflush = true;
+        break;
+      default:
+        assert_unhandled_case(CacheControlType(i.instr->flags));
+        return;
+    }
+    size_t cache_line_size = i.src2.value;
+
+    RegExp addr;
+    uint32_t address_constant;
+    if (i.src1.is_constant) {
+      // TODO(benvanik): figure out how to do this without a temp.
+      // Since the constant is often 0x8... if we tried to use that as a
+      // displacement it would be sign extended and mess things up.
+      address_constant = static_cast<uint32_t>(i.src1.constant());
+      if (address_constant < 0x80000000) {
+        addr = e.GetMembaseReg() + address_constant;
+      } else {
+        if (address_constant >= 0xE0000000 &&
+            xe::memory::allocation_granularity() > 0x1000) {
+          e.mov(e.eax, address_constant + 0x1000);
+        } else {
+          e.mov(e.eax, address_constant);
+        }
+        addr = e.GetMembaseReg() + e.rax;
+      }
+    } else {
+      if (xe::memory::allocation_granularity() > 0x1000) {
+        // Emulate the 4 KB physical address offset in 0xE0000000+ when can't do
+        // it via memory mapping.
+        e.cmp(i.src1.reg().cvt32(), 0xE0000000);
+        e.setae(e.al);
+        e.movzx(e.eax, e.al);
+        e.shl(e.eax, 12);
+        e.add(e.eax, i.src1.reg().cvt32());
+      } else {
+        // Clear the top 32 bits, as they are likely garbage.
+        // TODO(benvanik): find a way to avoid doing this.
+        e.mov(e.eax, i.src1.reg().cvt32());
+      }
+      addr = e.GetMembaseReg() + e.rax;
+    }
+    if (is_clflush) {
+      e.clflush(e.ptr[addr]);
+    }
+    if (is_prefetch) {
+      e.prefetcht0(e.ptr[addr]);
+    }
+
+    if (cache_line_size >= 128) {
+      // Prefetch the other 64 bytes of the 128-byte cache line.
+      if (i.src1.is_constant && address_constant < 0x80000000) {
+        addr = e.GetMembaseReg() + (address_constant ^ 64);
+      } else {
+        e.xor_(e.eax, 64);
+      }
+      if (is_clflush) {
+        e.clflush(e.ptr[addr]);
+      }
+      if (is_prefetch) {
+        e.prefetcht0(e.ptr[addr]);
+      }
+      assert_true(cache_line_size == 128);
+    }
   }
 };
-EMITTER_OPCODE_TABLE(OPCODE_PREFETCH, PREFETCH);
+EMITTER_OPCODE_TABLE(OPCODE_CACHE_CONTROL, CACHE_CONTROL);
 
 // ============================================================================
 // OPCODE_MEMORY_BARRIER
diff --git a/src/xenia/cpu/hir/hir_builder.cc b/src/xenia/cpu/hir/hir_builder.cc
index e461996b5..1a1c60b5d 100644
--- a/src/xenia/cpu/hir/hir_builder.cc
+++ b/src/xenia/cpu/hir/hir_builder.cc
@@ -1277,12 +1277,12 @@ void HIRBuilder::Memset(Value* address, Value* value, Value* length) {
   i->set_src3(length);
 }
 
-void HIRBuilder::Prefetch(Value* address, size_t length,
-                          uint32_t prefetch_flags) {
+void HIRBuilder::CacheControl(Value* address, size_t cache_line_size,
+                              CacheControlType type) {
   ASSERT_ADDRESS_TYPE(address);
-  Instr* i = AppendInstr(OPCODE_PREFETCH_info, prefetch_flags);
+  Instr* i = AppendInstr(OPCODE_CACHE_CONTROL_info, uint32_t(type));
   i->set_src1(address);
-  i->src2.offset = length;
+  i->src2.offset = cache_line_size;
   i->src3.value = NULL;
 }
 
diff --git a/src/xenia/cpu/hir/hir_builder.h b/src/xenia/cpu/hir/hir_builder.h
index 8cfe67d02..d728facbd 100644
--- a/src/xenia/cpu/hir/hir_builder.h
+++ b/src/xenia/cpu/hir/hir_builder.h
@@ -157,7 +157,8 @@ class HIRBuilder {
   Value* Load(Value* address, TypeName type, uint32_t load_flags = 0);
   void Store(Value* address, Value* value, uint32_t store_flags = 0);
   void Memset(Value* address, Value* value, Value* length);
-  void Prefetch(Value* address, size_t length, uint32_t prefetch_flags = 0);
+  void CacheControl(Value* address, size_t cache_line_size,
+                    CacheControlType type);
   void MemoryBarrier();
 
   void SetRoundingMode(Value* value);
diff --git a/src/xenia/cpu/hir/opcodes.h b/src/xenia/cpu/hir/opcodes.h
index 6afa28555..d0e29347c 100644
--- a/src/xenia/cpu/hir/opcodes.h
+++ b/src/xenia/cpu/hir/opcodes.h
@@ -39,9 +39,11 @@ enum LoadStoreFlags {
   LOAD_STORE_BYTE_SWAP = 1 << 0,
 };
 
-enum PrefetchFlags {
-  PREFETCH_LOAD = (1 << 1),
-  PREFETCH_STORE = (1 << 2),
+enum CacheControlType {
+  CACHE_CONTOROL_TYPE_DATA_TOUCH,
+  CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE,
+  CACHE_CONTOROL_TYPE_DATA_STORE,
+  CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH,
 };
 
 enum ArithmeticFlags {
@@ -158,7 +160,7 @@ enum Opcode {
   OPCODE_LOAD,
   OPCODE_STORE,
   OPCODE_MEMSET,
-  OPCODE_PREFETCH,
+  OPCODE_CACHE_CONTROL,
   OPCODE_MEMORY_BARRIER,
   OPCODE_MAX,
   OPCODE_VECTOR_MAX,
diff --git a/src/xenia/cpu/hir/opcodes.inl b/src/xenia/cpu/hir/opcodes.inl
index 389570f50..9ee033aa5 100644
--- a/src/xenia/cpu/hir/opcodes.inl
+++ b/src/xenia/cpu/hir/opcodes.inl
@@ -262,8 +262,8 @@ DEFINE_OPCODE(
     OPCODE_FLAG_MEMORY)
 
 DEFINE_OPCODE(
-    OPCODE_PREFETCH,
-    "prefetch",
+    OPCODE_CACHE_CONTROL,
+    "cache_control",
     OPCODE_SIG_X_V_O,
     OPCODE_FLAG_MEMORY)
 
diff --git a/src/xenia/cpu/ppc/ppc_emit_memory.cc b/src/xenia/cpu/ppc/ppc_emit_memory.cc
index 79c951bcf..b24172579 100644
--- a/src/xenia/cpu/ppc/ppc_emit_memory.cc
+++ b/src/xenia/cpu/ppc/ppc_emit_memory.cc
@@ -1074,36 +1074,34 @@ int InstrEmit_stfsx(PPCHIRBuilder& f, const InstrData& i) {
 }
 
 // Cache management (A-27)
+// dcbf, dcbst, dcbt, dcbtst work with 128-byte cache lines, not 32-byte cache
+// blocks, on the Xenon:
+// https://github.com/ValveSoftware/source-sdk-2013/blob/master/mp/src/mathlib/sseconst.cpp#L321
+// https://randomascii.wordpress.com/2018/01/07/finding-a-cpu-design-bug-in-the-xbox-360/
 
 int InstrEmit_dcbf(PPCHIRBuilder& f, const InstrData& i) {
-  // No-op for now.
-  // TODO(benvanik): use prefetch
-  // XEINSTRNOTIMPLEMENTED();
-  f.Nop();
+  Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
+  f.CacheControl(ea, 128,
+                 CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE_AND_FLUSH);
   return 0;
 }
 
 int InstrEmit_dcbst(PPCHIRBuilder& f, const InstrData& i) {
-  // No-op for now.
-  // TODO(benvanik): use prefetch
-  // XEINSTRNOTIMPLEMENTED();
-  f.Nop();
+  Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
+  f.CacheControl(ea, 128, CacheControlType::CACHE_CONTOROL_TYPE_DATA_STORE);
   return 0;
 }
 
 int InstrEmit_dcbt(PPCHIRBuilder& f, const InstrData& i) {
-  // No-op for now.
-  // TODO(benvanik): use prefetch
-  // XEINSTRNOTIMPLEMENTED();
-  f.Nop();
+  Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
+  f.CacheControl(ea, 128, CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH);
   return 0;
 }
 
 int InstrEmit_dcbtst(PPCHIRBuilder& f, const InstrData& i) {
-  // No-op for now.
-  // TODO(benvanik): use prefetch
-  // XEINSTRNOTIMPLEMENTED();
-  f.Nop();
+  Value* ea = CalculateEA_0(f, i.X.RA, i.X.RB);
+  f.CacheControl(ea, 128,
+                 CacheControlType::CACHE_CONTOROL_TYPE_DATA_TOUCH_FOR_STORE);
   return 0;
 }
 