From 756cfbb4845867eb05f38b19c7a1933fdb043be9 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Fri, 19 Dec 2014 17:29:27 -0800 Subject: [PATCH] Removing dirty page table hack. --- src/alloy/backend/ivm/ivm_function.cc | 1 - src/alloy/backend/ivm/ivm_intcode.cc | 13 ------ src/alloy/backend/ivm/ivm_intcode.h | 1 - src/alloy/backend/x64/x64_emitter.cc | 5 --- src/alloy/backend/x64/x64_emitter.h | 2 - src/alloy/backend/x64/x64_sequences.cc | 16 ------- src/alloy/memory.h | 5 --- src/xenia/gpu/resource.h | 2 +- src/xenia/gpu/resource_cache.cc | 59 ++------------------------ src/xenia/memory.cc | 9 +--- src/xenia/memory.h | 5 --- 11 files changed, 5 insertions(+), 113 deletions(-) diff --git a/src/alloy/backend/ivm/ivm_function.cc b/src/alloy/backend/ivm/ivm_function.cc index aab30ce7b..fa99c5a11 100644 --- a/src/alloy/backend/ivm/ivm_function.cc +++ b/src/alloy/backend/ivm/ivm_function.cc @@ -120,7 +120,6 @@ int IVMFunction::CallImpl(ThreadState* thread_state, uint64_t return_address) { ics.locals = local_stack; ics.context = (uint8_t*)thread_state->raw_context(); ics.membase = memory->membase(); - ics.page_table = ics.membase + memory->page_table(); ics.did_carry = 0; ics.did_saturate = 0; ics.thread_state = thread_state; diff --git a/src/alloy/backend/ivm/ivm_intcode.cc b/src/alloy/backend/ivm/ivm_intcode.cc index 13e5012b2..7da3f960c 100644 --- a/src/alloy/backend/ivm/ivm_intcode.cc +++ b/src/alloy/backend/ivm/ivm_intcode.cc @@ -1467,12 +1467,6 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) { return DispatchToC(ctx, i, fns[i->dest->type]); } -void MarkPageDirty(IntCodeState& ics, uint32_t address) { - // 16KB pages. 
- if (ics.page_table) { - ics.page_table[(address >> 14) & 0x7FFF] = 1; - } -} uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) { uint32_t address = ics.rf[i->src1_reg].u32; if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) { @@ -1483,7 +1477,6 @@ uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) { ics.rf[i->src2_reg].u8); DFLUSH(); *((int8_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i8; - MarkPageDirty(ics, address); return IA_NEXT; } uint32_t IntCode_STORE_I16(IntCodeState& ics, const IntCode* i) { @@ -1497,7 +1490,6 @@ uint32_t IntCode_STORE_I16(IntCodeState& ics, const IntCode* i) { ics.rf[i->src2_reg].u16); DFLUSH(); *((int16_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i16; - MarkPageDirty(ics, address); return IA_NEXT; } uint32_t IntCode_STORE_I32(IntCodeState& ics, const IntCode* i) { @@ -1511,7 +1503,6 @@ uint32_t IntCode_STORE_I32(IntCodeState& ics, const IntCode* i) { ics.rf[i->src2_reg].u32); DFLUSH(); *((int32_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i32; - MarkPageDirty(ics, address); return IA_NEXT; } uint32_t IntCode_STORE_I64(IntCodeState& ics, const IntCode* i) { @@ -1525,7 +1516,6 @@ uint32_t IntCode_STORE_I64(IntCodeState& ics, const IntCode* i) { ics.rf[i->src2_reg].u64); DFLUSH(); *((int64_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i64; - MarkPageDirty(ics, address); return IA_NEXT; } uint32_t IntCode_STORE_F32(IntCodeState& ics, const IntCode* i) { @@ -1534,7 +1524,6 @@ uint32_t IntCode_STORE_F32(IntCodeState& ics, const IntCode* i) { ics.rf[i->src2_reg].u32); DFLUSH(); *((float*)(ics.membase + address)) = ics.rf[i->src2_reg].f32; - MarkPageDirty(ics, address); return IA_NEXT; } uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) { @@ -1543,7 +1532,6 @@ uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) { ics.rf[i->src2_reg].u64); DFLUSH(); *((double*)(ics.membase + address)) = ics.rf[i->src2_reg].f64; - MarkPageDirty(ics, address); return IA_NEXT; } uint32_t 
IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) { @@ -1555,7 +1543,6 @@ uint32_t IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) { ics.rf[i->src2_reg].v128.uz, ics.rf[i->src2_reg].v128.uw); DFLUSH(); *((vec128_t*)(ics.membase + address)) = ics.rf[i->src2_reg].v128; - MarkPageDirty(ics, address); return IA_NEXT; } int Translate_STORE(TranslationContext& ctx, Instr* i) { diff --git a/src/alloy/backend/ivm/ivm_intcode.h b/src/alloy/backend/ivm/ivm_intcode.h index 025f533bc..6a0a6f8f5 100644 --- a/src/alloy/backend/ivm/ivm_intcode.h +++ b/src/alloy/backend/ivm/ivm_intcode.h @@ -42,7 +42,6 @@ typedef struct { uint8_t* locals; uint8_t* context; uint8_t* membase; - uint8_t* page_table; int8_t did_carry; int8_t did_saturate; runtime::ThreadState* thread_state; diff --git a/src/alloy/backend/x64/x64_emitter.cc b/src/alloy/backend/x64/x64_emitter.cc index 5f0597f32..c26aa59a1 100644 --- a/src/alloy/backend/x64/x64_emitter.cc +++ b/src/alloy/backend/x64/x64_emitter.cc @@ -749,11 +749,6 @@ void X64Emitter::StoreEflags() { #endif // STORE_EFLAGS } -uint32_t X64Emitter::page_table_address() const { - uint64_t addr = runtime_->memory()->page_table(); - return static_cast<uint32_t>(addr); -} - bool X64Emitter::ConstantFitsIn32Reg(uint64_t v) { if ((v & ~0x7FFFFFFF) == 0) { // Fits under 31 bits, so just load using normal mov. diff --git a/src/alloy/backend/x64/x64_emitter.h b/src/alloy/backend/x64/x64_emitter.h index c178cff0b..b54bc8267 100644 --- a/src/alloy/backend/x64/x64_emitter.h +++ b/src/alloy/backend/x64/x64_emitter.h @@ -163,8 +163,6 @@ class X64Emitter : public Xbyak::CodeGenerator { void LoadEflags(); void StoreEflags(); - uint32_t page_table_address() const; - // Moves a 64bit immediate into memory. 
bool ConstantFitsIn32Reg(uint64_t v); void MovMem64(const Xbyak::RegExp& addr, uint64_t v); diff --git a/src/alloy/backend/x64/x64_sequences.cc b/src/alloy/backend/x64/x64_sequences.cc index b927a5258..f11d26d7e 100644 --- a/src/alloy/backend/x64/x64_sequences.cc +++ b/src/alloy/backend/x64/x64_sequences.cc @@ -1539,15 +1539,6 @@ EMITTER_OPCODE_TABLE( // OPCODE_STORE // ============================================================================ // Note: most *should* be aligned, but needs to be checked! -void EmitMarkPageDirty(X64Emitter& e, RegExp& addr) { - // 16KB pages. - auto page_table_address = e.page_table_address(); - if (page_table_address) { - e.shr(e.eax, 14); - e.and(e.eax, 0x7FFF); - e.mov(e.byte[e.rdx + e.rax + page_table_address], 1); - } -} EMITTER(STORE_I8, MATCH(I, I8<>>)) { static void Emit(X64Emitter& e, const EmitArgType& i) { auto addr = ComputeMemoryAddress(e, i.src1); @@ -1556,7 +1547,6 @@ EMITTER(STORE_I8, MATCH(I, I8<>>)) { } else { e.mov(e.byte[addr], i.src2); } - EmitMarkPageDirty(e, addr); if (IsTracingData()) { auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8b, e.byte[addr]); @@ -1573,7 +1563,6 @@ EMITTER(STORE_I16, MATCH(I, I16<>>)) { } else { e.mov(e.word[addr], i.src2); } - EmitMarkPageDirty(e, addr); if (IsTracingData()) { auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8w, e.word[addr]); @@ -1590,7 +1579,6 @@ EMITTER(STORE_I32, MATCH(I, I32<>>)) { } else { e.mov(e.dword[addr], i.src2); } - EmitMarkPageDirty(e, addr); if (IsTracingData()) { auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8d, e.dword[addr]); @@ -1607,7 +1595,6 @@ EMITTER(STORE_I64, MATCH(I, I64<>>)) { } else { e.mov(e.qword[addr], i.src2); } - EmitMarkPageDirty(e, addr); if (IsTracingData()) { auto addr = ComputeMemoryAddress(e, i.src1); e.mov(e.r8, e.qword[addr]); @@ -1624,7 +1611,6 @@ EMITTER(STORE_F32, MATCH(I, F32<>>)) { } else { e.vmovss(e.dword[addr], i.src2); } - EmitMarkPageDirty(e, addr); if (IsTracingData()) { auto addr = 
ComputeMemoryAddress(e, i.src1); e.lea(e.r8, e.ptr[addr]); @@ -1641,7 +1627,6 @@ EMITTER(STORE_F64, MATCH(I, F64<>>)) { } else { e.vmovsd(e.qword[addr], i.src2); } - EmitMarkPageDirty(e, addr); if (IsTracingData()) { auto addr = ComputeMemoryAddress(e, i.src1); e.lea(e.r8, e.ptr[addr]); @@ -1659,7 +1644,6 @@ EMITTER(STORE_V128, MATCH(I, V128<>>)) { } else { e.vmovaps(e.ptr[addr], i.src2); } - EmitMarkPageDirty(e, addr); if (IsTracingData()) { auto addr = ComputeMemoryAddress(e, i.src1); e.lea(e.r8, e.ptr[addr]); diff --git a/src/alloy/memory.h b/src/alloy/memory.h index 3b3b7249e..3d858eab9 100644 --- a/src/alloy/memory.h +++ b/src/alloy/memory.h @@ -27,9 +27,6 @@ class Memory { inline uint64_t* reserve_address() { return &reserve_address_; } inline uint64_t* reserve_value() { return &reserve_value_; } - // TODO(benvanik): remove with GPU refactor. - virtual uint64_t page_table() const = 0; - uint64_t trace_base() const { return trace_base_; } void set_trace_base(uint64_t value) { trace_base_ = value; } @@ -66,8 +63,6 @@ class SimpleMemory : public Memory { SimpleMemory(size_t capacity); ~SimpleMemory() override; - uint64_t page_table() const override { return 0; } - // TODO(benvanik): remove with IVM. 
uint8_t LoadI8(uint64_t address) override; uint16_t LoadI16(uint64_t address) override; diff --git a/src/xenia/gpu/resource.h b/src/xenia/gpu/resource.h index 1fb56b3d8..98bcb9245 100644 --- a/src/xenia/gpu/resource.h +++ b/src/xenia/gpu/resource.h @@ -76,7 +76,7 @@ public: } virtual bool Equals(const void* info_ptr, size_t info_length) = 0; - bool is_dirty() const { return dirtied_; } + bool is_dirty() const { return true; } void MarkDirty(uint32_t lo_address, uint32_t hi_address); protected: diff --git a/src/xenia/gpu/resource_cache.cc b/src/xenia/gpu/resource_cache.cc index 3288125ef..10d6203d4 100644 --- a/src/xenia/gpu/resource_cache.cc +++ b/src/xenia/gpu/resource_cache.cc @@ -114,60 +114,7 @@ uint64_t ResourceCache::HashRange(const MemoryRange& memory_range) { void ResourceCache::SyncRange(uint32_t address, int length) { SCOPE_profile_cpu_f("gpu"); - - // Scan the page table in sync with our resource list. This means - // we have O(n) complexity for updates, though we could definitely - // make this faster/cleaner. - // TODO(benvanik): actually do this right. - // For now we assume the page table in the range of our resources - // will not be changing, which allows us to do a foreach(res) and reload - // and then clear the table. - - // total bytes = (512 * 1024 * 1024) / (16 * 1024) = 32768 - // each byte = 1 page - // Walk as qwords so we can clear things up faster. - uint64_t* page_table = reinterpret_cast<uint64_t*>( - memory_->Translate(memory_->page_table())); - uint32_t page_size = 16 * 1024; // 16KB pages - - uint32_t lo_address = address % 0x20000000; - uint32_t hi_address = lo_address + length; - hi_address = (hi_address / page_size) * page_size + page_size; - int start_page = lo_address / page_size; - int end_page = hi_address / page_size; - - { - SCOPE_profile_cpu_i("gpu", "SyncRange:mark"); - auto it = lo_address > page_size ? 
- paged_resources_.upper_bound(lo_address - page_size) : - paged_resources_.begin(); - auto end_it = paged_resources_.lower_bound(hi_address + page_size); - while (it != end_it) { - const auto& memory_range = it->second->memory_range(); - int lo_page = (memory_range.guest_base % 0x20000000) / page_size; - int hi_page = lo_page + (memory_range.length / page_size); - lo_page = std::max(lo_page, start_page); - hi_page = std::min(hi_page, end_page); - if (lo_page > hi_page) { - ++it; - continue; - } - for (int i = lo_page / 8; i <= hi_page / 8; ++i) { - uint64_t page_flags = page_table[i]; - if (page_flags) { - // Dirty! - it->second->MarkDirty(i * 8 * page_size, (i * 8 + 7) * page_size); - } - } - ++it; - } - } - - // Reset page table. - { - SCOPE_profile_cpu_i("gpu", "SyncRange:reset"); - for (auto i = start_page / 8; i <= end_page / 8; ++i) { - page_table[i] = 0; - } - } + // TODO(benvanik): something interesting? + //uint32_t lo_address = address % 0x20000000; + //uint32_t hi_address = lo_address + length; } diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index cdde5c9c0..a3d90a7ac 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -114,8 +114,7 @@ class xe::MemoryHeap { }; uint32_t MemoryHeap::next_heap_id_ = 1; -Memory::Memory() - : alloy::Memory(), mapping_(0), mapping_base_(0), page_table_(0) { +Memory::Memory() : mapping_(0), mapping_base_(nullptr) { virtual_heap_ = new MemoryHeap(this, false); physical_heap_ = new MemoryHeap(this, true); } @@ -202,12 +201,6 @@ int Memory::Initialize() { return 1; } - // Allocate dirty page table. - // This must live within our low heap. Ideally we'd hardcode the address but - // this is more flexible. 
- page_table_ = physical_heap_->Alloc(0, (512 * 1024 * 1024) / (16 * 1024), - X_MEM_COMMIT, 16 * 1024); - return 0; } diff --git a/src/xenia/memory.h b/src/xenia/memory.h index 5b4e41bcf..3745ca856 100644 --- a/src/xenia/memory.h +++ b/src/xenia/memory.h @@ -49,9 +49,6 @@ class Memory : public alloy::Memory { int Initialize() override; - // TODO(benvanik): remove with GPU refactor. - uint64_t page_table() const override { return page_table_; } - bool AddMappedRange(uint64_t address, uint64_t mask, uint64_t size, void* context, cpu::MMIOReadCallback read_callback, cpu::MMIOWriteCallback write_callback); @@ -102,8 +99,6 @@ class Memory : public alloy::Memory { MemoryHeap* virtual_heap_; MemoryHeap* physical_heap_; - uint64_t page_table_; - friend class MemoryHeap; };