Removing dirty page table hack.

This commit is contained in:
Ben Vanik 2014-12-19 17:29:27 -08:00
parent 71fab4bbb7
commit 756cfbb484
11 changed files with 5 additions and 113 deletions

View File

@ -120,7 +120,6 @@ int IVMFunction::CallImpl(ThreadState* thread_state, uint64_t return_address) {
ics.locals = local_stack; ics.locals = local_stack;
ics.context = (uint8_t*)thread_state->raw_context(); ics.context = (uint8_t*)thread_state->raw_context();
ics.membase = memory->membase(); ics.membase = memory->membase();
ics.page_table = ics.membase + memory->page_table();
ics.did_carry = 0; ics.did_carry = 0;
ics.did_saturate = 0; ics.did_saturate = 0;
ics.thread_state = thread_state; ics.thread_state = thread_state;

View File

@ -1467,12 +1467,6 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) {
return DispatchToC(ctx, i, fns[i->dest->type]); return DispatchToC(ctx, i, fns[i->dest->type]);
} }
// Flags the page containing guest `address` as dirty in the interpreter's
// page table by storing 1 into the corresponding byte entry.
//
// The entry index is (address >> 14) masked to 15 bits, i.e. one byte per
// 16KB page across 32768 entries. Does nothing when ics.page_table is null.
void MarkPageDirty(IntCodeState& ics, uint32_t address) {
// 16KB pages.
if (ics.page_table) {
ics.page_table[(address >> 14) & 0x7FFF] = 1;
}
}
uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) {
uint32_t address = ics.rf[i->src1_reg].u32; uint32_t address = ics.rf[i->src1_reg].u32;
if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) { if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
@ -1483,7 +1477,6 @@ uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) {
ics.rf[i->src2_reg].u8); ics.rf[i->src2_reg].u8);
DFLUSH(); DFLUSH();
*((int8_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i8; *((int8_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i8;
MarkPageDirty(ics, address);
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_I16(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_I16(IntCodeState& ics, const IntCode* i) {
@ -1497,7 +1490,6 @@ uint32_t IntCode_STORE_I16(IntCodeState& ics, const IntCode* i) {
ics.rf[i->src2_reg].u16); ics.rf[i->src2_reg].u16);
DFLUSH(); DFLUSH();
*((int16_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i16; *((int16_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i16;
MarkPageDirty(ics, address);
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_I32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_I32(IntCodeState& ics, const IntCode* i) {
@ -1511,7 +1503,6 @@ uint32_t IntCode_STORE_I32(IntCodeState& ics, const IntCode* i) {
ics.rf[i->src2_reg].u32); ics.rf[i->src2_reg].u32);
DFLUSH(); DFLUSH();
*((int32_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i32; *((int32_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i32;
MarkPageDirty(ics, address);
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_I64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_I64(IntCodeState& ics, const IntCode* i) {
@ -1525,7 +1516,6 @@ uint32_t IntCode_STORE_I64(IntCodeState& ics, const IntCode* i) {
ics.rf[i->src2_reg].u64); ics.rf[i->src2_reg].u64);
DFLUSH(); DFLUSH();
*((int64_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i64; *((int64_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i64;
MarkPageDirty(ics, address);
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_F32(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_F32(IntCodeState& ics, const IntCode* i) {
@ -1534,7 +1524,6 @@ uint32_t IntCode_STORE_F32(IntCodeState& ics, const IntCode* i) {
ics.rf[i->src2_reg].u32); ics.rf[i->src2_reg].u32);
DFLUSH(); DFLUSH();
*((float*)(ics.membase + address)) = ics.rf[i->src2_reg].f32; *((float*)(ics.membase + address)) = ics.rf[i->src2_reg].f32;
MarkPageDirty(ics, address);
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) {
@ -1543,7 +1532,6 @@ uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) {
ics.rf[i->src2_reg].u64); ics.rf[i->src2_reg].u64);
DFLUSH(); DFLUSH();
*((double*)(ics.membase + address)) = ics.rf[i->src2_reg].f64; *((double*)(ics.membase + address)) = ics.rf[i->src2_reg].f64;
MarkPageDirty(ics, address);
return IA_NEXT; return IA_NEXT;
} }
uint32_t IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) { uint32_t IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) {
@ -1555,7 +1543,6 @@ uint32_t IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) {
ics.rf[i->src2_reg].v128.uz, ics.rf[i->src2_reg].v128.uw); ics.rf[i->src2_reg].v128.uz, ics.rf[i->src2_reg].v128.uw);
DFLUSH(); DFLUSH();
*((vec128_t*)(ics.membase + address)) = ics.rf[i->src2_reg].v128; *((vec128_t*)(ics.membase + address)) = ics.rf[i->src2_reg].v128;
MarkPageDirty(ics, address);
return IA_NEXT; return IA_NEXT;
} }
int Translate_STORE(TranslationContext& ctx, Instr* i) { int Translate_STORE(TranslationContext& ctx, Instr* i) {

View File

@ -42,7 +42,6 @@ typedef struct {
uint8_t* locals; uint8_t* locals;
uint8_t* context; uint8_t* context;
uint8_t* membase; uint8_t* membase;
uint8_t* page_table;
int8_t did_carry; int8_t did_carry;
int8_t did_saturate; int8_t did_saturate;
runtime::ThreadState* thread_state; runtime::ThreadState* thread_state;

View File

@ -749,11 +749,6 @@ void X64Emitter::StoreEflags() {
#endif // STORE_EFLAGS #endif // STORE_EFLAGS
} }
// Returns the value of runtime_->memory()->page_table() narrowed to 32 bits.
// A result of 0 is treated by EmitMarkPageDirty as "no page table".
// NOTE(review): the static_cast discards the upper 32 bits of the 64-bit
// value; presumably the table always lives at a low address - confirm.
uint32_t X64Emitter::page_table_address() const {
uint64_t addr = runtime_->memory()->page_table();
return static_cast<uint32_t>(addr);
}
bool X64Emitter::ConstantFitsIn32Reg(uint64_t v) { bool X64Emitter::ConstantFitsIn32Reg(uint64_t v) {
if ((v & ~0x7FFFFFFF) == 0) { if ((v & ~0x7FFFFFFF) == 0) {
// Fits under 31 bits, so just load using normal mov. // Fits under 31 bits, so just load using normal mov.

View File

@ -163,8 +163,6 @@ class X64Emitter : public Xbyak::CodeGenerator {
void LoadEflags(); void LoadEflags();
void StoreEflags(); void StoreEflags();
uint32_t page_table_address() const;
// Moves a 64bit immediate into memory. // Moves a 64bit immediate into memory.
bool ConstantFitsIn32Reg(uint64_t v); bool ConstantFitsIn32Reg(uint64_t v);
void MovMem64(const Xbyak::RegExp& addr, uint64_t v); void MovMem64(const Xbyak::RegExp& addr, uint64_t v);

View File

@ -1539,15 +1539,6 @@ EMITTER_OPCODE_TABLE(
// OPCODE_STORE // OPCODE_STORE
// ============================================================================ // ============================================================================
// Note: most *should* be aligned, but needs to be checked! // Note: most *should* be aligned, but needs to be checked!
// Emits x64 code that writes 1 into a byte-per-page dirty table entry.
// Nothing is emitted when e.page_table_address() returns 0.
//
// The emitted sequence shifts eax right by 14, masks it to 15 bits, and
// stores 1 at [rdx + rax + page_table_address]; eax is overwritten.
// NOTE(review): `addr` is never read here. The code presumably relies on
// eax already holding the guest address and rdx holding the memory base -
// confirm against ComputeMemoryAddress and the emitter's register usage.
void EmitMarkPageDirty(X64Emitter& e, RegExp& addr) {
// 16KB pages.
auto page_table_address = e.page_table_address();
if (page_table_address) {
// Turn the value in eax into a page index (address / 16KB, 15 bits).
e.shr(e.eax, 14);
e.and(e.eax, 0x7FFF);
// Store 1 into the byte entry for that page.
e.mov(e.byte[e.rdx + e.rax + page_table_address], 1);
}
}
EMITTER(STORE_I8, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I8<>>)) { EMITTER(STORE_I8, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I8<>>)) {
static void Emit(X64Emitter& e, const EmitArgType& i) { static void Emit(X64Emitter& e, const EmitArgType& i) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
@ -1556,7 +1547,6 @@ EMITTER(STORE_I8, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I8<>>)) {
} else { } else {
e.mov(e.byte[addr], i.src2); e.mov(e.byte[addr], i.src2);
} }
EmitMarkPageDirty(e, addr);
if (IsTracingData()) { if (IsTracingData()) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.mov(e.r8b, e.byte[addr]); e.mov(e.r8b, e.byte[addr]);
@ -1573,7 +1563,6 @@ EMITTER(STORE_I16, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I16<>>)) {
} else { } else {
e.mov(e.word[addr], i.src2); e.mov(e.word[addr], i.src2);
} }
EmitMarkPageDirty(e, addr);
if (IsTracingData()) { if (IsTracingData()) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.mov(e.r8w, e.word[addr]); e.mov(e.r8w, e.word[addr]);
@ -1590,7 +1579,6 @@ EMITTER(STORE_I32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I32<>>)) {
} else { } else {
e.mov(e.dword[addr], i.src2); e.mov(e.dword[addr], i.src2);
} }
EmitMarkPageDirty(e, addr);
if (IsTracingData()) { if (IsTracingData()) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.mov(e.r8d, e.dword[addr]); e.mov(e.r8d, e.dword[addr]);
@ -1607,7 +1595,6 @@ EMITTER(STORE_I64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I64<>>)) {
} else { } else {
e.mov(e.qword[addr], i.src2); e.mov(e.qword[addr], i.src2);
} }
EmitMarkPageDirty(e, addr);
if (IsTracingData()) { if (IsTracingData()) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.mov(e.r8, e.qword[addr]); e.mov(e.r8, e.qword[addr]);
@ -1624,7 +1611,6 @@ EMITTER(STORE_F32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F32<>>)) {
} else { } else {
e.vmovss(e.dword[addr], i.src2); e.vmovss(e.dword[addr], i.src2);
} }
EmitMarkPageDirty(e, addr);
if (IsTracingData()) { if (IsTracingData()) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.lea(e.r8, e.ptr[addr]); e.lea(e.r8, e.ptr[addr]);
@ -1641,7 +1627,6 @@ EMITTER(STORE_F64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F64<>>)) {
} else { } else {
e.vmovsd(e.qword[addr], i.src2); e.vmovsd(e.qword[addr], i.src2);
} }
EmitMarkPageDirty(e, addr);
if (IsTracingData()) { if (IsTracingData()) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.lea(e.r8, e.ptr[addr]); e.lea(e.r8, e.ptr[addr]);
@ -1659,7 +1644,6 @@ EMITTER(STORE_V128, MATCH(I<OPCODE_STORE, VoidOp, I64<>, V128<>>)) {
} else { } else {
e.vmovaps(e.ptr[addr], i.src2); e.vmovaps(e.ptr[addr], i.src2);
} }
EmitMarkPageDirty(e, addr);
if (IsTracingData()) { if (IsTracingData()) {
auto addr = ComputeMemoryAddress(e, i.src1); auto addr = ComputeMemoryAddress(e, i.src1);
e.lea(e.r8, e.ptr[addr]); e.lea(e.r8, e.ptr[addr]);

View File

@ -27,9 +27,6 @@ class Memory {
inline uint64_t* reserve_address() { return &reserve_address_; } inline uint64_t* reserve_address() { return &reserve_address_; }
inline uint64_t* reserve_value() { return &reserve_value_; } inline uint64_t* reserve_value() { return &reserve_value_; }
// TODO(benvanik): remove with GPU refactor.
virtual uint64_t page_table() const = 0;
uint64_t trace_base() const { return trace_base_; } uint64_t trace_base() const { return trace_base_; }
void set_trace_base(uint64_t value) { trace_base_ = value; } void set_trace_base(uint64_t value) { trace_base_ = value; }
@ -66,8 +63,6 @@ class SimpleMemory : public Memory {
SimpleMemory(size_t capacity); SimpleMemory(size_t capacity);
~SimpleMemory() override; ~SimpleMemory() override;
uint64_t page_table() const override { return 0; }
// TODO(benvanik): remove with IVM. // TODO(benvanik): remove with IVM.
uint8_t LoadI8(uint64_t address) override; uint8_t LoadI8(uint64_t address) override;
uint16_t LoadI16(uint64_t address) override; uint16_t LoadI16(uint64_t address) override;

View File

@ -76,7 +76,7 @@ public:
} }
virtual bool Equals(const void* info_ptr, size_t info_length) = 0; virtual bool Equals(const void* info_ptr, size_t info_length) = 0;
bool is_dirty() const { return dirtied_; } bool is_dirty() const { return true; }
void MarkDirty(uint32_t lo_address, uint32_t hi_address); void MarkDirty(uint32_t lo_address, uint32_t hi_address);
protected: protected:

View File

@ -114,60 +114,7 @@ uint64_t ResourceCache::HashRange(const MemoryRange& memory_range) {
void ResourceCache::SyncRange(uint32_t address, int length) { void ResourceCache::SyncRange(uint32_t address, int length) {
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
// TODO(benvanik): something interesting?
// Scan the page table in sync with our resource list. This means //uint32_t lo_address = address % 0x20000000;
// we have O(n) complexity for updates, though we could definitely //uint32_t hi_address = lo_address + length;
// make this faster/cleaner.
// TODO(benvanik): actually do this right.
// For now we assume the page table in the range of our resources
// will not be changing, which allows us to do a foreach(res) and reload
// and then clear the table.
// total bytes = (512 * 1024 * 1024) / (16 * 1024) = 32768
// each byte = 1 page
// Walk as qwords so we can clear things up faster.
uint64_t* page_table = reinterpret_cast<uint64_t*>(
memory_->Translate(memory_->page_table()));
uint32_t page_size = 16 * 1024; // 16KB pages
uint32_t lo_address = address % 0x20000000;
uint32_t hi_address = lo_address + length;
hi_address = (hi_address / page_size) * page_size + page_size;
int start_page = lo_address / page_size;
int end_page = hi_address / page_size;
{
SCOPE_profile_cpu_i("gpu", "SyncRange:mark");
auto it = lo_address > page_size ?
paged_resources_.upper_bound(lo_address - page_size) :
paged_resources_.begin();
auto end_it = paged_resources_.lower_bound(hi_address + page_size);
while (it != end_it) {
const auto& memory_range = it->second->memory_range();
int lo_page = (memory_range.guest_base % 0x20000000) / page_size;
int hi_page = lo_page + (memory_range.length / page_size);
lo_page = std::max(lo_page, start_page);
hi_page = std::min(hi_page, end_page);
if (lo_page > hi_page) {
++it;
continue;
}
for (int i = lo_page / 8; i <= hi_page / 8; ++i) {
uint64_t page_flags = page_table[i];
if (page_flags) {
// Dirty!
it->second->MarkDirty(i * 8 * page_size, (i * 8 + 7) * page_size);
}
}
++it;
}
}
// Reset page table.
{
SCOPE_profile_cpu_i("gpu", "SyncRange:reset");
for (auto i = start_page / 8; i <= end_page / 8; ++i) {
page_table[i] = 0;
}
}
} }

View File

@ -114,8 +114,7 @@ class xe::MemoryHeap {
}; };
uint32_t MemoryHeap::next_heap_id_ = 1; uint32_t MemoryHeap::next_heap_id_ = 1;
Memory::Memory() Memory::Memory() : mapping_(0), mapping_base_(nullptr) {
: alloy::Memory(), mapping_(0), mapping_base_(0), page_table_(0) {
virtual_heap_ = new MemoryHeap(this, false); virtual_heap_ = new MemoryHeap(this, false);
physical_heap_ = new MemoryHeap(this, true); physical_heap_ = new MemoryHeap(this, true);
} }
@ -202,12 +201,6 @@ int Memory::Initialize() {
return 1; return 1;
} }
// Allocate dirty page table.
// This must live within our low heap. Ideally we'd hardcode the address but
// this is more flexible.
page_table_ = physical_heap_->Alloc(0, (512 * 1024 * 1024) / (16 * 1024),
X_MEM_COMMIT, 16 * 1024);
return 0; return 0;
} }

View File

@ -49,9 +49,6 @@ class Memory : public alloy::Memory {
int Initialize() override; int Initialize() override;
// TODO(benvanik): remove with GPU refactor.
uint64_t page_table() const override { return page_table_; }
bool AddMappedRange(uint64_t address, uint64_t mask, uint64_t size, bool AddMappedRange(uint64_t address, uint64_t mask, uint64_t size,
void* context, cpu::MMIOReadCallback read_callback, void* context, cpu::MMIOReadCallback read_callback,
cpu::MMIOWriteCallback write_callback); cpu::MMIOWriteCallback write_callback);
@ -102,8 +99,6 @@ class Memory : public alloy::Memory {
MemoryHeap* virtual_heap_; MemoryHeap* virtual_heap_;
MemoryHeap* physical_heap_; MemoryHeap* physical_heap_;
uint64_t page_table_;
friend class MemoryHeap; friend class MemoryHeap;
}; };