Removing dirty page table hack.
This commit is contained in:
parent
71fab4bbb7
commit
756cfbb484
|
@ -120,7 +120,6 @@ int IVMFunction::CallImpl(ThreadState* thread_state, uint64_t return_address) {
|
|||
ics.locals = local_stack;
|
||||
ics.context = (uint8_t*)thread_state->raw_context();
|
||||
ics.membase = memory->membase();
|
||||
ics.page_table = ics.membase + memory->page_table();
|
||||
ics.did_carry = 0;
|
||||
ics.did_saturate = 0;
|
||||
ics.thread_state = thread_state;
|
||||
|
|
|
@ -1467,12 +1467,6 @@ int Translate_LOAD(TranslationContext& ctx, Instr* i) {
|
|||
return DispatchToC(ctx, i, fns[i->dest->type]);
|
||||
}
|
||||
|
||||
// Records a write to |address| by setting the byte for its 16KB page in the
// interpreter state's page table. Does nothing when no table pointer is set.
//
// NOTE(review): CallImpl assigns ics.page_table as
// ics.membase + memory->page_table(), so the pointer is only null when that
// sum is null -- confirm the guard below behaves as intended for memory
// implementations that report a page table address of 0.
void MarkPageDirty(IntCodeState& ics, uint32_t address) {
  if (!ics.page_table) {
    return;
  }
  // 16KB pages: discard the low 14 bits, then keep 15 bits of page index.
  const uint32_t page_index = (address >> 14) & 0x7FFF;
  ics.page_table[page_index] = 1;
}
|
||||
uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) {
|
||||
uint32_t address = ics.rf[i->src1_reg].u32;
|
||||
if (DYNAMIC_REGISTER_ACCESS_CHECK(address)) {
|
||||
|
@ -1483,7 +1477,6 @@ uint32_t IntCode_STORE_I8(IntCodeState& ics, const IntCode* i) {
|
|||
ics.rf[i->src2_reg].u8);
|
||||
DFLUSH();
|
||||
*((int8_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i8;
|
||||
MarkPageDirty(ics, address);
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_I16(IntCodeState& ics, const IntCode* i) {
|
||||
|
@ -1497,7 +1490,6 @@ uint32_t IntCode_STORE_I16(IntCodeState& ics, const IntCode* i) {
|
|||
ics.rf[i->src2_reg].u16);
|
||||
DFLUSH();
|
||||
*((int16_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i16;
|
||||
MarkPageDirty(ics, address);
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_I32(IntCodeState& ics, const IntCode* i) {
|
||||
|
@ -1511,7 +1503,6 @@ uint32_t IntCode_STORE_I32(IntCodeState& ics, const IntCode* i) {
|
|||
ics.rf[i->src2_reg].u32);
|
||||
DFLUSH();
|
||||
*((int32_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i32;
|
||||
MarkPageDirty(ics, address);
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_I64(IntCodeState& ics, const IntCode* i) {
|
||||
|
@ -1525,7 +1516,6 @@ uint32_t IntCode_STORE_I64(IntCodeState& ics, const IntCode* i) {
|
|||
ics.rf[i->src2_reg].u64);
|
||||
DFLUSH();
|
||||
*((int64_t*)(ics.membase + address)) = ics.rf[i->src2_reg].i64;
|
||||
MarkPageDirty(ics, address);
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_F32(IntCodeState& ics, const IntCode* i) {
|
||||
|
@ -1534,7 +1524,6 @@ uint32_t IntCode_STORE_F32(IntCodeState& ics, const IntCode* i) {
|
|||
ics.rf[i->src2_reg].u32);
|
||||
DFLUSH();
|
||||
*((float*)(ics.membase + address)) = ics.rf[i->src2_reg].f32;
|
||||
MarkPageDirty(ics, address);
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) {
|
||||
|
@ -1543,7 +1532,6 @@ uint32_t IntCode_STORE_F64(IntCodeState& ics, const IntCode* i) {
|
|||
ics.rf[i->src2_reg].u64);
|
||||
DFLUSH();
|
||||
*((double*)(ics.membase + address)) = ics.rf[i->src2_reg].f64;
|
||||
MarkPageDirty(ics, address);
|
||||
return IA_NEXT;
|
||||
}
|
||||
uint32_t IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) {
|
||||
|
@ -1555,7 +1543,6 @@ uint32_t IntCode_STORE_V128(IntCodeState& ics, const IntCode* i) {
|
|||
ics.rf[i->src2_reg].v128.uz, ics.rf[i->src2_reg].v128.uw);
|
||||
DFLUSH();
|
||||
*((vec128_t*)(ics.membase + address)) = ics.rf[i->src2_reg].v128;
|
||||
MarkPageDirty(ics, address);
|
||||
return IA_NEXT;
|
||||
}
|
||||
int Translate_STORE(TranslationContext& ctx, Instr* i) {
|
||||
|
|
|
@ -42,7 +42,6 @@ typedef struct {
|
|||
uint8_t* locals;
|
||||
uint8_t* context;
|
||||
uint8_t* membase;
|
||||
uint8_t* page_table;
|
||||
int8_t did_carry;
|
||||
int8_t did_saturate;
|
||||
runtime::ThreadState* thread_state;
|
||||
|
|
|
@ -749,11 +749,6 @@ void X64Emitter::StoreEflags() {
|
|||
#endif // STORE_EFLAGS
|
||||
}
|
||||
|
||||
uint32_t X64Emitter::page_table_address() const {
|
||||
uint64_t addr = runtime_->memory()->page_table();
|
||||
return static_cast<uint32_t>(addr);
|
||||
}
|
||||
|
||||
bool X64Emitter::ConstantFitsIn32Reg(uint64_t v) {
|
||||
if ((v & ~0x7FFFFFFF) == 0) {
|
||||
// Fits under 31 bits, so just load using normal mov.
|
||||
|
|
|
@ -163,8 +163,6 @@ class X64Emitter : public Xbyak::CodeGenerator {
|
|||
void LoadEflags();
|
||||
void StoreEflags();
|
||||
|
||||
uint32_t page_table_address() const;
|
||||
|
||||
// Moves a 64bit immediate into memory.
|
||||
bool ConstantFitsIn32Reg(uint64_t v);
|
||||
void MovMem64(const Xbyak::RegExp& addr, uint64_t v);
|
||||
|
|
|
@ -1539,15 +1539,6 @@ EMITTER_OPCODE_TABLE(
|
|||
// OPCODE_STORE
|
||||
// ============================================================================
|
||||
// Note: most *should* be aligned, but needs to be checked!
|
||||
// Emits the instruction sequence that sets the dirty byte for a 16KB page in
// the page table. Emits nothing when the emitter reports a page table address
// of 0.
//
// NOTE(review): |addr| is never referenced here; the emitted code shifts and
// masks eax and then indexes off rdx + rax. Presumably the caller has left
// the guest address in eax and the memory base in rdx -- confirm against
// ComputeMemoryAddress. The sequence also overwrites eax.
void EmitMarkPageDirty(X64Emitter& e, RegExp& addr) {
  const auto table_offset = e.page_table_address();
  if (!table_offset) {
    return;
  }
  // 16KB pages: page index = (eax >> 14) & 0x7FFF.
  e.shr(e.eax, 14);
  e.and(e.eax, 0x7FFF);
  // Store 1 into the byte at rdx + page index + table offset.
  e.mov(e.byte[e.rdx + e.rax + table_offset], 1);
}
|
||||
EMITTER(STORE_I8, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I8<>>)) {
|
||||
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
||||
auto addr = ComputeMemoryAddress(e, i.src1);
|
||||
|
@ -1556,7 +1547,6 @@ EMITTER(STORE_I8, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I8<>>)) {
|
|||
} else {
|
||||
e.mov(e.byte[addr], i.src2);
|
||||
}
|
||||
EmitMarkPageDirty(e, addr);
|
||||
if (IsTracingData()) {
|
||||
auto addr = ComputeMemoryAddress(e, i.src1);
|
||||
e.mov(e.r8b, e.byte[addr]);
|
||||
|
@ -1573,7 +1563,6 @@ EMITTER(STORE_I16, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I16<>>)) {
|
|||
} else {
|
||||
e.mov(e.word[addr], i.src2);
|
||||
}
|
||||
EmitMarkPageDirty(e, addr);
|
||||
if (IsTracingData()) {
|
||||
auto addr = ComputeMemoryAddress(e, i.src1);
|
||||
e.mov(e.r8w, e.word[addr]);
|
||||
|
@ -1590,7 +1579,6 @@ EMITTER(STORE_I32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I32<>>)) {
|
|||
} else {
|
||||
e.mov(e.dword[addr], i.src2);
|
||||
}
|
||||
EmitMarkPageDirty(e, addr);
|
||||
if (IsTracingData()) {
|
||||
auto addr = ComputeMemoryAddress(e, i.src1);
|
||||
e.mov(e.r8d, e.dword[addr]);
|
||||
|
@ -1607,7 +1595,6 @@ EMITTER(STORE_I64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, I64<>>)) {
|
|||
} else {
|
||||
e.mov(e.qword[addr], i.src2);
|
||||
}
|
||||
EmitMarkPageDirty(e, addr);
|
||||
if (IsTracingData()) {
|
||||
auto addr = ComputeMemoryAddress(e, i.src1);
|
||||
e.mov(e.r8, e.qword[addr]);
|
||||
|
@ -1624,7 +1611,6 @@ EMITTER(STORE_F32, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F32<>>)) {
|
|||
} else {
|
||||
e.vmovss(e.dword[addr], i.src2);
|
||||
}
|
||||
EmitMarkPageDirty(e, addr);
|
||||
if (IsTracingData()) {
|
||||
auto addr = ComputeMemoryAddress(e, i.src1);
|
||||
e.lea(e.r8, e.ptr[addr]);
|
||||
|
@ -1641,7 +1627,6 @@ EMITTER(STORE_F64, MATCH(I<OPCODE_STORE, VoidOp, I64<>, F64<>>)) {
|
|||
} else {
|
||||
e.vmovsd(e.qword[addr], i.src2);
|
||||
}
|
||||
EmitMarkPageDirty(e, addr);
|
||||
if (IsTracingData()) {
|
||||
auto addr = ComputeMemoryAddress(e, i.src1);
|
||||
e.lea(e.r8, e.ptr[addr]);
|
||||
|
@ -1659,7 +1644,6 @@ EMITTER(STORE_V128, MATCH(I<OPCODE_STORE, VoidOp, I64<>, V128<>>)) {
|
|||
} else {
|
||||
e.vmovaps(e.ptr[addr], i.src2);
|
||||
}
|
||||
EmitMarkPageDirty(e, addr);
|
||||
if (IsTracingData()) {
|
||||
auto addr = ComputeMemoryAddress(e, i.src1);
|
||||
e.lea(e.r8, e.ptr[addr]);
|
||||
|
|
|
@ -27,9 +27,6 @@ class Memory {
|
|||
inline uint64_t* reserve_address() { return &reserve_address_; }
|
||||
inline uint64_t* reserve_value() { return &reserve_value_; }
|
||||
|
||||
// TODO(benvanik): remove with GPU refactor.
|
||||
virtual uint64_t page_table() const = 0;
|
||||
|
||||
uint64_t trace_base() const { return trace_base_; }
|
||||
void set_trace_base(uint64_t value) { trace_base_ = value; }
|
||||
|
||||
|
@ -66,8 +63,6 @@ class SimpleMemory : public Memory {
|
|||
SimpleMemory(size_t capacity);
|
||||
~SimpleMemory() override;
|
||||
|
||||
uint64_t page_table() const override { return 0; }
|
||||
|
||||
// TODO(benvanik): remove with IVM.
|
||||
uint8_t LoadI8(uint64_t address) override;
|
||||
uint16_t LoadI16(uint64_t address) override;
|
||||
|
|
|
@ -76,7 +76,7 @@ public:
|
|||
}
|
||||
virtual bool Equals(const void* info_ptr, size_t info_length) = 0;
|
||||
|
||||
bool is_dirty() const { return dirtied_; }
|
||||
bool is_dirty() const { return true; }
|
||||
void MarkDirty(uint32_t lo_address, uint32_t hi_address);
|
||||
|
||||
protected:
|
||||
|
|
|
@ -114,60 +114,7 @@ uint64_t ResourceCache::HashRange(const MemoryRange& memory_range) {
|
|||
|
||||
void ResourceCache::SyncRange(uint32_t address, int length) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// Scan the page table in sync with our resource list. This means
|
||||
// we have O(n) complexity for updates, though we could definitely
|
||||
// make this faster/cleaner.
|
||||
// TODO(benvanik): actually do this right.
|
||||
// For now we assume the page table in the range of our resources
|
||||
// will not be changing, which allows us to do a foreach(res) and reload
|
||||
// and then clear the table.
|
||||
|
||||
// total bytes = (512 * 1024 * 1024) / (16 * 1024) = 32768
|
||||
// each byte = 1 page
|
||||
// Walk as qwords so we can clear things up faster.
|
||||
uint64_t* page_table = reinterpret_cast<uint64_t*>(
|
||||
memory_->Translate(memory_->page_table()));
|
||||
uint32_t page_size = 16 * 1024; // 16KB pages
|
||||
|
||||
uint32_t lo_address = address % 0x20000000;
|
||||
uint32_t hi_address = lo_address + length;
|
||||
hi_address = (hi_address / page_size) * page_size + page_size;
|
||||
int start_page = lo_address / page_size;
|
||||
int end_page = hi_address / page_size;
|
||||
|
||||
{
|
||||
SCOPE_profile_cpu_i("gpu", "SyncRange:mark");
|
||||
auto it = lo_address > page_size ?
|
||||
paged_resources_.upper_bound(lo_address - page_size) :
|
||||
paged_resources_.begin();
|
||||
auto end_it = paged_resources_.lower_bound(hi_address + page_size);
|
||||
while (it != end_it) {
|
||||
const auto& memory_range = it->second->memory_range();
|
||||
int lo_page = (memory_range.guest_base % 0x20000000) / page_size;
|
||||
int hi_page = lo_page + (memory_range.length / page_size);
|
||||
lo_page = std::max(lo_page, start_page);
|
||||
hi_page = std::min(hi_page, end_page);
|
||||
if (lo_page > hi_page) {
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
for (int i = lo_page / 8; i <= hi_page / 8; ++i) {
|
||||
uint64_t page_flags = page_table[i];
|
||||
if (page_flags) {
|
||||
// Dirty!
|
||||
it->second->MarkDirty(i * 8 * page_size, (i * 8 + 7) * page_size);
|
||||
}
|
||||
}
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
// Reset page table.
|
||||
{
|
||||
SCOPE_profile_cpu_i("gpu", "SyncRange:reset");
|
||||
for (auto i = start_page / 8; i <= end_page / 8; ++i) {
|
||||
page_table[i] = 0;
|
||||
}
|
||||
}
|
||||
// TODO(benvanik): something interesting?
|
||||
//uint32_t lo_address = address % 0x20000000;
|
||||
//uint32_t hi_address = lo_address + length;
|
||||
}
|
||||
|
|
|
@ -114,8 +114,7 @@ class xe::MemoryHeap {
|
|||
};
|
||||
uint32_t MemoryHeap::next_heap_id_ = 1;
|
||||
|
||||
Memory::Memory()
|
||||
: alloy::Memory(), mapping_(0), mapping_base_(0), page_table_(0) {
|
||||
Memory::Memory() : mapping_(0), mapping_base_(nullptr) {
|
||||
virtual_heap_ = new MemoryHeap(this, false);
|
||||
physical_heap_ = new MemoryHeap(this, true);
|
||||
}
|
||||
|
@ -202,12 +201,6 @@ int Memory::Initialize() {
|
|||
return 1;
|
||||
}
|
||||
|
||||
// Allocate dirty page table.
|
||||
// This must live within our low heap. Ideally we'd hardcode the address but
|
||||
// this is more flexible.
|
||||
page_table_ = physical_heap_->Alloc(0, (512 * 1024 * 1024) / (16 * 1024),
|
||||
X_MEM_COMMIT, 16 * 1024);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -49,9 +49,6 @@ class Memory : public alloy::Memory {
|
|||
|
||||
int Initialize() override;
|
||||
|
||||
// TODO(benvanik): remove with GPU refactor.
|
||||
uint64_t page_table() const override { return page_table_; }
|
||||
|
||||
bool AddMappedRange(uint64_t address, uint64_t mask, uint64_t size,
|
||||
void* context, cpu::MMIOReadCallback read_callback,
|
||||
cpu::MMIOWriteCallback write_callback);
|
||||
|
@ -102,8 +99,6 @@ class Memory : public alloy::Memory {
|
|||
MemoryHeap* virtual_heap_;
|
||||
MemoryHeap* physical_heap_;
|
||||
|
||||
uint64_t page_table_;
|
||||
|
||||
friend class MemoryHeap;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue