Basic hacky write watching for texture invalidation. Doesn't scale.

This commit is contained in:
Ben Vanik 2015-01-04 16:28:42 -08:00
parent 55c4488ab2
commit 0529fdb84d
7 changed files with 207 additions and 41 deletions

View File

@ -78,6 +78,109 @@ bool MMIOHandler::CheckStore(uint64_t address, uint64_t value) {
return false;
}
// Registers a write watch over [guest_address, guest_address + length).
// When any byte in the range is written the callback fires once and the
// watch is automatically removed (one-shot semantics; see CheckWriteWatch).
// Returns an opaque handle for CancelWriteWatch.
uintptr_t MMIOHandler::AddWriteWatch(uint32_t guest_address, size_t length,
                                     WriteWatchCallback callback,
                                     void* callback_context,
                                     void* callback_data) {
  // Reduce the guest address to its physical base so that writes through any
  // mirror view resolve to the same watch entry.
  // NOTE(review): the physical mirrors start at 0xA0000000 inclusive, and
  // 0xC0000000/0xE0000000 also alias physical 0; masking handles all three,
  // whereas the previous `> 0xA0000000` + subtraction missed the exact
  // boundary and mistranslated the 0xC/0xE ranges -- confirm against the
  // map_info table in memory.cc.
  uint32_t base_address = guest_address;
  if (base_address >= 0xA0000000) {
    base_address &= 0x1FFFFFFF;
  }

  // Insert into the table before protecting the pages so that a concurrent
  // fault can already find the entry.
  auto entry = new WriteWatchEntry();
  entry->address = base_address;
  entry->length = uint32_t(length);
  entry->callback = callback;
  entry->callback_context = callback_context;
  entry->callback_data = callback_data;
  {
    std::lock_guard<std::mutex> lock(write_watch_mutex_);
    write_watches_.push_back(entry);
  }

  // Make the desired range read-only under all address spaces so that any
  // write faults and is routed through CheckWriteWatch.
  auto host_address = mapping_base_ + base_address;
  DWORD old_protect;
  VirtualProtect(host_address, length, PAGE_READONLY, &old_protect);
  VirtualProtect(host_address + 0xA0000000, length, PAGE_READONLY,
                 &old_protect);
  VirtualProtect(host_address + 0xC0000000, length, PAGE_READONLY,
                 &old_protect);
  VirtualProtect(host_address + 0xE0000000, length, PAGE_READONLY,
                 &old_protect);
  return reinterpret_cast<uintptr_t>(entry);
}
// Restores normal read/write protection on the pages covered by a watch
// entry, across the base view and every physical mirror view.
void MMIOHandler::ClearWriteWatch(WriteWatchEntry* entry) {
  auto host_address = mapping_base_ + entry->address;
  DWORD old_protect;
  // FIX: lpflOldProtect is a required parameter on Windows; passing nullptr
  // makes VirtualProtect fail, which would leave the base view read-only
  // forever. Always pass &old_protect.
  VirtualProtect(host_address, entry->length, PAGE_READWRITE, &old_protect);
  VirtualProtect(host_address + 0xA0000000, entry->length, PAGE_READWRITE,
                 &old_protect);
  VirtualProtect(host_address + 0xC0000000, entry->length, PAGE_READWRITE,
                 &old_protect);
  VirtualProtect(host_address + 0xE0000000, entry->length, PAGE_READWRITE,
                 &old_protect);
}
// Removes a previously added write watch: restores page protection, unlinks
// the entry from the table, and frees it. Safe to call for a handle whose
// entry has already been removed from the table by CheckWriteWatch only if
// the caller cleared its copy of the handle first (the texture cache does).
void MMIOHandler::CancelWriteWatch(uintptr_t watch_handle) {
  auto entry = reinterpret_cast<WriteWatchEntry*>(watch_handle);

  // Allow access to the range again before dropping the entry.
  ClearWriteWatch(entry);

  // Remove from table under the lock; erase only if still present.
  {
    std::lock_guard<std::mutex> lock(write_watch_mutex_);
    auto it = std::find(write_watches_.begin(), write_watches_.end(), entry);
    if (it != write_watches_.end()) {
      write_watches_.erase(it);
    }
  }

  delete entry;
}
bool MMIOHandler::CheckWriteWatch(void* thread_state, uint64_t fault_address) {
uint32_t guest_address = uint32_t(fault_address - uintptr_t(mapping_base_));
uint32_t base_address = guest_address;
if (base_address > 0xA0000000) {
base_address -= 0xA0000000;
}
std::list<WriteWatchEntry*> pending_invalidates;
write_watch_mutex_.lock();
for (auto it = write_watches_.begin(); it != write_watches_.end();) {
auto entry = *it;
if (entry->address <= base_address &&
entry->address + entry->length > base_address) {
// Hit!
pending_invalidates.push_back(entry);
// TODO(benvanik): outside of lock?
ClearWriteWatch(entry);
auto erase_it = it;
++it;
write_watches_.erase(erase_it);
continue;
}
++it;
}
write_watch_mutex_.unlock();
if (pending_invalidates.empty()) {
// Rethrow access violation - range was not being watched.
return false;
}
while (!pending_invalidates.empty()) {
auto entry = pending_invalidates.back();
pending_invalidates.pop_back();
entry->callback(entry->callback_context, entry->callback_data,
guest_address);
delete entry;
}
// Range was watched, so lets eat this access violation.
return true;
}
bool MMIOHandler::HandleAccessFault(void* thread_state,
uint64_t fault_address) {
// Access violations are pretty rare, so we can do a linear search here.
@ -92,7 +195,7 @@ bool MMIOHandler::HandleAccessFault(void* thread_state,
if (!range) {
// Access is not found within any range, so fail and let the caller handle
// it (likely by aborting).
return false;
return CheckWriteWatch(thread_state, fault_address);
}
// TODO(benvanik): replace with simple check of mov (that's all
@ -175,8 +278,7 @@ bool MMIOHandler::HandleAccessFault(void* thread_state,
}
range->write(range->context, fault_address & 0xFFFFFFFF, value);
} else {
// Unknown MMIO instruction type.
assert_always();
assert_always("Unknown MMIO instruction type");
return false;
}

View File

@ -10,7 +10,9 @@
#ifndef XENIA_CPU_MMIO_HANDLER_H_
#define XENIA_CPU_MMIO_HANDLER_H_
#include <list>
#include <memory>
#include <mutex>
#include <vector>
namespace xe {
@ -19,6 +21,9 @@ namespace cpu {
typedef uint64_t (*MMIOReadCallback)(void* context, uint64_t addr);
typedef void (*MMIOWriteCallback)(void* context, uint64_t addr, uint64_t value);
typedef void (*WriteWatchCallback)(void* context_ptr, void* data_ptr,
uint32_t address);
// NOTE: only one can exist at a time!
class MMIOHandler {
public:
@ -34,14 +39,30 @@ class MMIOHandler {
bool CheckLoad(uint64_t address, uint64_t* out_value);
bool CheckStore(uint64_t address, uint64_t value);
uintptr_t AddWriteWatch(uint32_t guest_address, size_t length,
WriteWatchCallback callback, void* callback_context,
void* callback_data);
void CancelWriteWatch(uintptr_t watch_handle);
public:
bool HandleAccessFault(void* thread_state, uint64_t fault_address);
protected:
struct WriteWatchEntry {
uint32_t address;
uint32_t length;
WriteWatchCallback callback;
void* callback_context;
void* callback_data;
};
MMIOHandler(uint8_t* mapping_base) : mapping_base_(mapping_base) {}
virtual bool Initialize() = 0;
void ClearWriteWatch(WriteWatchEntry* entry);
bool CheckWriteWatch(void* thread_state, uint64_t fault_address);
virtual uint64_t GetThreadStateRip(void* thread_state_ptr) = 0;
virtual void SetThreadStateRip(void* thread_state_ptr, uint64_t rip) = 0;
virtual uint64_t* GetThreadStateRegPtr(void* thread_state_ptr,
@ -59,6 +80,10 @@ class MMIOHandler {
};
std::vector<MMIORange> mapped_ranges_;
// TODO(benvanik): data structure magic.
std::mutex write_watch_mutex_;
std::list<WriteWatchEntry*> write_watches_;
static MMIOHandler* global_handler_;
};

View File

@ -183,7 +183,7 @@ bool CommandProcessor::SetupGL() {
}
// Texture cache that keeps track of any textures/samplers used.
if (!texture_cache_.Initialize(membase_, &scratch_buffer_)) {
if (!texture_cache_.Initialize(memory_, &scratch_buffer_)) {
PLOGE("Unable to initialize texture cache");
return false;
}

View File

@ -22,7 +22,7 @@ using namespace xe::gpu::xenos;
extern "C" GLEWContext* glewGetContext();
extern "C" WGLEWContext* wglewGetContext();
TextureCache::TextureCache() : membase_(nullptr), scratch_buffer_(nullptr) {
TextureCache::TextureCache() : memory_(nullptr), scratch_buffer_(nullptr) {
invalidated_textures_sets_[0].reserve(64);
invalidated_textures_sets_[1].reserve(64);
invalidated_textures_ = &invalidated_textures_sets_[0];
@ -30,9 +30,8 @@ TextureCache::TextureCache() : membase_(nullptr), scratch_buffer_(nullptr) {
TextureCache::~TextureCache() { Shutdown(); }
bool TextureCache::Initialize(uint8_t* membase,
CircularBuffer* scratch_buffer) {
membase_ = membase;
// Wires up the cache's collaborators. Pointers are stored as-is; lifetime is
// managed by the caller. Always succeeds.
bool TextureCache::Initialize(Memory* memory, CircularBuffer* scratch_buffer) {
  scratch_buffer_ = scratch_buffer;
  memory_ = memory;
  return true;
}
@ -59,18 +58,22 @@ void TextureCache::Scavenge() {
}
void TextureCache::Clear() {
// Kill all textures - some may be in the eviction list, but that's fine
// as we will clear that below.
while (texture_entries_.size()) {
auto entry = texture_entries_.begin()->second;
EvictTexture(entry);
}
EvictAllTextures();
// Samplers must go last, as textures depend on them.
while (sampler_entries_.size()) {
auto entry = sampler_entries_.begin()->second;
EvictSampler(entry);
}
}
void TextureCache::EvictAllTextures() {
// Kill all textures - some may be in the eviction list, but that's fine
// as we will clear that below.
while (texture_entries_.size()) {
auto entry = texture_entries_.begin()->second;
EvictTexture(entry);
}
{
std::lock_guard<std::mutex> lock(invalidated_textures_mutex_);
@ -79,13 +82,6 @@ void TextureCache::Clear() {
}
}
//typedef void (*WriteWatchCallback)(void* context, void* data, void* address);
//uintptr_t AddWriteWatch(void* address, size_t length,
// WriteWatchCallback callback, void* callback_context,
// void* callback_data) {
// //
//}
TextureCache::TextureEntryView* TextureCache::Demand(
const TextureInfo& texture_info, const SamplerInfo& sampler_info) {
uint64_t texture_hash = texture_info.hash();
@ -121,6 +117,7 @@ TextureCache::TextureEntryView* TextureCache::Demand(
view->texture_sampler_handle = glGetTextureSamplerHandleARB(
texture_entry->handle, sampler_entry->handle);
if (!view->texture_sampler_handle) {
assert_always("Unable to get texture handle?");
return nullptr;
}
glMakeTextureHandleResidentARB(view->texture_sampler_handle);
@ -261,6 +258,12 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
const uint64_t hash = opt_hash ? opt_hash : texture_info.hash();
for (auto it = texture_entries_.find(hash); it != texture_entries_.end();
++it) {
if (it->second->pending_invalidation) {
// Whoa, we've been invalidated! Let's scavenge to cleanup and try again.
// TODO(benvanik): reuse existing texture storage.
Scavenge();
break;
}
if (it->second->texture_info == texture_info) {
// Found in cache!
return it->second;
@ -270,6 +273,8 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
// Not found, create.
auto entry = std::make_unique<TextureEntry>();
entry->texture_info = texture_info;
entry->write_watch_handle = 0;
entry->pending_invalidation = false;
entry->handle = 0;
GLenum target;
@ -331,10 +336,24 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
return nullptr;
}
// AddWriteWatch(host_base, length, [](void* context_ptr, void* data_ptr,
// void* address) {
// //
//}, this, &entry);
// Add a write watch. If any data in the given range is touched we'll get a
// callback and evict the texture. We could reuse the storage, though the
// driver is likely in a better position to pool that kind of stuff.
entry->write_watch_handle = memory_->AddWriteWatch(
texture_info.guest_address, texture_info.input_length,
[](void* context_ptr, void* data_ptr, uint32_t address) {
auto self = reinterpret_cast<TextureCache*>(context_ptr);
auto touched_entry = reinterpret_cast<TextureEntry*>(data_ptr);
// Clear watch handle first so we don't redundantly
// remove.
touched_entry->write_watch_handle = 0;
touched_entry->pending_invalidation = true;
// Add to pending list so Scavenge will clean it up.
self->invalidated_textures_mutex_.lock();
self->invalidated_textures_->push_back(touched_entry);
self->invalidated_textures_mutex_.unlock();
},
this, entry.get());
// Add to map - map takes ownership.
auto entry_ptr = entry.get();
@ -343,9 +362,10 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
}
void TextureCache::EvictTexture(TextureEntry* entry) {
/*if (entry->write_watch_handle) {
// remove from watch list
}*/
if (entry->write_watch_handle) {
memory_->CancelWriteWatch(entry->write_watch_handle);
entry->write_watch_handle = 0;
}
for (auto& view : entry->views) {
glMakeTextureHandleNonResidentARB(view->texture_sampler_handle);
@ -394,8 +414,7 @@ void TextureSwap(Endian endianness, void* dest, const void* src,
bool TextureCache::UploadTexture2D(GLuint texture,
const TextureInfo& texture_info) {
auto host_address =
reinterpret_cast<const uint8_t*>(membase_ + texture_info.guest_address);
const auto host_address = memory_->Translate(texture_info.guest_address);
GLenum internal_format = GL_RGBA8;
GLenum format = GL_RGBA;

View File

@ -18,6 +18,7 @@
#include <xenia/gpu/gl4/gl_context.h>
#include <xenia/gpu/sampler_info.h>
#include <xenia/gpu/texture_info.h>
#include <xenia/memory.h>
namespace xe {
namespace gpu {
@ -36,18 +37,21 @@ class TextureCache {
};
struct TextureEntry {
TextureInfo texture_info;
uintptr_t write_watch_handle;
GLuint handle;
bool pending_invalidation;
std::vector<std::unique_ptr<TextureEntryView>> views;
};
TextureCache();
~TextureCache();
bool Initialize(uint8_t* membase, CircularBuffer* scratch_buffer);
bool Initialize(Memory* memory, CircularBuffer* scratch_buffer);
void Shutdown();
void Scavenge();
void Clear();
void EvictAllTextures();
TextureEntryView* Demand(const TextureInfo& texture_info,
const SamplerInfo& sampler_info);
@ -62,7 +66,7 @@ class TextureCache {
bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info);
uint8_t* membase_;
Memory* memory_;
CircularBuffer* scratch_buffer_;
std::unordered_map<uint64_t, SamplerEntry*> sampler_entries_;
std::unordered_map<uint64_t, TextureEntry*> texture_entries_;

View File

@ -209,15 +209,15 @@ const static struct {
uint64_t virtual_address_end;
uint64_t target_address;
} map_info[] = {
0x00000000, 0x3FFFFFFF, 0x00000000, // (1024mb) - virtual 4k pages
0x40000000, 0x7EFFFFFF, 0x40000000, // (1024mb) - virtual 64k pages (cont)
0x7F000000, 0x7F0FFFFF, 0x00000000, // (1mb) - GPU writeback
0x7F100000, 0x7FFFFFFF, 0x00100000, // (15mb) - XPS?
0x80000000, 0x8FFFFFFF, 0x80000000, // (256mb) - xex 64k pages
0x90000000, 0x9FFFFFFF, 0x80000000, // (256mb) - xex 4k pages
0xA0000000, 0xBFFFFFFF, 0x00000000, // (512mb) - physical 64k pages
0xC0000000, 0xDFFFFFFF, 0x00000000, // - physical 16mb pages
0xE0000000, 0xFFFFFFFF, 0x00000000, // - physical 4k pages
0x00000000, 0x3FFFFFFF, 0x00000000, // (1024mb) - virtual 4k pages
0x40000000, 0x7EFFFFFF, 0x40000000, // (1024mb) - virtual 64k pages (cont)
0x7F000000, 0x7F0FFFFF, 0x00000000, // (1mb) - GPU writeback
0x7F100000, 0x7FFFFFFF, 0x00100000, // (15mb) - XPS?
0x80000000, 0x8FFFFFFF, 0x80000000, // (256mb) - xex 64k pages
0x90000000, 0x9FFFFFFF, 0x80000000, // (256mb) - xex 4k pages
0xA0000000, 0xBFFFFFFF, 0x00000000, // (512mb) - physical 64k pages
0xC0000000, 0xDFFFFFFF, 0x00000000, // - physical 16mb pages
0xE0000000, 0xFFFFFFFF, 0x00000000, // - physical 4k pages
};
int Memory::MapViews(uint8_t* mapping_base) {
assert_true(poly::countof(map_info) == poly::countof(views_.all_views));
@ -270,6 +270,17 @@ bool Memory::AddMappedRange(uint64_t address, uint64_t mask, uint64_t size,
read_callback, write_callback);
}
// Thin forwarder: the MMIO handler owns all write-watch bookkeeping, so the
// Memory facade just relays the request and returns the handler's handle.
uintptr_t Memory::AddWriteWatch(uint32_t guest_address, size_t length,
                                cpu::WriteWatchCallback callback,
                                void* callback_context, void* callback_data) {
  auto watch_handle = mmio_handler_->AddWriteWatch(
      guest_address, length, callback, callback_context, callback_data);
  return watch_handle;
}
// Forwards watch cancellation to the MMIO handler, which owns the watch
// table. `watch_handle` is the value returned by AddWriteWatch.
void Memory::CancelWriteWatch(uintptr_t watch_handle) {
mmio_handler_->CancelWriteWatch(watch_handle);
}
uint8_t Memory::LoadI8(uint64_t address) {
uint64_t value;
if (!mmio_handler_->CheckLoad(address, &value)) {

View File

@ -53,6 +53,11 @@ class Memory : public alloy::Memory {
void* context, cpu::MMIOReadCallback read_callback,
cpu::MMIOWriteCallback write_callback);
uintptr_t AddWriteWatch(uint32_t guest_address, size_t length,
cpu::WriteWatchCallback callback,
void* callback_context, void* callback_data);
void CancelWriteWatch(uintptr_t watch_handle);
uint8_t LoadI8(uint64_t address) override;
uint16_t LoadI16(uint64_t address) override;
uint32_t LoadI32(uint64_t address) override;