diff --git a/src/xenia/cpu/mmio_handler.cc b/src/xenia/cpu/mmio_handler.cc index 299d2704e..eb6834b38 100644 --- a/src/xenia/cpu/mmio_handler.cc +++ b/src/xenia/cpu/mmio_handler.cc @@ -78,6 +78,109 @@ bool MMIOHandler::CheckStore(uint64_t address, uint64_t value) { return false; } +uintptr_t MMIOHandler::AddWriteWatch(uint32_t guest_address, size_t length, + WriteWatchCallback callback, + void* callback_context, + void* callback_data) { + uint32_t base_address = guest_address; + if (base_address > 0xA0000000) { + base_address -= 0xA0000000; + } + + // Add to table. The slot reservation may evict a previous watch, which + // could include our target, so we do it first. + auto entry = new WriteWatchEntry(); + entry->address = base_address; + entry->length = uint32_t(length); + entry->callback = callback; + entry->callback_context = callback_context; + entry->callback_data = callback_data; + write_watch_mutex_.lock(); + write_watches_.push_back(entry); + write_watch_mutex_.unlock(); + + // Make the desired range read only under all address spaces. 
+ auto host_address = mapping_base_ + base_address; + DWORD old_protect; + VirtualProtect(host_address, length, PAGE_READONLY, &old_protect); + VirtualProtect(host_address + 0xA0000000, length, PAGE_READONLY, + &old_protect); + VirtualProtect(host_address + 0xC0000000, length, PAGE_READONLY, + &old_protect); + VirtualProtect(host_address + 0xE0000000, length, PAGE_READONLY, + &old_protect); + + return reinterpret_cast<uintptr_t>(entry); +} + +void MMIOHandler::ClearWriteWatch(WriteWatchEntry* entry) { + auto host_address = mapping_base_ + entry->address; + DWORD old_protect; + VirtualProtect(host_address, entry->length, PAGE_READWRITE, &old_protect); + VirtualProtect(host_address + 0xA0000000, entry->length, PAGE_READWRITE, + &old_protect); + VirtualProtect(host_address + 0xC0000000, entry->length, PAGE_READWRITE, + &old_protect); + VirtualProtect(host_address + 0xE0000000, entry->length, PAGE_READWRITE, + &old_protect); +} + +void MMIOHandler::CancelWriteWatch(uintptr_t watch_handle) { + auto entry = reinterpret_cast<WriteWatchEntry*>(watch_handle); + + // Allow access to the range again. + ClearWriteWatch(entry); + + // Remove from table. + write_watch_mutex_.lock(); + auto it = std::find(write_watches_.begin(), write_watches_.end(), entry); + if (it != write_watches_.end()) { + write_watches_.erase(it); + } + write_watch_mutex_.unlock(); + + delete entry; +} + +bool MMIOHandler::CheckWriteWatch(void* thread_state, uint64_t fault_address) { + uint32_t guest_address = uint32_t(fault_address - uintptr_t(mapping_base_)); + uint32_t base_address = guest_address; + if (base_address > 0xA0000000) { + base_address -= 0xA0000000; + } + std::list<WriteWatchEntry*> pending_invalidates; + write_watch_mutex_.lock(); + for (auto it = write_watches_.begin(); it != write_watches_.end();) { + auto entry = *it; + if (entry->address <= base_address && + entry->address + entry->length > base_address) { + // Hit! + pending_invalidates.push_back(entry); + // TODO(benvanik): outside of lock? 
+ ClearWriteWatch(entry); + auto erase_it = it; + ++it; + write_watches_.erase(erase_it); + continue; + } + ++it; + } + write_watch_mutex_.unlock(); + if (pending_invalidates.empty()) { + // Rethrow access violation - range was not being watched. + return false; + } + while (!pending_invalidates.empty()) { + auto entry = pending_invalidates.back(); + pending_invalidates.pop_back(); + entry->callback(entry->callback_context, entry->callback_data, + guest_address); + delete entry; + } + // Range was watched, so lets eat this access violation. + return true; +} + bool MMIOHandler::HandleAccessFault(void* thread_state, uint64_t fault_address) { // Access violations are pretty rare, so we can do a linear search here. @@ -92,7 +195,7 @@ bool MMIOHandler::HandleAccessFault(void* thread_state, if (!range) { // Access is not found within any range, so fail and let the caller handle // it (likely by aborting). - return false; + return CheckWriteWatch(thread_state, fault_address); } // TODO(benvanik): replace with simple check of mov (that's all @@ -175,8 +278,7 @@ bool MMIOHandler::HandleAccessFault(void* thread_state, } range->write(range->context, fault_address & 0xFFFFFFFF, value); } else { - // Unknown MMIO instruction type. - assert_always(); + assert_always("Unknown MMIO instruction type"); return false; } diff --git a/src/xenia/cpu/mmio_handler.h b/src/xenia/cpu/mmio_handler.h index 586c04840..b872a81cb 100644 --- a/src/xenia/cpu/mmio_handler.h +++ b/src/xenia/cpu/mmio_handler.h @@ -10,7 +10,9 @@ #ifndef XENIA_CPU_MMIO_HANDLER_H_ #define XENIA_CPU_MMIO_HANDLER_H_ +#include <list> #include <memory> +#include <mutex> #include <vector> namespace xe { @@ -19,6 +21,9 @@ namespace cpu { typedef uint64_t (*MMIOReadCallback)(void* context, uint64_t addr); typedef void (*MMIOWriteCallback)(void* context, uint64_t addr, uint64_t value); +typedef void (*WriteWatchCallback)(void* context_ptr, void* data_ptr, + uint32_t address); + // NOTE: only one can exist at a time! 
class MMIOHandler { public: @@ -34,14 +39,30 @@ class MMIOHandler { bool CheckLoad(uint64_t address, uint64_t* out_value); bool CheckStore(uint64_t address, uint64_t value); + uintptr_t AddWriteWatch(uint32_t guest_address, size_t length, + WriteWatchCallback callback, void* callback_context, + void* callback_data); + void CancelWriteWatch(uintptr_t watch_handle); + public: bool HandleAccessFault(void* thread_state, uint64_t fault_address); protected: + struct WriteWatchEntry { + uint32_t address; + uint32_t length; + WriteWatchCallback callback; + void* callback_context; + void* callback_data; + }; + MMIOHandler(uint8_t* mapping_base) : mapping_base_(mapping_base) {} virtual bool Initialize() = 0; + void ClearWriteWatch(WriteWatchEntry* entry); + bool CheckWriteWatch(void* thread_state, uint64_t fault_address); + virtual uint64_t GetThreadStateRip(void* thread_state_ptr) = 0; virtual void SetThreadStateRip(void* thread_state_ptr, uint64_t rip) = 0; virtual uint64_t* GetThreadStateRegPtr(void* thread_state_ptr, @@ -59,6 +80,10 @@ class MMIOHandler { }; std::vector<MMIORange> mapped_ranges_; + // TODO(benvanik): data structure magic. + std::mutex write_watch_mutex_; + std::list<WriteWatchEntry*> write_watches_; + static MMIOHandler* global_handler_; }; diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index c686a94af..f499dc1ad 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -183,7 +183,7 @@ bool CommandProcessor::SetupGL() { } // Texture cache that keeps track of any textures/samplers used. 
- if (!texture_cache_.Initialize(membase_, &scratch_buffer_)) { + if (!texture_cache_.Initialize(memory_, &scratch_buffer_)) { PLOGE("Unable to initialize texture cache"); return false; } diff --git a/src/xenia/gpu/gl4/texture_cache.cc b/src/xenia/gpu/gl4/texture_cache.cc index f6ed565da..6193b19d2 100644 --- a/src/xenia/gpu/gl4/texture_cache.cc +++ b/src/xenia/gpu/gl4/texture_cache.cc @@ -22,7 +22,7 @@ using namespace xe::gpu::xenos; extern "C" GLEWContext* glewGetContext(); extern "C" WGLEWContext* wglewGetContext(); -TextureCache::TextureCache() : membase_(nullptr), scratch_buffer_(nullptr) { +TextureCache::TextureCache() : memory_(nullptr), scratch_buffer_(nullptr) { invalidated_textures_sets_[0].reserve(64); invalidated_textures_sets_[1].reserve(64); invalidated_textures_ = &invalidated_textures_sets_[0]; @@ -30,9 +30,8 @@ TextureCache::TextureCache() : membase_(nullptr), scratch_buffer_(nullptr) { TextureCache::~TextureCache() { Shutdown(); } -bool TextureCache::Initialize(uint8_t* membase, - CircularBuffer* scratch_buffer) { - membase_ = membase; +bool TextureCache::Initialize(Memory* memory, CircularBuffer* scratch_buffer) { + memory_ = memory; scratch_buffer_ = scratch_buffer; return true; } @@ -59,18 +58,22 @@ void TextureCache::Scavenge() { } void TextureCache::Clear() { - // Kill all textures - some may be in the eviction list, but that's fine - // as we will clear that below. - while (texture_entries_.size()) { - auto entry = texture_entries_.begin()->second; - EvictTexture(entry); - } + EvictAllTextures(); // Samplers must go last, as textures depend on them. while (sampler_entries_.size()) { auto entry = sampler_entries_.begin()->second; EvictSampler(entry); } +} + +void TextureCache::EvictAllTextures() { + // Kill all textures - some may be in the eviction list, but that's fine + // as we will clear that below. 
+ while (texture_entries_.size()) { + auto entry = texture_entries_.begin()->second; + EvictTexture(entry); + } { std::lock_guard<std::mutex> lock(invalidated_textures_mutex_); @@ -79,13 +82,6 @@ void TextureCache::Clear() { } } -//typedef void (*WriteWatchCallback)(void* context, void* data, void* address); -//uintptr_t AddWriteWatch(void* address, size_t length, -// WriteWatchCallback callback, void* callback_context, -// void* callback_data) { -// // -//} - TextureCache::TextureEntryView* TextureCache::Demand( const TextureInfo& texture_info, const SamplerInfo& sampler_info) { uint64_t texture_hash = texture_info.hash(); @@ -121,6 +117,7 @@ TextureCache::TextureEntryView* TextureCache::Demand( view->texture_sampler_handle = glGetTextureSamplerHandleARB( texture_entry->handle, sampler_entry->handle); if (!view->texture_sampler_handle) { + assert_always("Unable to get texture handle?"); return nullptr; } glMakeTextureHandleResidentARB(view->texture_sampler_handle); @@ -261,6 +258,12 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture( const uint64_t hash = opt_hash ? opt_hash : texture_info.hash(); for (auto it = texture_entries_.find(hash); it != texture_entries_.end(); ++it) { + if (it->second->pending_invalidation) { + // Whoa, we've been invalidated! Let's scavenge to cleanup and try again. + // TODO(benvanik): reuse existing texture storage. + Scavenge(); + break; + } if (it->second->texture_info == texture_info) { // Found in cache! return it->second; @@ -270,6 +273,8 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture( // Not found, create. 
 auto entry = std::make_unique<TextureEntry>(); entry->texture_info = texture_info; + entry->write_watch_handle = 0; + entry->pending_invalidation = false; entry->handle = 0; GLenum target; @@ -331,10 +336,24 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture( return nullptr; } - // AddWriteWatch(host_base, length, [](void* context_ptr, void* data_ptr, - // void* address) { - // // - //}, this, &entry); + // Add a write watch. If any data in the given range is touched we'll get a + // callback and evict the texture. We could reuse the storage, though the + // driver is likely in a better position to pool that kind of stuff. + entry->write_watch_handle = memory_->AddWriteWatch( + texture_info.guest_address, texture_info.input_length, + [](void* context_ptr, void* data_ptr, uint32_t address) { + auto self = reinterpret_cast<TextureCache*>(context_ptr); + auto touched_entry = reinterpret_cast<TextureEntry*>(data_ptr); + // Clear watch handle first so we don't redundantly + // remove. + touched_entry->write_watch_handle = 0; + touched_entry->pending_invalidation = true; + // Add to pending list so Scavenge will clean it up. + self->invalidated_textures_mutex_.lock(); + self->invalidated_textures_->push_back(touched_entry); + self->invalidated_textures_mutex_.unlock(); + }, + this, entry.get()); // Add to map - map takes ownership. 
 auto entry_ptr = entry.get(); @@ -343,9 +362,10 @@ } void TextureCache::EvictTexture(TextureEntry* entry) { - /*if (entry->write_watch_handle) { - // remove from watch list - }*/ + if (entry->write_watch_handle) { + memory_->CancelWriteWatch(entry->write_watch_handle); + entry->write_watch_handle = 0; + } for (auto& view : entry->views) { glMakeTextureHandleNonResidentARB(view->texture_sampler_handle); @@ -394,8 +414,7 @@ void TextureSwap(Endian endianness, void* dest, const void* src, bool TextureCache::UploadTexture2D(GLuint texture, const TextureInfo& texture_info) { - auto host_address = - reinterpret_cast<const uint8_t*>(membase_ + texture_info.guest_address); + const auto host_address = memory_->Translate(texture_info.guest_address); GLenum internal_format = GL_RGBA8; GLenum format = GL_RGBA; diff --git a/src/xenia/gpu/gl4/texture_cache.h b/src/xenia/gpu/gl4/texture_cache.h index 05187ef28..8eedfc476 100644 --- a/src/xenia/gpu/gl4/texture_cache.h +++ b/src/xenia/gpu/gl4/texture_cache.h @@ -18,6 +18,7 @@ #include <xenia/gpu/gl4/gl_context.h> #include <xenia/gpu/sampler_info.h> #include <xenia/gpu/texture_info.h> +#include <xenia/memory.h> namespace xe { namespace gpu { @@ -36,18 +37,21 @@ }; struct TextureEntry { TextureInfo texture_info; + uintptr_t write_watch_handle; GLuint handle; + bool pending_invalidation; std::vector<std::unique_ptr<TextureEntryView>> views; }; TextureCache(); ~TextureCache(); - bool Initialize(uint8_t* membase, CircularBuffer* scratch_buffer); + bool Initialize(Memory* memory, CircularBuffer* scratch_buffer); void Shutdown(); void Scavenge(); void Clear(); + void EvictAllTextures(); TextureEntryView* Demand(const TextureInfo& texture_info, const SamplerInfo& sampler_info); @@ -62,7 +66,7 @@ bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info); - uint8_t* membase_; + Memory* memory_; CircularBuffer* scratch_buffer_; std::unordered_map<uint64_t, SamplerEntry*> sampler_entries_; std::unordered_map<uint64_t, TextureEntry*> texture_entries_; diff --git a/src/xenia/memory.cc b/src/xenia/memory.cc index 
a3d90a7ac..7f387e07a 100644 --- a/src/xenia/memory.cc +++ b/src/xenia/memory.cc @@ -209,15 +209,15 @@ const static struct { uint64_t virtual_address_end; uint64_t target_address; } map_info[] = { - 0x00000000, 0x3FFFFFFF, 0x00000000, // (1024mb) - virtual 4k pages - 0x40000000, 0x7EFFFFFF, 0x40000000, // (1024mb) - virtual 64k pages (cont) - 0x7F000000, 0x7F0FFFFF, 0x00000000, // (1mb) - GPU writeback - 0x7F100000, 0x7FFFFFFF, 0x00100000, // (15mb) - XPS? - 0x80000000, 0x8FFFFFFF, 0x80000000, // (256mb) - xex 64k pages - 0x90000000, 0x9FFFFFFF, 0x80000000, // (256mb) - xex 4k pages - 0xA0000000, 0xBFFFFFFF, 0x00000000, // (512mb) - physical 64k pages - 0xC0000000, 0xDFFFFFFF, 0x00000000, // - physical 16mb pages - 0xE0000000, 0xFFFFFFFF, 0x00000000, // - physical 4k pages + 0x00000000, 0x3FFFFFFF, 0x00000000, // (1024mb) - virtual 4k pages + 0x40000000, 0x7EFFFFFF, 0x40000000, // (1024mb) - virtual 64k pages (cont) + 0x7F000000, 0x7F0FFFFF, 0x00000000, // (1mb) - GPU writeback + 0x7F100000, 0x7FFFFFFF, 0x00100000, // (15mb) - XPS? 
+ 0x80000000, 0x8FFFFFFF, 0x80000000, // (256mb) - xex 64k pages + 0x90000000, 0x9FFFFFFF, 0x80000000, // (256mb) - xex 4k pages + 0xA0000000, 0xBFFFFFFF, 0x00000000, // (512mb) - physical 64k pages + 0xC0000000, 0xDFFFFFFF, 0x00000000, // - physical 16mb pages + 0xE0000000, 0xFFFFFFFF, 0x00000000, // - physical 4k pages }; int Memory::MapViews(uint8_t* mapping_base) { assert_true(poly::countof(map_info) == poly::countof(views_.all_views)); @@ -270,6 +270,17 @@ bool Memory::AddMappedRange(uint64_t address, uint64_t mask, uint64_t size, read_callback, write_callback); } +uintptr_t Memory::AddWriteWatch(uint32_t guest_address, size_t length, + cpu::WriteWatchCallback callback, + void* callback_context, void* callback_data) { + return mmio_handler_->AddWriteWatch(guest_address, length, callback, + callback_context, callback_data); +} + +void Memory::CancelWriteWatch(uintptr_t watch_handle) { + mmio_handler_->CancelWriteWatch(watch_handle); +} + uint8_t Memory::LoadI8(uint64_t address) { uint64_t value; if (!mmio_handler_->CheckLoad(address, &value)) { diff --git a/src/xenia/memory.h b/src/xenia/memory.h index bcdc254d7..2258ee151 100644 --- a/src/xenia/memory.h +++ b/src/xenia/memory.h @@ -53,6 +53,11 @@ class Memory : public alloy::Memory { void* context, cpu::MMIOReadCallback read_callback, cpu::MMIOWriteCallback write_callback); + uintptr_t AddWriteWatch(uint32_t guest_address, size_t length, + cpu::WriteWatchCallback callback, + void* callback_context, void* callback_data); + void CancelWriteWatch(uintptr_t watch_handle); + uint8_t LoadI8(uint64_t address) override; uint16_t LoadI16(uint64_t address) override; uint32_t LoadI32(uint64_t address) override;