Merge pull request #556 from DrChat/spv_translator

Vulkan Changes
This commit is contained in:
Justin Moore 2016-06-03 20:48:55 -05:00
commit 41a9004976
63 changed files with 5789 additions and 1790 deletions

View File

@ -78,7 +78,7 @@ std::unique_ptr<gpu::GraphicsSystem> CreateGraphicsSystem() {
std::unique_ptr<gpu::GraphicsSystem> best; std::unique_ptr<gpu::GraphicsSystem> best;
best = std::unique_ptr<gpu::GraphicsSystem>( best = std::unique_ptr<gpu::GraphicsSystem>(
new xe::gpu::gl4::GL4GraphicsSystem()); new xe::gpu::vulkan::VulkanGraphicsSystem());
if (best) { if (best) {
return best; return best;
} }

View File

@ -87,13 +87,12 @@ bool MMIOHandler::CheckStore(uint32_t virtual_address, uint32_t value) {
return false; return false;
} }
uintptr_t MMIOHandler::AddPhysicalWriteWatch(uint32_t guest_address, uintptr_t MMIOHandler::AddPhysicalAccessWatch(uint32_t guest_address,
size_t length, size_t length, WatchType type,
WriteWatchCallback callback, AccessWatchCallback callback,
void* callback_context, void* callback_context,
void* callback_data) { void* callback_data) {
uint32_t base_address = guest_address; uint32_t base_address = guest_address & 0x1FFFFFFF;
assert_true(base_address < 0x1FFFFFFF);
// Can only protect sizes matching system page size. // Can only protect sizes matching system page size.
// This means we need to round up, which will cause spurious access // This means we need to round up, which will cause spurious access
@ -103,32 +102,45 @@ uintptr_t MMIOHandler::AddPhysicalWriteWatch(uint32_t guest_address,
xe::memory::page_size()); xe::memory::page_size());
base_address = base_address - (base_address % xe::memory::page_size()); base_address = base_address - (base_address % xe::memory::page_size());
auto lock = global_critical_region_.Acquire();
// Add to table. The slot reservation may evict a previous watch, which // Add to table. The slot reservation may evict a previous watch, which
// could include our target, so we do it first. // could include our target, so we do it first.
auto entry = new WriteWatchEntry(); auto entry = new AccessWatchEntry();
entry->address = base_address; entry->address = base_address;
entry->length = uint32_t(length); entry->length = uint32_t(length);
entry->callback = callback; entry->callback = callback;
entry->callback_context = callback_context; entry->callback_context = callback_context;
entry->callback_data = callback_data; entry->callback_data = callback_data;
global_critical_region_.mutex().lock(); access_watches_.push_back(entry);
write_watches_.push_back(entry);
global_critical_region_.mutex().unlock();
// Make the desired range read only under all address spaces. auto page_access = memory::PageAccess::kNoAccess;
switch (type) {
case kWatchWrite:
page_access = memory::PageAccess::kReadOnly;
break;
case kWatchReadWrite:
page_access = memory::PageAccess::kNoAccess;
break;
default:
assert_unhandled_case(type);
break;
}
// Protect the range under all address spaces
memory::Protect(physical_membase_ + entry->address, entry->length, memory::Protect(physical_membase_ + entry->address, entry->length,
xe::memory::PageAccess::kReadOnly, nullptr); page_access, nullptr);
memory::Protect(virtual_membase_ + 0xA0000000 + entry->address, entry->length, memory::Protect(virtual_membase_ + 0xA0000000 + entry->address, entry->length,
xe::memory::PageAccess::kReadOnly, nullptr); page_access, nullptr);
memory::Protect(virtual_membase_ + 0xC0000000 + entry->address, entry->length, memory::Protect(virtual_membase_ + 0xC0000000 + entry->address, entry->length,
xe::memory::PageAccess::kReadOnly, nullptr); page_access, nullptr);
memory::Protect(virtual_membase_ + 0xE0000000 + entry->address, entry->length, memory::Protect(virtual_membase_ + 0xE0000000 + entry->address, entry->length,
xe::memory::PageAccess::kReadOnly, nullptr); page_access, nullptr);
return reinterpret_cast<uintptr_t>(entry); return reinterpret_cast<uintptr_t>(entry);
} }
void MMIOHandler::ClearWriteWatch(WriteWatchEntry* entry) { void MMIOHandler::ClearAccessWatch(AccessWatchEntry* entry) {
memory::Protect(physical_membase_ + entry->address, entry->length, memory::Protect(physical_membase_ + entry->address, entry->length,
xe::memory::PageAccess::kReadWrite, nullptr); xe::memory::PageAccess::kReadWrite, nullptr);
memory::Protect(virtual_membase_ + 0xA0000000 + entry->address, entry->length, memory::Protect(virtual_membase_ + 0xA0000000 + entry->address, entry->length,
@ -139,19 +151,20 @@ void MMIOHandler::ClearWriteWatch(WriteWatchEntry* entry) {
xe::memory::PageAccess::kReadWrite, nullptr); xe::memory::PageAccess::kReadWrite, nullptr);
} }
void MMIOHandler::CancelWriteWatch(uintptr_t watch_handle) { void MMIOHandler::CancelAccessWatch(uintptr_t watch_handle) {
auto entry = reinterpret_cast<WriteWatchEntry*>(watch_handle); auto entry = reinterpret_cast<AccessWatchEntry*>(watch_handle);
auto lock = global_critical_region_.Acquire();
// Allow access to the range again. // Allow access to the range again.
ClearWriteWatch(entry); ClearAccessWatch(entry);
// Remove from table. // Remove from table.
global_critical_region_.mutex().lock(); auto it = std::find(access_watches_.begin(), access_watches_.end(), entry);
auto it = std::find(write_watches_.begin(), write_watches_.end(), entry); assert_false(it == access_watches_.end());
if (it != write_watches_.end()) {
write_watches_.erase(it); if (it != access_watches_.end()) {
access_watches_.erase(it);
} }
global_critical_region_.mutex().unlock();
delete entry; delete entry;
} }
@ -159,18 +172,19 @@ void MMIOHandler::CancelWriteWatch(uintptr_t watch_handle) {
void MMIOHandler::InvalidateRange(uint32_t physical_address, size_t length) { void MMIOHandler::InvalidateRange(uint32_t physical_address, size_t length) {
auto lock = global_critical_region_.Acquire(); auto lock = global_critical_region_.Acquire();
for (auto it = write_watches_.begin(); it != write_watches_.end();) { for (auto it = access_watches_.begin(); it != access_watches_.end();) {
auto entry = *it; auto entry = *it;
if ((entry->address <= physical_address && if ((entry->address <= physical_address &&
entry->address + entry->length > physical_address) || entry->address + entry->length > physical_address) ||
(entry->address >= physical_address && (entry->address >= physical_address &&
entry->address < physical_address + length)) { entry->address < physical_address + length)) {
// This watch lies within the range. End it. // This watch lies within the range. End it.
ClearWriteWatch(entry); ClearAccessWatch(entry);
entry->callback(entry->callback_context, entry->callback_data, entry->callback(entry->callback_context, entry->callback_data,
entry->address); entry->address);
it = write_watches_.erase(it); it = access_watches_.erase(it);
delete entry;
continue; continue;
} }
@ -178,50 +192,49 @@ void MMIOHandler::InvalidateRange(uint32_t physical_address, size_t length) {
} }
} }
bool MMIOHandler::CheckWriteWatch(uint64_t fault_address) { bool MMIOHandler::IsRangeWatched(uint32_t physical_address, size_t length) {
uint32_t physical_address = uint32_t(fault_address); auto lock = global_critical_region_.Acquire();
if (physical_address > 0x1FFFFFFF) {
physical_address &= 0x1FFFFFFF; for (auto it = access_watches_.begin(); it != access_watches_.end(); ++it) {
} auto entry = *it;
std::list<WriteWatchEntry*> pending_invalidates; if ((entry->address <= physical_address &&
global_critical_region_.mutex().lock(); entry->address + entry->length > physical_address) ||
// Now that we hold the lock, recheck and see if the pages are still (entry->address >= physical_address &&
// protected. entry->address < physical_address + length)) {
memory::PageAccess cur_access; // This watch lies within the range.
size_t page_length = memory::page_size(); return true;
memory::QueryProtect((void*)fault_address, page_length, cur_access); }
if (cur_access != memory::PageAccess::kReadOnly &&
cur_access != memory::PageAccess::kNoAccess) {
// Another thread has cleared this write watch. Abort.
global_critical_region_.mutex().unlock();
return true;
} }
for (auto it = write_watches_.begin(); it != write_watches_.end();) { return false;
}
bool MMIOHandler::CheckAccessWatch(uint32_t physical_address) {
auto lock = global_critical_region_.Acquire();
bool hit = false;
for (auto it = access_watches_.begin(); it != access_watches_.end();) {
auto entry = *it; auto entry = *it;
if (entry->address <= physical_address && if (entry->address <= physical_address &&
entry->address + entry->length > physical_address) { entry->address + entry->length > physical_address) {
// Hit! Remove the writewatch. // Hit! Remove the watch.
pending_invalidates.push_back(entry); hit = true;
ClearAccessWatch(entry);
entry->callback(entry->callback_context, entry->callback_data,
physical_address);
ClearWriteWatch(entry); it = access_watches_.erase(it);
it = write_watches_.erase(it); delete entry;
continue; continue;
} }
++it; ++it;
} }
global_critical_region_.mutex().unlock();
if (pending_invalidates.empty()) { if (!hit) {
// Rethrow access violation - range was not being watched. // Rethrow access violation - range was not being watched.
return false; return false;
} }
while (!pending_invalidates.empty()) {
auto entry = pending_invalidates.back();
pending_invalidates.pop_back();
entry->callback(entry->callback_context, entry->callback_data,
physical_address);
delete entry;
}
// Range was watched, so let's eat this access violation. // Range was watched, so let's eat this access violation.
return true; return true;
} }
@ -414,9 +427,33 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {
} }
} }
if (!range) { if (!range) {
auto fault_address = reinterpret_cast<uint8_t*>(ex->fault_address());
uint32_t guest_address = 0;
if (fault_address >= virtual_membase_ &&
fault_address < physical_membase_) {
// Faulting on a virtual address.
guest_address = static_cast<uint32_t>(ex->fault_address()) & 0x1FFFFFFF;
} else {
// Faulting on a physical address.
guest_address = static_cast<uint32_t>(ex->fault_address());
}
// HACK: Recheck if the pages are still protected (race condition - another
// thread clears the writewatch we just hit)
// Do this under the lock so we don't introduce another race condition.
auto lock = global_critical_region_.Acquire();
memory::PageAccess cur_access;
size_t page_length = memory::page_size();
memory::QueryProtect((void*)fault_address, page_length, cur_access);
if (cur_access != memory::PageAccess::kReadOnly &&
cur_access != memory::PageAccess::kNoAccess) {
// Another thread has cleared this write watch. Abort.
return true;
}
// Access is not found within any range, so fail and let the caller handle // Access is not found within any range, so fail and let the caller handle
// it (likely by aborting). // it (likely by aborting).
return CheckWriteWatch(ex->fault_address()); return CheckAccessWatch(guest_address);
} }
auto rip = ex->pc(); auto rip = ex->pc();

View File

@ -28,9 +28,8 @@ typedef uint32_t (*MMIOReadCallback)(void* ppc_context, void* callback_context,
uint32_t addr); uint32_t addr);
typedef void (*MMIOWriteCallback)(void* ppc_context, void* callback_context, typedef void (*MMIOWriteCallback)(void* ppc_context, void* callback_context,
uint32_t addr, uint32_t value); uint32_t addr, uint32_t value);
typedef void (*AccessWatchCallback)(void* context_ptr, void* data_ptr,
typedef void (*WriteWatchCallback)(void* context_ptr, void* data_ptr, uint32_t address);
uint32_t address);
struct MMIORange { struct MMIORange {
uint32_t address; uint32_t address;
@ -46,6 +45,12 @@ class MMIOHandler {
public: public:
virtual ~MMIOHandler(); virtual ~MMIOHandler();
// Kind of guest access that trips a watch registered with
// AddPhysicalAccessWatch. The value selects the page protection that is
// applied to the watched range.
enum WatchType {
// Zero placeholder. AddPhysicalAccessWatch has no case for it, so passing it
// lands in the assert_unhandled_case default branch.
kWatchInvalid = 0,
// Watch writes only: the range is protected as PageAccess::kReadOnly.
kWatchWrite = 1,
// Watch reads and writes: the range is protected as PageAccess::kNoAccess.
kWatchReadWrite = 2,
};
static std::unique_ptr<MMIOHandler> Install(uint8_t* virtual_membase, static std::unique_ptr<MMIOHandler> Install(uint8_t* virtual_membase,
uint8_t* physical_membase, uint8_t* physical_membase,
uint8_t* membase_end); uint8_t* membase_end);
@ -59,17 +64,24 @@ class MMIOHandler {
bool CheckLoad(uint32_t virtual_address, uint32_t* out_value); bool CheckLoad(uint32_t virtual_address, uint32_t* out_value);
bool CheckStore(uint32_t virtual_address, uint32_t value); bool CheckStore(uint32_t virtual_address, uint32_t value);
uintptr_t AddPhysicalWriteWatch(uint32_t guest_address, size_t length, // Memory watches: These are one-shot alarms that fire a callback (in the
WriteWatchCallback callback, // context of the thread that caused the callback) when a memory range is
void* callback_context, void* callback_data); // either written to or read from, depending on the watch type. These fire as
void CancelWriteWatch(uintptr_t watch_handle); // soon as a read/write happens, and only fire once.
// These watches may be spuriously fired if memory is accessed nearby.
uintptr_t AddPhysicalAccessWatch(uint32_t guest_address, size_t length,
WatchType type, AccessWatchCallback callback,
void* callback_context, void* callback_data);
void CancelAccessWatch(uintptr_t watch_handle);
void InvalidateRange(uint32_t physical_address, size_t length); void InvalidateRange(uint32_t physical_address, size_t length);
bool IsRangeWatched(uint32_t physical_address, size_t length);
protected: protected:
struct WriteWatchEntry { struct AccessWatchEntry {
uint32_t address; uint32_t address;
uint32_t length; uint32_t length;
WriteWatchCallback callback; WatchType type;
AccessWatchCallback callback;
void* callback_context; void* callback_context;
void* callback_data; void* callback_data;
}; };
@ -83,8 +95,8 @@ class MMIOHandler {
static bool ExceptionCallbackThunk(Exception* ex, void* data); static bool ExceptionCallbackThunk(Exception* ex, void* data);
bool ExceptionCallback(Exception* ex); bool ExceptionCallback(Exception* ex);
void ClearWriteWatch(WriteWatchEntry* entry); void ClearAccessWatch(AccessWatchEntry* entry);
bool CheckWriteWatch(uint64_t fault_address); bool CheckAccessWatch(uint32_t guest_address);
uint8_t* virtual_membase_; uint8_t* virtual_membase_;
uint8_t* physical_membase_; uint8_t* physical_membase_;
@ -94,7 +106,7 @@ class MMIOHandler {
xe::global_critical_region global_critical_region_; xe::global_critical_region global_critical_region_;
// TODO(benvanik): data structure magic. // TODO(benvanik): data structure magic.
std::list<WriteWatchEntry*> write_watches_; std::list<AccessWatchEntry*> access_watches_;
static MMIOHandler* global_handler_; static MMIOHandler* global_handler_;
}; };

View File

@ -84,9 +84,9 @@ class CommandProcessor {
swap_request_handler_ = fn; swap_request_handler_ = fn;
} }
void RequestFrameTrace(const std::wstring& root_path); virtual void RequestFrameTrace(const std::wstring& root_path);
void BeginTracing(const std::wstring& root_path); virtual void BeginTracing(const std::wstring& root_path);
void EndTracing(); virtual void EndTracing();
void InitializeRingBuffer(uint32_t ptr, uint32_t page_count); void InitializeRingBuffer(uint32_t ptr, uint32_t page_count);
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);

View File

@ -427,7 +427,7 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
// Not found, create. // Not found, create.
auto entry = std::make_unique<TextureEntry>(); auto entry = std::make_unique<TextureEntry>();
entry->texture_info = texture_info; entry->texture_info = texture_info;
entry->write_watch_handle = 0; entry->access_watch_handle = 0;
entry->pending_invalidation = false; entry->pending_invalidation = false;
entry->handle = 0; entry->handle = 0;
@ -442,6 +442,7 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
// Found! Acquire the handle and remove the readbuffer entry. // Found! Acquire the handle and remove the readbuffer entry.
read_buffer_textures_.erase(it); read_buffer_textures_.erase(it);
entry->handle = read_buffer_entry->handle; entry->handle = read_buffer_entry->handle;
entry->access_watch_handle = read_buffer_entry->access_watch_handle;
delete read_buffer_entry; delete read_buffer_entry;
// TODO(benvanik): set more texture properties? swizzle/etc? // TODO(benvanik): set more texture properties? swizzle/etc?
auto entry_ptr = entry.get(); auto entry_ptr = entry.get();
@ -495,14 +496,15 @@ TextureCache::TextureEntry* TextureCache::LookupOrInsertTexture(
// Add a write watch. If any data in the given range is touched we'll get a // Add a write watch. If any data in the given range is touched we'll get a
// callback and evict the texture. We could reuse the storage, though the // callback and evict the texture. We could reuse the storage, though the
// driver is likely in a better position to pool that kind of stuff. // driver is likely in a better position to pool that kind of stuff.
entry->write_watch_handle = memory_->AddPhysicalWriteWatch( entry->access_watch_handle = memory_->AddPhysicalAccessWatch(
texture_info.guest_address, texture_info.input_length, texture_info.guest_address, texture_info.input_length,
cpu::MMIOHandler::kWatchWrite,
[](void* context_ptr, void* data_ptr, uint32_t address) { [](void* context_ptr, void* data_ptr, uint32_t address) {
auto self = reinterpret_cast<TextureCache*>(context_ptr); auto self = reinterpret_cast<TextureCache*>(context_ptr);
auto touched_entry = reinterpret_cast<TextureEntry*>(data_ptr); auto touched_entry = reinterpret_cast<TextureEntry*>(data_ptr);
// Clear watch handle first so we don't redundantly // Clear watch handle first so we don't redundantly
// remove. // remove.
touched_entry->write_watch_handle = 0; touched_entry->access_watch_handle = 0;
touched_entry->pending_invalidation = true; touched_entry->pending_invalidation = true;
// Add to pending list so Scavenge will clean it up. // Add to pending list so Scavenge will clean it up.
self->invalidated_textures_mutex_.lock(); self->invalidated_textures_mutex_.lock();
@ -574,14 +576,27 @@ GLuint TextureCache::ConvertTexture(Blitter* blitter, uint32_t guest_address,
dest_rect, GL_LINEAR, swap_channels); dest_rect, GL_LINEAR, swap_channels);
} }
// HACK: remove texture from write watch list so readback won't kill us. // Setup a read/write access watch. If the game tries to touch the memory
// Not needed now, as readback is disabled. // we were supposed to populate with this texture, then we'll actually
/* // populate it.
if (texture_entry->write_watch_handle) { if (texture_entry->access_watch_handle) {
memory_->CancelWriteWatch(texture_entry->write_watch_handle); memory_->CancelAccessWatch(texture_entry->access_watch_handle);
texture_entry->write_watch_handle = 0; texture_entry->access_watch_handle = 0;
} }
//*/
texture_entry->access_watch_handle = memory_->AddPhysicalAccessWatch(
guest_address, texture_entry->texture_info.input_length,
cpu::MMIOHandler::kWatchReadWrite,
[](void* context, void* data, uint32_t address) {
auto touched_entry = reinterpret_cast<TextureEntry*>(data);
touched_entry->access_watch_handle = 0;
// This happens. RDR resolves to a texture then upsizes it, BF1943
// writes to a resolved texture.
// TODO (for Vulkan): Copy this texture back into system memory.
// assert_always();
},
nullptr, texture_entry);
return texture_entry->handle; return texture_entry->handle;
} }
@ -618,6 +633,20 @@ GLuint TextureCache::ConvertTexture(Blitter* blitter, uint32_t guest_address,
entry->block_height = block_height; entry->block_height = block_height;
entry->format = format; entry->format = format;
entry->access_watch_handle = memory_->AddPhysicalAccessWatch(
guest_address, block_height * block_width * 4,
cpu::MMIOHandler::kWatchReadWrite,
[](void* context, void* data, uint32_t address) {
auto entry = reinterpret_cast<ReadBufferTexture*>(data);
entry->access_watch_handle = 0;
// This happens. RDR resolves to a texture then upsizes it, BF1943
// writes to a resolved texture.
// TODO (for Vulkan): Copy this texture back into system memory.
// assert_always();
},
nullptr, entry.get());
glCreateTextures(GL_TEXTURE_2D, 1, &entry->handle); glCreateTextures(GL_TEXTURE_2D, 1, &entry->handle);
glTextureParameteri(entry->handle, GL_TEXTURE_BASE_LEVEL, 0); glTextureParameteri(entry->handle, GL_TEXTURE_BASE_LEVEL, 0);
glTextureParameteri(entry->handle, GL_TEXTURE_MAX_LEVEL, 1); glTextureParameteri(entry->handle, GL_TEXTURE_MAX_LEVEL, 1);
@ -636,9 +665,9 @@ GLuint TextureCache::ConvertTexture(Blitter* blitter, uint32_t guest_address,
} }
void TextureCache::EvictTexture(TextureEntry* entry) { void TextureCache::EvictTexture(TextureEntry* entry) {
if (entry->write_watch_handle) { if (entry->access_watch_handle) {
memory_->CancelWriteWatch(entry->write_watch_handle); memory_->CancelAccessWatch(entry->access_watch_handle);
entry->write_watch_handle = 0; entry->access_watch_handle = 0;
} }
for (auto& view : entry->views) { for (auto& view : entry->views) {

View File

@ -44,7 +44,7 @@ class TextureCache {
}; };
struct TextureEntry { struct TextureEntry {
TextureInfo texture_info; TextureInfo texture_info;
uintptr_t write_watch_handle; uintptr_t access_watch_handle;
GLuint handle; GLuint handle;
bool pending_invalidation; bool pending_invalidation;
std::vector<std::unique_ptr<TextureEntryView>> views; std::vector<std::unique_ptr<TextureEntryView>> views;
@ -74,8 +74,12 @@ class TextureCache {
TextureFormat format, bool swap_channels, TextureFormat format, bool swap_channels,
GLuint src_texture, Rect2D src_rect, Rect2D dest_rect); GLuint src_texture, Rect2D src_rect, Rect2D dest_rect);
TextureEntry* LookupAddress(uint32_t guest_address, uint32_t width,
uint32_t height, TextureFormat format);
private: private:
struct ReadBufferTexture { struct ReadBufferTexture {
uintptr_t access_watch_handle;
uint32_t guest_address; uint32_t guest_address;
uint32_t logical_width; uint32_t logical_width;
uint32_t logical_height; uint32_t logical_height;
@ -90,8 +94,6 @@ class TextureCache {
void EvictSampler(SamplerEntry* entry); void EvictSampler(SamplerEntry* entry);
TextureEntry* LookupOrInsertTexture(const TextureInfo& texture_info, TextureEntry* LookupOrInsertTexture(const TextureInfo& texture_info,
uint64_t opt_hash = 0); uint64_t opt_hash = 0);
TextureEntry* LookupAddress(uint32_t guest_address, uint32_t width,
uint32_t height, TextureFormat format);
void EvictTexture(TextureEntry* entry); void EvictTexture(TextureEntry* entry);
bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info); bool UploadTexture2D(GLuint texture, const TextureInfo& texture_info);

View File

@ -22,6 +22,8 @@ project("xenia-gpu")
project_root.."/third_party/gflags/src", project_root.."/third_party/gflags/src",
}) })
local_platform_files() local_platform_files()
local_platform_files("spirv")
local_platform_files("spirv/passes")
group("src") group("src")
project("xenia-gpu-shader-compiler") project("xenia-gpu-shader-compiler")

View File

@ -99,6 +99,17 @@ struct InstructionResult {
bool has_all_writes() const { bool has_all_writes() const {
return write_mask[0] && write_mask[1] && write_mask[2] && write_mask[3]; return write_mask[0] && write_mask[1] && write_mask[2] && write_mask[3];
} }
// Counts how many of the four result components have their write-mask entry
// set (0 when nothing is written, 4 when has_all_writes() would hold).
uint32_t num_writes() const {
  uint32_t written = 0;
  int component = 0;
  while (component < 4) {
    // Each enabled component contributes exactly one to the tally.
    written += write_mask[component] ? 1u : 0u;
    ++component;
  }
  return written;
}
// Returns true if any non-constant components are written. // Returns true if any non-constant components are written.
bool stores_non_constants() const { bool stores_non_constants() const {
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
@ -547,6 +558,9 @@ class Shader {
// True if the shader was translated and prepared without error. // True if the shader was translated and prepared without error.
bool is_valid() const { return is_valid_; } bool is_valid() const { return is_valid_; }
// True if the shader has already been translated.
// In the visible translator code the backing flag is set together with
// is_valid_, and only is_valid_ is cleared again when a fatal error is found,
// so a translated shader is not necessarily a valid one; check is_valid() too.
bool is_translated() const { return is_translated_; }
// Errors that occurred during translation. // Errors that occurred during translation.
const std::vector<Error>& errors() const { return errors_; } const std::vector<Error>& errors() const { return errors_; }
@ -591,6 +605,7 @@ class Shader {
bool writes_color_targets_[4] = {false, false, false, false}; bool writes_color_targets_[4] = {false, false, false, false};
bool is_valid_ = false; bool is_valid_ = false;
bool is_translated_ = false;
std::vector<Error> errors_; std::vector<Error> errors_;
std::string ucode_disassembly_; std::string ucode_disassembly_;

View File

@ -51,6 +51,7 @@ void ShaderTranslator::Reset() {
ucode_disasm_buffer_.Reset(); ucode_disasm_buffer_.Reset();
ucode_disasm_line_number_ = 0; ucode_disasm_line_number_ = 0;
previous_ucode_disasm_scan_offset_ = 0; previous_ucode_disasm_scan_offset_ = 0;
register_count_ = 64;
total_attrib_count_ = 0; total_attrib_count_ = 0;
vertex_bindings_.clear(); vertex_bindings_.clear();
texture_bindings_.clear(); texture_bindings_.clear();
@ -95,9 +96,21 @@ bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) {
return true; return true;
} }
// Translates |shader| with the register count taken from the program control
// word instead of the default installed by Reset(): vertex shaders use
// vs_regs + 1, every other shader type uses ps_regs + 1.
// Returns whatever TranslateInternal() reports.
bool ShaderTranslator::Translate(Shader* shader,
                                 xenos::xe_gpu_program_cntl_t cntl) {
  // Reset first; it reinitialises register_count_, which is overridden below.
  Reset();
  if (shader->type() == ShaderType::kVertex) {
    register_count_ = cntl.vs_regs + 1;
  } else {
    register_count_ = cntl.ps_regs + 1;
  }
  return TranslateInternal(shader);
}
bool ShaderTranslator::Translate(Shader* shader) { bool ShaderTranslator::Translate(Shader* shader) {
Reset(); Reset();
return TranslateInternal(shader);
}
bool ShaderTranslator::TranslateInternal(Shader* shader) {
shader_type_ = shader->type(); shader_type_ = shader->type();
ucode_dwords_ = shader->ucode_dwords(); ucode_dwords_ = shader->ucode_dwords();
ucode_dword_count_ = shader->ucode_dword_count(); ucode_dword_count_ = shader->ucode_dword_count();
@ -155,6 +168,7 @@ bool ShaderTranslator::Translate(Shader* shader) {
} }
shader->is_valid_ = true; shader->is_valid_ = true;
shader->is_translated_ = true;
for (const auto& error : shader->errors_) { for (const auto& error : shader->errors_) {
if (error.is_fatal) { if (error.is_fatal) {
shader->is_valid_ = false; shader->is_valid_ = false;
@ -369,9 +383,9 @@ bool ShaderTranslator::TranslateBlocks() {
AddControlFlowTargetLabel(cf_a, &label_addresses); AddControlFlowTargetLabel(cf_a, &label_addresses);
AddControlFlowTargetLabel(cf_b, &label_addresses); AddControlFlowTargetLabel(cf_b, &label_addresses);
PreProcessControlFlowInstruction(cf_index); PreProcessControlFlowInstruction(cf_index, cf_a);
++cf_index; ++cf_index;
PreProcessControlFlowInstruction(cf_index); PreProcessControlFlowInstruction(cf_index, cf_b);
++cf_index; ++cf_index;
} }
@ -672,11 +686,11 @@ void ShaderTranslator::TranslateExecInstructions(
static_cast<FetchOpcode>(ucode_dwords_[instr_offset * 3] & 0x1F); static_cast<FetchOpcode>(ucode_dwords_[instr_offset * 3] & 0x1F);
if (fetch_opcode == FetchOpcode::kVertexFetch) { if (fetch_opcode == FetchOpcode::kVertexFetch) {
auto& op = *reinterpret_cast<const VertexFetchInstruction*>( auto& op = *reinterpret_cast<const VertexFetchInstruction*>(
ucode_dwords_ + instr_offset * 3); ucode_dwords_ + instr_offset * 3);
TranslateVertexFetchInstruction(op); TranslateVertexFetchInstruction(op);
} else { } else {
auto& op = *reinterpret_cast<const TextureFetchInstruction*>( auto& op = *reinterpret_cast<const TextureFetchInstruction*>(
ucode_dwords_ + instr_offset * 3); ucode_dwords_ + instr_offset * 3);
TranslateTextureFetchInstruction(op); TranslateTextureFetchInstruction(op);
} }
} else { } else {
@ -986,16 +1000,19 @@ void ShaderTranslator::TranslateAluInstruction(const AluInstruction& op) {
return; return;
} }
ParsedAluInstruction instr;
if (op.has_vector_op()) { if (op.has_vector_op()) {
const auto& opcode_info = const auto& opcode_info =
alu_vector_opcode_infos_[static_cast<int>(op.vector_opcode())]; alu_vector_opcode_infos_[static_cast<int>(op.vector_opcode())];
ParseAluVectorInstruction(op, opcode_info); ParseAluVectorInstruction(op, opcode_info, instr);
ProcessAluInstruction(instr);
} }
if (op.has_scalar_op()) { if (op.has_scalar_op()) {
const auto& opcode_info = const auto& opcode_info =
alu_scalar_opcode_infos_[static_cast<int>(op.scalar_opcode())]; alu_scalar_opcode_infos_[static_cast<int>(op.scalar_opcode())];
ParseAluScalarInstruction(op, opcode_info); ParseAluScalarInstruction(op, opcode_info, instr);
ProcessAluInstruction(instr);
} }
} }
@ -1044,9 +1061,8 @@ void ParseAluInstructionOperand(const AluInstruction& op, int i,
uint32_t a = swizzle & 0x3; uint32_t a = swizzle & 0x3;
out_op->components[0] = GetSwizzleFromComponentIndex(a); out_op->components[0] = GetSwizzleFromComponentIndex(a);
} else if (swizzle_component_count == 2) { } else if (swizzle_component_count == 2) {
swizzle >>= 4; uint32_t a = ((swizzle >> 6) + 3) & 0x3;
uint32_t a = ((swizzle >> 2) + 3) & 0x3; uint32_t b = ((swizzle >> 0) + 0) & 0x3;
uint32_t b = (swizzle + 2) & 0x3;
out_op->components[0] = GetSwizzleFromComponentIndex(a); out_op->components[0] = GetSwizzleFromComponentIndex(a);
out_op->components[1] = GetSwizzleFromComponentIndex(b); out_op->components[1] = GetSwizzleFromComponentIndex(b);
} else { } else {
@ -1088,8 +1104,8 @@ void ParseAluInstructionOperandSpecial(const AluInstruction& op,
} }
void ShaderTranslator::ParseAluVectorInstruction( void ShaderTranslator::ParseAluVectorInstruction(
const AluInstruction& op, const AluOpcodeInfo& opcode_info) { const AluInstruction& op, const AluOpcodeInfo& opcode_info,
ParsedAluInstruction i; ParsedAluInstruction& i) {
i.dword_index = 0; i.dword_index = 0;
i.type = ParsedAluInstruction::Type::kVector; i.type = ParsedAluInstruction::Type::kVector;
i.vector_opcode = op.vector_opcode(); i.vector_opcode = op.vector_opcode();
@ -1126,6 +1142,10 @@ void ShaderTranslator::ParseAluVectorInstruction(
} else { } else {
// Unimplemented. // Unimplemented.
// assert_always(); // assert_always();
XELOGE(
"ShaderTranslator::ParseAluVectorInstruction: Unsupported write "
"to export %d",
dest_num);
i.result.storage_target = InstructionStorageTarget::kNone; i.result.storage_target = InstructionStorageTarget::kNone;
i.result.storage_index = 0; i.result.storage_index = 0;
} }
@ -1203,13 +1223,11 @@ void ShaderTranslator::ParseAluVectorInstruction(
} }
i.Disassemble(&ucode_disasm_buffer_); i.Disassemble(&ucode_disasm_buffer_);
ProcessAluInstruction(i);
} }
void ShaderTranslator::ParseAluScalarInstruction( void ShaderTranslator::ParseAluScalarInstruction(
const AluInstruction& op, const AluOpcodeInfo& opcode_info) { const AluInstruction& op, const AluOpcodeInfo& opcode_info,
ParsedAluInstruction i; ParsedAluInstruction& i) {
i.dword_index = 0; i.dword_index = 0;
i.type = ParsedAluInstruction::Type::kScalar; i.type = ParsedAluInstruction::Type::kScalar;
i.scalar_opcode = op.scalar_opcode(); i.scalar_opcode = op.scalar_opcode();
@ -1319,8 +1337,6 @@ void ShaderTranslator::ParseAluScalarInstruction(
} }
i.Disassemble(&ucode_disasm_buffer_); i.Disassemble(&ucode_disasm_buffer_);
ProcessAluInstruction(i);
} }
} // namespace gpu } // namespace gpu

View File

@ -30,6 +30,7 @@ class ShaderTranslator {
// DEPRECATED(benvanik): remove this when shader cache is removed. // DEPRECATED(benvanik): remove this when shader cache is removed.
bool GatherAllBindingInformation(Shader* shader); bool GatherAllBindingInformation(Shader* shader);
bool Translate(Shader* shader, xenos::xe_gpu_program_cntl_t cntl);
bool Translate(Shader* shader); bool Translate(Shader* shader);
protected: protected:
@ -38,6 +39,8 @@ class ShaderTranslator {
// Resets translator state before beginning translation. // Resets translator state before beginning translation.
virtual void Reset(); virtual void Reset();
// Register count.
// Reset() sets this to 64; the Translate overload that takes a program control
// word replaces it with vs_regs + 1 or ps_regs + 1 depending on shader type.
uint32_t register_count() const { return register_count_; }
// True if the current shader is a vertex shader. // True if the current shader is a vertex shader.
bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; } bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; }
// True if the current shader is a pixel shader. // True if the current shader is a pixel shader.
@ -79,7 +82,8 @@ class ShaderTranslator {
} }
// Pre-process a control-flow instruction before anything else. // Pre-process a control-flow instruction before anything else.
virtual void PreProcessControlFlowInstruction(uint32_t cf_index) {} virtual void PreProcessControlFlowInstruction(
uint32_t cf_index, const ucode::ControlFlowInstruction& instr) {}
// Handles translation for control flow label addresses. // Handles translation for control flow label addresses.
// This is triggered once for each label required (due to control flow // This is triggered once for each label required (due to control flow
@ -131,6 +135,8 @@ class ShaderTranslator {
int src_swizzle_component_count; int src_swizzle_component_count;
}; };
bool TranslateInternal(Shader* shader);
void MarkUcodeInstruction(uint32_t dword_offset); void MarkUcodeInstruction(uint32_t dword_offset);
void AppendUcodeDisasm(char c); void AppendUcodeDisasm(char c);
void AppendUcodeDisasm(const char* value); void AppendUcodeDisasm(const char* value);
@ -173,14 +179,18 @@ class ShaderTranslator {
void TranslateAluInstruction(const ucode::AluInstruction& op); void TranslateAluInstruction(const ucode::AluInstruction& op);
void ParseAluVectorInstruction(const ucode::AluInstruction& op, void ParseAluVectorInstruction(const ucode::AluInstruction& op,
const AluOpcodeInfo& opcode_info); const AluOpcodeInfo& opcode_info,
ParsedAluInstruction& instr);
void ParseAluScalarInstruction(const ucode::AluInstruction& op, void ParseAluScalarInstruction(const ucode::AluInstruction& op,
const AluOpcodeInfo& opcode_info); const AluOpcodeInfo& opcode_info,
ParsedAluInstruction& instr);
// Input shader metadata and microcode. // Input shader metadata and microcode.
ShaderType shader_type_; ShaderType shader_type_;
const uint32_t* ucode_dwords_; const uint32_t* ucode_dwords_;
size_t ucode_dword_count_; size_t ucode_dword_count_;
xenos::xe_gpu_program_cntl_t program_cntl_;
uint32_t register_count_;
// Accumulated translation errors. // Accumulated translation errors.
std::vector<Shader::Error> errors_; std::vector<Shader::Error> errors_;

View File

@ -0,0 +1,36 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv/compiler.h"
namespace xe {
namespace gpu {
namespace spirv {
// Constructs a compiler with no passes registered.
Compiler::Compiler() = default;
// Takes ownership of |pass| and appends it to the end of the pass list, so
// passes run in the order they were added.
void Compiler::AddPass(std::unique_ptr<CompilerPass> pass) {
  compiler_passes_.emplace_back(std::move(pass));
}
// Runs every registered pass over |module| in registration order.
// Stops at the first pass whose Run() returns false and returns false;
// returns true when all passes succeed (including when no pass is registered).
bool Compiler::Compile(spv::Module* module) {
  bool succeeded = true;
  for (auto it = compiler_passes_.begin();
       succeeded && it != compiler_passes_.end(); ++it) {
    succeeded = (*it)->Run(module);
  }
  return succeeded;
}
// Discards all registered passes (destroying them, as the list owns them).
void Compiler::Reset() {
  compiler_passes_.clear();
}
} // namespace spirv
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,41 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SPIRV_COMPILER_H_
#define XENIA_GPU_SPIRV_COMPILER_H_
#include "xenia/base/arena.h"
#include "xenia/gpu/spirv/compiler_pass.h"
#include "third_party/glslang-spirv/SpvBuilder.h"
#include "third_party/spirv/GLSL.std.450.hpp11"
namespace xe {
namespace gpu {
namespace spirv {
// SPIR-V Compiler. Designed to optimize SPIR-V code before feeding it into the
// drivers.
//
// Holds an ordered, owning list of CompilerPass objects and applies them to a
// module one after another.
class Compiler {
 public:
  Compiler();

  // Applies the registered passes to |module|; returns false if a pass fails.
  bool Compile(spv::Module* module);

  // Appends |pass| to the pass list, taking ownership of it.
  void AddPass(std::unique_ptr<CompilerPass> pass);

  // Removes every registered pass.
  void Reset();

 private:
  // Passes in the order they were added.
  std::vector<std::unique_ptr<CompilerPass>> compiler_passes_;
};
} // namespace spirv
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SPIRV_COMPILER_H_

View File

@ -0,0 +1,37 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SPIRV_COMPILER_PASS_H_
#define XENIA_GPU_SPIRV_COMPILER_PASS_H_
#include "xenia/base/arena.h"
#include "third_party/glslang-spirv/SpvBuilder.h"
#include "third_party/spirv/GLSL.std.450.hpp11"
namespace xe {
namespace gpu {
namespace spirv {
// Abstract base for a single transformation/analysis step over a SPIR-V
// module. Concrete passes implement Run().
class CompilerPass {
 public:
  CompilerPass() = default;
  virtual ~CompilerPass() = default;

  // Processes |module|. The boolean result is the pass's success indicator.
  virtual bool Run(spv::Module* module) = 0;

 private:
  // Arena owned by the pass. NOTE(review): it is private and not referenced by
  // anything visible here - confirm whether derived passes are meant to use it.
  xe::Arena ir_arena_;
};
} // namespace spirv
} // namespace gpu
} // namespace xe
#endif

View File

@ -0,0 +1,30 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h"
namespace xe {
namespace gpu {
namespace spirv {
// Nothing to set up; the pass carries no state of its own.
ControlFlowAnalysisPass::ControlFlowAnalysisPass() = default;
// Placeholder implementation: visits each function of |module| but does not
// modify anything yet, and always reports success.
bool ControlFlowAnalysisPass::Run(spv::Module* module) {
  const auto& functions = module->getFunctions();
  for (auto function : functions) {
    // For each OpBranchConditional, see if we can find a point where control
    // flow converges and then append an OpSelectionMerge.
    // Potential problems: while loops constructed from branch instructions
    (void)function;  // Not used until the analysis above is implemented.
  }

  return true;
}
} // namespace spirv
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,34 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
#include "xenia/gpu/spirv/compiler_pass.h"
namespace xe {
namespace gpu {
namespace spirv {
// Control-flow analysis pass. Runs through control-flow and adds merge opcodes
// where necessary.
// NOTE: the Run() implementation in the accompanying .cc is currently a stub
// that only iterates the module's functions and returns true.
class ControlFlowAnalysisPass : public CompilerPass {
 public:
  ControlFlowAnalysisPass();

  // CompilerPass entry point.
  bool Run(spv::Module* module) override;
};
} // namespace spirv
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_

View File

@ -0,0 +1,48 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h"
namespace xe {
namespace gpu {
namespace spirv {
// Nothing to set up; the pass carries no state of its own.
ControlFlowSimplificationPass::ControlFlowSimplificationPass() = default;
// Merges straight-line block chains in every function of |module|.
//
// A block is folded into its predecessor when:
//   - it is not already marked unreachable,
//   - it has exactly one predecessor,
//   - that predecessor ends in an unconditional OpBranch, and
//   - the predecessor's only successor is this block.
// After merging, the absorbed block is marked unreachable.
//
// Always returns true.
bool ControlFlowSimplificationPass::Run(spv::Module* module) {
  for (auto function : module->getFunctions()) {
    // Walk the blocks back-to-front. Reverse iterators are used rather than
    // decrementing an iterator past begin() (undefined behaviour) and so that
    // a function without any blocks is handled without touching end() - 1.
    const auto& blocks = function->getBlocks();
    for (auto it = blocks.rbegin(); it != blocks.rend(); ++it) {
      auto block = *it;
      if (block->isUnreachable() || block->getPredecessors().size() != 1) {
        continue;
      }

      auto prev_block = block->getPredecessors()[0];
      const auto instr_count = prev_block->getInstructionCount();
      if (instr_count == 0) {
        // No terminator to inspect; avoid indexing instruction "count - 1".
        continue;
      }

      auto last_instr = prev_block->getInstruction(instr_count - 1);
      if (last_instr->getOpCode() != spv::Op::OpBranch) {
        continue;
      }

      if (prev_block->getSuccessors().size() == 1 &&
          prev_block->getSuccessors()[0] == block) {
        // We're dominated by this block. Merge into it.
        prev_block->merge(block);
        block->setUnreachable();
      }
    }
  }

  return true;
}
} // namespace spirv
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,34 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
#include "xenia/gpu/spirv/compiler_pass.h"
namespace xe {
namespace gpu {
namespace spirv {
// Control-flow simplification pass. Combines adjacent blocks and marks
// any unreachable blocks.
class ControlFlowSimplificationPass : public CompilerPass {
 public:
  ControlFlowSimplificationPass();

  // CompilerPass entry point.
  bool Run(spv::Module* module) override;
};
} // namespace spirv
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
****************************************************************************** ******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. * * Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. * * Released under the BSD license - see LICENSE in the root for more details. *
****************************************************************************** ******************************************************************************
*/ */
@ -17,7 +17,9 @@
#include "third_party/glslang-spirv/SpvBuilder.h" #include "third_party/glslang-spirv/SpvBuilder.h"
#include "third_party/spirv/GLSL.std.450.hpp11" #include "third_party/spirv/GLSL.std.450.hpp11"
#include "xenia/gpu/shader_translator.h" #include "xenia/gpu/shader_translator.h"
#include "xenia/gpu/spirv/compiler.h"
#include "xenia/ui/spirv/spirv_disassembler.h" #include "xenia/ui/spirv/spirv_disassembler.h"
#include "xenia/ui/spirv/spirv_validator.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -54,7 +56,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
std::vector<uint8_t> CompleteTranslation() override; std::vector<uint8_t> CompleteTranslation() override;
void PostTranslation(Shader* shader) override; void PostTranslation(Shader* shader) override;
void PreProcessControlFlowInstruction(uint32_t cf_index) override; void PreProcessControlFlowInstruction(
uint32_t cf_index, const ucode::ControlFlowInstruction& instr) override;
void ProcessLabel(uint32_t cf_index) override; void ProcessLabel(uint32_t cf_index) override;
void ProcessControlFlowInstructionBegin(uint32_t cf_index) override; void ProcessControlFlowInstructionBegin(uint32_t cf_index) override;
void ProcessControlFlowInstructionEnd(uint32_t cf_index) override; void ProcessControlFlowInstructionEnd(uint32_t cf_index) override;
@ -91,10 +94,16 @@ class SpirvShaderTranslator : public ShaderTranslator {
// Stores a value based on the specified result information. // Stores a value based on the specified result information.
// The value will be transformed into the appropriate form for the result and // The value will be transformed into the appropriate form for the result and
// the proper components will be selected. // the proper components will be selected.
void StoreToResult(spv::Id source_value_id, const InstructionResult& result, void StoreToResult(spv::Id source_value_id, const InstructionResult& result);
spv::Id predicate_cond = 0);
xe::ui::spirv::SpirvDisassembler disassembler_; xe::ui::spirv::SpirvDisassembler disassembler_;
xe::ui::spirv::SpirvValidator validator_;
xe::gpu::spirv::Compiler compiler_;
// True if there's an open predicated block
bool open_predicated_block_ = false;
bool predicated_block_cond_ = false;
spv::Block* predicated_block_end_ = nullptr;
// TODO(benvanik): replace with something better, make reusable, etc. // TODO(benvanik): replace with something better, make reusable, etc.
std::unique_ptr<spv::Builder> builder_; std::unique_ptr<spv::Builder> builder_;
@ -104,11 +113,10 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Function* translated_main_ = 0; spv::Function* translated_main_ = 0;
// Types. // Types.
spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0; spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0, uint_type_ = 0;
spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0; spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0;
spv::Id vec4_uint_type_ = 0; spv::Id vec4_uint_type_ = 0;
spv::Id vec4_bool_type_ = 0; spv::Id vec4_bool_type_ = 0;
spv::Id sampled_image_type_ = 0;
// Constants. // Constants.
spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0; spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0;
@ -121,13 +129,19 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Id pos_ = 0; spv::Id pos_ = 0;
spv::Id push_consts_ = 0; spv::Id push_consts_ = 0;
spv::Id interpolators_ = 0; spv::Id interpolators_ = 0;
spv::Id frag_outputs_ = 0; spv::Id vertex_id_ = 0;
spv::Id frag_outputs_ = 0, frag_depth_ = 0;
spv::Id samplers_ = 0; spv::Id samplers_ = 0;
spv::Id img_[4] = {0}; // Images {1D, 2D, 3D, Cube} spv::Id tex_[4] = {0}; // Images {1D, 2D, 3D, Cube}
// Map of {binding -> {offset -> spv input}} // Map of {binding -> {offset -> spv input}}
std::map<uint32_t, std::map<uint32_t, spv::Id>> vertex_binding_map_; std::map<uint32_t, std::map<uint32_t, spv::Id>> vertex_binding_map_;
std::map<uint32_t, spv::Block*> cf_blocks_;
struct CFBlock {
spv::Block* block = nullptr;
bool prev_dominates = true;
};
std::map<uint32_t, CFBlock> cf_blocks_;
}; };
} // namespace gpu } // namespace gpu

View File

@ -88,6 +88,66 @@ enum class TextureFormat : uint32_t {
kUnknown = 0xFFFFFFFFu, kUnknown = 0xFFFFFFFFu,
}; };
// Returns the per-texel size associated with |format| (presumably in bytes -
// e.g. k_8 -> 1, k_8_8_8_8 -> 4). Formats not listed here trigger
// assert_unhandled_case and yield 0.
//
// NOTE(review): k_16 and k_16_FLOAT map to 4 rather than 2; this mirrors the
// existing table exactly - confirm whether that is intentional.
inline size_t GetTexelSize(TextureFormat format) {
  switch (format) {
    case TextureFormat::k_8:
      return 1;

    case TextureFormat::k_1_5_5_5:
    case TextureFormat::k_4_4_4_4:
    case TextureFormat::k_5_6_5:
    case TextureFormat::k_8_8:
      return 2;

    case TextureFormat::k_2_10_10_10:
    case TextureFormat::k_8_8_8_8:
    case TextureFormat::k_16:
    case TextureFormat::k_16_FLOAT:
    case TextureFormat::k_16_16:
    case TextureFormat::k_16_16_FLOAT:
    case TextureFormat::k_32_FLOAT:
    case TextureFormat::k_10_11_11:
    case TextureFormat::k_11_11_10:
      return 4;

    case TextureFormat::k_16_16_16_16:
    case TextureFormat::k_16_16_16_16_FLOAT:
    case TextureFormat::k_32_32_FLOAT:
      return 8;

    case TextureFormat::k_32_32_32_32_FLOAT:
      return 16;

    default:
      assert_unhandled_case(format);
      return 0;
  }
}
inline TextureFormat ColorFormatToTextureFormat(ColorFormat color_format) { inline TextureFormat ColorFormatToTextureFormat(ColorFormat color_format) {
return static_cast<TextureFormat>(color_format); return static_cast<TextureFormat>(color_format);
} }

View File

@ -51,7 +51,7 @@ void TracePlayer::SeekFrame(int target_frame) {
assert_true(frame->start_ptr <= frame->end_ptr); assert_true(frame->start_ptr <= frame->end_ptr);
PlayTrace(frame->start_ptr, frame->end_ptr - frame->start_ptr, PlayTrace(frame->start_ptr, frame->end_ptr - frame->start_ptr,
TracePlaybackMode::kBreakOnSwap); TracePlaybackMode::kBreakOnSwap, false);
} }
void TracePlayer::SeekCommand(int target_command) { void TracePlayer::SeekCommand(int target_command) {
@ -71,11 +71,11 @@ void TracePlayer::SeekCommand(int target_command) {
const auto& previous_command = frame->commands[previous_command_index]; const auto& previous_command = frame->commands[previous_command_index];
PlayTrace(previous_command.end_ptr, PlayTrace(previous_command.end_ptr,
command.end_ptr - previous_command.end_ptr, command.end_ptr - previous_command.end_ptr,
TracePlaybackMode::kBreakOnSwap); TracePlaybackMode::kBreakOnSwap, false);
} else { } else {
// Full playback from frame start. // Full playback from frame start.
PlayTrace(frame->start_ptr, command.end_ptr - frame->start_ptr, PlayTrace(frame->start_ptr, command.end_ptr - frame->start_ptr,
TracePlaybackMode::kBreakOnSwap); TracePlaybackMode::kBreakOnSwap, true);
} }
} }
@ -84,19 +84,25 @@ void TracePlayer::WaitOnPlayback() {
} }
void TracePlayer::PlayTrace(const uint8_t* trace_data, size_t trace_size, void TracePlayer::PlayTrace(const uint8_t* trace_data, size_t trace_size,
TracePlaybackMode playback_mode) { TracePlaybackMode playback_mode,
graphics_system_->command_processor()->CallInThread( bool clear_caches) {
[this, trace_data, trace_size, playback_mode]() { playing_trace_ = true;
PlayTraceOnThread(trace_data, trace_size, playback_mode); graphics_system_->command_processor()->CallInThread([=]() {
}); PlayTraceOnThread(trace_data, trace_size, playback_mode, clear_caches);
});
} }
void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data, void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data,
size_t trace_size, size_t trace_size,
TracePlaybackMode playback_mode) { TracePlaybackMode playback_mode,
bool clear_caches) {
auto memory = graphics_system_->memory(); auto memory = graphics_system_->memory();
auto command_processor = graphics_system_->command_processor(); auto command_processor = graphics_system_->command_processor();
if (clear_caches) {
command_processor->ClearCaches();
}
command_processor->set_swap_mode(SwapMode::kIgnored); command_processor->set_swap_mode(SwapMode::kIgnored);
playback_percent_ = 0; playback_percent_ = 0;
auto trace_end = trace_data + trace_size; auto trace_end = trace_data + trace_size;

View File

@ -50,9 +50,9 @@ class TracePlayer : public TraceReader {
private: private:
void PlayTrace(const uint8_t* trace_data, size_t trace_size, void PlayTrace(const uint8_t* trace_data, size_t trace_size,
TracePlaybackMode playback_mode); TracePlaybackMode playback_mode, bool clear_caches);
void PlayTraceOnThread(const uint8_t* trace_data, size_t trace_size, void PlayTraceOnThread(const uint8_t* trace_data, size_t trace_size,
TracePlaybackMode playback_mode); TracePlaybackMode playback_mode, bool clear_caches);
xe::ui::Loop* loop_; xe::ui::Loop* loop_;
GraphicsSystem* graphics_system_; GraphicsSystem* graphics_system_;

View File

@ -75,6 +75,10 @@ void TraceReader::ParseTrace() {
const uint8_t* packet_start_ptr = nullptr; const uint8_t* packet_start_ptr = nullptr;
const uint8_t* last_ptr = trace_ptr; const uint8_t* last_ptr = trace_ptr;
bool pending_break = false; bool pending_break = false;
auto current_command_buffer = new CommandBuffer();
current_frame.command_tree =
std::unique_ptr<CommandBuffer>(current_command_buffer);
while (trace_ptr < trace_data_ + trace_size_) { while (trace_ptr < trace_data_ + trace_size_) {
++current_frame.command_count; ++current_frame.command_count;
auto type = static_cast<TraceCommandType>(xe::load<uint32_t>(trace_ptr)); auto type = static_cast<TraceCommandType>(xe::load<uint32_t>(trace_ptr));
@ -94,11 +98,29 @@ void TraceReader::ParseTrace() {
auto cmd = auto cmd =
reinterpret_cast<const IndirectBufferStartCommand*>(trace_ptr); reinterpret_cast<const IndirectBufferStartCommand*>(trace_ptr);
trace_ptr += sizeof(*cmd) + cmd->count * 4; trace_ptr += sizeof(*cmd) + cmd->count * 4;
// Traverse down a level.
auto sub_command_buffer = new CommandBuffer();
sub_command_buffer->parent = current_command_buffer;
current_command_buffer->commands.push_back(
CommandBuffer::Command(sub_command_buffer));
current_command_buffer = sub_command_buffer;
break; break;
} }
case TraceCommandType::kIndirectBufferEnd: { case TraceCommandType::kIndirectBufferEnd: {
auto cmd = reinterpret_cast<const IndirectBufferEndCommand*>(trace_ptr); auto cmd = reinterpret_cast<const IndirectBufferEndCommand*>(trace_ptr);
trace_ptr += sizeof(*cmd); trace_ptr += sizeof(*cmd);
// IB packet is wrapped in a kPacketStart/kPacketEnd. Skip the end.
auto end_cmd = reinterpret_cast<const PacketEndCommand*>(trace_ptr);
assert_true(end_cmd->type == TraceCommandType::kPacketEnd);
// Advance past the packet-end record that was just validated. Its size is
// taken from end_cmd, not from the indirect-buffer-end record (cmd), so the
// skip stays correct even if the two record layouts ever differ.
trace_ptr += sizeof(*end_cmd);
// Go back up a level. If parent is null, this frame started in an
// indirect buffer.
if (current_command_buffer->parent) {
  current_command_buffer = current_command_buffer->parent;
}
break; break;
} }
case TraceCommandType::kPacketStart: { case TraceCommandType::kPacketStart: {
@ -125,6 +147,8 @@ void TraceReader::ParseTrace() {
command.end_ptr = trace_ptr; command.end_ptr = trace_ptr;
current_frame.commands.push_back(std::move(command)); current_frame.commands.push_back(std::move(command));
last_ptr = trace_ptr; last_ptr = trace_ptr;
current_command_buffer->commands.push_back(CommandBuffer::Command(
uint32_t(current_frame.commands.size() - 1)));
break; break;
} }
case PacketCategory::kSwap: case PacketCategory::kSwap:
@ -136,6 +160,9 @@ void TraceReader::ParseTrace() {
if (pending_break) { if (pending_break) {
current_frame.end_ptr = trace_ptr; current_frame.end_ptr = trace_ptr;
frames_.push_back(std::move(current_frame)); frames_.push_back(std::move(current_frame));
current_command_buffer = new CommandBuffer();
current_frame.command_tree =
std::unique_ptr<CommandBuffer>(current_command_buffer);
current_frame.start_ptr = trace_ptr; current_frame.start_ptr = trace_ptr;
current_frame.end_ptr = nullptr; current_frame.end_ptr = nullptr;
current_frame.command_count = 0; current_frame.command_count = 0;

View File

@ -11,6 +11,7 @@
#define XENIA_GPU_TRACE_READER_H_ #define XENIA_GPU_TRACE_READER_H_
#include <string> #include <string>
#include <vector>
#include "xenia/base/mapped_memory.h" #include "xenia/base/mapped_memory.h"
#include "xenia/gpu/trace_protocol.h" #include "xenia/gpu/trace_protocol.h"
@ -51,6 +52,42 @@ namespace gpu {
class TraceReader { class TraceReader {
public: public:
// A node in the per-frame command tree. Each entry is either a reference to a
// command in the frame's flat command list (by index) or a nested command
// buffer (an owned subtree).
struct CommandBuffer {
  struct Command {
    enum class Type {
      kCommand,  // command_id is meaningful.
      kBuffer,   // command_subtree is meaningful.
    };

    // Default state: a kCommand entry with an invalid id and no subtree.
    // |type| is given a defined value so a default-constructed Command is
    // never read uninitialized.
    Command() = default;

    // Move-only (owns the subtree). noexcept so containers can relocate
    // elements without falling back to a throwing path.
    Command(Command&& other) noexcept
        : type(other.type),
          command_id(other.command_id),
          command_subtree(std::move(other.command_subtree)) {}

    // Wraps a nested command buffer. Takes ownership of |buf|.
    Command(CommandBuffer* buf)
        : type(Type::kBuffer), command_subtree(buf) {}

    // References the command at index |id|.
    Command(uint32_t id) : type(Type::kCommand), command_id(id) {}

    ~Command() = default;

    Type type = Type::kCommand;
    // 0xFFFFFFFF (i.e. uint32_t(-1)) when no command is referenced.
    uint32_t command_id = static_cast<uint32_t>(-1);
    std::unique_ptr<CommandBuffer> command_subtree = nullptr;
  };

  CommandBuffer() {}
  ~CommandBuffer() {}

  // Parent command buffer, if one exists.
  CommandBuffer* parent = nullptr;
  std::vector<Command> commands;
};
struct Frame { struct Frame {
struct Command { struct Command {
enum class Type { enum class Type {
@ -74,7 +111,12 @@ class TraceReader {
const uint8_t* start_ptr = nullptr; const uint8_t* start_ptr = nullptr;
const uint8_t* end_ptr = nullptr; const uint8_t* end_ptr = nullptr;
int command_count = 0; int command_count = 0;
// Flat list of all commands in this frame.
std::vector<Command> commands; std::vector<Command> commands;
// Tree of all command buffers
std::unique_ptr<CommandBuffer> command_tree;
}; };
TraceReader() = default; TraceReader() = default;

View File

@ -390,6 +390,66 @@ void TraceViewer::DrawPacketDisassemblerUI() {
ImGui::End(); ImGui::End();
} }
// Draws |buffer| as a tree of ImGui widgets: nested command buffers become
// collapsible "Indirect Buffer" nodes (empty ones are skipped) and plain
// commands become selectable rows labelled by their type, with the command
// index shown on the right.
//
// |frame| supplies the flat command list that command ids index into.
// Returns the id of the command the user clicked during this draw, or -1 if
// nothing was selected (also -1 when |buffer| is null).
int TraceViewer::RecursiveDrawCommandBufferUI(
    const TraceReader::Frame* frame, TraceReader::CommandBuffer* buffer) {
  int selected_id = -1;
  if (!buffer) {
    // Nothing to draw (e.g. a frame without a command tree).
    return selected_id;
  }
  int column_width = int(ImGui::GetContentRegionMax().x);

  for (size_t i = 0; i < buffer->commands.size(); i++) {
    switch (buffer->commands[i].type) {
      case TraceReader::CommandBuffer::Command::Type::kBuffer: {
        auto subtree = buffer->commands[i].command_subtree.get();
        if (!subtree || !subtree->commands.size()) {
          continue;
        }

        ImGui::PushID(int(i));
        // %d expects an int; passing the size_t index directly through
        // varargs is a format/argument mismatch.
        if (ImGui::TreeNode((void*)0, "Indirect Buffer %d", int(i))) {
          ImGui::Indent();
          auto id = RecursiveDrawCommandBufferUI(frame, subtree);
          ImGui::Unindent();
          ImGui::TreePop();

          if (id != -1) {
            selected_id = id;
          }
        }
        ImGui::PopID();
      } break;

      case TraceReader::CommandBuffer::Command::Type::kCommand: {
        uint32_t command_id = buffer->commands[i].command_id;

        const auto& command = frame->commands[command_id];
        bool is_selected = command_id == player_->current_command_index();
        // Fallback so the label is never read uninitialized if a command
        // type other than draw/swap shows up.
        const char* label = "Unknown";
        switch (command.type) {
          case TraceReader::Frame::Command::Type::kDraw:
            label = "Draw";
            break;
          case TraceReader::Frame::Command::Type::kSwap:
            label = "Swap";
            break;
        }

        ImGui::PushID(command_id);
        if (ImGui::Selectable(label, &is_selected)) {
          selected_id = command_id;
        }
        ImGui::SameLine(column_width - 60.0f);
        ImGui::Text("%d", command_id);
        ImGui::PopID();
        // if (did_seek && target_command == i) {
        //   ImGui::SetScrollPosHere();
        // }
      } break;
    }
  }

  return selected_id;
}
void TraceViewer::DrawCommandListUI() { void TraceViewer::DrawCommandListUI() {
ImGui::SetNextWindowPos(ImVec2(5, 70), ImGuiSetCond_FirstUseEver); ImGui::SetNextWindowPos(ImVec2(5, 70), ImGuiSetCond_FirstUseEver);
if (!ImGui::Begin("Command List", nullptr, ImVec2(200, 640))) { if (!ImGui::Begin("Command List", nullptr, ImVec2(200, 640))) {
@ -473,31 +533,12 @@ void TraceViewer::DrawCommandListUI() {
ImGui::SetScrollPosHere(); ImGui::SetScrollPosHere();
} }
for (int i = 0; i < int(frame->commands.size()); ++i) { auto id = RecursiveDrawCommandBufferUI(frame, frame->command_tree.get());
ImGui::PushID(i); if (id != -1 && id != player_->current_command_index() &&
is_selected = i == player_->current_command_index(); !player_->is_playing_trace()) {
const auto& command = frame->commands[i]; player_->SeekCommand(id);
const char* label;
switch (command.type) {
case TraceReader::Frame::Command::Type::kDraw:
label = "Draw";
break;
case TraceReader::Frame::Command::Type::kSwap:
label = "Swap";
break;
}
if (ImGui::Selectable(label, &is_selected)) {
if (!player_->is_playing_trace()) {
player_->SeekCommand(i);
}
}
ImGui::SameLine(column_width - 60.0f);
ImGui::Text("%d", i);
ImGui::PopID();
if (did_seek && target_command == i) {
ImGui::SetScrollPosHere();
}
} }
ImGui::EndChild(); ImGui::EndChild();
ImGui::End(); ImGui::End();
} }
@ -639,8 +680,8 @@ void TraceViewer::DrawTextureInfo(
ImGui::Columns(2); ImGui::Columns(2);
ImVec2 button_size(256, 256); ImVec2 button_size(256, 256);
if (ImGui::ImageButton(ImTextureID(texture | ui::ImGuiDrawer::kIgnoreAlpha), if (ImGui::ImageButton(ImTextureID(texture), button_size, ImVec2(0, 0),
button_size, ImVec2(0, 0), ImVec2(1, 1))) { ImVec2(1, 1))) {
// show viewer // show viewer
} }
ImGui::NextColumn(); ImGui::NextColumn();
@ -1108,11 +1149,14 @@ void TraceViewer::DrawStateUI() {
((window_scissor_br >> 16) & 0x7FFF) - ((window_scissor_br >> 16) & 0x7FFF) -
((window_scissor_tl >> 16) & 0x7FFF)); ((window_scissor_tl >> 16) & 0x7FFF));
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_actual = (surface_info >> 18) & 0x3FFF;
uint32_t surface_pitch = surface_info & 0x3FFF; uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = (surface_info >> 16) & 0x3; auto surface_msaa = (surface_info >> 16) & 0x3;
static const char* kMsaaNames[] = { static const char* kMsaaNames[] = {
"1X", "2X", "4X", "1X", "2X", "4X",
}; };
ImGui::BulletText("Surface Pitch - Actual: %d - %d", surface_pitch,
surface_actual);
ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]); ImGui::BulletText("Surface MSAA: %s", kMsaaNames[surface_msaa]);
uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; bool vport_xscale_enable = (vte_control & (1 << 0)) > 0;
@ -1124,6 +1168,9 @@ void TraceViewer::DrawStateUI() {
assert_true(vport_xscale_enable == vport_yscale_enable == assert_true(vport_xscale_enable == vport_yscale_enable ==
vport_zscale_enable == vport_xoffset_enable == vport_zscale_enable == vport_xoffset_enable ==
vport_yoffset_enable == vport_zoffset_enable); vport_yoffset_enable == vport_zoffset_enable);
if (!vport_xscale_enable) {
ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored);
}
ImGui::BulletText( ImGui::BulletText(
"Viewport Offset: %f, %f, %f", "Viewport Offset: %f, %f, %f",
vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0, vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0,
@ -1134,6 +1181,10 @@ void TraceViewer::DrawStateUI() {
vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1, vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1,
vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1, vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1,
vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1); vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1);
if (!vport_xscale_enable) {
ImGui::PopStyleColor();
}
ImGui::BulletText("Vertex Format: %s, %s, %s, %s", ImGui::BulletText("Vertex Format: %s, %s, %s, %s",
((vte_control >> 8) & 0x1) ? "x/w0" : "x", ((vte_control >> 8) & 0x1) ? "x/w0" : "x",
((vte_control >> 8) & 0x1) ? "y/w0" : "y", ((vte_control >> 8) & 0x1) ? "y/w0" : "y",
@ -1318,7 +1369,7 @@ void TraceViewer::DrawStateUI() {
if (write_mask) { if (write_mask) {
auto color_target = GetColorRenderTarget(surface_pitch, surface_msaa, auto color_target = GetColorRenderTarget(surface_pitch, surface_msaa,
color_base, color_format); color_base, color_format);
tex = ImTextureID(color_target | ui::ImGuiDrawer::kIgnoreAlpha); tex = ImTextureID(color_target);
if (ImGui::ImageButton(tex, button_size, ImVec2(0, 0), if (ImGui::ImageButton(tex, button_size, ImVec2(0, 0),
ImVec2(1, 1))) { ImVec2(1, 1))) {
// show viewer // show viewer
@ -1330,10 +1381,9 @@ void TraceViewer::DrawStateUI() {
} }
if (ImGui::IsItemHovered()) { if (ImGui::IsItemHovered()) {
ImGui::BeginTooltip(); ImGui::BeginTooltip();
ImGui::Text( ImGui::Text("Color Target %d (%s), base %.4X, pitch %d, format %d", i,
"Color Target %d (%s), base %.4X, pitch %d, msaa %d, format %d", write_mask ? "enabled" : "disabled", color_base,
i, write_mask ? "enabled" : "disabled", color_base, surface_pitch, surface_pitch, color_format);
surface_msaa, color_format);
if (tex) { if (tex) {
ImVec2 rel_pos; ImVec2 rel_pos;
@ -1407,17 +1457,19 @@ void TraceViewer::DrawStateUI() {
auto button_pos = ImGui::GetCursorScreenPos(); auto button_pos = ImGui::GetCursorScreenPos();
ImVec2 button_size(256, 256); ImVec2 button_size(256, 256);
ImGui::ImageButton( ImGui::ImageButton(ImTextureID(depth_target), button_size, ImVec2(0, 0),
ImTextureID(depth_target | ui::ImGuiDrawer::kIgnoreAlpha), ImVec2(1, 1));
button_size, ImVec2(0, 0), ImVec2(1, 1));
if (ImGui::IsItemHovered()) { if (ImGui::IsItemHovered()) {
ImGui::BeginTooltip(); ImGui::BeginTooltip();
ImGui::Text("Depth Target: base %.4X, pitch %d, format %d", depth_base,
surface_pitch, depth_format);
ImVec2 rel_pos; ImVec2 rel_pos;
rel_pos.x = ImGui::GetMousePos().x - button_pos.x; rel_pos.x = ImGui::GetMousePos().x - button_pos.x;
rel_pos.y = ImGui::GetMousePos().y - button_pos.y; rel_pos.y = ImGui::GetMousePos().y - button_pos.y;
ZoomedImage(ImTextureID(depth_target | ui::ImGuiDrawer::kIgnoreAlpha), ZoomedImage(ImTextureID(depth_target), rel_pos, button_size, 32.f,
rel_pos, button_size, 32.f, ImVec2(256, 256)); ImVec2(256, 256));
ImGui::EndTooltip(); ImGui::EndTooltip();
} }

View File

@ -80,6 +80,8 @@ class TraceViewer {
void DrawUI(); void DrawUI();
void DrawControllerUI(); void DrawControllerUI();
void DrawPacketDisassemblerUI(); void DrawPacketDisassemblerUI();
int RecursiveDrawCommandBufferUI(const TraceReader::Frame* frame,
TraceReader::CommandBuffer* buffer);
void DrawCommandListUI(); void DrawCommandListUI();
void DrawStateUI(); void DrawStateUI();

View File

@ -22,98 +22,19 @@ namespace vulkan {
using xe::ui::vulkan::CheckResult; using xe::ui::vulkan::CheckResult;
// Space kept between tail and head when wrapping.
constexpr VkDeviceSize kDeadZone = 4 * 1024;
constexpr VkDeviceSize kConstantRegisterUniformRange = constexpr VkDeviceSize kConstantRegisterUniformRange =
512 * 4 * 4 + 8 * 4 + 32 * 4; 512 * 4 * 4 + 8 * 4 + 32 * 4;
BufferCache::BufferCache(RegisterFile* register_file, BufferCache::BufferCache(RegisterFile* register_file,
ui::vulkan::VulkanDevice* device, size_t capacity) ui::vulkan::VulkanDevice* device, size_t capacity)
: register_file_(register_file), : register_file_(register_file), device_(*device) {
device_(*device), transient_buffer_ = std::make_unique<ui::vulkan::CircularBuffer>(device);
transient_capacity_(capacity) { if (!transient_buffer_->Initialize(capacity,
// Uniform buffer. VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VkBufferCreateInfo uniform_buffer_info; VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
uniform_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; VK_BUFFER_USAGE_VERTEX_BUFFER_BIT)) {
uniform_buffer_info.pNext = nullptr; assert_always();
uniform_buffer_info.flags = 0; }
uniform_buffer_info.size = transient_capacity_;
uniform_buffer_info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
uniform_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
uniform_buffer_info.queueFamilyIndexCount = 0;
uniform_buffer_info.pQueueFamilyIndices = nullptr;
auto err = vkCreateBuffer(device_, &uniform_buffer_info, nullptr,
&transient_uniform_buffer_);
CheckResult(err, "vkCreateBuffer");
// Index buffer.
VkBufferCreateInfo index_buffer_info;
index_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
index_buffer_info.pNext = nullptr;
index_buffer_info.flags = 0;
index_buffer_info.size = transient_capacity_;
index_buffer_info.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
index_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
index_buffer_info.queueFamilyIndexCount = 0;
index_buffer_info.pQueueFamilyIndices = nullptr;
err = vkCreateBuffer(device_, &index_buffer_info, nullptr,
&transient_index_buffer_);
CheckResult(err, "vkCreateBuffer");
// Vertex buffer.
VkBufferCreateInfo vertex_buffer_info;
vertex_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
vertex_buffer_info.pNext = nullptr;
vertex_buffer_info.flags = 0;
vertex_buffer_info.size = transient_capacity_;
vertex_buffer_info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
vertex_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
vertex_buffer_info.queueFamilyIndexCount = 0;
vertex_buffer_info.pQueueFamilyIndices = nullptr;
err = vkCreateBuffer(*device, &vertex_buffer_info, nullptr,
&transient_vertex_buffer_);
CheckResult(err, "vkCreateBuffer");
// Allocate the underlying buffer we use for all storage.
// We query all types and take the max alignment.
VkMemoryRequirements uniform_buffer_requirements;
VkMemoryRequirements index_buffer_requirements;
VkMemoryRequirements vertex_buffer_requirements;
vkGetBufferMemoryRequirements(device_, transient_uniform_buffer_,
&uniform_buffer_requirements);
vkGetBufferMemoryRequirements(device_, transient_index_buffer_,
&index_buffer_requirements);
vkGetBufferMemoryRequirements(device_, transient_vertex_buffer_,
&vertex_buffer_requirements);
uniform_buffer_alignment_ = uniform_buffer_requirements.alignment;
index_buffer_alignment_ = index_buffer_requirements.alignment;
vertex_buffer_alignment_ = vertex_buffer_requirements.alignment;
VkMemoryRequirements buffer_requirements;
buffer_requirements.size = transient_capacity_;
buffer_requirements.alignment =
std::max(uniform_buffer_requirements.alignment,
std::max(index_buffer_requirements.alignment,
vertex_buffer_requirements.alignment));
buffer_requirements.memoryTypeBits =
uniform_buffer_requirements.memoryTypeBits |
index_buffer_requirements.memoryTypeBits |
vertex_buffer_requirements.memoryTypeBits;
transient_buffer_memory_ = device->AllocateMemory(
buffer_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
// Alias all buffers to our memory.
vkBindBufferMemory(device_, transient_uniform_buffer_,
transient_buffer_memory_, 0);
vkBindBufferMemory(device_, transient_index_buffer_, transient_buffer_memory_,
0);
vkBindBufferMemory(device_, transient_vertex_buffer_,
transient_buffer_memory_, 0);
// Map memory and keep it mapped while we use it.
err = vkMapMemory(device_, transient_buffer_memory_, 0, VK_WHOLE_SIZE, 0,
&transient_buffer_data_);
CheckResult(err, "vkMapMemory");
// Descriptor pool used for all of our cached descriptors. // Descriptor pool used for all of our cached descriptors.
// In the steady state we don't allocate anything, so these are all manually // In the steady state we don't allocate anything, so these are all manually
@ -129,8 +50,8 @@ BufferCache::BufferCache(RegisterFile* register_file,
pool_sizes[0].descriptorCount = 2; pool_sizes[0].descriptorCount = 2;
descriptor_pool_info.poolSizeCount = 1; descriptor_pool_info.poolSizeCount = 1;
descriptor_pool_info.pPoolSizes = pool_sizes; descriptor_pool_info.pPoolSizes = pool_sizes;
err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr, auto err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr,
&descriptor_pool_); &descriptor_pool_);
CheckResult(err, "vkCreateDescriptorPool"); CheckResult(err, "vkCreateDescriptorPool");
// Create the descriptor set layout used for our uniform buffer. // Create the descriptor set layout used for our uniform buffer.
@ -180,7 +101,7 @@ BufferCache::BufferCache(RegisterFile* register_file,
// Initialize descriptor set with our buffers. // Initialize descriptor set with our buffers.
VkDescriptorBufferInfo buffer_info; VkDescriptorBufferInfo buffer_info;
buffer_info.buffer = transient_uniform_buffer_; buffer_info.buffer = transient_buffer_->gpu_buffer();
buffer_info.offset = 0; buffer_info.offset = 0;
buffer_info.range = kConstantRegisterUniformRange; buffer_info.range = kConstantRegisterUniformRange;
VkWriteDescriptorSet descriptor_writes[2]; VkWriteDescriptorSet descriptor_writes[2];
@ -212,25 +133,20 @@ BufferCache::~BufferCache() {
&transient_descriptor_set_); &transient_descriptor_set_);
vkDestroyDescriptorSetLayout(device_, descriptor_set_layout_, nullptr); vkDestroyDescriptorSetLayout(device_, descriptor_set_layout_, nullptr);
vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr); vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr);
vkUnmapMemory(device_, transient_buffer_memory_); transient_buffer_->Shutdown();
vkFreeMemory(device_, transient_buffer_memory_, nullptr);
vkDestroyBuffer(device_, transient_uniform_buffer_, nullptr);
vkDestroyBuffer(device_, transient_index_buffer_, nullptr);
vkDestroyBuffer(device_, transient_vertex_buffer_, nullptr);
} }
std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters( std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
const Shader::ConstantRegisterMap& vertex_constant_register_map, const Shader::ConstantRegisterMap& vertex_constant_register_map,
const Shader::ConstantRegisterMap& pixel_constant_register_map) { const Shader::ConstantRegisterMap& pixel_constant_register_map,
std::shared_ptr<ui::vulkan::Fence> fence) {
// Fat struct, including all registers: // Fat struct, including all registers:
// struct { // struct {
// vec4 float[512]; // vec4 float[512];
// uint bool[8]; // uint bool[8];
// uint loop[32]; // uint loop[32];
// }; // };
size_t total_size = auto offset = AllocateTransientData(kConstantRegisterUniformRange, fence);
xe::round_up(kConstantRegisterUniformRange, uniform_buffer_alignment_);
auto offset = AllocateTransientData(uniform_buffer_alignment_, total_size);
if (offset == VK_WHOLE_SIZE) { if (offset == VK_WHOLE_SIZE) {
// OOM. // OOM.
return {VK_WHOLE_SIZE, VK_WHOLE_SIZE}; return {VK_WHOLE_SIZE, VK_WHOLE_SIZE};
@ -238,8 +154,7 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
// Copy over all the registers. // Copy over all the registers.
const auto& values = register_file_->values; const auto& values = register_file_->values;
uint8_t* dest_ptr = uint8_t* dest_ptr = transient_buffer_->host_base() + offset;
reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset;
std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32, std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
(512 * 4 * 4)); (512 * 4 * 4));
dest_ptr += 512 * 4 * 4; dest_ptr += 512 * 4 * 4;
@ -258,8 +173,8 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
// constant indexing. // constant indexing.
#if 0 #if 0
// Allocate space in the buffer for our data. // Allocate space in the buffer for our data.
auto offset = AllocateTransientData(uniform_buffer_alignment_, auto offset =
constant_register_map.packed_byte_length); AllocateTransientData(constant_register_map.packed_byte_length, fence);
if (offset == VK_WHOLE_SIZE) { if (offset == VK_WHOLE_SIZE) {
// OOM. // OOM.
return VK_WHOLE_SIZE; return VK_WHOLE_SIZE;
@ -304,11 +219,12 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
} }
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer( std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
const void* source_ptr, size_t source_length, IndexFormat format) { const void* source_ptr, size_t source_length, IndexFormat format,
std::shared_ptr<ui::vulkan::Fence> fence) {
// TODO(benvanik): check cache. // TODO(benvanik): check cache.
// Allocate space in the buffer for our data. // Allocate space in the buffer for our data.
auto offset = AllocateTransientData(index_buffer_alignment_, source_length); auto offset = AllocateTransientData(source_length, fence);
if (offset == VK_WHOLE_SIZE) { if (offset == VK_WHOLE_SIZE) {
// OOM. // OOM.
return {nullptr, VK_WHOLE_SIZE}; return {nullptr, VK_WHOLE_SIZE};
@ -319,25 +235,24 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
// TODO(benvanik): memcpy then use compute shaders to swap? // TODO(benvanik): memcpy then use compute shaders to swap?
if (format == IndexFormat::kInt16) { if (format == IndexFormat::kInt16) {
// Endian::k8in16, swap half-words. // Endian::k8in16, swap half-words.
xe::copy_and_swap_16_aligned( xe::copy_and_swap_16_aligned(transient_buffer_->host_base() + offset,
reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset, source_ptr, source_ptr, source_length / 2);
source_length / 2);
} else if (format == IndexFormat::kInt32) { } else if (format == IndexFormat::kInt32) {
// Endian::k8in32, swap words. // Endian::k8in32, swap words.
xe::copy_and_swap_32_aligned( xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset,
reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset, source_ptr, source_ptr, source_length / 4);
source_length / 4);
} }
return {transient_index_buffer_, offset}; return {transient_buffer_->gpu_buffer(), offset};
} }
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer( std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
const void* source_ptr, size_t source_length) { const void* source_ptr, size_t source_length, Endian endian,
std::shared_ptr<ui::vulkan::Fence> fence) {
// TODO(benvanik): check cache. // TODO(benvanik): check cache.
// Allocate space in the buffer for our data. // Allocate space in the buffer for our data.
auto offset = AllocateTransientData(vertex_buffer_alignment_, source_length); auto offset = AllocateTransientData(source_length, fence);
if (offset == VK_WHOLE_SIZE) { if (offset == VK_WHOLE_SIZE) {
// OOM. // OOM.
return {nullptr, VK_WHOLE_SIZE}; return {nullptr, VK_WHOLE_SIZE};
@ -345,60 +260,38 @@ std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
// Copy data into the buffer. // Copy data into the buffer.
// TODO(benvanik): memcpy then use compute shaders to swap? // TODO(benvanik): memcpy then use compute shaders to swap?
// Endian::k8in32, swap words. assert_true(endian == Endian::k8in32);
xe::copy_and_swap_32_aligned( if (endian == Endian::k8in32) {
reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset, source_ptr, // Endian::k8in32, swap words.
source_length / 4); xe::copy_and_swap_32_aligned(transient_buffer_->host_base() + offset,
source_ptr, source_length / 4);
}
return {transient_vertex_buffer_, offset}; return {transient_buffer_->gpu_buffer(), offset};
} }
VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize alignment, VkDeviceSize BufferCache::AllocateTransientData(
VkDeviceSize length) { VkDeviceSize length, std::shared_ptr<ui::vulkan::Fence> fence) {
// Try fast path (if we have space). // Try fast path (if we have space).
VkDeviceSize offset = TryAllocateTransientData(alignment, length); VkDeviceSize offset = TryAllocateTransientData(length, fence);
if (offset != VK_WHOLE_SIZE) { if (offset != VK_WHOLE_SIZE) {
return offset; return offset;
} }
// Ran out of easy allocations. // Ran out of easy allocations.
// Try consuming fences before we panic. // Try consuming fences before we panic.
assert_always("Reclamation not yet implemented"); transient_buffer_->Scavenge();
// Try again. It may still fail if we didn't get enough space back. // Try again. It may still fail if we didn't get enough space back.
return TryAllocateTransientData(alignment, length); offset = TryAllocateTransientData(length, fence);
return offset;
} }
VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize alignment, VkDeviceSize BufferCache::TryAllocateTransientData(
VkDeviceSize length) { VkDeviceSize length, std::shared_ptr<ui::vulkan::Fence> fence) {
if (transient_tail_offset_ >= transient_head_offset_) { auto alloc = transient_buffer_->Acquire(length, fence);
// Tail follows head, so things are easy: if (alloc) {
// | H----T | return alloc->offset;
if (xe::round_up(transient_tail_offset_, alignment) + length <=
transient_capacity_) {
// Allocation fits from tail to end of buffer, so grow.
// | H----**T |
VkDeviceSize offset = xe::round_up(transient_tail_offset_, alignment);
transient_tail_offset_ = offset + length;
return offset;
} else if (length + kDeadZone <= transient_head_offset_) {
// Can't fit at the end, but can fit if we wrap around.
// |**T H----....|
VkDeviceSize offset = 0;
transient_tail_offset_ = length;
return offset;
}
} else {
// Head follows tail, so we're reversed:
// |----T H---|
if (xe::round_up(transient_tail_offset_, alignment) + length + kDeadZone <=
transient_head_offset_) {
// Fits from tail to head.
// |----***T H---|
VkDeviceSize offset = xe::round_up(transient_tail_offset_, alignment);
transient_tail_offset_ = offset + length;
return offset;
}
} }
// No more space. // No more space.
@ -420,9 +313,9 @@ void BufferCache::Flush(VkCommandBuffer command_buffer) {
VkMappedMemoryRange dirty_range; VkMappedMemoryRange dirty_range;
dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
dirty_range.pNext = nullptr; dirty_range.pNext = nullptr;
dirty_range.memory = transient_buffer_memory_; dirty_range.memory = transient_buffer_->gpu_memory();
dirty_range.offset = 0; dirty_range.offset = 0;
dirty_range.size = transient_capacity_; dirty_range.size = transient_buffer_->capacity();
vkFlushMappedMemoryRanges(device_, 1, &dirty_range); vkFlushMappedMemoryRanges(device_, 1, &dirty_range);
} }
@ -434,6 +327,8 @@ void BufferCache::ClearCache() {
// TODO(benvanik): caching. // TODO(benvanik): caching.
} }
// Forwards the scavenge request to the transient circular buffer.
void BufferCache::Scavenge() {
  transient_buffer_->Scavenge();
}
} // namespace vulkan } // namespace vulkan
} // namespace gpu } // namespace gpu
} // namespace xe } // namespace xe

View File

@ -13,6 +13,7 @@
#include "xenia/gpu/register_file.h" #include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h" #include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/circular_buffer.h"
#include "xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h" #include "xenia/ui/vulkan/vulkan_device.h"
@ -50,22 +51,24 @@ class BufferCache {
// The returned offsets may alias. // The returned offsets may alias.
std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters( std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters(
const Shader::ConstantRegisterMap& vertex_constant_register_map, const Shader::ConstantRegisterMap& vertex_constant_register_map,
const Shader::ConstantRegisterMap& pixel_constant_register_map); const Shader::ConstantRegisterMap& pixel_constant_register_map,
std::shared_ptr<ui::vulkan::Fence> fence);
// Uploads index buffer data from guest memory, possibly eliding with // Uploads index buffer data from guest memory, possibly eliding with
// recently uploaded data or cached copies. // recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer. // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(const void* source_ptr, std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(
size_t source_length, const void* source_ptr, size_t source_length, IndexFormat format,
IndexFormat format); std::shared_ptr<ui::vulkan::Fence> fence);
// Uploads vertex buffer data from guest memory, possibly eliding with // Uploads vertex buffer data from guest memory, possibly eliding with
// recently uploaded data or cached copies. // recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers. // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM). // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(const void* source_ptr, std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(
size_t source_length); const void* source_ptr, size_t source_length, Endian endian,
std::shared_ptr<ui::vulkan::Fence> fence);
// Flushes all pending data to the GPU. // Flushes all pending data to the GPU.
// Until this is called the GPU is not guaranteed to see any data. // Until this is called the GPU is not guaranteed to see any data.
@ -81,36 +84,26 @@ class BufferCache {
// Clears all cached content and prevents future elision with pending data. // Clears all cached content and prevents future elision with pending data.
void ClearCache(); void ClearCache();
// Wipes all data no longer needed.
void Scavenge();
private: private:
// Allocates a block of memory in the transient buffer. // Allocates a block of memory in the transient buffer.
// When memory is not available fences are checked and space is reclaimed. // When memory is not available fences are checked and space is reclaimed.
// Returns VK_WHOLE_SIZE if requested amount of memory is not available. // Returns VK_WHOLE_SIZE if requested amount of memory is not available.
VkDeviceSize AllocateTransientData(VkDeviceSize alignment, VkDeviceSize AllocateTransientData(VkDeviceSize length,
VkDeviceSize length); std::shared_ptr<ui::vulkan::Fence> fence);
// Tries to allocate a block of memory in the transient buffer. // Tries to allocate a block of memory in the transient buffer.
// Returns VK_WHOLE_SIZE if requested amount of memory is not available. // Returns VK_WHOLE_SIZE if requested amount of memory is not available.
VkDeviceSize TryAllocateTransientData(VkDeviceSize alignment, VkDeviceSize TryAllocateTransientData(
VkDeviceSize length); VkDeviceSize length, std::shared_ptr<ui::vulkan::Fence> fence);
RegisterFile* register_file_ = nullptr; RegisterFile* register_file_ = nullptr;
VkDevice device_ = nullptr; VkDevice device_ = nullptr;
// Staging ringbuffer we cycle through fast. Used for data we don't // Staging ringbuffer we cycle through fast. Used for data we don't
// plan on keeping past the current frame. // plan on keeping past the current frame.
size_t transient_capacity_ = 0; std::unique_ptr<ui::vulkan::CircularBuffer> transient_buffer_ = nullptr;
VkBuffer transient_uniform_buffer_ = nullptr;
VkBuffer transient_index_buffer_ = nullptr;
VkBuffer transient_vertex_buffer_ = nullptr;
VkDeviceMemory transient_buffer_memory_ = nullptr;
void* transient_buffer_data_ = nullptr;
VkDeviceSize transient_head_offset_ = 0;
VkDeviceSize transient_tail_offset_ = 0;
// Required alignments for our various types.
// All allocations must start at the appropriate alignment.
VkDeviceSize uniform_buffer_alignment_ = 0;
VkDeviceSize index_buffer_alignment_ = 0;
VkDeviceSize vertex_buffer_alignment_ = 0;
VkDescriptorPool descriptor_pool_ = nullptr; VkDescriptorPool descriptor_pool_ = nullptr;
VkDescriptorSetLayout descriptor_set_layout_ = nullptr; VkDescriptorSetLayout descriptor_set_layout_ = nullptr;

View File

@ -17,6 +17,9 @@
#include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" #include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
#include <cinttypes>
#include <string>
namespace xe { namespace xe {
namespace gpu { namespace gpu {
namespace vulkan { namespace vulkan {
@ -154,40 +157,19 @@ VulkanShader* PipelineCache::LoadShader(ShaderType shader_type,
host_address, dword_count); host_address, dword_count);
shader_map_.insert({data_hash, shader}); shader_map_.insert({data_hash, shader});
// Perform translation.
// If this fails the shader will be marked as invalid and ignored later.
if (!shader_translator_.Translate(shader)) {
XELOGE("Shader translation failed; marking shader as ignored");
return shader;
}
// Prepare the shader for use (creates our VkShaderModule).
// It could still fail at this point.
if (!shader->Prepare()) {
XELOGE("Shader preparation failed; marking shader as ignored");
return shader;
}
if (shader->is_valid()) {
XELOGGPU("Generated %s shader at 0x%.8X (%db):\n%s",
shader_type == ShaderType::kVertex ? "vertex" : "pixel",
guest_address, dword_count * 4,
shader->ucode_disassembly().c_str());
}
// Dump shader files if desired.
if (!FLAGS_dump_shaders.empty()) {
shader->Dump(FLAGS_dump_shaders, "vk");
}
return shader; return shader;
} }
bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
const RenderState* render_state, VkCommandBuffer command_buffer, const RenderState* render_state,
VulkanShader* vertex_shader, VulkanShader* vertex_shader, VulkanShader* pixel_shader,
VulkanShader* pixel_shader, PrimitiveType primitive_type, VkPipeline* pipeline_out) {
PrimitiveType primitive_type) { #if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
assert_not_null(pipeline_out);
// Perform a pass over all registers and state updating our cached structures. // Perform a pass over all registers and state updating our cached structures.
// This will tell us if anything has changed that requires us to either build // This will tell us if anything has changed that requires us to either build
// a new pipeline or use an existing one. // a new pipeline or use an existing one.
@ -208,7 +190,7 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
// Error updating state - bail out. // Error updating state - bail out.
// We are in an indeterminate state, so reset things for the next attempt. // We are in an indeterminate state, so reset things for the next attempt.
current_pipeline_ = nullptr; current_pipeline_ = nullptr;
return false; return update_status;
} }
if (!pipeline) { if (!pipeline) {
// Should have a hash key produced by the UpdateState pass. // Should have a hash key produced by the UpdateState pass.
@ -217,24 +199,12 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
current_pipeline_ = pipeline; current_pipeline_ = pipeline;
if (!pipeline) { if (!pipeline) {
// Unable to create pipeline. // Unable to create pipeline.
return false; return UpdateStatus::kError;
} }
} }
// Bind the pipeline. *pipeline_out = pipeline;
vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); return update_status;
// Issue all changed dynamic state information commands.
// TODO(benvanik): dynamic state is kept in the command buffer, so if we
// have issued it before (regardless of pipeline) we don't need to do it now.
// TODO(benvanik): track whether we have issued on the given command buffer.
bool full_dynamic_state = true;
if (!SetDynamicState(command_buffer, full_dynamic_state)) {
// Failed to update state.
return false;
}
return true;
} }
void PipelineCache::ClearCache() { void PipelineCache::ClearCache() {
@ -291,16 +261,140 @@ VkPipeline PipelineCache::GetPipeline(const RenderState* render_state,
pipeline_info.basePipelineHandle = nullptr; pipeline_info.basePipelineHandle = nullptr;
pipeline_info.basePipelineIndex = 0; pipeline_info.basePipelineIndex = 0;
VkPipeline pipeline = nullptr; VkPipeline pipeline = nullptr;
auto err = vkCreateGraphicsPipelines(device_, nullptr, 1, &pipeline_info, auto err = vkCreateGraphicsPipelines(device_, pipeline_cache_, 1,
nullptr, &pipeline); &pipeline_info, nullptr, &pipeline);
CheckResult(err, "vkCreateGraphicsPipelines"); CheckResult(err, "vkCreateGraphicsPipelines");
// Dump shader disassembly.
if (FLAGS_vulkan_dump_disasm) {
DumpShaderDisasmNV(pipeline_info);
}
// Add to cache with the hash key for reuse. // Add to cache with the hash key for reuse.
cached_pipelines_.insert({hash_key, pipeline}); cached_pipelines_.insert({hash_key, pipeline});
return pipeline; return pipeline;
} }
// Translates |shader| with the given program control word and prepares it for
// use. Returns false if translation or preparation fails; otherwise returns
// whether the shader reports itself as valid afterwards.
bool PipelineCache::TranslateShader(VulkanShader* shader,
                                    xenos::xe_gpu_program_cntl_t cntl) {
  // Step 1: translation. A failure here leaves the shader marked as invalid
  // so that it is ignored later.
  const bool translated = shader_translator_.Translate(shader, cntl);
  if (!translated) {
    XELOGE("Shader translation failed; marking shader as ignored");
    return false;
  }

  // Step 2: preparation (creates our VkShaderModule). This can still fail
  // even though translation succeeded.
  const bool prepared = shader->Prepare();
  if (!prepared) {
    XELOGE("Shader preparation failed; marking shader as ignored");
    return false;
  }

  // Log the microcode disassembly of shaders that came out valid.
  if (shader->is_valid()) {
    const char* stage_name =
        shader->type() == ShaderType::kVertex ? "vertex" : "pixel";
    XELOGGPU("Generated %s shader (%db) - hash %.16" PRIX64 ":\n%s\n",
             stage_name, shader->ucode_dword_count() * 4,
             shader->ucode_data_hash(), shader->ucode_disassembly().c_str());
  }

  // Write the shader out to disk when a dump path has been configured.
  const bool dump_requested = !FLAGS_dump_shaders.empty();
  if (dump_requested) {
    shader->Dump(FLAGS_dump_shaders, "vk");
  }

  return shader->is_valid();
}
void PipelineCache::DumpShaderDisasmNV(
const VkGraphicsPipelineCreateInfo& pipeline_info) {
// !! HACK !!: This only works on NVidia drivers. Dumps shader disasm.
// This code is super ugly. Update this when NVidia includes an official
// way to dump shader disassembly.
VkPipelineCacheCreateInfo pipeline_cache_info;
VkPipelineCache dummy_pipeline_cache;
pipeline_cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
pipeline_cache_info.pNext = nullptr;
pipeline_cache_info.flags = 0;
pipeline_cache_info.initialDataSize = 0;
pipeline_cache_info.pInitialData = nullptr;
auto err = vkCreatePipelineCache(device_, &pipeline_cache_info, nullptr,
&dummy_pipeline_cache);
CheckResult(err, "vkCreatePipelineCache");
// Create a pipeline on the dummy cache and dump it.
VkPipeline dummy_pipeline;
err = vkCreateGraphicsPipelines(device_, dummy_pipeline_cache, 1,
&pipeline_info, nullptr, &dummy_pipeline);
std::vector<uint8_t> pipeline_data;
size_t data_size = 0;
err = vkGetPipelineCacheData(device_, dummy_pipeline_cache, &data_size,
nullptr);
if (err == VK_SUCCESS) {
pipeline_data.resize(data_size);
vkGetPipelineCacheData(device_, dummy_pipeline_cache, &data_size,
pipeline_data.data());
// Scan the data for the disassembly.
std::string disasm_vp, disasm_fp;
const char* disasm_start_vp = nullptr;
const char* disasm_start_fp = nullptr;
size_t search_offset = 0;
const char* search_start =
reinterpret_cast<const char*>(pipeline_data.data());
while (true) {
auto p = reinterpret_cast<const char*>(
memchr(pipeline_data.data() + search_offset, '!',
pipeline_data.size() - search_offset));
if (!p) {
break;
}
if (!strncmp(p, "!!NV", 4)) {
if (!strncmp(p + 4, "vp", 2)) {
disasm_start_vp = p;
} else if (!strncmp(p + 4, "fp", 2)) {
disasm_start_fp = p;
}
if (disasm_start_fp && disasm_start_vp) {
// Found all we needed.
break;
}
}
search_offset = p - search_start;
++search_offset;
}
if (disasm_start_vp) {
disasm_vp = std::string(disasm_start_vp);
// For some reason there's question marks all over the code.
disasm_vp.erase(std::remove(disasm_vp.begin(), disasm_vp.end(), '?'),
disasm_vp.end());
} else {
disasm_vp = std::string("Shader disassembly not available.");
}
if (disasm_start_fp) {
disasm_fp = std::string(disasm_start_fp);
// For some reason there's question marks all over the code.
disasm_fp.erase(std::remove(disasm_fp.begin(), disasm_fp.end(), '?'),
disasm_fp.end());
} else {
disasm_fp = std::string("Shader disassembly not available.");
}
XELOGI("%s\n=====================================\n%s\n", disasm_vp.c_str(),
disasm_fp.c_str());
}
vkDestroyPipelineCache(device_, dummy_pipeline_cache, nullptr);
}
VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type, VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type,
bool is_line_mode) { bool is_line_mode) {
switch (primitive_type) { switch (primitive_type) {
@ -334,10 +428,16 @@ VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type,
bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
bool full_update) { bool full_update) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
auto& regs = set_dynamic_state_registers_; auto& regs = set_dynamic_state_registers_;
bool window_offset_dirty = SetShadowRegister(&regs.pa_sc_window_offset, bool window_offset_dirty = SetShadowRegister(&regs.pa_sc_window_offset,
XE_GPU_REG_PA_SC_WINDOW_OFFSET); XE_GPU_REG_PA_SC_WINDOW_OFFSET);
window_offset_dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
// Window parameters. // Window parameters.
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
@ -397,22 +497,21 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale, viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale,
XE_GPU_REG_PA_CL_VPORT_ZSCALE); XE_GPU_REG_PA_CL_VPORT_ZSCALE);
if (viewport_state_dirty) { if (viewport_state_dirty) {
// HACK: no clue where to get these values.
// RB_SURFACE_INFO // RB_SURFACE_INFO
auto surface_msaa = auto surface_msaa =
static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3); static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3);
// TODO(benvanik): ??
// Apply a multiplier to emulate MSAA.
float window_width_scalar = 1; float window_width_scalar = 1;
float window_height_scalar = 1; float window_height_scalar = 1;
switch (surface_msaa) { switch (surface_msaa) {
case MsaaSamples::k1X: case MsaaSamples::k1X:
break; break;
case MsaaSamples::k2X: case MsaaSamples::k2X:
window_width_scalar = 2; window_height_scalar = 2;
break; break;
case MsaaSamples::k4X: case MsaaSamples::k4X:
window_width_scalar = 2; window_width_scalar = window_height_scalar = 2;
window_height_scalar = 2;
break; break;
} }
@ -429,10 +528,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
vport_yoffset_enable == vport_zoffset_enable); vport_yoffset_enable == vport_zoffset_enable);
VkViewport viewport_rect; VkViewport viewport_rect;
viewport_rect.x = 0; std::memset(&viewport_rect, 0, sizeof(VkViewport));
viewport_rect.y = 0;
viewport_rect.width = 100;
viewport_rect.height = 100;
viewport_rect.minDepth = 0; viewport_rect.minDepth = 0;
viewport_rect.maxDepth = 1; viewport_rect.maxDepth = 1;
@ -443,6 +539,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
float voy = vport_yoffset_enable ? regs.pa_cl_vport_yoffset : 0; float voy = vport_yoffset_enable ? regs.pa_cl_vport_yoffset : 0;
float vsx = vport_xscale_enable ? regs.pa_cl_vport_xscale : 1; float vsx = vport_xscale_enable ? regs.pa_cl_vport_xscale : 1;
float vsy = vport_yscale_enable ? regs.pa_cl_vport_yscale : 1; float vsy = vport_yscale_enable ? regs.pa_cl_vport_yscale : 1;
window_width_scalar = window_height_scalar = 1; window_width_scalar = window_height_scalar = 1;
float vpw = 2 * window_width_scalar * vsx; float vpw = 2 * window_width_scalar * vsx;
float vph = -2 * window_height_scalar * vsy; float vph = -2 * window_height_scalar * vsy;
@ -490,25 +587,25 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
vkCmdSetBlendConstants(command_buffer, regs.rb_blend_rgba); vkCmdSetBlendConstants(command_buffer, regs.rb_blend_rgba);
} }
// VK_DYNAMIC_STATE_LINE_WIDTH if (full_update) {
vkCmdSetLineWidth(command_buffer, 1.0f); // VK_DYNAMIC_STATE_LINE_WIDTH
vkCmdSetLineWidth(command_buffer, 1.0f);
// VK_DYNAMIC_STATE_DEPTH_BIAS // VK_DYNAMIC_STATE_DEPTH_BIAS
vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f); vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f);
// VK_DYNAMIC_STATE_DEPTH_BOUNDS // VK_DYNAMIC_STATE_DEPTH_BOUNDS
vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f); vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f);
// VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK // VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK
vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
// VK_DYNAMIC_STATE_STENCIL_REFERENCE // VK_DYNAMIC_STATE_STENCIL_REFERENCE
vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
// VK_DYNAMIC_STATE_STENCIL_WRITE_MASK // VK_DYNAMIC_STATE_STENCIL_WRITE_MASK
vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
}
// TODO(benvanik): push constants.
bool push_constants_dirty = full_update || viewport_state_dirty; bool push_constants_dirty = full_update || viewport_state_dirty;
push_constants_dirty |= push_constants_dirty |=
@ -539,7 +636,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
push_constants.window_scale[1] = -1.0f; push_constants.window_scale[1] = -1.0f;
} else { } else {
push_constants.window_scale[0] = 1.0f / 2560.0f; push_constants.window_scale[0] = 1.0f / 2560.0f;
push_constants.window_scale[1] = -1.0f / 2560.0f; push_constants.window_scale[1] = 1.0f / 2560.0f;
} }
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
@ -558,7 +655,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
push_constants.vtx_fmt[3] = vtx_w0_fmt; push_constants.vtx_fmt[3] = vtx_w0_fmt;
// Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
// Deprecated in Vulkan, implemented in shader. // Emulated in shader.
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard; // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
// ALPHATESTENABLE // ALPHATESTENABLE
push_constants.alpha_test[0] = push_constants.alpha_test[0] =
@ -657,16 +754,32 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
bool dirty = false; bool dirty = false;
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl, dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL); XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
dirty |= regs.vertex_shader != vertex_shader; dirty |= regs.vertex_shader != vertex_shader;
dirty |= regs.pixel_shader != pixel_shader; dirty |= regs.pixel_shader != pixel_shader;
dirty |= regs.primitive_type != primitive_type; dirty |= regs.primitive_type != primitive_type;
regs.vertex_shader = vertex_shader;
regs.pixel_shader = pixel_shader;
regs.primitive_type = primitive_type;
XXH64_update(&hash_state_, &regs, sizeof(regs)); XXH64_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) { if (!dirty) {
return UpdateStatus::kCompatible; return UpdateStatus::kCompatible;
} }
regs.vertex_shader = vertex_shader;
regs.pixel_shader = pixel_shader; xenos::xe_gpu_program_cntl_t sq_program_cntl;
regs.primitive_type = primitive_type; sq_program_cntl.dword_0 = regs.sq_program_cntl;
if (!vertex_shader->is_translated() &&
!TranslateShader(vertex_shader, sq_program_cntl)) {
XELOGE("Failed to translate the vertex shader!");
return UpdateStatus::kError;
}
if (!pixel_shader->is_translated() &&
!TranslateShader(pixel_shader, sq_program_cntl)) {
XELOGE("Failed to translate the pixel shader!");
return UpdateStatus::kError;
}
update_shader_stages_stage_count_ = 0; update_shader_stages_stage_count_ = 0;
@ -723,11 +836,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState(
bool dirty = false; bool dirty = false;
dirty |= vertex_shader != regs.vertex_shader; dirty |= vertex_shader != regs.vertex_shader;
regs.vertex_shader = vertex_shader;
XXH64_update(&hash_state_, &regs, sizeof(regs)); XXH64_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) { if (!dirty) {
return UpdateStatus::kCompatible; return UpdateStatus::kCompatible;
} }
regs.vertex_shader = vertex_shader;
state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
state_info.pNext = nullptr; state_info.pNext = nullptr;
@ -765,11 +878,14 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState(
: VK_FORMAT_A2R10G10B10_UNORM_PACK32; : VK_FORMAT_A2R10G10B10_UNORM_PACK32;
break; break;
case VertexFormat::k_10_11_11: case VertexFormat::k_10_11_11:
assert_always("unsupported?"); assert_true(is_signed);
vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32;
break; break;
case VertexFormat::k_11_11_10: case VertexFormat::k_11_11_10:
assert_true(is_signed); // Converted in-shader.
// TODO(DrChat)
assert_always();
// vertex_attrib_descr.format = VK_FORMAT_R32_UINT;
vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32; vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32;
break; break;
case VertexFormat::k_16_16: case VertexFormat::k_16_16:
@ -802,19 +918,19 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState(
is_signed ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32_UINT; is_signed ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32_UINT;
break; break;
case VertexFormat::k_32_FLOAT: case VertexFormat::k_32_FLOAT:
assert_true(is_signed); // assert_true(is_signed);
vertex_attrib_descr.format = VK_FORMAT_R32_SFLOAT; vertex_attrib_descr.format = VK_FORMAT_R32_SFLOAT;
break; break;
case VertexFormat::k_32_32_FLOAT: case VertexFormat::k_32_32_FLOAT:
assert_true(is_signed); // assert_true(is_signed);
vertex_attrib_descr.format = VK_FORMAT_R32G32_SFLOAT; vertex_attrib_descr.format = VK_FORMAT_R32G32_SFLOAT;
break; break;
case VertexFormat::k_32_32_32_FLOAT: case VertexFormat::k_32_32_32_FLOAT:
assert_true(is_signed); // assert_true(is_signed);
vertex_attrib_descr.format = VK_FORMAT_R32G32B32_SFLOAT; vertex_attrib_descr.format = VK_FORMAT_R32G32B32_SFLOAT;
break; break;
case VertexFormat::k_32_32_32_32_FLOAT: case VertexFormat::k_32_32_32_32_FLOAT:
assert_true(is_signed); // assert_true(is_signed);
vertex_attrib_descr.format = VK_FORMAT_R32G32B32A32_SFLOAT; vertex_attrib_descr.format = VK_FORMAT_R32G32B32A32_SFLOAT;
break; break;
default: default:
@ -843,11 +959,11 @@ PipelineCache::UpdateStatus PipelineCache::UpdateInputAssemblyState(
XE_GPU_REG_PA_SU_SC_MODE_CNTL); XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index, dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
regs.primitive_type = primitive_type;
XXH64_update(&hash_state_, &regs, sizeof(regs)); XXH64_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) { if (!dirty) {
return UpdateStatus::kCompatible; return UpdateStatus::kCompatible;
} }
regs.primitive_type = primitive_type;
state_info.sType = state_info.sType =
VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
@ -934,14 +1050,17 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState(
auto& state_info = update_rasterization_state_info_; auto& state_info = update_rasterization_state_info_;
bool dirty = false; bool dirty = false;
dirty |= regs.primitive_type != primitive_type;
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl, dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL); XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl, dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl,
XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL); XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br, dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br,
XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
dirty |= SetShadowRegister(&regs.pa_sc_viz_query, XE_GPU_REG_PA_SC_VIZ_QUERY);
dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index, dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
regs.primitive_type = primitive_type;
XXH64_update(&hash_state_, &regs, sizeof(regs)); XXH64_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) { if (!dirty) {
return UpdateStatus::kCompatible; return UpdateStatus::kCompatible;
@ -953,10 +1072,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState(
// TODO(benvanik): right setting? // TODO(benvanik): right setting?
state_info.depthClampEnable = VK_FALSE; state_info.depthClampEnable = VK_FALSE;
// TODO(benvanik): use in depth-only mode?
state_info.rasterizerDiscardEnable = VK_FALSE; state_info.rasterizerDiscardEnable = VK_FALSE;
// KILL_PIX_POST_EARLY_Z
if (regs.pa_sc_viz_query & 0x80) {
state_info.rasterizerDiscardEnable = VK_TRUE;
}
bool poly_mode = ((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0; bool poly_mode = ((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0;
if (poly_mode) { if (poly_mode) {
uint32_t front_poly_mode = (regs.pa_su_sc_mode_cntl >> 5) & 0x7; uint32_t front_poly_mode = (regs.pa_su_sc_mode_cntl >> 5) & 0x7;
@ -981,6 +1103,10 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState(
case 2: case 2:
state_info.cullMode = VK_CULL_MODE_BACK_BIT; state_info.cullMode = VK_CULL_MODE_BACK_BIT;
break; break;
case 3:
// Cull both sides?
assert_always();
break;
} }
if (regs.pa_su_sc_mode_cntl & 0x4) { if (regs.pa_su_sc_mode_cntl & 0x4) {
state_info.frontFace = VK_FRONT_FACE_CLOCKWISE; state_info.frontFace = VK_FRONT_FACE_CLOCKWISE;
@ -1007,18 +1133,53 @@ PipelineCache::UpdateStatus PipelineCache::UpdateMultisampleState() {
auto& regs = update_multisample_state_regs_; auto& regs = update_multisample_state_regs_;
auto& state_info = update_multisample_state_info_; auto& state_info = update_multisample_state_info_;
bool dirty = false;
dirty |= SetShadowRegister(&regs.pa_sc_aa_config, XE_GPU_REG_PA_SC_AA_CONFIG);
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
XXH64_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {
return UpdateStatus::kCompatible;
}
state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
state_info.pNext = nullptr; state_info.pNext = nullptr;
state_info.flags = 0; state_info.flags = 0;
state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; // PA_SC_AA_CONFIG MSAA_NUM_SAMPLES (0x7)
// PA_SC_AA_MASK (0xFFFF)
// PA_SU_SC_MODE_CNTL MSAA_ENABLE (0x10000)
// If set, all samples will be sampled at set locations. Otherwise, they're
// all sampled from the pixel center.
if (FLAGS_vulkan_native_msaa) {
auto msaa_num_samples =
static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3);
switch (msaa_num_samples) {
case MsaaSamples::k1X:
state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
break;
case MsaaSamples::k2X:
state_info.rasterizationSamples = VK_SAMPLE_COUNT_2_BIT;
break;
case MsaaSamples::k4X:
state_info.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT;
break;
default:
assert_unhandled_case(msaa_num_samples);
break;
}
} else {
state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
}
state_info.sampleShadingEnable = VK_FALSE; state_info.sampleShadingEnable = VK_FALSE;
state_info.minSampleShading = 0; state_info.minSampleShading = 0;
state_info.pSampleMask = nullptr; state_info.pSampleMask = nullptr;
state_info.alphaToCoverageEnable = VK_FALSE; state_info.alphaToCoverageEnable = VK_FALSE;
state_info.alphaToOneEnable = VK_FALSE; state_info.alphaToOneEnable = VK_FALSE;
return UpdateStatus::kCompatible; return UpdateStatus::kMismatch;
} }
PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
@ -1038,19 +1199,60 @@ PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
state_info.pNext = nullptr; state_info.pNext = nullptr;
state_info.flags = 0; state_info.flags = 0;
state_info.depthTestEnable = VK_FALSE; static const VkCompareOp compare_func_map[] = {
state_info.depthWriteEnable = VK_FALSE; /* 0 */ VK_COMPARE_OP_NEVER,
state_info.depthCompareOp = VK_COMPARE_OP_ALWAYS; /* 1 */ VK_COMPARE_OP_LESS,
/* 2 */ VK_COMPARE_OP_EQUAL,
/* 3 */ VK_COMPARE_OP_LESS_OR_EQUAL,
/* 4 */ VK_COMPARE_OP_GREATER,
/* 5 */ VK_COMPARE_OP_NOT_EQUAL,
/* 6 */ VK_COMPARE_OP_GREATER_OR_EQUAL,
/* 7 */ VK_COMPARE_OP_ALWAYS,
};
static const VkStencilOp stencil_op_map[] = {
/* 0 */ VK_STENCIL_OP_KEEP,
/* 1 */ VK_STENCIL_OP_ZERO,
/* 2 */ VK_STENCIL_OP_REPLACE,
/* 3 */ VK_STENCIL_OP_INCREMENT_AND_WRAP,
/* 4 */ VK_STENCIL_OP_DECREMENT_AND_WRAP,
/* 5 */ VK_STENCIL_OP_INVERT,
/* 6 */ VK_STENCIL_OP_INCREMENT_AND_CLAMP,
/* 7 */ VK_STENCIL_OP_DECREMENT_AND_CLAMP,
};
// Depth state
// TODO: EARLY_Z_ENABLE (needs to be enabled in shaders)
state_info.depthWriteEnable = !!(regs.rb_depthcontrol & 0x4);
state_info.depthTestEnable = !!(regs.rb_depthcontrol & 0x2);
state_info.stencilTestEnable = !!(regs.rb_depthcontrol & 0x1);
state_info.depthCompareOp =
compare_func_map[(regs.rb_depthcontrol >> 4) & 0x7];
state_info.depthBoundsTestEnable = VK_FALSE; state_info.depthBoundsTestEnable = VK_FALSE;
state_info.stencilTestEnable = VK_FALSE;
state_info.front.failOp = VK_STENCIL_OP_KEEP; uint32_t stencil_ref = (regs.rb_stencilrefmask & 0x000000FF);
state_info.front.passOp = VK_STENCIL_OP_KEEP; uint32_t stencil_read_mask = (regs.rb_stencilrefmask & 0x0000FF00) >> 8;
state_info.front.depthFailOp = VK_STENCIL_OP_KEEP;
state_info.front.compareOp = VK_COMPARE_OP_ALWAYS; // Stencil state
state_info.back.failOp = VK_STENCIL_OP_KEEP; state_info.front.compareOp =
state_info.back.passOp = VK_STENCIL_OP_KEEP; compare_func_map[(regs.rb_depthcontrol >> 8) & 0x7];
state_info.back.depthFailOp = VK_STENCIL_OP_KEEP; state_info.front.failOp = stencil_op_map[(regs.rb_depthcontrol >> 11) & 0x7];
state_info.back.compareOp = VK_COMPARE_OP_ALWAYS; state_info.front.passOp = stencil_op_map[(regs.rb_depthcontrol >> 14) & 0x7];
state_info.front.depthFailOp =
stencil_op_map[(regs.rb_depthcontrol >> 17) & 0x7];
// BACKFACE_ENABLE
if (!!(regs.rb_depthcontrol & 0x80)) {
state_info.back.compareOp =
compare_func_map[(regs.rb_depthcontrol >> 20) & 0x7];
state_info.back.failOp = stencil_op_map[(regs.rb_depthcontrol >> 23) & 0x7];
state_info.back.passOp = stencil_op_map[(regs.rb_depthcontrol >> 26) & 0x7];
state_info.back.depthFailOp =
stencil_op_map[(regs.rb_depthcontrol >> 29) & 0x7];
} else {
// Back state is identical to front state.
std::memcpy(&state_info.back, &state_info.front, sizeof(VkStencilOpState));
}
// Ignored; set dynamically. // Ignored; set dynamically.
state_info.minDepthBounds = 0; state_info.minDepthBounds = 0;
@ -1089,6 +1291,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() {
SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
dirty |= dirty |=
SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
XXH64_update(&hash_state_, &regs, sizeof(regs)); XXH64_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) { if (!dirty) {
return UpdateStatus::kCompatible; return UpdateStatus::kCompatible;
@ -1101,6 +1304,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() {
state_info.logicOpEnable = VK_FALSE; state_info.logicOpEnable = VK_FALSE;
state_info.logicOp = VK_LOGIC_OP_NO_OP; state_info.logicOp = VK_LOGIC_OP_NO_OP;
auto enable_mode = static_cast<xenos::ModeControl>(regs.rb_modecontrol & 0x7);
static const VkBlendFactor kBlendFactorMap[] = { static const VkBlendFactor kBlendFactorMap[] = {
/* 0 */ VK_BLEND_FACTOR_ZERO, /* 0 */ VK_BLEND_FACTOR_ZERO,
/* 1 */ VK_BLEND_FACTOR_ONE, /* 1 */ VK_BLEND_FACTOR_ONE,
@ -1153,7 +1358,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() {
// A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE
// Lines up with VkColorComponentFlagBits, where R=bit 1, G=bit 2, etc.. // Lines up with VkColorComponentFlagBits, where R=bit 1, G=bit 2, etc..
uint32_t write_mask = (regs.rb_color_mask >> (i * 4)) & 0xF; uint32_t write_mask = (regs.rb_color_mask >> (i * 4)) & 0xF;
attachment_state.colorWriteMask = write_mask; attachment_state.colorWriteMask =
enable_mode == xenos::ModeControl::kColorDepth ? write_mask : 0;
} }
state_info.attachmentCount = 4; state_info.attachmentCount = 4;

View File

@ -32,6 +32,12 @@ namespace vulkan {
// including shaders, various blend/etc options, and input configuration. // including shaders, various blend/etc options, and input configuration.
class PipelineCache { class PipelineCache {
public: public:
enum class UpdateStatus {
kCompatible,
kMismatch,
kError,
};
PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device, PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device,
VkDescriptorSetLayout uniform_descriptor_set_layout, VkDescriptorSetLayout uniform_descriptor_set_layout,
VkDescriptorSetLayout texture_descriptor_set_layout); VkDescriptorSetLayout texture_descriptor_set_layout);
@ -46,11 +52,17 @@ class PipelineCache {
// otherwise a new one may be created. Any state that can be set dynamically // otherwise a new one may be created. Any state that can be set dynamically
// in the command buffer is issued at this time. // in the command buffer is issued at this time.
// Returns whether the pipeline could be successfully created. // Returns whether the pipeline could be successfully created.
bool ConfigurePipeline(VkCommandBuffer command_buffer, UpdateStatus ConfigurePipeline(VkCommandBuffer command_buffer,
const RenderState* render_state, const RenderState* render_state,
VulkanShader* vertex_shader, VulkanShader* vertex_shader,
VulkanShader* pixel_shader, VulkanShader* pixel_shader,
PrimitiveType primitive_type); PrimitiveType primitive_type,
VkPipeline* pipeline_out);
// Sets required dynamic state on the command buffer.
// Only state that has changed since the last call will be set unless
// full_update is true.
bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update);
// Pipeline layout shared by all pipelines. // Pipeline layout shared by all pipelines.
VkPipelineLayout pipeline_layout() const { return pipeline_layout_; } VkPipelineLayout pipeline_layout() const { return pipeline_layout_; }
@ -63,16 +75,14 @@ class PipelineCache {
// state. // state.
VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key); VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key);
bool TranslateShader(VulkanShader* shader, xenos::xe_gpu_program_cntl_t cntl);
void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info);
// Gets a geometry shader used to emulate the given primitive type. // Gets a geometry shader used to emulate the given primitive type.
// Returns nullptr if the primitive doesn't need to be emulated. // Returns nullptr if the primitive doesn't need to be emulated.
VkShaderModule GetGeometryShader(PrimitiveType primitive_type, VkShaderModule GetGeometryShader(PrimitiveType primitive_type,
bool is_line_mode); bool is_line_mode);
// Sets required dynamic state on the command buffer.
// Only state that has changed since the last call will be set unless
// full_update is true.
bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update);
RegisterFile* register_file_ = nullptr; RegisterFile* register_file_ = nullptr;
VkDevice device_ = nullptr; VkDevice device_ = nullptr;
@ -111,12 +121,6 @@ class PipelineCache {
VkPipeline current_pipeline_ = nullptr; VkPipeline current_pipeline_ = nullptr;
private: private:
enum class UpdateStatus {
kCompatible,
kMismatch,
kError,
};
UpdateStatus UpdateState(VulkanShader* vertex_shader, UpdateStatus UpdateState(VulkanShader* vertex_shader,
VulkanShader* pixel_shader, VulkanShader* pixel_shader,
PrimitiveType primitive_type); PrimitiveType primitive_type);
@ -154,6 +158,7 @@ class PipelineCache {
struct UpdateShaderStagesRegisters { struct UpdateShaderStagesRegisters {
PrimitiveType primitive_type; PrimitiveType primitive_type;
uint32_t pa_su_sc_mode_cntl; uint32_t pa_su_sc_mode_cntl;
uint32_t sq_program_cntl;
VulkanShader* vertex_shader; VulkanShader* vertex_shader;
VulkanShader* pixel_shader; VulkanShader* pixel_shader;
@ -205,11 +210,12 @@ class PipelineCache {
VkPipelineViewportStateCreateInfo update_viewport_state_info_; VkPipelineViewportStateCreateInfo update_viewport_state_info_;
struct UpdateRasterizationStateRegisters { struct UpdateRasterizationStateRegisters {
PrimitiveType primitive_type;
uint32_t pa_su_sc_mode_cntl; uint32_t pa_su_sc_mode_cntl;
uint32_t pa_sc_screen_scissor_tl; uint32_t pa_sc_screen_scissor_tl;
uint32_t pa_sc_screen_scissor_br; uint32_t pa_sc_screen_scissor_br;
uint32_t pa_sc_viz_query;
uint32_t multi_prim_ib_reset_index; uint32_t multi_prim_ib_reset_index;
PrimitiveType prim_type;
UpdateRasterizationStateRegisters() { Reset(); } UpdateRasterizationStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); } void Reset() { std::memset(this, 0, sizeof(*this)); }
@ -217,6 +223,10 @@ class PipelineCache {
VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_; VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_;
struct UpdateMultisampleStateeRegisters { struct UpdateMultisampleStateeRegisters {
uint32_t pa_sc_aa_config;
uint32_t pa_su_sc_mode_cntl;
uint32_t rb_surface_info;
UpdateMultisampleStateeRegisters() { Reset(); } UpdateMultisampleStateeRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); } void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_multisample_state_regs_; } update_multisample_state_regs_;
@ -235,6 +245,7 @@ class PipelineCache {
uint32_t rb_colorcontrol; uint32_t rb_colorcontrol;
uint32_t rb_color_mask; uint32_t rb_color_mask;
uint32_t rb_blendcontrol[4]; uint32_t rb_blendcontrol[4];
uint32_t rb_modecontrol;
UpdateColorBlendStateRegisters() { Reset(); } UpdateColorBlendStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); } void Reset() { std::memset(this, 0, sizeof(*this)); }

View File

@ -39,7 +39,7 @@ VkFormat ColorRenderTargetFormatToVkFormat(ColorRenderTargetFormat format) {
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown: case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown:
// WARNING: this is wrong, most likely - no float form in vulkan? // WARNING: this is wrong, most likely - no float form in vulkan?
XELOGW("Unsupported EDRAM format k_2_10_10_10_FLOAT used"); XELOGW("Unsupported EDRAM format k_2_10_10_10_FLOAT used");
return VK_FORMAT_A2R10G10B10_SSCALED_PACK32; return VK_FORMAT_A2R10G10B10_UNORM_PACK32;
case ColorRenderTargetFormat::k_16_16: case ColorRenderTargetFormat::k_16_16:
return VK_FORMAT_R16G16_UNORM; return VK_FORMAT_R16G16_UNORM;
case ColorRenderTargetFormat::k_16_16_16_16: case ColorRenderTargetFormat::k_16_16_16_16:
@ -71,34 +71,6 @@ VkFormat DepthRenderTargetFormatToVkFormat(DepthRenderTargetFormat format) {
} }
} }
// Cached view into the EDRAM memory.
// The image is aliased to a region of the edram_memory_ based on the tile
// parameters.
// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible
// formats?
class CachedTileView {
public:
// Key identifying the view in the cache.
TileViewKey key;
// Image mapped into EDRAM.
VkImage image = nullptr;
// Simple view on the image matching the format.
VkImageView image_view = nullptr;
CachedTileView(VkDevice device, VkDeviceMemory edram_memory,
TileViewKey view_key);
~CachedTileView();
bool IsEqual(const TileViewKey& other_key) const {
auto a = reinterpret_cast<const uint64_t*>(&key);
auto b = reinterpret_cast<const uint64_t*>(&other_key);
return *a == *b;
}
private:
VkDevice device_ = nullptr;
};
// Cached framebuffer referencing tile attachments. // Cached framebuffer referencing tile attachments.
// Each framebuffer is specific to a render pass. Ugh. // Each framebuffer is specific to a render pass. Ugh.
class CachedFramebuffer { class CachedFramebuffer {
@ -151,9 +123,11 @@ class CachedRenderPass {
VkDevice device_ = nullptr; VkDevice device_ = nullptr;
}; };
CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory, CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device,
VkCommandBuffer command_buffer,
VkDeviceMemory edram_memory,
TileViewKey view_key) TileViewKey view_key)
: device_(device), key(std::move(view_key)) { : device_(*device), key(std::move(view_key)) {
// Map format to Vulkan. // Map format to Vulkan.
VkFormat vulkan_format = VK_FORMAT_UNDEFINED; VkFormat vulkan_format = VK_FORMAT_UNDEFINED;
uint32_t bpp = 4; uint32_t bpp = 4;
@ -175,7 +149,8 @@ CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory,
vulkan_format = DepthRenderTargetFormatToVkFormat(edram_format); vulkan_format = DepthRenderTargetFormatToVkFormat(edram_format);
} }
assert_true(vulkan_format != VK_FORMAT_UNDEFINED); assert_true(vulkan_format != VK_FORMAT_UNDEFINED);
assert_true(bpp == 4); // FIXME(DrChat): Was this check necessary?
// assert_true(bpp == 4);
// Create the image with the desired properties. // Create the image with the desired properties.
VkImageCreateInfo image_info; VkImageCreateInfo image_info;
@ -191,8 +166,25 @@ CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory,
image_info.extent.depth = 1; image_info.extent.depth = 1;
image_info.mipLevels = 1; image_info.mipLevels = 1;
image_info.arrayLayers = 1; image_info.arrayLayers = 1;
// TODO(benvanik): native MSAA support? if (FLAGS_vulkan_native_msaa) {
image_info.samples = VK_SAMPLE_COUNT_1_BIT; auto msaa_samples = static_cast<MsaaSamples>(key.msaa_samples);
switch (msaa_samples) {
case MsaaSamples::k1X:
image_info.samples = VK_SAMPLE_COUNT_1_BIT;
break;
case MsaaSamples::k2X:
image_info.samples = VK_SAMPLE_COUNT_2_BIT;
break;
case MsaaSamples::k4X:
image_info.samples = VK_SAMPLE_COUNT_4_BIT;
break;
default:
assert_unhandled_case(msaa_samples);
}
} else {
image_info.samples = VK_SAMPLE_COUNT_1_BIT;
}
sample_count = image_info.samples;
image_info.tiling = VK_IMAGE_TILING_OPTIMAL; image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
@ -203,19 +195,17 @@ CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory,
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_info.queueFamilyIndexCount = 0; image_info.queueFamilyIndexCount = 0;
image_info.pQueueFamilyIndices = nullptr; image_info.pQueueFamilyIndices = nullptr;
image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
auto err = vkCreateImage(device_, &image_info, nullptr, &image); auto err = vkCreateImage(device_, &image_info, nullptr, &image);
CheckResult(err, "vkCreateImage"); CheckResult(err, "vkCreateImage");
// Verify our assumptions about memory layout are correct.
VkDeviceSize edram_offset = key.tile_offset * 5120;
VkMemoryRequirements memory_requirements; VkMemoryRequirements memory_requirements;
vkGetImageMemoryRequirements(device, image, &memory_requirements); vkGetImageMemoryRequirements(*device, image, &memory_requirements);
assert_true(edram_offset + memory_requirements.size <= kEdramBufferCapacity);
assert_true(edram_offset % memory_requirements.alignment == 0);
// Bind to the region of EDRAM we occupy. // Bind to a newly allocated chunk.
err = vkBindImageMemory(device_, image, edram_memory, edram_offset); // TODO: Alias from a really big buffer?
memory = device->AllocateMemory(memory_requirements, 0);
err = vkBindImageMemory(device_, image, memory, 0);
CheckResult(err, "vkBindImageMemory"); CheckResult(err, "vkBindImageMemory");
// Create the image view we'll use to attach it to a framebuffer. // Create the image view we'll use to attach it to a framebuffer.
@ -242,11 +232,37 @@ CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory,
CheckResult(err, "vkCreateImageView"); CheckResult(err, "vkCreateImageView");
// TODO(benvanik): transition to general layout? // TODO(benvanik): transition to general layout?
VkImageMemoryBarrier image_barrier;
image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_barrier.pNext = nullptr;
image_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
image_barrier.dstAccessMask =
key.color_or_depth ? VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
: VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
image_barrier.dstAccessMask |=
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_barrier.image = image;
image_barrier.subresourceRange.aspectMask =
key.color_or_depth
? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
image_barrier.subresourceRange.baseMipLevel = 0;
image_barrier.subresourceRange.levelCount = 1;
image_barrier.subresourceRange.baseArrayLayer = 0;
image_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
nullptr, 1, &image_barrier);
} }
CachedTileView::~CachedTileView() { CachedTileView::~CachedTileView() {
vkDestroyImageView(device_, image_view, nullptr); vkDestroyImageView(device_, image_view, nullptr);
vkDestroyImage(device_, image, nullptr); vkDestroyImage(device_, image, nullptr);
vkFreeMemory(device_, memory, nullptr);
} }
CachedFramebuffer::CachedFramebuffer( CachedFramebuffer::CachedFramebuffer(
@ -293,8 +309,15 @@ bool CachedFramebuffer::IsCompatible(
const RenderConfiguration& desired_config) const { const RenderConfiguration& desired_config) const {
// We already know all render pass things line up, so let's verify dimensions, // We already know all render pass things line up, so let's verify dimensions,
// edram offsets, etc. We need an exact match. // edram offsets, etc. We need an exact match.
if (desired_config.surface_pitch_px != width || uint32_t surface_pitch_px = desired_config.surface_msaa != MsaaSamples::k4X
desired_config.surface_height_px != height) { ? desired_config.surface_pitch_px
: desired_config.surface_pitch_px * 2;
uint32_t surface_height_px = desired_config.surface_msaa == MsaaSamples::k1X
? desired_config.surface_height_px
: desired_config.surface_height_px * 2;
surface_pitch_px = std::min(surface_pitch_px, 2560u);
surface_height_px = std::min(surface_height_px, 2560u);
if (surface_pitch_px != width || surface_height_px != height) {
return false; return false;
} }
// TODO(benvanik): separate image views from images in tiles and store in fb? // TODO(benvanik): separate image views from images in tiles and store in fb?
@ -327,13 +350,33 @@ CachedRenderPass::CachedRenderPass(VkDevice device,
: device_(device) { : device_(device) {
std::memcpy(&config, &desired_config, sizeof(config)); std::memcpy(&config, &desired_config, sizeof(config));
VkSampleCountFlagBits sample_count;
if (FLAGS_vulkan_native_msaa) {
switch (desired_config.surface_msaa) {
case MsaaSamples::k1X:
sample_count = VK_SAMPLE_COUNT_1_BIT;
break;
case MsaaSamples::k2X:
sample_count = VK_SAMPLE_COUNT_2_BIT;
break;
case MsaaSamples::k4X:
sample_count = VK_SAMPLE_COUNT_4_BIT;
break;
default:
assert_unhandled_case(desired_config.surface_msaa);
break;
}
} else {
sample_count = VK_SAMPLE_COUNT_1_BIT;
}
// Initialize all attachments to default unused. // Initialize all attachments to default unused.
// As we set layout(location=RT) in shaders we must always provide 4. // As we set layout(location=RT) in shaders we must always provide 4.
VkAttachmentDescription attachments[5]; VkAttachmentDescription attachments[5];
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
attachments[i].flags = 0; attachments[i].flags = 0;
attachments[i].format = VK_FORMAT_UNDEFINED; attachments[i].format = VK_FORMAT_UNDEFINED;
attachments[i].samples = VK_SAMPLE_COUNT_1_BIT; attachments[i].samples = sample_count;
attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
@ -344,7 +387,7 @@ CachedRenderPass::CachedRenderPass(VkDevice device,
auto& depth_stencil_attachment = attachments[4]; auto& depth_stencil_attachment = attachments[4];
depth_stencil_attachment.flags = 0; depth_stencil_attachment.flags = 0;
depth_stencil_attachment.format = VK_FORMAT_UNDEFINED; depth_stencil_attachment.format = VK_FORMAT_UNDEFINED;
depth_stencil_attachment.samples = VK_SAMPLE_COUNT_1_BIT; depth_stencil_attachment.samples = sample_count;
depth_stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; depth_stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
depth_stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; depth_stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
depth_stencil_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; depth_stencil_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
@ -409,6 +452,11 @@ CachedRenderPass::~CachedRenderPass() {
bool CachedRenderPass::IsCompatible( bool CachedRenderPass::IsCompatible(
const RenderConfiguration& desired_config) const { const RenderConfiguration& desired_config) const {
if (config.surface_msaa != desired_config.surface_msaa &&
FLAGS_vulkan_native_msaa) {
return false;
}
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
// TODO(benvanik): allow compatible vulkan formats. // TODO(benvanik): allow compatible vulkan formats.
if (config.color[i].format != desired_config.color[i].format) { if (config.color[i].format != desired_config.color[i].format) {
@ -423,9 +471,10 @@ bool CachedRenderPass::IsCompatible(
RenderCache::RenderCache(RegisterFile* register_file, RenderCache::RenderCache(RegisterFile* register_file,
ui::vulkan::VulkanDevice* device) ui::vulkan::VulkanDevice* device)
: register_file_(register_file), device_(*device) { : register_file_(register_file), device_(device) {
VkResult status = VK_SUCCESS;
// Create the buffer we'll bind to our memory. // Create the buffer we'll bind to our memory.
// We do this first so we can get the right memory type.
VkBufferCreateInfo buffer_info; VkBufferCreateInfo buffer_info;
buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
buffer_info.pNext = nullptr; buffer_info.pNext = nullptr;
@ -436,55 +485,39 @@ RenderCache::RenderCache(RegisterFile* register_file,
buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
buffer_info.queueFamilyIndexCount = 0; buffer_info.queueFamilyIndexCount = 0;
buffer_info.pQueueFamilyIndices = nullptr; buffer_info.pQueueFamilyIndices = nullptr;
auto err = vkCreateBuffer(*device, &buffer_info, nullptr, &edram_buffer_); status = vkCreateBuffer(*device, &buffer_info, nullptr, &edram_buffer_);
CheckResult(err, "vkCreateBuffer"); CheckResult(status, "vkCreateBuffer");
// Query requirements for the buffer. // Query requirements for the buffer.
// It should be 1:1. // It should be 1:1.
VkMemoryRequirements buffer_requirements; VkMemoryRequirements buffer_requirements;
vkGetBufferMemoryRequirements(device_, edram_buffer_, &buffer_requirements); vkGetBufferMemoryRequirements(*device_, edram_buffer_, &buffer_requirements);
assert_true(buffer_requirements.size == kEdramBufferCapacity); assert_true(buffer_requirements.size == kEdramBufferCapacity);
// Create a dummy image so we can see what memory bits it requires.
// They should overlap with the buffer requirements but are likely more
// strict.
VkImageCreateInfo test_image_info;
test_image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
test_image_info.pNext = nullptr;
test_image_info.flags = 0;
test_image_info.imageType = VK_IMAGE_TYPE_2D;
test_image_info.format = VK_FORMAT_R8G8B8A8_UINT;
test_image_info.extent.width = 128;
test_image_info.extent.height = 128;
test_image_info.extent.depth = 1;
test_image_info.mipLevels = 1;
test_image_info.arrayLayers = 1;
test_image_info.samples = VK_SAMPLE_COUNT_1_BIT;
test_image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
test_image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
test_image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
test_image_info.queueFamilyIndexCount = 0;
test_image_info.pQueueFamilyIndices = nullptr;
test_image_info.initialLayout = VK_IMAGE_LAYOUT_GENERAL;
VkImage test_image = nullptr;
err = vkCreateImage(device_, &test_image_info, nullptr, &test_image);
CheckResult(err, "vkCreateImage");
VkMemoryRequirements image_requirements;
vkGetImageMemoryRequirements(device_, test_image, &image_requirements);
vkDestroyImage(device_, test_image, nullptr);
assert_true((image_requirements.memoryTypeBits &
buffer_requirements.memoryTypeBits) != 0);
// Allocate EDRAM memory. // Allocate EDRAM memory.
VkMemoryRequirements memory_requirements;
memory_requirements.size = buffer_requirements.size;
memory_requirements.alignment = buffer_requirements.alignment;
memory_requirements.memoryTypeBits = image_requirements.memoryTypeBits;
// TODO(benvanik): do we need it host visible? // TODO(benvanik): do we need it host visible?
edram_memory_ = device->AllocateMemory(memory_requirements, 0); edram_memory_ = device->AllocateMemory(buffer_requirements);
assert_not_null(edram_memory_);
// Bind buffer to map our entire memory. // Bind buffer to map our entire memory.
vkBindBufferMemory(device_, edram_buffer_, edram_memory_, 0); status = vkBindBufferMemory(*device_, edram_buffer_, edram_memory_, 0);
CheckResult(status, "vkBindBufferMemory");
if (status == VK_SUCCESS) {
// For debugging, upload a grid into the EDRAM buffer.
uint32_t* gpu_data = nullptr;
status = vkMapMemory(*device_, edram_memory_, 0, buffer_requirements.size,
0, reinterpret_cast<void**>(&gpu_data));
CheckResult(status, "vkMapMemory");
if (status == VK_SUCCESS) {
for (int i = 0; i < kEdramBufferCapacity / 4; i++) {
gpu_data[i] = (i % 8) >= 4 ? 0xFF0000FF : 0xFFFFFFFF;
}
vkUnmapMemory(*device_, edram_memory_);
}
}
} }
RenderCache::~RenderCache() { RenderCache::~RenderCache() {
@ -503,13 +536,36 @@ RenderCache::~RenderCache() {
cached_tile_views_.clear(); cached_tile_views_.clear();
// Release underlying EDRAM memory. // Release underlying EDRAM memory.
vkDestroyBuffer(device_, edram_buffer_, nullptr); vkDestroyBuffer(*device_, edram_buffer_, nullptr);
vkFreeMemory(device_, edram_memory_, nullptr); vkFreeMemory(*device_, edram_memory_, nullptr);
}
// Reports whether any render-target related register in the live register
// file differs from the value held in shadow_registers_.
//
// Returns true if at least one of the compared registers (mode control,
// surface info, the four color infos, depth info, or either window scissor
// corner) no longer matches its shadow copy.
bool RenderCache::dirty() const {
  auto& live = *register_file_;
  auto& shadow = shadow_registers_;

  // Every comparison is evaluated up front (no short-circuiting) and the
  // results are combined at the end.
  const bool mode_changed =
      shadow.rb_modecontrol != live[XE_GPU_REG_RB_MODECONTROL].u32;
  const bool surface_changed =
      shadow.rb_surface_info != live[XE_GPU_REG_RB_SURFACE_INFO].u32;
  const bool color0_changed =
      shadow.rb_color_info != live[XE_GPU_REG_RB_COLOR_INFO].u32;
  const bool color1_changed =
      shadow.rb_color1_info != live[XE_GPU_REG_RB_COLOR1_INFO].u32;
  const bool color2_changed =
      shadow.rb_color2_info != live[XE_GPU_REG_RB_COLOR2_INFO].u32;
  const bool color3_changed =
      shadow.rb_color3_info != live[XE_GPU_REG_RB_COLOR3_INFO].u32;
  const bool depth_changed =
      shadow.rb_depth_info != live[XE_GPU_REG_RB_DEPTH_INFO].u32;
  const bool scissor_tl_changed =
      shadow.pa_sc_window_scissor_tl !=
      live[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
  const bool scissor_br_changed =
      shadow.pa_sc_window_scissor_br !=
      live[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;

  return mode_changed || surface_changed || color0_changed ||
         color1_changed || color2_changed || color3_changed ||
         depth_changed || scissor_tl_changed || scissor_br_changed;
}
const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
VulkanShader* vertex_shader, VulkanShader* vertex_shader,
VulkanShader* pixel_shader) { VulkanShader* pixel_shader) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
assert_null(current_command_buffer_); assert_null(current_command_buffer_);
current_command_buffer_ = command_buffer; current_command_buffer_ = command_buffer;
@ -542,13 +598,34 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
} }
// Lookup or generate a new render pass and framebuffer for the new state. // Lookup or generate a new render pass and framebuffer for the new state.
if (!ConfigureRenderPass(config, &render_pass, &framebuffer)) { if (!ConfigureRenderPass(command_buffer, config, &render_pass,
&framebuffer)) {
return nullptr; return nullptr;
} }
current_state_.render_pass = render_pass; current_state_.render_pass = render_pass;
current_state_.render_pass_handle = render_pass->handle; current_state_.render_pass_handle = render_pass->handle;
current_state_.framebuffer = framebuffer; current_state_.framebuffer = framebuffer;
current_state_.framebuffer_handle = framebuffer->handle; current_state_.framebuffer_handle = framebuffer->handle;
// TODO(DrChat): Determine if we actually need an EDRAM buffer.
/*
// Depth
auto depth_target = current_state_.framebuffer->depth_stencil_attachment;
if (depth_target && current_state_.config.depth_stencil.used) {
UpdateTileView(command_buffer, depth_target, true);
}
// Color
for (int i = 0; i < 4; i++) {
auto target = current_state_.framebuffer->color_attachments[i];
if (!target || !current_state_.config.color[i].used) {
continue;
}
UpdateTileView(command_buffer, target, true);
}
*/
} }
if (!render_pass) { if (!render_pass) {
return nullptr; return nullptr;
@ -571,6 +648,15 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
render_pass_begin_info.renderArea.extent.width = config->surface_pitch_px; render_pass_begin_info.renderArea.extent.width = config->surface_pitch_px;
render_pass_begin_info.renderArea.extent.height = config->surface_height_px; render_pass_begin_info.renderArea.extent.height = config->surface_height_px;
if (config->surface_msaa == MsaaSamples::k2X) {
render_pass_begin_info.renderArea.extent.height =
std::min(config->surface_height_px * 2, 2560u);
} else if (config->surface_msaa == MsaaSamples::k4X) {
render_pass_begin_info.renderArea.extent.width *= 2;
render_pass_begin_info.renderArea.extent.height =
std::min(config->surface_height_px * 2, 2560u);
}
// Configure clear color, if clearing. // Configure clear color, if clearing.
// TODO(benvanik): enable clearing here during resolve? // TODO(benvanik): enable clearing here during resolve?
render_pass_begin_info.clearValueCount = 0; render_pass_begin_info.clearValueCount = 0;
@ -601,9 +687,15 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) {
// Guess the height from the scissor height. // Guess the height from the scissor height.
// It's wildly inaccurate, but I've never seen it be bigger than the // It's wildly inaccurate, but I've never seen it be bigger than the
// EDRAM tiling. // EDRAM tiling.
/*
uint32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF; uint32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF;
uint32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y; uint32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y;
config->surface_height_px = std::min(2560u, xe::round_up(ws_h, 16)); config->surface_height_px = std::min(2560u, xe::round_up(ws_h, 16));
*/
// TODO(DrChat): Find an accurate way to get the surface height. Until we do,
// we're going to hardcode it to 2560, as that's the absolute maximum.
config->surface_height_px = 2560;
// Color attachment configuration. // Color attachment configuration.
if (config->mode_control == ModeControl::kColorDepth) { if (config->mode_control == ModeControl::kColorDepth) {
@ -620,12 +712,23 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) {
case ColorRenderTargetFormat::k_8_8_8_8_GAMMA: case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8; config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8;
break; break;
case ColorRenderTargetFormat::k_2_10_10_10_unknown:
config->color[i].format = ColorRenderTargetFormat::k_2_10_10_10;
break;
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown:
config->color[i].format = ColorRenderTargetFormat::k_2_10_10_10_FLOAT;
break;
} }
// Make sure all unknown bits are unset.
// RDR sets bit 0x00400000
// assert_zero(color_info[i] & ~0x000F0FFF);
} }
} else { } else {
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
config->color[i].edram_base = 0; config->color[i].edram_base = 0;
config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8; config->color[i].format = ColorRenderTargetFormat::k_8_8_8_8;
config->color[i].used = false;
} }
} }
@ -635,15 +738,20 @@ bool RenderCache::ParseConfiguration(RenderConfiguration* config) {
config->depth_stencil.edram_base = regs.rb_depth_info & 0xFFF; config->depth_stencil.edram_base = regs.rb_depth_info & 0xFFF;
config->depth_stencil.format = config->depth_stencil.format =
static_cast<DepthRenderTargetFormat>((regs.rb_depth_info >> 16) & 0x1); static_cast<DepthRenderTargetFormat>((regs.rb_depth_info >> 16) & 0x1);
// Make sure all unknown bits are unset.
// assert_zero(regs.rb_depth_info & ~0x00010FFF);
} else { } else {
config->depth_stencil.edram_base = 0; config->depth_stencil.edram_base = 0;
config->depth_stencil.format = DepthRenderTargetFormat::kD24S8; config->depth_stencil.format = DepthRenderTargetFormat::kD24S8;
config->depth_stencil.used = false;
} }
return true; return true;
} }
bool RenderCache::ConfigureRenderPass(RenderConfiguration* config, bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer,
RenderConfiguration* config,
CachedRenderPass** out_render_pass, CachedRenderPass** out_render_pass,
CachedFramebuffer** out_framebuffer) { CachedFramebuffer** out_framebuffer) {
*out_render_pass = nullptr; *out_render_pass = nullptr;
@ -662,7 +770,7 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config,
// If no render pass was found in the cache create a new one. // If no render pass was found in the cache create a new one.
if (!render_pass) { if (!render_pass) {
render_pass = new CachedRenderPass(device_, *config); render_pass = new CachedRenderPass(*device_, *config);
cached_render_passes_.push_back(render_pass); cached_render_passes_.push_back(render_pass);
} }
@ -679,16 +787,25 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config,
// If no framebuffer was found in the cache create a new one. // If no framebuffer was found in the cache create a new one.
if (!framebuffer) { if (!framebuffer) {
uint32_t tile_width = config->surface_msaa == MsaaSamples::k4X ? 40 : 80;
uint32_t tile_height = config->surface_msaa != MsaaSamples::k1X ? 8 : 16;
CachedTileView* target_color_attachments[4] = {nullptr, nullptr, nullptr, CachedTileView* target_color_attachments[4] = {nullptr, nullptr, nullptr,
nullptr}; nullptr};
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
TileViewKey color_key; TileViewKey color_key;
color_key.tile_offset = config->color[i].edram_base; color_key.tile_offset = config->color[i].edram_base;
color_key.tile_width = config->surface_pitch_px / 80; color_key.tile_width =
color_key.tile_height = config->surface_height_px / 16; xe::round_up(config->surface_pitch_px, tile_width) / tile_width;
// color_key.tile_height =
// xe::round_up(config->surface_height_px, tile_height) / tile_height;
color_key.tile_height = 160;
color_key.color_or_depth = 1; color_key.color_or_depth = 1;
color_key.msaa_samples =
0; // static_cast<uint16_t>(config->surface_msaa);
color_key.edram_format = static_cast<uint16_t>(config->color[i].format); color_key.edram_format = static_cast<uint16_t>(config->color[i].format);
target_color_attachments[i] = GetTileView(color_key); target_color_attachments[i] =
FindOrCreateTileView(command_buffer, color_key);
if (!target_color_attachments) { if (!target_color_attachments) {
XELOGE("Failed to get tile view for color attachment"); XELOGE("Failed to get tile view for color attachment");
return false; return false;
@ -697,21 +814,34 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config,
TileViewKey depth_stencil_key; TileViewKey depth_stencil_key;
depth_stencil_key.tile_offset = config->depth_stencil.edram_base; depth_stencil_key.tile_offset = config->depth_stencil.edram_base;
depth_stencil_key.tile_width = config->surface_pitch_px / 80; depth_stencil_key.tile_width =
depth_stencil_key.tile_height = config->surface_height_px / 16; xe::round_up(config->surface_pitch_px, tile_width) / tile_width;
// depth_stencil_key.tile_height =
// xe::round_up(config->surface_height_px, tile_height) / tile_height;
depth_stencil_key.tile_height = 160;
depth_stencil_key.color_or_depth = 0; depth_stencil_key.color_or_depth = 0;
depth_stencil_key.msaa_samples =
0; // static_cast<uint16_t>(config->surface_msaa);
depth_stencil_key.edram_format = depth_stencil_key.edram_format =
static_cast<uint16_t>(config->depth_stencil.format); static_cast<uint16_t>(config->depth_stencil.format);
auto target_depth_stencil_attachment = GetTileView(depth_stencil_key); auto target_depth_stencil_attachment =
FindOrCreateTileView(command_buffer, depth_stencil_key);
if (!target_depth_stencil_attachment) { if (!target_depth_stencil_attachment) {
XELOGE("Failed to get tile view for depth/stencil attachment"); XELOGE("Failed to get tile view for depth/stencil attachment");
return false; return false;
} }
uint32_t surface_pitch_px = config->surface_msaa != MsaaSamples::k4X
? config->surface_pitch_px
: config->surface_pitch_px * 2;
uint32_t surface_height_px = config->surface_msaa == MsaaSamples::k1X
? config->surface_height_px
: config->surface_height_px * 2;
surface_pitch_px = std::min(surface_pitch_px, 2560u);
surface_height_px = std::min(surface_height_px, 2560u);
framebuffer = new CachedFramebuffer( framebuffer = new CachedFramebuffer(
device_, render_pass->handle, config->surface_pitch_px, *device_, render_pass->handle, surface_pitch_px, surface_height_px,
config->surface_height_px, target_color_attachments, target_color_attachments, target_depth_stencil_attachment);
target_depth_stencil_attachment);
render_pass->cached_framebuffers.push_back(framebuffer); render_pass->cached_framebuffers.push_back(framebuffer);
} }
@ -720,7 +850,75 @@ bool RenderCache::ConfigureRenderPass(RenderConfiguration* config,
return true; return true;
} }
CachedTileView* RenderCache::GetTileView(const TileViewKey& view_key) { CachedTileView* RenderCache::FindOrCreateTileView(
VkCommandBuffer command_buffer, const TileViewKey& view_key) {
auto tile_view = FindTileView(view_key);
if (tile_view) {
return tile_view;
}
// Create a new tile and add to the cache.
tile_view =
new CachedTileView(device_, command_buffer, edram_memory_, view_key);
cached_tile_views_.push_back(tile_view);
return tile_view;
}
// Records a copy between a cached tile view's image and the EDRAM buffer.
//
// command_buffer: command buffer the barrier and copy commands are recorded
//   into.
// view: tile view whose image is one end of the copy; its key supplies the
//   EDRAM offset and tile dimensions.
// load: true copies EDRAM buffer -> image, false copies image -> EDRAM buffer.
// insert_barrier: when true, a buffer memory barrier over the view's EDRAM
//   range is recorded before the copy.
void RenderCache::UpdateTileView(VkCommandBuffer command_buffer,
                                 CachedTileView* view, bool load,
                                 bool insert_barrier) {
  // Tile dimensions in pixels are selected by the sample count in the key.
  uint32_t tile_width =
      view->key.msaa_samples == uint16_t(MsaaSamples::k4X) ? 40 : 80;
  uint32_t tile_height =
      view->key.msaa_samples != uint16_t(MsaaSamples::k1X) ? 8 : 16;

  // The key's tile offset is scaled by 5120 to get a byte offset into the
  // EDRAM buffer.
  const VkDeviceSize edram_offset =
      VkDeviceSize(view->key.tile_offset) * 5120;

  if (insert_barrier) {
    VkBufferMemoryBarrier barrier;
    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    barrier.pNext = nullptr;
    if (load) {
      // Previous transfer writes must be visible to the upcoming read.
      barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
      barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
    } else {
      // Previous transfer reads must finish before the upcoming write.
      barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
      barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    }
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.buffer = edram_buffer_;
    barrier.offset = edram_offset;

    // The per-pixel byte count must be parenthesized: `a * b ? 4 : 1` parses
    // as `(a * b) ? 4 : 1`, which would shrink the barrier to 4 (or 1) bytes
    // instead of covering the whole view.
    const VkDeviceSize pixel_count = VkDeviceSize(view->key.tile_width) *
                                     tile_width * view->key.tile_height *
                                     tile_height;
    const VkDeviceSize bytes_per_pixel = view->key.color_or_depth ? 4 : 1;
    barrier.size = pixel_count * bytes_per_pixel;

    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
                         &barrier, 0, nullptr);
  }

  // TODO(DrChat): Stencil copies.
  VkBufferImageCopy region;
  region.bufferOffset = edram_offset;
  // Zero row length / image height means the buffer data is tightly packed
  // according to imageExtent.
  region.bufferRowLength = 0;
  region.bufferImageHeight = 0;
  region.imageSubresource = {0, 0, 0, 1};
  region.imageSubresource.aspectMask = view->key.color_or_depth
                                           ? VK_IMAGE_ASPECT_COLOR_BIT
                                           : VK_IMAGE_ASPECT_DEPTH_BIT;
  region.imageOffset = {0, 0, 0};
  region.imageExtent = {view->key.tile_width * tile_width,
                        view->key.tile_height * tile_height, 1};
  if (load) {
    vkCmdCopyBufferToImage(command_buffer, edram_buffer_, view->image,
                           VK_IMAGE_LAYOUT_GENERAL, 1, &region);
  } else {
    vkCmdCopyImageToBuffer(command_buffer, view->image, VK_IMAGE_LAYOUT_GENERAL,
                           edram_buffer_, 1, &region);
  }
}
CachedTileView* RenderCache::FindTileView(const TileViewKey& view_key) const {
// Check the cache. // Check the cache.
// TODO(benvanik): better lookup. // TODO(benvanik): better lookup.
for (auto tile_view : cached_tile_views_) { for (auto tile_view : cached_tile_views_) {
@ -729,25 +927,341 @@ CachedTileView* RenderCache::GetTileView(const TileViewKey& view_key) {
} }
} }
// Create a new tile and add to the cache. return nullptr;
auto tile_view = new CachedTileView(device_, edram_memory_, view_key);
cached_tile_views_.push_back(tile_view);
return tile_view;
} }
void RenderCache::EndRenderPass() { void RenderCache::EndRenderPass() {
assert_not_null(current_command_buffer_); assert_not_null(current_command_buffer_);
auto command_buffer = current_command_buffer_;
current_command_buffer_ = nullptr;
// End the render pass. // End the render pass.
vkCmdEndRenderPass(command_buffer); vkCmdEndRenderPass(current_command_buffer_);
// Copy all render targets back into our EDRAM buffer.
// Don't bother waiting on this command to complete, as next render pass may
// reuse previous framebuffer attachments. If they need this, they will wait.
// TODO: Should we bother re-tiling the images on copy back?
//
// FIXME: There's a case where we may have a really big render target (as we
// can't get the correct height atm) and we may end up overwriting the valid
// contents of another render target by mistake! Need to reorder copy commands
// to avoid this.
// TODO(DrChat): Determine if we actually need an EDRAM buffer.
/*
std::vector<CachedTileView*> cached_views;
// Depth
auto depth_target = current_state_.framebuffer->depth_stencil_attachment;
if (depth_target && current_state_.config.depth_stencil.used) {
cached_views.push_back(depth_target);
}
// Color
for (int i = 0; i < 4; i++) {
auto target = current_state_.framebuffer->color_attachments[i];
if (!target || !current_state_.config.color[i].used) {
continue;
}
cached_views.push_back(target);
}
std::sort(
cached_views.begin(), cached_views.end(),
[](CachedTileView const* a, CachedTileView const* b) { return *a < *b; });
for (auto view : cached_views) {
UpdateTileView(current_command_buffer_, view, false, false);
}
*/
current_command_buffer_ = nullptr;
} }
void RenderCache::ClearCache() { void RenderCache::ClearCache() {
// TODO(benvanik): caching. // TODO(benvanik): caching.
} }
// Records a raw (untranslated) copy of EDRAM buffer contents into an image.
//
// command_buffer: command buffer the barriers and copy are recorded into.
// edram_base: EDRAM tile index; scaled by 5120 to get the buffer byte offset.
// image: destination image.
// image_layout: layout the destination image is in when this is called. If
//   it is neither GENERAL nor TRANSFER_DST_OPTIMAL the image is transitioned
//   to TRANSFER_DST_OPTIMAL for the copy and back to image_layout afterwards.
// color_or_depth: true selects the color aspect, false the depth aspect.
// offset / extents: destination region within the image.
void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer,
                                 uint32_t edram_base, VkImage image,
                                 VkImageLayout image_layout,
                                 bool color_or_depth, VkOffset3D offset,
                                 VkExtent3D extents) {
  // Only layouts that cannot be used as a copy destination need a
  // transition.
  const bool needs_transition =
      image_layout != VK_IMAGE_LAYOUT_GENERAL &&
      image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  // Layout the image is actually in while the copy executes.
  const VkImageLayout copy_layout =
      needs_transition ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : image_layout;

  // Transition the texture into a transfer destination layout.
  VkImageMemoryBarrier image_barrier;
  image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  image_barrier.pNext = nullptr;
  image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  if (needs_transition) {
    image_barrier.srcAccessMask = 0;
    image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    image_barrier.oldLayout = image_layout;
    image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
    image_barrier.image = image;
    image_barrier.subresourceRange = {0, 0, 1, 0, 1};
    image_barrier.subresourceRange.aspectMask =
        color_or_depth
            ? VK_IMAGE_ASPECT_COLOR_BIT
            : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;

    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
                         nullptr, 1, &image_barrier);
  }

  // Make prior transfer writes to the EDRAM range visible to the copy's read.
  VkBufferMemoryBarrier buffer_barrier;
  buffer_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
  // pNext must be initialized; the struct is otherwise left with an
  // indeterminate pointer that the driver may follow.
  buffer_barrier.pNext = nullptr;
  buffer_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
  buffer_barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
  buffer_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  buffer_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  buffer_barrier.buffer = edram_buffer_;
  buffer_barrier.offset = edram_base * 5120;
  // TODO: Calculate this accurately (need texel size)
  buffer_barrier.size = extents.width * extents.height * 4;

  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
                       &buffer_barrier, 0, nullptr);

  // Issue the copy command.
  // TODO(DrChat): Stencil copies.
  VkBufferImageCopy region;
  region.bufferOffset = edram_base * 5120;
  region.bufferImageHeight = 0;
  region.bufferRowLength = 0;
  region.imageOffset = offset;
  region.imageExtent = extents;
  region.imageSubresource = {0, 0, 0, 1};
  region.imageSubresource.aspectMask =
      color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
  // Pass the layout the image is in *now* (post-transition), not the layout
  // the caller handed us.
  vkCmdCopyBufferToImage(command_buffer, edram_buffer_, image, copy_layout, 1,
                         &region);

  // Transition the image back into its previous layout.
  // NOTE(review): if callers can pass VK_IMAGE_LAYOUT_UNDEFINED this would
  // request a transition *to* UNDEFINED - confirm against callers.
  if (needs_transition) {
    image_barrier.srcAccessMask = image_barrier.dstAccessMask;
    image_barrier.dstAccessMask = 0;
    std::swap(image_barrier.oldLayout, image_barrier.newLayout);
    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
                         nullptr, 1, &image_barrier);
  }
}
// Records a blit (or multisample resolve) from the tile view describing an
// EDRAM region into a destination image.
//
// command_buffer: command buffer the barriers and blit are recorded into.
// edram_base: EDRAM tile offset of the source surface.
// pitch: source surface pitch in pixels; used to size the tile view.
// height: source surface height (currently unused; tile height is fixed at
//   160 tiles).
// num_samples: MSAA mode used to pick the tile dimensions.
// image / image_layout: destination image and the layout it is in on entry;
//   the image is returned to image_layout before this function finishes.
// color_or_depth: true for a color surface, false for depth/stencil.
// format: ColorRenderTargetFormat value when color_or_depth is true; stored
//   in the tile view key as-is otherwise.
// filter: filter used by vkCmdBlitImage (single-sample source only).
// offset / extents: destination region.
void RenderCache::BlitToImage(VkCommandBuffer command_buffer,
                              uint32_t edram_base, uint32_t pitch,
                              uint32_t height, MsaaSamples num_samples,
                              VkImage image, VkImageLayout image_layout,
                              bool color_or_depth, uint32_t format,
                              VkFilter filter, VkOffset3D offset,
                              VkExtent3D extents) {
  if (color_or_depth) {
    // Adjust similar formats for easier matching.
    switch (static_cast<ColorRenderTargetFormat>(format)) {
      case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
        format = uint32_t(ColorRenderTargetFormat::k_8_8_8_8);
        break;
      case ColorRenderTargetFormat::k_2_10_10_10_unknown:
        format = uint32_t(ColorRenderTargetFormat::k_2_10_10_10);
        break;
      case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown:
        format = uint32_t(ColorRenderTargetFormat::k_2_10_10_10_FLOAT);
        break;
      default:
        // All other formats are used unchanged.
        break;
    }
  }

  uint32_t tile_width = num_samples == MsaaSamples::k4X ? 40 : 80;
  uint32_t tile_height = num_samples != MsaaSamples::k1X ? 8 : 16;

  // Grab a tile view that represents the source image.
  TileViewKey key;
  key.color_or_depth = color_or_depth ? 1 : 0;
  key.msaa_samples = 0;  // static_cast<uint16_t>(num_samples);
  key.edram_format = format;
  key.tile_offset = edram_base;
  key.tile_width = xe::round_up(pitch, tile_width) / tile_width;
  // key.tile_height = xe::round_up(height, tile_height) / tile_height;
  key.tile_height = 160;
  auto tile_view = FindOrCreateTileView(command_buffer, key);
  assert_not_null(tile_view);

  // Update the view with the latest contents.
  // UpdateTileView(command_buffer, tile_view, true, true);

  // Aspect mask shared by the barrier and both subresource descriptions.
  const VkImageAspectFlags aspect_mask =
      color_or_depth
          ? VK_IMAGE_ASPECT_COLOR_BIT
          : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;

  // Transition the image into a transfer destination layout, if needed.
  // TODO: Util function for this
  VkImageMemoryBarrier image_barrier;
  image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  image_barrier.pNext = nullptr;
  image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  image_barrier.srcAccessMask = 0;
  image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
  image_barrier.oldLayout = image_layout;
  image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  image_barrier.image = image;
  image_barrier.subresourceRange = {0, 0, 1, 0, 1};
  image_barrier.subresourceRange.aspectMask = aspect_mask;

  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
                       nullptr, 1, &image_barrier);

  // If we overflow we'll lose the device here.
  assert_true(extents.width <= key.tile_width * tile_width);
  assert_true(extents.height <= key.tile_height * tile_height);

  // The barrier above always leaves the destination in TRANSFER_DST_OPTIMAL,
  // so that - not the caller's original layout - is what the transfer
  // commands must be told.
  const VkImageLayout dst_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

  // Now issue the blit to the destination.
  if (tile_view->sample_count == VK_SAMPLE_COUNT_1_BIT) {
    // Single-sample source: a regular (optionally filtered) blit.
    VkImageBlit image_blit;
    image_blit.srcSubresource = {0, 0, 0, 1};
    image_blit.srcSubresource.aspectMask = aspect_mask;
    // NOTE(review): the source z range is {offset.z, extents.depth} while the
    // destination uses {offset.z, offset.z + extents.depth}; these only agree
    // when offset.z == 0 - confirm against callers.
    image_blit.srcOffsets[0] = {0, 0, offset.z};
    image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height),
                                int32_t(extents.depth)};

    image_blit.dstSubresource = {0, 0, 0, 1};
    image_blit.dstSubresource.aspectMask = aspect_mask;
    image_blit.dstOffsets[0] = offset;
    image_blit.dstOffsets[1] = {offset.x + int32_t(extents.width),
                                offset.y + int32_t(extents.height),
                                offset.z + int32_t(extents.depth)};
    vkCmdBlitImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL,
                   image, dst_layout, 1, &image_blit, filter);
  } else {
    // Multisampled source: resolve into the destination.
    VkImageResolve image_resolve;
    image_resolve.srcSubresource = {0, 0, 0, 1};
    image_resolve.srcSubresource.aspectMask = aspect_mask;
    image_resolve.srcOffset = {0, 0, 0};

    image_resolve.dstSubresource = {0, 0, 0, 1};
    image_resolve.dstSubresource.aspectMask = aspect_mask;
    image_resolve.dstOffset = offset;

    image_resolve.extent = extents;
    vkCmdResolveImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL,
                      image, dst_layout, 1, &image_resolve);
  }

  // Transition the image back into its previous layout.
  image_barrier.srcAccessMask = image_barrier.dstAccessMask;
  image_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
  std::swap(image_barrier.oldLayout, image_barrier.newLayout);
  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
                       nullptr, 1, &image_barrier);
}
// Records a solid-color clear of the cached tile view that corresponds to the
// given color surface.
//
// command_buffer: command buffer the clear is recorded into.
// edram_base:     EDRAM tile offset of the surface (used as the key offset).
// format:         guest color format; variant formats are folded into their
//                 base format before the tile view lookup.
// pitch:          surface pitch in pixels, rounded up to whole tiles.
// height:         currently unused; the tile view height is fixed at 160.
// num_samples:    only used to pick the tile pixel dimensions.
// color:          pointer to 4 floats copied into the clear value.
//
// Only the tile view image is cleared here: the copy back into the EDRAM
// buffer is still disabled (see the note at the end of the function).
void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer,
                                  uint32_t edram_base,
                                  ColorRenderTargetFormat format,
                                  uint32_t pitch, uint32_t height,
                                  MsaaSamples num_samples, float* color) {
  // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just
  // need to detect this and calculate a value.

  // Collapse equivalent formats so they resolve to the same tile view key.
  if (format == ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
    format = ColorRenderTargetFormat::k_8_8_8_8;
  } else if (format == ColorRenderTargetFormat::k_2_10_10_10_unknown) {
    format = ColorRenderTargetFormat::k_2_10_10_10;
  } else if (format == ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown) {
    format = ColorRenderTargetFormat::k_2_10_10_10_FLOAT;
  }

  // Pixel size of a single EDRAM tile for the requested MSAA mode.
  const uint32_t tile_px_width = (num_samples == MsaaSamples::k4X) ? 40 : 80;
  const uint32_t tile_px_height = (num_samples == MsaaSamples::k1X) ? 16 : 8;
  // Kept for the disabled height computation below.
  (void)tile_px_height;
  (void)height;

  // Build the key of the tile view to clear (an image is needed for the
  // clear command).
  TileViewKey view_key;
  view_key.tile_offset = edram_base;
  view_key.tile_width = xe::round_up(pitch, tile_px_width) / tile_px_width;
  // view_key.tile_height =
  //     xe::round_up(height, tile_px_height) / tile_px_height;
  view_key.tile_height = 160;
  view_key.color_or_depth = 1;
  view_key.msaa_samples = 0;  // static_cast<uint16_t>(num_samples);
  view_key.edram_format = static_cast<uint16_t>(format);

  auto target_view = FindOrCreateTileView(command_buffer, view_key);
  assert_not_null(target_view);

  // Single mip level, single array layer, color aspect only.
  VkImageSubresourceRange clear_range;
  clear_range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  clear_range.baseMipLevel = 0;
  clear_range.levelCount = 1;
  clear_range.baseArrayLayer = 0;
  clear_range.layerCount = 1;

  VkClearColorValue clear_color;
  std::memcpy(clear_color.float32, color, sizeof(float) * 4);

  // Record the clear itself.
  vkCmdClearColorImage(command_buffer, target_view->image,
                       VK_IMAGE_LAYOUT_GENERAL, &clear_color, 1, &clear_range);

  // Copy image back into EDRAM buffer
  // UpdateTileView(command_buffer, target_view, false, false);
}
// Records a depth/stencil clear of the cached tile view that corresponds to
// the given depth surface.
//
// command_buffer: command buffer the clear is recorded into.
// edram_base:     EDRAM tile offset of the surface (used as the key offset).
// format:         guest depth format stored in the tile view key.
// pitch:          surface pitch in pixels, rounded up to whole tiles.
// height:         currently unused; the tile view height is fixed at 160.
// num_samples:    only used to pick the tile pixel dimensions.
// depth/stencil:  values written to the depth and stencil aspects.
//
// Only the tile view image is cleared here: the copy back into the EDRAM
// buffer is still disabled (see the note at the end of the function).
void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
                                         uint32_t edram_base,
                                         DepthRenderTargetFormat format,
                                         uint32_t pitch, uint32_t height,
                                         MsaaSamples num_samples, float depth,
                                         uint32_t stencil) {
  // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just
  // need to detect this and calculate a value.

  // Pixel size of a single EDRAM tile for the requested MSAA mode.
  const uint32_t tile_px_width = (num_samples == MsaaSamples::k4X) ? 40 : 80;
  const uint32_t tile_px_height = (num_samples == MsaaSamples::k1X) ? 16 : 8;
  // Kept for the disabled height computation below.
  (void)tile_px_height;
  (void)height;

  // Build the key of the tile view to clear (an image is needed for the
  // clear command).
  TileViewKey view_key;
  view_key.tile_offset = edram_base;
  view_key.tile_width = xe::round_up(pitch, tile_px_width) / tile_px_width;
  // view_key.tile_height =
  //     xe::round_up(height, tile_px_height) / tile_px_height;
  view_key.tile_height = 160;
  view_key.color_or_depth = 0;
  view_key.msaa_samples = 0;  // static_cast<uint16_t>(num_samples);
  view_key.edram_format = static_cast<uint16_t>(format);

  auto target_view = FindOrCreateTileView(command_buffer, view_key);
  assert_not_null(target_view);

  // Single mip level, single array layer, both depth and stencil aspects.
  VkImageSubresourceRange clear_range;
  clear_range.aspectMask =
      VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
  clear_range.baseMipLevel = 0;
  clear_range.levelCount = 1;
  clear_range.baseArrayLayer = 0;
  clear_range.layerCount = 1;

  VkClearDepthStencilValue clear_depth_stencil = {depth, stencil};

  // Record the clear itself.
  vkCmdClearDepthStencilImage(command_buffer, target_view->image,
                              VK_IMAGE_LAYOUT_GENERAL, &clear_depth_stencil, 1,
                              &clear_range);

  // Copy image back into EDRAM buffer
  // UpdateTileView(command_buffer, target_view, false, false);
}
// Records a fill of the raw EDRAM buffer with a constant value.
//
// command_buffer: command buffer the fill command is recorded into.
// value:          32-bit word passed to vkCmdFillBuffer as the fill data.
//
// The fill starts at offset 0 of edram_buffer_ and covers
// kEdramBufferCapacity bytes.
void RenderCache::FillEDRAM(VkCommandBuffer command_buffer, uint32_t value) {
vkCmdFillBuffer(command_buffer, edram_buffer_, 0, kEdramBufferCapacity,
value);
}
bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
uint32_t value = register_file_->values[register_name].u32; uint32_t value = register_file_->values[register_name].u32;
if (*dest == value) { if (*dest == value) {

View File

@ -12,6 +12,7 @@
#include "xenia/gpu/register_file.h" #include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h" #include "xenia/gpu/shader.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan.h"
@ -36,28 +37,67 @@ struct TileViewKey {
uint16_t tile_height; uint16_t tile_height;
// 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat. // 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat.
uint16_t color_or_depth : 1; uint16_t color_or_depth : 1;
// Surface MSAA samples
uint16_t msaa_samples : 2;
// Either ColorRenderTargetFormat or DepthRenderTargetFormat. // Either ColorRenderTargetFormat or DepthRenderTargetFormat.
uint16_t edram_format : 15; uint16_t edram_format : 13;
}; };
static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed"); static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed");
// Cached view representing EDRAM memory.
// Bundles the key used for cache lookups with the Vulkan handles (image,
// image view, device memory) that belong to the view.
// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible
// formats?
class CachedTileView {
public:
// Key identifying the view in the cache.
TileViewKey key;
// Image
VkImage image = nullptr;
// Simple view on the image matching the format.
VkImageView image_view = nullptr;
// Memory buffer
VkDeviceMemory memory = nullptr;
// Image sample count
VkSampleCountFlagBits sample_count = VK_SAMPLE_COUNT_1_BIT;
// Constructs the view for view_key.
// NOTE(review): the definition is not visible here; presumably it creates
// the image/view/memory above using the given device and command buffer -
// confirm against the implementation.
CachedTileView(ui::vulkan::VulkanDevice* device,
VkCommandBuffer command_buffer, VkDeviceMemory edram_memory,
TileViewKey view_key);
// NOTE(review): definition not visible here; presumably releases the handles
// owned by this view - confirm.
~CachedTileView();
// Returns true when this view's key is bit-for-bit identical to other_key.
// Both keys are read as a single 64-bit value, which relies on TileViewKey
// being exactly 8 bytes.
// NOTE(review): reading the key through a uint64_t pointer is type punning;
// consider memcmp if strict-aliasing issues ever show up.
bool IsEqual(const TileViewKey& other_key) const {
auto a = reinterpret_cast<const uint64_t*>(&key);
auto b = reinterpret_cast<const uint64_t*>(&other_key);
return *a == *b;
}
// Orders views by their EDRAM tile offset only; the remaining key fields do
// not take part in the comparison.
bool operator<(const CachedTileView& other) const {
return key.tile_offset < other.key.tile_offset;
}
private:
// Vulkan device handle; presumably the device the handles above were created
// on (set in the constructor, which is not visible here).
VkDevice device_ = nullptr;
};
// Parsed render configuration from the current render state. // Parsed render configuration from the current render state.
struct RenderConfiguration { struct RenderConfiguration {
// Render mode (color+depth, depth-only, etc). // Render mode (color+depth, depth-only, etc).
xenos::ModeControl mode_control; xenos::ModeControl mode_control;
// Target surface pitch, in pixels. // Target surface pitch multiplied by MSAA, in pixels.
uint32_t surface_pitch_px; uint32_t surface_pitch_px;
// ESTIMATED target surface height, in pixels. // ESTIMATED target surface height multiplied by MSAA, in pixels.
uint32_t surface_height_px; uint32_t surface_height_px;
// Surface MSAA setting. // Surface MSAA setting.
MsaaSamples surface_msaa; MsaaSamples surface_msaa;
// Color attachments for the 4 render targets. // Color attachments for the 4 render targets.
struct { struct {
bool used;
uint32_t edram_base; uint32_t edram_base;
ColorRenderTargetFormat format; ColorRenderTargetFormat format;
} color[4]; } color[4];
// Depth/stencil attachment. // Depth/stencil attachment.
struct { struct {
bool used;
uint32_t edram_base; uint32_t edram_base;
DepthRenderTargetFormat format; DepthRenderTargetFormat format;
} depth_stencil; } depth_stencil;
@ -73,6 +113,9 @@ struct RenderState {
// Target framebuffer bound to the render pass. // Target framebuffer bound to the render pass.
CachedFramebuffer* framebuffer = nullptr; CachedFramebuffer* framebuffer = nullptr;
VkFramebuffer framebuffer_handle = nullptr; VkFramebuffer framebuffer_handle = nullptr;
bool color_attachment_written[4] = {false};
bool depth_attachment_written = false;
}; };
// Manages the virtualized EDRAM and the render target cache. // Manages the virtualized EDRAM and the render target cache.
@ -97,9 +140,13 @@ struct RenderState {
// 320px by rounding up to the next tile. // 320px by rounding up to the next tile.
// //
// MSAA and other settings will modify the exact pixel sizes, like 4X makes // MSAA and other settings will modify the exact pixel sizes, like 4X makes
// each tile effectively 40x8px, but they are still all 5120b. As we try to // each tile effectively 40x8px / 2X makes each tile 80x8px, but they are still
// emulate this we adjust our viewport when rendering to stretch pixels as // all 5120b. As we try to emulate this we adjust our viewport when rendering to
// needed. // stretch pixels as needed.
//
// It appears that games also take advantage of MSAA stretching tiles when doing
// clears. Games will clear a view with 1/2X pitch/height and 4X MSAA and then
// later draw to that view with 1X pitch/height and 1X MSAA.
// //
// The good news is that games cannot read EDRAM directly but must use a copy // The good news is that games cannot read EDRAM directly but must use a copy
// operation to get the data out. That gives us a chance to do whatever we // operation to get the data out. That gives us a chance to do whatever we
@ -217,6 +264,10 @@ class RenderCache {
RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
~RenderCache(); ~RenderCache();
// Call this to determine if you should start a new render pass or continue
// with an already open pass.
bool dirty() const;
// Begins a render pass targeting the state-specified framebuffer formats. // Begins a render pass targeting the state-specified framebuffer formats.
// The command buffer will be transitioned into the render pass phase. // The command buffer will be transitioned into the render pass phase.
const RenderState* BeginRenderPass(VkCommandBuffer command_buffer, const RenderState* BeginRenderPass(VkCommandBuffer command_buffer,
@ -230,24 +281,63 @@ class RenderCache {
// Clears all cached content. // Clears all cached content.
void ClearCache(); void ClearCache();
// Queues commands to copy EDRAM contents into an image.
// The command buffer must not be inside of a render pass when calling this.
void RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base,
VkImage image, VkImageLayout image_layout,
bool color_or_depth, VkOffset3D offset,
VkExtent3D extents);
// Queues commands to blit EDRAM contents into an image.
// The command buffer must not be inside of a render pass when calling this.
void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base,
uint32_t pitch, uint32_t height, MsaaSamples num_samples,
VkImage image, VkImageLayout image_layout,
bool color_or_depth, uint32_t format, VkFilter filter,
VkOffset3D offset, VkExtent3D extents);
// Queues commands to clear EDRAM contents with a solid color.
// The command buffer must not be inside of a render pass when calling this.
// color must point to 4 floats, which are copied into the clear value.
// GAMMA/unknown format variants are treated as their base color format.
// height is accepted but not used by the current implementation.
void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base,
ColorRenderTargetFormat format, uint32_t pitch,
uint32_t height, MsaaSamples num_samples, float* color);
// Queues commands to clear EDRAM contents with depth/stencil values.
// The command buffer must not be inside of a render pass when calling this.
// depth and stencil are written to the depth and stencil aspects of the
// matching tile view image.
// height is accepted but not used by the current implementation.
void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
uint32_t edram_base,
DepthRenderTargetFormat format, uint32_t pitch,
uint32_t height, MsaaSamples num_samples,
float depth, uint32_t stencil);
// Queues commands to fill EDRAM contents with a constant value.
// The command buffer must not be inside of a render pass when calling this.
// value is the 32-bit word used as the fill data for the whole EDRAM buffer.
void FillEDRAM(VkCommandBuffer command_buffer, uint32_t value);
private: private:
// Parses the current state into a configuration object. // Parses the current state into a configuration object.
bool ParseConfiguration(RenderConfiguration* config); bool ParseConfiguration(RenderConfiguration* config);
// Finds a tile view. Returns nullptr if none found matching the key.
CachedTileView* FindTileView(const TileViewKey& view_key) const;
// Gets or creates a tile view with the given parameters.
CachedTileView* FindOrCreateTileView(VkCommandBuffer command_buffer,
const TileViewKey& view_key);
// Queues commands that synchronize a tile view with EDRAM.
// NOTE(review): the definition is not visible here. A disabled call site
// passes load=false under the comment "Copy image back into EDRAM buffer",
// which suggests load=false stores the image to EDRAM and load=true reads
// EDRAM into the image - confirm against the implementation.
// NOTE(review): insert_barrier presumably controls whether a pipeline barrier
// is recorded around the copy - confirm.
void UpdateTileView(VkCommandBuffer command_buffer, CachedTileView* view,
bool load, bool insert_barrier = true);
// Gets or creates a render pass and frame buffer for the given configuration. // Gets or creates a render pass and frame buffer for the given configuration.
// This attempts to reuse as much as possible across render passes and // This attempts to reuse as much as possible across render passes and
// framebuffers. // framebuffers.
bool ConfigureRenderPass(RenderConfiguration* config, bool ConfigureRenderPass(VkCommandBuffer command_buffer,
RenderConfiguration* config,
CachedRenderPass** out_render_pass, CachedRenderPass** out_render_pass,
CachedFramebuffer** out_framebuffer); CachedFramebuffer** out_framebuffer);
// Gets or creates a tile view with the given parameters.
CachedTileView* GetTileView(const TileViewKey& view_key);
RegisterFile* register_file_ = nullptr; RegisterFile* register_file_ = nullptr;
VkDevice device_ = nullptr; ui::vulkan::VulkanDevice* device_ = nullptr;
// Entire 10MiB of EDRAM, aliased to hell by various VkImages. // Entire 10MiB of EDRAM.
VkDeviceMemory edram_memory_ = nullptr; VkDeviceMemory edram_memory_ = nullptr;
// Buffer overlayed 1:1 with edram_memory_ to allow raw access. // Buffer overlayed 1:1 with edram_memory_ to allow raw access.
VkBuffer edram_buffer_ = nullptr; VkBuffer edram_buffer_ = nullptr;

View File

@ -2,7 +2,7 @@
// source: rect_list.geom // source: rect_list.geom
const uint8_t rect_list_geom[] = { const uint8_t rect_list_geom[] = {
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00, 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x08, 0x00,
0xCC, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0xCA, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x18, 0x00, 0x00, 0x00,
0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x11, 0x00, 0x02, 0x00, 0x20, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
0x36, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00,
@ -10,8 +10,8 @@ const uint8_t rect_list_geom[] = {
0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x09, 0x00, 0x03, 0x00, 0x00, 0x00,
0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x69, 0x6E, 0x00, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
0x35, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x00,
0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00,
0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x04, 0x00,
@ -40,17 +40,13 @@ const uint8_t rect_list_geom[] = {
0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43, 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x67, 0x6C, 0x5F, 0x43,
0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00, 0x6C, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61, 0x6E, 0x63, 0x65, 0x00,
0x05, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x05, 0x00, 0x05, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x56, 0x65, 0x72, 0x74, 0x05, 0x00, 0x07, 0x00, 0x30, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F,
0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, 0x06, 0x00, 0x04, 0x00, 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61, 0x74, 0x6F, 0x72,
0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x05, 0x00, 0x07, 0x00, 0x33, 0x00, 0x00, 0x00,
0x05, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, 0x6F, 0x75, 0x74, 0x5F, 0x69, 0x6E, 0x5F, 0x69, 0x6E, 0x74, 0x65, 0x72, 0x70, 0x6F, 0x6C, 0x61,
0x76, 0x74, 0x78, 0x00, 0x05, 0x00, 0x05, 0x00, 0x32, 0x00, 0x00, 0x00, 0x74, 0x6F, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00,
0x56, 0x65, 0x72, 0x74, 0x65, 0x78, 0x44, 0x61, 0x74, 0x61, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00,
0x06, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
0x6F, 0x00, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x35, 0x00, 0x00, 0x00,
0x69, 0x6E, 0x5F, 0x76, 0x74, 0x78, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00,
0x66, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x05, 0x00, 0x03, 0x00,
0xB4, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0E, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
@ -65,12 +61,10 @@ const uint8_t rect_list_geom[] = {
0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
0x20, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00,
0x47, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x33, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00,
0x1D, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
0x14, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x02, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
@ -107,25 +101,23 @@ const uint8_t rect_list_geom[] = {
0x03, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x03, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00,
0x0B, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
0x1C, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00,
0x2D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x03, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x2D, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00,
0x2E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00,
0x03, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
0x30, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x04, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00,
0x1E, 0x00, 0x03, 0x00, 0x32, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00,
0x1C, 0x00, 0x04, 0x00, 0x33, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00,
0x0F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x01, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x20, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x34, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x63, 0x00, 0x00, 0x00,
0x20, 0x00, 0x04, 0x00, 0x36, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x32, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x65, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x04, 0x00,
0x13, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00,
0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00,
0x65, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
0x3B, 0x00, 0x04, 0x00, 0x65, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x04, 0x00, 0x63, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00,
0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x16, 0x00, 0x00, 0x00,
0x17, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
@ -139,7 +131,7 @@ const uint8_t rect_list_geom[] = {
0x1C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
0x1D, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xF7, 0x00, 0x03, 0x00,
0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00,
0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00,
0xF8, 0x00, 0x02, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
@ -153,286 +145,283 @@ const uint8_t rect_list_geom[] = {
0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x2C, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x2C, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2C, 0x00, 0x00, 0x00, 0x2A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00,
0x38, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x36, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x31, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x30, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00,
0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
0x39, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00,
0x3B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x39, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x3B, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x39, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00,
0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00,
0x3C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x3A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x3B, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00,
0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x32, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x00,
0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00,
0x41, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x40, 0x00, 0x00, 0x00, 0x3F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x26, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x43, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x41, 0x00, 0x00, 0x00,
0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00,
0x44, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x45, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x43, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x2B, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x46, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x44, 0x00, 0x00, 0x00,
0x45, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00,
0x47, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x46, 0x00, 0x00, 0x00,
0x47, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00,
0x48, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0x46, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00,
0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00,
0x49, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00,
0x4B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x4B, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00,
0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00,
0x4C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x4A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00,
0x4E, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x4C, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x4E, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x4C, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00,
0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x32, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00, 0x4D, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x4E, 0x00, 0x00, 0x00,
0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00,
0x51, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0x52, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x50, 0x00, 0x00, 0x00, 0x4F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x26, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x53, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x51, 0x00, 0x00, 0x00,
0x52, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x50, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00,
0x54, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x55, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x53, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x2B, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x56, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x54, 0x00, 0x00, 0x00,
0x55, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00,
0x57, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x56, 0x00, 0x00, 0x00,
0x57, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00,
0x58, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x56, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00,
0x23, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00,
0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00,
0x5B, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00,
0x5D, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x00,
0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00,
0x5E, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x5C, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00,
0x60, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, 0x5B, 0x00, 0x00, 0x00, 0x5D, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x5F, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x61, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5F, 0x00, 0x00, 0x00, 0x5E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x16, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x64, 0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x62, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x66, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x64, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00,
0x67, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x65, 0x00, 0x00, 0x00,
0xF6, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, 0x67, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x6B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x69, 0x00, 0x00, 0x00,
0xF8, 0x00, 0x02, 0x00, 0x6B, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x69, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x13, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00,
0x6C, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0x6A, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00,
0x6E, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
0xF8, 0x00, 0x02, 0x00, 0x66, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x13, 0x00, 0x00, 0x00, 0x6D, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x6E, 0x00, 0x00, 0x00,
0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00,
0x6F, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x6E, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0x70, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00,
0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00,
0x73, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x72, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0x74, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
0x74, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
0x76, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x23, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00,
0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00,
0x75, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x26, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00,
0x6D, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x7A, 0x00, 0x00, 0x00,
0x79, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x00,
0xF8, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x13, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00,
0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x71, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00,
0x14, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x65, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
0x7F, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00,
0x72, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
0x74, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00,
0x23, 0x00, 0x00, 0x00, 0x75, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00,
0x75, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00,
0x77, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00,
0x66, 0x00, 0x00, 0x00, 0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00,
0x79, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x78, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x79, 0x00, 0x00, 0x00,
0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00,
0x77, 0x00, 0x00, 0x00, 0x7A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x26, 0x00, 0x00, 0x00, 0x7C, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x7C, 0x00, 0x00, 0x00, 0x7B, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00,
0x6A, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x6A, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00,
0x66, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
0x7E, 0x00, 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x66, 0x00, 0x00, 0x00, 0x7E, 0x00, 0x00, 0x00,
0xF9, 0x00, 0x02, 0x00, 0x67, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00,
0x69, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00,
0xF9, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00,
0x7F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00,
0x80, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0x81, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x26, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x82, 0x00, 0x00, 0x00,
0x81, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00,
0x83, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x84, 0x00, 0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x2B, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x85, 0x00, 0x00, 0x00,
0x84, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00,
0x86, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00,
0x86, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00,
0x87, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00,
0x23, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x8A, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x8A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x16, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x8D, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x8D, 0x00, 0x00, 0x00, 0x8C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x36, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00,
0x8F, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x31, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00,
0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00,
0x90, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00,
0x92, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x92, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00,
0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x93, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
0x93, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00,
0x95, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x95, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00,
0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x32, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0x97, 0x00, 0x00, 0x00,
0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00,
0x23, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x9A, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x9A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x16, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0x9C, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x9D, 0x00, 0x00, 0x00, 0x9C, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x36, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x32, 0x00, 0x00, 0x00,
0x9F, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x31, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00,
0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xA0, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00,
0xA0, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00,
0xA2, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0xA2, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00,
0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0xA3, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00,
0xA3, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00,
0xA5, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0xA5, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x36, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00,
0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x32, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x31, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00,
0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00,
0xA8, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0xA9, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x23, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00,
0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xAC, 0x00, 0x00, 0x00,
0xA9, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x23, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0xAE, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00,
0x83, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xAF, 0x00, 0x00, 0x00,
0xAC, 0x00, 0x00, 0x00, 0xAE, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x26, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB0, 0x00, 0x00, 0x00,
0xAF, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00,
0xB1, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0xB2, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x2B, 0x00, 0x00, 0x00, 0xB3, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB3, 0x00, 0x00, 0x00,
0xB2, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xB4, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xB5, 0x00, 0x00, 0x00,
0xF8, 0x00, 0x02, 0x00, 0xB5, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00,
0xB7, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xF9, 0x00, 0x02, 0x00, 0xB9, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00,
0xB9, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
0xBA, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00,
0x06, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00,
0x6D, 0x00, 0x00, 0x00, 0xFA, 0x00, 0x04, 0x00, 0xBB, 0x00, 0x00, 0x00,
0xB6, 0x00, 0x00, 0x00, 0xB7, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00,
0xB6, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
0xBC, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x13, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00,
0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00,
0x35, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0xBD, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0xBF, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x13, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00,
0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00,
0x35, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0xC0, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0xC2, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00,
0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00,
0xBF, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x13, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00,
0x41, 0x00, 0x07, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00,
0x35, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0xC5, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0xC7, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00,
0x0A, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00,
0xC7, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00,
0xC9, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0xBC, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0xC9, 0x00, 0x00, 0x00,
0xC8, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xB8, 0x00, 0x00, 0x00,
0xF8, 0x00, 0x02, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x13, 0x00, 0x00, 0x00, 0xCA, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00,
0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, 0xCB, 0x00, 0x00, 0x00,
0xCA, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0xB4, 0x00, 0x00, 0x00, 0xCB, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00,
0xB5, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB7, 0x00, 0x00, 0x00,
0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0xF9, 0x00, 0x02, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00, 0xF9, 0x00, 0x02, 0x00,
0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0x7D, 0x00, 0x00, 0x00,
0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0x7E, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00,
0x7E, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00,
0x80, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x80, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x00, 0x00,
0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0x81, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00,
0x81, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00,
0x83, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x83, 0x00, 0x00, 0x00, 0x82, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x2E, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x84, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00,
0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00,
0x86, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0x87, 0x00, 0x00, 0x00, 0x86, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x26, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x88, 0x00, 0x00, 0x00,
0x87, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00,
0x89, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x8A, 0x00, 0x00, 0x00, 0x89, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x2B, 0x00, 0x00, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x8B, 0x00, 0x00, 0x00,
0x8A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00,
0x8C, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x8D, 0x00, 0x00, 0x00,
0x8C, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00,
0x8D, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00,
0x23, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0x8E, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x90, 0x00, 0x00, 0x00, 0x8F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x16, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00, 0x91, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0x93, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x93, 0x00, 0x00, 0x00, 0x92, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x34, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00,
0x95, 0x00, 0x00, 0x00, 0x94, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x30, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00,
0xDB, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00,
0x96, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0x97, 0x00, 0x00, 0x00, 0x96, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x26, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x98, 0x00, 0x00, 0x00,
0x97, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00,
0x99, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
0x9A, 0x00, 0x00, 0x00, 0x99, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x2B, 0x00, 0x00, 0x00, 0x9B, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x9B, 0x00, 0x00, 0x00,
0x9A, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x34, 0x00, 0x00, 0x00,
0x9C, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00, 0x9D, 0x00, 0x00, 0x00,
0x9C, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00, 0x30, 0x00, 0x00, 0x00,
0x9D, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0x41, 0x00, 0x06, 0x00,
0x23, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0x9E, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0xA0, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0xA0, 0x00, 0x00, 0x00, 0x9F, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x16, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x09, 0x00, 0x00, 0x00, 0xA2, 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00, 0xA3, 0x00, 0x00, 0x00,
0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0xA3, 0x00, 0x00, 0x00, 0xA2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
0x34, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
0x19, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x2E, 0x00, 0x00, 0x00,
0xA5, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0x30, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00,
0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xA6, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00,
0xA6, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00,
0xA8, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0xA9, 0x00, 0x00, 0x00, 0xA8, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00,
0x0A, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00, 0xA7, 0x00, 0x00, 0x00,
0xA9, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00,
0xAB, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0xAC, 0x00, 0x00, 0x00, 0xAB, 0x00, 0x00, 0x00, 0x83, 0x00, 0x05, 0x00,
0x0A, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x00,
0xAC, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00,
0xAE, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0xAE, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, 0x00,
0x41, 0x00, 0x06, 0x00, 0x16, 0x00, 0x00, 0x00, 0xAF, 0x00, 0x00, 0x00,
0x12, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00,
0xAF, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2B, 0x00, 0x00, 0x00,
0xB1, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0xB1, 0x00, 0x00, 0x00, 0xB0, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0xF9, 0x00, 0x02, 0x00, 0xB3, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00,
0xB3, 0x00, 0x00, 0x00, 0xF6, 0x00, 0x04, 0x00, 0xB5, 0x00, 0x00, 0x00,
0xB6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00,
0xB7, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB7, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00,
0xB2, 0x00, 0x00, 0x00, 0xB1, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
0xB9, 0x00, 0x00, 0x00, 0xB8, 0x00, 0x00, 0x00, 0x6B, 0x00, 0x00, 0x00,
0xFA, 0x00, 0x04, 0x00, 0xB9, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, 0x00,
0xB5, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB4, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00,
0xB2, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
0xBB, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
0x23, 0x00, 0x00, 0x00, 0xBC, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
0x14, 0x00, 0x00, 0x00, 0xBB, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, 0x00, 0xBC, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xBE, 0x00, 0x00, 0x00,
0xB2, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00,
0xBF, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0xBE, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00,
0xC0, 0x00, 0x00, 0x00, 0xBF, 0x00, 0x00, 0x00, 0x7F, 0x00, 0x04, 0x00,
0x0A, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00,
0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00,
0xBD, 0x00, 0x00, 0x00, 0xC1, 0x00, 0x00, 0x00, 0x3D, 0x00, 0x04, 0x00,
0x13, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00, 0xB2, 0x00, 0x00, 0x00,
0x41, 0x00, 0x06, 0x00, 0x23, 0x00, 0x00, 0x00, 0xC4, 0x00, 0x00, 0x00,
0x33, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0xC3, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x0A, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00,
0xC4, 0x00, 0x00, 0x00, 0x81, 0x00, 0x05, 0x00, 0x0A, 0x00, 0x00, 0x00,
0xC6, 0x00, 0x00, 0x00, 0xC2, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00,
0x41, 0x00, 0x05, 0x00, 0x26, 0x00, 0x00, 0x00, 0xC7, 0x00, 0x00, 0x00,
0x30, 0x00, 0x00, 0x00, 0xBA, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x03, 0x00,
0xC7, 0x00, 0x00, 0x00, 0xC6, 0x00, 0x00, 0x00, 0xF9, 0x00, 0x02, 0x00,
0xB6, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00, 0xB6, 0x00, 0x00, 0x00,
0x3D, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00,
0xB2, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
0xC9, 0x00, 0x00, 0x00, 0xC8, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00,
0x3E, 0x00, 0x03, 0x00, 0xB2, 0x00, 0x00, 0x00, 0xC9, 0x00, 0x00, 0x00,
0xF9, 0x00, 0x02, 0x00, 0xB3, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00,
0xB5, 0x00, 0x00, 0x00, 0xDA, 0x00, 0x01, 0x00, 0xDB, 0x00, 0x01, 0x00,
0xF9, 0x00, 0x02, 0x00, 0x1F, 0x00, 0x00, 0x00, 0xF8, 0x00, 0x02, 0x00,
0x1F, 0x00, 0x00, 0x00, 0xFD, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
}; };

View File

@ -1,7 +1,7 @@
; SPIR-V ; SPIR-V
; Version: 1.0 ; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 1 ; Generator: Khronos Glslang Reference Front End; 1
; Bound: 204 ; Bound: 202
; Schema: 0 ; Schema: 0
OpCapability Geometry OpCapability Geometry
OpCapability GeometryPointSize OpCapability GeometryPointSize
@ -9,7 +9,7 @@
OpCapability GeometryStreams OpCapability GeometryStreams
%1 = OpExtInstImport "GLSL.std.450" %1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450 OpMemoryModel Logical GLSL450
OpEntryPoint Geometry %4 "main" %18 %34 %49 %53 OpEntryPoint Geometry %4 "main" %18 %34 %48 %51
OpExecutionMode %4 Triangles OpExecutionMode %4 Triangles
OpExecutionMode %4 Invocations 1 OpExecutionMode %4 Invocations 1
OpExecutionMode %4 OutputTriangleStrip OpExecutionMode %4 OutputTriangleStrip
@ -27,14 +27,10 @@
OpMemberName %32 1 "gl_PointSize" OpMemberName %32 1 "gl_PointSize"
OpMemberName %32 2 "gl_ClipDistance" OpMemberName %32 2 "gl_ClipDistance"
OpName %34 "" OpName %34 ""
OpName %47 "VertexData" OpName %48 "out_interpolators"
OpMemberName %47 0 "o" OpName %51 "in_interpolators"
OpName %49 "out_vtx" OpName %100 "i"
OpName %50 "VertexData" OpName %178 "i"
OpMemberName %50 0 "o"
OpName %53 "in_vtx"
OpName %102 "i"
OpName %180 "i"
OpMemberDecorate %14 0 BuiltIn Position OpMemberDecorate %14 0 BuiltIn Position
OpMemberDecorate %14 1 BuiltIn PointSize OpMemberDecorate %14 1 BuiltIn PointSize
OpMemberDecorate %14 2 BuiltIn ClipDistance OpMemberDecorate %14 2 BuiltIn ClipDistance
@ -45,10 +41,9 @@
OpDecorate %32 Block OpDecorate %32 Block
OpDecorate %32 Stream 0 OpDecorate %32 Stream 0
OpDecorate %34 Stream 0 OpDecorate %34 Stream 0
OpMemberDecorate %47 0 Location 0 OpDecorate %48 Location 0
OpDecorate %47 Stream 0 OpDecorate %48 Stream 0
OpDecorate %49 Stream 0 OpDecorate %51 Location 0
OpMemberDecorate %50 0 Location 0
%2 = OpTypeVoid %2 = OpTypeVoid
%3 = OpTypeFunction %2 %3 = OpTypeFunction %2
%6 = OpTypeBool %6 = OpTypeBool
@ -77,21 +72,19 @@
%43 = OpTypePointer Output %9 %43 = OpTypePointer Output %9
%45 = OpConstant %11 16 %45 = OpConstant %11 16
%46 = OpTypeArray %10 %45 %46 = OpTypeArray %10 %45
%47 = OpTypeStruct %46 %47 = OpTypePointer Output %46
%48 = OpTypePointer Output %47 %48 = OpVariable %47 Output
%49 = OpVariable %48 Output %49 = OpTypeArray %46 %15
%50 = OpTypeStruct %46 %50 = OpTypePointer Input %49
%51 = OpTypeArray %50 %15 %51 = OpVariable %50 Input
%52 = OpTypePointer Input %51 %52 = OpTypePointer Input %46
%53 = OpVariable %52 Input %99 = OpTypePointer Function %19
%54 = OpTypePointer Input %50 %107 = OpConstant %19 16
%101 = OpTypePointer Function %19
%109 = OpConstant %19 16
%4 = OpFunction %2 None %3 %4 = OpFunction %2 None %3
%5 = OpLabel %5 = OpLabel
%8 = OpVariable %7 Function %8 = OpVariable %7 Function
%102 = OpVariable %101 Function %100 = OpVariable %99 Function
%180 = OpVariable %101 Function %178 = OpVariable %99 Function
%23 = OpAccessChain %22 %18 %20 %20 %21 %23 = OpAccessChain %22 %18 %20 %20 %21
%24 = OpLoad %9 %23 %24 = OpLoad %9 %23
%26 = OpAccessChain %22 %18 %25 %20 %21 %26 = OpAccessChain %22 %18 %25 %20 %21
@ -100,7 +93,7 @@
OpStore %8 %28 OpStore %8 %28
%29 = OpLoad %6 %8 %29 = OpLoad %6 %8
OpSelectionMerge %31 None OpSelectionMerge %31 None
OpBranchConditional %29 %30 %127 OpBranchConditional %29 %30 %125
%30 = OpLabel %30 = OpLabel
%36 = OpAccessChain %35 %18 %20 %20 %36 = OpAccessChain %35 %18 %20 %20
%37 = OpLoad %10 %36 %37 = OpLoad %10 %36
@ -110,216 +103,216 @@
%42 = OpLoad %9 %41 %42 = OpLoad %9 %41
%44 = OpAccessChain %43 %34 %40 %44 = OpAccessChain %43 %34 %40
OpStore %44 %42 OpStore %44 %42
%55 = OpAccessChain %54 %53 %20 %53 = OpAccessChain %52 %51 %20
%56 = OpLoad %50 %55 %54 = OpLoad %46 %53
OpStore %49 %56 OpStore %48 %54
OpEmitVertex OpEmitVertex
%57 = OpAccessChain %35 %18 %40 %20 %55 = OpAccessChain %35 %18 %40 %20
%58 = OpLoad %10 %57 %56 = OpLoad %10 %55
%59 = OpAccessChain %38 %34 %20 %57 = OpAccessChain %38 %34 %20
OpStore %59 %58 OpStore %57 %56
%60 = OpAccessChain %22 %18 %40 %40 %58 = OpAccessChain %22 %18 %40 %40
%61 = OpLoad %9 %60 %59 = OpLoad %9 %58
%62 = OpAccessChain %43 %34 %40 %60 = OpAccessChain %43 %34 %40
OpStore %62 %61 OpStore %60 %59
%63 = OpAccessChain %54 %53 %40 %61 = OpAccessChain %52 %51 %40
%64 = OpLoad %50 %63 %62 = OpLoad %46 %61
OpStore %49 %64 OpStore %48 %62
OpEmitVertex OpEmitVertex
%65 = OpAccessChain %35 %18 %25 %20 %63 = OpAccessChain %35 %18 %25 %20
%66 = OpLoad %10 %65 %64 = OpLoad %10 %63
%67 = OpAccessChain %38 %34 %20 %65 = OpAccessChain %38 %34 %20
OpStore %67 %66 OpStore %65 %64
%68 = OpAccessChain %22 %18 %25 %40 %66 = OpAccessChain %22 %18 %25 %40
%69 = OpLoad %9 %68 %67 = OpLoad %9 %66
%70 = OpAccessChain %43 %34 %40 %68 = OpAccessChain %43 %34 %40
OpStore %70 %69 OpStore %68 %67
%71 = OpAccessChain %54 %53 %25 %69 = OpAccessChain %52 %51 %25
%72 = OpLoad %50 %71 %70 = OpLoad %46 %69
OpStore %49 %72 OpStore %48 %70
OpEmitVertex OpEmitVertex
OpEndPrimitive OpEndPrimitive
%73 = OpAccessChain %35 %18 %25 %20 %71 = OpAccessChain %35 %18 %25 %20
%74 = OpLoad %10 %73 %72 = OpLoad %10 %71
%75 = OpAccessChain %38 %34 %20 %73 = OpAccessChain %38 %34 %20
OpStore %75 %74 OpStore %73 %72
%76 = OpAccessChain %22 %18 %25 %40 %74 = OpAccessChain %22 %18 %25 %40
%77 = OpLoad %9 %76 %75 = OpLoad %9 %74
%78 = OpAccessChain %43 %34 %40 %76 = OpAccessChain %43 %34 %40
OpStore %78 %77 OpStore %76 %75
%79 = OpAccessChain %54 %53 %25 %77 = OpAccessChain %52 %51 %25
%80 = OpLoad %50 %79 %78 = OpLoad %46 %77
OpStore %49 %80 OpStore %48 %78
OpEmitVertex OpEmitVertex
%81 = OpAccessChain %35 %18 %40 %20 %79 = OpAccessChain %35 %18 %40 %20
%82 = OpLoad %10 %81 %80 = OpLoad %10 %79
%83 = OpAccessChain %38 %34 %20 %81 = OpAccessChain %38 %34 %20
OpStore %83 %82 OpStore %81 %80
%84 = OpAccessChain %22 %18 %40 %40 %82 = OpAccessChain %22 %18 %40 %40
%85 = OpLoad %9 %84 %83 = OpLoad %9 %82
%86 = OpAccessChain %43 %34 %40 %84 = OpAccessChain %43 %34 %40
OpStore %86 %85 OpStore %84 %83
%87 = OpAccessChain %54 %53 %40 %85 = OpAccessChain %52 %51 %40
%88 = OpLoad %50 %87 %86 = OpLoad %46 %85
OpStore %49 %88 OpStore %48 %86
OpEmitVertex OpEmitVertex
%89 = OpAccessChain %35 %18 %40 %20 %87 = OpAccessChain %35 %18 %40 %20
%88 = OpLoad %10 %87
%89 = OpAccessChain %35 %18 %25 %20
%90 = OpLoad %10 %89 %90 = OpLoad %10 %89
%91 = OpAccessChain %35 %18 %25 %20 %91 = OpFAdd %10 %88 %90
%92 = OpLoad %10 %91 %92 = OpAccessChain %35 %18 %20 %20
%93 = OpFAdd %10 %90 %92 %93 = OpLoad %10 %92
%94 = OpAccessChain %35 %18 %20 %20 %94 = OpFSub %10 %91 %93
%95 = OpLoad %10 %94 %95 = OpAccessChain %38 %34 %20
%96 = OpFSub %10 %93 %95 OpStore %95 %94
%97 = OpAccessChain %38 %34 %20 %96 = OpAccessChain %22 %18 %25 %40
OpStore %97 %96 %97 = OpLoad %9 %96
%98 = OpAccessChain %22 %18 %25 %40 %98 = OpAccessChain %43 %34 %40
%99 = OpLoad %9 %98 OpStore %98 %97
%100 = OpAccessChain %43 %34 %40 OpStore %100 %20
OpStore %100 %99 OpBranch %101
OpStore %102 %20 %101 = OpLabel
OpBranch %103 OpLoopMerge %103 %104 None
%103 = OpLabel OpBranch %105
OpLoopMerge %105 %106 None
OpBranch %107
%107 = OpLabel
%108 = OpLoad %19 %102
%110 = OpSLessThan %6 %108 %109
OpBranchConditional %110 %104 %105
%104 = OpLabel
%111 = OpLoad %19 %102
%112 = OpLoad %19 %102
%113 = OpAccessChain %35 %53 %20 %20 %112
%114 = OpLoad %10 %113
%115 = OpFNegate %10 %114
%116 = OpLoad %19 %102
%117 = OpAccessChain %35 %53 %40 %20 %116
%118 = OpLoad %10 %117
%119 = OpFAdd %10 %115 %118
%120 = OpLoad %19 %102
%121 = OpAccessChain %35 %53 %25 %20 %120
%122 = OpLoad %10 %121
%123 = OpFAdd %10 %119 %122
%124 = OpAccessChain %38 %49 %20 %111
OpStore %124 %123
OpBranch %106
%106 = OpLabel
%125 = OpLoad %19 %102
%126 = OpIAdd %19 %125 %40
OpStore %102 %126
OpBranch %103
%105 = OpLabel %105 = OpLabel
%106 = OpLoad %19 %100
%108 = OpSLessThan %6 %106 %107
OpBranchConditional %108 %102 %103
%102 = OpLabel
%109 = OpLoad %19 %100
%110 = OpLoad %19 %100
%111 = OpAccessChain %35 %51 %20 %110
%112 = OpLoad %10 %111
%113 = OpFNegate %10 %112
%114 = OpLoad %19 %100
%115 = OpAccessChain %35 %51 %40 %114
%116 = OpLoad %10 %115
%117 = OpFAdd %10 %113 %116
%118 = OpLoad %19 %100
%119 = OpAccessChain %35 %51 %25 %118
%120 = OpLoad %10 %119
%121 = OpFAdd %10 %117 %120
%122 = OpAccessChain %38 %48 %109
OpStore %122 %121
OpBranch %104
%104 = OpLabel
%123 = OpLoad %19 %100
%124 = OpIAdd %19 %123 %40
OpStore %100 %124
OpBranch %101
%103 = OpLabel
OpEmitVertex OpEmitVertex
OpEndPrimitive OpEndPrimitive
OpBranch %31 OpBranch %31
%127 = OpLabel %125 = OpLabel
%128 = OpAccessChain %35 %18 %20 %20 %126 = OpAccessChain %35 %18 %20 %20
%129 = OpLoad %10 %128 %127 = OpLoad %10 %126
%130 = OpAccessChain %38 %34 %20 %128 = OpAccessChain %38 %34 %20
OpStore %130 %129 OpStore %128 %127
%131 = OpAccessChain %22 %18 %20 %40 %129 = OpAccessChain %22 %18 %20 %40
%132 = OpLoad %9 %131 %130 = OpLoad %9 %129
%133 = OpAccessChain %43 %34 %40 %131 = OpAccessChain %43 %34 %40
OpStore %133 %132 OpStore %131 %130
%134 = OpAccessChain %54 %53 %20 %132 = OpAccessChain %52 %51 %20
%135 = OpLoad %50 %134 %133 = OpLoad %46 %132
OpStore %49 %135 OpStore %48 %133
OpEmitVertex OpEmitVertex
%136 = OpAccessChain %35 %18 %40 %20 %134 = OpAccessChain %35 %18 %40 %20
%137 = OpLoad %10 %136 %135 = OpLoad %10 %134
%138 = OpAccessChain %38 %34 %20 %136 = OpAccessChain %38 %34 %20
OpStore %138 %137 OpStore %136 %135
%139 = OpAccessChain %22 %18 %40 %40 %137 = OpAccessChain %22 %18 %40 %40
%140 = OpLoad %9 %139 %138 = OpLoad %9 %137
%141 = OpAccessChain %43 %34 %40 %139 = OpAccessChain %43 %34 %40
OpStore %141 %140 OpStore %139 %138
%142 = OpAccessChain %54 %53 %40 %140 = OpAccessChain %52 %51 %40
%143 = OpLoad %50 %142 %141 = OpLoad %46 %140
OpStore %49 %143 OpStore %48 %141
OpEmitVertex OpEmitVertex
%144 = OpAccessChain %35 %18 %25 %20 %142 = OpAccessChain %35 %18 %25 %20
%145 = OpLoad %10 %144 %143 = OpLoad %10 %142
%146 = OpAccessChain %38 %34 %20 %144 = OpAccessChain %38 %34 %20
OpStore %146 %145 OpStore %144 %143
%147 = OpAccessChain %22 %18 %25 %40 %145 = OpAccessChain %22 %18 %25 %40
%148 = OpLoad %9 %147 %146 = OpLoad %9 %145
%149 = OpAccessChain %43 %34 %40 %147 = OpAccessChain %43 %34 %40
OpStore %149 %148 OpStore %147 %146
%150 = OpAccessChain %54 %53 %25 %148 = OpAccessChain %52 %51 %25
%151 = OpLoad %50 %150 %149 = OpLoad %46 %148
OpStore %49 %151 OpStore %48 %149
OpEmitVertex OpEmitVertex
OpEndPrimitive OpEndPrimitive
%152 = OpAccessChain %35 %18 %20 %20 %150 = OpAccessChain %35 %18 %20 %20
%153 = OpLoad %10 %152 %151 = OpLoad %10 %150
%154 = OpAccessChain %38 %34 %20 %152 = OpAccessChain %38 %34 %20
OpStore %154 %153 OpStore %152 %151
%155 = OpAccessChain %22 %18 %20 %40 %153 = OpAccessChain %22 %18 %20 %40
%156 = OpLoad %9 %155 %154 = OpLoad %9 %153
%157 = OpAccessChain %43 %34 %40 %155 = OpAccessChain %43 %34 %40
OpStore %157 %156 OpStore %155 %154
%158 = OpAccessChain %54 %53 %20 %156 = OpAccessChain %52 %51 %20
%159 = OpLoad %50 %158 %157 = OpLoad %46 %156
OpStore %49 %159 OpStore %48 %157
OpEmitVertex OpEmitVertex
%160 = OpAccessChain %35 %18 %25 %20 %158 = OpAccessChain %35 %18 %25 %20
%161 = OpLoad %10 %160 %159 = OpLoad %10 %158
%162 = OpAccessChain %38 %34 %20 %160 = OpAccessChain %38 %34 %20
OpStore %162 %161 OpStore %160 %159
%163 = OpAccessChain %22 %18 %25 %40 %161 = OpAccessChain %22 %18 %25 %40
%164 = OpLoad %9 %163 %162 = OpLoad %9 %161
%165 = OpAccessChain %43 %34 %40 %163 = OpAccessChain %43 %34 %40
OpStore %165 %164 OpStore %163 %162
%166 = OpAccessChain %54 %53 %25 %164 = OpAccessChain %52 %51 %25
%167 = OpLoad %50 %166 %165 = OpLoad %46 %164
OpStore %49 %167 OpStore %48 %165
OpEmitVertex OpEmitVertex
%168 = OpAccessChain %35 %18 %20 %20 %166 = OpAccessChain %35 %18 %20 %20
%167 = OpLoad %10 %166
%168 = OpAccessChain %35 %18 %25 %20
%169 = OpLoad %10 %168 %169 = OpLoad %10 %168
%170 = OpAccessChain %35 %18 %25 %20 %170 = OpFAdd %10 %167 %169
%171 = OpLoad %10 %170 %171 = OpAccessChain %35 %18 %40 %20
%172 = OpFAdd %10 %169 %171 %172 = OpLoad %10 %171
%173 = OpAccessChain %35 %18 %40 %20 %173 = OpFSub %10 %170 %172
%174 = OpLoad %10 %173 %174 = OpAccessChain %38 %34 %20
%175 = OpFSub %10 %172 %174 OpStore %174 %173
%176 = OpAccessChain %38 %34 %20 %175 = OpAccessChain %22 %18 %25 %40
OpStore %176 %175 %176 = OpLoad %9 %175
%177 = OpAccessChain %22 %18 %25 %40 %177 = OpAccessChain %43 %34 %40
%178 = OpLoad %9 %177 OpStore %177 %176
%179 = OpAccessChain %43 %34 %40 OpStore %178 %20
OpStore %179 %178 OpBranch %179
OpStore %180 %20 %179 = OpLabel
OpBranch %181 OpLoopMerge %181 %182 None
%181 = OpLabel OpBranch %183
OpLoopMerge %183 %184 None
OpBranch %185
%185 = OpLabel
%186 = OpLoad %19 %180
%187 = OpSLessThan %6 %186 %109
OpBranchConditional %187 %182 %183
%182 = OpLabel
%188 = OpLoad %19 %180
%189 = OpLoad %19 %180
%190 = OpAccessChain %35 %53 %20 %20 %189
%191 = OpLoad %10 %190
%192 = OpLoad %19 %180
%193 = OpAccessChain %35 %53 %40 %20 %192
%194 = OpLoad %10 %193
%195 = OpFNegate %10 %194
%196 = OpFAdd %10 %191 %195
%197 = OpLoad %19 %180
%198 = OpAccessChain %35 %53 %25 %20 %197
%199 = OpLoad %10 %198
%200 = OpFAdd %10 %196 %199
%201 = OpAccessChain %38 %49 %20 %188
OpStore %201 %200
OpBranch %184
%184 = OpLabel
%202 = OpLoad %19 %180
%203 = OpIAdd %19 %202 %40
OpStore %180 %203
OpBranch %181
%183 = OpLabel %183 = OpLabel
%184 = OpLoad %19 %178
%185 = OpSLessThan %6 %184 %107
OpBranchConditional %185 %180 %181
%180 = OpLabel
%186 = OpLoad %19 %178
%187 = OpLoad %19 %178
%188 = OpAccessChain %35 %51 %20 %187
%189 = OpLoad %10 %188
%190 = OpLoad %19 %178
%191 = OpAccessChain %35 %51 %40 %190
%192 = OpLoad %10 %191
%193 = OpFNegate %10 %192
%194 = OpFAdd %10 %189 %193
%195 = OpLoad %19 %178
%196 = OpAccessChain %35 %51 %25 %195
%197 = OpLoad %10 %196
%198 = OpFAdd %10 %194 %197
%199 = OpAccessChain %38 %48 %186
OpStore %199 %198
OpBranch %182
%182 = OpLabel
%200 = OpLoad %19 %178
%201 = OpIAdd %19 %200 %40
OpStore %178 %201
OpBranch %179
%181 = OpLabel
OpEmitVertex OpEmitVertex
OpEndPrimitive OpEndPrimitive
OpBranch %31 OpBranch %31

View File

@ -16,11 +16,8 @@ out gl_PerVertex {
float gl_ClipDistance[]; float gl_ClipDistance[];
}; };
struct VertexData { layout(location = 0) in vec4 in_interpolators[][16];
vec4 o[16]; layout(location = 0) out vec4 out_interpolators[16];
};
layout(location = 0) in VertexData in_vtx[];
layout(location = 0) out VertexData out_vtx;
layout(triangles) in; layout(triangles) in;
layout(triangle_strip, max_vertices = 6) out; layout(triangle_strip, max_vertices = 6) out;
@ -35,30 +32,30 @@ void main() {
// 2 ----- [3] // 2 ----- [3]
gl_Position = gl_in[0].gl_Position; gl_Position = gl_in[0].gl_Position;
gl_PointSize = gl_in[0].gl_PointSize; gl_PointSize = gl_in[0].gl_PointSize;
out_vtx = in_vtx[0]; out_interpolators = in_interpolators[0];
EmitVertex(); EmitVertex();
gl_Position = gl_in[1].gl_Position; gl_Position = gl_in[1].gl_Position;
gl_PointSize = gl_in[1].gl_PointSize; gl_PointSize = gl_in[1].gl_PointSize;
out_vtx = in_vtx[1]; out_interpolators = in_interpolators[1];
EmitVertex(); EmitVertex();
gl_Position = gl_in[2].gl_Position; gl_Position = gl_in[2].gl_Position;
gl_PointSize = gl_in[2].gl_PointSize; gl_PointSize = gl_in[2].gl_PointSize;
out_vtx = in_vtx[2]; out_interpolators = in_interpolators[2];
EmitVertex(); EmitVertex();
EndPrimitive(); EndPrimitive();
gl_Position = gl_in[2].gl_Position; gl_Position = gl_in[2].gl_Position;
gl_PointSize = gl_in[2].gl_PointSize; gl_PointSize = gl_in[2].gl_PointSize;
out_vtx = in_vtx[2]; out_interpolators = in_interpolators[2];
EmitVertex(); EmitVertex();
gl_Position = gl_in[1].gl_Position; gl_Position = gl_in[1].gl_Position;
gl_PointSize = gl_in[1].gl_PointSize; gl_PointSize = gl_in[1].gl_PointSize;
out_vtx = in_vtx[1]; out_interpolators = in_interpolators[1];
EmitVertex(); EmitVertex();
gl_Position = (gl_in[1].gl_Position + gl_in[2].gl_Position) - gl_Position = (gl_in[1].gl_Position + gl_in[2].gl_Position) -
gl_in[0].gl_Position; gl_in[0].gl_Position;
gl_PointSize = gl_in[2].gl_PointSize; gl_PointSize = gl_in[2].gl_PointSize;
for (int i = 0; i < 16; ++i) { for (int i = 0; i < 16; ++i) {
out_vtx.o[i] = -in_vtx[0].o[i] + in_vtx[1].o[i] + in_vtx[2].o[i]; out_interpolators[i] = -in_interpolators[0][i] + in_interpolators[1][i] + in_interpolators[2][i];
} }
EmitVertex(); EmitVertex();
EndPrimitive(); EndPrimitive();
@ -70,30 +67,30 @@ void main() {
// [3] ----- 2 // [3] ----- 2
gl_Position = gl_in[0].gl_Position; gl_Position = gl_in[0].gl_Position;
gl_PointSize = gl_in[0].gl_PointSize; gl_PointSize = gl_in[0].gl_PointSize;
out_vtx = in_vtx[0]; out_interpolators = in_interpolators[0];
EmitVertex(); EmitVertex();
gl_Position = gl_in[1].gl_Position; gl_Position = gl_in[1].gl_Position;
gl_PointSize = gl_in[1].gl_PointSize; gl_PointSize = gl_in[1].gl_PointSize;
out_vtx = in_vtx[1]; out_interpolators = in_interpolators[1];
EmitVertex(); EmitVertex();
gl_Position = gl_in[2].gl_Position; gl_Position = gl_in[2].gl_Position;
gl_PointSize = gl_in[2].gl_PointSize; gl_PointSize = gl_in[2].gl_PointSize;
out_vtx = in_vtx[2]; out_interpolators = in_interpolators[2];
EmitVertex(); EmitVertex();
EndPrimitive(); EndPrimitive();
gl_Position = gl_in[0].gl_Position; gl_Position = gl_in[0].gl_Position;
gl_PointSize = gl_in[0].gl_PointSize; gl_PointSize = gl_in[0].gl_PointSize;
out_vtx = in_vtx[0]; out_interpolators = in_interpolators[0];
EmitVertex(); EmitVertex();
gl_Position = gl_in[2].gl_Position; gl_Position = gl_in[2].gl_Position;
gl_PointSize = gl_in[2].gl_PointSize; gl_PointSize = gl_in[2].gl_PointSize;
out_vtx = in_vtx[2]; out_interpolators = in_interpolators[2];
EmitVertex(); EmitVertex();
gl_Position = (gl_in[0].gl_Position + gl_in[2].gl_Position) - gl_Position = (gl_in[0].gl_Position + gl_in[2].gl_Position) -
gl_in[1].gl_Position; gl_in[1].gl_Position;
gl_PointSize = gl_in[2].gl_PointSize; gl_PointSize = gl_in[2].gl_PointSize;
for (int i = 0; i < 16; ++i) { for (int i = 0; i < 16; ++i) {
out_vtx.o[i] = in_vtx[0].o[i] + -in_vtx[1].o[i] + in_vtx[2].o[i]; out_interpolators[i] = in_interpolators[0][i] + -in_interpolators[1][i] + in_interpolators[2][i];
} }
EmitVertex(); EmitVertex();
EndPrimitive(); EndPrimitive();

File diff suppressed because it is too large Load Diff

View File

@ -10,10 +10,16 @@
#ifndef XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ #ifndef XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
#define XENIA_GPU_VULKAN_TEXTURE_CACHE_H_ #define XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
#include <unordered_map>
#include "xenia/gpu/register_file.h" #include "xenia/gpu/register_file.h"
#include "xenia/gpu/sampler_info.h"
#include "xenia/gpu/shader.h" #include "xenia/gpu/shader.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/trace_writer.h" #include "xenia/gpu/trace_writer.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
#include "xenia/gpu/xenos.h" #include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/circular_buffer.h"
#include "xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h" #include "xenia/ui/vulkan/vulkan_device.h"
@ -24,8 +30,51 @@ namespace vulkan {
// //
class TextureCache { class TextureCache {
public: public:
TextureCache(RegisterFile* register_file, TraceWriter* trace_writer, struct TextureView;
ui::vulkan::VulkanDevice* device);
// This represents an uploaded Vulkan texture.
struct Texture {
TextureInfo texture_info;
std::vector<std::unique_ptr<TextureView>> views;
// True if we know all info about this texture, false otherwise.
// (e.g. we resolve to system memory and may not know the full details about
// this texture)
bool is_full_texture;
VkFormat format;
VkImage image;
VkImageLayout image_layout;
VkDeviceMemory image_memory;
VkDeviceSize memory_offset;
VkDeviceSize memory_size;
uintptr_t access_watch_handle;
bool pending_invalidation;
// Pointer to the latest usage fence.
std::shared_ptr<ui::vulkan::Fence> in_flight_fence;
};
struct TextureView {
Texture* texture;
VkImageView view;
union {
struct {
// FIXME: This only applies on little-endian platforms!
uint16_t swiz_x : 3;
uint16_t swiz_y : 3;
uint16_t swiz_z : 3;
uint16_t swiz_w : 3;
uint16_t : 4;
};
uint16_t swizzle;
};
};
TextureCache(Memory* memory, RegisterFile* register_file,
TraceWriter* trace_writer, ui::vulkan::VulkanDevice* device);
~TextureCache(); ~TextureCache();
// Descriptor set layout containing all possible texture bindings. // Descriptor set layout containing all possible texture bindings.
@ -36,8 +85,11 @@ class TextureCache {
// Prepares a descriptor set containing the samplers and images for all // Prepares a descriptor set containing the samplers and images for all
// bindings. The textures will be uploaded/converted/etc as needed. // bindings. The textures will be uploaded/converted/etc as needed.
// Requires a fence to be provided that will be signaled when finished
// using the returned descriptor set.
VkDescriptorSet PrepareTextureSet( VkDescriptorSet PrepareTextureSet(
VkCommandBuffer command_buffer, VkCommandBuffer setup_command_buffer,
std::shared_ptr<ui::vulkan::Fence> completion_fence,
const std::vector<Shader::TextureBinding>& vertex_bindings, const std::vector<Shader::TextureBinding>& vertex_bindings,
const std::vector<Shader::TextureBinding>& pixel_bindings); const std::vector<Shader::TextureBinding>& pixel_bindings);
@ -45,45 +97,106 @@ class TextureCache {
// TODO(benvanik): Resolve. // TODO(benvanik): Resolve.
// TODO(benvanik): ReadTexture. // TODO(benvanik): ReadTexture.
// Looks for a texture either containing or matching these parameters.
// Caller is responsible for checking if the texture returned is an exact
// match or just contains the texture given by the parameters.
// If offset_x and offset_y are not null, this may return a texture that
// contains this address at an offset.
Texture* LookupAddress(uint32_t guest_address, uint32_t width,
uint32_t height, TextureFormat format,
VkOffset2D* out_offset = nullptr);
// Demands a texture for the purpose of resolving from EDRAM. This either
// creates a new texture or returns a previously created texture. texture_info
// is not required to be completely filled out, just guest_address and all
// sizes.
//
// It's possible that this may return an image that is larger than the
// requested size (e.g. resolving into a bigger texture) or an image that
// must have an offset applied. If so, the caller must handle this.
// At the very least, it's guaranteed that the image will be large enough to
// hold the requested size.
Texture* DemandResolveTexture(const TextureInfo& texture_info,
TextureFormat format, VkOffset2D* out_offset);
// Clears all cached content. // Clears all cached content.
void ClearCache(); void ClearCache();
// Frees any unused resources
void Scavenge();
private: private:
struct UpdateSetInfo; struct UpdateSetInfo;
void SetupGridImages(); // Cached Vulkan sampler.
struct Sampler {
SamplerInfo sampler_info;
VkSampler sampler;
};
// Allocates a new texture and memory to back it on the GPU.
Texture* AllocateTexture(const TextureInfo& texture_info);
bool FreeTexture(Texture* texture);
// Demands a texture. If command_buffer is null and the texture hasn't been
// uploaded to graphics memory already, we will return null and bail.
Texture* Demand(
const TextureInfo& texture_info, VkCommandBuffer command_buffer = nullptr,
std::shared_ptr<ui::vulkan::Fence> completion_fence = nullptr);
TextureView* DemandView(Texture* texture, uint16_t swizzle);
Sampler* Demand(const SamplerInfo& sampler_info);
// Queues commands to upload a texture from system memory, applying any
// conversions necessary. This may flush the command buffer to the GPU if we
// run out of staging memory.
bool UploadTexture2D(VkCommandBuffer command_buffer,
std::shared_ptr<ui::vulkan::Fence> completion_fence,
Texture* dest, TextureInfo src);
bool SetupTextureBindings( bool SetupTextureBindings(
VkCommandBuffer command_buffer,
std::shared_ptr<ui::vulkan::Fence> completion_fence,
UpdateSetInfo* update_set_info, UpdateSetInfo* update_set_info,
const std::vector<Shader::TextureBinding>& bindings); const std::vector<Shader::TextureBinding>& bindings);
bool SetupTextureBinding(UpdateSetInfo* update_set_info, bool SetupTextureBinding(VkCommandBuffer command_buffer,
std::shared_ptr<ui::vulkan::Fence> completion_fence,
UpdateSetInfo* update_set_info,
const Shader::TextureBinding& binding); const Shader::TextureBinding& binding);
Memory* memory_ = nullptr;
RegisterFile* register_file_ = nullptr; RegisterFile* register_file_ = nullptr;
TraceWriter* trace_writer_ = nullptr; TraceWriter* trace_writer_ = nullptr;
ui::vulkan::VulkanDevice* device_ = nullptr; ui::vulkan::VulkanDevice* device_ = nullptr;
VkDescriptorPool descriptor_pool_ = nullptr; VkDescriptorPool descriptor_pool_ = nullptr;
VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr; VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr;
std::list<std::pair<VkDescriptorSet, std::shared_ptr<ui::vulkan::Fence>>>
in_flight_sets_;
VkDeviceMemory grid_image_2d_memory_ = nullptr; ui::vulkan::CircularBuffer staging_buffer_;
VkImage grid_image_2d_ = nullptr; std::unordered_map<uint64_t, Texture*> textures_;
VkImageView grid_image_2d_view_ = nullptr; std::unordered_map<uint64_t, Sampler*> samplers_;
std::vector<Texture*> resolve_textures_;
std::list<Texture*> pending_delete_textures_;
std::mutex invalidated_textures_mutex_;
std::vector<Texture*>* invalidated_textures_;
std::vector<Texture*> invalidated_textures_sets_[2];
std::mutex invalidated_resolve_textures_mutex_;
std::vector<Texture*> invalidated_resolve_textures_;
struct UpdateSetInfo { struct UpdateSetInfo {
// Bitmap of all 32 fetch constants and whether they have been setup yet. // Bitmap of all 32 fetch constants and whether they have been setup yet.
// This prevents duplication across the vertex and pixel shader. // This prevents duplication across the vertex and pixel shader.
uint32_t has_setup_fetch_mask; uint32_t has_setup_fetch_mask;
uint32_t sampler_write_count = 0; uint32_t image_write_count = 0;
VkDescriptorImageInfo sampler_infos[32]; struct ImageSetInfo {
uint32_t image_1d_write_count = 0; Dimension dimension;
VkDescriptorImageInfo image_1d_infos[32]; uint32_t tf_binding;
uint32_t image_2d_write_count = 0; VkDescriptorImageInfo info;
VkDescriptorImageInfo image_2d_infos[32]; } image_infos[32];
uint32_t image_3d_write_count = 0;
VkDescriptorImageInfo image_3d_infos[32];
uint32_t image_cube_write_count = 0;
VkDescriptorImageInfo image_cube_infos[32];
} update_set_info_; } update_set_info_;
}; };

View File

@ -37,9 +37,22 @@ VulkanCommandProcessor::VulkanCommandProcessor(
VulkanCommandProcessor::~VulkanCommandProcessor() = default; VulkanCommandProcessor::~VulkanCommandProcessor() = default;
// Requests a frame trace. When RenderDoc is attached the request is only
// latched in trace_requested_; otherwise it is forwarded unchanged to the
// base CommandProcessor implementation.
void VulkanCommandProcessor::RequestFrameTrace(const std::wstring& root_path) {
  if (!device_->is_renderdoc_attached()) {
    // No RenderDoc: use the regular trace path.
    return CommandProcessor::RequestFrameTrace(root_path);
  }

  // RenderDoc overrides the regular trace path.
  trace_requested_ = true;
}
void VulkanCommandProcessor::ClearCaches() { void VulkanCommandProcessor::ClearCaches() {
CommandProcessor::ClearCaches(); CommandProcessor::ClearCaches();
auto status = vkQueueWaitIdle(queue_);
CheckResult(status, "vkQueueWaitIdle");
buffer_cache_->ClearCache(); buffer_cache_->ClearCache();
pipeline_cache_->ClearCache(); pipeline_cache_->ClearCache();
render_cache_->ClearCache(); render_cache_->ClearCache();
@ -69,8 +82,8 @@ bool VulkanCommandProcessor::SetupContext() {
// Initialize the state machine caches. // Initialize the state machine caches.
buffer_cache_ = std::make_unique<BufferCache>(register_file_, device_, buffer_cache_ = std::make_unique<BufferCache>(register_file_, device_,
kDefaultBufferCacheCapacity); kDefaultBufferCacheCapacity);
texture_cache_ = texture_cache_ = std::make_unique<TextureCache>(memory_, register_file_,
std::make_unique<TextureCache>(register_file_, &trace_writer_, device_); &trace_writer_, device_);
pipeline_cache_ = std::make_unique<PipelineCache>( pipeline_cache_ = std::make_unique<PipelineCache>(
register_file_, device_, buffer_cache_->constant_descriptor_set_layout(), register_file_, device_, buffer_cache_->constant_descriptor_set_layout(),
texture_cache_->texture_descriptor_set_layout()); texture_cache_->texture_descriptor_set_layout());
@ -82,6 +95,11 @@ bool VulkanCommandProcessor::SetupContext() {
void VulkanCommandProcessor::ShutdownContext() { void VulkanCommandProcessor::ShutdownContext() {
// TODO(benvanik): wait until idle. // TODO(benvanik): wait until idle.
if (swap_state_.front_buffer_texture) {
// Free swap chain images.
DestroySwapImages();
}
buffer_cache_.reset(); buffer_cache_.reset();
pipeline_cache_.reset(); pipeline_cache_.reset();
render_cache_.reset(); render_cache_.reset();
@ -90,7 +108,7 @@ void VulkanCommandProcessor::ShutdownContext() {
// Free all pools. This must come after all of our caches clean up. // Free all pools. This must come after all of our caches clean up.
command_buffer_pool_.reset(); command_buffer_pool_.reset();
// Release queue, if were using an acquired one. // Release queue, if we were using an acquired one.
if (!queue_mutex_) { if (!queue_mutex_) {
device_->ReleaseQueue(queue_); device_->ReleaseQueue(queue_);
queue_ = nullptr; queue_ = nullptr;
@ -131,24 +149,241 @@ void VulkanCommandProcessor::ReturnFromWait() {
CommandProcessor::ReturnFromWait(); CommandProcessor::ReturnFromWait();
} }
// Creates the front and back swap images at the given extents, allocates and
// binds device memory for each (kept in fb_memory / bb_memory), publishes the
// image handles in swap_state_, and records barriers into setup_buffer that
// transition both images from UNDEFINED to GENERAL layout.
//
// setup_buffer receives vkCmdPipelineBarrier commands, so it has to be in the
// recording state when this is called.
void VulkanCommandProcessor::CreateSwapImages(VkCommandBuffer setup_buffer,
                                              VkExtent2D extents) {
  VkImageCreateInfo image_info;
  std::memset(&image_info, 0, sizeof(VkImageCreateInfo));
  image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
  image_info.imageType = VK_IMAGE_TYPE_2D;
  image_info.format = VK_FORMAT_R8G8B8A8_UNORM;
  image_info.extent = {extents.width, extents.height, 1};
  image_info.mipLevels = 1;
  image_info.arrayLayers = 1;
  image_info.samples = VK_SAMPLE_COUNT_1_BIT;
  image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
  image_info.usage =
      VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
  image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  image_info.queueFamilyIndexCount = 0;
  image_info.pQueueFamilyIndices = nullptr;
  image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;

  // Both images are created from the same description.
  VkImage image_fb, image_bb;
  auto status = vkCreateImage(*device_, &image_info, nullptr, &image_fb);
  CheckResult(status, "vkCreateImage");

  status = vkCreateImage(*device_, &image_info, nullptr, &image_bb);
  CheckResult(status, "vkCreateImage");

  // Bind memory to images. Requirements are queried from the image that the
  // allocation is going to back.
  VkMemoryRequirements mem_requirements;
  vkGetImageMemoryRequirements(*device_, image_fb, &mem_requirements);
  fb_memory = device_->AllocateMemory(mem_requirements, 0);
  assert_not_null(fb_memory);

  status = vkBindImageMemory(*device_, image_fb, fb_memory, 0);
  CheckResult(status, "vkBindImageMemory");

  // Fixed: this previously queried image_fb a second time.
  vkGetImageMemoryRequirements(*device_, image_bb, &mem_requirements);
  bb_memory = device_->AllocateMemory(mem_requirements, 0);
  assert_not_null(bb_memory);

  status = vkBindImageMemory(*device_, image_bb, bb_memory, 0);
  CheckResult(status, "vkBindImageMemory");

  // The swap state is guarded by its mutex; the lock is held until return.
  std::lock_guard<std::mutex> lock(swap_state_.mutex);
  swap_state_.front_buffer_texture = reinterpret_cast<uintptr_t>(image_fb);
  swap_state_.back_buffer_texture = reinterpret_cast<uintptr_t>(image_bb);

  // Transition both images to general layout.
  VkImageMemoryBarrier barrier;
  std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier));
  barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  barrier.srcAccessMask = 0;
  barrier.dstAccessMask = 0;
  barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
  barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  barrier.image = image_fb;
  barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};

  vkCmdPipelineBarrier(setup_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
                       nullptr, 1, &barrier);

  // Same barrier, retargeted at the back buffer image.
  barrier.image = image_bb;

  vkCmdPipelineBarrier(setup_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
                       nullptr, 1, &barrier);
}
// Destroys both swap images, frees the memory bound to them, and zeroes the
// handles held in swap_state_ and fb_memory / bb_memory. The whole teardown
// runs with swap_state_.mutex held.
void VulkanCommandProcessor::DestroySwapImages() {
  std::lock_guard<std::mutex> swap_lock(swap_state_.mutex);

  // Images first, then the memory that backs them.
  auto front_image =
      reinterpret_cast<VkImage>(swap_state_.front_buffer_texture);
  auto back_image = reinterpret_cast<VkImage>(swap_state_.back_buffer_texture);
  vkDestroyImage(*device_, front_image, nullptr);
  vkDestroyImage(*device_, back_image, nullptr);

  vkFreeMemory(*device_, fb_memory, nullptr);
  vkFreeMemory(*device_, bb_memory, nullptr);

  // Clear every stored handle so nothing refers to the destroyed objects.
  swap_state_.front_buffer_texture = 0;
  swap_state_.back_buffer_texture = 0;
  fb_memory = nullptr;
  bb_memory = nullptr;
}
// Presents a frame: records a one-time-submit command buffer that blits the
// guest frontbuffer texture (looked up in the texture cache) into the swap
// back buffer image, submits it together with any open setup/draw command
// buffers under current_batch_fence_, ends the batch and scavenges the
// command buffer pool, texture cache and buffer cache.
// NOTE(review): the lines of this function interleave pre-change and
// post-change text from a side-by-side diff; the summary above describes the
// post-change (Vulkan) statements.
void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
uint32_t frontbuffer_width, uint32_t frontbuffer_width,
uint32_t frontbuffer_height) { uint32_t frontbuffer_height) {
// Ensure we issue any pending draws. SCOPE_profile_cpu_f("gpu");
// draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent);
// Need to finish to be sure the other context sees the right data. // Build a final command buffer that copies the game's frontbuffer texture
// TODO(benvanik): prevent this? fences? // into our backbuffer texture.
// glFinish(); VkCommandBuffer copy_commands = nullptr;
// NOTE(review): opened_batch is assigned below but never read in this
// function.
bool opened_batch;
if (context_->WasLost()) { if (command_buffer_pool_->has_open_batch()) {
// We've lost the context due to a TDR. copy_commands = command_buffer_pool_->AcquireEntry();
// TODO: Dump the current commands to a tracefile. opened_batch = false;
assert_always(); } else {
command_buffer_pool_->BeginBatch();
copy_commands = command_buffer_pool_->AcquireEntry();
current_batch_fence_.reset(new ui::vulkan::Fence(*device_));
opened_batch = true;
} }
// Remove any dead textures, etc. VkCommandBufferBeginInfo begin_info;
// texture_cache_.Scavenge(); std::memset(&begin_info, 0, sizeof(begin_info));
begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
auto status = vkBeginCommandBuffer(copy_commands, &begin_info);
CheckResult(status, "vkBeginCommandBuffer");
if (!frontbuffer_ptr) {
// Trace viewer does this.
frontbuffer_ptr = last_copy_base_;
}
// Swap images are created on demand, sized to the requested frontbuffer.
if (!swap_state_.back_buffer_texture) {
CreateSwapImages(copy_commands, {frontbuffer_width, frontbuffer_height});
}
auto swap_bb = reinterpret_cast<VkImage>(swap_state_.back_buffer_texture);
// Issue the commands to copy the game's frontbuffer to our backbuffer.
auto texture = texture_cache_->LookupAddress(
frontbuffer_ptr, xe::round_up(frontbuffer_width, 32),
xe::round_up(frontbuffer_height, 32), TextureFormat::k_8_8_8_8);
// When no texture is found nothing is blitted and swap_state_ width/height
// are left untouched; the (empty) copy buffer is still submitted below.
if (texture) {
texture->in_flight_fence = current_batch_fence_;
// Insert a barrier so the GPU finishes writing to the image.
VkImageMemoryBarrier barrier;
std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier));
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.srcAccessMask =
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.oldLayout = texture->image_layout;
barrier.newLayout = texture->image_layout;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = texture->image;
barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
vkCmdPipelineBarrier(copy_commands, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
nullptr, 1, &barrier);
// Now issue a blit command.
VkImageBlit blit;
std::memset(&blit, 0, sizeof(VkImageBlit));
blit.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
blit.srcOffsets[0] = {0, 0, 0};
blit.srcOffsets[1] = {int32_t(frontbuffer_width),
int32_t(frontbuffer_height), 1};
blit.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
blit.dstOffsets[0] = {0, 0, 0};
blit.dstOffsets[1] = {int32_t(frontbuffer_width),
int32_t(frontbuffer_height), 1};
vkCmdBlitImage(copy_commands, texture->image, texture->image_layout,
swap_bb, VK_IMAGE_LAYOUT_GENERAL, 1, &blit,
VK_FILTER_LINEAR);
std::lock_guard<std::mutex> lock(swap_state_.mutex);
swap_state_.width = frontbuffer_width;
swap_state_.height = frontbuffer_height;
}
status = vkEndCommandBuffer(copy_commands);
CheckResult(status, "vkEndCommandBuffer");
// Queue up current command buffers.
// TODO(benvanik): bigger batches.
std::vector<VkCommandBuffer> submit_buffers;
if (current_command_buffer_) {
if (current_render_state_) {
render_cache_->EndRenderPass();
current_render_state_ = nullptr;
}
status = vkEndCommandBuffer(current_setup_buffer_);
CheckResult(status, "vkEndCommandBuffer");
status = vkEndCommandBuffer(current_command_buffer_);
CheckResult(status, "vkEndCommandBuffer");
// TODO(DrChat): If the setup buffer is empty, don't bother queueing it up.
submit_buffers.push_back(current_setup_buffer_);
submit_buffers.push_back(current_command_buffer_);
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
}
// The copy buffer goes last, after the setup and draw buffers.
submit_buffers.push_back(copy_commands);
// NOTE(review): copy_commands was pushed just above, so this check cannot
// fail.
if (!submit_buffers.empty()) {
// TODO(benvanik): move to CP or to host (trace dump, etc).
// This only needs to surround a vkQueueSubmit.
if (queue_mutex_) {
queue_mutex_->lock();
}
VkSubmitInfo submit_info;
std::memset(&submit_info, 0, sizeof(VkSubmitInfo));
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submit_info.commandBufferCount = uint32_t(submit_buffers.size());
submit_info.pCommandBuffers = submit_buffers.data();
status = vkQueueSubmit(queue_, 1, &submit_info, *current_batch_fence_);
CheckResult(status, "vkQueueSubmit");
if (device_->is_renderdoc_attached() && capturing_) {
device_->EndRenderDocFrameCapture();
capturing_ = false;
}
if (queue_mutex_) {
queue_mutex_->unlock();
}
}
command_buffer_pool_->EndBatch(current_batch_fence_);
// Scavenging.
{
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_i(
"gpu",
"xe::gpu::vulkan::VulkanCommandProcessor::PerformSwap Scavenging");
#endif // FINE_GRAINED_DRAW_SCOPES
command_buffer_pool_->Scavenge();
texture_cache_->Scavenge();
buffer_cache_->Scavenge();
}
current_batch_fence_ = nullptr;
} }
Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type, Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type,
@ -178,16 +413,16 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
return IssueCopy(); return IssueCopy();
} }
// TODO(benvanik): move to CP or to host (trace dump, etc). if ((regs[XE_GPU_REG_RB_SURFACE_INFO].u32 & 0x3FFF) == 0) {
if (FLAGS_vulkan_renderdoc_capture_all && device_->is_renderdoc_attached()) { // Doesn't actually draw.
device_->BeginRenderDocFrameCapture(); return true;
} }
// Shaders will have already been defined by previous loads. // Shaders will have already been defined by previous loads.
// We need the to do just about anything so validate here. // We need them to do just about anything so validate here.
auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader()); auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader());
auto pixel_shader = static_cast<VulkanShader*>(active_pixel_shader()); auto pixel_shader = static_cast<VulkanShader*>(active_pixel_shader());
if (!vertex_shader || !vertex_shader->is_valid()) { if (!vertex_shader) {
// Always need a vertex shader. // Always need a vertex shader.
return true; return true;
} }
@ -196,61 +431,142 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Use a dummy pixel shader when required. // Use a dummy pixel shader when required.
// TODO(benvanik): dummy pixel shader. // TODO(benvanik): dummy pixel shader.
assert_not_null(pixel_shader); assert_not_null(pixel_shader);
} else if (!pixel_shader || !pixel_shader->is_valid()) { } else if (!pixel_shader) {
// Need a pixel shader in normal color mode. // Need a pixel shader in normal color mode.
return true; return true;
} }
// TODO(benvanik): bigger batches. bool started_command_buffer = false;
command_buffer_pool_->BeginBatch(); if (!current_command_buffer_) {
VkCommandBuffer command_buffer = command_buffer_pool_->AcquireEntry(); // TODO(benvanik): bigger batches.
VkCommandBufferBeginInfo command_buffer_begin_info; // TODO(DrChat): Decouple setup buffer from current batch.
command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; command_buffer_pool_->BeginBatch();
command_buffer_begin_info.pNext = nullptr; current_command_buffer_ = command_buffer_pool_->AcquireEntry();
command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; current_setup_buffer_ = command_buffer_pool_->AcquireEntry();
command_buffer_begin_info.pInheritanceInfo = nullptr; current_batch_fence_.reset(new ui::vulkan::Fence(*device_));
auto err = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info);
CheckResult(err, "vkBeginCommandBuffer"); VkCommandBufferBeginInfo command_buffer_begin_info;
command_buffer_begin_info.sType =
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
command_buffer_begin_info.pNext = nullptr;
command_buffer_begin_info.flags =
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
command_buffer_begin_info.pInheritanceInfo = nullptr;
auto status = vkBeginCommandBuffer(current_command_buffer_,
&command_buffer_begin_info);
CheckResult(status, "vkBeginCommandBuffer");
status =
vkBeginCommandBuffer(current_setup_buffer_, &command_buffer_begin_info);
CheckResult(status, "vkBeginCommandBuffer");
static uint32_t frame = 0;
if (device_->is_renderdoc_attached() && !capturing_ &&
(FLAGS_vulkan_renderdoc_capture_all || trace_requested_)) {
if (queue_mutex_) {
queue_mutex_->lock();
}
capturing_ = true;
trace_requested_ = false;
device_->BeginRenderDocFrameCapture();
if (queue_mutex_) {
queue_mutex_->unlock();
}
}
started_command_buffer = true;
}
auto command_buffer = current_command_buffer_;
auto setup_buffer = current_setup_buffer_;
// Begin the render pass. // Begin the render pass.
// This will setup our framebuffer and begin the pass in the command buffer. // This will setup our framebuffer and begin the pass in the command buffer.
auto render_state = render_cache_->BeginRenderPass( // This reuses a previous render pass if one is already open.
command_buffer, vertex_shader, pixel_shader); if (render_cache_->dirty() || !current_render_state_) {
if (!render_state) { if (current_render_state_) {
return false; render_cache_->EndRenderPass();
current_render_state_ = nullptr;
}
current_render_state_ = render_cache_->BeginRenderPass(
command_buffer, vertex_shader, pixel_shader);
if (!current_render_state_) {
command_buffer_pool_->CancelBatch();
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
current_batch_fence_ = nullptr;
return false;
}
} }
// Configure the pipeline for drawing. // Configure the pipeline for drawing.
// This encodes all render state (blend, depth, etc), our shader stages, // This encodes all render state (blend, depth, etc), our shader stages,
// and our vertex input layout. // and our vertex input layout.
if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_state, VkPipeline pipeline = nullptr;
vertex_shader, pixel_shader, auto pipeline_status = pipeline_cache_->ConfigurePipeline(
primitive_type)) { command_buffer, current_render_state_, vertex_shader, pixel_shader,
primitive_type, &pipeline);
if (pipeline_status == PipelineCache::UpdateStatus::kMismatch ||
started_command_buffer) {
vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline);
} else if (pipeline_status == PipelineCache::UpdateStatus::kError) {
render_cache_->EndRenderPass(); render_cache_->EndRenderPass();
command_buffer_pool_->CancelBatch();
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
current_batch_fence_ = nullptr;
current_render_state_ = nullptr;
return false; return false;
} }
pipeline_cache_->SetDynamicState(command_buffer, started_command_buffer);
// Pass registers to the shaders. // Pass registers to the shaders.
if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) { if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) {
render_cache_->EndRenderPass(); render_cache_->EndRenderPass();
command_buffer_pool_->CancelBatch();
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
current_batch_fence_ = nullptr;
current_render_state_ = nullptr;
return false; return false;
} }
// Upload and bind index buffer data (if we have any). // Upload and bind index buffer data (if we have any).
if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) { if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) {
render_cache_->EndRenderPass(); render_cache_->EndRenderPass();
command_buffer_pool_->CancelBatch();
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
current_batch_fence_ = nullptr;
current_render_state_ = nullptr;
return false; return false;
} }
// Upload and bind all vertex buffer data. // Upload and bind all vertex buffer data.
if (!PopulateVertexBuffers(command_buffer, vertex_shader)) { if (!PopulateVertexBuffers(command_buffer, vertex_shader)) {
render_cache_->EndRenderPass(); render_cache_->EndRenderPass();
command_buffer_pool_->CancelBatch();
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
current_batch_fence_ = nullptr;
current_render_state_ = nullptr;
return false; return false;
} }
// Upload and set descriptors for all textures. // Bind samplers/textures.
if (!PopulateSamplers(command_buffer, vertex_shader, pixel_shader)) { // Uploads all textures that need it.
// Setup buffer may be flushed to GPU if the texture cache needs it.
if (!PopulateSamplers(command_buffer, setup_buffer, vertex_shader,
pixel_shader)) {
render_cache_->EndRenderPass(); render_cache_->EndRenderPass();
command_buffer_pool_->CancelBatch();
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
current_batch_fence_ = nullptr;
current_render_state_ = nullptr;
return false; return false;
} }
@ -273,68 +589,21 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
vertex_offset, first_instance); vertex_offset, first_instance);
} }
// End the rendering pass.
render_cache_->EndRenderPass();
// TODO(benvanik): bigger batches.
err = vkEndCommandBuffer(command_buffer);
CheckResult(err, "vkEndCommandBuffer");
VkFence fence;
VkFenceCreateInfo fence_info;
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fence_info.pNext = nullptr;
fence_info.flags = 0;
vkCreateFence(*device_, &fence_info, nullptr, &fence);
command_buffer_pool_->EndBatch(fence);
VkSubmitInfo submit_info;
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submit_info.pNext = nullptr;
submit_info.waitSemaphoreCount = 0;
submit_info.pWaitSemaphores = nullptr;
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &command_buffer;
submit_info.signalSemaphoreCount = 0;
submit_info.pSignalSemaphores = nullptr;
if (queue_mutex_) {
queue_mutex_->lock();
}
err = vkQueueSubmit(queue_, 1, &submit_info, fence);
if (queue_mutex_) {
queue_mutex_->unlock();
}
CheckResult(err, "vkQueueSubmit");
if (queue_mutex_) {
queue_mutex_->lock();
}
err = vkQueueWaitIdle(queue_);
CheckResult(err, "vkQueueWaitIdle");
err = vkDeviceWaitIdle(*device_);
CheckResult(err, "vkDeviceWaitIdle");
if (queue_mutex_) {
queue_mutex_->unlock();
}
while (command_buffer_pool_->has_pending()) {
command_buffer_pool_->Scavenge();
xe::threading::MaybeYield();
}
vkDestroyFence(*device_, fence, nullptr);
// TODO(benvanik): move to CP or to host (trace dump, etc).
if (FLAGS_vulkan_renderdoc_capture_all && device_->is_renderdoc_attached()) {
device_->EndRenderDocFrameCapture();
}
return true; return true;
} }
bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer,
VulkanShader* vertex_shader, VulkanShader* vertex_shader,
VulkanShader* pixel_shader) { VulkanShader* pixel_shader) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
// Upload the constants the shaders require. // Upload the constants the shaders require.
// These are optional, and if none are defined 0 will be returned. // These are optional, and if none are defined 0 will be returned.
auto constant_offsets = buffer_cache_->UploadConstantRegisters( auto constant_offsets = buffer_cache_->UploadConstantRegisters(
vertex_shader->constant_register_map(), vertex_shader->constant_register_map(),
pixel_shader->constant_register_map()); pixel_shader->constant_register_map(), current_batch_fence_);
if (constant_offsets.first == VK_WHOLE_SIZE || if (constant_offsets.first == VK_WHOLE_SIZE ||
constant_offsets.second == VK_WHOLE_SIZE) { constant_offsets.second == VK_WHOLE_SIZE) {
// Shader wants constants but we couldn't upload them. // Shader wants constants but we couldn't upload them.
@ -387,8 +656,8 @@ bool VulkanCommandProcessor::PopulateIndexBuffer(
size_t source_length = size_t source_length =
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t) info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
: sizeof(uint16_t)); : sizeof(uint16_t));
auto buffer_ref = auto buffer_ref = buffer_cache_->UploadIndexBuffer(
buffer_cache_->UploadIndexBuffer(source_ptr, source_length, info.format); source_ptr, source_length, info.format, current_batch_fence_);
if (buffer_ref.second == VK_WHOLE_SIZE) { if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer. // Failed to upload buffer.
return false; return false;
@ -413,6 +682,11 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
#endif // FINE_GRAINED_DRAW_SCOPES #endif // FINE_GRAINED_DRAW_SCOPES
auto& vertex_bindings = vertex_shader->vertex_bindings(); auto& vertex_bindings = vertex_shader->vertex_bindings();
if (vertex_bindings.empty()) {
// No bindings.
return true;
}
assert_true(vertex_bindings.size() <= 32); assert_true(vertex_bindings.size() <= 32);
VkBuffer all_buffers[32]; VkBuffer all_buffers[32];
VkDeviceSize all_buffer_offsets[32]; VkDeviceSize all_buffer_offsets[32];
@ -434,7 +708,6 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
fetch = &group->vertex_fetch_2; fetch = &group->vertex_fetch_2;
break; break;
} }
assert_true(fetch->endian == 2);
// TODO(benvanik): compute based on indices or vertex count. // TODO(benvanik): compute based on indices or vertex count.
// THIS CAN BE MASSIVELY INCORRECT (too large). // THIS CAN BE MASSIVELY INCORRECT (too large).
@ -446,8 +719,9 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
const void* source_ptr = const void* source_ptr =
memory_->TranslatePhysical<const void*>(fetch->address << 2); memory_->TranslatePhysical<const void*>(fetch->address << 2);
size_t source_length = valid_range; size_t source_length = valid_range;
auto buffer_ref = auto buffer_ref = buffer_cache_->UploadVertexBuffer(
buffer_cache_->UploadVertexBuffer(source_ptr, source_length); source_ptr, source_length, static_cast<Endian>(fetch->endian),
current_batch_fence_);
if (buffer_ref.second == VK_WHOLE_SIZE) { if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer. // Failed to upload buffer.
return false; return false;
@ -467,6 +741,7 @@ bool VulkanCommandProcessor::PopulateVertexBuffers(
} }
bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer, bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer,
VkCommandBuffer setup_buffer,
VulkanShader* vertex_shader, VulkanShader* vertex_shader,
VulkanShader* pixel_shader) { VulkanShader* pixel_shader) {
#if FINE_GRAINED_DRAW_SCOPES #if FINE_GRAINED_DRAW_SCOPES
@ -474,14 +749,13 @@ bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer,
#endif // FINE_GRAINED_DRAW_SCOPES #endif // FINE_GRAINED_DRAW_SCOPES
auto descriptor_set = texture_cache_->PrepareTextureSet( auto descriptor_set = texture_cache_->PrepareTextureSet(
command_buffer, vertex_shader->texture_bindings(), setup_buffer, current_batch_fence_, vertex_shader->texture_bindings(),
pixel_shader->texture_bindings()); pixel_shader->texture_bindings());
if (!descriptor_set) { if (!descriptor_set) {
// Unable to bind set. // Unable to bind set.
return false; return false;
} }
// Bind samplers/textures.
vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline_cache_->pipeline_layout(), 1, 1, pipeline_cache_->pipeline_layout(), 1, 1,
&descriptor_set, 0, nullptr); &descriptor_set, 0, nullptr);
@ -491,7 +765,294 @@ bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer,
bool VulkanCommandProcessor::IssueCopy() { bool VulkanCommandProcessor::IssueCopy() {
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
// TODO(benvanik): resolve. auto& regs = *register_file_;
// This is used to resolve surfaces, taking them from EDRAM render targets
// to system memory. It can optionally clear color/depth surfaces, too.
// The command buffer has stuff for actually doing this by drawing, however
// we should be able to do it without that much easier.
uint32_t copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
// Render targets 0-3, 4 = depth
uint32_t copy_src_select = copy_control & 0x7;
bool color_clear_enabled = (copy_control >> 8) & 0x1;
bool depth_clear_enabled = (copy_control >> 9) & 0x1;
auto copy_command = static_cast<CopyCommand>((copy_control >> 20) & 0x3);
uint32_t copy_dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32;
auto copy_dest_endian = static_cast<Endian128>(copy_dest_info & 0x7);
uint32_t copy_dest_array = (copy_dest_info >> 3) & 0x1;
assert_true(copy_dest_array == 0);
uint32_t copy_dest_slice = (copy_dest_info >> 4) & 0x7;
assert_true(copy_dest_slice == 0);
auto copy_dest_format =
static_cast<ColorFormat>((copy_dest_info >> 7) & 0x3F);
uint32_t copy_dest_number = (copy_dest_info >> 13) & 0x7;
// assert_true(copy_dest_number == 0); // ?
uint32_t copy_dest_bias = (copy_dest_info >> 16) & 0x3F;
// assert_true(copy_dest_bias == 0);
uint32_t copy_dest_swap = (copy_dest_info >> 25) & 0x1;
uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
uint32_t copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32;
uint32_t copy_dest_height = (copy_dest_pitch >> 16) & 0x3FFF;
copy_dest_pitch &= 0x3FFF;
// None of this is supported yet:
uint32_t copy_surface_slice = regs[XE_GPU_REG_RB_COPY_SURFACE_SLICE].u32;
assert_true(copy_surface_slice == 0);
uint32_t copy_func = regs[XE_GPU_REG_RB_COPY_FUNC].u32;
assert_true(copy_func == 0);
uint32_t copy_ref = regs[XE_GPU_REG_RB_COPY_REF].u32;
assert_true(copy_ref == 0);
uint32_t copy_mask = regs[XE_GPU_REG_RB_COPY_MASK].u32;
assert_true(copy_mask == 0);
// Supported in GL4, not supported here yet.
assert_zero(copy_dest_swap);
// RB_SURFACE_INFO
// http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
uint32_t surface_pitch = surface_info & 0x3FFF;
auto surface_msaa = static_cast<MsaaSamples>((surface_info >> 16) & 0x3);
// TODO(benvanik): any way to scissor this? a200 has:
// REG_A2XX_RB_COPY_DEST_OFFSET = A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
// A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff);
// but I can't seem to find something similar.
uint32_t dest_logical_width = copy_dest_pitch;
uint32_t dest_logical_height = copy_dest_height;
uint32_t dest_block_width = xe::round_up(dest_logical_width, 32);
uint32_t dest_block_height = xe::round_up(dest_logical_height, 32);
uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32;
int16_t window_offset_x = window_offset & 0x7FFF;
int16_t window_offset_y = (window_offset >> 16) & 0x7FFF;
// Sign-extension
if (window_offset_x & 0x4000) {
window_offset_x |= 0x8000;
}
if (window_offset_y & 0x4000) {
window_offset_y |= 0x8000;
}
size_t read_size = GetTexelSize(ColorFormatToTextureFormat(copy_dest_format));
// Adjust the copy base offset to point to the beginning of the texture, so
// we don't run into hiccups down the road (e.g. resolving the last part going
// backwards).
int32_t dest_offset = window_offset_y * copy_dest_pitch * int(read_size);
dest_offset += window_offset_x * 32 * int(read_size);
copy_dest_base += dest_offset;
// HACK: vertices to use are always in vf0.
int copy_vertex_fetch_slot = 0;
int r =
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + (copy_vertex_fetch_slot / 3) * 6;
const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
const xe_gpu_vertex_fetch_t* fetch = nullptr;
switch (copy_vertex_fetch_slot % 3) {
case 0:
fetch = &group->vertex_fetch_0;
break;
case 1:
fetch = &group->vertex_fetch_1;
break;
case 2:
fetch = &group->vertex_fetch_2;
break;
}
assert_true(fetch->type == 3);
assert_true(fetch->endian == 2);
assert_true(fetch->size == 6);
const uint8_t* vertex_addr = memory_->TranslatePhysical(fetch->address << 2);
trace_writer_.WriteMemoryRead(fetch->address << 2, fetch->size * 4);
int32_t dest_min_x = int32_t((std::min(
std::min(
GpuSwap(xe::load<float>(vertex_addr + 0), Endian(fetch->endian)),
GpuSwap(xe::load<float>(vertex_addr + 8), Endian(fetch->endian))),
GpuSwap(xe::load<float>(vertex_addr + 16), Endian(fetch->endian)))));
int32_t dest_max_x = int32_t((std::max(
std::max(
GpuSwap(xe::load<float>(vertex_addr + 0), Endian(fetch->endian)),
GpuSwap(xe::load<float>(vertex_addr + 8), Endian(fetch->endian))),
GpuSwap(xe::load<float>(vertex_addr + 16), Endian(fetch->endian)))));
int32_t dest_min_y = int32_t((std::min(
std::min(
GpuSwap(xe::load<float>(vertex_addr + 4), Endian(fetch->endian)),
GpuSwap(xe::load<float>(vertex_addr + 12), Endian(fetch->endian))),
GpuSwap(xe::load<float>(vertex_addr + 20), Endian(fetch->endian)))));
int32_t dest_max_y = int32_t((std::max(
std::max(
GpuSwap(xe::load<float>(vertex_addr + 4), Endian(fetch->endian)),
GpuSwap(xe::load<float>(vertex_addr + 12), Endian(fetch->endian))),
GpuSwap(xe::load<float>(vertex_addr + 20), Endian(fetch->endian)))));
uint32_t color_edram_base = 0;
uint32_t depth_edram_base = 0;
ColorRenderTargetFormat color_format;
DepthRenderTargetFormat depth_format;
if (copy_src_select <= 3) {
// Source from a color target.
uint32_t color_info[4] = {
regs[XE_GPU_REG_RB_COLOR_INFO].u32, regs[XE_GPU_REG_RB_COLOR1_INFO].u32,
regs[XE_GPU_REG_RB_COLOR2_INFO].u32,
regs[XE_GPU_REG_RB_COLOR3_INFO].u32,
};
color_edram_base = color_info[copy_src_select] & 0xFFF;
color_format = static_cast<ColorRenderTargetFormat>(
(color_info[copy_src_select] >> 16) & 0xF);
}
if (copy_src_select > 3 || depth_clear_enabled) {
// Source from a depth target.
uint32_t depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
depth_edram_base = depth_info & 0xFFF;
depth_format =
static_cast<DepthRenderTargetFormat>((depth_info >> 16) & 0x1);
}
// Demand a resolve texture from the texture cache.
TextureInfo tex_info = {};
tex_info.guest_address = copy_dest_base;
tex_info.width = dest_logical_width - 1;
tex_info.height = dest_logical_height - 1;
tex_info.dimension = gpu::Dimension::k2D;
tex_info.input_length = copy_dest_pitch * copy_dest_height * 4;
tex_info.format_info =
FormatInfo::Get(uint32_t(ColorFormatToTextureFormat(copy_dest_format)));
tex_info.size_2d.logical_width = dest_logical_width;
tex_info.size_2d.logical_height = dest_logical_height;
tex_info.size_2d.block_width = dest_block_width;
tex_info.size_2d.block_height = dest_block_height;
tex_info.size_2d.input_width = dest_block_width;
tex_info.size_2d.input_height = dest_block_height;
tex_info.size_2d.input_pitch = copy_dest_pitch * 4;
auto texture = texture_cache_->DemandResolveTexture(
tex_info, ColorFormatToTextureFormat(copy_dest_format), nullptr);
assert_not_null(texture);
texture->in_flight_fence = current_batch_fence_;
// For debugging purposes only (trace viewer)
last_copy_base_ = texture->texture_info.guest_address;
if (!current_command_buffer_) {
command_buffer_pool_->BeginBatch();
current_command_buffer_ = command_buffer_pool_->AcquireEntry();
current_setup_buffer_ = command_buffer_pool_->AcquireEntry();
current_batch_fence_.reset(new ui::vulkan::Fence(*device_));
VkCommandBufferBeginInfo command_buffer_begin_info;
command_buffer_begin_info.sType =
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
command_buffer_begin_info.pNext = nullptr;
command_buffer_begin_info.flags =
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
command_buffer_begin_info.pInheritanceInfo = nullptr;
auto status = vkBeginCommandBuffer(current_command_buffer_,
&command_buffer_begin_info);
CheckResult(status, "vkBeginCommandBuffer");
status =
vkBeginCommandBuffer(current_setup_buffer_, &command_buffer_begin_info);
CheckResult(status, "vkBeginCommandBuffer");
} else if (current_render_state_) {
render_cache_->EndRenderPass();
current_render_state_ = nullptr;
}
auto command_buffer = current_command_buffer_;
if (texture->image_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
// Transition the image to a general layout.
VkImageMemoryBarrier image_barrier;
image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
image_barrier.pNext = nullptr;
image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_barrier.srcAccessMask = 0;
image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
image_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
image_barrier.image = texture->image;
image_barrier.subresourceRange = {0, 0, 1, 0, 1};
image_barrier.subresourceRange.aspectMask =
copy_src_select <= 3
? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
texture->image_layout = VK_IMAGE_LAYOUT_GENERAL;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
nullptr, 1, &image_barrier);
}
VkOffset3D resolve_offset = {dest_min_x, dest_min_y, 0};
VkExtent3D resolve_extent = {uint32_t(dest_max_x - dest_min_x),
uint32_t(dest_max_y - dest_min_y), 1};
// Ask the render cache to copy to the resolve texture.
auto edram_base = copy_src_select <= 3 ? color_edram_base : depth_edram_base;
uint32_t src_format = copy_src_select <= 3
? static_cast<uint32_t>(color_format)
: static_cast<uint32_t>(depth_format);
switch (copy_command) {
case CopyCommand::kRaw:
/*
render_cache_->RawCopyToImage(command_buffer, edram_base, texture->image,
texture->image_layout, copy_src_select <= 3,
resolve_offset, resolve_extent);
break;
*/
case CopyCommand::kConvert:
render_cache_->BlitToImage(
command_buffer, edram_base, surface_pitch, resolve_extent.height,
surface_msaa, texture->image, texture->image_layout,
copy_src_select <= 3, src_format, VK_FILTER_LINEAR, resolve_offset,
resolve_extent);
break;
case CopyCommand::kConstantOne:
case CopyCommand::kNull:
assert_always();
break;
}
// Perform any requested clears.
uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32;
assert_true(copy_color_clear == copy_color_clear_low);
if (color_clear_enabled) {
// If color clear is enabled, we can only clear a selected color target!
assert_true(copy_src_select <= 3);
// TODO(benvanik): verify color order.
float color[] = {((copy_color_clear >> 0) & 0xFF) / 255.0f,
((copy_color_clear >> 8) & 0xFF) / 255.0f,
((copy_color_clear >> 16) & 0xFF) / 255.0f,
((copy_color_clear >> 24) & 0xFF) / 255.0f};
// TODO(DrChat): Do we know the surface height at this point?
render_cache_->ClearEDRAMColor(command_buffer, color_edram_base,
color_format, surface_pitch,
resolve_extent.height, surface_msaa, color);
}
if (depth_clear_enabled) {
float depth =
(copy_depth_clear & 0xFFFFFF00) / static_cast<float>(0xFFFFFF00);
uint8_t stencil = copy_depth_clear & 0xFF;
// TODO(DrChat): Do we know the surface height at this point?
render_cache_->ClearEDRAMDepthStencil(
command_buffer, depth_edram_base, depth_format, surface_pitch,
resolve_extent.height, surface_msaa, depth, stencil);
}
return true; return true;
} }

View File

@ -34,12 +34,14 @@
#include "xenia/ui/vulkan/fenced_pools.h" #include "xenia/ui/vulkan/fenced_pools.h"
#include "xenia/ui/vulkan/vulkan_context.h" #include "xenia/ui/vulkan/vulkan_context.h"
#include "xenia/ui/vulkan/vulkan_device.h" #include "xenia/ui/vulkan/vulkan_device.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
namespace vulkan { namespace vulkan {
class VulkanGraphicsSystem; class VulkanGraphicsSystem;
class TextureCache;
class VulkanCommandProcessor : public CommandProcessor { class VulkanCommandProcessor : public CommandProcessor {
public: public:
@ -47,8 +49,11 @@ class VulkanCommandProcessor : public CommandProcessor {
kernel::KernelState* kernel_state); kernel::KernelState* kernel_state);
~VulkanCommandProcessor() override; ~VulkanCommandProcessor() override;
virtual void RequestFrameTrace(const std::wstring& root_path) override;
void ClearCaches() override; void ClearCaches() override;
RenderCache* render_cache() { return render_cache_.get(); }
private: private:
bool SetupContext() override; bool SetupContext() override;
void ShutdownContext() override; void ShutdownContext() override;
@ -57,6 +62,9 @@ class VulkanCommandProcessor : public CommandProcessor {
void PrepareForWait() override; void PrepareForWait() override;
void ReturnFromWait() override; void ReturnFromWait() override;
void CreateSwapImages(VkCommandBuffer setup_buffer, VkExtent2D extents);
void DestroySwapImages();
void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width,
uint32_t frontbuffer_height) override; uint32_t frontbuffer_height) override;
@ -74,12 +82,17 @@ class VulkanCommandProcessor : public CommandProcessor {
bool PopulateVertexBuffers(VkCommandBuffer command_buffer, bool PopulateVertexBuffers(VkCommandBuffer command_buffer,
VulkanShader* vertex_shader); VulkanShader* vertex_shader);
bool PopulateSamplers(VkCommandBuffer command_buffer, bool PopulateSamplers(VkCommandBuffer command_buffer,
VkCommandBuffer setup_buffer,
VulkanShader* vertex_shader, VulkanShader* vertex_shader,
VulkanShader* pixel_shader); VulkanShader* pixel_shader);
bool IssueCopy() override; bool IssueCopy() override;
xe::ui::vulkan::VulkanDevice* device_ = nullptr; xe::ui::vulkan::VulkanDevice* device_ = nullptr;
// front buffer / back buffer memory
VkDeviceMemory fb_memory = nullptr;
VkDeviceMemory bb_memory = nullptr;
// TODO(benvanik): abstract behind context? // TODO(benvanik): abstract behind context?
// Queue used to submit work. This may be a dedicated queue for the command // Queue used to submit work. This may be a dedicated queue for the command
// processor and no locking will be required for use. If a dedicated queue // processor and no locking will be required for use. If a dedicated queue
@ -88,12 +101,22 @@ class VulkanCommandProcessor : public CommandProcessor {
VkQueue queue_ = nullptr; VkQueue queue_ = nullptr;
std::mutex* queue_mutex_ = nullptr; std::mutex* queue_mutex_ = nullptr;
// Last copy base address, for debugging only.
uint32_t last_copy_base_ = 0;
bool capturing_ = false;
bool trace_requested_ = false;
std::unique_ptr<BufferCache> buffer_cache_; std::unique_ptr<BufferCache> buffer_cache_;
std::unique_ptr<PipelineCache> pipeline_cache_; std::unique_ptr<PipelineCache> pipeline_cache_;
std::unique_ptr<RenderCache> render_cache_; std::unique_ptr<RenderCache> render_cache_;
std::unique_ptr<TextureCache> texture_cache_; std::unique_ptr<TextureCache> texture_cache_;
std::unique_ptr<ui::vulkan::CommandBufferPool> command_buffer_pool_; std::unique_ptr<ui::vulkan::CommandBufferPool> command_buffer_pool_;
const RenderState* current_render_state_ = nullptr;
VkCommandBuffer current_command_buffer_ = nullptr;
VkCommandBuffer current_setup_buffer_ = nullptr;
std::shared_ptr<ui::vulkan::Fence> current_batch_fence_;
}; };
} // namespace vulkan } // namespace vulkan

View File

@ -11,3 +11,6 @@
DEFINE_bool(vulkan_renderdoc_capture_all, false, DEFINE_bool(vulkan_renderdoc_capture_all, false,
"Capture everything with RenderDoc."); "Capture everything with RenderDoc.");
DEFINE_bool(vulkan_native_msaa, false, "Use native MSAA");
DEFINE_bool(vulkan_dump_disasm, false,
"Dump shader disassembly. NVIDIA only supported.");

View File

@ -15,5 +15,7 @@
#define FINE_GRAINED_DRAW_SCOPES 1 #define FINE_GRAINED_DRAW_SCOPES 1
DECLARE_bool(vulkan_renderdoc_capture_all); DECLARE_bool(vulkan_renderdoc_capture_all);
DECLARE_bool(vulkan_native_msaa);
DECLARE_bool(vulkan_dump_disasm);
#endif // XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_ #endif // XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_

View File

@ -19,14 +19,14 @@
#include "xenia/gpu/vulkan/vulkan_command_processor.h" #include "xenia/gpu/vulkan/vulkan_command_processor.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h" #include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
#include "xenia/ui/vulkan/vulkan_provider.h" #include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_swap_chain.h"
#include "xenia/ui/window.h" #include "xenia/ui/window.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
namespace vulkan { namespace vulkan {
VulkanGraphicsSystem::VulkanGraphicsSystem() = default; VulkanGraphicsSystem::VulkanGraphicsSystem() {}
VulkanGraphicsSystem::~VulkanGraphicsSystem() = default; VulkanGraphicsSystem::~VulkanGraphicsSystem() = default;
X_STATUS VulkanGraphicsSystem::Setup(cpu::Processor* processor, X_STATUS VulkanGraphicsSystem::Setup(cpu::Processor* processor,
@ -74,12 +74,41 @@ void VulkanGraphicsSystem::Swap(xe::ui::UIEvent* e) {
return; return;
} }
// Blit the frontbuffer. auto swap_chain = display_context_->swap_chain();
// display_context_->blitter()->BlitTexture2D( auto copy_cmd_buffer = swap_chain->copy_cmd_buffer();
// static_cast<GLuint>(swap_state.front_buffer_texture), auto front_buffer =
// Rect2D(0, 0, swap_state.width, swap_state.height), reinterpret_cast<VkImage>(swap_state.front_buffer_texture);
// Rect2D(0, 0, target_window_->width(), target_window_->height()),
// GL_LINEAR, false); VkImageMemoryBarrier barrier;
std::memset(&barrier, 0, sizeof(VkImageMemoryBarrier));
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = front_buffer;
barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
vkCmdPipelineBarrier(copy_cmd_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
nullptr, 1, &barrier);
VkImageBlit region;
region.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
region.srcOffsets[0] = {0, 0, 0};
region.srcOffsets[1] = {static_cast<int32_t>(swap_state.width),
static_cast<int32_t>(swap_state.height), 1};
region.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
region.dstOffsets[0] = {0, 0, 0};
region.dstOffsets[1] = {static_cast<int32_t>(swap_chain->surface_width()),
static_cast<int32_t>(swap_chain->surface_height()),
1};
vkCmdBlitImage(copy_cmd_buffer, front_buffer, VK_IMAGE_LAYOUT_GENERAL,
swap_chain->surface_image(),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region,
VK_FILTER_LINEAR);
} }
} // namespace vulkan } // namespace vulkan

View File

@ -44,11 +44,11 @@ bool VulkanShader::Prepare() {
shader_info.codeSize = translated_binary_.size(); shader_info.codeSize = translated_binary_.size();
shader_info.pCode = shader_info.pCode =
reinterpret_cast<const uint32_t*>(translated_binary_.data()); reinterpret_cast<const uint32_t*>(translated_binary_.data());
auto err = auto status =
vkCreateShaderModule(device_, &shader_info, nullptr, &shader_module_); vkCreateShaderModule(device_, &shader_info, nullptr, &shader_module_);
CheckResult(err, "vkCreateShaderModule"); CheckResult(status, "vkCreateShaderModule");
return true; return status == VK_SUCCESS;
} }
} // namespace vulkan } // namespace vulkan

View File

@ -49,6 +49,7 @@ enum class PrimitiveType : uint32_t {
kLineLoop = 0x0C, kLineLoop = 0x0C,
kQuadList = 0x0D, kQuadList = 0x0D,
kQuadStrip = 0x0E, kQuadStrip = 0x0E,
kUnknown0x11 = 0x11,
}; };
enum class Dimension : uint32_t { enum class Dimension : uint32_t {
@ -382,7 +383,7 @@ XEPACKEDUNION(xe_gpu_vertex_fetch_t, {
uint32_t type : 2; uint32_t type : 2;
uint32_t address : 30; uint32_t address : 30;
uint32_t endian : 2; uint32_t endian : 2;
uint32_t size : 24; uint32_t size : 24; // size in words
uint32_t unk1 : 6; uint32_t unk1 : 6;
}); });
XEPACKEDSTRUCTANONYMOUS({ XEPACKEDSTRUCTANONYMOUS({
@ -486,6 +487,46 @@ XEPACKEDUNION(xe_gpu_fetch_group_t, {
}); });
}); });
// GPU event identifiers.
// NOTE(review): presumably the event-type values carried by event-write
// packets - confirm against the command processor that consumes them.
// The numbering is sparse on purpose; gaps are values that are not named here.
enum Event {
  SAMPLE_STREAMOUTSTATS1 = 1,
  SAMPLE_STREAMOUTSTATS2 = 2,
  SAMPLE_STREAMOUTSTATS3 = 3,
  CACHE_FLUSH_TS = 4,
  CACHE_FLUSH = 6,
  CS_PARTIAL_FLUSH = 7,
  VGT_STREAMOUT_RESET = 10,
  END_OF_PIPE_INCR_DE = 11,
  END_OF_PIPE_IB_END = 12,
  RST_PIX_CNT = 13,
  VS_PARTIAL_FLUSH = 15,
  PS_PARTIAL_FLUSH = 16,
  CACHE_FLUSH_AND_INV_TS_EVENT = 20,
  ZPASS_DONE = 21,
  CACHE_FLUSH_AND_INV_EVENT = 22,
  PERFCOUNTER_START = 23,
  PERFCOUNTER_STOP = 24,
  PIPELINESTAT_START = 25,
  PIPELINESTAT_STOP = 26,
  PERFCOUNTER_SAMPLE = 27,
  SAMPLE_PIPELINESTAT = 30,
  SAMPLE_STREAMOUTSTATS = 32,
  RESET_VTX_CNT = 33,
  VGT_FLUSH = 36,
  BOTTOM_OF_PIPE_TS = 40,
  DB_CACHE_FLUSH_AND_INV = 42,
  FLUSH_AND_INV_DB_DATA_TS = 43,
  FLUSH_AND_INV_DB_META = 44,
  FLUSH_AND_INV_CB_DATA_TS = 45,
  FLUSH_AND_INV_CB_META = 46,
  CS_DONE = 47,
  PS_DONE = 48,
  FLUSH_AND_INV_CB_PIXEL_DATA = 49,
  THREAD_TRACE_START = 51,
  THREAD_TRACE_STOP = 52,
  THREAD_TRACE_FLUSH = 54,
  THREAD_TRACE_FINISH = 55,
};
// Opcodes (IT_OPCODE) for Type-3 commands in the ringbuffer. // Opcodes (IT_OPCODE) for Type-3 commands in the ringbuffer.
// https://github.com/freedreno/amd-gpu/blob/master/include/api/gsl_pm4types.h // https://github.com/freedreno/amd-gpu/blob/master/include/api/gsl_pm4types.h
// Not sure if all of these are used. // Not sure if all of these are used.
@ -501,7 +542,7 @@ enum Type3Opcode {
PM4_WAIT_FOR_IDLE = 0x26, // wait for the IDLE state of the engine PM4_WAIT_FOR_IDLE = 0x26, // wait for the IDLE state of the engine
PM4_WAIT_REG_MEM = 0x3c, // wait until a register or memory location is a specific value PM4_WAIT_REG_MEM = 0x3c, // wait until a register or memory location is a specific value
PM4_WAIT_REG_EQ = 0x52, // wait until a register location is equal to a specific value PM4_WAIT_REG_EQ = 0x52, // wait until a register location is equal to a specific value
PM4_WAT_REG_GTE = 0x53, // wait until a register location is >= a specific value PM4_WAIT_REG_GTE = 0x53, // wait until a register location is >= a specific value
PM4_WAIT_UNTIL_READ = 0x5c, // wait until a read completes PM4_WAIT_UNTIL_READ = 0x5c, // wait until a read completes
PM4_WAIT_IB_PFD_COMPLETE = 0x5d, // wait until all base/size writes from an IB_PFD packet have completed PM4_WAIT_IB_PFD_COMPLETE = 0x5d, // wait until all base/size writes from an IB_PFD packet have completed

View File

@ -366,7 +366,7 @@ void VdSwap(lpvoid_t buffer_ptr, // ptr into primary ringbuffer
auto dwords = buffer_ptr.as_array<uint32_t>(); auto dwords = buffer_ptr.as_array<uint32_t>();
dwords[0] = xenos::MakePacketType3<xenos::PM4_XE_SWAP, 63>(); dwords[0] = xenos::MakePacketType3<xenos::PM4_XE_SWAP, 63>();
dwords[1] = 'SWAP'; dwords[1] = 'SWAP';
dwords[2] = *frontbuffer_ptr; dwords[2] = (*frontbuffer_ptr) & 0x1FFFFFFF;
// Set by VdCallGraphicsNotificationRoutines. // Set by VdCallGraphicsNotificationRoutines.
dwords[3] = last_frontbuffer_width_; dwords[3] = last_frontbuffer_width_;

View File

@ -376,17 +376,19 @@ cpu::MMIORange* Memory::LookupVirtualMappedRange(uint32_t virtual_address) {
return mmio_handler_->LookupRange(virtual_address); return mmio_handler_->LookupRange(virtual_address);
} }
uintptr_t Memory::AddPhysicalWriteWatch(uint32_t physical_address, uintptr_t Memory::AddPhysicalAccessWatch(uint32_t physical_address,
uint32_t length, uint32_t length,
cpu::WriteWatchCallback callback, cpu::MMIOHandler::WatchType type,
void* callback_context, cpu::AccessWatchCallback callback,
void* callback_data) { void* callback_context,
return mmio_handler_->AddPhysicalWriteWatch( void* callback_data) {
physical_address, length, callback, callback_context, callback_data); return mmio_handler_->AddPhysicalAccessWatch(physical_address, length, type,
callback, callback_context,
callback_data);
} }
void Memory::CancelWriteWatch(uintptr_t watch_handle) { void Memory::CancelAccessWatch(uintptr_t watch_handle) {
mmio_handler_->CancelWriteWatch(watch_handle); mmio_handler_->CancelAccessWatch(watch_handle);
} }
uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment, uint32_t Memory::SystemHeapAlloc(uint32_t size, uint32_t alignment,
@ -453,6 +455,7 @@ bool Memory::Save(ByteStream* stream) {
} }
bool Memory::Restore(ByteStream* stream) { bool Memory::Restore(ByteStream* stream) {
XELOGD("Restoring memory...");
heaps_.v00000000.Restore(stream); heaps_.v00000000.Restore(stream);
heaps_.v40000000.Restore(stream); heaps_.v40000000.Restore(stream);
heaps_.v80000000.Restore(stream); heaps_.v80000000.Restore(stream);
@ -577,6 +580,8 @@ bool BaseHeap::Save(ByteStream* stream) {
} }
bool BaseHeap::Restore(ByteStream* stream) { bool BaseHeap::Restore(ByteStream* stream) {
XELOGD("Heap %.8X-%.8X", heap_base_, heap_base_ + heap_size_);
for (size_t i = 0; i < page_table_.size(); i++) { for (size_t i = 0; i < page_table_.size(); i++) {
auto& page = page_table_[i]; auto& page = page_table_[i];
page.qword = stream->Read<uint64_t>(); page.qword = stream->Read<uint64_t>();
@ -897,7 +902,7 @@ bool BaseHeap::Release(uint32_t base_address, uint32_t* out_region_size) {
auto base_page_entry = page_table_[base_page_number]; auto base_page_entry = page_table_[base_page_number];
if (base_page_entry.base_address != base_page_number) { if (base_page_entry.base_address != base_page_number) {
XELOGE("BaseHeap::Release failed because address is not a region start"); XELOGE("BaseHeap::Release failed because address is not a region start");
// return false; return false;
} }
if (out_region_size) { if (out_region_size) {

View File

@ -303,12 +303,13 @@ class Memory {
// //
// This has a significant performance penalty for writes in in the range or // This has a significant performance penalty for writes in in the range or
// nearby (sharing 64KiB pages). // nearby (sharing 64KiB pages).
uintptr_t AddPhysicalWriteWatch(uint32_t physical_address, uint32_t length, uintptr_t AddPhysicalAccessWatch(uint32_t physical_address, uint32_t length,
cpu::WriteWatchCallback callback, cpu::MMIOHandler::WatchType type,
void* callback_context, void* callback_data); cpu::AccessWatchCallback callback,
void* callback_context, void* callback_data);
// Cancels a write watch requested with AddPhysicalWriteWatch. // Cancels a write watch requested with AddPhysicalAccessWatch.
void CancelWriteWatch(uintptr_t watch_handle); void CancelAccessWatch(uintptr_t watch_handle);
// Allocates virtual memory from the 'system' heap. // Allocates virtual memory from the 'system' heap.
// System memory is kept separate from game memory but is still accessible // System memory is kept separate from game memory but is still accessible

View File

@ -0,0 +1,80 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/spirv/spirv_validator.h"
#include "third_party/spirv-tools/include/spirv-tools/libspirv.h"
#include "xenia/base/logging.h"
namespace xe {
namespace ui {
namespace spirv {
// Wraps the given handles. Either may be null; the destructor destroys any
// that are not.
SpirvValidator::Result::Result(spv_text text, spv_diagnostic diagnostic) {
  text_ = text;
  diagnostic_ = diagnostic;
}
// Destroys whichever of the text/diagnostic handles are set.
SpirvValidator::Result::~Result() {
  if (text_ != nullptr) {
    spvTextDestroy(text_);
  }
  if (diagnostic_ != nullptr) {
    spvDiagnosticDestroy(diagnostic_);
  }
}
// An error is present exactly when a diagnostic handle is attached.
bool SpirvValidator::Result::has_error() const {
  return diagnostic_ != nullptr;
}
// Position index recorded in the diagnostic, or 0 when there is no diagnostic.
size_t SpirvValidator::Result::error_word_index() const {
  if (!diagnostic_) {
    return 0;
  }
  return diagnostic_->position.index;
}
const char* SpirvValidator::Result::error_string() const {
return diagnostic_ ? diagnostic_->error : "";
}
// Raw character data of the text handle, or an empty string when no text is
// attached.
const char* SpirvValidator::Result::text() const {
  if (!text_) {
    return "";
  }
  return text_->str;
}
// Copies the attached text (using its recorded length) into a std::string.
// Yields an empty string when no text is attached.
std::string SpirvValidator::Result::to_string() const {
  if (!text_) {
    return "";
  }
  return std::string(text_->str, text_->length);
}
// Appends the attached text bytes to |target_buffer|. Does nothing when no
// text is attached.
void SpirvValidator::Result::AppendText(StringBuffer* target_buffer) const {
  if (!text_) {
    return;
  }
  auto bytes = reinterpret_cast<const uint8_t*>(text_->str);
  target_buffer->AppendBytes(bytes, text_->length);
}
// Creates the spirv-tools context used by every Validate() call.
SpirvValidator::SpirvValidator() {
  spv_context_ = spvContextCreate();
}
// Destroys the context created by the constructor.
SpirvValidator::~SpirvValidator() {
  spvContextDestroy(spv_context_);
}
// Runs spvValidate over |word_count| words starting at |words|.
//
// Returns:
//   - a Result without an error when validation succeeds;
//   - a Result carrying the diagnostic when validation fails and a diagnostic
//     was produced;
//   - nullptr when validation fails without producing a diagnostic.
std::unique_ptr<SpirvValidator::Result> SpirvValidator::Validate(
    const uint32_t* words, size_t word_count) {
  spv_const_binary_t binary = {words, word_count};
  spv_diagnostic diagnostic = nullptr;
  auto status =
      spvValidate(spv_context_, &binary, SPV_VALIDATE_ALL, &diagnostic);

  // This function never fills in a text handle, so the Result is created
  // with a null one. The Result takes ownership of the diagnostic either way.
  spv_text no_text = nullptr;
  std::unique_ptr<Result> outcome(new Result(no_text, diagnostic));
  if (!status) {
    return outcome;
  }

  XELOGE("Failed to validate spv: %d", status);
  if (!outcome->has_error()) {
    // Failure with nothing to report.
    return nullptr;
  }
  return outcome;
}
} // namespace spirv
} // namespace ui
} // namespace xe

View File

@ -0,0 +1,66 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_
#define XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_
#include <memory>
#include <string>
#include "xenia/base/string_buffer.h"
#include "xenia/ui/spirv/spirv_util.h"
namespace xe {
namespace ui {
namespace spirv {
// Validates SPIR-V binaries through a spirv-tools context that this object
// creates on construction and destroys on destruction.
class SpirvValidator {
 public:
  // Outcome of one validation run. Holds the text and diagnostic handles it
  // was constructed with and destroys them in its destructor.
  class Result {
   public:
    Result(spv_text text, spv_diagnostic diagnostic);
    ~Result();

    // The destructor destroys the raw handles, so a copy would destroy them a
    // second time. Results are handed out via std::unique_ptr instead.
    Result(const Result&) = delete;
    Result& operator=(const Result&) = delete;

    // True if the result has an error associated with it.
    bool has_error() const;
    // Index of the error in the provided binary word data.
    size_t error_word_index() const;
    // Human-readable description of the error.
    const char* error_string() const;

    // Disassembled source text.
    // Returned pointer lifetime is tied to this Result instance.
    const char* text() const;
    // Converts the disassembled source text to a string.
    std::string to_string() const;
    // Appends the disassembled source text to the given buffer.
    void AppendText(StringBuffer* target_buffer) const;

   private:
    spv_text text_ = nullptr;
    spv_diagnostic diagnostic_ = nullptr;
  };

  SpirvValidator();
  ~SpirvValidator();

  // The destructor destroys spv_context_, so a copy would destroy the same
  // context twice.
  SpirvValidator(const SpirvValidator&) = delete;
  SpirvValidator& operator=(const SpirvValidator&) = delete;

  // Validates the given SPIRV binary.
  // The return will be nullptr if validation fails due to a library error.
  // The return may have an error set on it if the SPIRV binary is malformed.
  std::unique_ptr<Result> Validate(const uint32_t* words, size_t word_count);

 private:
  spv_context spv_context_ = nullptr;
};
} // namespace spirv
} // namespace ui
} // namespace xe
#endif // XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_

View File

@ -0,0 +1,227 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include <algorithm>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/ui/vulkan/circular_buffer.h"
namespace xe {
namespace ui {
namespace vulkan {
CircularBuffer::CircularBuffer(VulkanDevice* device) : device_(device) {}
// Releases everything via Shutdown().
CircularBuffer::~CircularBuffer() {
  Shutdown();
}
// Creates the Vulkan buffer, allocates and binds device memory for it, and
// maps that memory for host access.
//
// |capacity| is rounded up to a multiple of |alignment| before the buffer is
// created. Returns false if any step fails; failures after buffer creation
// call Shutdown() before returning.
bool CircularBuffer::Initialize(VkDeviceSize capacity, VkBufferUsageFlags usage,
                                VkDeviceSize alignment) {
  capacity = xe::round_up(capacity, alignment);

  // Describe and create the buffer object. Zero-initialization covers
  // pNext, flags and the queue family fields.
  VkBufferCreateInfo create_info = {};
  create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  create_info.size = capacity;
  create_info.usage = usage;
  create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  VkResult status =
      vkCreateBuffer(*device_, &create_info, nullptr, &gpu_buffer_);
  CheckResult(status, "vkCreateBuffer");
  if (status != VK_SUCCESS) {
    return false;
  }

  // Ask the driver what backing memory the buffer needs, then apply the
  // stricter of the caller's alignment and the driver's.
  VkMemoryRequirements requirements;
  vkGetBufferMemoryRequirements(*device_, gpu_buffer_, &requirements);
  assert_true(requirements.size == capacity);
  requirements.alignment = std::max(alignment, requirements.alignment);

  gpu_memory_ = device_->AllocateMemory(requirements);
  if (!gpu_memory_) {
    XELOGE("CircularBuffer::Initialize - Failed to allocate memory!");
    Shutdown();
    return false;
  }

  alignment_ = requirements.alignment;
  capacity_ = requirements.size;
  gpu_base_ = 0;

  // Attach the memory to the buffer.
  status = vkBindBufferMemory(*device_, gpu_buffer_, gpu_memory_, gpu_base_);
  CheckResult(status, "vkBindBufferMemory");
  if (status != VK_SUCCESS) {
    XELOGE("CircularBuffer::Initialize - Failed to bind memory!");
    Shutdown();
    return false;
  }

  // Map the whole range; host_base_ receives the mapped pointer.
  status = vkMapMemory(*device_, gpu_memory_, gpu_base_, capacity_, 0,
                       reinterpret_cast<void**>(&host_base_));
  CheckResult(status, "vkMapMemory");
  if (status != VK_SUCCESS) {
    XELOGE("CircularBuffer::Initialize - Failed to map memory!");
    Shutdown();
    return false;
  }

  return true;
}
// Tears down whatever Initialize() managed to create. Each resource is
// checked individually, so this also works on a partially initialized
// instance.
void CircularBuffer::Shutdown() {
  Clear();

  // Unmap first: the mapping refers to gpu_memory_, which is freed last.
  if (host_base_ != nullptr) {
    vkUnmapMemory(*device_, gpu_memory_);
    host_base_ = nullptr;
  }

  if (gpu_buffer_ != nullptr) {
    vkDestroyBuffer(*device_, gpu_buffer_, nullptr);
    gpu_buffer_ = nullptr;
  }

  if (gpu_memory_ != nullptr) {
    vkFreeMemory(*device_, gpu_memory_, nullptr);
    gpu_memory_ = nullptr;
  }
}
// Reports whether a contiguous region of |length| bytes (rounded up to the
// buffer alignment) is currently free.
bool CircularBuffer::CanAcquire(VkDeviceSize length) {
  const VkDeviceSize needed = xe::round_up(length, alignment_);

  if (allocations_.empty()) {
    // Nothing outstanding: the read head has caught up with the write head
    // and the whole buffer counts as free.
    assert_true(read_head_ == write_head_);
    return needed <= capacity_;
  }

  if (write_head_ < read_head_) {
    // The write head has wrapped and trails the read head:
    //   | write |---- read ----|
    // Only the gap between the two is usable.
    return needed <= (read_head_ - write_head_);
  }

  if (write_head_ == read_head_) {
    // Heads coincide while allocations are outstanding: no room.
    return false;
  }

  // The write head is ahead of the read head. The region may go either
  // after the write head:
  //   |       |---- read ----| write |
  // or, wrapping around, before the read head:
  //   | write |---- read ----|       |
  const bool fits_at_tail = needed <= (capacity_ - write_head_);
  const bool fits_at_front = needed <= read_head_;
  return fits_at_tail || fits_at_front;
}
// Reserves |length| bytes (rounded up to the buffer alignment) and returns a
// record describing the region, or nullptr if no contiguous region is free.
//
// The returned Allocation is owned by this buffer: it is deleted by Scavenge()
// once |fence| reports VK_SUCCESS, or by Clear()/Shutdown().
CircularBuffer::Allocation* CircularBuffer::Acquire(
    VkDeviceSize length, std::shared_ptr<Fence> fence) {
  VkDeviceSize aligned_length = xe::round_up(length, alignment_);
  if (!CanAcquire(aligned_length)) {
    return nullptr;
  }

  if (allocations_.empty()) {
    // Nothing is outstanding, so the entire buffer is free regardless of where
    // the heads last stopped. Rewind them so the request CanAcquire() just
    // approved against the full capacity is actually satisfiable; otherwise a
    // request larger than both [write, capacity) and [0, read) would fail.
    read_head_ = write_head_ = 0;
  }

  assert_true(write_head_ % alignment_ == 0);

  // Pick the start offset of the new region.
  VkDeviceSize start = 0;
  if (write_head_ < read_head_) {
    // Write head behind read head: use the gap between them.
    assert_true(read_head_ - write_head_ >= aligned_length);
    start = write_head_;
  } else if (capacity_ - write_head_ >= aligned_length) {
    // Write head equal to/after read head, free space from write -> capacity.
    start = write_head_;
  } else if ((read_head_ - 0) >= aligned_length) {
    // Not enough room at the tail; wrap and use the space from begin -> read.
    start = 0;
  } else {
    return nullptr;
  }

  auto alloc = new Allocation();
  alloc->host_ptr = host_base_ + start;
  alloc->gpu_memory = gpu_memory_;
  alloc->offset = gpu_base_ + start;
  alloc->length = length;
  alloc->aligned_length = aligned_length;
  alloc->fence = fence;
  write_head_ = start + aligned_length;
  allocations_.push_back(alloc);
  return alloc;
}
// Flushes host writes made through |allocation|'s host_ptr so they become
// visible to the device.
void CircularBuffer::Flush(Allocation* allocation) {
  VkMappedMemoryRange range;
  range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  range.pNext = nullptr;
  range.memory = gpu_memory_;
  // Acquire() stores offset as gpu_base_ + <position in buffer>, i.e. it is
  // already relative to gpu_memory_. Adding gpu_base_ again would flush the
  // wrong range as soon as the base is non-zero.
  range.offset = allocation->offset;
  range.size = allocation->length;
  vkFlushMappedMemoryRanges(*device_, 1, &range);
}
// Drops every allocation record, whether or not its fence has signaled, and
// rewinds both heads to the start of the buffer.
void CircularBuffer::Clear() {
  while (!allocations_.empty()) {
    delete allocations_.front();
    allocations_.pop_front();
  }

  read_head_ = 0;
  write_head_ = 0;
}
void CircularBuffer::Scavenge() {
for (auto it = allocations_.begin(); it != allocations_.end();) {
if ((*it)->fence->status() != VK_SUCCESS) {
// Don't bother freeing following allocations to ensure proper ordering.
break;
}
if (capacity_ - read_head_ < (*it)->aligned_length) {
// This allocation is stored at the beginning of the buffer.
read_head_ = (*it)->aligned_length;
} else {
read_head_ += (*it)->aligned_length;
}
delete *it;
it = allocations_.erase(it);
}
}
} // namespace vulkan
} // namespace ui
} // namespace xe

View File

@ -0,0 +1,87 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_
#define XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_
#include <list>
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"
namespace xe {
namespace ui {
namespace vulkan {
// A circular buffer, intended to hold (fairly) temporary memory that will be
// released when a fence is signaled. Best used when allocations are taken
// in-order with command buffer submission.
//
// Allocations loop around the buffer in circles (but are not fragmented at the
// ends of the buffer), where trailing older allocations are freed after use.
class CircularBuffer {
 public:
  CircularBuffer(VulkanDevice* device);
  ~CircularBuffer();

  // This object holds raw Vulkan handles and raw Allocation pointers that are
  // released by Clear()/Shutdown(). A member-wise copy would release them
  // twice, so copying is disallowed.
  CircularBuffer(const CircularBuffer&) = delete;
  CircularBuffer& operator=(const CircularBuffer&) = delete;

  // Describes one region handed out by Acquire(). Records are owned by the
  // CircularBuffer and must not be deleted by the caller.
  struct Allocation {
    // Host-visible address of the region (host_base + position in buffer).
    void* host_ptr;
    // Device memory backing the buffer.
    VkDeviceMemory gpu_memory;
    // Offset of the region.
    VkDeviceSize offset;
    // Size requested by the caller.
    VkDeviceSize length;
    // |length| rounded up to the buffer alignment; the space actually
    // reserved.
    VkDeviceSize aligned_length;

    // Allocation usage fence. This allocation will be deleted when the fence
    // becomes signaled.
    std::shared_ptr<Fence> fence;
  };

  // Creates the buffer, allocates and binds its backing memory and maps it
  // into host address space. Returns false on failure.
  bool Initialize(VkDeviceSize capacity, VkBufferUsageFlags usage,
                  VkDeviceSize alignment = 256);
  // Releases all allocations and every Vulkan object created by Initialize().
  void Shutdown();

  VkDeviceSize alignment() const { return alignment_; }
  VkDeviceSize capacity() const { return capacity_; }
  VkBuffer gpu_buffer() const { return gpu_buffer_; }
  VkDeviceMemory gpu_memory() const { return gpu_memory_; }
  uint8_t* host_base() const { return host_base_; }

  // True if a contiguous region of |length| bytes (after alignment) is free.
  bool CanAcquire(VkDeviceSize length);

  // Acquires space to hold memory. This allocation is only freed when the fence
  // reaches the signaled state. Returns nullptr if no space is available.
  Allocation* Acquire(VkDeviceSize length, std::shared_ptr<Fence> fence);
  // Flushes host writes to the allocation's mapped range.
  void Flush(Allocation* allocation);

  // Clears all allocations, regardless of whether they've been consumed or not.
  void Clear();

  // Frees any allocations whose fences have been signaled.
  void Scavenge();

 private:
  VkDeviceSize capacity_ = 0;
  VkDeviceSize alignment_ = 0;
  // Position where the next allocation will be placed.
  VkDeviceSize write_head_ = 0;
  // Position of the oldest allocation that has not been released yet.
  VkDeviceSize read_head_ = 0;

  VulkanDevice* device_;
  VkBuffer gpu_buffer_ = nullptr;
  VkDeviceMemory gpu_memory_ = nullptr;
  VkDeviceSize gpu_base_ = 0;
  uint8_t* host_base_ = nullptr;

  // Outstanding allocations in acquisition order (oldest first).
  std::list<Allocation*> allocations_;
};
} // namespace vulkan
} // namespace ui
} // namespace xe
#endif // XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_

View File

@ -14,6 +14,7 @@
#include "xenia/base/assert.h" #include "xenia/base/assert.h"
#include "xenia/ui/vulkan/vulkan.h" #include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe { namespace xe {
namespace ui { namespace ui {
@ -40,13 +41,15 @@ class BaseFencedPool {
// True if one or more batches are still pending on the GPU. // True if one or more batches are still pending on the GPU.
bool has_pending() const { return pending_batch_list_head_ != nullptr; } bool has_pending() const { return pending_batch_list_head_ != nullptr; }
// True if a batch is open.
bool has_open_batch() const { return open_batch_ != nullptr; }
// Checks all pending batches for completion and scavenges their entries. // Checks all pending batches for completion and scavenges their entries.
// This should be called as frequently as reasonable. // This should be called as frequently as reasonable.
void Scavenge() { void Scavenge() {
while (pending_batch_list_head_) { while (pending_batch_list_head_) {
auto batch = pending_batch_list_head_; auto batch = pending_batch_list_head_;
if (vkGetFenceStatus(device_, batch->fence) == VK_SUCCESS) { if (vkGetFenceStatus(device_, *batch->fence) == VK_SUCCESS) {
// Batch has completed. Reclaim. // Batch has completed. Reclaim.
pending_batch_list_head_ = batch->next; pending_batch_list_head_ = batch->next;
if (batch == pending_batch_list_tail_) { if (batch == pending_batch_list_tail_) {
@ -88,6 +91,24 @@ class BaseFencedPool {
open_batch_ = batch; open_batch_ = batch;
} }
// Cancels an open batch, and releases all entries acquired within.
// The batch and its entries are returned to their free lists without ever
// being queued as pending, so no fence is involved.
void CancelBatch() {
  assert_not_null(open_batch_);

  auto batch = open_batch_;
  open_batch_ = nullptr;

  // Relink the batch back into the free batch list.
  batch->next = free_batch_list_head_;
  free_batch_list_head_ = batch;

  // Relink entries back into free entries list. A batch that is cancelled
  // before any entry was acquired has no tail to splice through; skipping the
  // splice avoids a null dereference and keeps the existing free list intact.
  if (batch->entry_list_tail) {
    batch->entry_list_tail->next = free_entry_list_head_;
    free_entry_list_head_ = batch->entry_list_head;
  }
  batch->entry_list_head = nullptr;
  batch->entry_list_tail = nullptr;
}
// Attempts to acquire an entry from the pool in the current batch. // Attempts to acquire an entry from the pool in the current batch.
// If none are available a new one will be allocated. // If none are available a new one will be allocated.
HANDLE AcquireEntry() { HANDLE AcquireEntry() {
@ -114,7 +135,7 @@ class BaseFencedPool {
// Ends the current batch using the given fence to indicate when the batch // Ends the current batch using the given fence to indicate when the batch
// has completed execution on the GPU. // has completed execution on the GPU.
void EndBatch(VkFence fence) { void EndBatch(std::shared_ptr<Fence> fence) {
assert_not_null(open_batch_); assert_not_null(open_batch_);
// Close and see if we have anything. // Close and see if we have anything.
@ -137,6 +158,7 @@ class BaseFencedPool {
} }
if (pending_batch_list_tail_) { if (pending_batch_list_tail_) {
pending_batch_list_tail_->next = batch; pending_batch_list_tail_->next = batch;
pending_batch_list_tail_ = batch;
} else { } else {
pending_batch_list_tail_ = batch; pending_batch_list_tail_ = batch;
} }
@ -176,7 +198,7 @@ class BaseFencedPool {
Batch* next; Batch* next;
Entry* entry_list_head; Entry* entry_list_head;
Entry* entry_list_tail; Entry* entry_list_tail;
VkFence fence; std::shared_ptr<Fence> fence;
}; };
Batch* free_batch_list_head_ = nullptr; Batch* free_batch_list_head_ = nullptr;

View File

@ -93,8 +93,8 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) {
} }
ENABLE_AND_EXPECT(geometryShader); ENABLE_AND_EXPECT(geometryShader);
ENABLE_AND_EXPECT(depthClamp); ENABLE_AND_EXPECT(depthClamp);
ENABLE_AND_EXPECT(alphaToOne);
ENABLE_AND_EXPECT(multiViewport); ENABLE_AND_EXPECT(multiViewport);
ENABLE_AND_EXPECT(independentBlend);
// TODO(benvanik): add other features. // TODO(benvanik): add other features.
if (any_features_missing) { if (any_features_missing) {
XELOGE( XELOGE(

View File

@ -136,6 +136,46 @@ class LightweightCircularBuffer {
class VulkanImmediateTexture : public ImmediateTexture { class VulkanImmediateTexture : public ImmediateTexture {
public: public:
// Wraps an existing image view/sampler pair so it can be drawn by the
// immediate drawer. No image or device memory is created here; only a
// descriptor set referring to |image_view| is allocated.
VulkanImmediateTexture(VulkanDevice* device, VkDescriptorPool descriptor_pool,
                       VkDescriptorSetLayout descriptor_set_layout,
                       VkImageView image_view, VkSampler sampler,
                       uint32_t width, uint32_t height)
    : ImmediateTexture(width, height),
      device_(*device),
      descriptor_pool_(descriptor_pool),
      image_view_(image_view),
      sampler_(sampler) {
  handle = reinterpret_cast<uintptr_t>(this);

  // This texture has no VkImage of its own, so there is nothing
  // TransitionLayout() could legally operate on. Record the layout the
  // descriptor below declares so callers that compare layout() against
  // VK_IMAGE_LAYOUT_GENERAL do not try to transition a null image.
  image_layout_ = VK_IMAGE_LAYOUT_GENERAL;

  // Create descriptor set used just for this texture.
  // It never changes, so we can reuse it and not worry with updates.
  VkDescriptorSetAllocateInfo set_alloc_info;
  set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
  set_alloc_info.pNext = nullptr;
  set_alloc_info.descriptorPool = descriptor_pool_;
  set_alloc_info.descriptorSetCount = 1;
  set_alloc_info.pSetLayouts = &descriptor_set_layout;
  auto err =
      vkAllocateDescriptorSets(device_, &set_alloc_info, &descriptor_set_);
  CheckResult(err, "vkAllocateDescriptorSets");

  // Initialize descriptor with our texture.
  VkDescriptorImageInfo texture_info;
  texture_info.sampler = sampler_;
  texture_info.imageView = image_view_;
  texture_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
  VkWriteDescriptorSet descriptor_write;
  descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  descriptor_write.pNext = nullptr;
  descriptor_write.dstSet = descriptor_set_;
  descriptor_write.dstBinding = 0;
  descriptor_write.dstArrayElement = 0;
  descriptor_write.descriptorCount = 1;
  descriptor_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
  descriptor_write.pImageInfo = &texture_info;
  vkUpdateDescriptorSets(device_, 1, &descriptor_write, 0, nullptr);
}
VulkanImmediateTexture(VulkanDevice* device, VkDescriptorPool descriptor_pool, VulkanImmediateTexture(VulkanDevice* device, VkDescriptorPool descriptor_pool,
VkDescriptorSetLayout descriptor_set_layout, VkDescriptorSetLayout descriptor_set_layout,
VkSampler sampler, uint32_t width, uint32_t height) VkSampler sampler, uint32_t width, uint32_t height)
@ -161,7 +201,7 @@ class VulkanImmediateTexture : public ImmediateTexture {
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_info.queueFamilyIndexCount = 0; image_info.queueFamilyIndexCount = 0;
image_info.pQueueFamilyIndices = nullptr; image_info.pQueueFamilyIndices = nullptr;
image_info.initialLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
auto err = vkCreateImage(device_, &image_info, nullptr, &image_); auto err = vkCreateImage(device_, &image_info, nullptr, &image_);
CheckResult(err, "vkCreateImage"); CheckResult(err, "vkCreateImage");
@ -221,9 +261,12 @@ class VulkanImmediateTexture : public ImmediateTexture {
~VulkanImmediateTexture() override { ~VulkanImmediateTexture() override {
vkFreeDescriptorSets(device_, descriptor_pool_, 1, &descriptor_set_); vkFreeDescriptorSets(device_, descriptor_pool_, 1, &descriptor_set_);
vkDestroyImageView(device_, image_view_, nullptr);
vkDestroyImage(device_, image_, nullptr); if (device_memory_) {
vkFreeMemory(device_, device_memory_, nullptr); vkDestroyImageView(device_, image_view_, nullptr);
vkDestroyImage(device_, image_, nullptr);
vkFreeMemory(device_, device_memory_, nullptr);
}
} }
void Upload(const uint8_t* src_data) { void Upload(const uint8_t* src_data) {
@ -238,25 +281,49 @@ class VulkanImmediateTexture : public ImmediateTexture {
vkGetImageSubresourceLayout(device_, image_, &subresource, &layout); vkGetImageSubresourceLayout(device_, image_, &subresource, &layout);
// Map memory for upload. // Map memory for upload.
void* gpu_data = nullptr; uint8_t* gpu_data = nullptr;
auto err = auto err = vkMapMemory(device_, device_memory_, 0, layout.size, 0,
vkMapMemory(device_, device_memory_, 0, layout.size, 0, &gpu_data); reinterpret_cast<void**>(&gpu_data));
CheckResult(err, "vkMapMemory"); CheckResult(err, "vkMapMemory");
// Copy the entire texture, hoping its layout matches what we expect. // Copy the entire texture, hoping its layout matches what we expect.
std::memcpy(gpu_data, src_data, layout.size); std::memcpy(gpu_data + layout.offset, src_data, layout.size);
vkUnmapMemory(device_, device_memory_); vkUnmapMemory(device_, device_memory_);
} }
// Queues a command to transition this texture to a new layout. This assumes
// the command buffer WILL be queued and executed by the device, because the
// tracked layout is updated immediately at record time.
void TransitionLayout(VkCommandBuffer command_buffer,
                      VkImageLayout new_layout) {
  VkImageMemoryBarrier image_barrier;
  image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  image_barrier.pNext = nullptr;
  image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  image_barrier.srcAccessMask = 0;
  image_barrier.dstAccessMask = 0;
  // Transition from the layout the image is actually in. Using
  // VK_IMAGE_LAYOUT_UNDEFINED here would allow the implementation to discard
  // the image contents, losing texels previously written by Upload().
  image_barrier.oldLayout = image_layout_;
  image_barrier.newLayout = new_layout;
  image_barrier.image = image_;
  // Single mip level, single array layer, color aspect.
  image_barrier.subresourceRange = {0, 0, 1, 0, 1};
  image_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  image_layout_ = new_layout;

  // NOTE(review): access masks are 0 and both stages are TOP_OF_PIPE; confirm
  // this provides the visibility needed for host-written data.
  vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                       VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
                       nullptr, 1, &image_barrier);
}
VkDescriptorSet descriptor_set() const { return descriptor_set_; } VkDescriptorSet descriptor_set() const { return descriptor_set_; }
VkImageLayout layout() const { return image_layout_; }
private: private:
VkDevice device_ = nullptr; VkDevice device_ = nullptr;
VkDescriptorPool descriptor_pool_ = nullptr; VkDescriptorPool descriptor_pool_ = nullptr;
VkSampler sampler_ = nullptr; // Not owned. VkSampler sampler_ = nullptr; // Not owned.
VkImage image_ = nullptr; VkImage image_ = nullptr;
VkImageLayout image_layout_ = VK_IMAGE_LAYOUT_UNDEFINED; VkImageLayout image_layout_ = VK_IMAGE_LAYOUT_PREINITIALIZED;
VkDeviceMemory device_memory_ = nullptr; VkDeviceMemory device_memory_ = nullptr;
VkImageView image_view_ = nullptr; VkImageView image_view_ = nullptr;
VkDescriptorSet descriptor_set_ = nullptr; VkDescriptorSet descriptor_set_ = nullptr;
@ -538,7 +605,7 @@ VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext* graphics_context)
pipeline_info.renderPass = context_->swap_chain()->render_pass(); pipeline_info.renderPass = context_->swap_chain()->render_pass();
pipeline_info.subpass = 0; pipeline_info.subpass = 0;
pipeline_info.basePipelineHandle = nullptr; pipeline_info.basePipelineHandle = nullptr;
pipeline_info.basePipelineIndex = 0; pipeline_info.basePipelineIndex = -1;
err = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, nullptr, err = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, nullptr,
&triangle_pipeline_); &triangle_pipeline_);
CheckResult(err, "vkCreateGraphicsPipelines"); CheckResult(err, "vkCreateGraphicsPipelines");
@ -547,7 +614,7 @@ VulkanImmediateDrawer::VulkanImmediateDrawer(VulkanContext* graphics_context)
pipeline_info.flags = VK_PIPELINE_CREATE_DERIVATIVE_BIT; pipeline_info.flags = VK_PIPELINE_CREATE_DERIVATIVE_BIT;
input_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; input_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
pipeline_info.basePipelineHandle = triangle_pipeline_; pipeline_info.basePipelineHandle = triangle_pipeline_;
pipeline_info.basePipelineIndex = 0; pipeline_info.basePipelineIndex = -1;
err = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, nullptr, err = vkCreateGraphicsPipelines(*device, nullptr, 1, &pipeline_info, nullptr,
&line_pipeline_); &line_pipeline_);
CheckResult(err, "vkCreateGraphicsPipelines"); CheckResult(err, "vkCreateGraphicsPipelines");
@ -604,6 +671,14 @@ std::unique_ptr<ImmediateTexture> VulkanImmediateDrawer::CreateTexture(
return std::unique_ptr<ImmediateTexture>(texture.release()); return std::unique_ptr<ImmediateTexture>(texture.release());
} }
// Builds an ImmediateTexture around an existing image view and sampler, using
// this drawer's descriptor pool and texture set layout for its descriptor set.
std::unique_ptr<ImmediateTexture> VulkanImmediateDrawer::WrapTexture(
    VkImageView image_view, VkSampler sampler, uint32_t width,
    uint32_t height) {
  auto wrapped = std::make_unique<VulkanImmediateTexture>(
      context_->device(), descriptor_pool_, texture_set_layout_, image_view,
      sampler, width, height);
  return std::unique_ptr<ImmediateTexture>(wrapped.release());
}
void VulkanImmediateDrawer::UpdateTexture(ImmediateTexture* texture, void VulkanImmediateDrawer::UpdateTexture(ImmediateTexture* texture,
const uint8_t* data) { const uint8_t* data) {
static_cast<VulkanImmediateTexture*>(texture)->Upload(data); static_cast<VulkanImmediateTexture*>(texture)->Upload(data);
@ -672,9 +747,6 @@ void VulkanImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) {
void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) { void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) {
auto swap_chain = context_->swap_chain(); auto swap_chain = context_->swap_chain();
if (draw.primitive_type != ImmediatePrimitiveType::kTriangles) {
return;
}
switch (draw.primitive_type) { switch (draw.primitive_type) {
case ImmediatePrimitiveType::kLines: case ImmediatePrimitiveType::kLines:
vkCmdBindPipeline(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS, vkCmdBindPipeline(current_cmd_buffer_, VK_PIPELINE_BIND_POINT_GRAPHICS,
@ -689,6 +761,10 @@ void VulkanImmediateDrawer::Draw(const ImmediateDraw& draw) {
// Setup texture binding. // Setup texture binding.
auto texture = reinterpret_cast<VulkanImmediateTexture*>(draw.texture_handle); auto texture = reinterpret_cast<VulkanImmediateTexture*>(draw.texture_handle);
if (texture) { if (texture) {
if (texture->layout() != VK_IMAGE_LAYOUT_GENERAL) {
texture->TransitionLayout(current_cmd_buffer_, VK_IMAGE_LAYOUT_GENERAL);
}
auto texture_set = texture->descriptor_set(); auto texture_set = texture->descriptor_set();
vkCmdBindDescriptorSets(current_cmd_buffer_, vkCmdBindDescriptorSets(current_cmd_buffer_,
VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout_,

View File

@ -32,6 +32,10 @@ class VulkanImmediateDrawer : public ImmediateDrawer {
ImmediateTextureFilter filter, ImmediateTextureFilter filter,
bool repeat, bool repeat,
const uint8_t* data) override; const uint8_t* data) override;
std::unique_ptr<ImmediateTexture> WrapTexture(VkImageView image_view,
VkSampler sampler,
uint32_t width,
uint32_t height);
void UpdateTexture(ImmediateTexture* texture, const uint8_t* data) override; void UpdateTexture(ImmediateTexture* texture, const uint8_t* data) override;
void Begin(int render_target_width, int render_target_height) override; void Begin(int render_target_width, int render_target_height) override;

View File

@ -187,6 +187,10 @@ bool VulkanSwapChain::Initialize(VkSurfaceKHR surface) {
vkAllocateCommandBuffers(*device_, &cmd_buffer_info, &render_cmd_buffer_); vkAllocateCommandBuffers(*device_, &cmd_buffer_info, &render_cmd_buffer_);
CheckResult(err, "vkCreateCommandBuffer"); CheckResult(err, "vkCreateCommandBuffer");
// Create another command buffer that handles image copies.
err = vkAllocateCommandBuffers(*device_, &cmd_buffer_info, &copy_cmd_buffer_);
CheckResult(err, "vkCreateCommandBuffer");
// Create the render pass used to draw to the swap chain. // Create the render pass used to draw to the swap chain.
// The actual framebuffer attached will depend on which image we are drawing // The actual framebuffer attached will depend on which image we are drawing
// into. // into.
@ -194,7 +198,7 @@ bool VulkanSwapChain::Initialize(VkSurfaceKHR surface) {
color_attachment.flags = 0; color_attachment.flags = 0;
color_attachment.format = surface_format_; color_attachment.format = surface_format_;
color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; color_attachment.samples = VK_SAMPLE_COUNT_1_BIT;
color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; // CLEAR;
color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
@ -388,6 +392,7 @@ bool VulkanSwapChain::Begin() {
// Reset all command buffers. // Reset all command buffers.
vkResetCommandBuffer(render_cmd_buffer_, 0); vkResetCommandBuffer(render_cmd_buffer_, 0);
vkResetCommandBuffer(copy_cmd_buffer_, 0);
auto& current_buffer = buffers_[current_buffer_index_]; auto& current_buffer = buffers_[current_buffer_index_];
// Build the command buffer that will execute all queued rendering buffers. // Build the command buffer that will execute all queued rendering buffers.
@ -399,14 +404,18 @@ bool VulkanSwapChain::Begin() {
err = vkBeginCommandBuffer(render_cmd_buffer_, &begin_info); err = vkBeginCommandBuffer(render_cmd_buffer_, &begin_info);
CheckResult(err, "vkBeginCommandBuffer"); CheckResult(err, "vkBeginCommandBuffer");
// Transition the image to a format we can render to. // Start recording the copy command buffer as well.
err = vkBeginCommandBuffer(copy_cmd_buffer_, &begin_info);
CheckResult(err, "vkBeginCommandBuffer");
// Transition the image to a format we can copy to.
VkImageMemoryBarrier pre_image_memory_barrier; VkImageMemoryBarrier pre_image_memory_barrier;
pre_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; pre_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
pre_image_memory_barrier.pNext = nullptr; pre_image_memory_barrier.pNext = nullptr;
pre_image_memory_barrier.srcAccessMask = 0; pre_image_memory_barrier.srcAccessMask = 0;
pre_image_memory_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; pre_image_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
pre_image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; pre_image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
pre_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; pre_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
pre_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; pre_image_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
pre_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; pre_image_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
pre_image_memory_barrier.image = current_buffer.image; pre_image_memory_barrier.image = current_buffer.image;
@ -416,23 +425,37 @@ bool VulkanSwapChain::Begin() {
pre_image_memory_barrier.subresourceRange.levelCount = 1; pre_image_memory_barrier.subresourceRange.levelCount = 1;
pre_image_memory_barrier.subresourceRange.baseArrayLayer = 0; pre_image_memory_barrier.subresourceRange.baseArrayLayer = 0;
pre_image_memory_barrier.subresourceRange.layerCount = 1; pre_image_memory_barrier.subresourceRange.layerCount = 1;
vkCmdPipelineBarrier(copy_cmd_buffer_, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
nullptr, 1, &pre_image_memory_barrier);
// First: Issue a command to clear the render target.
VkImageSubresourceRange clear_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
VkClearColorValue clear_color;
clear_color.float32[0] = 238 / 255.0f;
clear_color.float32[1] = 238 / 255.0f;
clear_color.float32[2] = 238 / 255.0f;
clear_color.float32[3] = 1.0f;
if (FLAGS_vulkan_random_clear_color) {
clear_color.float32[0] =
rand() / static_cast<float>(RAND_MAX); // NOLINT(runtime/threadsafe_fn)
clear_color.float32[1] = 1.0f;
clear_color.float32[2] = 0.0f;
}
vkCmdClearColorImage(copy_cmd_buffer_, current_buffer.image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1,
&clear_range);
// Transition the image to a color attachment target for drawing.
pre_image_memory_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
pre_image_memory_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
pre_image_memory_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
pre_image_memory_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
vkCmdPipelineBarrier(render_cmd_buffer_, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vkCmdPipelineBarrier(render_cmd_buffer_, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
nullptr, 1, &pre_image_memory_barrier); nullptr, 1, &pre_image_memory_barrier);
// Begin render pass. // Begin render pass.
VkClearValue color_clear_value;
color_clear_value.color.float32[0] = 238 / 255.0f;
color_clear_value.color.float32[1] = 238 / 255.0f;
color_clear_value.color.float32[2] = 238 / 255.0f;
color_clear_value.color.float32[3] = 1.0f;
if (FLAGS_vulkan_random_clear_color) {
color_clear_value.color.float32[0] =
rand() / static_cast<float>(RAND_MAX); // NOLINT(runtime/threadsafe_fn)
color_clear_value.color.float32[1] = 1.0f;
color_clear_value.color.float32[2] = 0.0f;
}
VkClearValue clear_values[] = {color_clear_value};
VkRenderPassBeginInfo render_pass_begin_info; VkRenderPassBeginInfo render_pass_begin_info;
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin_info.pNext = nullptr; render_pass_begin_info.pNext = nullptr;
@ -442,9 +465,8 @@ bool VulkanSwapChain::Begin() {
render_pass_begin_info.renderArea.offset.y = 0; render_pass_begin_info.renderArea.offset.y = 0;
render_pass_begin_info.renderArea.extent.width = surface_width_; render_pass_begin_info.renderArea.extent.width = surface_width_;
render_pass_begin_info.renderArea.extent.height = surface_height_; render_pass_begin_info.renderArea.extent.height = surface_height_;
render_pass_begin_info.clearValueCount = render_pass_begin_info.clearValueCount = 0;
static_cast<uint32_t>(xe::countof(clear_values)); render_pass_begin_info.pClearValues = nullptr;
render_pass_begin_info.pClearValues = clear_values;
vkCmdBeginRenderPass(render_cmd_buffer_, &render_pass_begin_info, vkCmdBeginRenderPass(render_cmd_buffer_, &render_pass_begin_info,
VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS); VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
@ -458,6 +480,7 @@ bool VulkanSwapChain::End() {
vkCmdEndRenderPass(render_cmd_buffer_); vkCmdEndRenderPass(render_cmd_buffer_);
// Transition the image to a format the presentation engine can source from. // Transition the image to a format the presentation engine can source from.
// FIXME: Do we need more synchronization here between the copy buffer?
VkImageMemoryBarrier post_image_memory_barrier; VkImageMemoryBarrier post_image_memory_barrier;
post_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; post_image_memory_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
post_image_memory_barrier.pNext = nullptr; post_image_memory_barrier.pNext = nullptr;
@ -483,14 +506,20 @@ bool VulkanSwapChain::End() {
auto err = vkEndCommandBuffer(render_cmd_buffer_); auto err = vkEndCommandBuffer(render_cmd_buffer_);
CheckResult(err, "vkEndCommandBuffer"); CheckResult(err, "vkEndCommandBuffer");
err = vkEndCommandBuffer(copy_cmd_buffer_);
CheckResult(err, "vkEndCommandBuffer");
VkCommandBuffer command_buffers[] = {copy_cmd_buffer_, render_cmd_buffer_};
// Submit rendering. // Submit rendering.
VkSubmitInfo render_submit_info; VkSubmitInfo render_submit_info;
render_submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; render_submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
render_submit_info.pNext = nullptr; render_submit_info.pNext = nullptr;
render_submit_info.waitSemaphoreCount = 0; render_submit_info.waitSemaphoreCount = 0;
render_submit_info.pWaitSemaphores = nullptr; render_submit_info.pWaitSemaphores = nullptr;
render_submit_info.commandBufferCount = 1; render_submit_info.commandBufferCount =
render_submit_info.pCommandBuffers = &render_cmd_buffer_; static_cast<uint32_t>(xe::countof(command_buffers));
render_submit_info.pCommandBuffers = command_buffers;
render_submit_info.signalSemaphoreCount = 0; render_submit_info.signalSemaphoreCount = 0;
render_submit_info.pSignalSemaphores = nullptr; render_submit_info.pSignalSemaphores = nullptr;
{ {

View File

@ -35,11 +35,16 @@ class VulkanSwapChain {
uint32_t surface_width() const { return surface_width_; } uint32_t surface_width() const { return surface_width_; }
uint32_t surface_height() const { return surface_height_; } uint32_t surface_height() const { return surface_height_; }
VkImage surface_image() const {
return buffers_[current_buffer_index_].image;
}
// Render pass used for compositing. // Render pass used for compositing.
VkRenderPass render_pass() const { return render_pass_; } VkRenderPass render_pass() const { return render_pass_; }
// Render command buffer, active inside the render pass from Begin to End. // Render command buffer, active inside the render pass from Begin to End.
VkCommandBuffer render_cmd_buffer() const { return render_cmd_buffer_; } VkCommandBuffer render_cmd_buffer() const { return render_cmd_buffer_; }
// Copy commands, ran before the render command buffer.
VkCommandBuffer copy_cmd_buffer() const { return copy_cmd_buffer_; }
// Initializes the swap chain with the given WSI surface. // Initializes the swap chain with the given WSI surface.
bool Initialize(VkSurfaceKHR surface); bool Initialize(VkSurfaceKHR surface);
@ -74,6 +79,7 @@ class VulkanSwapChain {
uint32_t surface_height_ = 0; uint32_t surface_height_ = 0;
VkFormat surface_format_ = VK_FORMAT_UNDEFINED; VkFormat surface_format_ = VK_FORMAT_UNDEFINED;
VkCommandPool cmd_pool_ = nullptr; VkCommandPool cmd_pool_ = nullptr;
VkCommandBuffer copy_cmd_buffer_ = nullptr;
VkCommandBuffer render_cmd_buffer_ = nullptr; VkCommandBuffer render_cmd_buffer_ = nullptr;
VkRenderPass render_pass_ = nullptr; VkRenderPass render_pass_ = nullptr;
VkSemaphore image_available_semaphore_ = nullptr; VkSemaphore image_available_semaphore_ = nullptr;

View File

@ -25,6 +25,30 @@ namespace xe {
namespace ui { namespace ui {
namespace vulkan { namespace vulkan {
// Owns a VkFence: creates it (unsignaled) on construction and destroys it on
// destruction.
class Fence {
 public:
  Fence(VkDevice device) : device_(device) {
    VkFenceCreateInfo fence_info;
    fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
    fence_info.pNext = nullptr;
    fence_info.flags = 0;
    // NOTE(review): the result of vkCreateFence is not checked here.
    vkCreateFence(device, &fence_info, nullptr, &fence_);
  }
  ~Fence() {
    vkDestroyFence(device_, fence_, nullptr);
    fence_ = nullptr;
  }

  // The destructor destroys the underlying VkFence, so a member-wise copy
  // would destroy the same handle twice. Copying is therefore disallowed.
  Fence(const Fence&) = delete;
  Fence& operator=(const Fence&) = delete;

  // Returns the result of vkGetFenceStatus for this fence.
  VkResult status() const { return vkGetFenceStatus(device_, fence_); }

  VkFence fence() const { return fence_; }
  operator VkFence() const { return fence_; }

 private:
  VkDevice device_;
  VkFence fence_ = nullptr;
};
struct Version { struct Version {
uint32_t major; uint32_t major;
uint32_t minor; uint32_t minor;

View File

@ -1166,6 +1166,7 @@ void Builder::createMemoryBarrier(unsigned executionScope, unsigned memorySemant
// An opcode that has one operands, a result id, and a type // An opcode that has one operands, a result id, and a type
Id Builder::createUnaryOp(Op opCode, Id typeId, Id operand) Id Builder::createUnaryOp(Op opCode, Id typeId, Id operand)
{ {
assert(operand != 0);
Instruction* op = new Instruction(getUniqueId(), typeId, opCode); Instruction* op = new Instruction(getUniqueId(), typeId, opCode);
op->addIdOperand(operand); op->addIdOperand(operand);
buildPoint->addInstruction(std::unique_ptr<Instruction>(op)); buildPoint->addInstruction(std::unique_ptr<Instruction>(op));
@ -1175,6 +1176,8 @@ Id Builder::createUnaryOp(Op opCode, Id typeId, Id operand)
Id Builder::createBinOp(Op opCode, Id typeId, Id left, Id right) Id Builder::createBinOp(Op opCode, Id typeId, Id left, Id right)
{ {
assert(left != 0);
assert(right != 0);
Instruction* op = new Instruction(getUniqueId(), typeId, opCode); Instruction* op = new Instruction(getUniqueId(), typeId, opCode);
op->addIdOperand(left); op->addIdOperand(left);
op->addIdOperand(right); op->addIdOperand(right);
@ -1185,6 +1188,9 @@ Id Builder::createBinOp(Op opCode, Id typeId, Id left, Id right)
Id Builder::createTriOp(Op opCode, Id typeId, Id op1, Id op2, Id op3) Id Builder::createTriOp(Op opCode, Id typeId, Id op1, Id op2, Id op3)
{ {
assert(op1 != 0);
assert(op2 != 0);
assert(op3 != 0);
Instruction* op = new Instruction(getUniqueId(), typeId, opCode); Instruction* op = new Instruction(getUniqueId(), typeId, opCode);
op->addIdOperand(op1); op->addIdOperand(op1);
op->addIdOperand(op2); op->addIdOperand(op2);

View File

@ -93,6 +93,8 @@ public:
return id; return id;
} }
// Returns a pointer to this builder's `module` member (non-owning).
Module* getModule() { return &module; }
// For creating new types (will return old type if the requested one was already made). // For creating new types (will return old type if the requested one was already made).
Id makeVoidType(); Id makeVoidType();
Id makeBoolType(); Id makeBoolType();
@ -517,6 +519,7 @@ public:
void createBranch(Block* block); void createBranch(Block* block);
void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock); void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock);
void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control); void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control);
void createSelectionMerge(Block* mergeBlock, unsigned int control);
protected: protected:
Id makeIntConstant(Id typeId, unsigned value, bool specConstant); Id makeIntConstant(Id typeId, unsigned value, bool specConstant);
@ -527,7 +530,6 @@ public:
void transferAccessChainSwizzle(bool dynamic); void transferAccessChainSwizzle(bool dynamic);
void simplifyAccessChainSwizzle(); void simplifyAccessChainSwizzle();
void createAndSetNoPredecessorBlock(const char*); void createAndSetNoPredecessorBlock(const char*);
void createSelectionMerge(Block* mergeBlock, unsigned int control);
void dumpInstructions(std::vector<unsigned int>&, const std::vector<std::unique_ptr<Instruction> >&) const; void dumpInstructions(std::vector<unsigned int>&, const std::vector<std::unique_ptr<Instruction> >&) const;
SourceLanguage source; SourceLanguage source;

View File

@ -180,6 +180,11 @@ public:
void addInstruction(std::unique_ptr<Instruction> inst); void addInstruction(std::unique_ptr<Instruction> inst);
void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);} void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);}
void addLocalVariable(std::unique_ptr<Instruction> inst) { localVariables.push_back(std::move(inst)); } void addLocalVariable(std::unique_ptr<Instruction> inst) { localVariables.push_back(std::move(inst)); }
void insertInstruction(size_t pos, std::unique_ptr<Instruction> inst);
// Number of entries currently stored in this block's instruction list.
size_t getInstructionCount() { return instructions.size(); }
// Returns a non-owning pointer to the instruction at index i.
// No bounds checking is performed; i must be < getInstructionCount().
Instruction* getInstruction(size_t i) { return instructions[i].get(); }
// Erases the instruction at index i from this block's instruction list.
// No bounds checking is performed; i must be < getInstructionCount().
// NOTE(review): the list appears to hold std::unique_ptr<Instruction>, so the
// erased instruction is presumably destroyed here - confirm no one else
// still references it.
void removeInstruction(size_t i) { instructions.erase(instructions.begin() + i); }
const std::vector<Block*>& getPredecessors() const { return predecessors; } const std::vector<Block*>& getPredecessors() const { return predecessors; }
const std::vector<Block*>& getSuccessors() const { return successors; } const std::vector<Block*>& getSuccessors() const { return successors; }
void setUnreachable() { unreachable = true; } void setUnreachable() { unreachable = true; }
@ -200,6 +205,10 @@ public:
bool isTerminated() const bool isTerminated() const
{ {
if (instructions.size() == 0) {
return false;
}
switch (instructions.back()->getOpCode()) { switch (instructions.back()->getOpCode()) {
case OpBranch: case OpBranch:
case OpBranchConditional: case OpBranchConditional:
@ -215,6 +224,7 @@ public:
void dump(std::vector<unsigned int>& out) const void dump(std::vector<unsigned int>& out) const
{ {
// OpLabel
instructions[0]->dump(out); instructions[0]->dump(out);
for (int i = 0; i < (int)localVariables.size(); ++i) for (int i = 0; i < (int)localVariables.size(); ++i)
localVariables[i]->dump(out); localVariables[i]->dump(out);
@ -222,7 +232,51 @@ public:
instructions[i]->dump(out); instructions[i]->dump(out);
} }
protected: // Moves all instructions from a target block into this block, and removes
// the target block from our list of successors.
// This function assumes this block unconditionally branches to the target
// block directly.
void merge(Block* target_block) {
  // Drop our terminator (expected to be the branch into the target) so the
  // target's instructions can be appended directly after our own.
  if (isTerminated()) {
    instructions.pop_back();
  }

  // Unlink the target from our successors (first occurrence only).
  auto link = std::find(successors.begin(), successors.end(), target_block);
  if (link != successors.end()) {
    successors.erase(link);
  }

  // Everything the target flowed into, we now flow into.
  successors.insert(successors.end(), target_block->successors.begin(),
                    target_block->successors.end());

  // Patch predecessor lists so nothing points back at the target any more.
  // Running this over our pre-existing successors as well is harmless: they
  // simply contain nothing to replace.
  for (auto block : successors) {
    std::replace(block->predecessors.begin(), block->predecessors.end(),
                 target_block, this);
  }

  // Move every non-OpLabel instruction out of the target into this block.
  // The target's list is compacted in a single pass (labels are shifted to
  // the front, the moved-from tail is erased once) instead of erasing from
  // the middle of the vector on every iteration.
  auto& source = target_block->instructions;
  auto kept = source.begin();
  for (auto it = source.begin(); it != source.end(); ++it) {
    if ((*it)->getOpCode() == spv::Op::OpLabel) {
      if (kept != it) {
        *kept = std::move(*it);
      }
      ++kept;
      continue;
    }
    // Re-parent the instruction so it reports this block as its owner, the
    // same way insertInstruction() does, rather than the drained target.
    (*it)->setBlock(this);
    instructions.push_back(std::move(*it));
  }
  source.erase(kept, source.end());

  // The target is now detached from the control flow graph.
  target_block->predecessors.clear();
  target_block->successors.clear();
}
protected:
Block(const Block&); Block(const Block&);
Block& operator=(Block&); Block& operator=(Block&);
@ -275,6 +329,17 @@ public:
Module& getParent() const { return parent; } Module& getParent() const { return parent; }
Block* getEntryBlock() const { return blocks.front(); } Block* getEntryBlock() const { return blocks.front(); }
Block* getLastBlock() const { return blocks.back(); } Block* getLastBlock() const { return blocks.back(); }
// Linear search of this function's blocks for the one whose id matches.
// Returns the first match, or nullptr when no block has the given id.
Block* findBlockById(Id id)
{
    for (auto it = blocks.begin(); it != blocks.end(); ++it) {
        Block* candidate = *it;
        if (candidate->getId() != id)
            continue;
        return candidate;
    }
    return nullptr;
}
// Mutable access to this function's list of blocks (returned by reference,
// so callers can modify the list in place).
std::vector<Block*>& getBlocks() { return blocks; }
void addLocalVariable(std::unique_ptr<Instruction> inst); void addLocalVariable(std::unique_ptr<Instruction> inst);
Id getReturnType() const { return functionInstruction.getTypeId(); } Id getReturnType() const { return functionInstruction.getTypeId(); }
void dump(std::vector<unsigned int>& out) const void dump(std::vector<unsigned int>& out) const
@ -315,6 +380,8 @@ public:
} }
void addFunction(Function *fun) { functions.push_back(fun); } void addFunction(Function *fun) { functions.push_back(fun); }
// Access to the functions registered through addFunction(). The const
// overload gives a read-only view; the non-const overload returns the list
// by mutable reference so callers can modify it in place.
const std::vector<Function*>& getFunctions() const { return functions; }
std::vector<Function*>& getFunctions() { return functions; }
void mapInstruction(Instruction *instruction) void mapInstruction(Instruction *instruction)
{ {
@ -398,6 +465,14 @@ __inline void Block::addInstruction(std::unique_ptr<Instruction> inst)
parent.getParent().mapInstruction(raw_instruction); parent.getParent().mapInstruction(raw_instruction);
} }
__inline void Block::insertInstruction(size_t pos, std::unique_ptr<Instruction> inst) {
Instruction* raw_instruction = inst.get();
instructions.insert(instructions.begin() + pos, std::move(inst));
raw_instruction->setBlock(this);
if (raw_instruction->getResultId())
parent.getParent().mapInstruction(raw_instruction);
}
}; // end spv namespace }; // end spv namespace
#endif // spvIR_H #endif // spvIR_H

View File

@ -13,9 +13,9 @@ project("spirv-tools")
"spirv-tools/include", "spirv-tools/include",
}) })
files({ files({
"spirv-tools/external/include/headers/GLSL.std.450.h", "spirv-tools/include/spirv/GLSL.std.450.h",
"spirv-tools/external/include/headers/OpenCL.std.h", "spirv-tools/include/spirv/OpenCL.std.h",
"spirv-tools/external/include/headers/spirv.h", "spirv-tools/include/spirv/spirv.h",
"spirv-tools/include/spirv-tools/libspirv.h", "spirv-tools/include/spirv-tools/libspirv.h",
"spirv-tools/source/assembly_grammar.cpp", "spirv-tools/source/assembly_grammar.cpp",
"spirv-tools/source/assembly_grammar.h", "spirv-tools/source/assembly_grammar.h",

View File

@ -642,8 +642,7 @@ class GenSpirvCommand(Command):
print('Generating SPIR-V binaries...') print('Generating SPIR-V binaries...')
print('') print('')
# TODO(benvanik): actually find vulkan SDK. Env var? etc? vulkan_sdk_path = os.environ['VULKAN_SDK']
vulkan_sdk_path = 'C:\\VulkanSDK\\1.0.3.1'
vulkan_bin_path = os.path.join(vulkan_sdk_path, 'bin') vulkan_bin_path = os.path.join(vulkan_sdk_path, 'bin')
glslang = os.path.join(vulkan_bin_path, 'glslangValidator') glslang = os.path.join(vulkan_bin_path, 'glslangValidator')
spirv_dis = os.path.join(vulkan_bin_path, 'spirv-dis') spirv_dis = os.path.join(vulkan_bin_path, 'spirv-dis')