Merge pull request #531 from DrChat/gl4_shader_cache

GL4 Shader Cache
This commit is contained in:
Ben Vanik 2016-02-16 07:20:33 -08:00
commit c5de61860d
12 changed files with 350 additions and 51 deletions

View File

@ -56,7 +56,8 @@ GL4CommandProcessor::GL4CommandProcessor(GL4GraphicsSystem* graphics_system,
: CommandProcessor(graphics_system, kernel_state), : CommandProcessor(graphics_system, kernel_state),
shader_translator_(GlslShaderTranslator::Dialect::kGL45), shader_translator_(GlslShaderTranslator::Dialect::kGL45),
draw_batcher_(graphics_system_->register_file()), draw_batcher_(graphics_system_->register_file()),
scratch_buffer_(kScratchBufferCapacity, kScratchBufferAlignment) {} scratch_buffer_(kScratchBufferCapacity, kScratchBufferAlignment),
shader_cache_(&shader_translator_) {}
GL4CommandProcessor::~GL4CommandProcessor() = default; GL4CommandProcessor::~GL4CommandProcessor() = default;
@ -324,8 +325,7 @@ void GL4CommandProcessor::ShutdownContext() {
scratch_buffer_.Shutdown(); scratch_buffer_.Shutdown();
all_pipelines_.clear(); all_pipelines_.clear();
all_shaders_.clear(); shader_cache_.Reset();
shader_cache_.clear();
CommandProcessor::ShutdownContext(); CommandProcessor::ShutdownContext();
} }
@ -484,41 +484,8 @@ Shader* GL4CommandProcessor::LoadShader(ShaderType shader_type,
uint32_t guest_address, uint32_t guest_address,
const uint32_t* host_address, const uint32_t* host_address,
uint32_t dword_count) { uint32_t dword_count) {
// Hash the input memory and lookup the shader. return shader_cache_.LookupOrInsertShader(shader_type, host_address,
GL4Shader* shader_ptr = nullptr;
uint64_t hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
auto it = shader_cache_.find(hash);
if (it != shader_cache_.end()) {
// Found in the cache.
// TODO(benvanik): compare bytes? Likelyhood of collision is low.
shader_ptr = it->second;
} else {
// Not found in cache.
auto shader = std::make_unique<GL4Shader>(shader_type, hash, host_address,
dword_count); dword_count);
shader_ptr = shader.get();
shader_cache_.insert({hash, shader_ptr});
all_shaders_.emplace_back(std::move(shader));
// Perform translation.
// If this fails the shader will be marked as invalid and ignored later.
if (shader_translator_.Translate(shader_ptr)) {
shader_ptr->Prepare();
// Dump shader files if desired.
if (!FLAGS_dump_shaders.empty()) {
shader_ptr->Dump(FLAGS_dump_shaders, "gl4");
}
} else {
XELOGE("Shader failed translation");
}
XELOGGPU("Set %s shader at %0.8X (%db):\n%s",
shader_type == ShaderType::kVertex ? "vertex" : "pixel",
guest_address, dword_count * 4,
shader_ptr->ucode_disassembly().c_str());
}
return shader_ptr;
} }
bool GL4CommandProcessor::IssueDraw(PrimitiveType prim_type, bool GL4CommandProcessor::IssueDraw(PrimitiveType prim_type,

View File

@ -24,6 +24,7 @@
#include "xenia/gpu/command_processor.h" #include "xenia/gpu/command_processor.h"
#include "xenia/gpu/gl4/draw_batcher.h" #include "xenia/gpu/gl4/draw_batcher.h"
#include "xenia/gpu/gl4/gl4_shader.h" #include "xenia/gpu/gl4/gl4_shader.h"
#include "xenia/gpu/gl4/gl4_shader_cache.h"
#include "xenia/gpu/gl4/texture_cache.h" #include "xenia/gpu/gl4/texture_cache.h"
#include "xenia/gpu/glsl_shader_translator.h" #include "xenia/gpu/glsl_shader_translator.h"
#include "xenia/gpu/register_file.h" #include "xenia/gpu/register_file.h"
@ -131,8 +132,7 @@ class GL4CommandProcessor : public CommandProcessor {
GLuint depth_target); GLuint depth_target);
GlslShaderTranslator shader_translator_; GlslShaderTranslator shader_translator_;
std::vector<std::unique_ptr<GL4Shader>> all_shaders_; GL4ShaderCache shader_cache_;
std::unordered_map<uint64_t, GL4Shader*> shader_cache_;
CachedFramebuffer* active_framebuffer_ = nullptr; CachedFramebuffer* active_framebuffer_ = nullptr;
GLuint last_framebuffer_texture_ = 0; GLuint last_framebuffer_texture_ = 0;

View File

@ -12,3 +12,6 @@
DEFINE_bool(disable_framebuffer_readback, false, DEFINE_bool(disable_framebuffer_readback, false,
"Disable framebuffer readback."); "Disable framebuffer readback.");
DEFINE_bool(disable_textures, false, "Disable textures and use colors only."); DEFINE_bool(disable_textures, false, "Disable textures and use colors only.");
DEFINE_string(shader_cache_dir, "",
"GL4 Shader cache directory (relative to Xenia). Specify an "
"empty string to disable the cache.");

View File

@ -14,6 +14,7 @@
DECLARE_bool(disable_framebuffer_readback); DECLARE_bool(disable_framebuffer_readback);
DECLARE_bool(disable_textures); DECLARE_bool(disable_textures);
DECLARE_string(shader_cache_dir);
#define FINE_GRAINED_DRAW_SCOPES 0 #define FINE_GRAINED_DRAW_SCOPES 0

View File

@ -51,6 +51,37 @@ bool GL4Shader::Prepare() {
return success; return success;
} }
// Creates the GL program for this shader from a previously saved driver
// binary instead of compiling translated source.
//
// blob/length describe the binary bytes and binary_format is the format token
// that accompanied them (as reported through GetBinary's out parameter).
//
// Returns true and marks the shader valid on success. Returns false when the
// driver rejects the binary or when the vertex array object cannot be built;
// in both cases the program object is deleted and program_ is reset to 0 so
// the shader can still be generated from scratch later.
bool GL4Shader::LoadFromBinary(const uint8_t* blob, GLenum binary_format,
                               size_t length) {
  program_ = glCreateProgram();
  glProgramBinary(program_, binary_format, blob, GLsizei(length));

  // glProgramBinary reports acceptance of the binary through the link status.
  GLint link_status = 0;
  glGetProgramiv(program_, GL_LINK_STATUS, &link_status);
  if (!link_status) {
    // Failed to link. Not fatal - just clean up so we can get generated later.
    XELOGD("GL4Shader::LoadFromBinary failed. Log:\n%s",
           GetProgramInfoLog().c_str());
    glDeleteProgram(program_);
    program_ = 0;
    return false;
  }

  // Build static vertex array descriptor.
  if (!PrepareVertexArrayObject()) {
    XELOGE("Unable to prepare vertex shader array object");
    // Mirror the link-failure path: do not leave a live program attached to a
    // shader that is not valid.
    glDeleteProgram(program_);
    program_ = 0;
    return false;
  }

  // Success!
  host_binary_ = GetBinary();
  host_disassembly_ = GetHostDisasmNV(host_binary_);
  is_valid_ = true;
  return true;
}
bool GL4Shader::PrepareVertexArrayObject() { bool GL4Shader::PrepareVertexArrayObject() {
glCreateVertexArrays(1, &vao_); glCreateVertexArrays(1, &vao_);
@ -211,7 +242,7 @@ std::string GL4Shader::GetProgramInfoLog() {
return log; return log;
} }
std::vector<uint8_t> GL4Shader::GetBinary() { std::vector<uint8_t> GL4Shader::GetBinary(GLenum* binary_format) {
std::vector<uint8_t> binary; std::vector<uint8_t> binary;
// Get program binary, if it's available. // Get program binary, if it's available.
@ -219,9 +250,13 @@ std::vector<uint8_t> GL4Shader::GetBinary() {
glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &binary_length); glGetProgramiv(program_, GL_PROGRAM_BINARY_LENGTH, &binary_length);
if (binary_length) { if (binary_length) {
binary.resize(binary_length); binary.resize(binary_length);
GLenum binary_format; GLenum binary_format_tmp = 0;
glGetProgramBinary(program_, binary_length, &binary_length, &binary_format, glGetProgramBinary(program_, binary_length, &binary_length,
binary.data()); &binary_format_tmp, binary.data());
if (binary_format) {
*binary_format = binary_format_tmp;
}
} }
return binary; return binary;

View File

@ -29,7 +29,9 @@ class GL4Shader : public Shader {
GLuint shader() const { return shader_; } GLuint shader() const { return shader_; }
GLuint vao() const { return vao_; } GLuint vao() const { return vao_; }
bool Prepare() override; bool Prepare();
bool LoadFromBinary(const uint8_t* blob, GLenum binary_format, size_t length);
std::vector<uint8_t> GetBinary(GLenum* binary_format = nullptr);
protected: protected:
bool PrepareVertexArrayObject(); bool PrepareVertexArrayObject();
@ -38,7 +40,6 @@ class GL4Shader : public Shader {
std::string GetShaderInfoLog(); std::string GetShaderInfoLog();
std::string GetProgramInfoLog(); std::string GetProgramInfoLog();
std::vector<uint8_t> GetBinary();
static std::string GetHostDisasmNV(const std::vector<uint8_t>& binary); static std::string GetHostDisasmNV(const std::vector<uint8_t>& binary);
GLuint program_ = 0; GLuint program_ = 0;

View File

@ -0,0 +1,187 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/gl4/gl4_shader_cache.h"
#include <cinttypes>
#include "xenia/base/filesystem.h"
#include "xenia/base/logging.h"
#include "xenia/base/mapped_memory.h"
#include "xenia/gpu/gl4/gl4_gpu_flags.h"
#include "xenia/gpu/gl4/gl4_shader.h"
#include "xenia/gpu/glsl_shader_translator.h"
#include "xenia/gpu/gpu_flags.h"
#include "third_party/xxhash/xxhash.h"
namespace xe {
namespace gpu {
namespace gl4 {
GL4ShaderCache::GL4ShaderCache(GlslShaderTranslator* shader_translator)
: shader_translator_(shader_translator) {}
GL4ShaderCache::~GL4ShaderCache() {}
void GL4ShaderCache::Reset() {
shader_map_.clear();
all_shaders_.clear();
}
// Returns the shader for the given microcode, creating it if needed.
//
// The microcode (dword_count dwords at dwords) is hashed and looked up in the
// in-memory map first, then in the on-disk cache, and is finally translated
// from scratch. The returned pointer is owned by this cache. A shader that
// fails translation is still inserted and returned; it is left invalid so it
// can be ignored by the caller later.
GL4Shader* GL4ShaderCache::LookupOrInsertShader(ShaderType shader_type,
                                                const uint32_t* dwords,
                                                uint32_t dword_count) {
  const char* type_name =
      shader_type == ShaderType::kVertex ? "vertex" : "pixel";

  // Hash the input memory and lookup the shader.
  uint64_t hash = XXH64(dwords, dword_count * sizeof(uint32_t), 0);
  auto it = shader_map_.find(hash);
  if (it != shader_map_.end()) {
    // Shader has been previously loaded.
    // TODO(benvanik): compare bytes? Likelihood of collision is low.
    return it->second;
  }

  // Check filesystem cache.
  GL4Shader* shader_ptr =
      FindCachedShader(shader_type, hash, dwords, dword_count);
  if (shader_ptr) {
    // Found!
    XELOGGPU("Loaded %s shader from cache (hash: %.16" PRIX64 ")", type_name,
             hash);
    return shader_ptr;
  }

  // Not found in cache - load from scratch.
  auto shader =
      std::make_unique<GL4Shader>(shader_type, hash, dwords, dword_count);
  shader_ptr = shader.get();
  shader_map_.insert({hash, shader_ptr});
  all_shaders_.emplace_back(std::move(shader));

  // Perform translation.
  // If this fails the shader will be marked as invalid and ignored later.
  if (!shader_translator_->Translate(shader_ptr)) {
    XELOGE("Shader failed translation");
    return shader_ptr;
  }

  shader_ptr->Prepare();
  if (shader_ptr->is_valid()) {
    CacheShader(shader_ptr);

    // The host pointer is converted explicitly so the argument really is the
    // 64-bit integer that PRIX64 expects, and the byte count is printed as
    // unsigned to match its type.
    XELOGGPU("Generated %s shader at 0x%.16" PRIX64 " (%ub):\n%s", type_name,
             static_cast<uint64_t>(reinterpret_cast<uintptr_t>(dwords)),
             dword_count * 4, shader_ptr->ucode_disassembly().c_str());
  }

  // Dump shader files if desired.
  if (!FLAGS_dump_shaders.empty()) {
    shader_ptr->Dump(FLAGS_dump_shaders, "gl4");
  }

  return shader_ptr;
}
// Writes the driver binary of |shader| to the on-disk cache.
//
// Does nothing when the cache directory flag is empty or when the shader has
// no binary to save. The file is named from the microcode hash with a
// ".frag"/".vert" extension and contains a CachedShader header followed by
// the binary bytes. Failures are not fatal; they are logged and the shader is
// simply not cached.
void GL4ShaderCache::CacheShader(GL4Shader* shader) {
  if (FLAGS_shader_cache_dir.empty()) {
    // Cache disabled.
    return;
  }

  GLenum binary_format = 0;
  auto binary = shader->GetBinary(&binary_format);
  if (binary.empty()) {
    // No binary returned.
    return;
  }

  auto cache_dir = xe::to_absolute_path(xe::to_wstring(FLAGS_shader_cache_dir));
  xe::filesystem::CreateFolder(cache_dir);
  auto filename =
      cache_dir + xe::format_string(
                      L"%.16" PRIX64 ".%s", shader->ucode_data_hash(),
                      shader->type() == ShaderType::kPixel ? L"frag" : L"vert");
  auto file = xe::filesystem::OpenFile(filename, "wb");
  if (!file) {
    // Not fatal, but not too good.
    XELOGE("Unable to open shader cache file for %.16" PRIX64,
           shader->ucode_data_hash());
    return;
  }

  std::vector<uint8_t> cached_shader_mem;
  // Resize this vector to the final filesize (- 1 to account for dummy array
  // in CachedShader)
  cached_shader_mem.resize(sizeof(CachedShader) + binary.size() - 1);
  auto cached_shader =
      reinterpret_cast<CachedShader*>(cached_shader_mem.data());
  cached_shader->magic = xe::byte_swap('XSHD');
  cached_shader->version = 0;  // TODO
  cached_shader->shader_type = uint8_t(shader->type());
  cached_shader->binary_len = uint32_t(binary.size());
  cached_shader->binary_format = binary_format;
  std::memcpy(cached_shader->binary, binary.data(), binary.size());

  // Check both the write and the close: buffered data may only hit the disk
  // (and fail) when the stream is closed.
  const bool wrote_all =
      fwrite(cached_shader_mem.data(), cached_shader_mem.size(), 1, file) == 1;
  const bool closed_ok = fclose(file) == 0;
  if (!wrote_all || !closed_ok) {
    XELOGE("Failed to write shader cache file for %.16" PRIX64,
           shader->ucode_data_hash());
  }
}
// Tries to load a shader for the given microcode hash from the on-disk cache.
//
// Returns nullptr when the cache is disabled, when no file exists for the
// hash/type, when the file does not look like a complete cache entry for this
// shader type, or when the driver refuses the stored binary. On success the
// new shader is registered in the in-memory map, owned by this cache, and
// returned.
GL4Shader* GL4ShaderCache::FindCachedShader(ShaderType shader_type,
                                            uint64_t hash,
                                            const uint32_t* dwords,
                                            uint32_t dword_count) {
  if (FLAGS_shader_cache_dir.empty()) {
    // Cache disabled.
    return nullptr;
  }

  auto cache_dir = xe::to_absolute_path(xe::to_wstring(FLAGS_shader_cache_dir));
  auto filename =
      cache_dir +
      xe::format_string(L"%.16" PRIX64 ".%s", hash,
                        shader_type == ShaderType::kPixel ? L"frag" : L"vert");
  if (!xe::filesystem::PathExists(filename)) {
    return nullptr;
  }

  // Shader is cached. Open it up.
  auto map = xe::MappedMemory::Open(filename, MappedMemory::Mode::kRead);
  if (!map) {
    // Should not fail
    assert_always();
    return nullptr;
  }

  // CacheShader writes sizeof(CachedShader) - 1 bytes of header followed by
  // binary_len bytes of code. Reject anything shorter than that so a
  // truncated or foreign file is never read past the end of the mapping.
  const size_t header_size = sizeof(CachedShader) - 1;
  if (map->size() < header_size) {
    return nullptr;
  }

  auto cached_shader = reinterpret_cast<const CachedShader*>(map->data());
  // TODO: Compare versions
  if (cached_shader->magic != xe::byte_swap('XSHD')) {
    return nullptr;
  }
  if (cached_shader->shader_type != uint8_t(shader_type)) {
    // Entry was written for the other shader stage.
    return nullptr;
  }
  if (cached_shader->binary_len > map->size() - header_size) {
    // Header claims more code than the file holds.
    return nullptr;
  }

  auto shader =
      std::make_unique<GL4Shader>(shader_type, hash, dwords, dword_count);

  // Gather the binding points.
  // TODO: Make Shader do this on construction.
  // TODO: Regenerate microcode disasm/etc on load.
  shader_translator_->GatherAllBindingInformation(shader.get());
  if (!shader->LoadFromBinary(cached_shader->binary,
                              cached_shader->binary_format,
                              cached_shader->binary_len)) {
    // Failed to load from binary.
    return nullptr;
  }

  auto shader_ptr = shader.get();
  shader_map_.insert({hash, shader_ptr});
  all_shaders_.emplace_back(std::move(shader));
  return shader_ptr;
}
} // namespace gl4
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,60 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_GL4_SHADER_CACHE_H_
#define XENIA_GPU_GL4_SHADER_CACHE_H_
#include <cstdint>
#include <memory>
#include <unordered_map>
#include <vector>

#include "xenia/gpu/xenos.h"
namespace xe {
namespace gpu {
class GlslShaderTranslator;
namespace gl4 {
class GL4Shader;
// Owns every GL4Shader created for the GL4 backend and maps microcode hashes
// to them. Shaders can additionally be persisted to, and restored from, files
// in the directory named by the shader_cache_dir flag.
class GL4ShaderCache {
 public:
  // |shader_translator| is stored as a raw pointer and used for every
  // translation; it is not owned and must outlive the cache.
  explicit GL4ShaderCache(GlslShaderTranslator* shader_translator);
  ~GL4ShaderCache();

  // Drops the hash map and destroys all shaders owned by the cache.
  void Reset();

  // Returns the shader for the given microcode, loading it from the on-disk
  // cache or translating it when it has not been seen before. The returned
  // pointer is owned by the cache.
  GL4Shader* LookupOrInsertShader(ShaderType shader_type,
                                  const uint32_t* dwords, uint32_t dword_count);

 private:
  // Cached shader file format.
  // The struct is written to and read from disk as raw memory, so the file
  // layout includes whatever padding the compiler inserts between fields.
  struct CachedShader {
    uint32_t magic;
    uint32_t version;        // Version of the shader translator used.
    uint8_t shader_type;     // ShaderType enum
    uint32_t binary_len;     // Code length
    uint32_t binary_format;  // Binary format (from OpenGL)
    uint8_t binary[1];       // Code (binary_len bytes follow the header)
  };

  // Writes the shader's driver binary to the on-disk cache, if enabled.
  void CacheShader(GL4Shader* shader);
  // Loads a shader from the on-disk cache; returns nullptr when unavailable.
  GL4Shader* FindCachedShader(ShaderType shader_type, uint64_t hash,
                              const uint32_t* dwords, uint32_t dword_count);

  GlslShaderTranslator* shader_translator_ = nullptr;
  // Owning storage for every shader handed out by the cache.
  std::vector<std::unique_ptr<GL4Shader>> all_shaders_;
  // Microcode hash -> shader (non-owning; points into all_shaders_).
  std::unordered_map<uint64_t, GL4Shader*> shader_map_;
};
} // namespace gl4
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_GL4_SHADER_CACHE_H_

View File

@ -16,9 +16,11 @@
#include "xenia/base/math.h" #include "xenia/base/math.h"
#include "xenia/base/memory.h" #include "xenia/base/memory.h"
#include "xenia/base/string.h" #include "xenia/base/string.h"
#include "xenia/gpu/ucode.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
using namespace ucode;
Shader::Shader(ShaderType shader_type, uint64_t ucode_data_hash, Shader::Shader(ShaderType shader_type, uint64_t ucode_data_hash,
const uint32_t* ucode_dwords, size_t ucode_dword_count) const uint32_t* ucode_dwords, size_t ucode_dword_count)
@ -38,7 +40,8 @@ std::string Shader::GetTranslatedBinaryString() const {
return result; return result;
} }
void Shader::Dump(const std::string& base_path, const char* path_prefix) { std::pair<std::string, std::string> Shader::Dump(const std::string& base_path,
const char* path_prefix) {
// Ensure target path exists. // Ensure target path exists.
auto target_path = xe::to_wstring(base_path); auto target_path = xe::to_wstring(base_path);
if (!target_path.empty()) { if (!target_path.empty()) {
@ -79,6 +82,8 @@ void Shader::Dump(const std::string& base_path, const char* path_prefix) {
fwrite(ucode_data_.data(), 4, ucode_data_.size(), f); fwrite(ucode_data_.data(), 4, ucode_data_.size(), f);
fclose(f); fclose(f);
} }
return {std::string(txt_file_name), std::string(bin_file_name)};
} }
} // namespace gpu } // namespace gpu

View File

@ -502,6 +502,7 @@ class Shader {
// Microcode dwords in host endianness. // Microcode dwords in host endianness.
const std::vector<uint32_t>& ucode_data() const { return ucode_data_; } const std::vector<uint32_t>& ucode_data() const { return ucode_data_; }
uint64_t ucode_data_hash() const { return ucode_data_hash_; }
const uint32_t* ucode_dwords() const { return ucode_data_.data(); } const uint32_t* ucode_dwords() const { return ucode_data_.data(); }
size_t ucode_dword_count() const { return ucode_data_.size(); } size_t ucode_dword_count() const { return ucode_data_.size(); }
@ -547,13 +548,12 @@ class Shader {
// May be empty if the host does not support saving binaries. // May be empty if the host does not support saving binaries.
const std::vector<uint8_t>& host_binary() const { return host_binary_; } const std::vector<uint8_t>& host_binary() const { return host_binary_; }
// Prepares the shader for use in the host graphics API.
virtual bool Prepare() { return is_valid_; }
// Dumps the shader to a file in the given path based on ucode hash. // Dumps the shader to a file in the given path based on ucode hash.
// Both the ucode binary and disassembled and translated shader will be // Both the ucode binary and disassembled and translated shader will be
// written. // written.
void Dump(const std::string& base_path, const char* path_prefix); // Returns the filename of the shader and the binary.
std::pair<std::string, std::string> Dump(const std::string& base_path,
const char* path_prefix);
protected: protected:
friend class ShaderTranslator; friend class ShaderTranslator;

View File

@ -1,3 +1,4 @@
#include "shader_translator.h"
/** /**
****************************************************************************** ******************************************************************************
* Xenia : Xbox 360 Emulator Research Project * * Xenia : Xbox 360 Emulator Research Project *
@ -57,6 +58,41 @@ void ShaderTranslator::Reset() {
} }
} }
// Walks the control flow instructions of |shader| and records the vertex and
// texture bindings and written color targets on the shader, without producing
// a translated binary. Resets the translator state first. Always returns
// true.
bool ShaderTranslator::GatherAllBindingInformation(Shader* shader) {
  // FIXME: This is kind of silly.
  Reset();

  shader_type_ = shader->type();
  ucode_dwords_ = shader->ucode_dwords();
  ucode_dword_count_ = shader->ucode_dword_count();

  // Control flow instructions are unpacked two at a time from three dwords.
  // The scan stops at the lowest exec target address seen so far, or at the
  // end of the microcode if there is none.
  uint32_t cf_end_index = static_cast<uint32_t>(ucode_dword_count_);
  uint32_t cf_index = 0;
  while (cf_index < cf_end_index) {
    ControlFlowInstruction first_cf;
    ControlFlowInstruction second_cf;
    UnpackControlFlowInstructions(ucode_dwords_ + cf_index, &first_cf,
                                  &second_cf);

    if (IsControlFlowOpcodeExec(first_cf.opcode())) {
      cf_end_index = std::min(cf_end_index, first_cf.exec.address() * 3);
    }
    if (IsControlFlowOpcodeExec(second_cf.opcode())) {
      cf_end_index = std::min(cf_end_index, second_cf.exec.address() * 3);
    }

    GatherBindingInformation(first_cf);
    GatherBindingInformation(second_cf);

    cf_index += 3;
  }

  // Hand the gathered results over to the shader.
  shader->vertex_bindings_ = std::move(vertex_bindings_);
  shader->texture_bindings_ = std::move(texture_bindings_);
  for (size_t target = 0; target < xe::countof(writes_color_targets_);
       ++target) {
    shader->writes_color_targets_[target] = writes_color_targets_[target];
  }

  return true;
}
bool ShaderTranslator::Translate(Shader* shader) { bool ShaderTranslator::Translate(Shader* shader) {
Reset(); Reset();
@ -79,6 +115,7 @@ bool ShaderTranslator::Translate(Shader* shader) {
max_cf_dword_index = max_cf_dword_index =
std::min(max_cf_dword_index, cf_b.exec.address() * 3); std::min(max_cf_dword_index, cf_b.exec.address() * 3);
} }
GatherBindingInformation(cf_a); GatherBindingInformation(cf_a);
GatherBindingInformation(cf_b); GatherBindingInformation(cf_b);
} }

View File

@ -26,6 +26,9 @@ class ShaderTranslator {
public: public:
virtual ~ShaderTranslator(); virtual ~ShaderTranslator();
// Gathers all vertex/texture bindings. Implicitly called in Translate.
// TODO: Move this functionality to Shader.
bool GatherAllBindingInformation(Shader* shader);
bool Translate(Shader* shader); bool Translate(Shader* shader);
protected: protected: