From d72610ba1b8b9a8f09e95849d59dbf3d62819ffa Mon Sep 17 00:00:00 2001
From: Ben Vanik
Date: Sat, 7 Mar 2015 10:17:09 -0800
Subject: [PATCH] Custom blitter for final resolve.

---
 src/xenia/gpu/gl4/blitter.cc              | 261 ++++++++++++++++++++++
 src/xenia/gpu/gl4/blitter.h               |  68 +++++++
 src/xenia/gpu/gl4/command_processor.cc    |  13 +-
 src/xenia/gpu/gl4/command_processor.h     |   4 +-
 src/xenia/gpu/gl4/gl4_graphics_system.cc  |   9 +-
 src/xenia/gpu/gl4/gl4_profiler_display.cc |  14 +-
 src/xenia/gpu/gl4/gl_context.cc           |  15 +-
 src/xenia/gpu/gl4/gl_context.h            |   6 +
 src/xenia/gpu/gl4/sources.gypi            |   2 +
 src/xenia/gpu/gl4/texture_cache.cc        |  18 +-
 src/xenia/gpu/gl4/texture_cache.h         |   6 +-
 11 files changed, 381 insertions(+), 35 deletions(-)
 create mode 100644 src/xenia/gpu/gl4/blitter.cc
 create mode 100644 src/xenia/gpu/gl4/blitter.h

diff --git a/src/xenia/gpu/gl4/blitter.cc b/src/xenia/gpu/gl4/blitter.cc
new file mode 100644
index 000000000..74f6d032a
--- /dev/null
+++ b/src/xenia/gpu/gl4/blitter.cc
@@ -0,0 +1,261 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2015 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/gl4/blitter.h"
+
+#include <cstdio>
+#include <string>
+
+#include "poly/assert.h"
+#include "poly/math.h"
+
+namespace xe {
+namespace gpu {
+namespace gl4 {
+
+extern "C" GLEWContext* glewGetContext();
+extern "C" WGLEWContext* wglewGetContext();
+
+namespace {
+
+// Returns true when |program| is a valid, successfully linked program object.
+// On failure the driver's info log is written to stderr so that shader errors
+// are not silently dropped.
+bool CheckProgramLinked(GLuint program, const char* stage_name) {
+  if (!program) {
+    std::fprintf(stderr, "Blitter: unable to create %s program\n", stage_name);
+    return false;
+  }
+  GLint link_status = GL_FALSE;
+  glGetProgramiv(program, GL_LINK_STATUS, &link_status);
+  if (link_status == GL_TRUE) {
+    return true;
+  }
+  char log[2048] = {0};
+  GLsizei log_length = 0;
+  glGetProgramInfoLog(program, sizeof(log), &log_length, log);
+  std::fprintf(stderr, "Blitter: %s program failed to link:\n%s\n", stage_name,
+               log);
+  return false;
+}
+
+}  // namespace
+
+Blitter::Blitter()
+    : vertex_program_(0),
+      fragment_program_(0),
+      pipeline_(0),
+      vbo_(0),
+      vao_(0),
+      nearest_sampler_(0),
+      linear_sampler_(0) {}
+
+Blitter::~Blitter() = default;
+
+// Builds the shader pipeline, unit quad geometry and samplers used by Draw.
+// Requires a current GL context. Returns false (after releasing anything that
+// was created) if either shader stage fails to build.
+bool Blitter::Initialize() {
+  const std::string header =
+      "\n\
+#version 450 \n\
+#extension GL_ARB_explicit_uniform_location : require \n\
+#extension GL_ARB_shading_language_420pack : require \n\
+precision highp float; \n\
+precision highp int; \n\
+layout(std140, column_major) uniform; \n\
+layout(std430, column_major) buffer; \n\
+struct VertexData { \n\
+vec2 uv; \n\
+}; \n\
+";
+  const std::string vs_source = header +
+                                "\n\
+layout(location = 0) uniform vec4 src_uv_params; \n\
+out gl_PerVertex { \n\
+  vec4 gl_Position; \n\
+  float gl_PointSize; \n\
+  float gl_ClipDistance[]; \n\
+}; \n\
+struct VertexFetch { \n\
+vec2 pos; \n\
+};\n\
+layout(location = 0) in VertexFetch vfetch; \n\
+layout(location = 0) out VertexData vtx; \n\
+void main() { \n\
+  gl_Position = vec4(vfetch.pos.xy * vec2(2.0, 2.0) - vec2(1.0, 1.0), 0.0, 1.0); \n\
+  vtx.uv = vfetch.pos.xy * src_uv_params.zw + src_uv_params.xy; \n\
+} \n\
+";
+  const std::string fs_source = header +
+                                "\n\
+layout(location = 1) uniform sampler2D src_texture; \n\
+layout(location = 0) in VertexData vtx; \n\
+layout(location = 0) out vec4 oC; \n\
+void main() { \n\
+vec4 color = texture(src_texture, vtx.uv); \n\
+oC = color; \n\
+} \n\
+";
+
+  auto vs_source_str = vs_source.c_str();
+  vertex_program_ = glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &vs_source_str);
+  auto fs_source_str = fs_source.c_str();
+  fragment_program_ =
+      glCreateShaderProgramv(GL_FRAGMENT_SHADER, 1, &fs_source_str);
+  // glCreateShaderProgramv reports compile and link errors only through the
+  // program's link status and info log, so check both stages explicitly.
+  const bool vertex_ok = CheckProgramLinked(vertex_program_, "vertex");
+  const bool fragment_ok = CheckProgramLinked(fragment_program_, "fragment");
+  if (!vertex_ok || !fragment_ok) {
+    Shutdown();
+    return false;
+  }
+  glCreateProgramPipelines(1, &pipeline_);
+  glUseProgramStages(pipeline_, GL_VERTEX_SHADER_BIT, vertex_program_);
+  glUseProgramStages(pipeline_, GL_FRAGMENT_SHADER_BIT, fragment_program_);
+
+  // Unit quad as a triangle strip; positions double as texture coordinates.
+  glCreateBuffers(1, &vbo_);
+  static const GLfloat vbo_data[] = {
+      0, 0, 1, 0, 0, 1, 1, 1,
+  };
+  glNamedBufferStorage(vbo_, sizeof(vbo_data), vbo_data, 0);
+
+  glCreateVertexArrays(1, &vao_);
+  glEnableVertexArrayAttrib(vao_, 0);
+  glVertexArrayAttribBinding(vao_, 0, 0);
+  glVertexArrayAttribFormat(vao_, 0, 2, GL_FLOAT, GL_FALSE, 0);
+  glVertexArrayVertexBuffer(vao_, 0, vbo_, 0, sizeof(GLfloat) * 2);
+
+  glCreateSamplers(1, &nearest_sampler_);
+  glSamplerParameteri(nearest_sampler_, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+  glSamplerParameteri(nearest_sampler_, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+  glSamplerParameteri(nearest_sampler_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+  glSamplerParameteri(nearest_sampler_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+  glCreateSamplers(1, &linear_sampler_);
+  glSamplerParameteri(linear_sampler_, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+  glSamplerParameteri(linear_sampler_, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+  glSamplerParameteri(linear_sampler_, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+  glSamplerParameteri(linear_sampler_, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+  return true;
+}
+
+// Releases every GL object owned by the blitter. Handles are reset to zero so
+// that calling this more than once never deletes a stale name.
+void Blitter::Shutdown() {
+  if (vertex_program_) {
+    glDeleteProgram(vertex_program_);
+    vertex_program_ = 0;
+  }
+  if (fragment_program_) {
+    glDeleteProgram(fragment_program_);
+    fragment_program_ = 0;
+  }
+  if (pipeline_) {
+    glDeleteProgramPipelines(1, &pipeline_);
+    pipeline_ = 0;
+  }
+  if (vbo_) {
+    glDeleteBuffers(1, &vbo_);
+    vbo_ = 0;
+  }
+  if (vao_) {
+    glDeleteVertexArrays(1, &vao_);
+    vao_ = 0;
+  }
+  if (nearest_sampler_) {
+    glDeleteSamplers(1, &nearest_sampler_);
+    nearest_sampler_ = 0;
+  }
+  if (linear_sampler_) {
+    glDeleteSamplers(1, &linear_sampler_);
+    linear_sampler_ = 0;
+  }
+}
+
+// Draws the unit quad into the current viewport, sampling the given
+// sub-rectangle (in texels) of |src_texture| with the requested filter.
+void Blitter::Draw(GLuint src_texture, uint32_t src_x, uint32_t src_y,
+                   uint32_t src_width, uint32_t src_height, GLenum filter) {
+  // A zero texture name means there is nothing to sample; callers pass zero
+  // when no source texture has been produced yet.
+  if (!src_texture) {
+    return;
+  }
+
+  // TODO(benvanik): avoid this?
+  // Initialized to zero because a failed query leaves the outputs untouched.
+  GLint src_texture_width = 0;
+  glGetTextureLevelParameteriv(src_texture, 0, GL_TEXTURE_WIDTH,
+                               &src_texture_width);
+  GLint src_texture_height = 0;
+  glGetTextureLevelParameteriv(src_texture, 0, GL_TEXTURE_HEIGHT,
+                               &src_texture_height);
+  if (src_texture_width <= 0 || src_texture_height <= 0) {
+    // Avoid dividing by zero when normalizing the source rectangle.
+    return;
+  }
+
+  glDisablei(GL_BLEND, 0);
+  glDisable(GL_DEPTH_TEST);
+  glBindProgramPipeline(pipeline_);
+  glBindVertexArray(vao_);
+  glBindTextures(0, 1, &src_texture);
+  switch (filter) {
+    default:
+    case GL_NEAREST:
+      glBindSampler(0, nearest_sampler_);
+      break;
+    case GL_LINEAR:
+      glBindSampler(0, linear_sampler_);
+      break;
+  }
+
+  // src_uv_params = (offset.xy, scale.zw) in normalized texture coordinates.
+  glProgramUniform4f(vertex_program_, 0, src_x / float(src_texture_width),
+                     src_y / float(src_texture_height),
+                     src_width / float(src_texture_width),
+                     src_height / float(src_texture_height));
+
+  glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+  glBindProgramPipeline(0);
+  glBindVertexArray(0);
+  GLuint zero = 0;
+  glBindTextures(0, 1, &zero);
+  glBindSampler(0, 0);
+}
+
+// Blits a rectangle of |src_texture| to the given rectangle of the currently
+// bound draw framebuffer.
+void Blitter::BlitTexture2D(GLuint src_texture, uint32_t src_x, uint32_t src_y,
+                            uint32_t src_width, uint32_t src_height,
+                            uint32_t dest_x, uint32_t dest_y,
+                            uint32_t dest_width, uint32_t dest_height,
+                            GLenum filter) {
+  glViewport(dest_x, dest_y, dest_width, dest_height);
+  Draw(src_texture, src_x, src_y, src_width, src_height, filter);
+}
+
+// NOTE: not implemented yet; only the viewport is set and nothing is copied
+// into |dest_texture|.
+void Blitter::CopyTexture2D(GLuint src_texture, uint32_t src_x, uint32_t src_y,
+                            uint32_t src_width, uint32_t src_height,
+                            uint32_t dest_texture, uint32_t dest_x,
+                            uint32_t dest_y, uint32_t dest_width,
+                            uint32_t dest_height, GLenum filter) {
+  glViewport(dest_x, dest_y, dest_width, dest_height);
+  //
+}
+
+}  // namespace gl4
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/gl4/blitter.h b/src/xenia/gpu/gl4/blitter.h
new file mode 100644
index 000000000..2a2f72fde
--- /dev/null
+++ b/src/xenia/gpu/gl4/blitter.h
@@ -0,0 +1,68 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2015 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_GL4_BLITTER_H_
+#define XENIA_GPU_GL4_BLITTER_H_
+
+#include <cstdint>
+
+#include "third_party/GL/glew.h"
+#include "third_party/GL/wglew.h"
+
+namespace xe {
+namespace gpu {
+namespace gl4 {
+
+// Draws a textured quad to copy a region of a 2D texture into the current
+// viewport, optionally scaling and filtering it. Every method issues GL calls,
+// so the owning context must be current when they are invoked.
+class Blitter {
+ public:
+  Blitter();
+  ~Blitter();
+
+  // Creates the programs, buffers and samplers. Returns false on failure.
+  bool Initialize();
+  // Deletes every GL object created by Initialize; safe to call repeatedly.
+  void Shutdown();
+
+  // Draws the source rectangle (in texels) of |src_texture| into the
+  // destination rectangle of the current draw framebuffer. |filter| is
+  // GL_NEAREST or GL_LINEAR; any other value is treated as GL_NEAREST.
+  void BlitTexture2D(GLuint src_texture, uint32_t src_x, uint32_t src_y,
+                     uint32_t src_width, uint32_t src_height, uint32_t dest_x,
+                     uint32_t dest_y, uint32_t dest_width, uint32_t dest_height,
+                     GLenum filter);
+
+  // Not implemented yet: sets the viewport but copies nothing.
+  void CopyTexture2D(GLuint src_texture, uint32_t src_x, uint32_t src_y,
+                     uint32_t src_width, uint32_t src_height,
+                     uint32_t dest_texture, uint32_t dest_x, uint32_t dest_y,
+                     uint32_t dest_width, uint32_t dest_height, GLenum filter);
+
+ private:
+  // Shared quad draw used by the public entry points.
+  void Draw(GLuint src_texture, uint32_t src_x, uint32_t src_y,
+            uint32_t src_width, uint32_t src_height, GLenum filter);
+
+  // GL object names; zero means the object has not been created.
+  GLuint vertex_program_;
+  GLuint fragment_program_;
+  GLuint pipeline_;
+  GLuint vbo_;
+  GLuint vao_;
+  GLuint nearest_sampler_;
+  GLuint linear_sampler_;
+};
+
+}  // namespace gl4
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_GL4_BLITTER_H_
diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc
index 9720c819a..10aee7288 100644
--- a/src/xenia/gpu/gl4/command_processor.cc
+++ b/src/xenia/gpu/gl4/command_processor.cc
@@ -75,6 +75,7 @@ CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system)
       active_vertex_shader_(nullptr),
       active_pixel_shader_(nullptr),
       active_framebuffer_(nullptr),
+      last_framebuffer_texture_(0),
       point_list_geometry_program_(0),
       rect_list_geometry_program_(0),
quad_list_geometry_program_(0), @@ -566,13 +567,7 @@ void CommandProcessor::IssueSwap() { // TODO(benvanik): handle dirty cases (resolved to sysmem, touched). // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // HACK: just use whatever our current framebuffer is. - if (active_framebuffer_) { - swap_params.framebuffer = active_framebuffer_->framebuffer; - // TODO(benvanik): pick the right one? - swap_params.attachment = GL_COLOR_ATTACHMENT0; - } else { - swap_params.framebuffer = 0; - } + swap_params.framebuffer_texture = last_framebuffer_texture_; // Guess frontbuffer dimensions. // Command buffer seems to set these right before the XE_SWAP. @@ -2513,7 +2508,7 @@ bool CommandProcessor::IssueCopy() { glNamedFramebufferReadBuffer(source_framebuffer->framebuffer, GL_COLOR_ATTACHMENT0 + copy_src_select); // TODO(benvanik): RAW copy. - texture_cache_.CopyReadBufferTexture( + last_framebuffer_texture_ = texture_cache_.CopyReadBufferTexture( copy_dest_base, x, y, w, h, ColorFormatToTextureFormat(copy_dest_format), copy_dest_swap ? true : false); @@ -2539,7 +2534,7 @@ bool CommandProcessor::IssueCopy() { GL_COLOR_ATTACHMENT0 + copy_src_select); // Either copy the readbuffer into an existing texture or create a new // one in the cache so we can service future upload requests. - texture_cache_.CopyReadBufferTexture( + last_framebuffer_texture_ = texture_cache_.CopyReadBufferTexture( copy_dest_base, x, y, w, h, ColorFormatToTextureFormat(copy_dest_format), copy_dest_swap ? 
true : false); diff --git a/src/xenia/gpu/gl4/command_processor.h b/src/xenia/gpu/gl4/command_processor.h index d804ea687..d9e74e8bd 100644 --- a/src/xenia/gpu/gl4/command_processor.h +++ b/src/xenia/gpu/gl4/command_processor.h @@ -40,8 +40,7 @@ struct SwapParameters { uint32_t width; uint32_t height; - GLuint framebuffer; - GLenum attachment; + GLuint framebuffer_texture; }; enum class SwapMode { @@ -254,6 +253,7 @@ class CommandProcessor { GL4Shader* active_vertex_shader_; GL4Shader* active_pixel_shader_; CachedFramebuffer* active_framebuffer_; + GLuint last_framebuffer_texture_; std::vector cached_framebuffers_; std::vector cached_color_render_targets_; diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.cc b/src/xenia/gpu/gl4/gl4_graphics_system.cc index 78ac91273..42af232de 100644 --- a/src/xenia/gpu/gl4/gl4_graphics_system.cc +++ b/src/xenia/gpu/gl4/gl4_graphics_system.cc @@ -258,11 +258,10 @@ void GL4GraphicsSystem::SwapHandler(const SwapParameters& swap_params) { // Swap requested. Synchronously post a request to the loop so that // we do the swap in the right thread. 
control_->SynchronousRepaint([&]() { - glBlitNamedFramebuffer(swap_params.framebuffer, 0, swap_params.x, - swap_params.y, swap_params.x + swap_params.width, - swap_params.y + swap_params.height, 0, 0, - control_->width(), control_->height(), - GL_COLOR_BUFFER_BIT, GL_LINEAR); + control_->context()->blitter()->BlitTexture2D( + swap_params.framebuffer_texture, swap_params.x, swap_params.y, + swap_params.width, swap_params.height, 0, 0, control_->width(), + control_->height(), GL_LINEAR); }); } diff --git a/src/xenia/gpu/gl4/gl4_profiler_display.cc b/src/xenia/gpu/gl4/gl4_profiler_display.cc index a25a46206..21c442dd6 100644 --- a/src/xenia/gpu/gl4/gl4_profiler_display.cc +++ b/src/xenia/gpu/gl4/gl4_profiler_display.cc @@ -244,14 +244,14 @@ bool GL4ProfilerDisplay::SetupShaders() { const std::string header = "\n\ #version 450 \n\ -#extension GL_ARB_bindless_texture : require\n\ -#extension GL_ARB_explicit_uniform_location : require\n\ -#extension GL_ARB_shading_language_420pack : require\n\ +#extension GL_ARB_bindless_texture : require \n\ +#extension GL_ARB_explicit_uniform_location : require \n\ +#extension GL_ARB_shading_language_420pack : require \n\ precision highp float; \n\ -precision highp int;\n\ -layout(std140, column_major) uniform;\n\ -layout(std430, column_major) buffer;\n\ -struct VertexData {\n\ +precision highp int; \n\ +layout(std140, column_major) uniform; \n\ +layout(std430, column_major) buffer; \n\ +struct VertexData { \n\ vec4 color; \n\ vec2 uv; \n\ };\n\ diff --git a/src/xenia/gpu/gl4/gl_context.cc b/src/xenia/gpu/gl4/gl_context.cc index c16e15e2f..ac64739b0 100644 --- a/src/xenia/gpu/gl4/gl_context.cc +++ b/src/xenia/gpu/gl4/gl_context.cc @@ -36,7 +36,9 @@ GLContext::GLContext(HWND hwnd, HGLRC glrc) } GLContext::~GLContext() { - wglMakeCurrent(nullptr, nullptr); + MakeCurrent(); + blitter_.Shutdown(); + ClearCurrent(); if (glrc_) { wglDeleteContext(glrc_); } @@ -119,6 +121,12 @@ bool GLContext::Initialize(HWND hwnd) { SetupDebugging(); + if 
(!blitter_.Initialize()) { + PLOGE("Unable to initialize blitter"); + ClearCurrent(); + return false; + } + ClearCurrent(); return true; @@ -169,6 +177,11 @@ std::unique_ptr GLContext::CreateShared() { SetupDebugging(); + if (!new_context->blitter_.Initialize()) { + PLOGE("Unable to initialize blitter"); + return nullptr; + } + new_context->ClearCurrent(); return new_context; diff --git a/src/xenia/gpu/gl4/gl_context.h b/src/xenia/gpu/gl4/gl_context.h index 9ac386f53..e8b6083c2 100644 --- a/src/xenia/gpu/gl4/gl_context.h +++ b/src/xenia/gpu/gl4/gl_context.h @@ -12,6 +12,8 @@ #include +#include "xenia/gpu/gl4/blitter.h" + #include "third_party/GL/glew.h" #include "third_party/GL/wglew.h" @@ -34,6 +36,8 @@ class GLContext { bool MakeCurrent(); void ClearCurrent(); + Blitter* blitter() { return &blitter_; } + private: void SetupDebugging(); void DebugMessage(GLenum source, GLenum type, GLuint id, GLenum severity, @@ -48,6 +52,8 @@ class GLContext { GLEWContext glew_context_; WGLEWContext wglew_context_; + + Blitter blitter_; }; struct GLContextLock { diff --git a/src/xenia/gpu/gl4/sources.gypi b/src/xenia/gpu/gl4/sources.gypi index 2f5c0db72..18d991268 100644 --- a/src/xenia/gpu/gl4/sources.gypi +++ b/src/xenia/gpu/gl4/sources.gypi @@ -1,6 +1,8 @@ # Copyright 2014 Ben Vanik. All Rights Reserved. 
{ 'sources': [ + 'blitter.cc', + 'blitter.h', 'circular_buffer.cc', 'circular_buffer.h', 'command_processor.cc', diff --git a/src/xenia/gpu/gl4/texture_cache.cc b/src/xenia/gpu/gl4/texture_cache.cc index 136b7f54a..5696391e6 100644 --- a/src/xenia/gpu/gl4/texture_cache.cc +++ b/src/xenia/gpu/gl4/texture_cache.cc @@ -526,11 +526,11 @@ TextureCache::TextureEntry* TextureCache::LookupAddress(uint32_t guest_address, return nullptr; } -void TextureCache::CopyReadBufferTexture(uint32_t guest_address, uint32_t x, - uint32_t y, uint32_t width, - uint32_t height, - TextureFormat format, - bool swap_channels) { +GLuint TextureCache::CopyReadBufferTexture(uint32_t guest_address, uint32_t x, + uint32_t y, uint32_t width, + uint32_t height, + TextureFormat format, + bool swap_channels) { // See if we have used a texture at this address before. If we have, we can // reuse it. // TODO(benvanik): better lookup matching format/etc? @@ -546,7 +546,7 @@ void TextureCache::CopyReadBufferTexture(uint32_t guest_address, uint32_t x, memory_->CancelWriteWatch(texture_entry->write_watch_handle); texture_entry->write_watch_handle = 0; } - return; + return texture_entry->handle; } // Check pending read buffer textures (for multiple resolves with no @@ -558,14 +558,14 @@ void TextureCache::CopyReadBufferTexture(uint32_t guest_address, uint32_t x, entry->height == height && entry->format == format) { // Found an existing entry - just reupload. glCopyTextureSubImage2D(entry->handle, 0, 0, 0, x, y, width, height); - return; + return entry->handle; } } const auto& config = texture_configs[uint32_t(format)]; if (config.format == GL_INVALID_ENUM) { assert_always("Unhandled destination texture format"); - return; + return 0; } // Need to create a new texture. 
@@ -584,7 +584,9 @@ void TextureCache::CopyReadBufferTexture(uint32_t guest_address, uint32_t x, glTextureStorage2D(entry->handle, 1, config.internal_format, width, height); glCopyTextureSubImage2D(entry->handle, 0, 0, 0, x, y, width, height); + GLuint handle = entry->handle; read_buffer_textures_.push_back(entry.release()); + return handle; } void TextureCache::EvictTexture(TextureEntry* entry) { diff --git a/src/xenia/gpu/gl4/texture_cache.h b/src/xenia/gpu/gl4/texture_cache.h index 9b3cf2adc..ac5736e83 100644 --- a/src/xenia/gpu/gl4/texture_cache.h +++ b/src/xenia/gpu/gl4/texture_cache.h @@ -57,9 +57,9 @@ class TextureCache { TextureEntryView* Demand(const TextureInfo& texture_info, const SamplerInfo& sampler_info); - void CopyReadBufferTexture(uint32_t guest_address, uint32_t x, uint32_t y, - uint32_t width, uint32_t height, - TextureFormat format, bool swap_channels); + GLuint CopyReadBufferTexture(uint32_t guest_address, uint32_t x, uint32_t y, + uint32_t width, uint32_t height, + TextureFormat format, bool swap_channels); private: struct ReadBufferTexture {