/*
 *	Copyright (C) 2011-2016 PCSX2 Dev Team
 *	Copyright (C) 2007-2009 Gabest
 *
 *  This Program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This Program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING. If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA.
 *  http://www.gnu.org/copyleft/gpl.html
 *
 */

#include "stdafx.h"
#include "GSState.h"
#include "GSDeviceOGL.h"
#include "GLState.h"
#include "GSUtil.h"
// NOTE(review): the header name of the next include was lost in the extracted
// source; <fstream> is assumed -- confirm against the repository.
#include <fstream>

//#define ONLY_LINES

#ifdef _WIN32
#include "resource.h"
#else
#include "GSdxResources.h"
#endif

// TODO port those value into PerfMon API
#ifdef ENABLE_OGL_DEBUG_MEM_BW
uint64 g_real_texture_upload_byte = 0;
uint64 g_vertex_upload_byte = 0;
uint64 g_uniform_upload_byte = 0;
#endif

// Binding indices passed to the GSUniformBufferOGL constructors below.
static const uint32 g_merge_cb_index = 10;
static const uint32 g_interlace_cb_index = 11;
static const uint32 g_fx_cb_index = 14;
static const uint32 g_convert_index = 15;
static const uint32 g_vs_cb_index = 20;
static const uint32 g_ps_cb_index = 21;

bool GSDeviceOGL::m_debug_gl_call = false;
int GSDeviceOGL::m_shader_inst = 0;
int GSDeviceOGL::m_shader_reg = 0;
FILE* GSDeviceOGL::m_debug_gl_file = NULL;

// Zero-initialises every helper structure and reads the configuration options
// (mipmap, trilinear filter, debug flags). No OpenGL object is created here;
// that is done by Create().
GSDeviceOGL::GSDeviceOGL()
	: m_force_texture_clear(0)
	, m_fbo(0)
	, m_fbo_read(0)
	, m_va(NULL)
	, m_apitrace(0)
	, m_palette_ss(0)
	, m_vs_cb(NULL)
	, m_ps_cb(NULL)
	, m_shader(NULL)
{
	memset(&m_merge_obj, 0, sizeof(m_merge_obj));
	memset(&m_interlace, 0, sizeof(m_interlace));
	memset(&m_convert, 0, sizeof(m_convert));
	memset(&m_fxaa, 0, sizeof(m_fxaa));
	memset(&m_shaderfx, 0, sizeof(m_shaderfx));
	memset(&m_date, 0, sizeof(m_date));
	memset(&m_shadeboost, 0, sizeof(m_shadeboost));
	memset(&m_om_dss, 0, sizeof(m_om_dss));
	memset(&m_profiler, 0 , sizeof(m_profiler));
	GLState::Clear();

	m_mipmap = theApp.GetConfigI("mipmap");
	// The trilinear option is only honoured when user hacks are enabled.
	if (theApp.GetConfigB("UserHacks"))
		m_filter = static_cast<TriFiltering>(theApp.GetConfigI("UserHacks_TriFilter"));
	else
		m_filter = TriFiltering::None;

	// Reset the debug file
#ifdef ENABLE_OGL_DEBUG
	if (theApp.GetCurrentRendererType() == GSRendererType::OGL_SW)
		m_debug_gl_file = fopen("GSdx_opengl_debug_sw.txt","w");
	else
		m_debug_gl_file = fopen("GSdx_opengl_debug_hw.txt","w");
#endif

	m_debug_gl_call = theApp.GetConfigB("debug_opengl");

	m_disable_hw_gl_draw = theApp.GetConfigB("disable_hw_gl_draw");
}

// Closes the debug log and, when Create() has run (m_shader is set), releases
// every object allocated by Create()/CreateTextureFX().
GSDeviceOGL::~GSDeviceOGL()
{
	if (m_debug_gl_file) {
		fclose(m_debug_gl_file);
		m_debug_gl_file = NULL;
	}

	// If the create function wasn't called nothing to do.
	if (m_shader == NULL)
		return;

	GL_PUSH("GSDeviceOGL destructor");

	// Clean vertex buffer state
	delete m_va;

	// Clean m_merge_obj
	delete m_merge_obj.cb;

	// Clean m_interlace
	delete m_interlace.cb;

	// Clean m_convert
	delete m_convert.dss;
	delete m_convert.dss_write;
	delete m_convert.cb;

	// Clean m_fxaa
	delete m_fxaa.cb;

	// Clean m_shaderfx
	delete m_shaderfx.cb;

	// Clean m_date
	delete m_date.dss;

	// Clean various opengl allocation
	glDeleteFramebuffers(1, &m_fbo);
	glDeleteFramebuffers(1, &m_fbo_read);

	// Delete HW FX
	delete m_vs_cb;
	delete m_ps_cb;
	glDeleteSamplers(1, &m_palette_ss);

	m_ps.clear();

	glDeleteSamplers(countof(m_ps_ss), m_ps_ss);

	for (uint32 key = 0; key < countof(m_om_dss); key++)
		delete m_om_dss[key];

	PboPool::Destroy();

	// Must be done after the destruction of all shader/program objects
	delete m_shader;
	m_shader = NULL;
}

// Converts the timestamp queries recorded by Flip() into per-frame durations,
// prints min/mean/max/standard deviation and a 2 ms histogram on stderr, and
// dumps the samples to "GSdx_profile.csv". The query objects are deleted in
// every path.
void GSDeviceOGL::GenerateProfilerData()
{
	if (m_profiler.last_query < 3) {
		glDeleteQueries(1 << 16, m_profiler.timer_query);
		return;
	}

	// Wait for the latest query to get a valid result
	GLuint available = 0;
	while (!available) {
		glGetQueryObjectuiv(m_profiler.timer(), GL_QUERY_RESULT_AVAILABLE, &available);
	}

	GLuint64 time_start;
	GLuint64 time_end;
	std::vector<double> times;
	double ms = 0.000001; // factor applied to each timestamp delta before it is reported as milliseconds

	// With "linux_replay" > 1 only the queries after last_query / replay are used.
	int replay = theApp.GetConfigI("linux_replay");
	int first_query = replay > 1 ? m_profiler.last_query / replay : 0;

	glGetQueryObjectui64v(m_profiler.timer_query[first_query], GL_QUERY_RESULT, &time_start);
	for (uint32 q = first_query + 1; q < m_profiler.last_query; q++) {
		glGetQueryObjectui64v(m_profiler.timer_query[q], GL_QUERY_RESULT, &time_end);
		uint64 t = time_end - time_start;
		times.push_back((double)t * ms);

		time_start = time_end;
	}

	// Latest value is often silly, just drop it
	if (!times.empty())
		times.pop_back();

	glDeleteQueries(1 << 16, m_profiler.timer_query);

	// With few queries and a replay factor the sample set can be empty once the
	// last value is dropped. The statistics below would then dereference end
	// iterators and divide by zero, so there is nothing meaningful to report.
	if (times.empty())
		return;

	double frames = times.size();
	double mean = 0.0;
	double sd = 0.0;

	auto minmax_time = std::minmax_element(times.begin(), times.end());
	for (auto t : times)
		mean += t;
	mean = mean / frames;

	for (auto t : times)
		sd += pow(t-mean, 2);
	sd = sqrt(sd / frames);

	// Histogram with 2 ms wide slots; the last slot collects everything above.
	uint32 time_repartition[16] = {0};
	for (auto t : times) {
		uint32 slot = (uint32)(t/2.0);
		if (slot >= countof(time_repartition)) {
			slot = countof(time_repartition) - 1;
		}
		time_repartition[slot]++;
	}

	fprintf(stderr, "\nPerformance Profile for %.0f frames:\n", frames);
	fprintf(stderr, "Min %4.2f ms\t(%4.2f fps)\n", *minmax_time.first, 1000.0 / *minmax_time.first);
	fprintf(stderr, "Mean %4.2f ms\t(%4.2f fps)\n", mean, 1000.0 / mean);
	fprintf(stderr, "Max %4.2f ms\t(%4.2f fps)\n", *minmax_time.second, 1000.0 / *minmax_time.second);
	fprintf(stderr, "SD %4.2f ms\n", sd);
	fprintf(stderr, "\n");
	fprintf(stderr, "Frame Repartition\n");
	for (uint32 i = 0; i < countof(time_repartition); i ++) {
		fprintf(stderr, "%3u ms => %3u ms\t%4u\n", 2 * i, 2 * (i+1), time_repartition[i]);
	}

	FILE* csv = fopen("GSdx_profile.csv", "w");
	if (csv) {
		for (size_t i = 0; i < times.size(); i++) {
			fprintf(csv, "%zu,%lf\n", i, times[i]);
		}

		fclose(csv);
	}
}

// Allocates a new GSTextureOGL. When the force-clear debug option is off, a
// freshly created render target or depth buffer is committed and cleared once.
GSTexture* GSDeviceOGL::CreateSurface(int type, int w, int h, int fmt)
{
	GL_PUSH("Create surface");

	// A wrapper to call GSTextureOGL, with the different kind of parameter
	GSTextureOGL* t = new GSTextureOGL(type, w, h, fmt, m_fbo_read, m_mipmap > 1 || m_filter != TriFiltering::None);

	// NOTE: I'm not sure RenderTarget always need to be cleared. It could be costly for big upscale.
	// FIXME: it will be more logical to do it in FetchSurface. This code is only called at first creation
	//  of the texture. However we could reuse a deleted texture.
	if (m_force_texture_clear == 0) {
		// Clear won't be done if the texture isn't committed. Commit the full texture to ensure
		// correct behavior of force clear option (debug option)
		t->Commit();

		switch(type)
		{
			case GSTexture::RenderTarget:
				ClearRenderTarget(t, 0);
				break;
			case GSTexture::DepthStencil:
				ClearDepth(t);
				// No need to clear the stencil now.
				break;
		}
	}

	return t;
}

// Fetches a surface through GSDevice::FetchSurface, choosing a default GL
// format when `format` is 0. With the force-clear debug option enabled the
// returned surface is committed and cleared on every fetch.
GSTexture* GSDeviceOGL::FetchSurface(int type, int w, int h, int format)
{
	if (format == 0)
		format = (type == GSTexture::DepthStencil || type == GSTexture::SparseDepthStencil) ? GL_DEPTH32F_STENCIL8 : GL_RGBA8;

	GSTexture* t = GSDevice::FetchSurface(type, w, h, format);

	if (m_force_texture_clear) {
		// Clear won't be done if the texture isn't committed. Commit the full texture to ensure
		// correct behavior of force clear option (debug option)
		t->Commit();

		GSVector4 red(1.0f, 0.0f, 0.0f, 1.0f);
		switch(type)
		{
			case GSTexture::RenderTarget:
				ClearRenderTarget(t, 0);
				break;
			case GSTexture::DepthStencil:
				ClearDepth(t);
				// No need to clear the stencil now.
				break;
			case GSTexture::Texture:
				// Values above 1 fill the texture with red, any other
				// non-zero value clears it with a NULL pattern.
				if (m_force_texture_clear > 1)
					static_cast<GSTextureOGL*>(t)->Clear((void*)&red);
				else
					static_cast<GSTextureOGL*>(t)->Clear(NULL);
				break;
		}
	}

	return t;
}

// Creates every GL object used by the device (FBOs, vertex layout, samplers,
// the convert/merge/interlace/shadeboost pipelines, HW renderer shaders, PBO
// pool, OSD font texture) and then finishes the window setup.
// Returns false when GSDevice::Create() fails.
// NOTE(review): the template argument of `wnd` was lost in the extracted
// source; GSWnd is assumed -- confirm against GSDeviceOGL.h.
bool GSDeviceOGL::Create(const std::shared_ptr<GSWnd> &wnd)
{
	// NOTE(review): element type lost in the extracted source; char assumed.
	std::vector<char> shader;
	// ****************************************************************
	// Debug helper
	// ****************************************************************
#ifdef ENABLE_OGL_DEBUG
	if (theApp.GetConfigB("debug_opengl")) {
		glDebugMessageCallback((GLDEBUGPROC)DebugOutputToFile, NULL);
		glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB);

		glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
		// Useless info message on Nvidia driver
		GLuint ids[] = {0x20004};
		glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, countof(ids), ids, false);
	}
#endif

	m_force_texture_clear = theApp.GetConfigI("force_texture_clear");

	// WARNING it must be done after the control setup (at least on MESA)
	GL_PUSH("GSDeviceOGL::Create");

	// ****************************************************************
	// Various object
	// ****************************************************************
	{
		GL_PUSH("GSDeviceOGL::Various");

		m_shader = new GSShaderOGL(theApp.GetConfigB("debug_glsl_shader"));

		glGenFramebuffers(1, &m_fbo);
		// Always write to the first buffer
		OMSetFBO(m_fbo);
		GLenum target[1] = {GL_COLOR_ATTACHMENT0};
		glDrawBuffers(1, target);
		OMSetFBO(0);

		glGenFramebuffers(1, &m_fbo_read);
		// Always read from the first buffer
		glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
		glReadBuffer(GL_COLOR_ATTACHMENT0);
		glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);

		// Some timers to help profiling
		if (GLLoader::in_replayer) {
			glCreateQueries(GL_TIMESTAMP, 1 << 16, m_profiler.timer_query);
		}
	}

	// ****************************************************************
	// Vertex buffer state
	// ****************************************************************
	{
		GL_PUSH("GSDeviceOGL::Vertex Buffer");

		static_assert(sizeof(GSVertexPT1) == sizeof(GSVertex), "wrong GSVertex size");
		// NOTE(review): element type lost in the extracted source;
		// GSInputLayoutOGL assumed -- confirm against the vertex array header.
		std::vector<GSInputLayoutOGL> il_convert = {
			{0, 2 , GL_FLOAT          , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(0) }  ,
			{1, 2 , GL_FLOAT          , GL_FALSE , sizeof(GSVertexPT1) , (const GLvoid*)(16) } ,
			{2, 4 , GL_UNSIGNED_BYTE  , GL_FALSE , sizeof(GSVertex)    , (const GLvoid*)(8) }  ,
			{3, 1 , GL_FLOAT          , GL_FALSE , sizeof(GSVertex)    , (const GLvoid*)(12) } ,
			{4, 2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex)    , (const GLvoid*)(16) } ,
			{5, 1 , GL_UNSIGNED_INT   , GL_FALSE , sizeof(GSVertex)    , (const GLvoid*)(20) } ,
			{6, 2 , GL_UNSIGNED_SHORT , GL_FALSE , sizeof(GSVertex)    , (const GLvoid*)(24) } ,
			{7, 4 , GL_UNSIGNED_BYTE  , GL_TRUE  , sizeof(GSVertex)    , (const GLvoid*)(28) } , // Only 1 byte is useful but hardware unit only support 4B
		};
		m_va = new GSVertexBufferStateOGL(il_convert);
	}

	// ****************************************************************
	// Pre Generate the different sampler object
	// ****************************************************************
	{
		GL_PUSH("GSDeviceOGL::Sampler");

		for (uint32 key = 0; key < countof(m_ps_ss); key++) {
			m_ps_ss[key] = CreateSampler(PSSamplerSelector(key));
		}
	}

	// ****************************************************************
	// convert
	// ****************************************************************
	// `vs` is compiled once here and reused by the merge, interlace and
	// shadeboost pipelines below.
	GLuint vs = 0;
	GLuint ps = 0;
	{
		GL_PUSH("GSDeviceOGL::Convert");

		m_convert.cb = new GSUniformBufferOGL("Misc UBO", g_convert_index, sizeof(MiscConstantBuffer));
		// Upload once and forget about it.
		// Use value of 1 when upscale multiplier is 0 for ScalingFactor,
		// this is to avoid doing math with 0 in shader. It helps custom res be less broken.
		m_misc_cb_cache.ScalingFactor = GSVector4i(std::max(1, theApp.GetConfigI("upscale_multiplier")));
		m_convert.cb->cache_upload(&m_misc_cb_cache);

		theApp.LoadResource(IDR_CONVERT_GLSL, shader);

		vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, shader.data());

		m_convert.vs = vs;
		for(size_t i = 0; i < countof(m_convert.ps); i++) {
			// %d expects an int: cast the size_t index explicitly.
			ps = m_shader->Compile("convert.glsl", format("ps_main%d", static_cast<int>(i)), GL_FRAGMENT_SHADER, shader.data());
			std::string pretty_name = "Convert pipe " + std::to_string(i);
			m_convert.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps);
		}

		PSSamplerSelector point;
		m_convert.pt = GetSamplerID(point);

		PSSamplerSelector bilinear;
		bilinear.biln = true;
		m_convert.ln = GetSamplerID(bilinear);

		m_convert.dss = new GSDepthStencilOGL();
		m_convert.dss_write = new GSDepthStencilOGL();
		m_convert.dss_write->EnableDepth();
		m_convert.dss_write->SetDepth(GL_ALWAYS, true);
	}

	// ****************************************************************
	// merge
	// ****************************************************************
	{
		GL_PUSH("GSDeviceOGL::Merge");

		m_merge_obj.cb = new GSUniformBufferOGL("Merge UBO", g_merge_cb_index, sizeof(MergeConstantBuffer));

		theApp.LoadResource(IDR_MERGE_GLSL, shader);

		for(size_t i = 0; i < countof(m_merge_obj.ps); i++) {
			ps = m_shader->Compile("merge.glsl", format("ps_main%d", static_cast<int>(i)), GL_FRAGMENT_SHADER, shader.data());
			std::string pretty_name = "Merge pipe " + std::to_string(i);
			m_merge_obj.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps);
		}
	}

	// ****************************************************************
	// interlace
	// ****************************************************************
	{
		GL_PUSH("GSDeviceOGL::Interlace");

		m_interlace.cb = new GSUniformBufferOGL("Interlace UBO", g_interlace_cb_index, sizeof(InterlaceConstantBuffer));

		theApp.LoadResource(IDR_INTERLACE_GLSL, shader);

		for(size_t i = 0; i < countof(m_interlace.ps); i++) {
			ps = m_shader->Compile("interlace.glsl", format("ps_main%d", static_cast<int>(i)), GL_FRAGMENT_SHADER, shader.data());
			std::string pretty_name = "Interlace pipe " + std::to_string(i);
			m_interlace.ps[i] = m_shader->LinkPipeline(pretty_name, vs, 0, ps);
		}
	}

	// ****************************************************************
	// Shade boost
	// ****************************************************************
	{
		GL_PUSH("GSDeviceOGL::Shadeboost");

		// Each setting is clamped to [0, 100] before being baked into the shader.
		int ShadeBoost_Contrast = std::max(0, std::min(theApp.GetConfigI("ShadeBoost_Contrast"), 100));
		int ShadeBoost_Brightness = std::max(0, std::min(theApp.GetConfigI("ShadeBoost_Brightness"), 100));
		int ShadeBoost_Saturation = std::max(0, std::min(theApp.GetConfigI("ShadeBoost_Saturation"), 100));
		std::string shade_macro = format("#define SB_SATURATION %d.0\n", ShadeBoost_Saturation)
			+ format("#define SB_BRIGHTNESS %d.0\n", ShadeBoost_Brightness)
			+ format("#define SB_CONTRAST %d.0\n", ShadeBoost_Contrast);

		theApp.LoadResource(IDR_SHADEBOOST_GLSL, shader);

		ps = m_shader->Compile("shadeboost.glsl", "ps_main", GL_FRAGMENT_SHADER, shader.data(), shade_macro);
		m_shadeboost.ps = m_shader->LinkPipeline("ShadeBoost pipe", vs, 0, ps);
	}

	// ****************************************************************
	// rasterization configuration
	// ****************************************************************
	{
		GL_PUSH("GSDeviceOGL::Rasterization");

#ifdef ONLY_LINES
		glLineWidth(5.0);
		glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
#else
		glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
#endif
		glDisable(GL_CULL_FACE);
		glEnable(GL_SCISSOR_TEST);
		glDisable(GL_MULTISAMPLE);
		glDisable(GL_DITHER); // Honestly I don't know!
	}

	// ****************************************************************
	// DATE
	// ****************************************************************
	{
		GL_PUSH("GSDeviceOGL::Date");

		m_date.dss = new GSDepthStencilOGL();
		m_date.dss->EnableStencil();
		m_date.dss->SetStencil(GL_ALWAYS, GL_REPLACE);
	}

	// ****************************************************************
	// Use DX coordinate convention
	// ****************************************************************

	// VS gl_position.z => [-1, 1]
	// FS depth => [0, 1]
	// because of -1 we lose a lot of precision for small GS value
	// This extension allow FS depth to range from -1 to 1. So
	// gl_position.z could range from [0, 1]
	// Change depth convention
	if (GLExtension::Has("GL_ARB_clip_control"))
		glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);

	// ****************************************************************
	// HW renderer shader
	// ****************************************************************
	CreateTextureFX();

	// ****************************************************************
	// Pbo Pool allocation
	// ****************************************************************
	{
		GL_PUSH("GSDeviceOGL::PBO");

		// Mesa seems to use it to compute the row length. In our case, we are
		// tightly packed so don't bother with this parameter and set it to the
		// minimum alignment (1 byte)
		glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

		PboPool::Init();
	}

	// ****************************************************************
	// Get Available Memory
	// ****************************************************************
	GLint vram[4] = {0};
	if (GLLoader::vendor_id_amd) {
		// Full vram, remove a small margin for others buffer
		glGetIntegerv(GL_TEXTURE_FREE_MEMORY_ATI, vram);
	} else if (GLExtension::Has("GL_NVX_gpu_memory_info")) {
		// GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX <= give full memory
		// Available vram
		glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, vram);
	} else {
		fprintf(stdout, "No extenstion supported to get available memory. Use default value !\n");
	}

	// When VRAM is at least 2GB, we set the limit to the default i.e. 3.8 GB
	// When VRAM is below 2GB, we add a factor 2 because RAM can be used. Potentially
	// low VRAM gpu can go higher but perf will be bad anyway.
	if (vram[0] > 0 && vram[0] < 1800000)
		GLState::available_vram = (int64)(vram[0]) * 1024ul * 2ul;

	// The declared type of available_vram is not visible here: cast so that
	// the argument always matches %lld.
	fprintf(stdout, "Available VRAM/RAM:%lldMB for textures\n", static_cast<long long>(GLState::available_vram >> 20u));

	// ****************************************************************
	// Texture Font (OSD)
	// ****************************************************************
	GSVector2i tex_font = m_osd.get_texture_font_size();

	// reset() takes ownership of the new texture exactly like assigning a
	// temporary smart pointer would.
	m_font.reset(
		new GSTextureOGL(GSTextureOGL::Texture, tex_font.x, tex_font.y, GL_R8, m_fbo_read, false)
	);

	// ****************************************************************
	// Finish window setup and backbuffer
	// ****************************************************************
	if(!GSDevice::Create(wnd))
		return false;

	GSVector4i rect = wnd->GetClientRect();
	Reset(rect.z, rect.w);

	// Basic to ensure structures are correctly packed
	static_assert(sizeof(VSSelector) == 4, "Wrong VSSelector size");
	static_assert(sizeof(PSSelector) == 8, "Wrong PSSelector size");
	static_assert(sizeof(PSSamplerSelector) == 4, "Wrong PSSamplerSelector size");
	static_assert(sizeof(OMDepthStencilSelector) == 4, "Wrong OMDepthStencilSelector size");
	static_assert(sizeof(OMColorMaskSelector) == 4, "Wrong OMColorMaskSelector size");

	return true;
}

// Allocates the HW renderer uniform buffers, loads the tfx shader sources and
// pre-compiles the vertex/geometry shaders and depth-stencil states.
void GSDeviceOGL::CreateTextureFX()
{
	GL_PUSH("GSDeviceOGL::CreateTextureFX");

	m_vs_cb = new GSUniformBufferOGL("HW VS UBO", g_vs_cb_index, sizeof(VSConstantBuffer));
	m_ps_cb = new GSUniformBufferOGL("HW PS UBO", g_ps_cb_index, sizeof(PSConstantBuffer));

	theApp.LoadResource(IDR_TFX_VGS_GLSL, m_shader_tfx_vgs);
	theApp.LoadResource(IDR_TFX_FS_GLSL, m_shader_tfx_fs);

	// warning 1 sampler by image unit. So you cannot reuse m_ps_ss...
	m_palette_ss = CreateSampler(PSSamplerSelector(0));
	glBindSampler(1, m_palette_ss);

	// Pre compile the (remaining) Geometry & Vertex Shader
	// One-Hot encoding
	memset(m_gs, 0, sizeof(m_gs));
	m_gs[1] = CompileGS(GSSelector(1));
	m_gs[2] = CompileGS(GSSelector(2));
	m_gs[4] = CompileGS(GSSelector(4));

	for (uint32 key = 0; key < countof(m_vs); key++)
		m_vs[key] = CompileVS(VSSelector(key));

	// Enable all bits for stencil operations. Technically 1 bit is
	// enough but buffer is polluted with noise. Clear will be limited
	// to the mask.
	glStencilMask(0xFF);
	for (uint32 key = 0; key < countof(m_om_dss); key++) {
		m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
	}

	// Help to debug FS in apitrace
	m_apitrace = CompilePS(PSSelector());
}

// Resets the base device and recreates the dummy backbuffer texture.
// Returns false when GSDevice::Reset() fails.
bool GSDeviceOGL::Reset(int w, int h)
{
	if(!GSDevice::Reset(w, h))
		return false;

	// Opengl allocate the backbuffer with the window. The render is done in the backbuffer when
	// there isn't any FBO. Only a dummy texture is created to easily detect when the rendering is done
	// in the backbuffer
	m_backbuffer = new GSTextureOGL(GSTextureOGL::Backbuffer, w, h, 0, m_fbo_read, false);

	return true;
}

// Forwards the vsync setting to the window.
void GSDeviceOGL::SetVSync(int vsync)
{
	m_wnd->SetVSync(vsync);
}

// Presents the frame; in replayer mode also records a timestamp query that is
// later consumed by GenerateProfilerData().
void GSDeviceOGL::Flip()
{
	m_wnd->Flip();

	if (GLLoader::in_replayer) {
		glQueryCounter(m_profiler.timer(), GL_TIMESTAMP);
		m_profiler.last_query++;
	}
}

// Hook called before every draw; currently empty.
void GSDeviceOGL::BeforeDraw()
{
}

// Hook called after every draw; currently empty.
void GSDeviceOGL::AfterDraw()
{
}

// Draws the whole vertex buffer.
void GSDeviceOGL::DrawPrimitive()
{
	BeforeDraw();
	m_va->DrawPrimitive();
	AfterDraw();
}

// Draws `count` vertices starting at `offset`.
void GSDeviceOGL::DrawPrimitive(int offset, int count)
{
	BeforeDraw();
	m_va->DrawPrimitive(offset, count);
	AfterDraw();
}

// Indexed draw of the whole buffer; skipped when "disable_hw_gl_draw" is set.
void GSDeviceOGL::DrawIndexedPrimitive()
{
	BeforeDraw();
	if (!m_disable_hw_gl_draw)
		m_va->DrawIndexedPrimitive();
	AfterDraw();
}

// Indexed draw of a sub-range; skipped when "disable_hw_gl_draw" is set.
void GSDeviceOGL::DrawIndexedPrimitive(int offset, int count)
{
	//ASSERT(offset + count <= (int)m_index.count);

	BeforeDraw();
	if (!m_disable_hw_gl_draw)
		m_va->DrawIndexedPrimitive(offset, count);
	AfterDraw();
}

// Return type of GSDeviceOGL::ClearRenderTarget(GSTexture*, const GSVector4&),
// whose declarator starts on the following source line.
void
GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) { if (!t) return; GSTextureOGL* T = static_cast(t); if (T->HasBeenCleaned() && !T->IsBackbuffer()) return; // Performance note: potentially T->Clear() could be used. Main purpose of // Clear() is to avoid the framebuffer setup cost. However, in this context, // the texture 't' will be set as the render target of the framebuffer and // therefore will require a framebuffer setup. // So using the old/standard path is faster/better albeit verbose. GL_PUSH("Clear RT %d", T->GetID()); // TODO: check size of scissor before toggling it glDisable(GL_SCISSOR_TEST); uint32 old_color_mask = GLState::wrgba; OMSetColorMaskState(); if (T->IsBackbuffer()) { OMSetFBO(0); // glDrawBuffer(GL_BACK); // this is the default when there is no FB // 0 will select the first drawbuffer ie GL_BACK glClearBufferfv(GL_COLOR, 0, c.v); } else { OMSetFBO(m_fbo); OMAttachRt(T); glClearBufferfv(GL_COLOR, 0, c.v); } OMSetColorMaskState(OMColorMaskSelector(old_color_mask)); glEnable(GL_SCISSOR_TEST); T->WasCleaned(); } void GSDeviceOGL::ClearRenderTarget(GSTexture* t, uint32 c) { if (!t) return; GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255); ClearRenderTarget(t, color); } void GSDeviceOGL::ClearDepth(GSTexture* t) { if (!t) return; GSTextureOGL* T = static_cast(t); GL_PUSH("Clear Depth %d", T->GetID()); if (0 && GLLoader::found_GL_ARB_clear_texture) { // I don't know what the driver does but it creates // some slowdowns on Harry Potter PS // Maybe it triggers some texture relocations, or maybe // it clears also the stencil value (2 times slower) // // Let's disable this code for the moment. 
// Don't bother with Depth_Stencil insanity T->Clear(NULL); } else { OMSetFBO(m_fbo); // RT must be detached, if RT is too small, depth won't be fully cleared // AT tolenico 2 map clip bug OMAttachRt(NULL); OMAttachDs(T); // TODO: check size of scissor before toggling it glDisable(GL_SCISSOR_TEST); float c = 0.0f; if (GLState::depth_mask) { glClearBufferfv(GL_DEPTH, 0, &c); } else { glDepthMask(true); glClearBufferfv(GL_DEPTH, 0, &c); glDepthMask(false); } glEnable(GL_SCISSOR_TEST); } } void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) { if (!t) return; GSTextureOGL* T = static_cast(t); GL_PUSH("Clear Stencil %d", T->GetID()); // Keep SCISSOR_TEST enabled on purpose to reduce the size // of clean in DATE (impact big upscaling) OMSetFBO(m_fbo); OMAttachDs(T); GLint color = c; glClearBufferiv(GL_STENCIL, 0, &color); } GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel) { GL_PUSH("Create Sampler"); GLuint sampler; glCreateSamplers(1, &sampler); // Bilinear filtering if (sel.biln) { glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR); } else { glSamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST); } switch (static_cast(sel.triln)) { case GS_MIN_FILTER::Nearest: // Nop based on biln break; case GS_MIN_FILTER::Linear: // Nop based on biln break; case GS_MIN_FILTER::Nearest_Mipmap_Nearest: glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST_MIPMAP_NEAREST); break; case GS_MIN_FILTER::Nearest_Mipmap_Linear: glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_NEAREST_MIPMAP_LINEAR); break; case GS_MIN_FILTER::Linear_Mipmap_Nearest: glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST); break; case GS_MIN_FILTER::Linear_Mipmap_Linear: glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); break; default: break; } //glSamplerParameterf(sampler, 
GL_TEXTURE_MIN_LOD, 0); //glSamplerParameterf(sampler, GL_TEXTURE_MAX_LOD, 6); if (sel.tau) glSamplerParameteri(sampler, GL_TEXTURE_WRAP_S, GL_REPEAT); else glSamplerParameteri(sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); if (sel.tav) glSamplerParameteri(sampler, GL_TEXTURE_WRAP_T, GL_REPEAT); else glSamplerParameteri(sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glSamplerParameteri(sampler, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); int anisotropy = theApp.GetConfigI("MaxAnisotropy"); if (anisotropy && sel.aniso) { if (GLExtension::Has("GL_ARB_texture_filter_anisotropic")) glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY, (float)anisotropy); else if (GLExtension::Has("GL_EXT_texture_filter_anisotropic")) glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY_EXT, (float)anisotropy); } return sampler; } GLuint GSDeviceOGL::GetSamplerID(PSSamplerSelector ssel) { return m_ps_ss[ssel]; } GSDepthStencilOGL* GSDeviceOGL::CreateDepthStencil(OMDepthStencilSelector dssel) { GSDepthStencilOGL* dss = new GSDepthStencilOGL(); if (dssel.date) { dss->EnableStencil(); if (dssel.date_one) dss->SetStencil(GL_EQUAL, GL_ZERO); else dss->SetStencil(GL_EQUAL, GL_KEEP); } if(dssel.ztst != ZTST_ALWAYS || dssel.zwe) { static const GLenum ztst[] = { GL_NEVER, GL_ALWAYS, GL_GEQUAL, GL_GREATER }; dss->EnableDepth(); dss->SetDepth(ztst[dssel.ztst], dssel.zwe); } return dss; } void GSDeviceOGL::InitPrimDateTexture(GSTexture* rt, const GSVector4i& area) { const GSVector2i& rtsize = rt->GetSize(); // Create a texture to avoid the useless clean@0 if (m_date.t == NULL) m_date.t = CreateTexture(rtsize.x, rtsize.y, GL_R32I); // Clean with the max signed value int max_int = 0x7FFFFFFF; static_cast(m_date.t)->Clear(&max_int, area); glBindImageTexture(2, static_cast(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I); #ifdef ENABLE_OGL_DEBUG // Help to see the texture in apitrace PSSetShaderResource(2, m_date.t); #endif } void GSDeviceOGL::RecycleDateTexture() { if (m_date.t) { 
//static_cast(m_date.t)->Save(format("/tmp/date_adv_%04ld.csv", GSState::s_n)); Recycle(m_date.t); m_date.t = NULL; } } void GSDeviceOGL::Barrier(GLbitfield b) { glMemoryBarrier(b); } GLuint GSDeviceOGL::CompileVS(VSSelector sel) { std::string macro = format("#define VS_INT_FST %d\n", sel.int_fst); if (GLLoader::buggy_sso_dual_src) return m_shader->CompileShader("tfx_vgs.glsl", "vs_main", GL_VERTEX_SHADER, m_shader_tfx_vgs.data(), macro); else return m_shader->Compile("tfx_vgs.glsl", "vs_main", GL_VERTEX_SHADER, m_shader_tfx_vgs.data(), macro); } GLuint GSDeviceOGL::CompileGS(GSSelector sel) { std::string macro = format("#define GS_POINT %d\n", sel.point) + format("#define GS_LINE %d\n", sel.line); if (GLLoader::buggy_sso_dual_src) return m_shader->CompileShader("tfx_vgs.glsl", "gs_main", GL_GEOMETRY_SHADER, m_shader_tfx_vgs.data(), macro); else return m_shader->Compile("tfx_vgs.glsl", "gs_main", GL_GEOMETRY_SHADER, m_shader_tfx_vgs.data(), macro); } GLuint GSDeviceOGL::CompilePS(PSSelector sel) { std::string macro = format("#define PS_FST %d\n", sel.fst) + format("#define PS_WMS %d\n", sel.wms) + format("#define PS_WMT %d\n", sel.wmt) + format("#define PS_TEX_FMT %d\n", sel.tex_fmt) + format("#define PS_DFMT %d\n", sel.dfmt) + format("#define PS_DEPTH_FMT %d\n", sel.depth_fmt) + format("#define PS_CHANNEL_FETCH %d\n", sel.channel) + format("#define PS_URBAN_CHAOS_HLE %d\n", sel.urban_chaos_hle) + format("#define PS_TALES_OF_ABYSS_HLE %d\n", sel.tales_of_abyss_hle) + format("#define PS_TEX_IS_FB %d\n", sel.tex_is_fb) + format("#define PS_INVALID_TEX0 %d\n", sel.invalid_tex0) + format("#define PS_AEM %d\n", sel.aem) + format("#define PS_TFX %d\n", sel.tfx) + format("#define PS_TCC %d\n", sel.tcc) + format("#define PS_ATST %d\n", sel.atst) + format("#define PS_FOG %d\n", sel.fog) + format("#define PS_CLR1 %d\n", sel.clr1) + format("#define PS_FBA %d\n", sel.fba) + format("#define PS_LTF %d\n", sel.ltf) + format("#define PS_AUTOMATIC_LOD %d\n", sel.automatic_lod) + 
format("#define PS_MANUAL_LOD %d\n", sel.manual_lod) + format("#define PS_COLCLIP %d\n", sel.colclip) + format("#define PS_DATE %d\n", sel.date) + format("#define PS_TCOFFSETHACK %d\n", sel.tcoffsethack) + format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler) + format("#define PS_BLEND_A %d\n", sel.blend_a) + format("#define PS_BLEND_B %d\n", sel.blend_b) + format("#define PS_BLEND_C %d\n", sel.blend_c) + format("#define PS_BLEND_D %d\n", sel.blend_d) + format("#define PS_IIP %d\n", sel.iip) + format("#define PS_SHUFFLE %d\n", sel.shuffle) + format("#define PS_READ_BA %d\n", sel.read_ba) + format("#define PS_WRITE_RG %d\n", sel.write_rg) + format("#define PS_FBMASK %d\n", sel.fbmask) + format("#define PS_HDR %d\n", sel.hdr) + format("#define PS_DITHER %d\n", sel.dither) + format("#define PS_ZCLAMP %d\n", sel.zclamp) // + format("#define PS_PABE %d\n", sel.pabe) ; if (GLLoader::buggy_sso_dual_src) return m_shader->CompileShader("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, m_shader_tfx_fs.data(), macro); else return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, m_shader_tfx_fs.data(), macro); } void GSDeviceOGL::SelfShaderTestRun(const std::string& dir, const std::string& file, const PSSelector& sel, int& nb_shader) { #ifdef __unix__ std::string out = "/tmp/GSdx_Shader/"; GSmkdir(out.c_str()); out += dir + "/"; GSmkdir(out.c_str()); out += file; #else std::string out = file; #endif #ifdef __linux__ // Nouveau actually if (GLLoader::mesa_driver) { if (freopen(out.c_str(), "w", stderr) == NULL) fprintf(stderr, "Failed to redirect stderr\n"); } #endif GLuint p = CompilePS(sel); nb_shader++; m_shader_inst += m_shader->DumpAsm(out, p); #ifdef __linux__ // Nouveau actually if (GLLoader::mesa_driver) { if (freopen("/dev/tty", "w", stderr) == NULL) fprintf(stderr, "Failed to restore stderr\n"); } #endif } void GSDeviceOGL::SelfShaderTestPrint(const std::string& test, int& nb_shader) { fprintf(stderr, "%-25s\t\t%d shaders:\t%d instructions (M %4.2f)\t%d 
registers (M %4.2f)\n", test.c_str(), nb_shader, m_shader_inst, (float)m_shader_inst/(float)nb_shader, m_shader_reg, (float)m_shader_reg/(float)nb_shader); m_shader_inst = 0; m_shader_reg = 0; nb_shader = 0; } void GSDeviceOGL::SelfShaderTest() { std::string out; #ifdef __unix__ setenv("NV50_PROG_DEBUG", "1", 1); #endif std::string test; m_shader_inst = 0; m_shader_reg = 0; int nb_shader = 0; test = "SW_Blending"; for (int colclip = 0; colclip < 2; colclip++) { for (int fmt = 0; fmt < 3; fmt++) { for (int i = 0; i < 3; i++) { PSSelector sel; sel.tfx = 4; int ib = (i + 1) % 3; sel.blend_a = i; sel.blend_b = ib;; sel.blend_c = i; sel.blend_d = i; sel.colclip = colclip; sel.dfmt = fmt; std::string file = format("Shader_Blend_%d_%d_%d_%d__Cclip_%d__Dfmt_%d.glsl.asm", i, ib, i, i, colclip, fmt); SelfShaderTestRun(test, file, sel, nb_shader); } } } SelfShaderTestPrint(test, nb_shader); test = "Alpha_Test"; for (int atst = 0; atst < 5; atst++) { PSSelector sel; sel.tfx = 4; sel.atst = atst; std::string file = format("Shader_Atst_%d.glsl.asm", atst); SelfShaderTestRun(test, file, sel, nb_shader); } SelfShaderTestPrint(test, nb_shader); test = "Fbmask__Fog__Shuffle__Read_ba"; for (int read_ba = 0; read_ba < 2; read_ba++) { PSSelector sel; sel.tfx = 4; sel.fog = 1; sel.fbmask = 1; sel.shuffle = 1; sel.read_ba = read_ba; std::string file = format("Shader_Fog__Fbmask__Shuffle__Read_ba_%d.glsl.asm", read_ba); SelfShaderTestRun(test, file, sel, nb_shader); } SelfShaderTestPrint(test, nb_shader); test = "Date"; for (int date = 1; date < 7; date++) { PSSelector sel; sel.tfx = 4; sel.date = date; std::string file = format("Shader_Date_%d.glsl.asm", date); SelfShaderTestRun(test, file, sel, nb_shader); } SelfShaderTestPrint(test, nb_shader); test = "FBA"; for (int fmt = 0; fmt < 3; fmt++) { PSSelector sel; sel.tfx = 4; sel.fba = 1; sel.dfmt = fmt; sel.clr1 = 1; std::string file = format("Shader_Fba__Clr1__Dfmt_%d.glsl.asm", fmt); SelfShaderTestRun(test, file, sel, nb_shader); } 
SelfShaderTestPrint(test, nb_shader); test = "Fst__Tc__IIP"; { PSSelector sel; sel.tfx = 1; sel.fst = 0; sel.iip = 1; sel.tcoffsethack = 1; std::string file = format("Shader_Fst__TC__Iip.glsl.asm"); SelfShaderTestRun(test, file, sel, nb_shader); } SelfShaderTestPrint(test, nb_shader); test = "Tfx__Tcc"; for (int channel = 0; channel < 5; channel++) { for (int tfx = 0; tfx < 5; tfx++) { for (int tcc = 0; tcc < 2; tcc++) { PSSelector sel; sel.fst = 1; sel.channel = channel; sel.tfx = tfx; sel.tcc = tcc; std::string file = format("Shader_Tfx_%d__Tcc_%d__Channel_%d.glsl.asm", tfx, tcc, channel); SelfShaderTestRun(test, file, sel, nb_shader); } } } SelfShaderTestPrint(test, nb_shader); test = "Texture_Sampling"; for (int depth = 0; depth < 4; depth++) { for (int fmt = 0; fmt < 16; fmt++) { if ((fmt & 3) == 3) continue; for (int ltf = 0; ltf < 2; ltf++) { for (int aem = 0; aem < 2; aem++) { for (int wms = 1; wms < 4; wms++) { for (int wmt = 1; wmt < 4; wmt++) { PSSelector sel; sel.tfx = 1; sel.tcc = 1; sel.fst = 1; sel.depth_fmt = depth; sel.ltf = ltf; sel.aem = aem; sel.tex_fmt = fmt; sel.wms = wms; sel.wmt = wmt; std::string file = format("Shader_Ltf_%d__Aem_%d__TFmt_%d__Wms_%d__Wmt_%d__DepthFmt_%d.glsl.asm", ltf, aem, fmt, wms, wmt, depth); SelfShaderTestRun(test, file, sel, nb_shader); } } } } } } SelfShaderTestPrint(test, nb_shader); } // blit a texture into an offscreen buffer GSTexture* GSDeviceOGL::CopyOffscreen(GSTexture* src, const GSVector4& sRect, int w, int h, int format, int ps_shader) { if (format == 0) format = GL_RGBA8; ASSERT(src); ASSERT(format == GL_RGBA8 || format == GL_R16UI || format == GL_R32UI); GSTexture* dst = CreateOffscreen(w, h, format); GSVector4 dRect(0, 0, w, h); // StretchRect will read an old target. However, the memory cache might contains // invalid data (for example due to SW blending). 
glTextureBarrier(); StretchRect(src, sRect, dst, dRect, m_convert.ps[ps_shader]); return dst; } // Copy a sub part of texture (same as below but force a conversion) void GSDeviceOGL::CopyRectConv(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r, bool at_origin) { ASSERT(sTex && dTex); if (!(sTex && dTex)) return; const GLuint& sid = static_cast(sTex)->GetID(); const GLuint& did = static_cast(dTex)->GetID(); GL_PUSH(format("CopyRectConv from %d to %d", sid, did).c_str()); dTex->CommitRegion(GSVector2i(r.z, r.w)); glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, sid, 0); if (at_origin) glCopyTextureSubImage2D(did, GL_TEX_LEVEL_0, 0, 0, r.x, r.y, r.width(), r.height()); else glCopyTextureSubImage2D(did, GL_TEX_LEVEL_0, r.x, r.y, r.x, r.y, r.width(), r.height()); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); } // Copy a sub part of a texture into another void GSDeviceOGL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r) { ASSERT(sTex && dTex); if (!(sTex && dTex)) return; const GLuint& sid = static_cast(sTex)->GetID(); const GLuint& did = static_cast(dTex)->GetID(); GL_PUSH("CopyRect from %d to %d", sid, did); #ifdef ENABLE_OGL_DEBUG PSSetShaderResource(6, sTex); #endif dTex->CommitRegion(GSVector2i(r.z, r.w)); ASSERT(GLExtension::Has("GL_ARB_copy_image") && glCopyImageSubData); glCopyImageSubData( sid, GL_TEXTURE_2D, 0, r.x, r.y, 0, did, GL_TEXTURE_2D, 0, 0, 0, 0, r.width(), r.height(), 1); } void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, int shader, bool linear) { StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[shader], linear); } void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, bool linear) { StretchRect(sTex, sRect, dTex, dRect, ps, m_NO_BLEND, OMColorMaskSelector(), linear); } void GSDeviceOGL::StretchRect(GSTexture* sTex, 
const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) { OMColorMaskSelector cms; cms.wr = red; cms.wg = green; cms.wb = blue; cms.wa = alpha; StretchRect(sTex, sRect, dTex, dRect, m_convert.ps[ShaderConvert_COPY], m_NO_BLEND, cms, false); } void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, int bs, OMColorMaskSelector cms, bool linear) { if(!sTex || !dTex) { ASSERT(0); return; } bool draw_in_depth = (ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT32] || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT24] || ps == m_convert.ps[ShaderConvert_RGBA8_TO_FLOAT16] || ps == m_convert.ps[ShaderConvert_RGB5A1_TO_FLOAT16]); // Performance optimization. It might be faster to use a framebuffer blit for standard case // instead to emulate it with shader // see https://www.opengl.org/wiki/Framebuffer#Blitting GL_PUSH("StretchRect from %d to %d", sTex->GetID(), dTex->GetID()); // ************************************ // Init // ************************************ BeginScene(); GSVector2i ds = dTex->GetSize(); m_shader->BindPipeline(ps); // ************************************ // om // ************************************ if (draw_in_depth) OMSetDepthStencilState(m_convert.dss_write); else OMSetDepthStencilState(m_convert.dss); if (draw_in_depth) OMSetRenderTargets(NULL, dTex); else OMSetRenderTargets(dTex, NULL); OMSetBlendState((uint8)bs); OMSetColorMaskState(cms); // ************************************ // ia // ************************************ // Original code from DX float left = dRect.x * 2 / ds.x - 1.0f; float right = dRect.z * 2 / ds.x - 1.0f; #if 0 float top = 1.0f - dRect.y * 2 / ds.y; float bottom = 1.0f - dRect.w * 2 / ds.y; #else // Opengl get some issues with the coordinate // I flip top/bottom to fix scaling of the internal resolution float top = -1.0f + dRect.y * 2 / ds.y; float bottom = -1.0f + dRect.w * 2 / ds.y; #endif // Flip y axis 
only when we render in the backbuffer // By default everything is render in the wrong order (ie dx). // 1/ consistency between several pass rendering (interlace) // 2/ in case some GSdx code expect thing in dx order. // Only flipping the backbuffer is transparent (I hope)... GSVector4 flip_sr = sRect; if (static_cast(dTex)->IsBackbuffer()) { flip_sr.y = sRect.w; flip_sr.w = sRect.y; } GSVertexPT1 vertices[] = { {GSVector4(left , top , 0.0f, 0.0f) , GSVector2(flip_sr.x , flip_sr.y)} , {GSVector4(right , top , 0.0f, 0.0f) , GSVector2(flip_sr.z , flip_sr.y)} , {GSVector4(left , bottom, 0.0f, 0.0f) , GSVector2(flip_sr.x , flip_sr.w)} , {GSVector4(right , bottom, 0.0f, 0.0f) , GSVector2(flip_sr.z , flip_sr.w)} , }; IASetVertexBuffer(vertices, 4); IASetPrimitiveTopology(GL_TRIANGLE_STRIP); // ************************************ // Texture // ************************************ PSSetShaderResource(0, sTex); PSSetSamplerState(linear ? m_convert.ln : m_convert.pt); // ************************************ // Draw // ************************************ dTex->CommitRegion(GSVector2i((int)dRect.z + 1, (int)dRect.w + 1)); DrawPrimitive(); // ************************************ // End // ************************************ EndScene(); } void GSDeviceOGL::RenderOsd(GSTexture* dt) { BeginScene(); m_shader->BindPipeline(m_convert.ps[ShaderConvert_OSD]); OMSetDepthStencilState(m_convert.dss); OMSetBlendState((uint8)GSDeviceOGL::m_MERGE_BLEND); OMSetRenderTargets(dt, NULL); if(m_osd.m_texture_dirty) { m_osd.upload_texture_atlas(m_font.get()); } PSSetShaderResource(0, m_font.get()); PSSetSamplerState(m_convert.pt); IASetPrimitiveTopology(GL_TRIANGLES); // Note scaling could also be done in shader (require gl3/dx10) size_t count = m_osd.Size(); GSVertexPT1* dst = (GSVertexPT1*)m_va->MapVB(count); count = m_osd.GeneratePrimitives(dst, count); m_va->UnmapVB(); DrawPrimitive(); EndScene(); } void GSDeviceOGL::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* 
dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c) { GL_PUSH("DoMerge"); GSVector4 full_r(0.0f, 0.0f, 1.0f, 1.0f); bool feedback_write_2 = PMODE.EN2 && sTex[2] != nullptr && EXTBUF.FBIN == 1; bool feedback_write_1 = PMODE.EN1 && sTex[2] != nullptr && EXTBUF.FBIN == 0; bool feedback_write_2_but_blend_bg = feedback_write_2 && PMODE.SLBG == 1; // Merge the 2 source textures (sTex[0],sTex[1]). Final results go to dTex. Feedback write will go to sTex[2]. // If either 2nd output is disabled or SLBG is 1, a background color will be used. // Note: background color is also used when outside of the unit rectangle area OMSetColorMaskState(); ClearRenderTarget(dTex, c); // Upload constant to select YUV algo if (feedback_write_2 || feedback_write_1) { // Write result to feedback loop m_misc_cb_cache.EMOD_AC.x = EXTBUF.EMODA; m_misc_cb_cache.EMOD_AC.y = EXTBUF.EMODC; m_convert.cb->cache_upload(&m_misc_cb_cache); } if (sTex[1] && (PMODE.SLBG == 0 || feedback_write_2_but_blend_bg)) { // 2nd output is enabled and selected. Copy it to destination so we can blend it with 1st output // Note: value outside of dRect must contains the background color (c) StretchRect(sTex[1], sRect[1], dTex, dRect[1], ShaderConvert_COPY); } // Save 2nd output if (feedback_write_2) // FIXME I'm not sure dRect[1] is always correct StretchRect(dTex, full_r, sTex[2], dRect[1], ShaderConvert_YUV); // Restore background color to process the normal merge if (feedback_write_2_but_blend_bg) ClearRenderTarget(dTex, c); if (sTex[0]) { if (PMODE.AMOD == 1) // Keep the alpha from the 2nd output OMSetColorMaskState(OMColorMaskSelector(0x7)); // 1st output is enabled. 
It must be blended if (PMODE.MMOD == 1) { // Blend with a constant alpha m_merge_obj.cb->cache_upload(&c.v); StretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge_obj.ps[1], m_MERGE_BLEND, OMColorMaskSelector()); } else { // Blend with 2 * input alpha StretchRect(sTex[0], sRect[0], dTex, dRect[0], m_merge_obj.ps[0], m_MERGE_BLEND, OMColorMaskSelector()); } } if (feedback_write_1) // FIXME I'm not sure dRect[0] is always correct StretchRect(dTex, full_r, sTex[2], dRect[0], ShaderConvert_YUV); } void GSDeviceOGL::DoInterlace(GSTexture* sTex, GSTexture* dTex, int shader, bool linear, float yoffset) { GL_PUSH("DoInterlace"); OMSetColorMaskState(); GSVector4 s = GSVector4(dTex->GetSize()); GSVector4 sRect(0, 0, 1, 1); GSVector4 dRect(0.0f, yoffset, s.x, s.y + yoffset); InterlaceConstantBuffer cb; cb.ZrH = GSVector2(0, 1.0f / s.y); cb.hH = s.y / 2; m_interlace.cb->cache_upload(&cb); StretchRect(sTex, sRect, dTex, dRect, m_interlace.ps[shader], linear); } void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex) { // Lazy compile if (!m_fxaa.ps) { if (!GLLoader::found_GL_ARB_gpu_shader5) { // GL4.0 extension return; } std::string fxaa_macro = "#define FXAA_GLSL_130 1\n"; fxaa_macro += "#extension GL_ARB_gpu_shader5 : enable\n"; std::vector shader; theApp.LoadResource(IDR_FXAA_FX, shader); GLuint ps = m_shader->Compile("fxaa.fx", "ps_main", GL_FRAGMENT_SHADER, shader.data(), fxaa_macro); m_fxaa.ps = m_shader->LinkPipeline("FXAA pipe", m_convert.vs, 0, ps); } GL_PUSH("DoFxaa"); OMSetColorMaskState(); GSVector2i s = dTex->GetSize(); GSVector4 sRect(0, 0, 1, 1); GSVector4 dRect(0, 0, s.x, s.y); StretchRect(sTex, sRect, dTex, dRect, m_fxaa.ps, true); } void GSDeviceOGL::DoExternalFX(GSTexture* sTex, GSTexture* dTex) { // Lazy compile if (!m_shaderfx.ps) { if (!GLLoader::found_GL_ARB_gpu_shader5) { // GL4.0 extension return; } std::string config_name(theApp.GetConfigS("shaderfx_conf")); std::ifstream fconfig(config_name); std::stringstream config; config << "#extension 
GL_ARB_gpu_shader5 : require\n"; if (fconfig.good()) config << fconfig.rdbuf(); else fprintf(stderr, "Warning failed to load '%s'. External Shader might be wrongly configured\n", config_name.c_str()); std::string shader_name(theApp.GetConfigS("shaderfx_glsl")); std::ifstream fshader(shader_name); std::stringstream shader; if (!fshader.good()) { fprintf(stderr, "Error failed to load '%s'. External Shader will be disabled !\n", shader_name.c_str()); return; } shader << fshader.rdbuf(); m_shaderfx.cb = new GSUniformBufferOGL("eFX UBO", g_fx_cb_index, sizeof(ExternalFXConstantBuffer)); GLuint ps = m_shader->Compile("Extra", "ps_main", GL_FRAGMENT_SHADER, shader.str().c_str(), config.str()); m_shaderfx.ps = m_shader->LinkPipeline("eFX pipie", m_convert.vs, 0, ps); } GL_PUSH("DoExternalFX"); OMSetColorMaskState(); GSVector2i s = dTex->GetSize(); GSVector4 sRect(0, 0, 1, 1); GSVector4 dRect(0, 0, s.x, s.y); ExternalFXConstantBuffer cb; cb.xyFrame = GSVector2((float)s.x, (float)s.y); cb.rcpFrame = GSVector4(1.0f / s.x, 1.0f / s.y, 0.0f, 0.0f); cb.rcpFrameOpt = GSVector4::zero(); m_shaderfx.cb->cache_upload(&cb); StretchRect(sTex, sRect, dTex, dRect, m_shaderfx.ps, true); } void GSDeviceOGL::DoShadeBoost(GSTexture* sTex, GSTexture* dTex) { GL_PUSH("DoShadeBoost"); OMSetColorMaskState(); GSVector2i s = dTex->GetSize(); GSVector4 sRect(0, 0, 1, 1); GSVector4 dRect(0, 0, s.x, s.y); StretchRect(sTex, sRect, dTex, dRect, m_shadeboost.ps, true); } void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm) { GL_PUSH("DATE First Pass"); // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows BeginScene(); ClearStencil(ds, 0); m_shader->BindPipeline(m_convert.ps[datm ? 
ShaderConvert_DATM_1 : ShaderConvert_DATM_0]); // om OMSetDepthStencilState(m_date.dss); if (GLState::blend) { glDisable(GL_BLEND); } OMSetRenderTargets(NULL, ds, &GLState::scissor); // ia IASetVertexBuffer(vertices, 4); IASetPrimitiveTopology(GL_TRIANGLE_STRIP); // Texture PSSetShaderResource(0, rt); PSSetSamplerState(m_convert.pt); DrawPrimitive(); if (GLState::blend) { glEnable(GL_BLEND); } EndScene(); } void GSDeviceOGL::EndScene() { m_va->EndScene(); } void GSDeviceOGL::IASetVertexBuffer(const void* vertices, size_t count) { m_va->UploadVB(vertices, count); } void GSDeviceOGL::IASetIndexBuffer(const void* index, size_t count) { m_va->UploadIB(index, count); } void GSDeviceOGL::IASetPrimitiveTopology(GLenum topology) { m_va->SetTopology(topology); } void GSDeviceOGL::PSSetShaderResource(int i, GSTexture* sr) { ASSERT(i < (int)countof(GLState::tex_unit)); // Note: Nvidia debgger doesn't support the id 0 (ie the NULL texture) if (sr) { GLuint id = static_cast(sr)->GetID(); if (GLState::tex_unit[i] != id) { GLState::tex_unit[i] = id; glBindTextureUnit(i, id); } } } void GSDeviceOGL::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) { PSSetShaderResource(0, sr0); PSSetShaderResource(1, sr1); } void GSDeviceOGL::PSSetSamplerState(GLuint ss) { if (GLState::ps_ss != ss) { GLState::ps_ss = ss; glBindSampler(0, ss); } } void GSDeviceOGL::OMAttachRt(GSTextureOGL* rt) { GLuint id; if (rt) { rt->WasAttached(); id = rt->GetID(); } else { id = 0; } if (GLState::rt != id) { GLState::rt = id; glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, id, 0); } } void GSDeviceOGL::OMAttachDs(GSTextureOGL* ds) { GLuint id; if (ds) { ds->WasAttached(); id = ds->GetID(); } else { id = 0; } if (GLState::ds != id) { GLState::ds = id; glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, id, 0); } } void GSDeviceOGL::OMSetFBO(GLuint fbo) { if (GLState::fbo != fbo) { GLState::fbo = fbo; glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 
fbo); } } void GSDeviceOGL::OMSetDepthStencilState(GSDepthStencilOGL* dss) { dss->SetupDepth(); dss->SetupStencil(); } void GSDeviceOGL::OMSetColorMaskState(OMColorMaskSelector sel) { if (sel.wrgba != GLState::wrgba) { GLState::wrgba = sel.wrgba; glColorMaski(0, sel.wr, sel.wg, sel.wb, sel.wa); } } void GSDeviceOGL::OMSetBlendState(uint8 blend_index, uint8 blend_factor, bool is_blend_constant, bool accumulation_blend) { if (blend_index) { if (!GLState::blend) { GLState::blend = true; glEnable(GL_BLEND); } if (is_blend_constant && GLState::bf != blend_factor) { GLState::bf = blend_factor; float bf = (float)blend_factor / 128.0f; glBlendColor(bf, bf, bf, bf); } HWBlend b = GetBlend(blend_index); if (accumulation_blend) { b.src = GL_ONE; b.dst = GL_ONE; } if (GLState::eq_RGB != b.op) { GLState::eq_RGB = b.op; glBlendEquationSeparate(b.op, GL_FUNC_ADD); } if (GLState::f_sRGB != b.src || GLState::f_dRGB != b.dst) { GLState::f_sRGB = b.src; GLState::f_dRGB = b.dst; glBlendFuncSeparate(b.src, b.dst, GL_ONE, GL_ZERO); } } else { if (GLState::blend) { GLState::blend = false; glDisable(GL_BLEND); } } } void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor) { GSTextureOGL* RT = static_cast(rt); GSTextureOGL* DS = static_cast(ds); if (rt == NULL || !RT->IsBackbuffer()) { OMSetFBO(m_fbo); if (rt) { OMAttachRt(RT); } else { OMAttachRt(); } // Note: it must be done after OMSetFBO if (ds) OMAttachDs(DS); else OMAttachDs(); } else { // Render in the backbuffer OMSetFBO(0); } GSVector2i size = rt ? rt->GetSize() : ds ? ds->GetSize() : GLState::viewport; if(GLState::viewport != size) { GLState::viewport = size; // FIXME ViewportIndexedf or ViewportIndexedfv (GL4.1) glViewportIndexedf(0, 0, 0, GLfloat(size.x), GLfloat(size.y)); } GSVector4i r = scissor ? 
*scissor : GSVector4i(size).zwxy(); if(!GLState::scissor.eq(r)) { GLState::scissor = r; // FIXME ScissorIndexedv (GL4.1) glScissorIndexed(0, r.x, r.y, r.width(), r.height()); } } void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb) { GL_PUSH("UBO"); if(m_vs_cb_cache.Update(vs_cb)) { m_vs_cb->upload(vs_cb); } if(m_ps_cb_cache.Update(ps_cb)) { m_ps_cb->upload(ps_cb); } } void GSDeviceOGL::SetupCBMisc(const GSVector4i& channel) { m_misc_cb_cache.ChannelShuffle = channel; m_convert.cb->cache_upload(&m_misc_cb_cache); } void GSDeviceOGL::SetupPipeline(const VSSelector& vsel, const GSSelector& gsel, const PSSelector& psel) { GLuint ps; auto i = m_ps.find(psel); if (i == m_ps.end()) { ps = CompilePS(psel); m_ps[psel] = ps; } else { ps = i->second; } { #if defined(_DEBUG) && 0 // Toggling Shader is bad for the perf. Let's trace parameter that often toggle to detect // potential uber shader possibilities. static PSSelector old_psel; static GLuint old_ps = 0; std::string msg(""); #define CHECK_STATE(p) if (psel.p != old_psel.p) msg.append(" ").append(#p); if (old_ps != ps) { CHECK_STATE(tex_fmt); CHECK_STATE(dfmt); CHECK_STATE(depth_fmt); CHECK_STATE(aem); CHECK_STATE(fba); CHECK_STATE(fog); CHECK_STATE(iip); CHECK_STATE(date); CHECK_STATE(atst); CHECK_STATE(fst); CHECK_STATE(tfx); CHECK_STATE(tcc); CHECK_STATE(wms); CHECK_STATE(wmt); CHECK_STATE(ltf); CHECK_STATE(shuffle); CHECK_STATE(read_ba); CHECK_STATE(write_rg); CHECK_STATE(fbmask); CHECK_STATE(blend_a); CHECK_STATE(blend_b); CHECK_STATE(blend_c); CHECK_STATE(blend_d); CHECK_STATE(clr1); CHECK_STATE(pabe); CHECK_STATE(hdr); CHECK_STATE(colclip); // CHECK_STATE(channel); // CHECK_STATE(tcoffsethack); // CHECK_STATE(urban_chaos_hle); // CHECK_STATE(tales_of_abyss_hle); GL_PERF("New PS :%s", msg.c_str()); } old_psel.key = psel.key; old_ps = ps; #endif } if (GLLoader::buggy_sso_dual_src) m_shader->BindProgram(m_vs[vsel], m_gs[gsel], ps); else m_shader->BindPipeline(m_vs[vsel], m_gs[gsel], 
ps); } void GSDeviceOGL::SetupSampler(PSSamplerSelector ssel) { PSSetSamplerState(m_ps_ss[ssel]); } GLuint GSDeviceOGL::GetPaletteSamplerID() { return m_palette_ss; } void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel) { OMSetDepthStencilState(m_om_dss[dssel]); } // Note: used as a callback of DebugMessageCallback. Don't change the signature void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar *gl_message, const void* userParam) { std::string message(gl_message, gl_length >= 0 ? gl_length : strlen(gl_message)); std::string type, severity, source; static int sev_counter = 0; switch(gl_type) { case GL_DEBUG_TYPE_ERROR_ARB : type = "Error"; break; case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB : type = "Deprecated bhv"; break; case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB : type = "Undefined bhv"; break; case GL_DEBUG_TYPE_PORTABILITY_ARB : type = "Portability"; break; case GL_DEBUG_TYPE_PERFORMANCE_ARB : type = "Perf"; break; case GL_DEBUG_TYPE_OTHER_ARB : type = "Oth"; break; case GL_DEBUG_TYPE_PUSH_GROUP : return; // Don't print message injected by myself case GL_DEBUG_TYPE_POP_GROUP : return; // Don't print message injected by myself default : type = "TTT"; break; } switch(gl_severity) { case GL_DEBUG_SEVERITY_HIGH_ARB : severity = "High"; sev_counter++; break; case GL_DEBUG_SEVERITY_MEDIUM_ARB : severity = "Mid"; break; case GL_DEBUG_SEVERITY_LOW_ARB : severity = "Low"; break; default : if (id == 0xFEAD) severity = "Cache"; else if (id == 0xB0B0) severity = "REG"; else if (id == 0xD0D0) severity = "EXTRA"; break; } switch(gl_source) { case GL_DEBUG_SOURCE_API_ARB : source = "API"; break; case GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB : source = "WINDOW"; break; case GL_DEBUG_SOURCE_SHADER_COMPILER_ARB : source = "COMPILER"; break; case GL_DEBUG_SOURCE_THIRD_PARTY_ARB : source = "3rdparty"; break; case GL_DEBUG_SOURCE_APPLICATION_ARB : source = "Application"; break; case GL_DEBUG_SOURCE_OTHER_ARB 
: source = "Others"; break; default : source = "???"; break; } #ifdef _DEBUG // Don't spam noisy information on the terminal if (gl_severity != GL_DEBUG_SEVERITY_NOTIFICATION) { fprintf(stderr,"T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str()); } #else // Print nouveau shader compiler info if (GSState::s_n == 0) { int t, local, gpr, inst, byte; int status = sscanf(message.c_str(), "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d", &t, &local, &gpr, &inst, &byte); if (status == 5) { m_shader_inst += inst; m_shader_reg += gpr; fprintf(stderr,"T:%s\t\tS:%s\t=> %s\n", type.c_str(), severity.c_str(), message.c_str()); } } #endif if (m_debug_gl_file) fprintf(m_debug_gl_file,"T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str()); #ifdef _DEBUG if (sev_counter >= 5) { // Close the file to flush the content on disk before exiting. if (m_debug_gl_file) { fclose(m_debug_gl_file); m_debug_gl_file = NULL; } ASSERT(0); } #endif } uint16 GSDeviceOGL::ConvertBlendEnum(uint16 generic) { switch (generic) { case SRC_COLOR : return GL_SRC_COLOR; case INV_SRC_COLOR : return GL_ONE_MINUS_SRC_COLOR; case DST_COLOR : return GL_DST_COLOR; case INV_DST_COLOR : return GL_ONE_MINUS_DST_COLOR; case SRC1_COLOR : return GL_SRC1_COLOR; case INV_SRC1_COLOR : return GL_ONE_MINUS_SRC1_COLOR; case SRC_ALPHA : return GL_SRC_ALPHA; case INV_SRC_ALPHA : return GL_ONE_MINUS_SRC_ALPHA; case DST_ALPHA : return GL_DST_ALPHA; case INV_DST_ALPHA : return GL_ONE_MINUS_DST_ALPHA; case SRC1_ALPHA : return GL_SRC1_ALPHA; case INV_SRC1_ALPHA : return GL_ONE_MINUS_SRC1_ALPHA; case CONST_COLOR : return GL_CONSTANT_COLOR; case INV_CONST_COLOR : return GL_ONE_MINUS_CONSTANT_COLOR; case CONST_ONE : return GL_ONE; case CONST_ZERO : return GL_ZERO; case OP_ADD : return GL_FUNC_ADD; case OP_SUBTRACT : return GL_FUNC_SUBTRACT; case OP_REV_SUBTRACT : return GL_FUNC_REVERSE_SUBTRACT; default : ASSERT(0); return 0; } }