diff --git a/CMakeLists.txt b/CMakeLists.txt index 97b1566add..1fc9aedc88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,6 +90,8 @@ include(Pcsx2Utils) # Detect current OS detectOperatingSystem() +check_compiler_version("4.7" "4.5") + #------------------------------------------------------------------------------- # Include specific module # BuildParameters Must be done before SearchForStuff @@ -109,6 +111,9 @@ include_directories(${PROJECT_SOURCE_DIR}/common/include ${CMAKE_BINARY_DIR}/common/include # WORKAROUND Some issue with multiarch on Debian/Ubuntu /usr/include/i386-linux-gnu + # WORKAROUND Clang integration issue with multiarch on Debian + #/usr/include/i386-linux-gnu/c++ + #/usr/include/i386-linux-gnu/c++/4.8 ) # make the translation diff --git a/cmake/Pcsx2Utils.cmake b/cmake/Pcsx2Utils.cmake index cc2b16e5c5..a17355afe6 100644 --- a/cmake/Pcsx2Utils.cmake +++ b/cmake/Pcsx2Utils.cmake @@ -50,3 +50,19 @@ function(write_svnrev_h) file(WRITE ${CMAKE_BINARY_DIR}/common/include/svnrev.h "#define SVN_REV 0 \n#define SVN_MODS 0") endif() endfunction() + +function(check_compiler_version version_warn version_err) + if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU") + execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) + string(STRIP "${GCC_VERSION}" GCC_VERSION) + if(GCC_VERSION VERSION_LESS ${version_err}) + message(FATAL_ERROR "PCSX2 doesn't support your old GCC ${GCC_VERSION}! Please upgrade it ! + + The minimum version is ${version_err} but ${version_warn} is warmly recommended") + else() + if(GCC_VERSION VERSION_LESS ${version_warn}) + message(WARNING "PCSX2 will stop to support GCC ${GCC_VERSION} in a near future. Please upgrade it to GCC ${version_warn}.") + endif() + endif() + endif() +endfunction() diff --git a/linux_various/glsl2h.pl b/linux_various/glsl2h.pl index 869d826b11..ea8e97d615 100755 --- a/linux_various/glsl2h.pl +++ b/linux_various/glsl2h.pl @@ -4,8 +4,12 @@ use strict; use warnings; use File::Spec; use File::Basename; +use File::Copy; use Cwd 'abs_path'; +use Digest::file qw/digest_file_hex/; +use Digest::MD5 qw(md5_hex); + my @res = qw/convert interlace merge shadeboost tfx/; my $path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "GSdx", "res"); @@ -14,6 +18,9 @@ foreach my $r (@res) { } glsl2h($path, "fxaa", "fx"); +my $zz_path = File::Spec->catdir(dirname(abs_path($0)), "..", "plugins", "zzogl-pg", "opengl"); +glsl2h($zz_path, "ps2hw_gl4", "glsl"); + sub glsl2h { my $path = shift; my $glsl = shift; @@ -22,13 +29,16 @@ sub glsl2h { my $in = File::Spec->catfile($path, "${glsl}.$ext"); my $out = File::Spec->catfile($path, "${glsl}.h"); open(my $GLSL, "<$in") or die; - open(my $H, ">$out") or die; + + my $data = ""; + + my $include = ""; + if ($in =~ /GSdx/) { + $include = "#include \"stdafx.h\"" + } my $header = <)) { chomp $line; + $line =~ s/\\/\\\\/g; $line =~ s/"/\\"/g; - print $H "\t\"$line\\n\"\n"; + $data .= "\t\"$line\\n\"\n"; + } + $data .= "\t;\n"; + + # Rewriting the file will trigger a relink (even if the content is the + # same). So we check first the content with md5 digest + my $old_md5 = digest_file_hex($out, "MD5"); + my $new_md5 = md5_hex($data); + + if ($old_md5 ne $new_md5) { + open(my $H, ">$out") or die; + print $H $data; } - print $H "\t;\n"; } diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index ea305f81b1..03bb61101e 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -1272,12 +1272,8 @@ void GSDeviceOGL::CompileShaderFromSource(const std::string& glsl_file, const st } if (GLLoader::found_GL_ARB_separate_shader_objects) { version += "#extension GL_ARB_separate_shader_objects : require\n"; - // REMOVE ME: Emulate open source driver - //if (!GLLoader::found_GL_ARB_shading_language_420pack) { - // version += "#define NO_STRUCT 1\n"; - //} } else { - if (GLLoader::found_only_gl30) + if (!GLLoader::fglrx_buggy_driver) version += "#define DISABLE_SSO\n"; } if (GLLoader::found_only_gl30) { diff --git a/plugins/GSdx/GSWndEGL.cpp b/plugins/GSdx/GSWndEGL.cpp index 0bb97bd05d..d2797e8625 100644 --- a/plugins/GSdx/GSWndEGL.cpp +++ b/plugins/GSdx/GSWndEGL.cpp @@ -35,17 +35,15 @@ void GSWndEGL::CreateContext(int major, int minor) EGLint numConfigs; EGLint contextAttribs[] = { - // Not yet supported by Radeon/Gallium -#if 0 EGL_CONTEXT_MAJOR_VERSION_KHR, major, EGL_CONTEXT_MINOR_VERSION_KHR, minor, // Keep compatibility for old cruft //EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR, EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT_KHR, // FIXME : Request a debug context to ease opengl development EGL_CONTEXT_FLAGS_KHR, EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR | EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR, -#endif EGL_NONE }; + EGLint NullContextAttribs[] = { EGL_NONE }; EGLint attrList[] = { EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8, @@ -70,11 +68,18 @@ void GSWndEGL::CreateContext(int major, int minor) throw GSDXRecoverableError(); } - m_eglContext = eglCreateContext(m_eglDisplay, eglConfig, EGL_NO_CONTEXT, contextAttribs ); + m_eglContext = eglCreateContext(m_eglDisplay, eglConfig, EGL_NO_CONTEXT, contextAttribs); + EGLint status = eglGetError(); + if (status == EGL_BAD_ATTRIBUTE) { + // Radeon/Gallium don't support advance attribute. Fallback to random value + fprintf(stderr, "EGL: warning your driver doesn't suport advance openGL context attributes\n"); + m_eglContext = eglCreateContext(m_eglDisplay, eglConfig, EGL_NO_CONTEXT, NullContextAttribs); + status = eglGetError(); + } if ( m_eglContext == EGL_NO_CONTEXT ) { fprintf(stderr,"EGL: Failed to create the context\n"); - fprintf(stderr,"EGL STATUS: %x\n", eglGetError()); + fprintf(stderr,"EGL STATUS: %x\n", status); throw GSDXRecoverableError(); } diff --git a/plugins/GSdx/res/convert.h b/plugins/GSdx/res/convert.h index 25ffec27cd..766f33dc5f 100644 --- a/plugins/GSdx/res/convert.h +++ b/plugins/GSdx/res/convert.h @@ -1,7 +1,4 @@ /* - * Copyright (C) 2011-2013 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * * This file was generated by glsl2h.pl script * * This Program is free software; you can redistribute it and/or modify diff --git a/plugins/GSdx/res/fxaa.h b/plugins/GSdx/res/fxaa.h index d9cb8bbfbe..6d279d4a25 100644 --- a/plugins/GSdx/res/fxaa.h +++ b/plugins/GSdx/res/fxaa.h @@ -1,7 +1,4 @@ /* - * Copyright (C) 2011-2013 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * * This file was generated by glsl2h.pl script * * This Program is free software; you can redistribute it and/or modify diff --git a/plugins/GSdx/res/interlace.h b/plugins/GSdx/res/interlace.h index 375676f5b1..0f6f4a1793 100644 --- a/plugins/GSdx/res/interlace.h +++ b/plugins/GSdx/res/interlace.h @@ -1,7 +1,4 @@ /* - * Copyright (C) 2011-2013 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * * This file was generated by glsl2h.pl script * * This Program is free software; you can redistribute it and/or modify diff --git a/plugins/GSdx/res/merge.h b/plugins/GSdx/res/merge.h index c593c64f91..6d7836d82a 100644 --- a/plugins/GSdx/res/merge.h +++ b/plugins/GSdx/res/merge.h @@ -1,7 +1,4 @@ /* - * Copyright (C) 2011-2013 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * * This file was generated by glsl2h.pl script * * This Program is free software; you can redistribute it and/or modify diff --git a/plugins/GSdx/res/shadeboost.h b/plugins/GSdx/res/shadeboost.h index 09c8ed7939..d1e9117ffa 100644 --- a/plugins/GSdx/res/shadeboost.h +++ b/plugins/GSdx/res/shadeboost.h @@ -1,7 +1,4 @@ /* - * Copyright (C) 2011-2013 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * * This file was generated by glsl2h.pl script * * This Program is free software; you can redistribute it and/or modify diff --git a/plugins/GSdx/res/tfx.h b/plugins/GSdx/res/tfx.h index 1ccec99edd..838a6451aa 100644 --- a/plugins/GSdx/res/tfx.h +++ b/plugins/GSdx/res/tfx.h @@ -1,7 +1,4 @@ /* - * Copyright (C) 2011-2013 Gregory hainaut - * Copyright (C) 2007-2009 Gabest - * * This file was generated by glsl2h.pl script * * This Program is free software; you can redistribute it and/or modify diff --git a/plugins/zzogl-pg/opengl/CMakeLists.txt b/plugins/zzogl-pg/opengl/CMakeLists.txt index 358a70b15b..2576b78173 100644 --- a/plugins/zzogl-pg/opengl/CMakeLists.txt +++ b/plugins/zzogl-pg/opengl/CMakeLists.txt @@ -110,6 +110,7 @@ set(zzoglHeaders GS.h GSDump.h HostMemory.h + ps2hw_gl4.h Mem.h Mem_Swizzle.h Mem_Transmit.h @@ -153,6 +154,9 @@ set(zzoglLinuxHeaders include_directories(. Linux) +# Generate Glsl header file +add_custom_command(OUTPUT ps2hw_gl4.h COMMAND perl ${PROJECT_SOURCE_DIR}/linux_various/glsl2h.pl) + # add library add_library(${Output} SHARED ${zzoglSources} @@ -167,32 +171,24 @@ set_target_properties(${Output} PROPERTIES COMPILE_DEFINITIONS USE_GSOPEN2) # link target with project internal libraries target_link_libraries(${Output} Utilities) -# link target with Cg -if(NOT GLSL_API) - target_link_libraries(${Output} ${CG_LIBRARIES}) -endif(NOT GLSL_API) - -# link target with glew -target_link_libraries(${Output} ${GLEW_LIBRARY}) - -# link target with opengl +# link target with the various opengl flavor target_link_libraries(${Output} ${OPENGL_LIBRARIES}) if(EGL_API AND EGL_FOUND AND EGL_GL_CONTEXT_SUPPORT) target_link_libraries(${Output} ${EGL_LIBRARIES}) endif() +if(NOT GLSL_API) + target_link_libraries(${Output} ${CG_LIBRARIES}) +endif(NOT GLSL_API) + +target_link_libraries(${Output} ${GLEW_LIBRARY}) -# link target with X11 target_link_libraries(${Output} ${X11_LIBRARIES}) - -# link target with jpeg target_link_libraries(${Output} ${JPEG_LIBRARIES}) if(Linux) - # link target with gtk2 target_link_libraries(${Output} ${GTK2_LIBRARIES}) endif(Linux) -# link target with zlib target_link_libraries(${Output} ${ZLIB_LIBRARIES}) # User flags options @@ -202,25 +198,17 @@ endif(NOT USER_CMAKE_LD_FLAGS STREQUAL "") if(PACKAGE_MODE) install(TARGETS ${Output} DESTINATION ${PLUGIN_DIR}) - if(GLSL_API) - install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.glsl DESTINATION ${GLSL_SHADER_DIR}) - install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw_gl4.glsl DESTINATION ${GLSL_SHADER_DIR}) - else(GLSL_API) - if(NOT REBUILD_SHADER) - install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.dat DESTINATION ${PLUGIN_DIR}) - endif(NOT REBUILD_SHADER) - endif(GLSL_API) -else(PACKAGE_MODE) +else() install(TARGETS ${Output} DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins) - if(GLSL_API) - install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.glsl DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins) - install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw_gl4.glsl DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins) - else(GLSL_API) - if(NOT REBUILD_SHADER) - install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.dat DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins) - endif(NOT REBUILD_SHADER) - endif(GLSL_API) -endif(PACKAGE_MODE) +endif() + +if(NOT GLSL_API AND NOT REBUILD_SHADER) + if(PACKAGE_MODE) + install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.dat DESTINATION ${PLUGIN_DIR}) + else() + install(FILES ${PROJECT_SOURCE_DIR}/plugins/zzogl-pg/opengl/ps2hw.dat DESTINATION ${CMAKE_SOURCE_DIR}/bin/plugins) + endif() +endif() ################################### Replay Loader if(BUILD_REPLAY_LOADERS) diff --git a/plugins/zzogl-pg/opengl/GSUniformBufferOGL.h b/plugins/zzogl-pg/opengl/GSUniformBufferOGL.h index f20de010c9..5011da33a7 100644 --- a/plugins/zzogl-pg/opengl/GSUniformBufferOGL.h +++ b/plugins/zzogl-pg/opengl/GSUniformBufferOGL.h @@ -58,10 +58,14 @@ public: void upload(const void* src) { - uint32 flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT; - uint8* dst = (uint8*) glMapBufferRange(target, 0, size, flags); - memcpy(dst, src, size); - glUnmapBuffer(target); + // uint32 flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT; + // uint8* dst = (uint8*) glMapBufferRange(target, 0, size, flags); + // memcpy(dst, src, size); + // glUnmapBuffer(target); + // glMapBufferRange allow to set various parameter but the call is + // synchronous whereas glBufferSubData could be asynchronous. + // TODO: investigate the extension ARB_invalidate_subdata + glBufferSubData(target, 0, size, src); } ~GSUniformBufferOGL() { diff --git a/plugins/zzogl-pg/opengl/GSVertexArrayOGL.h b/plugins/zzogl-pg/opengl/GSVertexArrayOGL.h index b52c6b15e6..e65a52c498 100644 --- a/plugins/zzogl-pg/opengl/GSVertexArrayOGL.h +++ b/plugins/zzogl-pg/opengl/GSVertexArrayOGL.h @@ -34,13 +34,12 @@ struct GSInputLayoutOGL { }; class GSBufferOGL { - size_t m_stride; + const size_t m_stride; size_t m_start; size_t m_count; size_t m_limit; - GLenum m_target; + const GLenum m_target; GLuint m_buffer; - size_t m_default_size; public: GSBufferOGL(GLenum target, size_t stride) : @@ -52,12 +51,12 @@ class GSBufferOGL { { glGenBuffers(1, &m_buffer); // Opengl works best with 1-4MB buffer. - m_default_size = 2 * 1024 * 1024 / m_stride; + m_limit = 2 * 1024 * 1024 / m_stride; } ~GSBufferOGL() { glDeleteBuffers(1, &m_buffer); } - void allocate() { allocate(m_default_size); } + void allocate() { allocate(m_limit); } void allocate(size_t new_limit) { @@ -73,6 +72,7 @@ class GSBufferOGL { void upload(const void* src, uint32 count) { +#if 0 // Upload the data to the buffer void* dst; if (Map(&dst, count)) { @@ -81,15 +81,25 @@ class GSBufferOGL { memcpy(dst, src, m_stride*m_count); Unmap(); } +#else + m_count = count; + + // Current GPU buffer is really too small need to allocate a new one + if (m_count > m_limit) { + allocate(std::max(m_count * 3 / 2, m_limit)); + + } else if (m_count > (m_limit - m_start) ) { + // Not enough left free room. Just go back at the beginning + m_start = 0; + // Orphan the buffer to avoid synchronization + allocate(m_limit); + } + + glBufferSubData(m_target, m_stride * m_start, m_stride * m_count, src); +#endif } bool Map(void** pointer, uint32 count ) { -#ifdef OGL_DEBUG - GLint b_size = -1; - glGetBufferParameteriv(m_target, GL_BUFFER_SIZE, &b_size); - - if (b_size <= 0) return false; -#endif m_count = count; @@ -99,7 +109,7 @@ class GSBufferOGL { // Current GPU buffer is really too small need to allocate a new one if (m_count > m_limit) { - allocate(std::max(m_count * 3 / 2, m_default_size)); + allocate(std::max(m_count * 3 / 2, m_limit)); } else if (m_count > (m_limit - m_start) ) { // Not enough left free room. Just go back at the beginning @@ -116,13 +126,7 @@ class GSBufferOGL { // Upload the data to the buffer *pointer = (uint8*) glMapBufferRange(m_target, m_stride*m_start, m_stride*m_count, map_flags); - //fprintf(stderr, "Map %x from %d to %d\n", *pointer, m_start, m_start+m_count); -#ifdef OGL_DEBUG - if (*pointer == NULL) { - fprintf(stderr, "CRITICAL ERROR map failed for vb!!!\n"); - return false; - } -#endif + return true; } diff --git a/plugins/zzogl-pg/opengl/ZZoglCreate.cpp b/plugins/zzogl-pg/opengl/ZZoglCreate.cpp index 3ed0018923..ae9c6aa50d 100644 --- a/plugins/zzogl-pg/opengl/ZZoglCreate.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglCreate.cpp @@ -457,10 +457,7 @@ bool ZZCreate(int _width, int _height) GPU_TEXWIDTH = min (g_MaxTexWidth/8, 1024); g_fiGPU_TEXWIDTH = 1.0f / GPU_TEXWIDTH; - // FIXME: not clean maybe re integrate the function in shader files --greg -#if defined(GLSL_API) || defined(GLSL4_API) - if (!ZZshCreateOpenShadersFile()) return false; -#else +#if !(defined(GLSL_API) || defined(GLSL4_API)) if (!CreateOpenShadersFile()) return false; #endif diff --git a/plugins/zzogl-pg/opengl/ZZoglShadersGLSL4.cpp b/plugins/zzogl-pg/opengl/ZZoglShadersGLSL4.cpp index 8694165ef7..e128035f47 100644 --- a/plugins/zzogl-pg/opengl/ZZoglShadersGLSL4.cpp +++ b/plugins/zzogl-pg/opengl/ZZoglShadersGLSL4.cpp @@ -50,7 +50,7 @@ #include "zpipe.h" #include #include // this for open(). Maybe linux-specific -#include // and this for mmap +#include "ps2hw_gl4.h" // ----------------- Defines @@ -88,9 +88,6 @@ u8* s_lpShaderResources = NULL; ZZshShaderLink pvs[16] = {sZero}, g_vsprog = sZero, g_psprog = sZero; // 2 -- ZZ ZZshParameter g_vparamPosXY[2] = {pZero}, g_fparamFogColor = pZero; -char* ZZshSource; // Shader's source data. -off_t ZZshSourceSize; - bool g_bCRTCBilinear = true; float4 g_vdepth, vlogz; @@ -175,34 +172,10 @@ bool ZZshStartUsingShaders() { // open shader file according to build target bool ZZshCreateOpenShadersFile() { - std::string ShaderFileName("plugins/ps2hw_gl4.glsl"); - int ShaderFD = open(ShaderFileName.c_str(), O_RDONLY); - struct stat sb; - if ((ShaderFD == -1) || (fstat(ShaderFD, &sb) == -1)) { - // Each linux distributions have his rules for path so we give them the possibility to - // change it with compilation flags. -- Gregory -#ifdef GLSL_SHADER_DIR_COMPILATION -#define xGLSL_SHADER_DIR_str(s) GLSL_SHADER_DIR_str(s) -#define GLSL_SHADER_DIR_str(s) #s - ShaderFileName = string(xGLSL_SHADER_DIR_str(GLSL_SHADER_DIR_COMPILATION)) + "/ps2hw_gl4.glsl"; - ShaderFD = open(ShaderFileName.c_str(), O_RDONLY); -#endif - if ((ShaderFD == -1) || (fstat(ShaderFD, &sb) == -1)) { - ZZLog::Error_Log("No source for %s: \n", ShaderFileName.c_str()); - return false; - } - } - - ZZshSourceSize = sb.st_size; - ZZshSource = (char*)mmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, ShaderFD, 0); // This function directly maped file into memory. - ZZshSource[ ZZshSourceSize - 1] = 0; // Made source null-terminated. - - close(ShaderFD); return true; } void ZZshExitCleaning() { - munmap(ZZshSource, ZZshSourceSize); delete constant_buffer; delete common_buffer; delete vertex_buffer; @@ -299,10 +272,25 @@ static bool ValidateProgram(ZZshProgram Prog) { char* InfoLog = new char[infologlength]; glGetProgramInfoLog(Prog, infologlength, &lenght, InfoLog); ZZLog::Error_Log("Validation %d... %d:\t %s", Prog, infologlength, InfoLog); + delete[] InfoLog; } return (isValid != 0); } +static void ValidatePipeline(GLuint pipeline) { + glValidateProgramPipeline(pipeline); + GLint isValid; + glGetProgramPipelineiv(pipeline, GL_VALIDATE_STATUS, &isValid); + if (!isValid) { + int lenght, infologlength; + glGetProgramPipelineiv(pipeline, GL_INFO_LOG_LENGTH, &infologlength); + char* InfoLog = new char[infologlength]; + glGetProgramPipelineInfoLog(pipeline, infologlength, &lenght, InfoLog); + ZZLog::Error_Log("Validation %d... %d:\t %s", pipeline, infologlength, InfoLog); + delete[] InfoLog; + } +} + inline bool CompileShaderFromFile(ZZshProgram& program, const std::string& DefineString, std::string main_entry, GLenum ShaderType) { std::string header(""); @@ -315,10 +303,18 @@ inline bool CompileShaderFromFile(ZZshProgram& program, const std::string& Defin const GLchar* ShaderSource[2]; +#if 0 + // It sucks because it doesn't report the good line for error/warnings! + // But at least this stupid AMD drivers doesn't crash... + ShaderSource[0] = header.append(ps2hw_gl4_glsl).c_str(); + program = glCreateShaderProgramv(ShaderType, 1, &ShaderSource[0]); + +#else ShaderSource[0] = header.c_str(); - ShaderSource[1] = (const GLchar*)ZZshSource; + ShaderSource[1] = ps2hw_gl4_glsl; program = glCreateShaderProgramv(ShaderType, 2, &ShaderSource[0]); +#endif ZZLog::Debug_Log("Creating program %d for %s", program, main_entry.c_str()); @@ -343,10 +339,7 @@ void ZZshSetupShader() { // shaders given the current GL state" // It might be a good idea to validate the pipeline also in release mode??? #if defined(DEVBUILD) || defined(_DEBUG) - glValidateProgramPipeline(s_pipeline); - GLint isValid; - glGetProgramPipelineiv(s_pipeline, GL_VALIDATE_STATUS, &isValid); - if (!isValid) ZZLog::Error_Log("Something weird happened on pipeline validation."); + ValidatePipeline(s_pipeline); #endif PutParametersInProgram(vs, ps); diff --git a/plugins/zzogl-pg/opengl/ps2hw_gl4.glsl b/plugins/zzogl-pg/opengl/ps2hw_gl4.glsl index 8227dae51e..95796ebd5e 100644 --- a/plugins/zzogl-pg/opengl/ps2hw_gl4.glsl +++ b/plugins/zzogl-pg/opengl/ps2hw_gl4.glsl @@ -58,8 +58,8 @@ struct vertex { vec4 color; - TEX_DECL tex; vec4 Z; + TEX_DECL tex; float fog; }; diff --git a/plugins/zzogl-pg/opengl/ps2hw_gl4.h b/plugins/zzogl-pg/opengl/ps2hw_gl4.h new file mode 100644 index 0000000000..56028376e5 --- /dev/null +++ b/plugins/zzogl-pg/opengl/ps2hw_gl4.h @@ -0,0 +1,941 @@ +/* + * This file was generated by glsl2h.pl script + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + + + +static const char* ps2hw_gl4_glsl = + "//#version 420 Keep it for text editor detection\n" + "\n" + "// ZZ Open GL graphics plugin\n" + "// Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com, gregory.hainaut@gmail.com\n" + "// Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008\n" + "//\n" + "// This program is free software; you can redistribute it and/or modify\n" + "// it under the terms of the GNU General Public License as published by\n" + "// the Free Software Foundation; either version 2 of the License, or\n" + "// (at your option) any later version.\n" + "//\n" + "// This program is distributed in the hope that it will be useful,\n" + "// but WITHOUT ANY WARRANTY; without even the implied warranty of\n" + "// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" + "// GNU General Public License for more details.\n" + "//\n" + "// You should have received a copy of the GNU General Public License\n" + "// along with this program; if not, write to the Free Software\n" + "// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA\n" + "\n" + "// divides by z for every pixel, instead of in vertex shader\n" + "// fixes kh textures\n" + "\n" + "#extension GL_ARB_texture_rectangle: require\n" + "#extension GL_ARB_shading_language_420pack: require\n" + "#extension GL_ARB_separate_shader_objects : require\n" + "\n" + "\n" + "//#define TEST_AEM // tests AEM for black pixels\n" + "//#define REGION_REPEAT // set if texture wrapping mode is region repeat\n" + "//#define WRITE_DEPTH // set if depth is also written in a MRT\n" + "//#define ACCURATE_DECOMPRESSION // set for less capable hardware ATI Radeon 9000 series\n" + "//#define EXACT_COLOR // make sure the output color is clamped to 1/255 boundaries (for alpha testing)\n" + "//#define NO_LOGZ // disable logz\n" + "\n" + "#define PERSPECTIVE_CORRECT_TEX\n" + "\n" + "#ifdef PERSPECTIVE_CORRECT_TEX\n" + "#define TEX_XY tex.xy/tex.z\n" + "#define TEX_DECL vec3\n" + "#else\n" + "#define TEX_XY tex.xy\n" + "#define TEX_DECL vec2\n" + "#endif\n" + "\n" + "// NVidia CG-data types\n" + "#define half2 vec2\n" + "#define half3 vec3\n" + "#define half4 vec4\n" + "#define float2 vec2\n" + "#define float3 vec3\n" + "#define float4 vec4\n" + "\n" + "////////////////////////////////////////////////////////////////////\n" + "// INPUT/OUTPUT\n" + "////////////////////////////////////////////////////////////////////\n" + "// NOTE: Future optimization tex.w is normally useless (in cg it is a float3) so it can contains the fog value\n" + "struct vertex\n" + "{\n" + " vec4 color;\n" + " vec4 Z;\n" + " TEX_DECL tex;\n" + " float fog;\n" + "};\n" + "\n" + "#ifdef VERTEX_SHADER\n" + "out gl_PerVertex {\n" + " invariant vec4 gl_Position;\n" + " float gl_PointSize;\n" + " float gl_ClipDistance[];\n" + "};\n" + "\n" + "layout(location = 0) in ivec4 Vert;\n" + "layout(location = 1) in vec4 Color;\n" + "layout(location = 2) in vec4 SecondaryColor;\n" + "layout(location = 3) in vec3 TexCoord;\n" + "\n" + "layout(location = 0) out vertex VSout;\n" + "\n" + "#endif\n" + "\n" + "#ifdef FRAGMENT_SHADER\n" + "\n" + "layout(location = 0) in vertex PSin;\n" + "\n" + "// FIXME: host only do glDrawBuffers of 1 buffers not 2. I think this is a major bug\n" + "layout(location = 0) out vec4 FragData0;\n" + "layout(location = 1) out vec4 FragData1;\n" + "\n" + "#endif\n" + "\n" + "////////////////////////////////////////////////////////////////////\n" + "// Texture SAMPLER\n" + "////////////////////////////////////////////////////////////////////\n" + "// // main ps2 memory, each pixel is stored in 32bit color\n" + "// uniform sampler2DRect g_sMemory[2];\n" + "//\n" + "// // used to get the tiled offset into a page given the linear offset\n" + "// uniform sampler2DRect g_sSrcFinal;\n" + "// uniform sampler2D g_sBlocks;\n" + "// uniform sampler2D g_sBilinearBlocks;\n" + "// uniform sampler2D g_sConv16to32;\n" + "// uniform sampler3D g_sConv32to16;\n" + "// uniform sampler2DRect g_sBitwiseANDX;\n" + "// uniform sampler2DRect g_sBitwiseANDY;\n" + "// uniform sampler2DRect g_sInterlace;\n" + "//\n" + "// // used only on rare cases where the render target is PSMT8H\n" + "// uniform sampler2D g_sCLUT;\n" + "// main ps2 memory, each pixel is stored in 32bit color\n" + "layout(binding = 10) uniform sampler2DRect g_sMemory; // dual context\n" + "\n" + "// used to get the tiled offset into a page given the linear offset\n" + "layout(binding = 1) uniform sampler2DRect g_sSrcFinal;\n" + "layout(binding = 2) uniform sampler2D g_sBlocks;\n" + "layout(binding = 3) uniform sampler2D g_sBilinearBlocks;\n" + "layout(binding = 4) uniform sampler2D g_sConv16to32;\n" + "layout(binding = 5) uniform sampler3D g_sConv32to16;\n" + "layout(binding = 6) uniform sampler2DRect g_sBitwiseANDX;\n" + "layout(binding = 7) uniform sampler2DRect g_sBitwiseANDY;\n" + "layout(binding = 8) uniform sampler2DRect g_sInterlace;\n" + "\n" + "// used only on rare cases where the render target is PSMT8H\n" + "layout(binding = 9) uniform sampler2D g_sCLUT;\n" + "\n" + "////////////////////////////////////////////////////////////////////\n" + "// UNIFORM BUFFER\n" + "////////////////////////////////////////////////////////////////////\n" + "layout(std140, binding = 0) uniform constant_buffer\n" + "{\n" + " // Both shader\n" + " // .z is used for the addressing fn\n" + " // FIXME: not same value between c and shader...\n" + " // float4 g_fBilinear = float4(-0.7f, -0.65f, 0.9,1/32767.0f);\n" + " float4 g_fBilinear;\n" + " float4 g_fZBias;\n" + " float4 g_fc0;\n" + " float4 g_fMult;\n" + " // Vertex\n" + " float4 g_fZ; // transforms d3dcolor z into float z\n" + " // Pixel\n" + " half4 g_fExactColor;\n" + "};\n" + "layout(std140, binding = 1) uniform common_buffer\n" + "{\n" + " float4 g_fPosXY;\n" + " float4 g_fFogColor;\n" + "};\n" + "layout(std140, binding = 2) uniform vertex_buffer\n" + "{\n" + " float4 g_fBitBltPos;\n" + " float4 g_fBitBltTex;\n" + " float4 g_fBitBltTrans;\n" + "};\n" + "layout(std140, binding = 3) uniform fragment_buffer\n" + "{\n" + " half4 fTexAlpha2;\n" + "\n" + " float4 g_fTexOffset; // converts the page and block offsets into the mem addr/1024\n" + " float4 g_fTexDims; // mult by tex dims when accessing the block texture\n" + " float4 g_fTexBlock;\n" + "\n" + " float4 g_fClampExts; // if clamping the texture, use (minu, minv, maxu, maxv)\n" + " float4 TexWrapMode; // 0 - repeat/clamp, 1 - region rep (use fRegRepMask)\n" + "\n" + " float4 g_fRealTexDims; // tex dims used for linear filtering (w,h,1/w,1/h)\n" + "\n" + " // (alpha0, alpha1, 1 if highlight2 and tcc is rgba, 1-y)\n" + " half4 g_fTestBlack; // used for aem bit\n" + "\n" + " float4 g_fPageOffset;\n" + "\n" + " half4 fTexAlpha;\n" + "\n" + " float4 g_fInvTexDims; // similar to g_fClutOff\n" + "\n" + " // used for rectblitting\n" + " float4 g_fBitBltZ;\n" + "\n" + " half4 g_fOneColor; // col*.xxxy+.zzzw\n" + "};\n" + "\n" + "\n" + "// given a local tex coord, returns the coord in the memory\n" + "float2 ps2memcoord(float2 realtex)\n" + "{\n" + " float4 off;\n" + "\n" + " // block off\n" + " realtex.xy = realtex.xy * g_fTexDims.xy + g_fTexDims.zw;\n" + " realtex.xy = (realtex.xy - fract(realtex.xy)) * g_fMult.zw;\n" + " float2 fblock = fract(realtex.xy);\n" + " off.xy = realtex.xy-fblock.xy;\n" + "\n" + "#ifdef ACCURATE_DECOMPRESSION\n" + " off.z = texture(g_sBlocks, g_fTexBlock.xy*fblock + g_fTexBlock.zw).r;\n" + " off.x = dot(off.xy, g_fTexOffset.xy);\n" + " float r = g_fTexOffset.w;\n" + " float f = fract(off.x);\n" + " float fadd = g_fTexOffset.z * off.z;\n" + " off.w = off.x + fadd + r;\n" + " off.x = fract(f + fadd + r);\n" + " off.w -= off.x ;\n" + "#else\n" + " off.z = texture(g_sBlocks, g_fTexBlock.xy*fblock + g_fTexBlock.zw).r;\n" + "\n" + " // combine the two\n" + " off.x = dot(off.xyz, g_fTexOffset.xyz)+g_fTexOffset.w;\n" + " off.x = modf(off.x, off.w);\n" + "#endif\n" + "\n" + " off.xy = off.xw * g_fPageOffset.zy + g_fPageOffset.wx;\n" + " //off.y = off.w * g_fPageOffset.y + g_fPageOffset.x;\n" + " return off.xy;\n" + "}\n" + "\n" + "// find all texcoords for bilinear filtering\n" + "// assume that orgtex are already on boundaries\n" + "void ps2memcoord4(float4 orgtex, out float4 off0, out float4 off1)\n" + "{\n" + " //float4 off0, off1, off2, off3;\n" + " float4 realtex;\n" + "\n" + " // block off\n" + " realtex = (orgtex * g_fTexDims.xyxy + g_fTexDims.zwzw);// * g_fMult.zwzw;\n" + " float4 fblock = fract(realtex.xyzw);\n" + " float4 ftransblock = g_fTexBlock.xyxy*fblock + g_fTexBlock.zwzw;\n" + " realtex -= fblock;\n" + "\n" + " float4 transvals = g_fTexOffset.x * realtex.xzxz + g_fTexOffset.y * realtex.yyww + g_fTexOffset.w;\n" + "\n" + " float4 colors;// = texture(g_sBilinearBlocks, ftransblock.xy);\n" + "\n" + " // this is faster on ffx ingame\n" + " colors.x = texture(g_sBlocks, ftransblock.xy).r;\n" + " colors.y = texture(g_sBlocks, ftransblock.zy).r;\n" + " colors.z = texture(g_sBlocks, ftransblock.xw).r;\n" + " colors.w = texture(g_sBlocks, ftransblock.zw).r;\n" + "\n" + " float4 fr, rem;\n" + "\n" + "#ifdef ACCURATE_DECOMPRESSION\n" + " fr = fract(transvals);\n" + " float4 fadd = colors * g_fTexOffset.z;\n" + " rem = transvals + fadd;\n" + " fr = fract(fr + fadd);\n" + " rem -= fr;\n" + "#else\n" + " transvals += colors * g_fTexOffset.z;\n" + "\n" + " fr = modf(transvals, rem);\n" + "#endif\n" + "\n" + " rem = rem * g_fPageOffset.y + g_fPageOffset.x;\n" + " fr = fr * g_fPageOffset.z + g_fPageOffset.w;\n" + "\n" + " // combine\n" + " off0 = g_fc0.yxyx * fr.xxyy + g_fc0.xyxy * rem.xxyy;\n" + " off1 = g_fc0.yxyx * fr.zzww + g_fc0.xyxy * rem.zzww;\n" + "}\n" + "\n" + "void ps2memcoord4_fast(float4 orgtex, out float4 off0, out float4 off1)\n" + "{\n" + " float4 realtex;\n" + "\n" + " realtex = (orgtex * g_fTexDims.xyxy + g_fTexDims.zwzw);// * g_fMult.zwzw;\n" + " float4 fblock = fract(realtex.xyzw);\n" + " float2 ftransblock = g_fTexBlock.xy*fblock.xy + g_fTexBlock.zw;\n" + " realtex -= fblock;\n" + "\n" + " float4 transvals = g_fTexOffset.x * realtex.xzxz + g_fTexOffset.y * realtex.yyww + g_fTexOffset.w;\n" + "\n" + " float4 colors = texture(g_sBilinearBlocks, ftransblock.xy);\n" + " float4 fr, rem;\n" + "\n" + "#ifdef ACCURATE_DECOMPRESSION\n" + " fr = fract(transvals);\n" + " float4 fadd = colors * g_fTexOffset.z;\n" + " rem = transvals + fadd;\n" + " fr = fract(fr + fadd);\n" + " rem -= fr;\n" + "#else\n" + " transvals += colors * g_fTexOffset.z;\n" + "\n" + " fr = modf(transvals, rem);\n" + "#endif\n" + "\n" + " rem = rem * g_fPageOffset.y + g_fPageOffset.x;\n" + " fr = fr * g_fPageOffset.z;\n" + "\n" + " off0 = g_fc0.yxyx * fr.xxyy + g_fc0.xyxy * rem.xxyy;\n" + " off1 = g_fc0.yxyx * fr.zzww + g_fc0.xyxy * rem.zzww;\n" + "}\n" + "\n" + "// Wrapping modes\n" + "#if defined(REPEAT)\n" + "\n" + "float2 ps2addr(float2 coord)\n" + "{\n" + " return fract(coord.xy);\n" + "}\n" + "\n" + "#elif defined(CLAMP)\n" + "\n" + "float2 ps2addr(float2 coord)\n" + "{\n" + " return clamp(coord.xy, g_fClampExts.xy, g_fClampExts.zw);\n" + "}\n" + "\n" + "#elif defined(REGION_REPEAT)\n" + "\n" + "// computes the local tex coord along with addressing modes\n" + "float2 ps2addr(float2 coord)\n" + "{\n" + " float2 final = fract(clamp(coord.xy, g_fClampExts.xy, g_fClampExts.zw));\n" + "\n" + " if( TexWrapMode.x > g_fBilinear.z ) // region repeat mode for x (umsk&x)|ufix\n" + " final.x = texture(g_sBitwiseANDX, abs(coord.x)*TexWrapMode.zx).x * g_fClampExts.x + g_fClampExts.z;\n" + " if( TexWrapMode.y > g_fBilinear.z ) // region repeat mode for x (vmsk&x)|vfix\n" + " final.y = texture(g_sBitwiseANDY, abs(coord.y)*TexWrapMode.wy).x * g_fClampExts.y + g_fClampExts.w;\n" + "\n" + " return final;\n" + "}\n" + "\n" + "#else\n" + "\n" + "float2 ps2addr(float2 coord)\n" + "{\n" + " return fract(clamp(coord.xy, g_fClampExts.xy, g_fClampExts.zw));\n" + "}\n" + "\n" + "#endif\n" + "\n" + "half4 tex2DPS_32(float2 tex0)\n" + "{\n" + " return texture(g_sMemory, ps2memcoord(tex0));\n" + "}\n" + "\n" + "// use when texture is not tiled -- shader 1\n" + "half4 tex2DPS_tex32(float2 tex0)\n" + "{\n" + " return texture(g_sMemory, g_fTexDims.xy*tex0+g_fTexDims.zw)*g_fZBias.zzzw+g_fPageOffset.w;\n" + "}\n" + "\n" + "// use when texture is not tiled -- shader 2\n" + "half4 tex2DPS_clut32(float2 tex0)\n" + "{\n" + " float index = texture(g_sMemory, g_fTexDims.xy*tex0+g_fTexDims.zw).a+g_fPageOffset.w;\n" + " return texture(g_sCLUT, index*g_fExactColor.xz+g_fExactColor.yz);\n" + "}\n" + "\n" + "// Shader 3\n" + "// use when texture is not tiled and converting from 32bit to 16bit\n" + "// don't convert on the block level, only on the column level\n" + "// so every other 8 pixels, use the upper bits instead of lower\n" + "half4 tex2DPS_tex32to16(float2 tex0)\n" + "{\n" + " bool upper = false;\n" + " tex0.y += g_fPageOffset.z;\n" + " float2 ffrac = mod(tex0, g_fTexOffset.xy);\n" + " tex0.xy = g_fc0.ww * (tex0.xy + ffrac);\n" + " if( ffrac.x > g_fTexOffset.z ) {\n" + " tex0.x -= g_fTexOffset.z;\n" + " upper = true;\n" + " }\n" + " if( ffrac.y >= g_fTexOffset.w ) {\n" + " tex0.y -= g_fTexOffset.w;\n" + " tex0.x += g_fc0.w;\n" + " }\n" + "\n" + " half4 color = texture(g_sMemory, g_fTexDims.xy*tex0+g_fTexDims.zw)*g_fZBias.zzzw+g_fPageOffset.w;\n" + " float2 uv = upper ? color.xw : color.zy;\n" + " return texture(g_sConv16to32, uv+g_fPageOffset.xy);\n" + "}\n" + "\n" + "// Shader 4\n" + "// used when a 16 bit texture is used an 8h\n" + "half4 tex2DPS_tex16to8h(float2 tex0)\n" + "{\n" + " float4 final;\n" + " float2 ffrac = mod(tex0+g_fPageOffset.zw, g_fTexOffset.xy);\n" + " tex0.xy = g_fPageOffset.xy * tex0.xy - ffrac * g_fc0.yw;\n" + "\n" + " if( ffrac.x > g_fTexOffset.x*g_fc0.w )\n" + " tex0.x += g_fTexOffset.x*g_fc0.w;\n" + " if( tex0.x >= g_fc0.y ) tex0 += g_fTexOffset.zw;\n" + "\n" + " float4 upper = texture(g_sMemory, g_fTexDims.xy*tex0+g_fTexDims.zw);\n" + "\n" + " // only need alpha\n" + " float index = texture(g_sConv32to16, upper.zyx-g_fc0.z).y + upper.w*g_fc0.w*g_fc0.w;\n" + " return texture(g_sCLUT, index+g_fExactColor.yz);\n" + "}\n" + "\n" + "// Shader 5\n" + "// used when a 16 bit texture is used a 32bit one\n" + "half4 tex2DPS_tex16to32(float2 tex0)\n" + "{\n" + " float4 final;\n" + " float2 ffrac = mod(tex0+g_fPageOffset.zw, g_fTexOffset.xy);\n" + " //tex0.xy = g_fPageOffset.xy * tex0.xy - ffrac * g_fc0.yw;\n" + " tex0.y += g_fPageOffset.y * ffrac.y;\n" + "\n" + " if( ffrac.x > g_fTexOffset.z ) {\n" + " tex0.x -= g_fTexOffset.z;\n" + " tex0.y += g_fTexOffset.w;\n" + " }\n" + "\n" + " float fconst = g_fc0.w*g_fc0.w;\n" + " float4 lower = texture(g_sSrcFinal, g_fTexDims.xy*tex0);\n" + " float4 upper = texture(g_sMemory, g_fTexDims.xy*tex0+g_fTexDims.zw);\n" + "\n" + " final.zy = texture(g_sConv32to16, lower.zyx).xy + lower.ww*fconst;\n" + " final.xw = texture(g_sConv32to16, upper.zyx).xy + upper.ww*fconst;\n" + " return final;\n" + "}\n" + "\n" + "half4 tex2DPS_tex16to32h(float2 tex0)\n" + "{\n" + " float4 final = vec4(0.0, 0.0, 0.0, 0.0);\n" + " return final;\n" + "}\n" + "\n" + "//half4 f;\n" + "//f.w = old.y > (127.2f/255.0f) ? 1 : 0;\n" + "//old.y -= 0.5f * f.w;\n" + "//f.xyz = fract(old.yyx*half3(2.002*255.0f/256.0f, 64.025f*255.0f/256.0f, 8.002*255.0f/256.0f));\n" + "//f.y += old.x * (0.25f*255.0f/256.0f);\n" + "\n" + "////////////////////////////////\n" + "// calculates the texture color\n" + "////////////////////////////////\n" + "\n" + "#define decl_ps2shade(num) \\\n" + "decl_ps2shade_##num(_32) \\\n" + "decl_ps2shade_##num(_tex32) \\\n" + "decl_ps2shade_##num(_clut32) \\\n" + "decl_ps2shade_##num(_tex32to16) \\\n" + "decl_ps2shade_##num(_tex16to8h) \\\n" + "decl_ps2shade_##num(_tex16to32h)\n" + "\n" + "// nearest\n" + "#define decl_ps2shade_0(bit) \\\n" + "float4 ps2shade0##bit( TEX_DECL tex) \\\n" + "{ \\\n" + " return tex2DPS##bit( ps2addr(TEX_XY)); \\\n" + "}\n" + "\n" + "// do fast memcoord4 calcs when textures behave well\n" + "#ifdef REPEAT\n" + "#define PS2MEMCOORD4 ps2memcoord4\n" + "#else\n" + "#define PS2MEMCOORD4 ps2memcoord4\n" + "#endif\n" + "\n" + "\n" + "#define decl_BilinearFilter(bit, addrfn) \\\n" + "half4 BilinearFilter##bit(float2 tex0) \\\n" + "{ \\\n" + " float4 off0, off1; \\\n" + " float4 ftex; \\\n" + " float2 ffrac; \\\n" + " ftex.xy = tex0 + g_fBilinear.xy * g_fRealTexDims.zw; \\\n" + " ffrac = fract(ftex.xy*g_fRealTexDims.xy); \\\n" + " ftex.xy -= ffrac.xy * g_fRealTexDims.zw; \\\n" + " \\\n" + " ftex.zw = ps2addr(ftex.xy + g_fRealTexDims.zw); \\\n" + " ftex.xy = ps2addr(ftex.xy); \\\n" + " \\\n" + " PS2MEMCOORD4(ftex, off0, off1); \\\n" + " half4 c0 = texture(g_sMemory, off0.xy); \\\n" + " half4 c1 = texture(g_sMemory, off0.zw); \\\n" + " half4 c2 = texture(g_sMemory, off1.xy); \\\n" + " half4 c3 = texture(g_sMemory, off1.zw); \\\n" + " return mix( mix(c0, c1, vec4(ffrac.x)), mix(c2, c3, ffrac.x), vec4(ffrac.y) ); \\\n" + "}\n" + "\n" + "decl_BilinearFilter(_32, ps2addr)\n" + "decl_BilinearFilter(_tex32, ps2addr)\n" + "decl_BilinearFilter(_clut32, ps2addr)\n" + "decl_BilinearFilter(_tex32to16, ps2addr)\n" + "decl_BilinearFilter(_tex16to8h, ps2addr)\n" + "decl_BilinearFilter(_tex16to32h, ps2addr)\n" + "\n" + "//TODO! For mip maps, only apply when LOD >= 0\n" + "// lcm == 0, LOD = log(1/Q)*L + K, lcm == 1, LOD = K\n" + "\n" + "// bilinear\n" + "#define decl_ps2shade_1(bit) \\\n" + "half4 ps2shade1##bit(TEX_DECL tex) \\\n" + "{ \\\n" + " return BilinearFilter##bit(TEX_XY); \\\n" + "}\n" + "\n" + "// nearest, mip nearest\n" + "#define decl_ps2shade_2(bit) \\\n" + "half4 ps2shade2##bit(TEX_DECL tex) \\\n" + "{ \\\n" + " return tex2DPS##bit( ps2addr(TEX_XY)); \\\n" + "}\n" + "\n" + "// nearest, mip linear\n" + "#define decl_ps2shade_3(bit) \\\n" + "half4 ps2shade3##bit(TEX_DECL tex) \\\n" + "{ \\\n" + " return tex2DPS##bit(ps2addr(TEX_XY)); \\\n" + "}\n" + "\n" + "// linear, mip nearest\n" + "#define decl_ps2shade_4(bit) \\\n" + "half4 ps2shade4##bit(TEX_DECL tex) \\\n" + "{ \\\n" + " return BilinearFilter##bit(TEX_XY); \\\n" + "}\n" + "\n" + "// linear, mip linear\n" + "#define decl_ps2shade_5(bit) \\\n" + "half4 ps2shade5##bit(TEX_DECL tex) \\\n" + "{ \\\n" + " return BilinearFilter##bit(TEX_XY); \\\n" + "}\n" + "\n" + "decl_ps2shade(0)\n" + "decl_ps2shade(1)\n" + "decl_ps2shade(2)\n" + "decl_ps2shade(3)\n" + "decl_ps2shade(4)\n" + "decl_ps2shade(5)\n" + "\n" + "\n" + "half4 ps2CalcShade(half4 texcol, half4 color)\n" + "{\n" + "#ifdef TEST_AEM\n" + " if( dot(texcol.xyzw, g_fTestBlack.xyzw) <= g_fc0.z )\n" + " texcol.w = g_fc0.x;\n" + " else\n" + "#endif\n" + " texcol.w = texcol.w * fTexAlpha.y + fTexAlpha.x;\n" + "\n" + " texcol = texcol * (fTexAlpha2.zzzw * color + fTexAlpha2.xxxy) + fTexAlpha.zzzw * color.wwww;\n" + "\n" + " return texcol;\n" + "}\n" + "\n" + "// final ops on the color\n" + "#ifdef EXACT_COLOR\n" + "\n" + "half4 ps2FinalColor(half4 col)\n" + "{\n" + " // g_fOneColor has to scale by 255\n" + " half4 temp = col * g_fOneColor.xxxy + g_fOneColor.zzzw;\n" + " temp.w = floor(temp.w)*g_fExactColor.w;\n" + " return temp;\n" + "}\n" + "\n" + "#else\n" + "half4 ps2FinalColor(half4 col)\n" + "{\n" + " return col * g_fOneColor.xxxy + g_fOneColor.zzzw;\n" + "}\n" + "#endif\n" + "\n" + "\n" + "#ifdef FRAGMENT_SHADER // This is code only for FRAGMENTS (pixel shader)\n" + "\n" + "#ifdef WRITE_DEPTH\n" + "void write_depth_target()\n" + "{\n" + " FragData1 = PSin.Z; \n" + "}\n" + "#else\n" + "void write_depth_target() { }\n" + "#endif\n" + "\n" + "void RegularPS() {\n" + " // whenever outputting depth, make sure to mult by 255/256 and 1\n" + " FragData0 = ps2FinalColor(PSin.color);\n" + " write_depth_target();\n" + "}\n" + "\n" + "#define DECL_TEXPS(num, bit) \\\n" + "void Texture##num##bit##PS() \\\n" + "{ \\\n" + " FragData0 = ps2FinalColor(ps2CalcShade(ps2shade##num##bit(PSin.tex), PSin.color)); \\\n" + " write_depth_target(); \\\n" + "}\n" + "\n" + "#define DECL_TEXPS_(num) \\\n" + "DECL_TEXPS(num, _32) \\\n" + "DECL_TEXPS(num, _tex32) \\\n" + "DECL_TEXPS(num, _clut32) \\\n" + "DECL_TEXPS(num, _tex32to16) \\\n" + "DECL_TEXPS(num, _tex16to8h)\n" + "\n" + "DECL_TEXPS_(0)\n" + "DECL_TEXPS_(1)\n" + "DECL_TEXPS_(2)\n" + "DECL_TEXPS_(3)\n" + "DECL_TEXPS_(4)\n" + "DECL_TEXPS_(5)\n" + "\n" + "void RegularFogPS() {\n" + " half4 c;\n" + " c.xyz = mix(g_fFogColor.xyz, PSin.color.xyz, vec3(PSin.fog));\n" + " c.w = PSin.color.w;\n" + " FragData0 = ps2FinalColor(c);\n" + " write_depth_target();\n" + "}\n" + "\n" + "#define DECL_TEXFOGPS(num, bit) \\\n" + "void TextureFog##num##bit##PS() \\\n" + "{ \\\n" + " half4 c = ps2CalcShade(ps2shade##num##bit(PSin.tex), PSin.color); \\\n" + " c.xyz = mix(g_fFogColor.xyz, c.xyz, vec3(PSin.fog)); \\\n" + " FragData0 = ps2FinalColor(c); \\\n" + " write_depth_target(); \\\n" + "}\n" + "\n" + "#define DECL_TEXFOGPS_(num) \\\n" + "DECL_TEXFOGPS(num, _32) \\\n" + "DECL_TEXFOGPS(num, _tex32) \\\n" + "DECL_TEXFOGPS(num, _clut32) \\\n" + "DECL_TEXFOGPS(num, _tex32to16) \\\n" + "DECL_TEXFOGPS(num, _tex16to8h)\n" + "\n" + "DECL_TEXFOGPS_(0)\n" + "DECL_TEXFOGPS_(1)\n" + "DECL_TEXFOGPS_(2)\n" + "DECL_TEXFOGPS_(3)\n" + "DECL_TEXFOGPS_(4)\n" + "DECL_TEXFOGPS_(5)\n" + "\n" + "//-------------------------------------------------------\n" + "// Techniques not related to the main primitive commands\n" + "half4 BilinearBitBlt(float2 tex0)\n" + "{\n" + " float4 ftex;\n" + " float2 ffrac;\n" + "\n" + " ffrac.xy = fract(tex0*g_fRealTexDims.xy);\n" + " ftex.xy = tex0 - ffrac.xy * g_fRealTexDims.zw;\n" + " ftex.zw = ftex.xy + g_fRealTexDims.zw;\n" + "\n" + " float4 off0, off1;\n" + " ps2memcoord4_fast(ftex, off0, off1);\n" + " half4 c0 = texture(g_sMemory, off0.xy);\n" + " half4 c1 = texture(g_sMemory, off0.zw);\n" + " half4 c2 = texture(g_sMemory, off1.xy);\n" + " half4 c3 = texture(g_sMemory, off1.zw);\n" + "\n" + " return mix( mix(c0, c1, vec4(ffrac.x)), mix(c2, c3, vec4(ffrac.x)), vec4(ffrac.y) );\n" + "}\n" + "\n" + "void BitBltPS() {\n" + " FragData0 = texture(g_sMemory, ps2memcoord(PSin.tex.xy))*g_fOneColor.xxxy;\n" + "}\n" + "\n" + "// used when AA\n" + "void BitBltAAPS() {\n" + " FragData0 = BilinearBitBlt(PSin.tex.xy) * g_fOneColor.xxxy;\n" + "}\n" + "\n" + "void BitBltDepthPS() {\n" + " vec4 data;\n" + " data = texture(g_sMemory, ps2memcoord(PSin.tex.xy));\n" + " FragData0 = data + g_fZBias.y;\n" + "#ifdef NO_LOGZ\n" + " gl_FragDepth = dot(data, g_fBitBltZ);\n" + "#else\n" + " gl_FragDepth = log(g_fc0.y + dot(data, g_fBitBltZ)) * g_fOneColor.w;\n" + "#endif\n" + "}\n" + "\n" + "void BitBltDepthMRTPS() {\n" + " vec4 data;\n" + " data = texture(g_sMemory, ps2memcoord(PSin.tex.xy));\n" + " FragData0 = data + g_fZBias.y;\n" + " FragData1.x = g_fc0.x;\n" + "#ifdef NO_LOGZ\n" + " gl_FragDepth = dot(data, g_fBitBltZ);\n" + "#else\n" + " gl_FragDepth = log(g_fc0.y + dot(data, g_fBitBltZ)) * g_fOneColor.w;\n" + "#endif\n" + "}\n" + "\n" + "// static const float BlurKernel[9] = {\n" + "// 0.027601,\n" + "// 0.066213,\n" + "// 0.123701,\n" + "// 0.179952,\n" + "// 0.205065,\n" + "// 0.179952,\n" + "// 0.123701,\n" + "// 0.066213,\n" + "// 0.027601\n" + "// };\n" + "\n" + "half4 BilinearFloat16(float2 tex0)\n" + "{\n" + " return texture(g_sSrcFinal, tex0.xy);\n" + "}\n" + "\n" + "void CRTCTargInterPS() {\n" + " float finter = texture(g_sInterlace, PSin.Z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;\n" + " float4 c = BilinearFloat16(PSin.tex.xy);\n" + " c.w = ( g_fc0.w*c.w * g_fOneColor.x + g_fOneColor.y ) * finter;\n" + " FragData0 = c;\n" + "}\n" + "\n" + "void CRTCTargPS() {\n" + " float4 c = BilinearFloat16(PSin.tex.xy);\n" + " // FIXME DEBUG: to validate tex coord on blit\n" + " //c = vec4(PSin.tex.x/512.0f, PSin.tex.y/512.0f, 0.0, 1.0);\n" + " c.w = g_fc0.w * c.w * g_fOneColor.x + g_fOneColor.y;\n" + " FragData0 = c;\n" + "}\n" + "\n" + "void CRTCInterPS() {\n" + " float finter = texture(g_sInterlace, PSin.Z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;\n" + " float2 filtcoord = trunc(PSin.tex.xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;\n" + " half4 c = BilinearBitBlt(filtcoord);\n" + " c.w = (c.w * g_fOneColor.x + g_fOneColor.y)*finter;\n" + " FragData0 = c;\n" + "}\n" + "\n" + "// simpler\n" + "void CRTCInterPS_Nearest() {\n" + " float finter = texture(g_sInterlace, PSin.Z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;\n" + " half4 c = texture(g_sMemory, ps2memcoord(PSin.tex.xy));\n" + " c.w = (c.w * g_fOneColor.x + g_fOneColor.y)*finter;\n" + " FragData0 = c;\n" + "}\n" + "\n" + "void CRTCPS() {\n" + " float2 filtcoord = PSin.tex.xy * g_fInvTexDims.xy+g_fInvTexDims.zw;\n" + " half4 c = BilinearBitBlt(filtcoord);\n" + " c.w = c.w * g_fOneColor.x + g_fOneColor.y;\n" + " FragData0 = c;\n" + "}\n" + "\n" + "// simpler\n" + "void CRTCPS_Nearest() {\n" + " half4 c = texture(g_sMemory, ps2memcoord(PSin.tex.xy));\n" + " c.w = c.w * g_fOneColor.x + g_fOneColor.y;\n" + " FragData0 = c;\n" + "}\n" + "\n" + "void CRTC24InterPS() {\n" + " float finter = texture(g_sInterlace, PSin.Z.yy).x * g_fOneColor.z + g_fOneColor.w + g_fc0.w;\n" + " float2 filtcoord = trunc(PSin.tex.xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;\n" + "\n" + " half4 c = texture(g_sMemory, ps2memcoord(filtcoord));\n" + " c.w = (c.w * g_fOneColor.x + g_fOneColor.y)*finter;\n" + " FragData0 = c;\n" + "}\n" + "\n" + "void CRTC24PS() {\n" + " float2 filtcoord = trunc(PSin.tex.xy) * g_fInvTexDims.xy + g_fInvTexDims.zw;\n" + " half4 c = texture(g_sMemory, ps2memcoord(filtcoord));\n" + " c.w = c.w * g_fOneColor.x + g_fOneColor.y;\n" + " FragData0 = c;\n" + "}\n" + "\n" + "void ZeroPS() {\n" + " FragData0 = g_fOneColor;\n" + "}\n" + "\n" + "void ZeroDebugPS() {\n" + " FragData0 = vec4(PSin.tex.x, PSin.tex.y, PSin.tex.z, 0.5);\n" + "}\n" + "\n" + "void ZeroDebug2PS() {\n" + " vec2 xy = ps2memcoord(fract(PSin.tex.xy/PSin.tex.z)) * vec2(1/4096.0f, 1/48.0f);\n" + " FragData0 = vec4(xy.x, xy.y, 0.0, 0.5);\n" + "}\n" + "\n" + "void ZeroDebug3PS() {\n" + " //FragData0 = vec4(PSin.position.x/2.0f + 0.5f, PSin.position.y/2.0f + 0.5f, 1.0, 0.5);\n" + "}\n" + "\n" + "void BaseTexturePS() {\n" + " FragData0 = texture(g_sSrcFinal, PSin.tex.xy) * g_fOneColor;\n" + "}\n" + "\n" + "void Convert16to32PS() {\n" + " float4 final;\n" + " float2 ffrac = mod ( PSin.tex.xy + g_fTexDims.zw, g_fTexOffset.xy);\n" + " float2 tex0 = g_fTexDims.xy * PSin.tex.xy - ffrac * g_fc0.yw;\n" + "\n" + " if (ffrac.x > g_fTexOffset.x*g_fc0.w)\n" + " tex0.x += g_fTexOffset.x*g_fc0.w;\n" + " if (tex0.x >= g_fc0.y)\n" + " tex0 += g_fTexOffset.zw;\n" + "\n" + " float4 lower = texture(g_sSrcFinal, tex0);\n" + " float4 upper = texture(g_sSrcFinal, tex0 + g_fPageOffset.xy);\n" + "\n" + " final.zy = texture(g_sConv32to16, lower.zyx).xy + lower.ww*g_fPageOffset.zw;\n" + " final.xw = texture(g_sConv32to16, upper.zyx).xy + upper.ww*g_fPageOffset.zw;\n" + "\n" + " FragData0= final;\n" + "}\n" + "\n" + "// use when texture is not tiled and converting from 32bit to 16bit\n" + "// don't convert on the block level, only on the column level\n" + "// so every other 8 pixels, use the upper bits instead of lower\n" + "void Convert32to16PS() {\n" + " bool upper = false;\n" + " float2 ffrac = mod(PSin.tex.xy + g_fTexDims.zw, g_fTexOffset.xy);\n" + " float2 tex0 = g_fc0.ww * (PSin.tex.xy + ffrac);\n" + " if( ffrac.x > g_fTexOffset.z ) {\n" + " tex0.x -= g_fTexOffset.z;\n" + " upper = true;\n" + " }\n" + " if( ffrac.y >= g_fTexOffset.w ) {\n" + " tex0.y -= g_fTexOffset.w;\n" + " tex0.x += g_fc0.w;\n" + " }\n" + "\n" + " half4 color = texture(g_sSrcFinal, tex0*g_fTexDims.xy)*g_fc0.yyyw;\n" + " float2 uv = upper ? color.xw : color.zy;\n" + " FragData0 = texture(g_sConv16to32, uv*g_fPageOffset.xy+g_fPageOffset.zw)*g_fTexDims.xxxy;\n" + "}\n" + "#endif //FRAGMENT_SHADER\n" + "\n" + "#ifdef VERTEX_SHADER\n" + "\n" + "void SetColor() {\n" + " VSout.color = Color;\n" + "}\n" + "\n" + "void SetTex() {\n" + "#ifdef PERSPECTIVE_CORRECT_TEX\n" + " VSout.tex = TexCoord;\n" + "#else\n" + " VSout.tex.xy = TexCoord.xy/TexCoord.z;\n" + "#endif\n" + "}\n" + "\n" + "void SetZ() {\n" + "#ifdef WRITE_DEPTH\n" + " VSout.Z = SecondaryColor * g_fZBias.x + g_fZBias.y;\n" + " VSout.Z.w = 1.0f;\n" + "#endif\n" + "}\n" + "\n" + "void SetPosition() {\n" + " float4 position;\n" + " position.xy = vec2(Vert.xy) * g_fPosXY.xy + g_fPosXY.zw;\n" + " // FIXME: the factor in normal mode seem bogus. They don't have same order than in log mode. Or I failed to understand the logic\n" + " //// normal mode.\n" + " // -> dot(g_fZ, SecondaryColor.zyxw)) => reconstruct a float from normalized char. The float range from 0 to 1\n" + " // position.z = dot(g_fZ, SecondaryColor.zyxw);\n" + " //// logz mode\n" + " // -> dot(g_fZ, SecondaryColor.zyxw)) => reconstruct a float from normalized char. The float range from 0 to 2**32\n" + " // position.z = log(g_fc0.y + dot(g_fZ, SecondaryColor.zyxw)) * g_fZNorm.x\n" + " // position.z = log(1 + Z_INT) * 0.999f / (32 * log(2.0)) = log2(1 + Z_INT) * 0.999f / 32\n" + " // log2(...) will range from 0 to 32\n" + "\n" + " // position.z = (log(g_fc0.y + dot(g_fZ, SecondaryColor.zyxw)) * g_fZNorm.x + g_fZNorm.y) * g_fZMin.y + dot(g_fZ, SecondaryColor.zyxw) * g_fZMin.x ;\n" + "#ifdef NO_LOGZ\n" + " position.z = dot(g_fZ, SecondaryColor.zyxw);\n" + "#else\n" + " position.z = log2(1.0f + dot(g_fZ, SecondaryColor.zyxw)) * 0.999f/32.0f;\n" + "#endif\n" + " position.w = 1.0f;\n" + "\n" + " gl_Position = position;\n" + "}\n" + "\n" + "void SetFog() {\n" + " VSout.fog = float(Vert.z) * g_fBilinear.w;\n" + "}\n" + "\n" + "// just smooth shadering\n" + "void RegularVS() {\n" + " SetPosition();\n" + " SetColor();\n" + " SetZ();\n" + "}\n" + "\n" + "// diffuse texture mapping\n" + "void TextureVS() {\n" + " SetPosition();\n" + " SetColor();\n" + " SetTex();\n" + " SetZ();\n" + "}\n" + "\n" + "void RegularFogVS() {\n" + " SetPosition();\n" + " SetColor();\n" + " SetZ();\n" + " SetFog();\n" + "}\n" + "\n" + "void TextureFogVS() {\n" + " SetPosition();\n" + " SetColor();\n" + " SetTex();\n" + " SetZ();\n" + " SetFog();\n" + "}\n" + "\n" + "void BitBltVS() {\n" + " vec4 position;\n" + " position.xy = vec2(Vert.xy) * g_fBitBltPos.xy + g_fBitBltPos.zw;\n" + " position.zw = g_fc0.xy;\n" + " gl_Position = position;\n" + "\n" + " VSout.tex.xy = TexCoord.xy * g_fBitBltTex.xy + g_fBitBltTex.zw;\n" + " VSout.Z.xy = position.xy * g_fBitBltTrans.xy + g_fBitBltTrans.zw;\n" + "}\n" + "\n" + "#endif\n" + ;