GS/OpenGL: Cleanups and GLES support

This commit is contained in:
Connor McLaughlin 2022-07-17 03:29:59 +10:00 committed by refractionpcsx2
parent 3f1fb1fc3c
commit eba2273cd1
29 changed files with 487 additions and 789 deletions

View File

@ -393,7 +393,7 @@ A_STATIC void CasSetup(
const1[0]=AU1_AF1(sharp);
const1[1]=AU1_AH2_AF2(hSharp);
const1[2]=AU1_AF1(AF1_(8.0)*inputSizeInPixelsX*ARcpF1(outputSizeInPixelsX));
const1[3]=0;}
const1[3]=AU1(0);}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//_____________________________________________________________/\_______________________________________________________________

View File

@ -1,5 +1,14 @@
#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130) || defined(FXAA_GLSL_VK) || defined(__METAL_VERSION__)
#ifndef SHADER_MODEL
#define SHADER_MODEL 0
#endif
#ifndef FXAA_HLSL_4
#define FXAA_HLSL_4 0
#endif
#ifndef FXAA_HLSL_5
#define FXAA_HLSL_5 0
#endif
#ifndef FXAA_GLSL_130
#define FXAA_GLSL_130 0
#endif
@ -508,7 +517,7 @@ float4 FxaaPass(float4 FxaaColor, float2 uv0, texture2d<float> tex)
FxaaColor = FxaaPixelShader(uv0, tex, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);
#elif (FXAA_GLSL_130 == 1 || FXAA_GLSL_VK == 1)
vec2 PixelSize = textureSize(TextureSampler, 0);
vec2 PixelSize = vec2(textureSize(TextureSampler, 0));
FxaaColor = FxaaPixelShader(uv0, TextureSampler, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);
#elif defined(__METAL_VERSION__)
float2 PixelSize = float2(tex.get_width(), tex.get_height());

View File

@ -6,6 +6,7 @@
#ifdef VERTEX_SHADER
#if !pGL_ES
out gl_PerVertex {
vec4 gl_Position;
float gl_PointSize;
@ -13,6 +14,7 @@ out gl_PerVertex {
float gl_ClipDistance[1];
#endif
};
#endif
#endif
@ -20,6 +22,7 @@ out gl_PerVertex {
#ifdef GEOMETRY_SHADER
#if !pGL_ES
in gl_PerVertex {
vec4 gl_Position;
float gl_PointSize;
@ -35,6 +38,7 @@ out gl_PerVertex {
float gl_ClipDistance[1];
#endif
};
#endif
#endif

View File

@ -75,7 +75,11 @@ void ps_convert_rgba8_16bits()
// Pixel shader entry point: writes the sampled depth value to SV_Target1 as an unsigned integer.
void ps_convert_float32_32bits()
{
// Convert a GL_FLOAT32 depth texture into a 32 bits UINT texture
#if HAS_CLIP_CONTROL
// With HAS_CLIP_CONTROL set, the sampled red channel is scaled by 2^32 before the uint conversion.
SV_Target1 = uint(exp2(32.0f) * sample_c().r);
#else
// Otherwise the sampled red channel is scaled by 2^24 instead.
SV_Target1 = uint(exp2(24.0f) * sample_c().r);
#endif
}
#endif
@ -83,7 +87,11 @@ void ps_convert_float32_32bits()
// Pixel shader entry point: spreads the integer form of the sampled depth over the four
// channels of SV_Target0, one byte per channel.
void ps_convert_float32_rgba8()
{
// Convert a GL_FLOAT32 depth texture into a RGBA color texture
#if HAS_CLIP_CONTROL
// Scale the sampled red channel by 2^32 before truncating to an integer.
uint d = uint(sample_c().r * exp2(32.0f));
#else
// Same conversion with a 2^24 scale when HAS_CLIP_CONTROL is off.
uint d = uint(sample_c().r * exp2(24.0f));
#endif
// Bits 0-7 -> R, 8-15 -> G, 16-23 -> B, 24-31 -> A; every byte is then divided by 255.
SV_Target0 = vec4(uvec4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24))) / vec4(255.0);
}
#endif
@ -92,7 +100,11 @@ void ps_convert_float32_rgba8()
// Pixel shader entry point: writes the low 16 bits of the integer depth to SV_Target0 as
// three 5-bit fields plus one 1-bit field.
void ps_convert_float16_rgb5a1()
{
// Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
#if HAS_CLIP_CONTROL
// Scale the sampled red channel by 2^32 before truncating to an integer.
uint d = uint(sample_c().r * exp2(32.0f));
#else
// Same conversion with a 2^24 scale when HAS_CLIP_CONTROL is off.
uint d = uint(sample_c().r * exp2(24.0f));
#endif
// Bits 0-4 -> R, 5-9 -> G, 10-14 -> B, bit 15 -> A; RGB fields are divided by 32, A by 1.
SV_Target0 = vec4(uvec4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u)) / vec4(32.0f, 32.0f, 32.0f, 1.0f);
}
#endif
@ -100,25 +112,41 @@ void ps_convert_float16_rgb5a1()
// Packs the four channels of `unorm` into one 32-bit integer (R in the low byte, A in the
// high byte) and returns it as a float scaled by 2^-32 (HAS_CLIP_CONTROL) or 2^-24.
float rgba8_to_depth32(vec4 unorm)
{
// Each channel is multiplied by 255.5 and truncated to an unsigned integer.
uvec4 c = uvec4(unorm * vec4(255.5f));
#if HAS_CLIP_CONTROL
return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f);
#else
return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-24.0f);
#endif
}
// Packs the R, G and B channels of `unorm` into a 24-bit integer (R in the low byte) and
// returns it as a float scaled by 2^-32 (HAS_CLIP_CONTROL) or 2^-24. Alpha is ignored.
float rgba8_to_depth24(vec4 unorm)
{
// Each used channel is multiplied by 255.5 and truncated to an unsigned integer.
uvec3 c = uvec3(unorm.rgb * vec3(255.5f));
#if HAS_CLIP_CONTROL
return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f);
#else
return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-24.0f);
#endif
}
// Packs the R and G channels of `unorm` into a 16-bit integer (R in the low byte) and
// returns it as a float scaled by 2^-32 (HAS_CLIP_CONTROL) or 2^-24. B and A are ignored.
float rgba8_to_depth16(vec4 unorm)
{
// Each used channel is multiplied by 255.5 and truncated to an unsigned integer.
uvec2 c = uvec2(unorm.rg * vec2(255.5f));
#if HAS_CLIP_CONTROL
return float(c.r | (c.g << 8)) * exp2(-32.0f);
#else
return float(c.r | (c.g << 8)) * exp2(-24.0f);
#endif
}
// Rebuilds a 16-bit integer from an RGB5A1-style colour and returns it as a float scaled by
// 2^-32 (HAS_CLIP_CONTROL) or 2^-24.
// The top 5 bits of R, G and B land in bits 0-4, 5-9 and 10-14; the top bit of A lands in bit 15.
float rgb5a1_to_depth16(vec4 unorm)
{
// Each channel is multiplied by 255.5 and truncated to an unsigned integer.
uvec4 c = uvec4(unorm * vec4(255.5f));
#if HAS_CLIP_CONTROL
return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f);
#else
return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-24.0f);
#endif
}
#ifdef ps_convert_rgba8_float32

View File

@ -14,9 +14,9 @@ layout(location = 0) out vec4 SV_Target0;
// Weave shader
void ps_main0()
{
const int idx = int(ZrH.x); // buffer index passed from CPU
const int field = idx & 1; // current field
const int vpos = int(gl_FragCoord.y); // vertical position of destination texture
int idx = int(ZrH.x); // buffer index passed from CPU
int field = idx & 1; // current field
int vpos = int(gl_FragCoord.y); // vertical position of destination texture
if ((vpos & 1) == field)
SV_Target0 = texture(TextureSampler, PSin_t);
@ -54,16 +54,16 @@ void ps_main3()
// causing the wrong lines to be discarded, so a vertical offset (lofs) is added to the vertical
// position of the destination texture to force the proper field alignment
const int idx = int(ZrH.x); // buffer index passed from CPU
const int bank = idx >> 1; // current bank
const int field = idx & 1; // current field
const int vres = int(ZrH.z) >> 1; // vertical resolution of source texture
const int lofs = ((((vres + 1) >> 1) << 1) - vres) & bank; // line alignment offset for bank 1
const int vpos = int(gl_FragCoord.y) + lofs; // vertical position of destination texture
const vec2 bofs = vec2(0.0f, 0.5f * bank); // vertical offset of the current bank relative to source texture size
const vec2 vscale = vec2(1.0f, 2.0f); // scaling factor from source to destination texture
const vec2 optr = PSin_t - bofs; // used to check if the current destination line is within the current bank
const vec2 iptr = optr * vscale; // pointer to the current pixel in the source texture
int idx = int(ZrH.x); // buffer index passed from CPU
int bank = idx >> 1; // current bank
int field = idx & 1; // current field
int vres = int(ZrH.z) >> 1; // vertical resolution of source texture
int lofs = ((((vres + 1) >> 1) << 1) - vres) & bank; // line alignment offset for bank 1
int vpos = int(gl_FragCoord.y) + lofs; // vertical position of destination texture
vec2 bofs = vec2(0.0f, 0.5f * float(bank)); // vertical offset of the current bank relative to source texture size
vec2 vscale = vec2(1.0f, 2.0f); // scaling factor from source to destination texture
vec2 optr = PSin_t - bofs; // used to check if the current destination line is within the current bank
vec2 iptr = optr * vscale; // pointer to the current pixel in the source texture
// if the index of the current destination line belongs to the current field we update it, otherwise
// we leave the old line in the destination buffer
@ -79,15 +79,15 @@ void ps_main4()
{
// we use the contents of the MAD frame buffer to reconstruct the missing lines from the current field.
const int idx = int(ZrH.x); // buffer index passed from CPU
const int field = idx & 1; // current field
const int vpos = int(gl_FragCoord.y); // vertical position of destination texture
const float sensitivity = ZrH.w; // passed from CPU, higher values mean more likely to use weave
const vec3 motion_thr = vec3(1.0, 1.0, 1.0) * sensitivity; //
const vec2 bofs = vec2(0.0f, 0.5f); // position of the bank 1 relative to source texture size
const vec2 vscale = vec2(1.0f, 0.5f); // scaling factor from source to destination texture
const vec2 lofs = vec2(0.0f, ZrH.y) * vscale; // distance between two adjacent lines relative to source texture size
const vec2 iptr = PSin_t * vscale; // pointer to the current pixel in the source texture
int idx = int(ZrH.x); // buffer index passed from CPU
int field = idx & 1; // current field
int vpos = int(gl_FragCoord.y); // vertical position of destination texture
float sensitivity = ZrH.w; // passed from CPU, higher values mean more likely to use weave
vec3 motion_thr = vec3(1.0, 1.0, 1.0) * sensitivity; //
vec2 bofs = vec2(0.0f, 0.5f); // position of the bank 1 relative to source texture size
vec2 vscale = vec2(1.0f, 0.5f); // scaling factor from source to destination texture
vec2 lofs = vec2(0.0f, ZrH.y) * vscale; // distance between two adjacent lines relative to source texture size
vec2 iptr = PSin_t * vscale; // pointer to the current pixel in the source texture
vec2 p_t0; // pointer to current pixel (missing or not) from most recent frame
vec2 p_t1; // pointer to current pixel (missing or not) from one frame back

View File

@ -73,10 +73,11 @@ void ps_copy()
#ifdef ps_filter_scanlines
vec4 ps_scanlines(uint i)
{
vec4 mask[2] =
{
vec4(1, 1, 1, 0),
vec4(0, 0, 0, 0)};
vec4 mask[2] = vec4[2]
(
vec4(1, 1, 1, 0),
vec4(0, 0, 0, 0)
);
return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);
}
@ -360,7 +361,7 @@ vec4 LottesCRTPass()
//flipped y axis in opengl
vec2 fragcoord = vec2(gl_FragCoord.x, u_target_resolution.y - gl_FragCoord.y) - u_target_rect.xy;
vec4 color;
vec2 inSize = u_target_resolution - (2 * u_target_rect.xy);
vec2 inSize = u_target_resolution - (2.0 * u_target_rect.xy);
vec2 pos = Warp(fragcoord.xy / inSize);

View File

@ -28,7 +28,7 @@
#ifdef FRAGMENT_SHADER
#if !defined(BROKEN_DRIVER) && defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts
#if !defined(BROKEN_DRIVER) && (pGL_ES || defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts)
layout(location = 0)
#endif
in SHADER
@ -55,6 +55,8 @@ in SHADER
#undef TARGET_0_QUALIFIER
#define TARGET_0_QUALIFIER inout
#define LAST_FRAG_COLOR SV_Target0
#elif defined(GL_ARM_shader_framebuffer_fetch)
#define LAST_FRAG_COLOR gl_LastFragColorARM
#endif
#endif
@ -125,7 +127,7 @@ vec4 sample_c(vec2 uv)
return textureLod(TextureSampler, uv, lod);
#else
return textureLod(TextureSampler, uv, 0); // No lod
return textureLod(TextureSampler, uv, 0.0f); // No lod
#endif
#endif
@ -249,10 +251,16 @@ mat4 sample_4p(vec4 u)
int fetch_raw_depth()
{
#if PS_TEX_IS_FB == 1
return int(fetch_rt().r * exp2(32.0f));
#if HAS_CLIP_CONTROL
float multiplier = exp2(32.0f);
#else
return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f));
float multiplier = exp2(24.0f);
#endif
#if PS_TEX_IS_FB == 1
return int(fetch_rt().r * multiplier);
#else
return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * multiplier);
#endif
}
@ -344,13 +352,21 @@ vec4 sample_depth(vec2 st)
#elif PS_DEPTH_FMT == 1
// Based on ps_convert_float32_rgba8 of convert
// Convert a GL_FLOAT32 depth texture into a RGBA color texture
uint d = uint(fetch_c(uv).r * exp2(32.0f));
#if HAS_CLIP_CONTROL
uint d = uint(fetch_c(uv).r * exp2(32.0f));
#else
uint d = uint(fetch_c(uv).r * exp2(24.0f));
#endif
t = vec4(uvec4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24)));
#elif PS_DEPTH_FMT == 2
// Based on ps_convert_float16_rgb5a1 of convert
// Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
uint d = uint(fetch_c(uv).r * exp2(32.0f));
#if HAS_CLIP_CONTROL
uint d = uint(fetch_c(uv).r * exp2(32.0f));
#else
uint d = uint(fetch_c(uv).r * exp2(24.0f));
#endif
t = vec4(uvec4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u)) * vec4(8.0f, 8.0f, 8.0f, 128.0f);
#elif PS_DEPTH_FMT == 3
@ -834,16 +850,16 @@ void ps_main()
vec4 C = ps_color();
#if (APITRACE_DEBUG & 1) == 1
C.r = 255f;
C.r = 255.0f;
#endif
#if (APITRACE_DEBUG & 2) == 2
C.g = 255f;
C.g = 255.0f;
#endif
#if (APITRACE_DEBUG & 4) == 4
C.b = 255f;
C.b = 255.0f;
#endif
#if (APITRACE_DEBUG & 8) == 8
C.a = 128f;
C.a = 128.0f;
#endif
#if PS_SHUFFLE

View File

@ -9,7 +9,7 @@ layout(location = 5) in uint i_z;
layout(location = 6) in uvec2 i_uv;
layout(location = 7) in vec4 i_f;
#if !defined(BROKEN_DRIVER) && defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts
#if !defined(BROKEN_DRIVER) && (pGL_ES || defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts)
layout(location = 0)
#endif
out SHADER
@ -59,7 +59,15 @@ void vs_main()
p.xy = vec2(i_p) - vec2(0.05f, 0.05f);
p.xy = p.xy * VertexScale - VertexOffset;
p.w = 1.0f;
#if HAS_CLIP_CONTROL
p.z = float(z) * exp_min32;
#else
// GLES doesn't support ARB_clip_control, so remap it to -1..1. We also reduce the range from 32 bits
// to 24 bits, which means some games with very large depth ranges will not render correctly. But,
// for most, it's okay, and really, the best we can do.
p.z = min(float(z) * exp2(-23.0f), 2.0f) - 1.0f;
#endif
gl_Position = p;
@ -77,7 +85,7 @@ void vs_main()
#ifdef GEOMETRY_SHADER
#if !defined(BROKEN_DRIVER) && defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts
#if !defined(BROKEN_DRIVER) && (pGL_ES || defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts)
layout(location = 0)
#endif
in SHADER
@ -91,7 +99,7 @@ in SHADER
#endif
} GSin[];
#if !defined(BROKEN_DRIVER) && defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts
#if !defined(BROKEN_DRIVER) && (pGL_ES || defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts)
layout(location = 0)
#endif
out SHADER
@ -173,7 +181,7 @@ void gs_main()
// Potentially there is faster math
vec2 line_vector = normalize(rt_p.xy - lt_p.xy);
vec2 line_normal = vec2(line_vector.y, -line_vector.x);
vec2 line_width = (line_normal * PointSize) / 2;
vec2 line_width = (line_normal * PointSize) / 2.0f;
lt_p.xy -= line_width;
rt_p.xy -= line_width;

View File

@ -65,6 +65,11 @@ namespace GL
glBufferSubData(m_target, 0, used_size, m_cpu_buffer.data());
}
/// Chunk size for this buffer implementation: reports m_size unchanged.
/// NOTE(review): m_size is presumably the total buffer size - confirm against the class definition.
u32 GetChunkSize() const override
{
return m_size;
}
static std::unique_ptr<StreamBuffer> Create(GLenum target, u32 size)
{
glGetError();
@ -115,6 +120,11 @@ namespace GL
glBufferData(m_target, used_size, m_cpu_buffer.data(), GL_STREAM_DRAW);
}
/// Chunk size for this buffer implementation: reports m_size unchanged.
/// NOTE(review): m_size is presumably the total buffer size - confirm against the class definition.
u32 GetChunkSize() const override
{
return m_size;
}
static std::unique_ptr<StreamBuffer> Create(GLenum target, u32 size)
{
glGetError();
@ -226,6 +236,11 @@ namespace GL
}
}
/// Chunk size for this buffer implementation: m_size split into NUM_SYNC_POINTS equal
/// parts (integer division, so any remainder is dropped).
u32 GetChunkSize() const override
{
return m_size / NUM_SYNC_POINTS;
}
u32 m_position = 0;
u32 m_used_block_index = 0;
u32 m_available_block_index = NUM_SYNC_POINTS;

View File

@ -46,6 +46,9 @@ namespace GL
virtual MappingResult Map(u32 alignment, u32 min_size) = 0;
virtual void Unmap(u32 used_size) = 0;
/// Returns the minimum granularity of blocks which sync objects will be created around.
virtual u32 GetChunkSize() const = 0;
static std::unique_ptr<StreamBuffer> Create(GLenum target, u32 size);
protected:

View File

@ -788,7 +788,6 @@ if(USE_OPENGL)
GS/Renderers/OpenGL/GLState.h
GS/Renderers/OpenGL/GSDeviceOGL.h
GS/Renderers/OpenGL/GSTextureOGL.h
GS/Renderers/OpenGL/GSUniformBufferOGL.h
)
target_link_libraries(PCSX2_FLAGS INTERFACE glad)
endif()

View File

@ -20,9 +20,7 @@
#if !defined(NDEBUG) || defined(_DEBUG) || defined(_DEVEL)
#define ENABLE_OGL_DEBUG // Create a debug context and check opengl command status. Allow also to dump various textures/states.
//#define ENABLE_OGL_DEBUG_FENCE
//#define ENABLE_OGL_DEBUG_MEM_BW // compute the quantity of data transfered (debug purpose)
//#define ENABLE_TRACE_REG // print GS reg write
//#define ENABLE_TRACE_REG // print GS reg write
//#define ENABLE_EXTRA_LOG // print extra log
#endif

View File

@ -699,6 +699,7 @@ public:
bool bptc_textures : 1; ///< Supports BC6/7 texture compression.
bool framebuffer_fetch : 1; ///< Can sample from the framebuffer without texture barriers.
bool dual_source_blend : 1; ///< Can use alpha output as a blend factor.
bool clip_control : 1; ///< Can use 0..1 depth range instead of -1..1.
bool stencil_buffer : 1; ///< Supports stencil buffer, and can use for DATE.
bool cas_sharpening : 1; ///< Supports sufficient functionality for contrast adaptive sharpening.
FeatureSupport()

View File

@ -58,6 +58,7 @@ GSDevice11::GSDevice11()
m_features.framebuffer_fetch = false;
m_features.dual_source_blend = true;
m_features.stencil_buffer = true;
m_features.clip_control = true;
}
GSDevice11::~GSDevice11()

View File

@ -2179,7 +2179,7 @@ void GSRendererHW::EmulateZbuffer()
}
else if (!m_context->ZBUF.ZMSK)
{
m_conf.cb_ps.TA_MaxDepth_Af.z = static_cast<float>(max_z) * 0x1p-32f;
m_conf.cb_ps.TA_MaxDepth_Af.z = static_cast<float>(max_z) * (g_gs_device->Features().clip_control ? 0x1p-32f : 0x1p-24f);
m_conf.ps.zclamp = 1;
}
}

View File

@ -16,28 +16,8 @@
#include "PrecompiledHeader.h"
#include "GLLoader.h"
#include "GS/GS.h"
#include <unordered_set>
#include "Host.h"
namespace GLExtension
{
static std::unordered_set<std::string> s_extensions;
bool Has(const std::string& ext)
{
return !!s_extensions.count(ext);
}
void Set(const std::string& ext, bool v)
{
if (v)
s_extensions.insert(ext);
else
s_extensions.erase(ext);
}
} // namespace GLExtension
namespace ReplaceGL
{
void APIENTRY ScissorIndexed(GLuint index, GLint left, GLint bottom, GLsizei width, GLsizei height)
@ -56,7 +36,6 @@ namespace ReplaceGL
} // namespace ReplaceGL
#ifdef _WIN32
namespace Emulate_DSA
{
// Texture entry point
@ -108,12 +87,6 @@ namespace Emulate_DSA
}
// Misc entry point
// (only purpose is to have a consistent API otherwise it is useless)
void APIENTRY CreateProgramPipelines(GLsizei n, GLuint* pipelines)
{
glGenProgramPipelines(n, pipelines);
}
void APIENTRY CreateSamplers(GLsizei n, GLuint* samplers)
{
glGenSamplers(n, samplers);
@ -130,12 +103,10 @@ namespace Emulate_DSA
glCompressedTextureSubImage2D = CompressedTextureSubImage;
glGetTextureImage = GetTexureImage;
glTextureParameteri = TextureParameteri;
glCreateProgramPipelines = CreateProgramPipelines;
glGenerateTextureMipmap = GenerateTextureMipmap;
glCreateSamplers = CreateSamplers;
}
} // namespace Emulate_DSA
#endif
namespace GLLoader
{
@ -143,53 +114,18 @@ namespace GLLoader
bool vendor_id_nvidia = false;
bool vendor_id_intel = false;
bool mesa_driver = false;
bool in_replayer = false;
bool buggy_pbo = false;
bool is_gles = false;
bool has_dual_source_blend = false;
bool has_clip_control = true;
bool found_framebuffer_fetch = false;
bool found_geometry_shader = true; // we require GL3.3 so geometry must be supported by default
// DX11 GPU
bool found_GL_ARB_gpu_shader5 = false; // Require IvyBridge
bool found_GL_ARB_gpu_shader5 = false; // Require IvyBridge
bool found_GL_ARB_texture_barrier = false;
static bool mandatory(const std::string& ext)
{
if (!GLExtension::Has(ext))
{
Host::ReportFormattedErrorAsync("GS", "ERROR: %s is NOT SUPPORTED\n", ext.c_str());
return false;
}
return true;
}
static bool optional(const std::string& name)
{
bool found = GLExtension::Has(name);
if (!found)
{
DevCon.Warning("INFO: %s is NOT SUPPORTED", name.c_str());
}
else
{
DevCon.WriteLn("INFO: %s is available", name.c_str());
}
std::string opt("override_");
opt += name;
if (theApp.GetConfigI(opt.c_str()) != -1)
{
found = theApp.GetConfigB(opt.c_str());
fprintf(stderr, "Override %s detection (%s)\n", name.c_str(), found ? "Enabled" : "Disabled");
GLExtension::Set(name, found);
}
return found;
}
bool check_gl_version(int major, int minor)
static bool check_gl_version()
{
const char* vendor = (const char*)glGetString(GL_VENDOR);
if (strstr(vendor, "Advanced Micro Devices") || strstr(vendor, "ATI Technologies Inc.") || strstr(vendor, "ATI"))
@ -209,103 +145,97 @@ namespace GLLoader
{
found_geometry_shader = GSConfig.OverrideGeometryShaders != 0 &&
(GLAD_GL_VERSION_3_2 || GL_ARB_geometry_shader4 || GSConfig.OverrideGeometryShaders == 1);
GLExtension::Set("GL_ARB_geometry_shader4", found_geometry_shader);
fprintf(stderr, "Overriding geometry shaders detection\n");
Console.Warning("Overriding geometry shaders detection to %s", found_geometry_shader ? "true" : "false");
}
GLint major_gl = 0;
GLint minor_gl = 0;
glGetIntegerv(GL_MAJOR_VERSION, &major_gl);
glGetIntegerv(GL_MINOR_VERSION, &minor_gl);
if ((major_gl < major) || (major_gl == major && minor_gl < minor))
if (!GLAD_GL_VERSION_3_3 && !GLAD_GL_ES_VERSION_3_1)
{
Host::ReportFormattedErrorAsync("GS", "OpenGL %d.%d is not supported. Only OpenGL %d.%d\n was found", major, minor, major_gl, minor_gl);
Host::ReportFormattedErrorAsync("GS", "OpenGL is not supported. Only OpenGL %d.%d\n was found", major_gl, minor_gl);
return false;
}
return true;
}
bool check_gl_supported_extension()
static bool check_gl_supported_extension()
{
int max_ext = 0;
glGetIntegerv(GL_NUM_EXTENSIONS, &max_ext);
for (GLint i = 0; i < max_ext; i++)
if (GLAD_GL_VERSION_3_3 && !GLAD_GL_ARB_shading_language_420pack)
{
std::string ext{(const char*)glGetStringi(GL_EXTENSIONS, i)};
GLExtension::Set(ext);
//fprintf(stderr, "DEBUG ext: %s\n", ext.c_str());
}
// Mandatory for both renderer
bool ok = true;
{
// GL4.1
ok = ok && mandatory("GL_ARB_separate_shader_objects");
// GL4.2
ok = ok && mandatory("GL_ARB_shading_language_420pack");
ok = ok && mandatory("GL_ARB_texture_storage");
// GL4.3
ok = ok && mandatory("GL_KHR_debug");
// GL4.4
ok = ok && mandatory("GL_ARB_buffer_storage");
}
// Only for HW renderer
if (GSConfig.UseHardwareRenderer())
{
ok = ok && mandatory("GL_ARB_copy_image");
ok = ok && mandatory("GL_ARB_clip_control");
}
if (!ok)
Host::ReportFormattedErrorAsync("GS",
"GL_ARB_shading_language_420pack is not supported, this is required for the OpenGL renderer.");
return false;
// Extra
{
// GL4.0
found_GL_ARB_gpu_shader5 = optional("GL_ARB_gpu_shader5");
// GL4.5
optional("GL_ARB_direct_state_access");
// Mandatory for the advance HW renderer effect. Unfortunately Mesa LLVMPIPE/SWR renderers doesn't support this extension.
// Rendering might be corrupted but it could be good enough for test/virtual machine.
found_GL_ARB_texture_barrier = optional("GL_ARB_texture_barrier");
has_dual_source_blend = GLAD_GL_VERSION_3_2 || GLAD_GL_ARB_blend_func_extended;
found_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch;
if (found_framebuffer_fetch && GSConfig.DisableFramebufferFetch)
{
Console.Warning("Framebuffer fetch was found but is disabled. This will reduce performance.");
found_framebuffer_fetch = false;
}
}
if (!GLExtension::Has("GL_ARB_viewport_array"))
// GLES doesn't have ARB_clip_control.
has_clip_control = GLAD_GL_ARB_clip_control;
if (!has_clip_control && !is_gles)
{
Host::AddOSDMessage("GL_ARB_clip_control is not supported, this will cause rendering issues.",
Host::OSD_ERROR_DURATION);
}
found_GL_ARB_gpu_shader5 = GLAD_GL_ARB_gpu_shader5;
found_GL_ARB_texture_barrier = GLAD_GL_ARB_texture_barrier;
has_dual_source_blend = GLAD_GL_VERSION_3_2 || GLAD_GL_ARB_blend_func_extended;
found_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch;
if (found_framebuffer_fetch && GSConfig.DisableFramebufferFetch)
{
Console.Warning("Framebuffer fetch was found but is disabled. This will reduce performance.");
found_framebuffer_fetch = false;
}
if (!GLAD_GL_ARB_viewport_array)
{
glScissorIndexed = ReplaceGL::ScissorIndexed;
glViewportIndexedf = ReplaceGL::ViewportIndexedf;
Console.Warning("GL_ARB_viewport_array is not supported! Function pointer will be replaced");
}
if (!GLExtension::Has("GL_ARB_texture_barrier"))
if (!GLAD_GL_ARB_texture_barrier)
{
glTextureBarrier = ReplaceGL::TextureBarrier;
Console.Warning("GL_ARB_texture_barrier is not supported! Blending emulation will not be supported");
Host::AddOSDMessage("GL_ARB_texture_barrier is not supported, blending will not be accurate.",
Host::OSD_ERROR_DURATION);
}
#ifdef _WIN32
// Thank you Intel for not providing support of basic features on your IGPUs.
if (!GLExtension::Has("GL_ARB_direct_state_access"))
if (!GLAD_GL_ARB_direct_state_access)
{
Console.Warning("GL_ARB_direct_state_access is not supported, this will reduce performance.");
Emulate_DSA::Init();
}
#endif
if (is_gles)
{
has_dual_source_blend = GLAD_GL_EXT_blend_func_extended || GLAD_GL_ARB_blend_func_extended;
if (!has_dual_source_blend && !found_framebuffer_fetch)
{
Host::AddOSDMessage("Both dual source blending and framebuffer fetch are missing, things will be broken.",
Host::OSD_ERROR_DURATION);
}
}
else
{
// Core in GL3.2, so everything supports it.
has_dual_source_blend = true;
}
// Don't use PBOs when we don't have ARB_buffer_storage, orphaning buffers probably ends up worse than just
// using the normal texture update routines and letting the driver take care of it.
GLLoader::buggy_pbo = !GLAD_GL_VERSION_4_4 && !GLAD_GL_ARB_buffer_storage && !GLAD_GL_EXT_buffer_storage;
if (GLLoader::buggy_pbo)
Console.Warning("Not using PBOs for texture uploads because buffer_storage is unavailable.");
return true;
}
bool check_gl_requirements()
{
if (!check_gl_version(3, 3))
if (!check_gl_version())
return false;
if (!check_gl_supported_extension())

View File

@ -17,17 +17,11 @@
#define GL_TEX_LEVEL_0 (0)
#define GL_TEX_LEVEL_1 (1)
#define GL_FB_DEFAULT (0)
#define GL_BUFFER_0 (0)
#define GL_FB_DEFAULT (0)
#define GL_BUFFER_0 (0)
#include "glad.h"
namespace GLExtension
{
extern bool Has(const std::string& ext);
extern void Set(const std::string& ext, bool v = true);
} // namespace GLExtension
namespace GLLoader
{
bool check_gl_requirements();
@ -36,9 +30,12 @@ namespace GLLoader
extern bool vendor_id_nvidia;
extern bool vendor_id_intel;
extern bool mesa_driver;
extern bool buggy_pbo;
extern bool in_replayer;
// GL
extern bool is_gles;
extern bool has_clip_control;
extern bool has_dual_source_blend;
extern bool found_framebuffer_fetch;
extern bool found_geometry_shader;

View File

@ -23,6 +23,7 @@ namespace GLState
GSVector4i scissor;
bool point_size = false;
float line_width = 1.0f;
bool blend;
u16 eq_RGB;

View File

@ -25,6 +25,7 @@ namespace GLState
extern GSVector4i scissor;
extern bool point_size;
extern float line_width;
extern bool blend;
extern u16 eq_RGB;

View File

@ -30,13 +30,6 @@
//#define ONLY_LINES
// TODO port those value into PerfMon API
#ifdef ENABLE_OGL_DEBUG_MEM_BW
u64 g_real_texture_upload_byte = 0;
u64 g_vertex_upload_byte = 0;
u64 g_uniform_upload_byte = 0;
#endif
static constexpr u32 g_vs_cb_index = 1;
static constexpr u32 g_ps_cb_index = 0;
@ -44,33 +37,15 @@ static constexpr u32 VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
static constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
static constexpr u32 VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024;
static constexpr u32 FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024;
static constexpr u32 TEXTURE_UPLOAD_BUFFER_SIZE = 128 * 1024 * 1024;
int GSDeviceOGL::m_shader_inst = 0;
int GSDeviceOGL::m_shader_reg = 0;
FILE* GSDeviceOGL::m_debug_gl_file = NULL;
static std::unique_ptr<GL::StreamBuffer> s_texture_upload_buffer;
GSDeviceOGL::GSDeviceOGL()
: m_fbo(0)
, m_fbo_read(0)
, m_palette_ss(0)
{
// Reset the debug file
#ifdef ENABLE_OGL_DEBUG
m_debug_gl_file = fopen("GS_opengl_debug.txt", "w");
#endif
}
GSDeviceOGL::GSDeviceOGL() = default;
GSDeviceOGL::~GSDeviceOGL()
{
#ifdef ENABLE_OGL_DEBUG
if (m_debug_gl_file)
{
fclose(m_debug_gl_file);
m_debug_gl_file = NULL;
}
#endif
// Clean vertex buffer state
s_texture_upload_buffer.reset();
if (m_vertex_array_object)
glDeleteVertexArrays(1, &m_vertex_array_object);
m_vertex_stream_buffer.reset();
@ -86,6 +61,7 @@ GSDeviceOGL::~GSDeviceOGL()
// Clean various opengl allocation
glDeleteFramebuffers(1, &m_fbo);
glDeleteFramebuffers(1, &m_fbo_read);
glDeleteFramebuffers(1, &m_fbo_write);
// Delete HW FX
m_vertex_uniform_stream_buffer.reset();
@ -98,91 +74,6 @@ GSDeviceOGL::~GSDeviceOGL()
for (GSDepthStencilOGL* ds : m_om_dss)
delete ds;
PboPool::Destroy();
}
// Reads back the timer queries stored in m_profiler, turns consecutive results into
// per-interval durations, prints min/mean/max/standard deviation and a 2 ms-bucket histogram
// to stderr, and writes every duration to "GS_profile.csv". The query objects are deleted
// on every path.
void GSDeviceOGL::GenerateProfilerData()
{
// Fewer than 3 recorded queries: nothing is reported, the queries are just released.
if (m_profiler.last_query < 3)
{
glDeleteQueries(1 << 16, m_profiler.timer_query);
return;
}
// Wait for the latest query to get a valid result (busy-waits until it is available)
GLuint available = 0;
while (!available)
{
glGetQueryObjectuiv(m_profiler.timer(), GL_QUERY_RESULT_AVAILABLE, &available);
}
GLuint64 time_start = 0;
GLuint64 time_end = 0;
std::vector<double> times;
// Scale factor applied to each raw delta; the results are printed as milliseconds below.
// NOTE(review): presumably the query results are in nanoseconds - confirm.
constexpr double ms = 0.000001;
// When "linux_replay" is greater than 1, only queries from index last_query / replay onwards are used.
const int replay = theApp.GetConfigI("linux_replay");
const int first_query = replay > 1 ? m_profiler.last_query / replay : 0;
glGetQueryObjectui64v(m_profiler.timer_query[first_query], GL_QUERY_RESULT, &time_start);
// Each sample is the difference between two consecutive query results.
for (u32 q = first_query + 1; q < m_profiler.last_query; q++)
{
glGetQueryObjectui64v(m_profiler.timer_query[q], GL_QUERY_RESULT, &time_end);
u64 t = time_end - time_start;
times.push_back((double)t * ms);
time_start = time_end;
}
// Latest value is often silly, just drop it
// NOTE(review): nothing here checks that `times` is non-empty before pop_back(), or still
// non-empty before the statistics below - confirm callers cannot hit that case.
times.pop_back();
glDeleteQueries(1 << 16, m_profiler.timer_query);
const double frames = times.size();
double mean = 0.0;
double sd = 0.0;
auto minmax_time = std::minmax_element(times.begin(), times.end());
// Arithmetic mean of all samples.
for (auto t : times)
mean += t;
mean = mean / frames;
// Standard deviation: square root of the mean squared deviation from the mean.
for (auto t : times)
sd += pow(t - mean, 2);
sd = sqrt(sd / frames);
// Histogram with 16 buckets of 2 ms each; anything beyond the range is counted in the last bucket.
u32 time_repartition[16] = {0};
for (auto t : times)
{
size_t slot = std::min<size_t>(t / 2.0, std::size(time_repartition) - 1);
time_repartition[slot]++;
}
fprintf(stderr, "\nPerformance Profile for %.0f frames:\n", frames);
fprintf(stderr, "Min %4.2f ms\t(%4.2f fps)\n", *minmax_time.first, 1000.0 / *minmax_time.first);
fprintf(stderr, "Mean %4.2f ms\t(%4.2f fps)\n", mean, 1000.0 / mean);
fprintf(stderr, "Max %4.2f ms\t(%4.2f fps)\n", *minmax_time.second, 1000.0 / *minmax_time.second);
fprintf(stderr, "SD %4.2f ms\n", sd);
fprintf(stderr, "\n");
fprintf(stderr, "Frame Repartition\n");
for (u32 i = 0; i < std::size(time_repartition); i++)
{
fprintf(stderr, "%3u ms => %3u ms\t%4u\n", 2 * i, 2 * (i + 1), time_repartition[i]);
}
// Dump "index,duration" rows; silently skipped when the file cannot be opened.
FILE* csv = fopen("GS_profile.csv", "w");
if (csv)
{
for (size_t i = 0; i < times.size(); i++)
{
fprintf(csv, "%zu,%lf\n", i, times[i]);
}
fclose(csv);
}
}
GSTexture* GSDeviceOGL::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format)
@ -196,11 +87,13 @@ bool GSDeviceOGL::Create()
if (!GSDevice::Create())
return false;
if (g_host_display->GetRenderAPI() != RenderAPI::OpenGL)
const RenderAPI render_api = g_host_display->GetRenderAPI();
if (render_api != RenderAPI::OpenGL && render_api != RenderAPI::OpenGLES)
return false;
// Check openGL requirement as soon as possible so we can switch to another
// renderer/device
GLLoader::is_gles = (render_api == RenderAPI::OpenGLES);
if (!GLLoader::check_gl_requirements())
return false;
@ -227,18 +120,29 @@ bool GSDeviceOGL::Create()
m_features.provoking_vertex_last = true;
m_features.dxt_textures = GLAD_GL_EXT_texture_compression_s3tc;
m_features.bptc_textures = GLAD_GL_VERSION_4_2 || GLAD_GL_ARB_texture_compression_bptc || GLAD_GL_EXT_texture_compression_bptc;
m_features.prefer_new_textures = false;
m_features.prefer_new_textures = GLLoader::is_gles;
m_features.framebuffer_fetch = GLLoader::found_framebuffer_fetch;
m_features.dual_source_blend = GLLoader::has_dual_source_blend && !GSConfig.DisableDualSourceBlend;
m_features.clip_control = GLLoader::has_clip_control;
m_features.stencil_buffer = true;
// Wide line support in GL is deprecated as of 3.1, so we will just do it in the Geometry Shader.
m_features.line_expand = false;
GLint point_range[2] = {};
glGetIntegerv(GL_ALIASED_POINT_SIZE_RANGE, point_range);
m_features.point_expand = (point_range[0] <= GSConfig.UpscaleMultiplier && point_range[1] >= GSConfig.UpscaleMultiplier);
Console.WriteLn("Using %s for point expansion.", m_features.point_expand ? "hardware" : "geometry shaders");
if (GLLoader::is_gles)
{
GLint line_range[2] = {};
glGetIntegerv(GL_ALIASED_LINE_WIDTH_RANGE, line_range);
m_features.line_expand = (line_range[0] <= static_cast<GLint>(GSConfig.UpscaleMultiplier) && line_range[1] >= static_cast<GLint>(GSConfig.UpscaleMultiplier));
}
else
{
m_features.line_expand = false;
}
Console.WriteLn("Using %s for point expansion and %s for line expansion.",
m_features.point_expand ? "hardware" : "geometry shaders", m_features.line_expand ? "hardware" : "geometry shaders");
{
auto shader = Host::ReadResourceFileToString("shaders/opengl/common_header.glsl");
@ -257,18 +161,27 @@ bool GSDeviceOGL::Create()
// ****************************************************************
// Debug helper
// ****************************************************************
#ifdef ENABLE_OGL_DEBUG
if (GSConfig.UseDebugDevice)
{
glDebugMessageCallback((GLDEBUGPROC)DebugOutputToFile, NULL);
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB);
if (!GLLoader::is_gles)
{
glDebugMessageCallback((GLDEBUGPROC)DebugOutputToFile, NULL);
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
// Useless info message on Nvidia driver
GLuint ids[] = {0x20004};
glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, std::size(ids), ids, false);
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
// Useless info message on Nvidia driver
GLuint ids[] = { 0x20004 };
glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, std::size(ids), ids, false);
}
else if (GLAD_GL_KHR_debug)
{
glDebugMessageCallbackKHR((GLDEBUGPROC)DebugOutputToFile, NULL);
glDebugMessageControlKHR(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
}
// Uncomment synchronous if you want callstacks which match where the error occurred.
glEnable(GL_DEBUG_OUTPUT);
//glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB);
}
#endif
// WARNING it must be done after the control setup (at least on MESA)
GL_PUSH("GSDeviceOGL::Create");
@ -287,16 +200,11 @@ bool GSDeviceOGL::Create()
OMSetFBO(0);
glGenFramebuffers(1, &m_fbo_read);
glGenFramebuffers(1, &m_fbo_write);
// Always read from the first buffer
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
glReadBuffer(GL_COLOR_ATTACHMENT0);
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
// Some timers to help profiling
if (GLLoader::in_replayer)
{
glCreateQueries(GL_TIMESTAMP, 1 << 16, m_profiler.timer_query);
}
}
// ****************************************************************
@ -379,7 +287,7 @@ bool GSDeviceOGL::Create()
{
const char* name = shaderName(static_cast<ShaderConvert>(i));
const std::string macro_sel = (static_cast<ShaderConvert>(i) == ShaderConvert::RGBA_TO_8I) ?
fmt::format("#define PS_SCALE_FACTOR {}\n", GSConfig.UpscaleMultiplier) :
fmt::format("#define PS_SCALE_FACTOR {:.8f}f\n", GSConfig.UpscaleMultiplier) :
std::string();
const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, m_shader_common_header, *convert_glsl, macro_sel));
if (!m_shader_cache.GetProgram(&m_convert.ps[i], m_convert.vs, {}, ps))
@ -518,15 +426,13 @@ bool GSDeviceOGL::Create()
{
GL_PUSH("GSDeviceOGL::Rasterization");
#ifdef ONLY_LINES
glLineWidth(5.0);
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
#else
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
#endif
if (!GLLoader::is_gles)
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glDisable(GL_CULL_FACE);
glEnable(GL_SCISSOR_TEST);
glDisable(GL_MULTISAMPLE);
if (!GLLoader::is_gles)
glDisable(GL_MULTISAMPLE);
glDisable(GL_DITHER); // Honestly I don't know!
}
@ -560,7 +466,7 @@ bool GSDeviceOGL::Create()
// This extension allow FS depth to range from -1 to 1. So
// gl_position.z could range from [0, 1]
// Change depth convention
if (GLExtension::Has("GL_ARB_clip_control"))
if (GLLoader::has_clip_control)
glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
// ****************************************************************
@ -572,15 +478,19 @@ bool GSDeviceOGL::Create()
// ****************************************************************
// Pbo Pool allocation
// ****************************************************************
if (!GLLoader::buggy_pbo)
{
GL_PUSH("GSDeviceOGL::PBO");
// Mesa seems to use it to compute the row length. In our case, we are
// tightly packed so don't bother with this parameter and set it to the
// minimum alignment (1 byte)
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
PboPool::Init();
s_texture_upload_buffer = GL::StreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, TEXTURE_UPLOAD_BUFFER_SIZE);
if (!s_texture_upload_buffer)
{
Console.Error("Failed to create texture upload buffer. Using slow path.");
GLLoader::buggy_pbo = true;
}
}
// ****************************************************************
@ -592,7 +502,7 @@ bool GSDeviceOGL::Create()
// Full vram, remove a small margin for others buffer
glGetIntegerv(GL_TEXTURE_FREE_MEMORY_ATI, vram);
}
else if (GLExtension::Has("GL_NVX_gpu_memory_info"))
else if (GLAD_GL_NVX_gpu_memory_info)
{
// GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX <= give full memory
// Available vram
@ -649,12 +559,6 @@ bool GSDeviceOGL::CreateTextureFX()
m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
}
if (GLLoader::in_replayer)
{
glQueryCounter(m_profiler.timer(), GL_TIMESTAMP);
m_profiler.last_query++;
}
return true;
}
@ -662,6 +566,12 @@ void GSDeviceOGL::ResetAPIState()
{
if (GLState::point_size)
glDisable(GL_PROGRAM_POINT_SIZE);
if (GLState::line_width != 1.0f)
glLineWidth(1.0f);
// clear out DSB
glBlendFuncSeparate(GL_ONE, GL_ZERO, GL_ONE, GL_ZERO);
glDisable(GL_BLEND);
}
void GSDeviceOGL::RestoreAPIState()
@ -714,6 +624,8 @@ void GSDeviceOGL::RestoreAPIState()
if (GLState::point_size)
glEnable(GL_PROGRAM_POINT_SIZE);
if (GLState::line_width != 1.0f)
glLineWidth(GLState::line_width);
// Force UBOs to be reuploaded, we don't know what else was bound there.
std::memset(&m_vs_cb_cache, 0xFF, sizeof(m_vs_cb_cache));
@ -770,7 +682,17 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
OMSetFBO(m_fbo);
OMAttachRt(T);
glClearBufferfv(GL_COLOR, 0, c.v);
if (T->IsIntegerFormat())
{
if (T->IsUnsignedFormat())
glClearBufferuiv(GL_COLOR, 0, c.U32);
else
glClearBufferiv(GL_COLOR, 0, c.I32);
}
else
{
glClearBufferfv(GL_COLOR, 0, c.v);
}
OMSetColorMaskState(OMColorMaskSelector(old_color_mask));
@ -899,10 +821,10 @@ GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel)
const int anisotropy = GSConfig.MaxAnisotropy;
if (anisotropy > 1 && sel.aniso)
{
if (GLExtension::Has("GL_ARB_texture_filter_anisotropic"))
glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY, (float)anisotropy);
else if (GLExtension::Has("GL_EXT_texture_filter_anisotropic"))
glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY_EXT, (float)anisotropy);
if (GLAD_GL_ARB_texture_filter_anisotropic)
glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY, static_cast<float>(anisotropy));
else if (GLAD_GL_EXT_texture_filter_anisotropic)
glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY_EXT, static_cast<float>(anisotropy));
}
return sampler;
@ -963,28 +885,64 @@ std::string GSDeviceOGL::GetShaderSource(const std::string_view& entry, GLenum t
std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum type, const std::string_view& macro)
{
std::string header = "#version 330 core\n";
std::string header;
// Need GL version 420
header += "#extension GL_ARB_shading_language_420pack: require\n";
// Need GL version 410
header += "#extension GL_ARB_separate_shader_objects: require\n";
if (m_features.framebuffer_fetch)
if (GLLoader::is_gles)
{
if (GLAD_GL_EXT_shader_framebuffer_fetch)
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
else if (GLAD_GL_ARM_shader_framebuffer_fetch)
header += "#extension GL_ARM_shader_framebuffer_fetch : require\n";
}
if (GLAD_GL_ES_VERSION_3_2)
header = "#version 320 es\n";
else if (GLAD_GL_ES_VERSION_3_1)
header = "#version 310 es\n";
if (GLLoader::found_GL_ARB_gpu_shader5)
header += "#extension GL_ARB_gpu_shader5 : enable\n";
if (GLAD_GL_EXT_blend_func_extended)
header += "#extension GL_EXT_blend_func_extended : require\n";
if (GLAD_GL_ARB_blend_func_extended)
header += "#extension GL_ARB_blend_func_extended : require\n";
if (m_features.framebuffer_fetch)
{
if (GLAD_GL_EXT_shader_framebuffer_fetch)
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
else if (GLAD_GL_ARM_shader_framebuffer_fetch)
header += "#extension GL_ARM_shader_framebuffer_fetch : require\n";
}
header += "precision highp float;\n";
header += "precision highp int;\n";
header += "precision highp sampler2D;\n";
if (GLAD_GL_ES_VERSION_3_1)
header += "precision highp sampler2DMS;\n";
if (GLAD_GL_ES_VERSION_3_2)
header += "precision highp usamplerBuffer;\n";
if (!GLAD_GL_EXT_blend_func_extended && !GLAD_GL_ARB_blend_func_extended)
header += "#define DISABLE_DUAL_SOURCE\n";
}
else
{
header = "#version 330 core\n";
// Need GL version 420
header += "#extension GL_ARB_shading_language_420pack: require\n";
// Need GL version 410
header += "#extension GL_ARB_separate_shader_objects: require\n";
if (m_features.framebuffer_fetch && GLAD_GL_EXT_shader_framebuffer_fetch)
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
if (GLLoader::found_GL_ARB_gpu_shader5)
header += "#extension GL_ARB_gpu_shader5 : enable\n";
}
if (m_features.framebuffer_fetch)
header += "#define HAS_FRAMEBUFFER_FETCH 1\n";
else
header += "#define HAS_FRAMEBUFFER_FETCH 0\n";
if (GLLoader::has_clip_control)
header += "#define HAS_CLIP_CONTROL 1\n";
else
header += "#define HAS_CLIP_CONTROL 0\n";
if (GLLoader::vendor_id_amd || GLLoader::vendor_id_intel)
header += "#define BROKEN_DRIVER as_usual\n";
@ -992,7 +950,10 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ
// AMD/nvidia define it to 0
// intel window don't define it
// intel linux refuse to define it
header += "#define pGL_ES 0\n";
if (GLLoader::is_gles)
header += "#define pGL_ES 1\n";
else
header += "#define pGL_ES 0\n";
// Allows putting several shaders in one file
switch (type)
@ -1030,7 +991,7 @@ std::string GSDeviceOGL::GetVSSource(VSSelector sel)
+ fmt::format("#define VS_IIP {}\n", static_cast<u32>(sel.iip))
+ fmt::format("#define VS_POINT_SIZE {}\n", static_cast<u32>(sel.point_size));
if (sel.point_size)
macro += fmt::format("#define VS_POINT_SIZE_VALUE {}\n", GSConfig.UpscaleMultiplier);
macro += fmt::format("#define VS_POINT_SIZE_VALUE {:.8f}f\n", GSConfig.UpscaleMultiplier);
std::string src = GenGlslHeader("vs_main", GL_VERTEX_SHADER, macro);
src += m_shader_common_header;
@ -1102,7 +1063,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
+ fmt::format("#define PS_FIXED_ONE_A {}\n", sel.fixed_one_a)
+ fmt::format("#define PS_PABE {}\n", sel.pabe)
+ fmt::format("#define PS_SCANMSK {}\n", sel.scanmsk)
+ fmt::format("#define PS_SCALE_FACTOR {}\n", GSConfig.UpscaleMultiplier)
+ fmt::format("#define PS_SCALE_FACTOR {:.8f}f\n", GSConfig.UpscaleMultiplier)
+ fmt::format("#define PS_NO_COLOR {}\n", sel.no_color)
+ fmt::format("#define PS_NO_COLOR1 {}\n", sel.no_color1)
+ fmt::format("#define PS_NO_ABLEND {}\n", sel.no_ablend)
@ -1169,12 +1130,36 @@ void GSDeviceOGL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
ASSERT(GLExtension::Has("GL_ARB_copy_image") && glCopyImageSubData);
glCopyImageSubData(sid, GL_TEXTURE_2D,
0, r.x, r.y, 0,
did, GL_TEXTURE_2D,
0, destX, destY, 0,
r.width(), r.height(), 1);
// Copy the source rectangle `r` of texture `sid` to (destX, destY) in texture `did`.
// Prefer a direct image-to-image copy (core 4.3 / ARB, then the EXT and OES variants),
// and fall back to a framebuffer blit when no copy_image entry point is available.
if (GLAD_GL_VERSION_4_3 || GLAD_GL_ARB_copy_image)
{
	glCopyImageSubData(sid, GL_TEXTURE_2D, 0, r.x, r.y, 0, did, GL_TEXTURE_2D,
		0, destX, destY, 0, r.width(), r.height(), 1);
}
else if (GLAD_GL_EXT_copy_image)
{
	glCopyImageSubDataEXT(sid, GL_TEXTURE_2D, 0, r.x, r.y, 0, did, GL_TEXTURE_2D,
		0, destX, destY, 0, r.width(), r.height(), 1);
}
else if (GLAD_GL_OES_copy_image)
{
	glCopyImageSubDataOES(sid, GL_TEXTURE_2D, 0, r.x, r.y, 0, did, GL_TEXTURE_2D,
		0, destX, destY, 0, r.width(), r.height(), 1);
}
else
{
	// Blit fallback: attach source/destination to the dedicated read/write FBOs.
	glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
	glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_fbo_write);
	glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, sid, 0);
	glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, did, 0);

	const int w = r.width();
	const int h = r.height();

	// The scissor test applies to blits, so it must be off for the duration of the copy.
	glDisable(GL_SCISSOR_TEST);

	// The destination rectangle starts at (destX, destY), exactly like the
	// glCopyImageSubData paths above; the source offset (r.x, r.y) must NOT be
	// added to it, otherwise copies with a non-zero source origin land in the wrong place.
	glBlitFramebuffer(r.x, r.y, r.x + w, r.y + h, destX, destY, destX + w, destY + h, GL_COLOR_BUFFER_BIT, GL_NEAREST);

	glEnable(GL_SCISSOR_TEST);

	// Restore the draw framebuffer tracked by GLState and unbind the read framebuffer.
	glBindFramebuffer(GL_DRAW_FRAMEBUFFER, GLState::fbo);
	glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
}
}
void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear)
@ -1407,7 +1392,7 @@ void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex)
if (!m_fxaa.ps.IsValid())
{
// Needs ARB_gpu_shader5 for gather.
if (!GLLoader::found_GL_ARB_gpu_shader5)
if (!GLLoader::is_gles && !GLLoader::found_GL_ARB_gpu_shader5)
return;
std::string fxaa_macro = "#define FXAA_GLSL_130 1\n";
@ -1559,7 +1544,7 @@ void GSDeviceOGL::ClearSamplerCache()
bool GSDeviceOGL::CreateCASPrograms()
{
// Image load store and GLSL 420pack is core in GL4.2, no need to check.
m_features.cas_sharpening = GLAD_GL_VERSION_4_2 && GLAD_GL_ARB_compute_shader;
m_features.cas_sharpening = (GLAD_GL_VERSION_4_2 && GLAD_GL_ARB_compute_shader) || GLAD_GL_ES_VERSION_3_2;
if (!m_features.cas_sharpening)
{
Console.Warning("Compute shaders not supported, CAS is unavailable.");
@ -1574,6 +1559,12 @@ bool GSDeviceOGL::CreateCASPrograms()
}
const char* header =
GLLoader::is_gles ?
"#version 320 es\n"
"precision highp float;\n"
"precision highp int;\n"
"precision highp sampler2D;\n"
"precision highp image2D;\n" :
"#version 420\n"
"#extension GL_ARB_compute_shader : require\n";
const char* sharpen_params[2] = {
@ -1710,6 +1701,15 @@ void GSDeviceOGL::OMSetBlendState(bool enable, GLenum src_factor, GLenum dst_fac
{
if (GLState::blend)
{
// make sure we're not using dual source
if (GLState::f_sRGB == GL_SRC1_ALPHA || GLState::f_sRGB == GL_ONE_MINUS_SRC1_ALPHA ||
GLState::f_dRGB == GL_SRC1_ALPHA || GLState::f_dRGB == GL_ONE_MINUS_SRC1_ALPHA)
{
glBlendFuncSeparate(GL_ONE, GL_ZERO, GL_ONE, GL_ZERO);
GLState::f_sRGB = GL_ONE;
GLState::f_dRGB = GL_ZERO;
}
GLState::blend = false;
glDisable(GL_BLEND);
}
@ -1798,12 +1798,13 @@ void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel)
OMSetDepthStencilState(m_om_dss[dssel.key]);
}
static GSDeviceOGL::VSSelector convertSel(const GSHWDrawConfig::VSSelector sel)
static GSDeviceOGL::VSSelector convertSel(const GSHWDrawConfig::VSSelector sel, const GSHWDrawConfig::Topology topology)
{
// Mali requires gl_PointSize written when rasterizing points. The spec seems to suggest this is okay.
GSDeviceOGL::VSSelector out;
out.int_fst = !sel.fst;
out.iip = sel.iip;
out.point_size = sel.point_size;
out.point_size = sel.point_size || (GLLoader::is_gles && topology == GSHWDrawConfig::Topology::Point);
return out;
}
@ -1916,7 +1917,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
}
ProgramSelector psel;
psel.vs = convertSel(config.vs);
psel.vs = convertSel(config.vs, config.topology);
psel.ps.key_hi = config.ps.key_hi;
psel.ps.key_lo = config.ps.key_lo;
psel.gs.key = 0;
@ -1936,7 +1937,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
SetupPipeline(psel);
// additional non-pipeline config stuff
const bool point_size_enabled = config.vs.point_size;
const bool point_size_enabled = config.vs.point_size && !GLLoader::is_gles;
if (GLState::point_size != point_size_enabled)
{
if (point_size_enabled)
@ -1945,6 +1946,12 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
glDisable(GL_PROGRAM_POINT_SIZE);
GLState::point_size = point_size_enabled;
}
const float line_width = config.line_expand ? static_cast<float>(GSConfig.UpscaleMultiplier) : 1.0f;
if (GLState::line_width != line_width)
{
GLState::line_width = line_width;
glLineWidth(line_width);
}
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
{
@ -2115,7 +2122,6 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
{
std::string message(gl_message, gl_length >= 0 ? gl_length : strlen(gl_message));
std::string type, severity, source;
static int sev_counter = 0;
switch (gl_type)
{
case GL_DEBUG_TYPE_ERROR_ARB : type = "Error"; break;
@ -2130,7 +2136,7 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
}
switch (gl_severity)
{
case GL_DEBUG_SEVERITY_HIGH_ARB : severity = "High"; sev_counter++; break;
case GL_DEBUG_SEVERITY_HIGH_ARB : severity = "High"; break;
case GL_DEBUG_SEVERITY_MEDIUM_ARB : severity = "Mid"; break;
case GL_DEBUG_SEVERITY_LOW_ARB : severity = "Low"; break;
default:
@ -2153,43 +2159,16 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
default : source = "???"; break;
}
#ifdef _DEBUG
// Don't spam noisy information on the terminal
if (gl_severity != GL_DEBUG_SEVERITY_NOTIFICATION)
if (gl_severity != GL_DEBUG_SEVERITY_NOTIFICATION && gl_source != GL_DEBUG_SOURCE_APPLICATION)
{
Console.Error("T:%s\tID:%d\tS:%s\t=> %s", type.c_str(), GSState::s_n, severity.c_str(), message.c_str());
}
#else
// Print nouveau shader compiler info
if (GSState::s_n == 0)
{
int t, local, gpr, inst, byte;
const int status = sscanf(message.c_str(), "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d",
&t, &local, &gpr, &inst, &byte);
if (status == 5)
{
m_shader_inst += inst;
m_shader_reg += gpr;
fprintf(stderr, "T:%s\t\tS:%s\t=> %s\n", type.c_str(), severity.c_str(), message.c_str());
}
}
#endif
}
#ifdef ENABLE_OGL_DEBUG
if (m_debug_gl_file)
fprintf(m_debug_gl_file, "T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str());
if (sev_counter >= 5)
{
// Close the file to flush the content on disk before exiting.
if (m_debug_gl_file)
{
fclose(m_debug_gl_file);
m_debug_gl_file = NULL;
}
ASSERT(0);
}
#endif
// Returns the raw (non-owning) pointer held by s_texture_upload_buffer.
// NOTE(review): presumably null when the upload buffer was never created or its
// creation failed — callers should check GLLoader::buggy_pbo first; confirm.
GL::StreamBuffer* GSDeviceOGL::GetTextureUploadBuffer()
{
	GL::StreamBuffer* const upload_buffer = s_texture_upload_buffer.get();
	return upload_buffer;
}
void GSDeviceOGL::PushDebugGroup(const char* fmt, ...)

View File

@ -22,16 +22,10 @@
#include "common/HashCombine.h"
#include "GS/Renderers/Common/GSDevice.h"
#include "GSTextureOGL.h"
#include "GSUniformBufferOGL.h"
#include "GLState.h"
#include "GLLoader.h"
#include "GS/GS.h"
#ifdef ENABLE_OGL_DEBUG_MEM_BW
extern u64 g_real_texture_upload_byte;
extern u64 g_vertex_upload_byte;
#endif
class GSDepthStencilOGL
{
bool m_depth_enable;
@ -207,19 +201,15 @@ public:
}
};
static int m_shader_inst;
static int m_shader_reg;
private:
static FILE* m_debug_gl_file;
// Place holder for the GLSL shader code (to avoid useless reload)
std::string m_shader_common_header;
std::string m_shader_tfx_vgs;
std::string m_shader_tfx_fs;
GLuint m_fbo; // frame buffer container
GLuint m_fbo_read; // frame buffer container only for reading
GLuint m_fbo = 0; // frame buffer container
GLuint m_fbo_read = 0; // frame buffer container only for reading
GLuint m_fbo_write = 0; // frame buffer container only for writing
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
std::unique_ptr<GL::StreamBuffer> m_index_stream_buffer;
@ -274,20 +264,12 @@ private:
GL::Program sharpen_ps;
} m_cas;
struct
{
u16 last_query = 0;
GLuint timer_query[1 << 16] = {};
GLuint timer() { return timer_query[last_query]; }
} m_profiler;
GLuint m_ps_ss[1 << 8];
GSDepthStencilOGL* m_om_dss[1 << 5] = {};
std::unordered_map<ProgramSelector, GL::Program, ProgramSelectorHash> m_programs;
GL::ShaderCache m_shader_cache;
GLuint m_palette_ss;
GLuint m_palette_ss = 0;
GSHWDrawConfig::VSConstantBuffer m_vs_cb_cache;
GSHWDrawConfig::PSConstantBuffer m_ps_cb_cache;
@ -314,11 +296,11 @@ public:
GSDeviceOGL();
virtual ~GSDeviceOGL();
void GenerateProfilerData();
// Used by OpenGL, so the same calling convention is required.
static void APIENTRY DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar* gl_message, const void* userParam);
static GL::StreamBuffer* GetTextureUploadBuffer();
bool Create() override;
void ResetAPIState() override;

View File

@ -15,170 +15,23 @@
#include "PrecompiledHeader.h"
#include <limits.h>
#include "GSTextureOGL.h"
#include "GLState.h"
#include "GS/Renderers/OpenGL/GSDeviceOGL.h"
#include "GS/Renderers/OpenGL/GSTextureOGL.h"
#include "GS/Renderers/OpenGL/GLState.h"
#include "GS/GSPerfMon.h"
#include "GS/GSPng.h"
#include "GS/GSGL.h"
#include "common/StringUtil.h"
#ifdef ENABLE_OGL_DEBUG_MEM_BW
extern u64 g_real_texture_upload_byte;
#endif
// FIXME OGL4: investigate, only 1 unpack buffer always bound
// PboPool: a single pixel-unpack buffer (PBO) used as a staging area for texture uploads.
// The buffer is created with persistent-mapping flags and is divided into fixed-size
// segments, each guarded by one GLsync fence. Usage sequence as exposed by the functions
// below: Map(size) -> write data -> Unmap() -> issue the upload using Offset() ->
// UnbindPbo() -> EndTransfer().
namespace PboPool
{
// Total size of the buffer and size of one fenced segment (64 MiB / 16 MiB => 4 segments).
const u32 m_pbo_size = 64 * 1024 * 1024;
const u32 m_seg_size = 16 * 1024 * 1024;
GLuint m_buffer; // GL buffer object name
uptr m_offset; // byte offset of the current transfer inside the buffer
char* m_map; // CPU pointer to the start of the mapped buffer
u32 m_size; // size of the current transfer, rounded up to a multiple of 64 bytes
GLsync m_fence[m_pbo_size / m_seg_size]; // one fence per segment, null when unused
// Option for buffer storage
// XXX: are coherent mapping and a barrier actually needed?
// As far as I understand glTexSubImage2D is a client-server transfer so no need to make
// the value visible to the server
const GLbitfield common_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT;
const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT;
// Creates the buffer storage, maps the whole buffer once and clears all fences.
// The PBO is left unbound on return.
void Init()
{
glGenBuffers(1, &m_buffer);
BindPbo();
glObjectLabel(GL_BUFFER, m_buffer, -1, "PBO");
glBufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, create_flags);
m_map = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags);
m_offset = 0;
std::fill(std::begin(m_fence), std::end(m_fence), nullptr);
UnbindPbo();
}
// Reserves space for a transfer of `size` bytes and returns the CPU pointer to write to.
// Binds the PBO as a side effect (it stays bound until UnbindPbo()).
// A request larger than the whole buffer is only reported on stderr, not rejected.
char* Map(u32 size)
{
char* map;
// Note: keep offset aligned for SSE/AVX
m_size = (size + 63) & ~0x3F;
if (m_size > m_pbo_size)
{
fprintf(stderr, "BUG: PBO too small %u but need %u\n", m_pbo_size, m_size);
}
// Note: texsubimage will access currently bound buffer
// Pbo ready let's get a pointer
BindPbo();
// Sync() may move m_offset to the start of another segment, so it must run
// before the pointer is computed.
Sync();
map = m_map + m_offset;
return map;
}
// Flushes the range written by the current transfer (the mapping uses explicit flushes).
void Unmap()
{
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset, m_size);
}
// Byte offset of the current transfer inside the buffer.
uptr Offset()
{
return m_offset;
}
// Deletes all outstanding fences and the buffer object, and resets the bookkeeping.
void Destroy()
{
m_map = NULL;
m_offset = 0;
for (GLsync& fence : m_fence)
{
if (fence != 0)
{
glDeleteSync(fence);
fence = 0;
}
}
if (m_buffer != 0)
{
glDeleteBuffers(1, &m_buffer);
m_buffer = 0;
}
}
// Binds the pool's buffer to GL_PIXEL_UNPACK_BUFFER.
void BindPbo()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_buffer);
}
// Handles a transfer whose end falls in a different segment than its start:
// the transfer is moved to the start of the next segment (wrapping to segment 0 past
// the end of the buffer), a fence is inserted for the segment being left, and the
// fence of the segment being entered (if any) is waited on and deleted.
void Sync()
{
u32 segment_current = m_offset / m_seg_size;
u32 segment_next = (m_offset + m_size) / m_seg_size;
if (segment_current != segment_next)
{
if (segment_next >= std::size(m_fence))
{
segment_next = 0;
}
// Align current transfer on the start of the segment
m_offset = m_seg_size * segment_next;
if (m_size > m_seg_size)
{
fprintf(stderr, "BUG: PBO Map size %u is bigger than a single segment %u. Crossing more than one fence is not supported yet, texture data may be corrupted.\n", m_size, m_seg_size);
// TODO Synchronize all crossed fences
}
// protect the left segment
m_fence[segment_current] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
// Check next segment is free
if (m_fence[segment_next])
{
GLenum status = glClientWaitSync(m_fence[segment_next], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
// Potentially it doesn't work on AMD driver which might always return GL_CONDITION_SATISFIED
if (status != GL_ALREADY_SIGNALED)
{
GL_PERF("GL_PIXEL_UNPACK_BUFFER: Sync Sync (%x)! Buffer too small ?", status);
}
glDeleteSync(m_fence[segment_next]);
m_fence[segment_next] = 0;
}
}
}
// Unbinds any buffer from GL_PIXEL_UNPACK_BUFFER.
void UnbindPbo()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
// Advances the write offset past the transfer that was just issued.
void EndTransfer()
{
m_offset += m_size;
}
} // namespace PboPool
static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 256;
GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format format, GLuint fbo_read)
: m_clean(false), m_r_x(0), m_r_y(0), m_r_w(0), m_r_h(0), m_layer(0)
{
// OpenGL didn't like dimensions of size 0
m_size.x = std::max(1, width);
m_size.y = std::max(1, height);
m_format = format;
m_type = type;
m_type = type;
m_fbo_read = fbo_read;
m_texture_id = 0;
m_mipmap_levels = 1;
@ -189,46 +42,46 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
{
// 1 Channel integer
case Format::PrimID:
gl_fmt = GL_R32F;
m_int_format = GL_RED;
m_int_type = GL_INT;
m_int_shift = 2;
gl_fmt = GL_R32F;
m_int_format = GL_RED;
m_int_type = GL_INT;
m_int_shift = 2;
break;
case Format::UInt32:
gl_fmt = GL_R32UI;
m_int_format = GL_RED_INTEGER;
m_int_type = GL_UNSIGNED_INT;
m_int_shift = 2;
gl_fmt = GL_R32UI;
m_int_format = GL_RED_INTEGER;
m_int_type = GL_UNSIGNED_INT;
m_int_shift = 2;
break;
case Format::UInt16:
gl_fmt = GL_R16UI;
m_int_format = GL_RED_INTEGER;
m_int_type = GL_UNSIGNED_SHORT;
m_int_shift = 1;
gl_fmt = GL_R16UI;
m_int_format = GL_RED_INTEGER;
m_int_type = GL_UNSIGNED_SHORT;
m_int_shift = 1;
break;
// 1 Channel normalized
case Format::UNorm8:
gl_fmt = GL_R8;
m_int_format = GL_RED;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 0;
gl_fmt = GL_R8;
m_int_format = GL_RED;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 0;
break;
// 4 channel normalized
case Format::Color:
gl_fmt = GL_RGBA8;
m_int_format = GL_RGBA;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 2;
gl_fmt = GL_RGBA8;
m_int_format = GL_RGBA;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 2;
break;
// 4 channel float
case Format::HDRColor:
gl_fmt = GL_RGBA16;
m_int_format = GL_RGBA;
m_int_type = GL_UNSIGNED_SHORT;
m_int_shift = 3;
gl_fmt = GL_RGBA16;
m_int_format = GL_RGBA;
m_int_type = GL_UNSIGNED_SHORT;
m_int_shift = 3;
break;
// Depth buffer
@ -252,37 +105,37 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
break;
case Format::BC1:
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
break;
case Format::BC2:
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
break;
case Format::BC3:
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
break;
case Format::BC7:
gl_fmt = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB;
m_int_format = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
gl_fmt = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB;
m_int_format = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB;
m_int_type = GL_UNSIGNED_BYTE;
m_int_shift = 1;
break;
case Format::Invalid:
m_int_format = 0;
m_int_type = 0;
m_int_shift = 0;
m_int_format = 0;
m_int_type = 0;
m_int_shift = 0;
ASSERT(0);
}
@ -363,9 +216,6 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
u32 row_byte = r.width() << m_int_shift;
u32 map_size = r.height() * row_byte;
#ifdef ENABLE_OGL_DEBUG_MEM_BW
g_real_texture_upload_byte += map_size;
#endif
#if 0
if (r.height() == 1) {
@ -389,7 +239,7 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
glCompressedTextureSubImage2D(m_texture_id, layer, r.x, r.y, r.width(), r.height(), m_int_format, upload_size, data);
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
else if (map_size >= PboPool::m_seg_size)
else if (GLLoader::buggy_pbo || map_size > GSDeviceOGL::GetTextureUploadBuffer()->GetChunkSize())
{
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift);
glTextureSubImage2D(m_texture_id, layer, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data);
@ -397,27 +247,15 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
}
else
{
// The complex solution with PBO
char* src = (char*)data;
char* map = PboPool::Map(map_size);
GL::StreamBuffer* const sb = GSDeviceOGL::GetTextureUploadBuffer();
// PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch
// Note: row_byte != pitch
for (int h = 0; h < r.height(); h++)
{
memcpy(map, src, row_byte);
map += row_byte;
src += pitch;
}
const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size);
StringUtil::StrideMemCpy(map.pointer, row_byte, data, pitch, row_byte, r.height());
sb->Unmap(map_size);
sb->Bind();
PboPool::Unmap();
glTextureSubImage2D(m_texture_id, layer, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset());
// FIXME OGL4: investigate, only 1 unpack buffer always bound
PboPool::UnbindPbo();
PboPool::EndTransfer();
glTextureSubImage2D(m_texture_id, layer, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type,
reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
}
m_needs_mipmaps_generated = true;
@ -441,7 +279,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
if (m_type == Type::Texture || m_type == Type::RenderTarget)
{
const u32 map_size = r.height() * row_byte;
if (map_size > PboPool::m_seg_size)
if (GLLoader::buggy_pbo || map_size > GSDeviceOGL::GetTextureUploadBuffer()->GetChunkSize())
return false;
GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap
@ -449,11 +287,8 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
m_clean = false;
m.bits = (u8*)PboPool::Map(map_size);
#ifdef ENABLE_OGL_DEBUG_MEM_BW
g_real_texture_upload_byte += map_size;
#endif
const auto map = GSDeviceOGL::GetTextureUploadBuffer()->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size);
m.bits = static_cast<u8*>(map.pointer);
// Save the area for the unmap
m_r_x = r.x;
@ -461,6 +296,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
m_r_w = r.width();
m_r_h = r.height();
m_layer = layer;
m_map_offset = map.buffer_offset;
return true;
}
@ -472,15 +308,13 @@ void GSTextureOGL::Unmap()
{
if (m_type == Type::Texture || m_type == Type::RenderTarget)
{
const u32 map_size = (m_r_w << m_int_shift) * m_r_h;
GL::StreamBuffer* sb = GSDeviceOGL::GetTextureUploadBuffer();
sb->Unmap(map_size);
sb->Bind();
PboPool::Unmap();
glTextureSubImage2D(m_texture_id, m_layer, m_r_x, m_r_y, m_r_w, m_r_h, m_int_format, m_int_type, (const void*)PboPool::Offset());
// FIXME OGL4: investigate, only 1 unpack buffer always bound
PboPool::UnbindPbo();
PboPool::EndTransfer();
glTextureSubImage2D(m_texture_id, m_layer, m_r_x, m_r_y, m_r_w, m_r_h, m_int_format, m_int_type,
reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
m_needs_mipmaps_generated = true;

View File

@ -19,40 +19,26 @@
#include "GS/Renderers/OpenGL/GLLoader.h"
#include "common/AlignedMalloc.h"
namespace PboPool
{
inline void BindPbo();
inline void UnbindPbo();
inline void Sync();
inline char* Map(u32 size);
inline void Unmap();
inline uptr Offset();
inline void EndTransfer();
void Init();
void Destroy();
} // namespace PboPool
class GSTextureOGL final : public GSTexture
{
private:
GLuint m_texture_id; // the texture id
GLuint m_fbo_read;
bool m_clean;
GLuint m_texture_id = 0; // the texture id
GLuint m_fbo_read = 0;
bool m_clean = false;
// Avoid alignment constraint
//GSVector4i m_r;
int m_r_x;
int m_r_y;
int m_r_w;
int m_r_h;
int m_layer;
int m_r_x = 0;
int m_r_y = 0;
int m_r_w = 0;
int m_r_h = 0;
int m_layer = 0;
u32 m_map_offset = 0;
// internal opengl format/type/alignment
GLenum m_int_format;
GLenum m_int_type;
u32 m_int_shift;
GLenum m_int_format = 0;
GLenum m_int_type = 0;
u32 m_int_shift = 0;
public:
explicit GSTextureOGL(Type type, int width, int height, int levels, Format format, GLuint fbo_read);

View File

@ -1,88 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "GLState.h"
#ifdef ENABLE_OGL_DEBUG_MEM_BW
extern u64 g_uniform_upload_byte;
#endif
class GSUniformBufferOGL
{
GLuint m_buffer; // data object
GLuint m_index; // GLSL slot
u32 m_size; // size of the data
u8* m_cache; // content of the previous upload
public:
GSUniformBufferOGL(const std::string& pretty_name, GLuint index, u32 size)
: m_index(index), m_size(size)
{
glGenBuffers(1, &m_buffer);
bind();
glObjectLabel(GL_BUFFER, m_buffer, pretty_name.size(), pretty_name.c_str());
allocate();
attach();
m_cache = (u8*)_aligned_malloc(m_size, 32);
memset(m_cache, 0, m_size);
}
void bind()
{
glBindBuffer(GL_UNIFORM_BUFFER, m_buffer);
}
void allocate()
{
glBufferData(GL_UNIFORM_BUFFER, m_size, NULL, GL_DYNAMIC_DRAW);
}
void attach()
{
// From the opengl manpage:
// glBindBufferBase also binds buffer to the generic buffer binding point specified by target
glBindBufferBase(GL_UNIFORM_BUFFER, m_index, m_buffer);
}
void upload(const void* src)
{
bind();
// glMapBufferRange allow to set various parameter but the call is
// synchronous whereas glBufferSubData could be asynchronous.
// TODO: investigate the extension ARB_invalidate_subdata
glBufferSubData(GL_UNIFORM_BUFFER, 0, m_size, src);
#ifdef ENABLE_OGL_DEBUG_MEM_BW
g_uniform_upload_byte += m_size;
#endif
}
void cache_upload(const void* src)
{
if (memcmp(m_cache, src, m_size) != 0)
{
memcpy(m_cache, src, m_size);
upload(src);
}
}
~GSUniformBufferOGL()
{
glDeleteBuffers(1, &m_buffer);
_aligned_free(m_cache);
}
};

View File

@ -243,6 +243,7 @@ bool GSDeviceVK::CheckFeatures()
m_features.prefer_new_textures = true;
m_features.provoking_vertex_last = g_vulkan_context->GetOptionalExtensions().vk_ext_provoking_vertex;
m_features.dual_source_blend = features.dualSrcBlend && !GSConfig.DisableDualSourceBlend;
m_features.clip_control = true;
if (!m_features.dual_source_blend)
Console.Warning("Vulkan driver is missing dual-source blending. This will have an impact on performance.");

View File

@ -868,7 +868,6 @@
<ClInclude Include="GS\Renderers\SW\GSTextureSW.h" />
<ClInclude Include="GS\GSThread.h" />
<ClInclude Include="GS\GSThread_CXX11.h" />
<ClInclude Include="GS\Renderers\OpenGL\GSUniformBufferOGL.h" />
<ClInclude Include="GS\GSUtil.h" />
<ClInclude Include="GS\GSVector.h" />
<ClInclude Include="GS\GSVector4i.h" />
@ -1170,4 +1169,4 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>
</Project>

View File

@ -2833,9 +2833,6 @@
<ClInclude Include="GS\Renderers\HW\GSVertexHW.h">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClInclude>
<ClInclude Include="GS\Renderers\OpenGL\GSUniformBufferOGL.h">
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
</ClInclude>
<ClInclude Include="GS\Renderers\Common\GSRenderer.h">
<Filter>System\Ps2\GS\Renderers\Common</Filter>
</ClInclude>
@ -3101,4 +3098,4 @@
<Filter>AppHost\Resources</Filter>
</Manifest>
</ItemGroup>
</Project>
</Project>

View File

@ -645,7 +645,6 @@
<ClInclude Include="GS\Renderers\SW\GSTextureSW.h" />
<ClInclude Include="GS\GSThread.h" />
<ClInclude Include="GS\GSThread_CXX11.h" />
<ClInclude Include="GS\Renderers\OpenGL\GSUniformBufferOGL.h" />
<ClInclude Include="GS\GSUtil.h" />
<ClInclude Include="GS\GSVector.h" />
<ClInclude Include="GS\GSVector4i.h" />
@ -846,4 +845,4 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>
</Project>

View File

@ -1997,9 +1997,6 @@
<ClInclude Include="GS\Renderers\HW\GSVertexHW.h">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClInclude>
<ClInclude Include="GS\Renderers\OpenGL\GSUniformBufferOGL.h">
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
</ClInclude>
<ClInclude Include="GS\Renderers\Common\GSRenderer.h">
<Filter>System\Ps2\GS\Renderers\Common</Filter>
</ClInclude>
@ -2222,4 +2219,4 @@
<Filter>System\Ps2\Debug\rdebug</Filter>
</CustomBuildStep>
</ItemGroup>
</Project>
</Project>