mirror of https://github.com/PCSX2/pcsx2.git
GS/OpenGL: Cleanups and GLES support
This commit is contained in:
parent
3f1fb1fc3c
commit
eba2273cd1
|
@ -393,7 +393,7 @@ A_STATIC void CasSetup(
|
|||
const1[0]=AU1_AF1(sharp);
|
||||
const1[1]=AU1_AH2_AF2(hSharp);
|
||||
const1[2]=AU1_AF1(AF1_(8.0)*inputSizeInPixelsX*ARcpF1(outputSizeInPixelsX));
|
||||
const1[3]=0;}
|
||||
const1[3]=AU1(0);}
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//_____________________________________________________________/\_______________________________________________________________
|
||||
|
|
|
@ -1,5 +1,14 @@
|
|||
#if defined(SHADER_MODEL) || defined(FXAA_GLSL_130) || defined(FXAA_GLSL_VK) || defined(__METAL_VERSION__)
|
||||
|
||||
#ifndef SHADER_MODEL
|
||||
#define SHADER_MODEL 0
|
||||
#endif
|
||||
#ifndef FXAA_HLSL_4
|
||||
#define FXAA_HLSL_4 0
|
||||
#endif
|
||||
#ifndef FXAA_HLSL_5
|
||||
#define FXAA_HLSL_5 0
|
||||
#endif
|
||||
#ifndef FXAA_GLSL_130
|
||||
#define FXAA_GLSL_130 0
|
||||
#endif
|
||||
|
@ -508,7 +517,7 @@ float4 FxaaPass(float4 FxaaColor, float2 uv0, texture2d<float> tex)
|
|||
FxaaColor = FxaaPixelShader(uv0, tex, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);
|
||||
|
||||
#elif (FXAA_GLSL_130 == 1 || FXAA_GLSL_VK == 1)
|
||||
vec2 PixelSize = textureSize(TextureSampler, 0);
|
||||
vec2 PixelSize = vec2(textureSize(TextureSampler, 0));
|
||||
FxaaColor = FxaaPixelShader(uv0, TextureSampler, 1.0/PixelSize.xy, FxaaSubpixMax, FxaaEdgeThreshold, FxaaEdgeThresholdMin);
|
||||
#elif defined(__METAL_VERSION__)
|
||||
float2 PixelSize = float2(tex.get_width(), tex.get_height());
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#ifdef VERTEX_SHADER
|
||||
|
||||
#if !pGL_ES
|
||||
out gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
float gl_PointSize;
|
||||
|
@ -13,6 +14,7 @@ out gl_PerVertex {
|
|||
float gl_ClipDistance[1];
|
||||
#endif
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -20,6 +22,7 @@ out gl_PerVertex {
|
|||
|
||||
#ifdef GEOMETRY_SHADER
|
||||
|
||||
#if !pGL_ES
|
||||
in gl_PerVertex {
|
||||
vec4 gl_Position;
|
||||
float gl_PointSize;
|
||||
|
@ -35,6 +38,7 @@ out gl_PerVertex {
|
|||
float gl_ClipDistance[1];
|
||||
#endif
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -75,7 +75,11 @@ void ps_convert_rgba8_16bits()
|
|||
void ps_convert_float32_32bits()
|
||||
{
|
||||
// Convert a GL_FLOAT32 depth texture into a 32 bits UINT texture
|
||||
#if HAS_CLIP_CONTROL
|
||||
SV_Target1 = uint(exp2(32.0f) * sample_c().r);
|
||||
#else
|
||||
SV_Target1 = uint(exp2(24.0f) * sample_c().r);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -83,7 +87,11 @@ void ps_convert_float32_32bits()
|
|||
void ps_convert_float32_rgba8()
|
||||
{
|
||||
// Convert a GL_FLOAT32 depth texture into a RGBA color texture
|
||||
#if HAS_CLIP_CONTROL
|
||||
uint d = uint(sample_c().r * exp2(32.0f));
|
||||
#else
|
||||
uint d = uint(sample_c().r * exp2(24.0f));
|
||||
#endif
|
||||
SV_Target0 = vec4(uvec4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24))) / vec4(255.0);
|
||||
}
|
||||
#endif
|
||||
|
@ -92,7 +100,11 @@ void ps_convert_float32_rgba8()
|
|||
void ps_convert_float16_rgb5a1()
|
||||
{
|
||||
// Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
|
||||
#if HAS_CLIP_CONTROL
|
||||
uint d = uint(sample_c().r * exp2(32.0f));
|
||||
#else
|
||||
uint d = uint(sample_c().r * exp2(24.0f));
|
||||
#endif
|
||||
SV_Target0 = vec4(uvec4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u)) / vec4(32.0f, 32.0f, 32.0f, 1.0f);
|
||||
}
|
||||
#endif
|
||||
|
@ -100,25 +112,41 @@ void ps_convert_float16_rgb5a1()
|
|||
float rgba8_to_depth32(vec4 unorm)
|
||||
{
|
||||
uvec4 c = uvec4(unorm * vec4(255.5f));
|
||||
#if HAS_CLIP_CONTROL
|
||||
return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-32.0f);
|
||||
#else
|
||||
return float(c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)) * exp2(-24.0f);
|
||||
#endif
|
||||
}
|
||||
|
||||
float rgba8_to_depth24(vec4 unorm)
|
||||
{
|
||||
uvec3 c = uvec3(unorm.rgb * vec3(255.5f));
|
||||
#if HAS_CLIP_CONTROL
|
||||
return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-32.0f);
|
||||
#else
|
||||
return float(c.r | (c.g << 8) | (c.b << 16)) * exp2(-24.0f);
|
||||
#endif
|
||||
}
|
||||
|
||||
float rgba8_to_depth16(vec4 unorm)
|
||||
{
|
||||
uvec2 c = uvec2(unorm.rg * vec2(255.5f));
|
||||
#if HAS_CLIP_CONTROL
|
||||
return float(c.r | (c.g << 8)) * exp2(-32.0f);
|
||||
#else
|
||||
return float(c.r | (c.g << 8)) * exp2(-24.0f);
|
||||
#endif
|
||||
}
|
||||
|
||||
float rgb5a1_to_depth16(vec4 unorm)
|
||||
{
|
||||
uvec4 c = uvec4(unorm * vec4(255.5f));
|
||||
#if HAS_CLIP_CONTROL
|
||||
return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f);
|
||||
#else
|
||||
return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-24.0f);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef ps_convert_rgba8_float32
|
||||
|
|
|
@ -14,9 +14,9 @@ layout(location = 0) out vec4 SV_Target0;
|
|||
// Weave shader
|
||||
void ps_main0()
|
||||
{
|
||||
const int idx = int(ZrH.x); // buffer index passed from CPU
|
||||
const int field = idx & 1; // current field
|
||||
const int vpos = int(gl_FragCoord.y); // vertical position of destination texture
|
||||
int idx = int(ZrH.x); // buffer index passed from CPU
|
||||
int field = idx & 1; // current field
|
||||
int vpos = int(gl_FragCoord.y); // vertical position of destination texture
|
||||
|
||||
if ((vpos & 1) == field)
|
||||
SV_Target0 = texture(TextureSampler, PSin_t);
|
||||
|
@ -54,16 +54,16 @@ void ps_main3()
|
|||
// causing the wrong lines to be discarded, so a vertical offset (lofs) is added to the vertical
|
||||
// position of the destination texture to force the proper field alignment
|
||||
|
||||
const int idx = int(ZrH.x); // buffer index passed from CPU
|
||||
const int bank = idx >> 1; // current bank
|
||||
const int field = idx & 1; // current field
|
||||
const int vres = int(ZrH.z) >> 1; // vertical resolution of source texture
|
||||
const int lofs = ((((vres + 1) >> 1) << 1) - vres) & bank; // line alignment offset for bank 1
|
||||
const int vpos = int(gl_FragCoord.y) + lofs; // vertical position of destination texture
|
||||
const vec2 bofs = vec2(0.0f, 0.5f * bank); // vertical offset of the current bank relative to source texture size
|
||||
const vec2 vscale = vec2(1.0f, 2.0f); // scaling factor from source to destination texture
|
||||
const vec2 optr = PSin_t - bofs; // used to check if the current destination line is within the current bank
|
||||
const vec2 iptr = optr * vscale; // pointer to the current pixel in the source texture
|
||||
int idx = int(ZrH.x); // buffer index passed from CPU
|
||||
int bank = idx >> 1; // current bank
|
||||
int field = idx & 1; // current field
|
||||
int vres = int(ZrH.z) >> 1; // vertical resolution of source texture
|
||||
int lofs = ((((vres + 1) >> 1) << 1) - vres) & bank; // line alignment offset for bank 1
|
||||
int vpos = int(gl_FragCoord.y) + lofs; // vertical position of destination texture
|
||||
vec2 bofs = vec2(0.0f, 0.5f * float(bank)); // vertical offset of the current bank relative to source texture size
|
||||
vec2 vscale = vec2(1.0f, 2.0f); // scaling factor from source to destination texture
|
||||
vec2 optr = PSin_t - bofs; // used to check if the current destination line is within the current bank
|
||||
vec2 iptr = optr * vscale; // pointer to the current pixel in the source texture
|
||||
|
||||
// if the index of current destination line belongs to the current field we update it, otherwise
|
||||
// we leave the old line in the destination buffer
|
||||
|
@ -79,15 +79,15 @@ void ps_main4()
|
|||
{
|
||||
// we use the contents of the MAD frame buffer to reconstruct the missing lines from the current field.
|
||||
|
||||
const int idx = int(ZrH.x); // buffer index passed from CPU
|
||||
const int field = idx & 1; // current field
|
||||
const int vpos = int(gl_FragCoord.y); // vertical position of destination texture
|
||||
const float sensitivity = ZrH.w; // passed from CPU, higher values mean more likely to use weave
|
||||
const vec3 motion_thr = vec3(1.0, 1.0, 1.0) * sensitivity; //
|
||||
const vec2 bofs = vec2(0.0f, 0.5f); // position of the bank 1 relative to source texture size
|
||||
const vec2 vscale = vec2(1.0f, 0.5f); // scaling factor from source to destination texture
|
||||
const vec2 lofs = vec2(0.0f, ZrH.y) * vscale; // distance between two adjacent lines relative to source texture size
|
||||
const vec2 iptr = PSin_t * vscale; // pointer to the current pixel in the source texture
|
||||
int idx = int(ZrH.x); // buffer index passed from CPU
|
||||
int field = idx & 1; // current field
|
||||
int vpos = int(gl_FragCoord.y); // vertical position of destination texture
|
||||
float sensitivity = ZrH.w; // passed from CPU, higher values mean more likely to use weave
|
||||
vec3 motion_thr = vec3(1.0, 1.0, 1.0) * sensitivity; //
|
||||
vec2 bofs = vec2(0.0f, 0.5f); // position of the bank 1 relative to source texture size
|
||||
vec2 vscale = vec2(1.0f, 0.5f); // scaling factor from source to destination texture
|
||||
vec2 lofs = vec2(0.0f, ZrH.y) * vscale; // distance between two adjacent lines relative to source texture size
|
||||
vec2 iptr = PSin_t * vscale; // pointer to the current pixel in the source texture
|
||||
|
||||
vec2 p_t0; // pointer to current pixel (missing or not) from most recent frame
|
||||
vec2 p_t1; // pointer to current pixel (missing or not) from one frame back
|
||||
|
|
|
@ -73,10 +73,11 @@ void ps_copy()
|
|||
#ifdef ps_filter_scanlines
|
||||
vec4 ps_scanlines(uint i)
|
||||
{
|
||||
vec4 mask[2] =
|
||||
{
|
||||
vec4(1, 1, 1, 0),
|
||||
vec4(0, 0, 0, 0)};
|
||||
vec4 mask[2] = vec4[2]
|
||||
(
|
||||
vec4(1, 1, 1, 0),
|
||||
vec4(0, 0, 0, 0)
|
||||
);
|
||||
|
||||
return sample_c() * clamp((mask[i] + 0.5f), 0.0f, 1.0f);
|
||||
}
|
||||
|
@ -360,7 +361,7 @@ vec4 LottesCRTPass()
|
|||
//flipped y axis in opengl
|
||||
vec2 fragcoord = vec2(gl_FragCoord.x, u_target_resolution.y - gl_FragCoord.y) - u_target_rect.xy;
|
||||
vec4 color;
|
||||
vec2 inSize = u_target_resolution - (2 * u_target_rect.xy);
|
||||
vec2 inSize = u_target_resolution - (2.0 * u_target_rect.xy);
|
||||
|
||||
vec2 pos = Warp(fragcoord.xy / inSize);
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
|
||||
#ifdef FRAGMENT_SHADER
|
||||
|
||||
#if !defined(BROKEN_DRIVER) && defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts
|
||||
#if !defined(BROKEN_DRIVER) && (pGL_ES || defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts)
|
||||
layout(location = 0)
|
||||
#endif
|
||||
in SHADER
|
||||
|
@ -55,6 +55,8 @@ in SHADER
|
|||
#undef TARGET_0_QUALIFIER
|
||||
#define TARGET_0_QUALIFIER inout
|
||||
#define LAST_FRAG_COLOR SV_Target0
|
||||
#elif defined(GL_ARM_shader_framebuffer_fetch)
|
||||
#define LAST_FRAG_COLOR gl_LastFragColorARM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -125,7 +127,7 @@ vec4 sample_c(vec2 uv)
|
|||
|
||||
return textureLod(TextureSampler, uv, lod);
|
||||
#else
|
||||
return textureLod(TextureSampler, uv, 0); // No lod
|
||||
return textureLod(TextureSampler, uv, 0.0f); // No lod
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -249,10 +251,16 @@ mat4 sample_4p(vec4 u)
|
|||
|
||||
int fetch_raw_depth()
|
||||
{
|
||||
#if PS_TEX_IS_FB == 1
|
||||
return int(fetch_rt().r * exp2(32.0f));
|
||||
#if HAS_CLIP_CONTROL
|
||||
float multiplier = exp2(32.0f);
|
||||
#else
|
||||
return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f));
|
||||
float multiplier = exp2(24.0f);
|
||||
#endif
|
||||
|
||||
#if PS_TEX_IS_FB == 1
|
||||
return int(fetch_rt().r * multiplier);
|
||||
#else
|
||||
return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * multiplier);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -344,13 +352,21 @@ vec4 sample_depth(vec2 st)
|
|||
#elif PS_DEPTH_FMT == 1
|
||||
// Based on ps_convert_float32_rgba8 of convert
|
||||
// Convert a GL_FLOAT32 depth texture into a RGBA color texture
|
||||
uint d = uint(fetch_c(uv).r * exp2(32.0f));
|
||||
#if HAS_CLIP_CONTROL
|
||||
uint d = uint(fetch_c(uv).r * exp2(32.0f));
|
||||
#else
|
||||
uint d = uint(fetch_c(uv).r * exp2(24.0f));
|
||||
#endif
|
||||
t = vec4(uvec4((d & 0xFFu), ((d >> 8) & 0xFFu), ((d >> 16) & 0xFFu), (d >> 24)));
|
||||
|
||||
#elif PS_DEPTH_FMT == 2
|
||||
// Based on ps_convert_float16_rgb5a1 of convert
|
||||
// Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
|
||||
uint d = uint(fetch_c(uv).r * exp2(32.0f));
|
||||
#if HAS_CLIP_CONTROL
|
||||
uint d = uint(fetch_c(uv).r * exp2(32.0f));
|
||||
#else
|
||||
uint d = uint(fetch_c(uv).r * exp2(24.0f));
|
||||
#endif
|
||||
t = vec4(uvec4((d & 0x1Fu), ((d >> 5) & 0x1Fu), ((d >> 10) & 0x1Fu), (d >> 15) & 0x01u)) * vec4(8.0f, 8.0f, 8.0f, 128.0f);
|
||||
|
||||
#elif PS_DEPTH_FMT == 3
|
||||
|
@ -834,16 +850,16 @@ void ps_main()
|
|||
|
||||
vec4 C = ps_color();
|
||||
#if (APITRACE_DEBUG & 1) == 1
|
||||
C.r = 255f;
|
||||
C.r = 255.0f;
|
||||
#endif
|
||||
#if (APITRACE_DEBUG & 2) == 2
|
||||
C.g = 255f;
|
||||
C.g = 255.0f;
|
||||
#endif
|
||||
#if (APITRACE_DEBUG & 4) == 4
|
||||
C.b = 255f;
|
||||
C.b = 255.0f;
|
||||
#endif
|
||||
#if (APITRACE_DEBUG & 8) == 8
|
||||
C.a = 128f;
|
||||
C.a = 128.0f;
|
||||
#endif
|
||||
|
||||
#if PS_SHUFFLE
|
||||
|
|
|
@ -9,7 +9,7 @@ layout(location = 5) in uint i_z;
|
|||
layout(location = 6) in uvec2 i_uv;
|
||||
layout(location = 7) in vec4 i_f;
|
||||
|
||||
#if !defined(BROKEN_DRIVER) && defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts
|
||||
#if !defined(BROKEN_DRIVER) && (pGL_ES || defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts)
|
||||
layout(location = 0)
|
||||
#endif
|
||||
out SHADER
|
||||
|
@ -59,7 +59,15 @@ void vs_main()
|
|||
p.xy = vec2(i_p) - vec2(0.05f, 0.05f);
|
||||
p.xy = p.xy * VertexScale - VertexOffset;
|
||||
p.w = 1.0f;
|
||||
|
||||
#if HAS_CLIP_CONTROL
|
||||
p.z = float(z) * exp_min32;
|
||||
#else
|
||||
// GLES doesn't support ARB_clip_control, so remap it to -1..1. We also reduce the range from 32 bits
|
||||
// to 24 bits, which means some games with very large depth ranges will not render correctly. But,
|
||||
// for most, it's okay, and really, the best we can do.
|
||||
p.z = min(float(z) * exp2(-23.0f), 2.0f) - 1.0f;
|
||||
#endif
|
||||
|
||||
gl_Position = p;
|
||||
|
||||
|
@ -77,7 +85,7 @@ void vs_main()
|
|||
|
||||
#ifdef GEOMETRY_SHADER
|
||||
|
||||
#if !defined(BROKEN_DRIVER) && defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts
|
||||
#if !defined(BROKEN_DRIVER) && (pGL_ES || defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts)
|
||||
layout(location = 0)
|
||||
#endif
|
||||
in SHADER
|
||||
|
@ -91,7 +99,7 @@ in SHADER
|
|||
#endif
|
||||
} GSin[];
|
||||
|
||||
#if !defined(BROKEN_DRIVER) && defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts
|
||||
#if !defined(BROKEN_DRIVER) && (pGL_ES || defined(GL_ARB_enhanced_layouts) && GL_ARB_enhanced_layouts)
|
||||
layout(location = 0)
|
||||
#endif
|
||||
out SHADER
|
||||
|
@ -173,7 +181,7 @@ void gs_main()
|
|||
// Potentially there is faster math
|
||||
vec2 line_vector = normalize(rt_p.xy - lt_p.xy);
|
||||
vec2 line_normal = vec2(line_vector.y, -line_vector.x);
|
||||
vec2 line_width = (line_normal * PointSize) / 2;
|
||||
vec2 line_width = (line_normal * PointSize) / 2.0f;
|
||||
|
||||
lt_p.xy -= line_width;
|
||||
rt_p.xy -= line_width;
|
||||
|
|
|
@ -65,6 +65,11 @@ namespace GL
|
|||
glBufferSubData(m_target, 0, used_size, m_cpu_buffer.data());
|
||||
}
|
||||
|
||||
u32 GetChunkSize() const override
|
||||
{
|
||||
return m_size;
|
||||
}
|
||||
|
||||
static std::unique_ptr<StreamBuffer> Create(GLenum target, u32 size)
|
||||
{
|
||||
glGetError();
|
||||
|
@ -115,6 +120,11 @@ namespace GL
|
|||
glBufferData(m_target, used_size, m_cpu_buffer.data(), GL_STREAM_DRAW);
|
||||
}
|
||||
|
||||
u32 GetChunkSize() const override
|
||||
{
|
||||
return m_size;
|
||||
}
|
||||
|
||||
static std::unique_ptr<StreamBuffer> Create(GLenum target, u32 size)
|
||||
{
|
||||
glGetError();
|
||||
|
@ -226,6 +236,11 @@ namespace GL
|
|||
}
|
||||
}
|
||||
|
||||
u32 GetChunkSize() const override
|
||||
{
|
||||
return m_size / NUM_SYNC_POINTS;
|
||||
}
|
||||
|
||||
u32 m_position = 0;
|
||||
u32 m_used_block_index = 0;
|
||||
u32 m_available_block_index = NUM_SYNC_POINTS;
|
||||
|
|
|
@ -46,6 +46,9 @@ namespace GL
|
|||
virtual MappingResult Map(u32 alignment, u32 min_size) = 0;
|
||||
virtual void Unmap(u32 used_size) = 0;
|
||||
|
||||
/// Returns the minimum granularity of blocks which sync objects will be created around.
|
||||
virtual u32 GetChunkSize() const = 0;
|
||||
|
||||
static std::unique_ptr<StreamBuffer> Create(GLenum target, u32 size);
|
||||
|
||||
protected:
|
||||
|
|
|
@ -788,7 +788,6 @@ if(USE_OPENGL)
|
|||
GS/Renderers/OpenGL/GLState.h
|
||||
GS/Renderers/OpenGL/GSDeviceOGL.h
|
||||
GS/Renderers/OpenGL/GSTextureOGL.h
|
||||
GS/Renderers/OpenGL/GSUniformBufferOGL.h
|
||||
)
|
||||
target_link_libraries(PCSX2_FLAGS INTERFACE glad)
|
||||
endif()
|
||||
|
|
|
@ -20,9 +20,7 @@
|
|||
|
||||
#if !defined(NDEBUG) || defined(_DEBUG) || defined(_DEVEL)
|
||||
#define ENABLE_OGL_DEBUG // Create a debug context and check opengl command status. Allow also to dump various textures/states.
|
||||
//#define ENABLE_OGL_DEBUG_FENCE
|
||||
//#define ENABLE_OGL_DEBUG_MEM_BW // compute the quantity of data transferred (debug purpose)
|
||||
//#define ENABLE_TRACE_REG // print GS reg write
|
||||
//#define ENABLE_TRACE_REG // print GS reg write
|
||||
//#define ENABLE_EXTRA_LOG // print extra log
|
||||
#endif
|
||||
|
||||
|
|
|
@ -699,6 +699,7 @@ public:
|
|||
bool bptc_textures : 1; ///< Supports BC6/7 texture compression.
|
||||
bool framebuffer_fetch : 1; ///< Can sample from the framebuffer without texture barriers.
|
||||
bool dual_source_blend : 1; ///< Can use alpha output as a blend factor.
|
||||
bool clip_control : 1; ///< Can use 0..1 depth range instead of -1..1.
|
||||
bool stencil_buffer : 1; ///< Supports stencil buffer, and can use for DATE.
|
||||
bool cas_sharpening : 1; ///< Supports sufficient functionality for contrast adaptive sharpening.
|
||||
FeatureSupport()
|
||||
|
|
|
@ -58,6 +58,7 @@ GSDevice11::GSDevice11()
|
|||
m_features.framebuffer_fetch = false;
|
||||
m_features.dual_source_blend = true;
|
||||
m_features.stencil_buffer = true;
|
||||
m_features.clip_control = true;
|
||||
}
|
||||
|
||||
GSDevice11::~GSDevice11()
|
||||
|
|
|
@ -2179,7 +2179,7 @@ void GSRendererHW::EmulateZbuffer()
|
|||
}
|
||||
else if (!m_context->ZBUF.ZMSK)
|
||||
{
|
||||
m_conf.cb_ps.TA_MaxDepth_Af.z = static_cast<float>(max_z) * 0x1p-32f;
|
||||
m_conf.cb_ps.TA_MaxDepth_Af.z = static_cast<float>(max_z) * (g_gs_device->Features().clip_control ? 0x1p-32f : 0x1p-24f);
|
||||
m_conf.ps.zclamp = 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,28 +16,8 @@
|
|||
#include "PrecompiledHeader.h"
|
||||
#include "GLLoader.h"
|
||||
#include "GS/GS.h"
|
||||
#include <unordered_set>
|
||||
#include "Host.h"
|
||||
|
||||
namespace GLExtension
|
||||
{
|
||||
|
||||
static std::unordered_set<std::string> s_extensions;
|
||||
|
||||
bool Has(const std::string& ext)
|
||||
{
|
||||
return !!s_extensions.count(ext);
|
||||
}
|
||||
|
||||
void Set(const std::string& ext, bool v)
|
||||
{
|
||||
if (v)
|
||||
s_extensions.insert(ext);
|
||||
else
|
||||
s_extensions.erase(ext);
|
||||
}
|
||||
} // namespace GLExtension
|
||||
|
||||
namespace ReplaceGL
|
||||
{
|
||||
void APIENTRY ScissorIndexed(GLuint index, GLint left, GLint bottom, GLsizei width, GLsizei height)
|
||||
|
@ -56,7 +36,6 @@ namespace ReplaceGL
|
|||
|
||||
} // namespace ReplaceGL
|
||||
|
||||
#ifdef _WIN32
|
||||
namespace Emulate_DSA
|
||||
{
|
||||
// Texture entry point
|
||||
|
@ -108,12 +87,6 @@ namespace Emulate_DSA
|
|||
}
|
||||
|
||||
// Misc entry point
|
||||
// (only purpose is to have a consistent API otherwise it is useless)
|
||||
void APIENTRY CreateProgramPipelines(GLsizei n, GLuint* pipelines)
|
||||
{
|
||||
glGenProgramPipelines(n, pipelines);
|
||||
}
|
||||
|
||||
void APIENTRY CreateSamplers(GLsizei n, GLuint* samplers)
|
||||
{
|
||||
glGenSamplers(n, samplers);
|
||||
|
@ -130,12 +103,10 @@ namespace Emulate_DSA
|
|||
glCompressedTextureSubImage2D = CompressedTextureSubImage;
|
||||
glGetTextureImage = GetTexureImage;
|
||||
glTextureParameteri = TextureParameteri;
|
||||
|
||||
glCreateProgramPipelines = CreateProgramPipelines;
|
||||
glGenerateTextureMipmap = GenerateTextureMipmap;
|
||||
glCreateSamplers = CreateSamplers;
|
||||
}
|
||||
} // namespace Emulate_DSA
|
||||
#endif
|
||||
|
||||
namespace GLLoader
|
||||
{
|
||||
|
@ -143,53 +114,18 @@ namespace GLLoader
|
|||
bool vendor_id_nvidia = false;
|
||||
bool vendor_id_intel = false;
|
||||
bool mesa_driver = false;
|
||||
bool in_replayer = false;
|
||||
bool buggy_pbo = false;
|
||||
|
||||
bool is_gles = false;
|
||||
bool has_dual_source_blend = false;
|
||||
bool has_clip_control = true;
|
||||
bool found_framebuffer_fetch = false;
|
||||
bool found_geometry_shader = true; // we require GL3.3 so geometry must be supported by default
|
||||
// DX11 GPU
|
||||
bool found_GL_ARB_gpu_shader5 = false; // Require IvyBridge
|
||||
bool found_GL_ARB_gpu_shader5 = false; // Require IvyBridge
|
||||
bool found_GL_ARB_texture_barrier = false;
|
||||
|
||||
static bool mandatory(const std::string& ext)
|
||||
{
|
||||
if (!GLExtension::Has(ext))
|
||||
{
|
||||
Host::ReportFormattedErrorAsync("GS", "ERROR: %s is NOT SUPPORTED\n", ext.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool optional(const std::string& name)
|
||||
{
|
||||
bool found = GLExtension::Has(name);
|
||||
|
||||
if (!found)
|
||||
{
|
||||
DevCon.Warning("INFO: %s is NOT SUPPORTED", name.c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
DevCon.WriteLn("INFO: %s is available", name.c_str());
|
||||
}
|
||||
|
||||
std::string opt("override_");
|
||||
opt += name;
|
||||
|
||||
if (theApp.GetConfigI(opt.c_str()) != -1)
|
||||
{
|
||||
found = theApp.GetConfigB(opt.c_str());
|
||||
fprintf(stderr, "Override %s detection (%s)\n", name.c_str(), found ? "Enabled" : "Disabled");
|
||||
GLExtension::Set(name, found);
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
bool check_gl_version(int major, int minor)
|
||||
static bool check_gl_version()
|
||||
{
|
||||
const char* vendor = (const char*)glGetString(GL_VENDOR);
|
||||
if (strstr(vendor, "Advanced Micro Devices") || strstr(vendor, "ATI Technologies Inc.") || strstr(vendor, "ATI"))
|
||||
|
@ -209,103 +145,97 @@ namespace GLLoader
|
|||
{
|
||||
found_geometry_shader = GSConfig.OverrideGeometryShaders != 0 &&
|
||||
(GLAD_GL_VERSION_3_2 || GL_ARB_geometry_shader4 || GSConfig.OverrideGeometryShaders == 1);
|
||||
GLExtension::Set("GL_ARB_geometry_shader4", found_geometry_shader);
|
||||
fprintf(stderr, "Overriding geometry shaders detection\n");
|
||||
Console.Warning("Overriding geometry shaders detection to %s", found_geometry_shader ? "true" : "false");
|
||||
}
|
||||
|
||||
GLint major_gl = 0;
|
||||
GLint minor_gl = 0;
|
||||
glGetIntegerv(GL_MAJOR_VERSION, &major_gl);
|
||||
glGetIntegerv(GL_MINOR_VERSION, &minor_gl);
|
||||
if ((major_gl < major) || (major_gl == major && minor_gl < minor))
|
||||
if (!GLAD_GL_VERSION_3_3 && !GLAD_GL_ES_VERSION_3_1)
|
||||
{
|
||||
Host::ReportFormattedErrorAsync("GS", "OpenGL %d.%d is not supported. Only OpenGL %d.%d\n was found", major, minor, major_gl, minor_gl);
|
||||
Host::ReportFormattedErrorAsync("GS", "OpenGL is not supported. Only OpenGL %d.%d\n was found", major_gl, minor_gl);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool check_gl_supported_extension()
|
||||
static bool check_gl_supported_extension()
|
||||
{
|
||||
int max_ext = 0;
|
||||
glGetIntegerv(GL_NUM_EXTENSIONS, &max_ext);
|
||||
for (GLint i = 0; i < max_ext; i++)
|
||||
if (GLAD_GL_VERSION_3_3 && !GLAD_GL_ARB_shading_language_420pack)
|
||||
{
|
||||
std::string ext{(const char*)glGetStringi(GL_EXTENSIONS, i)};
|
||||
GLExtension::Set(ext);
|
||||
//fprintf(stderr, "DEBUG ext: %s\n", ext.c_str());
|
||||
}
|
||||
|
||||
// Mandatory for both renderer
|
||||
bool ok = true;
|
||||
{
|
||||
// GL4.1
|
||||
ok = ok && mandatory("GL_ARB_separate_shader_objects");
|
||||
// GL4.2
|
||||
ok = ok && mandatory("GL_ARB_shading_language_420pack");
|
||||
ok = ok && mandatory("GL_ARB_texture_storage");
|
||||
// GL4.3
|
||||
ok = ok && mandatory("GL_KHR_debug");
|
||||
// GL4.4
|
||||
ok = ok && mandatory("GL_ARB_buffer_storage");
|
||||
}
|
||||
|
||||
// Only for HW renderer
|
||||
if (GSConfig.UseHardwareRenderer())
|
||||
{
|
||||
ok = ok && mandatory("GL_ARB_copy_image");
|
||||
ok = ok && mandatory("GL_ARB_clip_control");
|
||||
}
|
||||
if (!ok)
|
||||
Host::ReportFormattedErrorAsync("GS",
|
||||
"GL_ARB_shading_language_420pack is not supported, this is required for the OpenGL renderer.");
|
||||
return false;
|
||||
|
||||
// Extra
|
||||
{
|
||||
// GL4.0
|
||||
found_GL_ARB_gpu_shader5 = optional("GL_ARB_gpu_shader5");
|
||||
// GL4.5
|
||||
optional("GL_ARB_direct_state_access");
|
||||
// Mandatory for the advanced HW renderer effects. Unfortunately Mesa LLVMPIPE/SWR renderers don't support this extension.
|
||||
// Rendering might be corrupted but it could be good enough for test/virtual machine.
|
||||
found_GL_ARB_texture_barrier = optional("GL_ARB_texture_barrier");
|
||||
|
||||
has_dual_source_blend = GLAD_GL_VERSION_3_2 || GLAD_GL_ARB_blend_func_extended;
|
||||
found_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch;
|
||||
if (found_framebuffer_fetch && GSConfig.DisableFramebufferFetch)
|
||||
{
|
||||
Console.Warning("Framebuffer fetch was found but is disabled. This will reduce performance.");
|
||||
found_framebuffer_fetch = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!GLExtension::Has("GL_ARB_viewport_array"))
|
||||
// GLES doesn't have ARB_clip_control.
|
||||
has_clip_control = GLAD_GL_ARB_clip_control;
|
||||
if (!has_clip_control && !is_gles)
|
||||
{
|
||||
Host::AddOSDMessage("GL_ARB_clip_control is not supported, this will cause rendering issues.",
|
||||
Host::OSD_ERROR_DURATION);
|
||||
}
|
||||
|
||||
found_GL_ARB_gpu_shader5 = GLAD_GL_ARB_gpu_shader5;
|
||||
found_GL_ARB_texture_barrier = GLAD_GL_ARB_texture_barrier;
|
||||
|
||||
has_dual_source_blend = GLAD_GL_VERSION_3_2 || GLAD_GL_ARB_blend_func_extended;
|
||||
found_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch;
|
||||
if (found_framebuffer_fetch && GSConfig.DisableFramebufferFetch)
|
||||
{
|
||||
Console.Warning("Framebuffer fetch was found but is disabled. This will reduce performance.");
|
||||
found_framebuffer_fetch = false;
|
||||
}
|
||||
|
||||
if (!GLAD_GL_ARB_viewport_array)
|
||||
{
|
||||
glScissorIndexed = ReplaceGL::ScissorIndexed;
|
||||
glViewportIndexedf = ReplaceGL::ViewportIndexedf;
|
||||
Console.Warning("GL_ARB_viewport_array is not supported! Function pointer will be replaced");
|
||||
}
|
||||
|
||||
if (!GLExtension::Has("GL_ARB_texture_barrier"))
|
||||
if (!GLAD_GL_ARB_texture_barrier)
|
||||
{
|
||||
glTextureBarrier = ReplaceGL::TextureBarrier;
|
||||
Console.Warning("GL_ARB_texture_barrier is not supported! Blending emulation will not be supported");
|
||||
Host::AddOSDMessage("GL_ARB_texture_barrier is not supported, blending will not be accurate.",
|
||||
Host::OSD_ERROR_DURATION);
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// Thank you Intel for not providing support of basic features on your IGPUs.
|
||||
if (!GLExtension::Has("GL_ARB_direct_state_access"))
|
||||
if (!GLAD_GL_ARB_direct_state_access)
|
||||
{
|
||||
Console.Warning("GL_ARB_direct_state_access is not supported, this will reduce performance.");
|
||||
Emulate_DSA::Init();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (is_gles)
|
||||
{
|
||||
has_dual_source_blend = GLAD_GL_EXT_blend_func_extended || GLAD_GL_ARB_blend_func_extended;
|
||||
if (!has_dual_source_blend && !found_framebuffer_fetch)
|
||||
{
|
||||
Host::AddOSDMessage("Both dual source blending and framebuffer fetch are missing, things will be broken.",
|
||||
Host::OSD_ERROR_DURATION);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Core in GL3.2, so everything supports it.
|
||||
has_dual_source_blend = true;
|
||||
}
|
||||
|
||||
// Don't use PBOs when we don't have ARB_buffer_storage, orphaning buffers probably ends up worse than just
|
||||
// using the normal texture update routines and letting the driver take care of it.
|
||||
GLLoader::buggy_pbo = !GLAD_GL_VERSION_4_4 && !GLAD_GL_ARB_buffer_storage && !GLAD_GL_EXT_buffer_storage;
|
||||
if (GLLoader::buggy_pbo)
|
||||
Console.Warning("Not using PBOs for texture uploads because buffer_storage is unavailable.");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool check_gl_requirements()
|
||||
{
|
||||
if (!check_gl_version(3, 3))
|
||||
if (!check_gl_version())
|
||||
return false;
|
||||
|
||||
if (!check_gl_supported_extension())
|
||||
|
|
|
@ -17,17 +17,11 @@
|
|||
|
||||
#define GL_TEX_LEVEL_0 (0)
|
||||
#define GL_TEX_LEVEL_1 (1)
|
||||
#define GL_FB_DEFAULT (0)
|
||||
#define GL_BUFFER_0 (0)
|
||||
#define GL_FB_DEFAULT (0)
|
||||
#define GL_BUFFER_0 (0)
|
||||
|
||||
#include "glad.h"
|
||||
|
||||
namespace GLExtension
|
||||
{
|
||||
extern bool Has(const std::string& ext);
|
||||
extern void Set(const std::string& ext, bool v = true);
|
||||
} // namespace GLExtension
|
||||
|
||||
namespace GLLoader
|
||||
{
|
||||
bool check_gl_requirements();
|
||||
|
@ -36,9 +30,12 @@ namespace GLLoader
|
|||
extern bool vendor_id_nvidia;
|
||||
extern bool vendor_id_intel;
|
||||
extern bool mesa_driver;
|
||||
extern bool buggy_pbo;
|
||||
extern bool in_replayer;
|
||||
|
||||
// GL
|
||||
extern bool is_gles;
|
||||
extern bool has_clip_control;
|
||||
extern bool has_dual_source_blend;
|
||||
extern bool found_framebuffer_fetch;
|
||||
extern bool found_geometry_shader;
|
||||
|
|
|
@ -23,6 +23,7 @@ namespace GLState
|
|||
GSVector4i scissor;
|
||||
|
||||
bool point_size = false;
|
||||
float line_width = 1.0f;
|
||||
|
||||
bool blend;
|
||||
u16 eq_RGB;
|
||||
|
|
|
@ -25,6 +25,7 @@ namespace GLState
|
|||
extern GSVector4i scissor;
|
||||
|
||||
extern bool point_size;
|
||||
extern float line_width;
|
||||
|
||||
extern bool blend;
|
||||
extern u16 eq_RGB;
|
||||
|
|
|
@ -30,13 +30,6 @@
|
|||
|
||||
//#define ONLY_LINES
|
||||
|
||||
// TODO port those value into PerfMon API
|
||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||
u64 g_real_texture_upload_byte = 0;
|
||||
u64 g_vertex_upload_byte = 0;
|
||||
u64 g_uniform_upload_byte = 0;
|
||||
#endif
|
||||
|
||||
static constexpr u32 g_vs_cb_index = 1;
|
||||
static constexpr u32 g_ps_cb_index = 0;
|
||||
|
||||
|
@ -44,33 +37,15 @@ static constexpr u32 VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
|
|||
static constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
static constexpr u32 VERTEX_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024;
|
||||
static constexpr u32 FRAGMENT_UNIFORM_BUFFER_SIZE = 8 * 1024 * 1024;
|
||||
static constexpr u32 TEXTURE_UPLOAD_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
|
||||
int GSDeviceOGL::m_shader_inst = 0;
|
||||
int GSDeviceOGL::m_shader_reg = 0;
|
||||
FILE* GSDeviceOGL::m_debug_gl_file = NULL;
|
||||
static std::unique_ptr<GL::StreamBuffer> s_texture_upload_buffer;
|
||||
|
||||
// Constructs the device with the draw FBO, read FBO and palette sampler
// handles zero-initialised. No GL objects are created here.
GSDeviceOGL::GSDeviceOGL()
|
||||
: m_fbo(0)
|
||||
, m_fbo_read(0)
|
||||
, m_palette_ss(0)
|
||||
{
|
||||
// Reset the debug file
|
||||
#ifdef ENABLE_OGL_DEBUG
|
||||
// Opened in "w" mode, so any previous log is truncated. The result is not
// checked here; users of m_debug_gl_file test it for null before writing.
m_debug_gl_file = fopen("GS_opengl_debug.txt", "w");
|
||||
#endif
|
||||
}
|
||||
GSDeviceOGL::GSDeviceOGL() = default;
|
||||
|
||||
GSDeviceOGL::~GSDeviceOGL()
|
||||
{
|
||||
#ifdef ENABLE_OGL_DEBUG
|
||||
if (m_debug_gl_file)
|
||||
{
|
||||
fclose(m_debug_gl_file);
|
||||
m_debug_gl_file = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Clean vertex buffer state
|
||||
s_texture_upload_buffer.reset();
|
||||
if (m_vertex_array_object)
|
||||
glDeleteVertexArrays(1, &m_vertex_array_object);
|
||||
m_vertex_stream_buffer.reset();
|
||||
|
@ -86,6 +61,7 @@ GSDeviceOGL::~GSDeviceOGL()
|
|||
// Clean various opengl allocation
|
||||
glDeleteFramebuffers(1, &m_fbo);
|
||||
glDeleteFramebuffers(1, &m_fbo_read);
|
||||
glDeleteFramebuffers(1, &m_fbo_write);
|
||||
|
||||
// Delete HW FX
|
||||
m_vertex_uniform_stream_buffer.reset();
|
||||
|
@ -98,91 +74,6 @@ GSDeviceOGL::~GSDeviceOGL()
|
|||
|
||||
for (GSDepthStencilOGL* ds : m_om_dss)
|
||||
delete ds;
|
||||
|
||||
PboPool::Destroy();
|
||||
}
|
||||
|
||||
// Turns the timestamp queries recorded in m_profiler into a frame-time report.
//
// The deltas between consecutive queries are converted to milliseconds, then
// min / mean / max / standard deviation and a histogram with 2 ms wide buckets
// are printed to stderr, and every sample is written to "GS_profile.csv" as
// "<index>,<ms>". All query objects are deleted before returning.
void GSDeviceOGL::GenerateProfilerData()
|
||||
{
|
||||
// Too few queries to produce a meaningful report: just release them.
if (m_profiler.last_query < 3)
|
||||
{
|
||||
glDeleteQueries(1 << 16, m_profiler.timer_query);
|
||||
return;
|
||||
}
|
||||
|
||||
// Wait for the latest query to get a valid result
// NOTE(review): this spins on the driver without sleeping or yielding.
|
||||
GLuint available = 0;
|
||||
while (!available)
|
||||
{
|
||||
glGetQueryObjectuiv(m_profiler.timer(), GL_QUERY_RESULT_AVAILABLE, &available);
|
||||
}
|
||||
|
||||
GLuint64 time_start = 0;
|
||||
GLuint64 time_end = 0;
|
||||
std::vector<double> times;
|
||||
// Scale applied to the raw query deltas. The values are reported as "ms"
// below, so the raw results are presumably nanoseconds -- TODO confirm.
constexpr double ms = 0.000001;
|
||||
|
||||
// With linux_replay > 1 the first (last_query / replay) queries are skipped;
// otherwise sampling starts at query 0.
const int replay = theApp.GetConfigI("linux_replay");
|
||||
const int first_query = replay > 1 ? m_profiler.last_query / replay : 0;
|
||||
|
||||
// Each sample is the elapsed time between two consecutive queries.
glGetQueryObjectui64v(m_profiler.timer_query[first_query], GL_QUERY_RESULT, &time_start);
|
||||
for (u32 q = first_query + 1; q < m_profiler.last_query; q++)
|
||||
{
|
||||
glGetQueryObjectui64v(m_profiler.timer_query[q], GL_QUERY_RESULT, &time_end);
|
||||
u64 t = time_end - time_start;
|
||||
times.push_back((double)t * ms);
|
||||
|
||||
time_start = time_end;
|
||||
}
|
||||
|
||||
// Latest value is often silly, just drop it
// NOTE(review): if the loop above produced a single sample (e.g. last_query == 3
// with replay == 2), `times` is empty from here on. The statistics below then
// divide by zero and dereference the end iterators from minmax_element.
|
||||
times.pop_back();
|
||||
|
||||
glDeleteQueries(1 << 16, m_profiler.timer_query);
|
||||
|
||||
const double frames = times.size();
|
||||
double mean = 0.0;
|
||||
double sd = 0.0;
|
||||
|
||||
auto minmax_time = std::minmax_element(times.begin(), times.end());
|
||||
|
||||
for (auto t : times)
|
||||
mean += t;
|
||||
mean = mean / frames;
|
||||
|
||||
// Population standard deviation (divides by the sample count, not count - 1).
for (auto t : times)
|
||||
sd += pow(t - mean, 2);
|
||||
sd = sqrt(sd / frames);
|
||||
|
||||
// Histogram with 2 ms wide buckets; the last bucket also collects every
// sample that falls beyond the end of the table.
u32 time_repartition[16] = {0};
|
||||
for (auto t : times)
|
||||
{
|
||||
size_t slot = std::min<size_t>(t / 2.0, std::size(time_repartition) - 1);
|
||||
time_repartition[slot]++;
|
||||
}
|
||||
|
||||
fprintf(stderr, "\nPerformance Profile for %.0f frames:\n", frames);
|
||||
fprintf(stderr, "Min %4.2f ms\t(%4.2f fps)\n", *minmax_time.first, 1000.0 / *minmax_time.first);
|
||||
fprintf(stderr, "Mean %4.2f ms\t(%4.2f fps)\n", mean, 1000.0 / mean);
|
||||
fprintf(stderr, "Max %4.2f ms\t(%4.2f fps)\n", *minmax_time.second, 1000.0 / *minmax_time.second);
|
||||
fprintf(stderr, "SD %4.2f ms\n", sd);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Frame Repartition\n");
|
||||
for (u32 i = 0; i < std::size(time_repartition); i++)
|
||||
{
|
||||
fprintf(stderr, "%3u ms => %3u ms\t%4u\n", 2 * i, 2 * (i + 1), time_repartition[i]);
|
||||
}
|
||||
|
||||
// Dump the raw samples; the CSV is silently skipped if the file cannot be opened.
FILE* csv = fopen("GS_profile.csv", "w");
|
||||
if (csv)
|
||||
{
|
||||
for (size_t i = 0; i < times.size(); i++)
|
||||
{
|
||||
fprintf(csv, "%zu,%lf\n", i, times[i]);
|
||||
}
|
||||
|
||||
fclose(csv);
|
||||
}
|
||||
}
|
||||
|
||||
GSTexture* GSDeviceOGL::CreateSurface(GSTexture::Type type, int width, int height, int levels, GSTexture::Format format)
|
||||
|
@ -196,11 +87,13 @@ bool GSDeviceOGL::Create()
|
|||
if (!GSDevice::Create())
|
||||
return false;
|
||||
|
||||
if (g_host_display->GetRenderAPI() != RenderAPI::OpenGL)
|
||||
const RenderAPI render_api = g_host_display->GetRenderAPI();
|
||||
if (render_api != RenderAPI::OpenGL && render_api != RenderAPI::OpenGLES)
|
||||
return false;
|
||||
|
||||
// Check openGL requirement as soon as possible so we can switch to another
|
||||
// renderer/device
|
||||
GLLoader::is_gles = (render_api == RenderAPI::OpenGLES);
|
||||
if (!GLLoader::check_gl_requirements())
|
||||
return false;
|
||||
|
||||
|
@ -227,18 +120,29 @@ bool GSDeviceOGL::Create()
|
|||
m_features.provoking_vertex_last = true;
|
||||
m_features.dxt_textures = GLAD_GL_EXT_texture_compression_s3tc;
|
||||
m_features.bptc_textures = GLAD_GL_VERSION_4_2 || GLAD_GL_ARB_texture_compression_bptc || GLAD_GL_EXT_texture_compression_bptc;
|
||||
m_features.prefer_new_textures = false;
|
||||
m_features.prefer_new_textures = GLLoader::is_gles;
|
||||
m_features.framebuffer_fetch = GLLoader::found_framebuffer_fetch;
|
||||
m_features.dual_source_blend = GLLoader::has_dual_source_blend && !GSConfig.DisableDualSourceBlend;
|
||||
m_features.clip_control = GLLoader::has_clip_control;
|
||||
m_features.stencil_buffer = true;
|
||||
// Wide line support in GL is deprecated as of 3.1, so we will just do it in the Geometry Shader.
|
||||
m_features.line_expand = false;
|
||||
|
||||
GLint point_range[2] = {};
|
||||
glGetIntegerv(GL_ALIASED_POINT_SIZE_RANGE, point_range);
|
||||
m_features.point_expand = (point_range[0] <= GSConfig.UpscaleMultiplier && point_range[1] >= GSConfig.UpscaleMultiplier);
|
||||
|
||||
Console.WriteLn("Using %s for point expansion.", m_features.point_expand ? "hardware" : "geometry shaders");
|
||||
if (GLLoader::is_gles)
|
||||
{
|
||||
GLint line_range[2] = {};
|
||||
glGetIntegerv(GL_ALIASED_LINE_WIDTH_RANGE, line_range);
|
||||
m_features.line_expand = (line_range[0] <= static_cast<GLint>(GSConfig.UpscaleMultiplier) && line_range[1] >= static_cast<GLint>(GSConfig.UpscaleMultiplier));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_features.line_expand = false;
|
||||
}
|
||||
|
||||
Console.WriteLn("Using %s for point expansion and %s for line expansion.",
|
||||
m_features.point_expand ? "hardware" : "geometry shaders", m_features.line_expand ? "hardware" : "geometry shaders");
|
||||
|
||||
{
|
||||
auto shader = Host::ReadResourceFileToString("shaders/opengl/common_header.glsl");
|
||||
|
@ -257,18 +161,27 @@ bool GSDeviceOGL::Create()
|
|||
// ****************************************************************
|
||||
// Debug helper
|
||||
// ****************************************************************
|
||||
#ifdef ENABLE_OGL_DEBUG
|
||||
if (GSConfig.UseDebugDevice)
|
||||
{
|
||||
glDebugMessageCallback((GLDEBUGPROC)DebugOutputToFile, NULL);
|
||||
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB);
|
||||
if (!GLLoader::is_gles)
|
||||
{
|
||||
glDebugMessageCallback((GLDEBUGPROC)DebugOutputToFile, NULL);
|
||||
|
||||
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
|
||||
// Useless info message on Nvidia driver
|
||||
GLuint ids[] = {0x20004};
|
||||
glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, std::size(ids), ids, false);
|
||||
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
|
||||
// Useless info message on Nvidia driver
|
||||
GLuint ids[] = { 0x20004 };
|
||||
glDebugMessageControl(GL_DEBUG_SOURCE_API_ARB, GL_DEBUG_TYPE_OTHER_ARB, GL_DONT_CARE, std::size(ids), ids, false);
|
||||
}
|
||||
else if (GLAD_GL_KHR_debug)
|
||||
{
|
||||
glDebugMessageCallbackKHR((GLDEBUGPROC)DebugOutputToFile, NULL);
|
||||
glDebugMessageControlKHR(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, true);
|
||||
}
|
||||
|
||||
// Uncomment synchronous if you want callstacks which match where the error occurred.
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
//glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB);
|
||||
}
|
||||
#endif
|
||||
|
||||
// WARNING it must be done after the control setup (at least on MESA)
|
||||
GL_PUSH("GSDeviceOGL::Create");
|
||||
|
@ -287,16 +200,11 @@ bool GSDeviceOGL::Create()
|
|||
OMSetFBO(0);
|
||||
|
||||
glGenFramebuffers(1, &m_fbo_read);
|
||||
glGenFramebuffers(1, &m_fbo_write);
|
||||
// Always read from the first buffer
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
|
||||
glReadBuffer(GL_COLOR_ATTACHMENT0);
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
|
||||
|
||||
// Some timers to help profiling
|
||||
if (GLLoader::in_replayer)
|
||||
{
|
||||
glCreateQueries(GL_TIMESTAMP, 1 << 16, m_profiler.timer_query);
|
||||
}
|
||||
}
|
||||
|
||||
// ****************************************************************
|
||||
|
@ -379,7 +287,7 @@ bool GSDeviceOGL::Create()
|
|||
{
|
||||
const char* name = shaderName(static_cast<ShaderConvert>(i));
|
||||
const std::string macro_sel = (static_cast<ShaderConvert>(i) == ShaderConvert::RGBA_TO_8I) ?
|
||||
fmt::format("#define PS_SCALE_FACTOR {}\n", GSConfig.UpscaleMultiplier) :
|
||||
fmt::format("#define PS_SCALE_FACTOR {:.8f}f\n", GSConfig.UpscaleMultiplier) :
|
||||
std::string();
|
||||
const std::string ps(GetShaderSource(name, GL_FRAGMENT_SHADER, m_shader_common_header, *convert_glsl, macro_sel));
|
||||
if (!m_shader_cache.GetProgram(&m_convert.ps[i], m_convert.vs, {}, ps))
|
||||
|
@ -518,15 +426,13 @@ bool GSDeviceOGL::Create()
|
|||
{
|
||||
GL_PUSH("GSDeviceOGL::Rasterization");
|
||||
|
||||
#ifdef ONLY_LINES
|
||||
glLineWidth(5.0);
|
||||
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
|
||||
#else
|
||||
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
|
||||
#endif
|
||||
if (!GLLoader::is_gles)
|
||||
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
|
||||
glDisable(GL_CULL_FACE);
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
glDisable(GL_MULTISAMPLE);
|
||||
if (!GLLoader::is_gles)
|
||||
glDisable(GL_MULTISAMPLE);
|
||||
|
||||
glDisable(GL_DITHER); // Honestly I don't know!
|
||||
}
|
||||
|
||||
|
@ -560,7 +466,7 @@ bool GSDeviceOGL::Create()
|
|||
// This extension allow FS depth to range from -1 to 1. So
|
||||
// gl_position.z could range from [0, 1]
|
||||
// Change depth convention
|
||||
if (GLExtension::Has("GL_ARB_clip_control"))
|
||||
if (GLLoader::has_clip_control)
|
||||
glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
|
||||
|
||||
// ****************************************************************
|
||||
|
@ -572,15 +478,19 @@ bool GSDeviceOGL::Create()
|
|||
// ****************************************************************
|
||||
// Pbo Pool allocation
|
||||
// ****************************************************************
|
||||
if (!GLLoader::buggy_pbo)
|
||||
{
|
||||
GL_PUSH("GSDeviceOGL::PBO");
|
||||
|
||||
// Mesa seems to use it to compute the row length. In our case, we are
|
||||
// tightly packed so don't bother with this parameter and set it to the
|
||||
// minimum alignment (1 byte)
|
||||
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||
|
||||
PboPool::Init();
|
||||
s_texture_upload_buffer = GL::StreamBuffer::Create(GL_PIXEL_UNPACK_BUFFER, TEXTURE_UPLOAD_BUFFER_SIZE);
|
||||
if (!s_texture_upload_buffer)
|
||||
{
|
||||
Console.Error("Failed to create texture upload buffer. Using slow path.");
|
||||
GLLoader::buggy_pbo = true;
|
||||
}
|
||||
}
|
||||
|
||||
// ****************************************************************
|
||||
|
@ -592,7 +502,7 @@ bool GSDeviceOGL::Create()
|
|||
// Full VRAM; remove a small margin for other buffers
|
||||
glGetIntegerv(GL_TEXTURE_FREE_MEMORY_ATI, vram);
|
||||
}
|
||||
else if (GLExtension::Has("GL_NVX_gpu_memory_info"))
|
||||
else if (GLAD_GL_NVX_gpu_memory_info)
|
||||
{
|
||||
// GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX <= give full memory
|
||||
// Available vram
|
||||
|
@ -649,12 +559,6 @@ bool GSDeviceOGL::CreateTextureFX()
|
|||
m_om_dss[key] = CreateDepthStencil(OMDepthStencilSelector(key));
|
||||
}
|
||||
|
||||
if (GLLoader::in_replayer)
|
||||
{
|
||||
glQueryCounter(m_profiler.timer(), GL_TIMESTAMP);
|
||||
m_profiler.last_query++;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -662,6 +566,12 @@ void GSDeviceOGL::ResetAPIState()
|
|||
{
|
||||
if (GLState::point_size)
|
||||
glDisable(GL_PROGRAM_POINT_SIZE);
|
||||
if (GLState::line_width != 1.0f)
|
||||
glLineWidth(1.0f);
|
||||
|
||||
// clear out DSB
|
||||
glBlendFuncSeparate(GL_ONE, GL_ZERO, GL_ONE, GL_ZERO);
|
||||
glDisable(GL_BLEND);
|
||||
}
|
||||
|
||||
void GSDeviceOGL::RestoreAPIState()
|
||||
|
@ -714,6 +624,8 @@ void GSDeviceOGL::RestoreAPIState()
|
|||
|
||||
if (GLState::point_size)
|
||||
glEnable(GL_PROGRAM_POINT_SIZE);
|
||||
if (GLState::line_width != 1.0f)
|
||||
glLineWidth(GLState::line_width);
|
||||
|
||||
// Force UBOs to be reuploaded, we don't know what else was bound there.
|
||||
std::memset(&m_vs_cb_cache, 0xFF, sizeof(m_vs_cb_cache));
|
||||
|
@ -770,7 +682,17 @@ void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c)
|
|||
OMSetFBO(m_fbo);
|
||||
OMAttachRt(T);
|
||||
|
||||
glClearBufferfv(GL_COLOR, 0, c.v);
|
||||
if (T->IsIntegerFormat())
|
||||
{
|
||||
if (T->IsUnsignedFormat())
|
||||
glClearBufferuiv(GL_COLOR, 0, c.U32);
|
||||
else
|
||||
glClearBufferiv(GL_COLOR, 0, c.I32);
|
||||
}
|
||||
else
|
||||
{
|
||||
glClearBufferfv(GL_COLOR, 0, c.v);
|
||||
}
|
||||
|
||||
OMSetColorMaskState(OMColorMaskSelector(old_color_mask));
|
||||
|
||||
|
@ -899,10 +821,10 @@ GLuint GSDeviceOGL::CreateSampler(PSSamplerSelector sel)
|
|||
const int anisotropy = GSConfig.MaxAnisotropy;
|
||||
if (anisotropy > 1 && sel.aniso)
|
||||
{
|
||||
if (GLExtension::Has("GL_ARB_texture_filter_anisotropic"))
|
||||
glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY, (float)anisotropy);
|
||||
else if (GLExtension::Has("GL_EXT_texture_filter_anisotropic"))
|
||||
glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY_EXT, (float)anisotropy);
|
||||
if (GLAD_GL_ARB_texture_filter_anisotropic)
|
||||
glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY, static_cast<float>(anisotropy));
|
||||
else if (GLAD_GL_EXT_texture_filter_anisotropic)
|
||||
glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY_EXT, static_cast<float>(anisotropy));
|
||||
}
|
||||
|
||||
return sampler;
|
||||
|
@ -963,28 +885,64 @@ std::string GSDeviceOGL::GetShaderSource(const std::string_view& entry, GLenum t
|
|||
|
||||
std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum type, const std::string_view& macro)
|
||||
{
|
||||
std::string header = "#version 330 core\n";
|
||||
std::string header;
|
||||
|
||||
// Need GL version 420
|
||||
header += "#extension GL_ARB_shading_language_420pack: require\n";
|
||||
// Need GL version 410
|
||||
header += "#extension GL_ARB_separate_shader_objects: require\n";
|
||||
if (m_features.framebuffer_fetch)
|
||||
if (GLLoader::is_gles)
|
||||
{
|
||||
if (GLAD_GL_EXT_shader_framebuffer_fetch)
|
||||
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
|
||||
else if (GLAD_GL_ARM_shader_framebuffer_fetch)
|
||||
header += "#extension GL_ARM_shader_framebuffer_fetch : require\n";
|
||||
}
|
||||
if (GLAD_GL_ES_VERSION_3_2)
|
||||
header = "#version 320 es\n";
|
||||
else if (GLAD_GL_ES_VERSION_3_1)
|
||||
header = "#version 310 es\n";
|
||||
|
||||
if (GLLoader::found_GL_ARB_gpu_shader5)
|
||||
header += "#extension GL_ARB_gpu_shader5 : enable\n";
|
||||
if (GLAD_GL_EXT_blend_func_extended)
|
||||
header += "#extension GL_EXT_blend_func_extended : require\n";
|
||||
if (GLAD_GL_ARB_blend_func_extended)
|
||||
header += "#extension GL_ARB_blend_func_extended : require\n";
|
||||
if (m_features.framebuffer_fetch)
|
||||
{
|
||||
if (GLAD_GL_EXT_shader_framebuffer_fetch)
|
||||
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
|
||||
else if (GLAD_GL_ARM_shader_framebuffer_fetch)
|
||||
header += "#extension GL_ARM_shader_framebuffer_fetch : require\n";
|
||||
}
|
||||
|
||||
header += "precision highp float;\n";
|
||||
header += "precision highp int;\n";
|
||||
header += "precision highp sampler2D;\n";
|
||||
if (GLAD_GL_ES_VERSION_3_1)
|
||||
header += "precision highp sampler2DMS;\n";
|
||||
if (GLAD_GL_ES_VERSION_3_2)
|
||||
header += "precision highp usamplerBuffer;\n";
|
||||
|
||||
if (!GLAD_GL_EXT_blend_func_extended && !GLAD_GL_ARB_blend_func_extended)
|
||||
header += "#define DISABLE_DUAL_SOURCE\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
header = "#version 330 core\n";
|
||||
|
||||
// Need GL version 420
|
||||
header += "#extension GL_ARB_shading_language_420pack: require\n";
|
||||
// Need GL version 410
|
||||
header += "#extension GL_ARB_separate_shader_objects: require\n";
|
||||
|
||||
if (m_features.framebuffer_fetch && GLAD_GL_EXT_shader_framebuffer_fetch)
|
||||
header += "#extension GL_EXT_shader_framebuffer_fetch : require\n";
|
||||
|
||||
if (GLLoader::found_GL_ARB_gpu_shader5)
|
||||
header += "#extension GL_ARB_gpu_shader5 : enable\n";
|
||||
}
|
||||
|
||||
if (m_features.framebuffer_fetch)
|
||||
header += "#define HAS_FRAMEBUFFER_FETCH 1\n";
|
||||
else
|
||||
header += "#define HAS_FRAMEBUFFER_FETCH 0\n";
|
||||
|
||||
if (GLLoader::has_clip_control)
|
||||
header += "#define HAS_CLIP_CONTROL 1\n";
|
||||
else
|
||||
header += "#define HAS_CLIP_CONTROL 0\n";
|
||||
|
||||
if (GLLoader::vendor_id_amd || GLLoader::vendor_id_intel)
|
||||
header += "#define BROKEN_DRIVER as_usual\n";
|
||||
|
||||
|
@ -992,7 +950,10 @@ std::string GSDeviceOGL::GenGlslHeader(const std::string_view& entry, GLenum typ
|
|||
// AMD/nvidia define it to 0
|
||||
// intel window don't define it
|
||||
// intel linux refuse to define it
|
||||
header += "#define pGL_ES 0\n";
|
||||
if (GLLoader::is_gles)
|
||||
header += "#define pGL_ES 1\n";
|
||||
else
|
||||
header += "#define pGL_ES 0\n";
|
||||
|
||||
// Allows putting several shaders in one file
|
||||
switch (type)
|
||||
|
@ -1030,7 +991,7 @@ std::string GSDeviceOGL::GetVSSource(VSSelector sel)
|
|||
+ fmt::format("#define VS_IIP {}\n", static_cast<u32>(sel.iip))
|
||||
+ fmt::format("#define VS_POINT_SIZE {}\n", static_cast<u32>(sel.point_size));
|
||||
if (sel.point_size)
|
||||
macro += fmt::format("#define VS_POINT_SIZE_VALUE {}\n", GSConfig.UpscaleMultiplier);
|
||||
macro += fmt::format("#define VS_POINT_SIZE_VALUE {:.8f}f\n", GSConfig.UpscaleMultiplier);
|
||||
|
||||
std::string src = GenGlslHeader("vs_main", GL_VERTEX_SHADER, macro);
|
||||
src += m_shader_common_header;
|
||||
|
@ -1102,7 +1063,7 @@ std::string GSDeviceOGL::GetPSSource(const PSSelector& sel)
|
|||
+ fmt::format("#define PS_FIXED_ONE_A {}\n", sel.fixed_one_a)
|
||||
+ fmt::format("#define PS_PABE {}\n", sel.pabe)
|
||||
+ fmt::format("#define PS_SCANMSK {}\n", sel.scanmsk)
|
||||
+ fmt::format("#define PS_SCALE_FACTOR {}\n", GSConfig.UpscaleMultiplier)
|
||||
+ fmt::format("#define PS_SCALE_FACTOR {:.8f}f\n", GSConfig.UpscaleMultiplier)
|
||||
+ fmt::format("#define PS_NO_COLOR {}\n", sel.no_color)
|
||||
+ fmt::format("#define PS_NO_COLOR1 {}\n", sel.no_color1)
|
||||
+ fmt::format("#define PS_NO_ABLEND {}\n", sel.no_ablend)
|
||||
|
@ -1169,12 +1130,36 @@ void GSDeviceOGL::CopyRect(GSTexture* sTex, GSTexture* dTex, const GSVector4i& r
|
|||
|
||||
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
|
||||
|
||||
ASSERT(GLExtension::Has("GL_ARB_copy_image") && glCopyImageSubData);
|
||||
glCopyImageSubData(sid, GL_TEXTURE_2D,
|
||||
0, r.x, r.y, 0,
|
||||
did, GL_TEXTURE_2D,
|
||||
0, destX, destY, 0,
|
||||
r.width(), r.height(), 1);
|
||||
if (GLAD_GL_VERSION_4_3 || GLAD_GL_ARB_copy_image)
|
||||
{
|
||||
glCopyImageSubData(sid, GL_TEXTURE_2D, 0, r.x, r.y, 0, did, GL_TEXTURE_2D,
|
||||
0, destX, destY, 0, r.width(), r.height(), 1);
|
||||
}
|
||||
else if (GLAD_GL_EXT_copy_image)
|
||||
{
|
||||
glCopyImageSubDataEXT(sid, GL_TEXTURE_2D, 0, r.x, r.y, 0, did, GL_TEXTURE_2D,
|
||||
0, destX, destY, 0, r.width(), r.height(), 1);
|
||||
}
|
||||
else if (GLAD_GL_OES_copy_image)
|
||||
{
|
||||
glCopyImageSubDataOES(sid, GL_TEXTURE_2D, 0, r.x, r.y, 0, did, GL_TEXTURE_2D,
|
||||
0, destX, destY, 0, r.width(), r.height(), 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_fbo_read);
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_fbo_write);
|
||||
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, sid, 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, did, 0);
|
||||
|
||||
const int w = r.width(), h = r.height();
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glBlitFramebuffer(r.x, r.y, r.x + w, r.y + h, destX + r.x, destY + r.y, destX + r.x + w, destY + r.y + h, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, GLState::fbo);
|
||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader, bool linear)
|
||||
|
@ -1407,7 +1392,7 @@ void GSDeviceOGL::DoFXAA(GSTexture* sTex, GSTexture* dTex)
|
|||
if (!m_fxaa.ps.IsValid())
|
||||
{
|
||||
// Needs ARB_gpu_shader5 for gather.
|
||||
if (!GLLoader::found_GL_ARB_gpu_shader5)
|
||||
if (!GLLoader::is_gles && !GLLoader::found_GL_ARB_gpu_shader5)
|
||||
return;
|
||||
|
||||
std::string fxaa_macro = "#define FXAA_GLSL_130 1\n";
|
||||
|
@ -1559,7 +1544,7 @@ void GSDeviceOGL::ClearSamplerCache()
|
|||
bool GSDeviceOGL::CreateCASPrograms()
|
||||
{
|
||||
// Image load store and GLSL 420pack is core in GL4.2, no need to check.
|
||||
m_features.cas_sharpening = GLAD_GL_VERSION_4_2 && GLAD_GL_ARB_compute_shader;
|
||||
m_features.cas_sharpening = (GLAD_GL_VERSION_4_2 && GLAD_GL_ARB_compute_shader) || GLAD_GL_ES_VERSION_3_2;
|
||||
if (!m_features.cas_sharpening)
|
||||
{
|
||||
Console.Warning("Compute shaders not supported, CAS is unavailable.");
|
||||
|
@ -1574,6 +1559,12 @@ bool GSDeviceOGL::CreateCASPrograms()
|
|||
}
|
||||
|
||||
const char* header =
|
||||
GLLoader::is_gles ?
|
||||
"#version 320 es\n"
|
||||
"precision highp float;\n"
|
||||
"precision highp int;\n"
|
||||
"precision highp sampler2D;\n"
|
||||
"precision highp image2D;\n" :
|
||||
"#version 420\n"
|
||||
"#extension GL_ARB_compute_shader : require\n";
|
||||
const char* sharpen_params[2] = {
|
||||
|
@ -1710,6 +1701,15 @@ void GSDeviceOGL::OMSetBlendState(bool enable, GLenum src_factor, GLenum dst_fac
|
|||
{
|
||||
if (GLState::blend)
|
||||
{
|
||||
// make sure we're not using dual source
|
||||
if (GLState::f_sRGB == GL_SRC1_ALPHA || GLState::f_sRGB == GL_ONE_MINUS_SRC1_ALPHA ||
|
||||
GLState::f_dRGB == GL_SRC1_ALPHA || GLState::f_dRGB == GL_ONE_MINUS_SRC1_ALPHA)
|
||||
{
|
||||
glBlendFuncSeparate(GL_ONE, GL_ZERO, GL_ONE, GL_ZERO);
|
||||
GLState::f_sRGB = GL_ONE;
|
||||
GLState::f_dRGB = GL_ZERO;
|
||||
}
|
||||
|
||||
GLState::blend = false;
|
||||
glDisable(GL_BLEND);
|
||||
}
|
||||
|
@ -1798,12 +1798,13 @@ void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel)
|
|||
OMSetDepthStencilState(m_om_dss[dssel.key]);
|
||||
}
|
||||
|
||||
static GSDeviceOGL::VSSelector convertSel(const GSHWDrawConfig::VSSelector sel)
|
||||
static GSDeviceOGL::VSSelector convertSel(const GSHWDrawConfig::VSSelector sel, const GSHWDrawConfig::Topology topology)
|
||||
{
|
||||
// Mali requires gl_PointSize written when rasterizing points. The spec seems to suggest this is okay.
|
||||
GSDeviceOGL::VSSelector out;
|
||||
out.int_fst = !sel.fst;
|
||||
out.iip = sel.iip;
|
||||
out.point_size = sel.point_size;
|
||||
out.point_size = sel.point_size || (GLLoader::is_gles && topology == GSHWDrawConfig::Topology::Point);
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -1916,7 +1917,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
|
|||
}
|
||||
|
||||
ProgramSelector psel;
|
||||
psel.vs = convertSel(config.vs);
|
||||
psel.vs = convertSel(config.vs, config.topology);
|
||||
psel.ps.key_hi = config.ps.key_hi;
|
||||
psel.ps.key_lo = config.ps.key_lo;
|
||||
psel.gs.key = 0;
|
||||
|
@ -1936,7 +1937,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
|
|||
SetupPipeline(psel);
|
||||
|
||||
// additional non-pipeline config stuff
|
||||
const bool point_size_enabled = config.vs.point_size;
|
||||
const bool point_size_enabled = config.vs.point_size && !GLLoader::is_gles;
|
||||
if (GLState::point_size != point_size_enabled)
|
||||
{
|
||||
if (point_size_enabled)
|
||||
|
@ -1945,6 +1946,12 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
|
|||
glDisable(GL_PROGRAM_POINT_SIZE);
|
||||
GLState::point_size = point_size_enabled;
|
||||
}
|
||||
const float line_width = config.line_expand ? static_cast<float>(GSConfig.UpscaleMultiplier) : 1.0f;
|
||||
if (GLState::line_width != line_width)
|
||||
{
|
||||
GLState::line_width = line_width;
|
||||
glLineWidth(line_width);
|
||||
}
|
||||
|
||||
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
|
||||
{
|
||||
|
@ -2115,7 +2122,6 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
|
|||
{
|
||||
std::string message(gl_message, gl_length >= 0 ? gl_length : strlen(gl_message));
|
||||
std::string type, severity, source;
|
||||
static int sev_counter = 0;
|
||||
switch (gl_type)
|
||||
{
|
||||
case GL_DEBUG_TYPE_ERROR_ARB : type = "Error"; break;
|
||||
|
@ -2130,7 +2136,7 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
|
|||
}
|
||||
switch (gl_severity)
|
||||
{
|
||||
case GL_DEBUG_SEVERITY_HIGH_ARB : severity = "High"; sev_counter++; break;
|
||||
case GL_DEBUG_SEVERITY_HIGH_ARB : severity = "High"; break;
|
||||
case GL_DEBUG_SEVERITY_MEDIUM_ARB : severity = "Mid"; break;
|
||||
case GL_DEBUG_SEVERITY_LOW_ARB : severity = "Low"; break;
|
||||
default:
|
||||
|
@ -2153,43 +2159,16 @@ void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id,
|
|||
default : source = "???"; break;
|
||||
}
|
||||
|
||||
#ifdef _DEBUG
|
||||
// Don't spam noisy information on the terminal
|
||||
if (gl_severity != GL_DEBUG_SEVERITY_NOTIFICATION)
|
||||
if (gl_severity != GL_DEBUG_SEVERITY_NOTIFICATION && gl_source != GL_DEBUG_SOURCE_APPLICATION)
|
||||
{
|
||||
Console.Error("T:%s\tID:%d\tS:%s\t=> %s", type.c_str(), GSState::s_n, severity.c_str(), message.c_str());
|
||||
}
|
||||
#else
|
||||
// Print nouveau shader compiler info
|
||||
if (GSState::s_n == 0)
|
||||
{
|
||||
int t, local, gpr, inst, byte;
|
||||
const int status = sscanf(message.c_str(), "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d",
|
||||
&t, &local, &gpr, &inst, &byte);
|
||||
if (status == 5)
|
||||
{
|
||||
m_shader_inst += inst;
|
||||
m_shader_reg += gpr;
|
||||
fprintf(stderr, "T:%s\t\tS:%s\t=> %s\n", type.c_str(), severity.c_str(), message.c_str());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef ENABLE_OGL_DEBUG
|
||||
if (m_debug_gl_file)
|
||||
fprintf(m_debug_gl_file, "T:%s\tID:%d\tS:%s\t=> %s\n", type.c_str(), GSState::s_n, severity.c_str(), message.c_str());
|
||||
|
||||
if (sev_counter >= 5)
|
||||
{
|
||||
// Close the file to flush the content on disk before exiting.
|
||||
if (m_debug_gl_file)
|
||||
{
|
||||
fclose(m_debug_gl_file);
|
||||
m_debug_gl_file = NULL;
|
||||
}
|
||||
ASSERT(0);
|
||||
}
|
||||
#endif
|
||||
// Returns a non-owning pointer to the file-level texture upload stream buffer
// (s_texture_upload_buffer). The result is null whenever that unique_ptr is
// empty, so callers must check it before use.
GL::StreamBuffer* GSDeviceOGL::GetTextureUploadBuffer()
|
||||
{
|
||||
return s_texture_upload_buffer.get();
|
||||
}
|
||||
|
||||
void GSDeviceOGL::PushDebugGroup(const char* fmt, ...)
|
||||
|
|
|
@ -22,16 +22,10 @@
|
|||
#include "common/HashCombine.h"
|
||||
#include "GS/Renderers/Common/GSDevice.h"
|
||||
#include "GSTextureOGL.h"
|
||||
#include "GSUniformBufferOGL.h"
|
||||
#include "GLState.h"
|
||||
#include "GLLoader.h"
|
||||
#include "GS/GS.h"
|
||||
|
||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||
extern u64 g_real_texture_upload_byte;
|
||||
extern u64 g_vertex_upload_byte;
|
||||
#endif
|
||||
|
||||
class GSDepthStencilOGL
|
||||
{
|
||||
bool m_depth_enable;
|
||||
|
@ -207,19 +201,15 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
static int m_shader_inst;
|
||||
static int m_shader_reg;
|
||||
|
||||
private:
|
||||
static FILE* m_debug_gl_file;
|
||||
|
||||
// Place holder for the GLSL shader code (to avoid useless reload)
|
||||
std::string m_shader_common_header;
|
||||
std::string m_shader_tfx_vgs;
|
||||
std::string m_shader_tfx_fs;
|
||||
|
||||
GLuint m_fbo; // frame buffer container
|
||||
GLuint m_fbo_read; // frame buffer container only for reading
|
||||
GLuint m_fbo = 0; // frame buffer container
|
||||
GLuint m_fbo_read = 0; // frame buffer container only for reading
|
||||
GLuint m_fbo_write = 0; // frame buffer container only for writing
|
||||
|
||||
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
|
||||
std::unique_ptr<GL::StreamBuffer> m_index_stream_buffer;
|
||||
|
@ -274,20 +264,12 @@ private:
|
|||
GL::Program sharpen_ps;
|
||||
} m_cas;
|
||||
|
||||
struct
|
||||
{
|
||||
u16 last_query = 0;
|
||||
GLuint timer_query[1 << 16] = {};
|
||||
|
||||
GLuint timer() { return timer_query[last_query]; }
|
||||
} m_profiler;
|
||||
|
||||
GLuint m_ps_ss[1 << 8];
|
||||
GSDepthStencilOGL* m_om_dss[1 << 5] = {};
|
||||
std::unordered_map<ProgramSelector, GL::Program, ProgramSelectorHash> m_programs;
|
||||
GL::ShaderCache m_shader_cache;
|
||||
|
||||
GLuint m_palette_ss;
|
||||
GLuint m_palette_ss = 0;
|
||||
|
||||
GSHWDrawConfig::VSConstantBuffer m_vs_cb_cache;
|
||||
GSHWDrawConfig::PSConstantBuffer m_ps_cb_cache;
|
||||
|
@ -314,11 +296,11 @@ public:
|
|||
GSDeviceOGL();
|
||||
virtual ~GSDeviceOGL();
|
||||
|
||||
void GenerateProfilerData();
|
||||
|
||||
// Used by OpenGL, so the same calling convention is required.
|
||||
static void APIENTRY DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar* gl_message, const void* userParam);
|
||||
|
||||
static GL::StreamBuffer* GetTextureUploadBuffer();
|
||||
|
||||
bool Create() override;
|
||||
|
||||
void ResetAPIState() override;
|
||||
|
|
|
@ -15,170 +15,23 @@
|
|||
|
||||
#include "PrecompiledHeader.h"
|
||||
#include <limits.h>
|
||||
#include "GSTextureOGL.h"
|
||||
#include "GLState.h"
|
||||
#include "GS/Renderers/OpenGL/GSDeviceOGL.h"
|
||||
#include "GS/Renderers/OpenGL/GSTextureOGL.h"
|
||||
#include "GS/Renderers/OpenGL/GLState.h"
|
||||
#include "GS/GSPerfMon.h"
|
||||
#include "GS/GSPng.h"
|
||||
#include "GS/GSGL.h"
|
||||
#include "common/StringUtil.h"
|
||||
|
||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||
extern u64 g_real_texture_upload_byte;
|
||||
#endif
|
||||
|
||||
// FIXME OGL4: investigate, only 1 unpack buffer always bound
|
||||
namespace PboPool
|
||||
{
|
||||
|
||||
const u32 m_pbo_size = 64 * 1024 * 1024;
|
||||
const u32 m_seg_size = 16 * 1024 * 1024;
|
||||
|
||||
GLuint m_buffer;
|
||||
uptr m_offset;
|
||||
char* m_map;
|
||||
u32 m_size;
|
||||
GLsync m_fence[m_pbo_size / m_seg_size];
|
||||
|
||||
// Option for buffer storage
|
||||
// XXX: do we actually need coherent mapping and a barrier here?
|
||||
// As far as I understand glTexSubImage2D is a client-server transfer so no need to make
|
||||
// the value visible to the server
|
||||
const GLbitfield common_flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
|
||||
const GLbitfield map_flags = common_flags | GL_MAP_FLUSH_EXPLICIT_BIT;
|
||||
const GLbitfield create_flags = common_flags | GL_CLIENT_STORAGE_BIT;
|
||||
|
||||
// Creates the pool's pixel-unpack buffer: allocates m_pbo_size bytes of
// storage with create_flags, maps the whole range once with map_flags and
// keeps the pointer in m_map, then resets the write offset and the fences.
// The buffer is unbound again before returning.
void Init()
|
||||
{
|
||||
glGenBuffers(1, &m_buffer);
|
||||
|
||||
BindPbo();
|
||||
|
||||
glObjectLabel(GL_BUFFER, m_buffer, -1, "PBO");
|
||||
|
||||
glBufferStorage(GL_PIXEL_UNPACK_BUFFER, m_pbo_size, NULL, create_flags);
|
||||
// NOTE(review): the mapping result is not checked; m_map may be null on failure.
m_map = (char*)glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, m_pbo_size, map_flags);
|
||||
m_offset = 0;
|
||||
|
||||
// No segment is protected by a fence yet.
std::fill(std::begin(m_fence), std::end(m_fence), nullptr);
|
||||
|
||||
UnbindPbo();
|
||||
}
|
||||
|
||||
// Reserves space for an upload of `size` bytes and returns a CPU pointer into
// the mapped buffer at the current offset.
//
// The request is rounded up to a multiple of 64 bytes and stored in m_size.
// The PBO is left bound to GL_PIXEL_UNPACK_BUFFER on return.
char* Map(u32 size)
|
||||
{
|
||||
char* map;
|
||||
// Note: keep offset aligned for SSE/AVX
|
||||
m_size = (size + 63) & ~0x3F;
|
||||
|
||||
// NOTE(review): an oversized request is only reported; execution continues
// and the returned range would extend past the mapped storage.
if (m_size > m_pbo_size)
|
||||
{
|
||||
fprintf(stderr, "BUG: PBO too small %u but need %u\n", m_pbo_size, m_size);
|
||||
}
|
||||
|
||||
// Note: texsubimage will access currently bound buffer
|
||||
// Pbo ready let's get a pointer
|
||||
BindPbo();
|
||||
|
||||
// Sync() may move m_offset, so the pointer is computed only afterwards.
Sync();
|
||||
|
||||
map = m_map + m_offset;
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
void Unmap()
|
||||
{
|
||||
glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, m_offset, m_size);
|
||||
}
|
||||
|
||||
uptr Offset()
|
||||
{
|
||||
return m_offset;
|
||||
}
|
||||
|
||||
void Destroy()
|
||||
{
|
||||
m_map = NULL;
|
||||
m_offset = 0;
|
||||
|
||||
for (GLsync& fence : m_fence)
|
||||
{
|
||||
if (fence != 0)
|
||||
{
|
||||
glDeleteSync(fence);
|
||||
fence = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_buffer != 0)
|
||||
{
|
||||
glDeleteBuffers(1, &m_buffer);
|
||||
m_buffer = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void BindPbo()
|
||||
{
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_buffer);
|
||||
}
|
||||
|
||||
void Sync()
|
||||
{
|
||||
u32 segment_current = m_offset / m_seg_size;
|
||||
u32 segment_next = (m_offset + m_size) / m_seg_size;
|
||||
|
||||
if (segment_current != segment_next)
|
||||
{
|
||||
if (segment_next >= std::size(m_fence))
|
||||
{
|
||||
segment_next = 0;
|
||||
}
|
||||
// Align current transfer on the start of the segment
|
||||
m_offset = m_seg_size * segment_next;
|
||||
|
||||
if (m_size > m_seg_size)
|
||||
{
|
||||
fprintf(stderr, "BUG: PBO Map size %u is bigger than a single segment %u. Crossing more than one fence is not supported yet, texture data may be corrupted.\n", m_size, m_seg_size);
|
||||
// TODO Synchronize all crossed fences
|
||||
}
|
||||
|
||||
// protect the left segment
|
||||
m_fence[segment_current] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
|
||||
// Check next segment is free
|
||||
if (m_fence[segment_next])
|
||||
{
|
||||
GLenum status = glClientWaitSync(m_fence[segment_next], GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
|
||||
// Potentially it doesn't work on AMD driver which might always return GL_CONDITION_SATISFIED
|
||||
if (status != GL_ALREADY_SIGNALED)
|
||||
{
|
||||
GL_PERF("GL_PIXEL_UNPACK_BUFFER: Sync Sync (%x)! Buffer too small ?", status);
|
||||
}
|
||||
|
||||
glDeleteSync(m_fence[segment_next]);
|
||||
m_fence[segment_next] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void UnbindPbo()
|
||||
{
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
}
|
||||
|
||||
void EndTransfer()
|
||||
{
|
||||
m_offset += m_size;
|
||||
}
|
||||
} // namespace PboPool
|
||||
static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 256;
|
||||
|
||||
GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format format, GLuint fbo_read)
|
||||
: m_clean(false), m_r_x(0), m_r_y(0), m_r_w(0), m_r_h(0), m_layer(0)
|
||||
{
|
||||
// OpenGL didn't like dimensions of size 0
|
||||
m_size.x = std::max(1, width);
|
||||
m_size.y = std::max(1, height);
|
||||
m_format = format;
|
||||
m_type = type;
|
||||
m_type = type;
|
||||
m_fbo_read = fbo_read;
|
||||
m_texture_id = 0;
|
||||
m_mipmap_levels = 1;
|
||||
|
@ -189,46 +42,46 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
|
|||
{
|
||||
// 1 Channel integer
|
||||
case Format::PrimID:
|
||||
gl_fmt = GL_R32F;
|
||||
m_int_format = GL_RED;
|
||||
m_int_type = GL_INT;
|
||||
m_int_shift = 2;
|
||||
gl_fmt = GL_R32F;
|
||||
m_int_format = GL_RED;
|
||||
m_int_type = GL_INT;
|
||||
m_int_shift = 2;
|
||||
break;
|
||||
case Format::UInt32:
|
||||
gl_fmt = GL_R32UI;
|
||||
m_int_format = GL_RED_INTEGER;
|
||||
m_int_type = GL_UNSIGNED_INT;
|
||||
m_int_shift = 2;
|
||||
gl_fmt = GL_R32UI;
|
||||
m_int_format = GL_RED_INTEGER;
|
||||
m_int_type = GL_UNSIGNED_INT;
|
||||
m_int_shift = 2;
|
||||
break;
|
||||
case Format::UInt16:
|
||||
gl_fmt = GL_R16UI;
|
||||
m_int_format = GL_RED_INTEGER;
|
||||
m_int_type = GL_UNSIGNED_SHORT;
|
||||
m_int_shift = 1;
|
||||
gl_fmt = GL_R16UI;
|
||||
m_int_format = GL_RED_INTEGER;
|
||||
m_int_type = GL_UNSIGNED_SHORT;
|
||||
m_int_shift = 1;
|
||||
break;
|
||||
|
||||
// 1 Channel normalized
|
||||
case Format::UNorm8:
|
||||
gl_fmt = GL_R8;
|
||||
m_int_format = GL_RED;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 0;
|
||||
gl_fmt = GL_R8;
|
||||
m_int_format = GL_RED;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 0;
|
||||
break;
|
||||
|
||||
// 4 channel normalized
|
||||
case Format::Color:
|
||||
gl_fmt = GL_RGBA8;
|
||||
m_int_format = GL_RGBA;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 2;
|
||||
gl_fmt = GL_RGBA8;
|
||||
m_int_format = GL_RGBA;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 2;
|
||||
break;
|
||||
|
||||
// 4 channel float
|
||||
case Format::HDRColor:
|
||||
gl_fmt = GL_RGBA16;
|
||||
m_int_format = GL_RGBA;
|
||||
m_int_type = GL_UNSIGNED_SHORT;
|
||||
m_int_shift = 3;
|
||||
gl_fmt = GL_RGBA16;
|
||||
m_int_format = GL_RGBA;
|
||||
m_int_type = GL_UNSIGNED_SHORT;
|
||||
m_int_shift = 3;
|
||||
break;
|
||||
|
||||
// Depth buffer
|
||||
|
@ -252,37 +105,37 @@ GSTextureOGL::GSTextureOGL(Type type, int width, int height, int levels, Format
|
|||
break;
|
||||
|
||||
case Format::BC1:
|
||||
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
|
||||
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 1;
|
||||
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
|
||||
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 1;
|
||||
break;
|
||||
|
||||
case Format::BC2:
|
||||
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
|
||||
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 1;
|
||||
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
|
||||
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 1;
|
||||
break;
|
||||
|
||||
case Format::BC3:
|
||||
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
|
||||
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 1;
|
||||
gl_fmt = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
|
||||
m_int_format = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 1;
|
||||
break;
|
||||
|
||||
case Format::BC7:
|
||||
gl_fmt = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB;
|
||||
m_int_format = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 1;
|
||||
gl_fmt = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB;
|
||||
m_int_format = GL_COMPRESSED_RGBA_BPTC_UNORM_ARB;
|
||||
m_int_type = GL_UNSIGNED_BYTE;
|
||||
m_int_shift = 1;
|
||||
break;
|
||||
|
||||
case Format::Invalid:
|
||||
m_int_format = 0;
|
||||
m_int_type = 0;
|
||||
m_int_shift = 0;
|
||||
m_int_format = 0;
|
||||
m_int_type = 0;
|
||||
m_int_shift = 0;
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
|
@ -363,9 +216,6 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
|
|||
|
||||
u32 row_byte = r.width() << m_int_shift;
|
||||
u32 map_size = r.height() * row_byte;
|
||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||
g_real_texture_upload_byte += map_size;
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
if (r.height() == 1) {
|
||||
|
@ -389,7 +239,7 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
|
|||
glCompressedTextureSubImage2D(m_texture_id, layer, r.x, r.y, r.width(), r.height(), m_int_format, upload_size, data);
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
}
|
||||
else if (map_size >= PboPool::m_seg_size)
|
||||
else if (GLLoader::buggy_pbo || map_size > GSDeviceOGL::GetTextureUploadBuffer()->GetChunkSize())
|
||||
{
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch >> m_int_shift);
|
||||
glTextureSubImage2D(m_texture_id, layer, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, data);
|
||||
|
@ -397,27 +247,15 @@ bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch, int
|
|||
}
|
||||
else
|
||||
{
|
||||
// The complex solution with PBO
|
||||
char* src = (char*)data;
|
||||
char* map = PboPool::Map(map_size);
|
||||
GL::StreamBuffer* const sb = GSDeviceOGL::GetTextureUploadBuffer();
|
||||
|
||||
// PERF: slow path of the texture upload. Dunno if we could do better maybe check if TC can keep row_byte == pitch
|
||||
// Note: row_byte != pitch
|
||||
for (int h = 0; h < r.height(); h++)
|
||||
{
|
||||
memcpy(map, src, row_byte);
|
||||
map += row_byte;
|
||||
src += pitch;
|
||||
}
|
||||
const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size);
|
||||
StringUtil::StrideMemCpy(map.pointer, row_byte, data, pitch, row_byte, r.height());
|
||||
sb->Unmap(map_size);
|
||||
sb->Bind();
|
||||
|
||||
PboPool::Unmap();
|
||||
|
||||
glTextureSubImage2D(m_texture_id, layer, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type, (const void*)PboPool::Offset());
|
||||
|
||||
// FIXME OGL4: investigate, only 1 unpack buffer always bound
|
||||
PboPool::UnbindPbo();
|
||||
|
||||
PboPool::EndTransfer();
|
||||
glTextureSubImage2D(m_texture_id, layer, r.x, r.y, r.width(), r.height(), m_int_format, m_int_type,
|
||||
reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset)));
|
||||
}
|
||||
|
||||
m_needs_mipmaps_generated = true;
|
||||
|
@ -441,7 +279,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
|
|||
if (m_type == Type::Texture || m_type == Type::RenderTarget)
|
||||
{
|
||||
const u32 map_size = r.height() * row_byte;
|
||||
if (map_size > PboPool::m_seg_size)
|
||||
if (GLLoader::buggy_pbo || map_size > GSDeviceOGL::GetTextureUploadBuffer()->GetChunkSize())
|
||||
return false;
|
||||
|
||||
GL_PUSH_("Upload Texture %d", m_texture_id); // POP is in Unmap
|
||||
|
@ -449,11 +287,8 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
|
|||
|
||||
m_clean = false;
|
||||
|
||||
m.bits = (u8*)PboPool::Map(map_size);
|
||||
|
||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||
g_real_texture_upload_byte += map_size;
|
||||
#endif
|
||||
const auto map = GSDeviceOGL::GetTextureUploadBuffer()->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size);
|
||||
m.bits = static_cast<u8*>(map.pointer);
|
||||
|
||||
// Save the area for the unmap
|
||||
m_r_x = r.x;
|
||||
|
@ -461,6 +296,7 @@ bool GSTextureOGL::Map(GSMap& m, const GSVector4i* _r, int layer)
|
|||
m_r_w = r.width();
|
||||
m_r_h = r.height();
|
||||
m_layer = layer;
|
||||
m_map_offset = map.buffer_offset;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -472,15 +308,13 @@ void GSTextureOGL::Unmap()
|
|||
{
|
||||
if (m_type == Type::Texture || m_type == Type::RenderTarget)
|
||||
{
|
||||
const u32 map_size = (m_r_w << m_int_shift) * m_r_h;
|
||||
GL::StreamBuffer* sb = GSDeviceOGL::GetTextureUploadBuffer();
|
||||
sb->Unmap(map_size);
|
||||
sb->Bind();
|
||||
|
||||
PboPool::Unmap();
|
||||
|
||||
glTextureSubImage2D(m_texture_id, m_layer, m_r_x, m_r_y, m_r_w, m_r_h, m_int_format, m_int_type, (const void*)PboPool::Offset());
|
||||
|
||||
// FIXME OGL4: investigate, only 1 unpack buffer always bound
|
||||
PboPool::UnbindPbo();
|
||||
|
||||
PboPool::EndTransfer();
|
||||
glTextureSubImage2D(m_texture_id, m_layer, m_r_x, m_r_y, m_r_w, m_r_h, m_int_format, m_int_type,
|
||||
reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset)));
|
||||
|
||||
m_needs_mipmaps_generated = true;
|
||||
|
||||
|
|
|
@ -19,40 +19,26 @@
|
|||
#include "GS/Renderers/OpenGL/GLLoader.h"
|
||||
#include "common/AlignedMalloc.h"
|
||||
|
||||
namespace PboPool
|
||||
{
|
||||
inline void BindPbo();
|
||||
inline void UnbindPbo();
|
||||
inline void Sync();
|
||||
|
||||
inline char* Map(u32 size);
|
||||
inline void Unmap();
|
||||
inline uptr Offset();
|
||||
inline void EndTransfer();
|
||||
|
||||
void Init();
|
||||
void Destroy();
|
||||
} // namespace PboPool
|
||||
|
||||
class GSTextureOGL final : public GSTexture
|
||||
{
|
||||
private:
|
||||
GLuint m_texture_id; // the texture id
|
||||
GLuint m_fbo_read;
|
||||
bool m_clean;
|
||||
GLuint m_texture_id = 0; // the texture id
|
||||
GLuint m_fbo_read = 0;
|
||||
bool m_clean = false;
|
||||
|
||||
// Avoid alignment constrain
|
||||
//GSVector4i m_r;
|
||||
int m_r_x;
|
||||
int m_r_y;
|
||||
int m_r_w;
|
||||
int m_r_h;
|
||||
int m_layer;
|
||||
int m_r_x = 0;
|
||||
int m_r_y = 0;
|
||||
int m_r_w = 0;
|
||||
int m_r_h = 0;
|
||||
int m_layer = 0;
|
||||
u32 m_map_offset = 0;
|
||||
|
||||
// internal opengl format/type/alignment
|
||||
GLenum m_int_format;
|
||||
GLenum m_int_type;
|
||||
u32 m_int_shift;
|
||||
GLenum m_int_format = 0;
|
||||
GLenum m_int_type = 0;
|
||||
u32 m_int_shift = 0;
|
||||
|
||||
public:
|
||||
explicit GSTextureOGL(Type type, int width, int height, int levels, Format format, GLuint fbo_read);
|
||||
|
|
|
@ -1,88 +0,0 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2021 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GLState.h"
|
||||
|
||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||
extern u64 g_uniform_upload_byte;
|
||||
#endif
|
||||
|
||||
|
||||
class GSUniformBufferOGL
|
||||
{
|
||||
GLuint m_buffer; // data object
|
||||
GLuint m_index; // GLSL slot
|
||||
u32 m_size; // size of the data
|
||||
u8* m_cache; // content of the previous upload
|
||||
|
||||
public:
|
||||
GSUniformBufferOGL(const std::string& pretty_name, GLuint index, u32 size)
|
||||
: m_index(index), m_size(size)
|
||||
{
|
||||
glGenBuffers(1, &m_buffer);
|
||||
bind();
|
||||
glObjectLabel(GL_BUFFER, m_buffer, pretty_name.size(), pretty_name.c_str());
|
||||
allocate();
|
||||
attach();
|
||||
m_cache = (u8*)_aligned_malloc(m_size, 32);
|
||||
memset(m_cache, 0, m_size);
|
||||
}
|
||||
|
||||
void bind()
|
||||
{
|
||||
glBindBuffer(GL_UNIFORM_BUFFER, m_buffer);
|
||||
}
|
||||
|
||||
void allocate()
|
||||
{
|
||||
glBufferData(GL_UNIFORM_BUFFER, m_size, NULL, GL_DYNAMIC_DRAW);
|
||||
}
|
||||
|
||||
void attach()
|
||||
{
|
||||
// From the opengl manpage:
|
||||
// glBindBufferBase also binds buffer to the generic buffer binding point specified by target
|
||||
glBindBufferBase(GL_UNIFORM_BUFFER, m_index, m_buffer);
|
||||
}
|
||||
|
||||
void upload(const void* src)
|
||||
{
|
||||
bind();
|
||||
// glMapBufferRange allow to set various parameter but the call is
|
||||
// synchronous whereas glBufferSubData could be asynchronous.
|
||||
// TODO: investigate the extension ARB_invalidate_subdata
|
||||
glBufferSubData(GL_UNIFORM_BUFFER, 0, m_size, src);
|
||||
#ifdef ENABLE_OGL_DEBUG_MEM_BW
|
||||
g_uniform_upload_byte += m_size;
|
||||
#endif
|
||||
}
|
||||
|
||||
void cache_upload(const void* src)
|
||||
{
|
||||
if (memcmp(m_cache, src, m_size) != 0)
|
||||
{
|
||||
memcpy(m_cache, src, m_size);
|
||||
upload(src);
|
||||
}
|
||||
}
|
||||
|
||||
~GSUniformBufferOGL()
|
||||
{
|
||||
glDeleteBuffers(1, &m_buffer);
|
||||
_aligned_free(m_cache);
|
||||
}
|
||||
};
|
|
@ -243,6 +243,7 @@ bool GSDeviceVK::CheckFeatures()
|
|||
m_features.prefer_new_textures = true;
|
||||
m_features.provoking_vertex_last = g_vulkan_context->GetOptionalExtensions().vk_ext_provoking_vertex;
|
||||
m_features.dual_source_blend = features.dualSrcBlend && !GSConfig.DisableDualSourceBlend;
|
||||
m_features.clip_control = true;
|
||||
|
||||
if (!m_features.dual_source_blend)
|
||||
Console.Warning("Vulkan driver is missing dual-source blending. This will have an impact on performance.");
|
||||
|
|
|
@ -868,7 +868,6 @@
|
|||
<ClInclude Include="GS\Renderers\SW\GSTextureSW.h" />
|
||||
<ClInclude Include="GS\GSThread.h" />
|
||||
<ClInclude Include="GS\GSThread_CXX11.h" />
|
||||
<ClInclude Include="GS\Renderers\OpenGL\GSUniformBufferOGL.h" />
|
||||
<ClInclude Include="GS\GSUtil.h" />
|
||||
<ClInclude Include="GS\GSVector.h" />
|
||||
<ClInclude Include="GS\GSVector4i.h" />
|
||||
|
@ -1170,4 +1169,4 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets" />
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
|
@ -2833,9 +2833,6 @@
|
|||
<ClInclude Include="GS\Renderers\HW\GSVertexHW.h">
|
||||
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GS\Renderers\OpenGL\GSUniformBufferOGL.h">
|
||||
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GS\Renderers\Common\GSRenderer.h">
|
||||
<Filter>System\Ps2\GS\Renderers\Common</Filter>
|
||||
</ClInclude>
|
||||
|
@ -3101,4 +3098,4 @@
|
|||
<Filter>AppHost\Resources</Filter>
|
||||
</Manifest>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
||||
|
|
|
@ -645,7 +645,6 @@
|
|||
<ClInclude Include="GS\Renderers\SW\GSTextureSW.h" />
|
||||
<ClInclude Include="GS\GSThread.h" />
|
||||
<ClInclude Include="GS\GSThread_CXX11.h" />
|
||||
<ClInclude Include="GS\Renderers\OpenGL\GSUniformBufferOGL.h" />
|
||||
<ClInclude Include="GS\GSUtil.h" />
|
||||
<ClInclude Include="GS\GSVector.h" />
|
||||
<ClInclude Include="GS\GSVector4i.h" />
|
||||
|
@ -846,4 +845,4 @@
|
|||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets" />
|
||||
</Project>
|
||||
</Project>
|
|
@ -1997,9 +1997,6 @@
|
|||
<ClInclude Include="GS\Renderers\HW\GSVertexHW.h">
|
||||
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GS\Renderers\OpenGL\GSUniformBufferOGL.h">
|
||||
<Filter>System\Ps2\GS\Renderers\OpenGL</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GS\Renderers\Common\GSRenderer.h">
|
||||
<Filter>System\Ps2\GS\Renderers\Common</Filter>
|
||||
</ClInclude>
|
||||
|
@ -2222,4 +2219,4 @@
|
|||
<Filter>System\Ps2\Debug\rdebug</Filter>
|
||||
</CustomBuildStep>
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
Loading…
Reference in New Issue