Merge pull request #4467 from stenzek/gpu-texture-decoding

VideoBackends: GPU Texture Decoding
This commit is contained in:
Markus Wick 2017-04-03 10:46:13 +02:00 committed by GitHub
commit 3bd184a255
49 changed files with 2182 additions and 225 deletions

View File

@ -286,6 +286,7 @@ public final class SettingsFragmentPresenter
BooleanSetting ignoreFormat = new BooleanSetting(SettingsFile.KEY_IGNORE_FORMAT, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, ignoreFormatValue);
Setting efbToTexture = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_HACKS).getSetting(SettingsFile.KEY_EFB_TEXTURE);
Setting texCacheAccuracy = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_TEXCACHE_ACCURACY);
Setting gpuTextureDecoding = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_GPU_TEXTURE_DECODING);
IntSetting xfb = new IntSetting(SettingsFile.KEY_XFB, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, xfbValue);
Setting fastDepth = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_HACKS).getSetting(SettingsFile.KEY_FAST_DEPTH);
Setting aspectRatio = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_ASPECT_RATIO);
@ -297,6 +298,7 @@ public final class SettingsFragmentPresenter
sl.add(new HeaderSetting(null, null, R.string.texture_cache, 0));
sl.add(new SingleChoiceSetting(SettingsFile.KEY_TEXCACHE_ACCURACY, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.texture_cache_accuracy, R.string.texture_cache_accuracy_descrip, R.array.textureCacheAccuracyEntries, R.array.textureCacheAccuracyValues, 128, texCacheAccuracy));
sl.add(new CheckBoxSetting(SettingsFile.KEY_GPU_TEXTURE_DECODING, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.gpu_texture_decoding, R.string.gpu_texture_decoding_descrip, false, gpuTextureDecoding));
sl.add(new HeaderSetting(null, null, R.string.external_frame_buffer, 0));
sl.add(new SingleChoiceSetting(SettingsFile.KEY_XFB_METHOD, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, R.string.external_frame_buffer, R.string.external_frame_buffer_descrip, R.array.externalFrameBufferEntries, R.array.externalFrameBufferValues, 0, xfb));

View File

@ -73,6 +73,7 @@ public final class SettingsFile
public static final String KEY_IGNORE_FORMAT = "EFBEmulateFormatChanges";
public static final String KEY_EFB_TEXTURE = "EFBToTextureEnable";
public static final String KEY_TEXCACHE_ACCURACY = "SafeTextureCacheColorSamples";
public static final String KEY_GPU_TEXTURE_DECODING = "EnableGPUTextureDecoding";
public static final String KEY_XFB = "UseXFB";
public static final String KEY_XFB_REAL = "UseRealXFB";
public static final String KEY_FAST_DEPTH = "FastDepthCalc";

View File

@ -168,6 +168,8 @@
<string name="texture_cache">Texture Cache</string>
<string name="texture_cache_accuracy">Texture Cache Accuracy</string>
<string name="texture_cache_accuracy_descrip">The safer the selection, the less likely the emulator will be missing any texture updates from RAM.</string>
<string name="gpu_texture_decoding">GPU Texture Decoding</string>
<string name="gpu_texture_decoding_descrip">Decodes textures on the GPU using compute shaders where supported. May improve performance in some scenarios.</string>
<string name="external_frame_buffer">External Frame Buffer</string>
<string name="external_frame_buffer_descrip">Determines how the XFB will be emulated.</string>
<string name="disable_destination_alpha">Disable Destination Alpha</string>

View File

@ -72,6 +72,7 @@
<ClInclude Include="GL\GLExtensions\ARB_blend_func_extended.h" />
<ClInclude Include="GL\GLExtensions\ARB_buffer_storage.h" />
<ClInclude Include="GL\GLExtensions\ARB_clip_control.h" />
<ClInclude Include="GL\GLExtensions\ARB_compute_shader.h" />
<ClInclude Include="GL\GLExtensions\ARB_copy_image.h" />
<ClInclude Include="GL\GLExtensions\ARB_debug_output.h" />
<ClInclude Include="GL\GLExtensions\ARB_draw_elements_base_vertex.h" />
@ -83,9 +84,11 @@
<ClInclude Include="GL\GLExtensions\ARB_occlusion_query2.h" />
<ClInclude Include="GL\GLExtensions\ARB_sampler_objects.h" />
<ClInclude Include="GL\GLExtensions\ARB_sample_shading.h" />
<ClInclude Include="GL\GLExtensions\ARB_shader_image_load_store.h" />
<ClInclude Include="GL\GLExtensions\ARB_shader_storage_buffer_object.h" />
<ClInclude Include="GL\GLExtensions\ARB_sync.h" />
<ClInclude Include="GL\GLExtensions\ARB_texture_multisample.h" />
<ClInclude Include="GL\GLExtensions\ARB_texture_storage.h" />
<ClInclude Include="GL\GLExtensions\ARB_texture_storage_multisample.h" />
<ClInclude Include="GL\GLExtensions\ARB_uniform_buffer_object.h" />
<ClInclude Include="GL\GLExtensions\ARB_vertex_array_object.h" />

View File

@ -238,6 +238,16 @@
<ClInclude Include="NonCopyable.h" />
<ClInclude Include="Analytics.h" />
<ClInclude Include="Semaphore.h" />
<ClInclude Include="MD5.h" />
<ClInclude Include="GL\GLExtensions\ARB_texture_storage.h">
<Filter>GL\GLExtensions</Filter>
</ClInclude>
<ClInclude Include="GL\GLExtensions\ARB_shader_image_load_store.h">
<Filter>GL\GLExtensions</Filter>
</ClInclude>
<ClInclude Include="GL\GLExtensions\ARB_compute_shader.h">
<Filter>GL\GLExtensions</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="CDUtils.cpp" />
@ -303,6 +313,7 @@
</ClCompile>
<ClCompile Include="ucrtFreadWorkaround.cpp" />
<ClCompile Include="Analytics.cpp" />
<ClCompile Include="MD5.cpp" />
</ItemGroup>
<ItemGroup>
<Text Include="CMakeLists.txt" />

View File

@ -0,0 +1,53 @@
/*
** Copyright (c) 2013-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and/or associated documentation files (the
** "Materials"), to deal in the Materials without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Materials, and to
** permit persons to whom the Materials are furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be included
** in all copies or substantial portions of the Materials.
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/
#include "Common/GL/GLExtensions/gl_common.h"
#define GL_COMPUTE_SHADER 0x91B9
#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB
#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD
#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265
#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267
#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED
#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE
#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
#define GL_COMPUTE_SHADER_BIT 0x00000020
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y,
GLuint num_groups_z);
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect);
extern PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
extern PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
#define glDispatchCompute dolDispatchCompute
#define glDispatchComputeIndirect dolDispatchComputeIndirect

View File

@ -0,0 +1,100 @@
/*
** Copyright (c) 2013-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and/or associated documentation files (the
** "Materials"), to deal in the Materials without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Materials, and to
** permit persons to whom the Materials are furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be included
** in all copies or substantial portions of the Materials.
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/
#include "Common/GL/GLExtensions/gl_common.h"
#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002
#define GL_UNIFORM_BARRIER_BIT 0x00000004
#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008
#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
#define GL_COMMAND_BARRIER_BIT 0x00000040
#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080
#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100
#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200
#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400
#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000
#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
#define GL_MAX_IMAGE_UNITS 0x8F38
#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39
#define GL_IMAGE_BINDING_NAME 0x8F3A
#define GL_IMAGE_BINDING_LEVEL 0x8F3B
#define GL_IMAGE_BINDING_LAYERED 0x8F3C
#define GL_IMAGE_BINDING_LAYER 0x8F3D
#define GL_IMAGE_BINDING_ACCESS 0x8F3E
#define GL_IMAGE_1D 0x904C
#define GL_IMAGE_2D 0x904D
#define GL_IMAGE_3D 0x904E
#define GL_IMAGE_2D_RECT 0x904F
#define GL_IMAGE_CUBE 0x9050
#define GL_IMAGE_BUFFER 0x9051
#define GL_IMAGE_1D_ARRAY 0x9052
#define GL_IMAGE_2D_ARRAY 0x9053
#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054
#define GL_IMAGE_2D_MULTISAMPLE 0x9055
#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056
#define GL_INT_IMAGE_1D 0x9057
#define GL_INT_IMAGE_2D 0x9058
#define GL_INT_IMAGE_3D 0x9059
#define GL_INT_IMAGE_2D_RECT 0x905A
#define GL_INT_IMAGE_CUBE 0x905B
#define GL_INT_IMAGE_BUFFER 0x905C
#define GL_INT_IMAGE_1D_ARRAY 0x905D
#define GL_INT_IMAGE_2D_ARRAY 0x905E
#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F
#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060
#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061
#define GL_UNSIGNED_INT_IMAGE_1D 0x9062
#define GL_UNSIGNED_INT_IMAGE_2D 0x9063
#define GL_UNSIGNED_INT_IMAGE_3D 0x9064
#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065
#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066
#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068
#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
#define GL_MAX_IMAGE_SAMPLES 0x906D
#define GL_IMAGE_BINDING_FORMAT 0x906E
#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA
#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD
#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE
#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF
typedef void(APIENTRYP PFNDOLBINDIMAGETEXTUREPROC)(GLuint unit, GLuint texture, GLint level,
GLboolean layered, GLint layer, GLenum access,
GLenum format);
typedef void(APIENTRYP PFNDOLMEMORYBARRIERPROC)(GLbitfield barriers);
extern PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
extern PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
#define glBindImageTexture dolBindImageTexture
#define glMemoryBarrier dolMemoryBarrier

View File

@ -0,0 +1,41 @@
/*
** Copyright (c) 2013-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and/or associated documentation files (the
** "Materials"), to deal in the Materials without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Materials, and to
** permit persons to whom the Materials are furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be included
** in all copies or substantial portions of the Materials.
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/
#include "Common/GL/GLExtensions/gl_common.h"
#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F
typedef void(APIENTRYP PFNDOLTEXSTORAGE1DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width);
typedef void(APIENTRYP PFNDOLTEXSTORAGE2DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNDOLTEXSTORAGE3DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width, GLsizei height, GLsizei depth);
extern PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
extern PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
extern PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
#define glTexStorage1D dolTexStorage1D
#define glTexStorage2D dolTexStorage2D
#define glTexStorage3D dolTexStorage3D

View File

@ -653,19 +653,12 @@ PFNDOLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC
dolDrawElementsInstancedBaseVertexBaseInstance;
PFNDOLGETINTERNALFORMATIVPROC dolGetInternalformativ;
PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC dolGetActiveAtomicCounterBufferiv;
PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC dolDrawTransformFeedbackInstanced;
PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackStreamInstanced;
// gl_4_3
PFNDOLCLEARBUFFERDATAPROC dolClearBufferData;
PFNDOLCLEARBUFFERSUBDATAPROC dolClearBufferSubData;
PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
PFNDOLFRAMEBUFFERPARAMETERIPROC dolFramebufferParameteri;
PFNDOLGETFRAMEBUFFERPARAMETERIVPROC dolGetFramebufferParameteriv;
PFNDOLGETINTERNALFORMATI64VPROC dolGetInternalformati64v;
@ -905,6 +898,11 @@ PFNDOLTEXIMAGE3DMULTISAMPLEPROC dolTexImage3DMultisample;
PFNDOLGETMULTISAMPLEFVPROC dolGetMultisamplefv;
PFNDOLSAMPLEMASKIPROC dolSampleMaski;
// ARB_texture_storage
PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
// ARB_texture_storage_multisample
PFNDOLTEXSTORAGE2DMULTISAMPLEPROC dolTexStorage2DMultisample;
PFNDOLTEXSTORAGE3DMULTISAMPLEPROC dolTexStorage3DMultisample;
@ -989,6 +987,14 @@ PFNDOLDEPTHRANGEDNVPROC dolDepthRangedNV;
PFNDOLCLEARDEPTHDNVPROC dolClearDepthdNV;
PFNDOLDEPTHBOUNDSDNVPROC dolDepthBoundsdNV;
// ARB_shader_image_load_store
PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
// ARB_compute_shader
PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
// Creates a GLFunc object that requires a feature
#define GLFUNC_REQUIRES(x, y) \
{ \
@ -1681,6 +1687,11 @@ const GLFunc gl_function_array[] = {
GLFUNC_REQUIRES(glGetMultisamplefv, "GL_ARB_texture_multisample"),
GLFUNC_REQUIRES(glSampleMaski, "GL_ARB_texture_multisample"),
// ARB_texture_storage
GLFUNC_REQUIRES(glTexStorage1D, "GL_ARB_texture_storage !VERSION_4_2"),
GLFUNC_REQUIRES(glTexStorage2D, "GL_ARB_texture_storage !VERSION_4_2 |VERSION_GLES_3"),
GLFUNC_REQUIRES(glTexStorage3D, "GL_ARB_texture_storage !VERSION_4_2 |VERSION_GLES_3"),
// ARB_texture_storage_multisample
GLFUNC_REQUIRES(glTexStorage2DMultisample,
"GL_ARB_texture_storage_multisample !VERSION_4_3 |VERSION_GLES_3_1"),
@ -1848,6 +1859,17 @@ const GLFunc gl_function_array[] = {
GLFUNC_REQUIRES(glDepthRangedNV, "GL_NV_depth_buffer_float"),
GLFUNC_REQUIRES(glClearDepthdNV, "GL_NV_depth_buffer_float"),
GLFUNC_REQUIRES(glDepthBoundsdNV, "GL_NV_depth_buffer_float"),
// ARB_shader_image_load_store
GLFUNC_REQUIRES(glBindImageTexture,
"GL_ARB_shader_image_load_store !VERSION_4_2 |VERSION_GLES_3_1"),
GLFUNC_REQUIRES(glMemoryBarrier,
"GL_ARB_shader_image_load_store !VERSION_4_2 |VERSION_GLES_3_1"),
// ARB_compute_shader
GLFUNC_REQUIRES(glDispatchCompute, "GL_ARB_compute_shader !VERSION_4_3 |VERSION_GLES_3_1"),
GLFUNC_REQUIRES(glDispatchComputeIndirect,
"GL_ARB_compute_shader !VERSION_4_3 |VERSION_GLES_3_1"),
};
namespace GLExtensions

View File

@ -12,6 +12,7 @@
#include "Common/GL/GLExtensions/ARB_blend_func_extended.h"
#include "Common/GL/GLExtensions/ARB_buffer_storage.h"
#include "Common/GL/GLExtensions/ARB_clip_control.h"
#include "Common/GL/GLExtensions/ARB_compute_shader.h"
#include "Common/GL/GLExtensions/ARB_copy_image.h"
#include "Common/GL/GLExtensions/ARB_debug_output.h"
#include "Common/GL/GLExtensions/ARB_draw_elements_base_vertex.h"
@ -21,9 +22,11 @@
#include "Common/GL/GLExtensions/ARB_occlusion_query2.h"
#include "Common/GL/GLExtensions/ARB_sample_shading.h"
#include "Common/GL/GLExtensions/ARB_sampler_objects.h"
#include "Common/GL/GLExtensions/ARB_shader_image_load_store.h"
#include "Common/GL/GLExtensions/ARB_shader_storage_buffer_object.h"
#include "Common/GL/GLExtensions/ARB_sync.h"
#include "Common/GL/GLExtensions/ARB_texture_multisample.h"
#include "Common/GL/GLExtensions/ARB_texture_storage.h"
#include "Common/GL/GLExtensions/ARB_texture_storage_multisample.h"
#include "Common/GL/GLExtensions/ARB_uniform_buffer_object.h"
#include "Common/GL/GLExtensions/ARB_vertex_array_object.h"

View File

@ -66,75 +66,10 @@
#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS 0x92D9
#define GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX 0x92DA
#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB
#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002
#define GL_UNIFORM_BARRIER_BIT 0x00000004
#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008
#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
#define GL_COMMAND_BARRIER_BIT 0x00000040
#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080
#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100
#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200
#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400
#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000
#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
#define GL_MAX_IMAGE_UNITS 0x8F38
#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39
#define GL_IMAGE_BINDING_NAME 0x8F3A
#define GL_IMAGE_BINDING_LEVEL 0x8F3B
#define GL_IMAGE_BINDING_LAYERED 0x8F3C
#define GL_IMAGE_BINDING_LAYER 0x8F3D
#define GL_IMAGE_BINDING_ACCESS 0x8F3E
#define GL_IMAGE_1D 0x904C
#define GL_IMAGE_2D 0x904D
#define GL_IMAGE_3D 0x904E
#define GL_IMAGE_2D_RECT 0x904F
#define GL_IMAGE_CUBE 0x9050
#define GL_IMAGE_BUFFER 0x9051
#define GL_IMAGE_1D_ARRAY 0x9052
#define GL_IMAGE_2D_ARRAY 0x9053
#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054
#define GL_IMAGE_2D_MULTISAMPLE 0x9055
#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056
#define GL_INT_IMAGE_1D 0x9057
#define GL_INT_IMAGE_2D 0x9058
#define GL_INT_IMAGE_3D 0x9059
#define GL_INT_IMAGE_2D_RECT 0x905A
#define GL_INT_IMAGE_CUBE 0x905B
#define GL_INT_IMAGE_BUFFER 0x905C
#define GL_INT_IMAGE_1D_ARRAY 0x905D
#define GL_INT_IMAGE_2D_ARRAY 0x905E
#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F
#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060
#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061
#define GL_UNSIGNED_INT_IMAGE_1D 0x9062
#define GL_UNSIGNED_INT_IMAGE_2D 0x9063
#define GL_UNSIGNED_INT_IMAGE_3D 0x9064
#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065
#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066
#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068
#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
#define GL_MAX_IMAGE_SAMPLES 0x906D
#define GL_IMAGE_BINDING_FORMAT 0x906E
#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA
#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD
#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE
#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF
#define GL_COMPRESSED_RGBA_BPTC_UNORM 0x8E8C
#define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM 0x8E8D
#define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT 0x8E8E
#define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT 0x8E8F
#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F
typedef void(APIENTRYP PFNDOLDRAWARRAYSINSTANCEDBASEINSTANCEPROC)(GLenum mode, GLint first,
GLsizei count,
@ -152,16 +87,6 @@ typedef void(APIENTRYP PFNDOLGETINTERNALFORMATIVPROC)(GLenum target, GLenum inte
GLenum pname, GLsizei bufSize, GLint* params);
typedef void(APIENTRYP PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC)(GLuint program, GLuint bufferIndex,
GLenum pname, GLint* params);
typedef void(APIENTRYP PFNDOLBINDIMAGETEXTUREPROC)(GLuint unit, GLuint texture, GLint level,
GLboolean layered, GLint layer, GLenum access,
GLenum format);
typedef void(APIENTRYP PFNDOLMEMORYBARRIERPROC)(GLbitfield barriers);
typedef void(APIENTRYP PFNDOLTEXSTORAGE1DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width);
typedef void(APIENTRYP PFNDOLTEXSTORAGE2DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNDOLTEXSTORAGE3DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width, GLsizei height, GLsizei depth);
typedef void(APIENTRYP PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC)(GLenum mode, GLuint id,
GLsizei instancecount);
typedef void(APIENTRYP PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC)(GLenum mode, GLuint id,
@ -174,11 +99,6 @@ extern PFNDOLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC
dolDrawElementsInstancedBaseVertexBaseInstance;
extern PFNDOLGETINTERNALFORMATIVPROC dolGetInternalformativ;
extern PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC dolGetActiveAtomicCounterBufferiv;
extern PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
extern PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
extern PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
extern PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
extern PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
extern PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC dolDrawTransformFeedbackInstanced;
extern PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackStreamInstanced;
@ -187,10 +107,5 @@ extern PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackSt
#define glDrawElementsInstancedBaseVertexBaseInstance dolDrawElementsInstancedBaseVertexBaseInstance
#define glGetInternalformativ dolGetInternalformativ
#define glGetActiveAtomicCounterBufferiv dolGetActiveAtomicCounterBufferiv
#define glBindImageTexture dolBindImageTexture
#define glMemoryBarrier dolMemoryBarrier
#define glTexStorage1D dolTexStorage1D
#define glTexStorage2D dolTexStorage2D
#define glTexStorage3D dolTexStorage3D
#define glDrawTransformFeedbackInstanced dolDrawTransformFeedbackInstanced
#define glDrawTransformFeedbackStreamInstanced dolDrawTransformFeedbackStreamInstanced

View File

@ -38,24 +38,6 @@
#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69
#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A
#define GL_MAX_ELEMENT_INDEX 0x8D6B
#define GL_COMPUTE_SHADER 0x91B9
#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB
#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD
#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265
#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267
#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED
#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE
#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
#define GL_COMPUTE_SHADER_BIT 0x00000020
#define GL_DEBUG_OUTPUT_SYNCHRONOUS 0x8242
#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH 0x8243
#define GL_DEBUG_CALLBACK_FUNCTION 0x8244
@ -287,9 +269,6 @@ typedef void(APIENTRYP PFNDOLCLEARBUFFERDATAPROC)(GLenum target, GLenum internal
typedef void(APIENTRYP PFNDOLCLEARBUFFERSUBDATAPROC)(GLenum target, GLenum internalformat,
GLintptr offset, GLsizeiptr size,
GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y,
GLuint num_groups_z);
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect);
typedef void(APIENTRYP PFNDOLFRAMEBUFFERPARAMETERIPROC)(GLenum target, GLenum pname, GLint param);
typedef void(APIENTRYP PFNDOLGETFRAMEBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname,
GLint* params);
@ -348,8 +327,6 @@ typedef void(APIENTRYP PFNDOLVERTEXBINDINGDIVISORPROC)(GLuint bindingindex, GLui
extern PFNDOLCLEARBUFFERDATAPROC dolClearBufferData;
extern PFNDOLCLEARBUFFERSUBDATAPROC dolClearBufferSubData;
extern PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
extern PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
extern PFNDOLFRAMEBUFFERPARAMETERIPROC dolFramebufferParameteri;
extern PFNDOLGETFRAMEBUFFERPARAMETERIVPROC dolGetFramebufferParameteriv;
extern PFNDOLGETINTERNALFORMATI64VPROC dolGetInternalformati64v;
@ -378,8 +355,6 @@ extern PFNDOLVERTEXBINDINGDIVISORPROC dolVertexBindingDivisor;
#define glClearBufferData dolClearBufferData
#define glClearBufferSubData dolClearBufferSubData
#define glDispatchCompute dolDispatchCompute
#define glDispatchComputeIndirect dolDispatchComputeIndirect
#define glFramebufferParameteri dolFramebufferParameteri
#define glGetFramebufferParameteriv dolGetFramebufferParameteriv
#define glGetInternalformati64v dolGetInternalformati64v

View File

@ -284,6 +284,10 @@ static wxString true_color_desc =
wxTRANSLATE("Forces the game to render the RGB color channels in 24-bit, thereby increasing "
"quality by reducing color banding.\nIt has no impact on performance and causes "
"few graphical issues.\n\n\nIf unsure, leave this checked.");
static wxString gpu_texture_decoding_desc =
wxTRANSLATE("Enables texture decoding using the GPU instead of the CPU. This may result in "
"performance gains in some scenarios, or systems where the CPU is the bottleneck."
"\n\nIf unsure, leave this unchecked.");
#if !defined(__APPLE__)
// Search for available resolutions - TODO: Move to Common?
@ -755,6 +759,15 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title)
slide_szr->Add(new wxStaticText(page_hacks, wxID_ANY, _("Fast")), 0, wxALIGN_CENTER_VERTICAL);
szr_safetex->Add(slide_szr, 1, wxEXPAND | wxLEFT | wxRIGHT, space5);
if (vconfig.backend_info.bSupportsGPUTextureDecoding)
{
szr_safetex->Add(CreateCheckBox(page_hacks, _("GPU Texture Decoding"),
wxGetTranslation(gpu_texture_decoding_desc),
vconfig.bEnableGPUTextureDecoding),
1, wxEXPAND | wxLEFT | wxRIGHT, space5);
}
if (slider_pos == -1)
{
stc_slider->Disable();

View File

@ -67,6 +67,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsOversizedViewports = false;
g_Config.backend_info.bSupportsGeometryShaders = true;
g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = true;
g_Config.backend_info.bSupportsPostProcessing = false;
g_Config.backend_info.bSupportsPaletteConversion = true;
@ -75,6 +76,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsReversedDepthRange = false;
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
IDXGIFactory* factory;
IDXGIAdapter* ad;

View File

@ -70,6 +70,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsOversizedViewports = false;
g_Config.backend_info.bSupportsGeometryShaders = true;
g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = true;
g_Config.backend_info.bSupportsPostProcessing = false;
g_Config.backend_info.bSupportsPaletteConversion = true;
@ -78,6 +79,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsReversedDepthRange = false;
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
IDXGIFactory* factory;
IDXGIAdapter* ad;

View File

@ -30,6 +30,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsOversizedViewports = true;
g_Config.backend_info.bSupportsGeometryShaders = true;
g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = false;
g_Config.backend_info.bSupportsEarlyZ = true;
g_Config.backend_info.bSupportsBindingLayout = true;
@ -43,6 +44,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsReversedDepthRange = true;
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
// aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear();

View File

@ -65,7 +65,7 @@ GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_fo
}
else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
{
if (g_ogl_config.bSupports3DTextureStorage)
if (g_ogl_config.bSupports3DTextureStorageMultisample)
glTexStorage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth,
m_targetHeight, m_EFBLayers, false);
else
@ -74,7 +74,7 @@ GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_fo
}
else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE)
{
if (g_ogl_config.bSupports2DTextureStorage)
if (g_ogl_config.bSupports2DTextureStorageMultisample)
glTexStorage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth,
m_targetHeight, false);
else

View File

@ -0,0 +1,105 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/GL/GLExtensions/GLExtensions.h"
#ifndef GL_TIME_ELAPSED
#define GL_TIME_ELAPSED 0x88BF
#endif
namespace OGL
{
/*
* This class can be used to measure the time it takes for the GPU to perform a draw call
* or compute dispatch. To use:
*
* - Create an instance of GPUTimer before issuing the draw call.
* (this can be before or after any binding that needs to be done)
*
* - (optionally) call Begin(). This is not needed for a single draw call.
*
* - Issue the draw call or compute dispatch as normal.
*
* - (optionally) call End(). This is not necessary for a single draw call.
*
* - Call GetTime{Seconds,Milliseconds,Nanoseconds} to determine how long the operation
* took to execute on the GPU.
*
* NOTE: When the timer is read back, this will force a GL flush, so the more often a timer is used,
* the larger of a performance impact it will have. Only one timer can be active at any time, due to
* using GL_TIME_ELAPSED. This is not enforced by the class, however.
*
*/
class GPUTimer final
{
public:
GPUTimer()
{
glGenQueries(1, &m_query_id);
Begin();
}
~GPUTimer()
{
End();
glDeleteQueries(1, &m_query_id);
}
void Begin()
{
if (m_started)
glEndQuery(GL_TIME_ELAPSED);
glBeginQuery(GL_TIME_ELAPSED, m_query_id);
m_started = true;
}
void End()
{
if (!m_started)
return;
glEndQuery(GL_TIME_ELAPSED);
m_started = false;
}
double GetTimeSeconds()
{
GetResult();
return static_cast<double>(m_result) / 1000000000.0;
}
double GetTimeMilliseconds()
{
GetResult();
return static_cast<double>(m_result) / 1000000.0;
}
u32 GetTimeNanoseconds()
{
GetResult();
return m_result;
}
private:
void GetResult()
{
if (m_has_result)
return;
if (m_started)
End();
glGetQueryObjectuiv(m_query_id, GL_QUERY_RESULT, &m_result);
m_has_result = true;
}
GLuint m_query_id;
GLuint m_result = 0;
bool m_started = false;
bool m_has_result = false;
};
} // namespace OGL

View File

@ -53,6 +53,7 @@
<ItemGroup>
<ClInclude Include="BoundingBox.h" />
<ClInclude Include="FramebufferManager.h" />
<ClInclude Include="GPUTimer.h" />
<ClInclude Include="PerfQuery.h" />
<ClInclude Include="PostProcessing.h" />
<ClInclude Include="ProgramShaderCache.h" />

View File

@ -90,6 +90,9 @@
</ClInclude>
<ClInclude Include="SamplerCache.h" />
<ClInclude Include="VideoBackend.h" />
<ClInclude Include="GPUTimer.h">
<Filter>GLUtil</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Text Include="CMakeLists.txt" />

View File

@ -65,6 +65,8 @@ static std::string GetGLSLVersionString()
return "#version 330";
case GLSL_400:
return "#version 400";
case GLSL_430:
return "#version 430";
default:
// Shouldn't ever hit this
return "#version ERROR";
@ -103,7 +105,9 @@ void SHADER::SetProgramVariables()
}
}
void SHADER::SetProgramBindings()
void SHADER::SetProgramBindings(bool is_compute)
{
if (!is_compute)
{
if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{
@ -124,6 +128,7 @@ void SHADER::SetProgramBindings()
glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB, "rawnorm0");
glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB, "rawnorm1");
glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB, "rawnorm2");
}
for (int i = 0; i < 8; i++)
{
@ -281,7 +286,7 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
if (g_ogl_config.bSupportsGLSLCache)
glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
shader.SetProgramBindings();
shader.SetProgramBindings(false);
glLinkProgram(pid);
@ -296,10 +301,10 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length);
if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
{
GLsizei charsWritten;
GLchar* infoLog = new GLchar[length];
glGetProgramInfoLog(pid, length, &charsWritten, infoLog);
ERROR_LOG(VIDEO, "Program info log:\n%s", infoLog);
std::string info_log;
info_log.resize(length);
glGetProgramInfoLog(pid, length, &length, &info_log[0]);
ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str());
std::string filename =
StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
@ -308,7 +313,7 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
file << s_glsl_header << vcode << s_glsl_header << pcode;
if (!gcode.empty())
file << s_glsl_header << gcode;
file << infoLog;
file << info_log;
file.close();
if (linkStatus != GL_TRUE)
@ -316,10 +321,8 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
PanicAlert("Failed to link shaders: %s\n"
"Debug info (%s, %s, %s):\n%s",
filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
g_ogl_config.gl_version, infoLog);
g_ogl_config.gl_version, info_log.c_str());
}
delete[] infoLog;
}
if (linkStatus != GL_TRUE)
{
@ -336,6 +339,73 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
return true;
}
bool ProgramShaderCache::CompileComputeShader(SHADER& shader, const std::string& code)
{
// We need to enable GL_ARB_compute_shader for drivers that support the extension,
// but not GLSL 4.3. Mesa is one example.
std::string header;
if (g_ActiveConfig.backend_info.bSupportsComputeShaders &&
g_ogl_config.eSupportedGLSLVersion < GLSL_430)
{
header = "#extension GL_ARB_compute_shader : enable\n";
}
GLuint shader_id = CompileSingleShader(GL_COMPUTE_SHADER, header + code);
if (!shader_id)
return false;
GLuint pid = shader.glprogid = glCreateProgram();
glAttachShader(pid, shader_id);
if (g_ogl_config.bSupportsGLSLCache)
glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
shader.SetProgramBindings(true);
glLinkProgram(pid);
// original shaders aren't needed any more
glDeleteShader(shader_id);
GLint linkStatus;
glGetProgramiv(pid, GL_LINK_STATUS, &linkStatus);
GLsizei length = 0;
glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length);
if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
{
std::string info_log;
info_log.resize(length);
glGetProgramInfoLog(pid, length, &length, &info_log[0]);
ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str());
std::string filename =
StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
std::ofstream file;
OpenFStream(file, filename, std::ios_base::out);
file << s_glsl_header << code;
file << info_log;
file.close();
if (linkStatus != GL_TRUE)
{
PanicAlert("Failed to link shaders: %s\n"
"Debug info (%s, %s, %s):\n%s",
filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
g_ogl_config.gl_version, info_log.c_str());
}
}
if (linkStatus != GL_TRUE)
{
// Compile failed
ERROR_LOG(VIDEO, "Program linking failed; see info log");
// Don't try to use this shader
glDeleteProgram(pid);
return false;
}
return true;
}
GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& code)
{
GLuint result = glCreateShader(type);
@ -351,31 +421,43 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c
if (compileStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
{
GLsizei charsWritten;
GLchar* infoLog = new GLchar[length];
glGetShaderInfoLog(result, length, &charsWritten, infoLog);
ERROR_LOG(VIDEO, "%s Shader info log:\n%s",
type == GL_VERTEX_SHADER ? "VS" : type == GL_FRAGMENT_SHADER ? "PS" : "GS", infoLog);
std::string info_log;
info_log.resize(length);
glGetShaderInfoLog(result, length, &length, &info_log[0]);
const char* prefix = "";
switch (type)
{
case GL_VERTEX_SHADER:
prefix = "vs";
break;
case GL_GEOMETRY_SHADER:
prefix = "gs";
break;
case GL_FRAGMENT_SHADER:
prefix = "ps";
break;
case GL_COMPUTE_SHADER:
prefix = "cs";
break;
}
ERROR_LOG(VIDEO, "%s Shader info log:\n%s", prefix, info_log.c_str());
std::string filename = StringFromFormat(
"%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(),
type == GL_VERTEX_SHADER ? "vs" : type == GL_FRAGMENT_SHADER ? "ps" : "gs", num_failures++);
"%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), prefix, num_failures++);
std::ofstream file;
OpenFStream(file, filename, std::ios_base::out);
file << s_glsl_header << code << infoLog;
file << s_glsl_header << code << info_log;
file.close();
if (compileStatus != GL_TRUE)
{
PanicAlert("Failed to compile %s shader: %s\n"
"Debug info (%s, %s, %s):\n%s",
type == GL_VERTEX_SHADER ? "vertex" : type == GL_FRAGMENT_SHADER ? "pixel" :
"geometry",
filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
g_ogl_config.gl_version, infoLog);
prefix, filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
g_ogl_config.gl_version, info_log.c_str());
}
delete[] infoLog;
}
if (compileStatus != GL_TRUE)
{
@ -539,11 +621,9 @@ void ProgramShaderCache::CreateHeader()
std::string earlyz_string = "";
if (g_ActiveConfig.backend_info.bSupportsEarlyZ)
{
if (g_ogl_config.bSupportsEarlyFragmentTests)
if (g_ogl_config.bSupportsImageLoadStore)
{
earlyz_string = "#define FORCE_EARLY_Z layout(early_fragment_tests) in\n";
if (!is_glsles) // GLES supports this by default
earlyz_string += "#extension GL_ARB_shader_image_load_store : enable\n";
}
else if (g_ogl_config.bSupportsConservativeDepth)
{
@ -569,6 +649,7 @@ void ProgramShaderCache::CreateHeader()
"%s\n" // texture buffer
"%s\n" // ES texture buffer
"%s\n" // ES dual source blend
"%s\n" // shader image load store
// Precision defines for GLSL ES
"%s\n"
@ -576,6 +657,7 @@ void ProgramShaderCache::CreateHeader()
"%s\n"
"%s\n"
"%s\n"
"%s\n"
// Silly differences
"#define float2 vec2\n"
@ -638,12 +720,17 @@ void ProgramShaderCache::CreateHeader()
""
,
g_ogl_config.bSupportsImageLoadStore &&
((!is_glsles && v < GLSL_430) || (is_glsles && v < GLSLES_310)) ?
"#extension GL_ARB_shader_image_load_store : enable" :
"",
is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "",
is_glsles ? "precision highp sampler2DArray;" : "",
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
"precision highp usamplerBuffer;" :
"",
v > GLSLES_300 ? "precision highp sampler2DMS;" : "");
v > GLSLES_300 ? "precision highp sampler2DMS;" : "",
v >= GLSLES_310 ? "precision highp image2DArray;" : "");
}
void ProgramShaderCache::ProgramShaderCacheInserter::Read(const SHADERUID& key, const u8* value,

View File

@ -46,7 +46,7 @@ struct SHADER
std::string strvprog, strpprog, strgprog;
void SetProgramVariables();
void SetProgramBindings();
void SetProgramBindings(bool is_compute);
void Bind();
};
@ -67,6 +67,7 @@ public:
static bool CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode,
const std::string& gcode = "");
static bool CompileComputeShader(SHADER& shader, const std::string& code);
static GLuint CompileSingleShader(GLuint type, const std::string& code);
static void UploadConstants();

View File

@ -451,15 +451,16 @@ Renderer::Renderer()
g_ogl_config.bSupportViewportFloat = GLExtensions::Supports("GL_ARB_viewport_array");
g_ogl_config.bSupportsDebug =
GLExtensions::Supports("GL_KHR_debug") || GLExtensions::Supports("GL_ARB_debug_output");
g_ogl_config.bSupports3DTextureStorage =
g_ogl_config.bSupportsTextureStorage = GLExtensions::Supports("GL_ARB_texture_storage");
g_ogl_config.bSupports3DTextureStorageMultisample =
GLExtensions::Supports("GL_ARB_texture_storage_multisample") ||
GLExtensions::Supports("GL_OES_texture_storage_multisample_2d_array");
g_ogl_config.bSupports2DTextureStorage =
g_ogl_config.bSupports2DTextureStorageMultisample =
GLExtensions::Supports("GL_ARB_texture_storage_multisample");
g_ogl_config.bSupportsEarlyFragmentTests =
GLExtensions::Supports("GL_ARB_shader_image_load_store");
g_ogl_config.bSupportsImageLoadStore = GLExtensions::Supports("GL_ARB_shader_image_load_store");
g_ogl_config.bSupportsConservativeDepth = GLExtensions::Supports("GL_ARB_conservative_depth");
g_ogl_config.bSupportsAniso = GLExtensions::Supports("GL_EXT_texture_filter_anisotropic");
g_Config.backend_info.bSupportsComputeShaders = GLExtensions::Supports("GL_ARB_compute_shader");
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3)
{
@ -486,6 +487,7 @@ Renderer::Renderer()
{
g_ogl_config.eSupportedGLSLVersion = GLSLES_300;
g_ogl_config.bSupportsAEP = false;
g_ogl_config.bSupportsTextureStorage = true;
g_Config.backend_info.bSupportsGeometryShaders = false;
}
else if (GLExtensions::Version() == 310)
@ -493,16 +495,18 @@ Renderer::Renderer()
g_ogl_config.eSupportedGLSLVersion = GLSLES_310;
g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a");
g_Config.backend_info.bSupportsBindingLayout = true;
g_ogl_config.bSupportsEarlyFragmentTests = true;
g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsGeometryShaders = g_ogl_config.bSupportsAEP;
g_Config.backend_info.bSupportsComputeShaders = true;
g_Config.backend_info.bSupportsGSInstancing =
g_Config.backend_info.bSupportsGeometryShaders && g_ogl_config.SupportedESPointSize > 0;
g_Config.backend_info.bSupportsSSAA = g_ogl_config.bSupportsAEP;
g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true;
g_ogl_config.bSupportsMSAA = true;
g_ogl_config.bSupports2DTextureStorage = true;
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupports2DTextureStorageMultisample = true;
if (g_ActiveConfig.iStereoMode > 0 && g_ActiveConfig.iMultisamples > 1 &&
!g_ogl_config.bSupports3DTextureStorage)
!g_ogl_config.bSupports3DTextureStorageMultisample)
{
// GLES 3.1 can't support stereo rendering and MSAA
OSD::AddMessage("MSAA Stereo rendering isn't supported by your GPU.", 10000);
@ -514,8 +518,9 @@ Renderer::Renderer()
g_ogl_config.eSupportedGLSLVersion = GLSLES_320;
g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a");
g_Config.backend_info.bSupportsBindingLayout = true;
g_ogl_config.bSupportsEarlyFragmentTests = true;
g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsGeometryShaders = true;
g_Config.backend_info.bSupportsComputeShaders = true;
g_Config.backend_info.bSupportsGSInstancing = g_ogl_config.SupportedESPointSize > 0;
g_Config.backend_info.bSupportsPaletteConversion = true;
g_Config.backend_info.bSupportsSSAA = true;
@ -524,8 +529,9 @@ Renderer::Renderer()
g_ogl_config.bSupportsGLBaseVertex = true;
g_ogl_config.bSupportsDebug = true;
g_ogl_config.bSupportsMSAA = true;
g_ogl_config.bSupports2DTextureStorage = true;
g_ogl_config.bSupports3DTextureStorage = true;
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupports2DTextureStorageMultisample = true;
g_ogl_config.bSupports3DTextureStorageMultisample = true;
}
}
else
@ -541,8 +547,7 @@ Renderer::Renderer()
else if (GLExtensions::Version() == 300)
{
g_ogl_config.eSupportedGLSLVersion = GLSL_130;
g_ogl_config.bSupportsEarlyFragmentTests =
false; // layout keyword is only supported on glsl150+
g_ogl_config.bSupportsImageLoadStore = false; // layout keyword is only supported on glsl150+
g_ogl_config.bSupportsConservativeDepth =
false; // layout keyword is only supported on glsl150+
g_Config.backend_info.bSupportsGeometryShaders =
@ -551,8 +556,7 @@ Renderer::Renderer()
else if (GLExtensions::Version() == 310)
{
g_ogl_config.eSupportedGLSLVersion = GLSL_140;
g_ogl_config.bSupportsEarlyFragmentTests =
false; // layout keyword is only supported on glsl150+
g_ogl_config.bSupportsImageLoadStore = false; // layout keyword is only supported on glsl150+
g_ogl_config.bSupportsConservativeDepth =
false; // layout keyword is only supported on glsl150+
g_Config.backend_info.bSupportsGeometryShaders =
@ -566,10 +570,28 @@ Renderer::Renderer()
{
g_ogl_config.eSupportedGLSLVersion = GLSL_330;
}
else if (GLExtensions::Version() >= 430)
{
// TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
g_ogl_config.eSupportedGLSLVersion = GLSL_430;
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsSSAA = true;
// Compute shaders are core in GL4.3.
g_Config.backend_info.bSupportsComputeShaders = true;
}
else
{
g_ogl_config.eSupportedGLSLVersion = GLSL_400;
g_Config.backend_info.bSupportsSSAA = true;
if (GLExtensions::Version() == 420)
{
// Texture storage and shader image load/store are core in GL4.2.
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupportsImageLoadStore = true;
}
}
// Desktop OpenGL can't have the Android Extension Pack
@ -578,12 +600,19 @@ Renderer::Renderer()
// Either method can do early-z tests. See PixelShaderGen for details.
g_Config.backend_info.bSupportsEarlyZ =
g_ogl_config.bSupportsEarlyFragmentTests || g_ogl_config.bSupportsConservativeDepth;
g_ogl_config.bSupportsImageLoadStore || g_ogl_config.bSupportsConservativeDepth;
glGetIntegerv(GL_MAX_SAMPLES, &g_ogl_config.max_samples);
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
g_ogl_config.max_samples = 1;
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
// enabled in the version check below.
g_Config.backend_info.bSupportsGPUTextureDecoding =
g_Config.backend_info.bSupportsPaletteConversion &&
g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;
if (g_ogl_config.bSupportsDebug)
{
if (GLExtensions::Supports("GL_KHR_debug"))

View File

@ -23,6 +23,7 @@ enum GLSL_VERSION
GLSL_150,
GLSL_330,
GLSL_400, // and above
GLSL_430,
GLSLES_300, // GLES 3.0
GLSLES_310, // GLES 3.1
GLSLES_320, // GLES 3.2
@ -51,10 +52,11 @@ struct VideoConfig
bool bSupportsCopySubImage;
u8 SupportedESPointSize;
ES_TEXBUF_TYPE SupportedESTextureBuffer;
bool bSupports2DTextureStorage;
bool bSupports3DTextureStorage;
bool bSupportsEarlyFragmentTests;
bool bSupportsTextureStorage;
bool bSupports2DTextureStorageMultisample;
bool bSupports3DTextureStorageMultisample;
bool bSupportsConservativeDepth;
bool bSupportsImageLoadStore;
bool bSupportsAniso;
const char* gl_vendor;

View File

@ -16,6 +16,7 @@
#include "Common/StringUtil.h"
#include "VideoBackends/OGL/FramebufferManager.h"
#include "VideoBackends/OGL/GPUTimer.h"
#include "VideoBackends/OGL/ProgramShaderCache.h"
#include "VideoBackends/OGL/Render.h"
#include "VideoBackends/OGL/SamplerCache.h"
@ -23,6 +24,7 @@
#include "VideoBackends/OGL/TextureConverter.h"
#include "VideoCommon/ImageWrite.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VideoConfig.h"
@ -49,6 +51,26 @@ static GLuint s_palette_buffer_offset_uniform[3];
static GLuint s_palette_multiplier_uniform[3];
static GLuint s_palette_copy_position_uniform[3];
struct TextureDecodingProgramInfo
{
const TextureConversionShader::DecodingShaderInfo* base_info = nullptr;
SHADER program;
GLint uniform_dst_size = -1;
GLint uniform_src_size = -1;
GLint uniform_src_row_stride = -1;
GLint uniform_src_offset = -1;
GLint uniform_palette_offset = -1;
bool valid = false;
};
//#define TIME_TEXTURE_DECODING 1
static std::map<std::pair<u32, u32>, TextureDecodingProgramInfo> s_texture_decoding_program_info;
static std::array<GLuint, TextureConversionShader::BUFFER_FORMAT_COUNT>
s_texture_decoding_buffer_views;
static void CreateTextureDecodingResources();
static void DestroyTextureDecodingResources();
bool SaveTexture(const std::string& filename, u32 textarget, u32 tex, int virtual_width,
int virtual_height, unsigned int level)
{
@ -119,12 +141,22 @@ TextureCache::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConf
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAX_LEVEL, config.levels - 1);
if (g_ogl_config.bSupportsTextureStorage)
{
glTexStorage3D(GL_TEXTURE_2D_ARRAY, config.levels, GL_RGBA8, config.width, config.height,
config.layers);
}
if (config.rendertarget)
{
for (u32 level = 0; level <= config.levels; level++)
if (!g_ogl_config.bSupportsTextureStorage)
{
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, config.width, config.height, config.layers,
0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
for (u32 level = 0; level < config.levels; level++)
{
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, std::max(config.width >> level, 1u),
std::max(config.height >> level, 1u), config.layers, 0, GL_RGBA,
GL_UNSIGNED_BYTE, nullptr);
}
}
glGenFramebuffers(1, &entry->framebuffer);
FramebufferManager::SetFramebuffer(entry->framebuffer);
@ -187,8 +219,16 @@ void TextureCache::TCacheEntry::Load(const u8* buffer, u32 width, u32 height, u3
if (expanded_width != width)
glPixelStorei(GL_UNPACK_ROW_LENGTH, expanded_width);
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, width, height, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE,
buffer);
if (g_ogl_config.bSupportsTextureStorage)
{
glTexSubImage3D(GL_TEXTURE_2D_ARRAY, level, 0, 0, 0, width, height, 1, GL_RGBA,
GL_UNSIGNED_BYTE, buffer);
}
else
{
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, width, height, 1, 0, GL_RGBA,
GL_UNSIGNED_BYTE, buffer);
}
if (expanded_width != width)
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
@ -267,26 +307,31 @@ TextureCache::TextureCache()
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{
s32 buffer_size = 1024 * 1024;
s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1);
s32 buffer_size = buffer_size_mb * 1024 * 1024;
s32 max_buffer_size = 0;
// The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates
// is 65KB, we are asking for a 1MB buffer here.
// Make sure to check the maximum size and if it is below 1MB
// then use the maximum the hardware supports instead.
// The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB
// buffer here. This buffer is also used as storage for undecoded textures when compute shader
// texture decoding is enabled, in which case the requested size is 32MB.
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size);
// Clamp the buffer size to the maximum size that the driver supports.
buffer_size = std::min(buffer_size, max_buffer_size);
s_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size);
glGenTextures(1, &s_palette_resolv_texture);
glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, s_palette_stream_buffer->m_buffer);
CreateTextureDecodingResources();
}
}
TextureCache::~TextureCache()
{
DeleteShaders();
DestroyTextureDecodingResources();
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{
@ -588,4 +633,159 @@ void TextureCache::ConvertTexture(TCacheEntryBase* _entry, TCacheEntryBase* _unc
FramebufferManager::SetFramebuffer(0);
g_renderer->RestoreAPIState();
}
static const std::string decoding_vertex_shader = R"(
void main()
{
vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);
gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);
}
)";
void CreateTextureDecodingResources()
{
static const GLenum gl_view_types[TextureConversionShader::BUFFER_FORMAT_COUNT] = {
GL_R8UI, // BUFFER_FORMAT_R8_UINT
GL_R16UI, // BUFFER_FORMAT_R16_UINT
GL_RG32UI, // BUFFER_FORMAT_R32G32_UINT
};
glGenTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
s_texture_decoding_buffer_views.data());
for (size_t i = 0; i < TextureConversionShader::BUFFER_FORMAT_COUNT; i++)
{
glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[i]);
glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], s_palette_stream_buffer->m_buffer);
}
}
void DestroyTextureDecodingResources()
{
glDeleteTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
s_texture_decoding_buffer_views.data());
s_texture_decoding_buffer_views.fill(0);
s_texture_decoding_program_info.clear();
}
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
{
auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
auto iter = s_texture_decoding_program_info.find(key);
if (iter != s_texture_decoding_program_info.end())
return iter->second.valid;
TextureDecodingProgramInfo info;
info.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
if (!info.base_info)
{
s_texture_decoding_program_info.emplace(key, info);
return false;
}
std::string shader_source =
TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::OpenGL);
if (shader_source.empty())
{
s_texture_decoding_program_info.emplace(key, info);
return false;
}
if (!ProgramShaderCache::CompileComputeShader(info.program, shader_source))
{
s_texture_decoding_program_info.emplace(key, info);
return false;
}
info.uniform_dst_size = glGetUniformLocation(info.program.glprogid, "u_dst_size");
info.uniform_src_size = glGetUniformLocation(info.program.glprogid, "u_src_size");
info.uniform_src_offset = glGetUniformLocation(info.program.glprogid, "u_src_offset");
info.uniform_src_row_stride = glGetUniformLocation(info.program.glprogid, "u_src_row_stride");
info.uniform_palette_offset = glGetUniformLocation(info.program.glprogid, "u_palette_offset");
info.valid = true;
s_texture_decoding_program_info.emplace(key, info);
return true;
}
void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format)
{
auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
auto iter = s_texture_decoding_program_info.find(key);
if (iter == s_texture_decoding_program_info.end())
return;
#ifdef TIME_TEXTURE_DECODING
GPUTimer timer;
#endif
// Copy to GPU-visible buffer, aligned to the data type.
auto info = iter->second;
u32 bytes_per_buffer_elem =
TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);
// Only copy palette if it is required.
bool has_palette = info.base_info->palette_size > 0;
u32 total_upload_size = static_cast<u32>(data_size);
u32 palette_offset = total_upload_size;
if (has_palette)
{
// Align to u16.
if ((total_upload_size % sizeof(u16)) != 0)
{
total_upload_size++;
palette_offset++;
}
total_upload_size += info.base_info->palette_size;
}
// Allocate space in stream buffer, and copy texture + palette across.
auto buffer = s_palette_stream_buffer->Map(total_upload_size, bytes_per_buffer_elem);
memcpy(buffer.first, data, data_size);
if (has_palette)
memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size);
s_palette_stream_buffer->Unmap(total_upload_size);
info.program.Bind();
// Calculate stride in buffer elements
u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem;
u32 offset_in_elements = buffer.second / bytes_per_buffer_elem;
u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16);
if (info.uniform_dst_size >= 0)
glUniform2ui(info.uniform_dst_size, width, height);
if (info.uniform_src_size >= 0)
glUniform2ui(info.uniform_src_size, aligned_width, aligned_height);
if (info.uniform_src_offset >= 0)
glUniform1ui(info.uniform_src_offset, offset_in_elements);
if (info.uniform_src_row_stride >= 0)
glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements);
if (info.uniform_palette_offset >= 0)
glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements);
glActiveTexture(GL_TEXTURE9);
glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[info.base_info->buffer_format]);
if (has_palette)
{
// Use an R16UI view for the palette.
glActiveTexture(GL_TEXTURE10);
glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
}
auto dispatch_groups = TextureConversionShader::GetDispatchCount(info.base_info, width, height);
glBindImageTexture(0, static_cast<TCacheEntry*>(entry)->texture, dst_level, GL_TRUE, 0,
GL_WRITE_ONLY, GL_RGBA8);
glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1);
glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
TextureCache::SetStage();
#ifdef TIME_TEXTURE_DECODING
WARN_LOG(VIDEO, "Decode texture format %u size %ux%u took %.4fms", static_cast<u32>(format),
width, height, timer.GetTimeMilliseconds());
#endif
}
}

View File

@ -23,6 +23,12 @@ public:
static void DisableStage(unsigned int stage);
static void SetStage();
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
TextureFormat format, u32 width, u32 height, u32 aligned_width,
u32 aligned_height, u32 row_stride, const u8* palette,
TlutFormat palette_format) override;
private:
struct TCacheEntry : TCacheEntryBase
{

View File

@ -101,6 +101,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsExclusiveFullscreen = false;
g_Config.backend_info.bSupportsOversizedViewports = true;
g_Config.backend_info.bSupportsGeometryShaders = true;
g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = false;
g_Config.backend_info.bSupportsPostProcessing = true;
g_Config.backend_info.bSupportsSSAA = true;
@ -108,6 +109,11 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = true;
// TODO: There is a bug here, if texel buffers are not supported the graphics options
// will show the option when it is not supported. The only way around this would be
// creating a context when calling this function to determine what is available.
g_Config.backend_info.bSupportsGPUTextureDecoding = true;
// Overwritten in Render.cpp later
g_Config.backend_info.bSupportsDualSourceBlend = true;
g_Config.backend_info.bSupportsPrimitiveRestart = true;

View File

@ -131,7 +131,9 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsOversizedViewports = true;
g_Config.backend_info.bSupportsPrimitiveRestart = false;
g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
// aamodes
g_Config.backend_info.AAModes = {1};

View File

@ -91,7 +91,8 @@ bool CommandBufferManager::CreateCommandBuffers()
VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 500000},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 500000},
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16},
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024}};
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024},
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1024}};
VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
nullptr,

View File

@ -30,6 +30,7 @@ enum DESCRIPTOR_SET_LAYOUT
DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS,
DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS,
DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS,
DESCRIPTOR_SET_LAYOUT_COMPUTE,
NUM_DESCRIPTOR_SET_LAYOUTS
};
@ -52,6 +53,12 @@ enum DESCRIPTOR_SET_BIND_POINT
// - Same as standard, plus 128 bytes of push constants, accessible from all stages.
// - Texture Decoding
// - Same as push constant, plus a single texel buffer accessible from PS.
// - Compute
// - 1 uniform buffer [set=0, binding=0]
// - 4 combined image samplers [set=0, binding=1-4]
// - 1 texel buffer [set=0, binding=5]
// - 1 storage image [set=0, binding=6]
// - 128 bytes of push constants
//
// All four pipeline layout share the first two descriptor sets (uniform buffers, PS samplers).
// The third descriptor set (see bind points above) is used for storage or texel buffers.
@ -62,6 +69,7 @@ enum PIPELINE_LAYOUT
PIPELINE_LAYOUT_BBOX,
PIPELINE_LAYOUT_PUSH_CONSTANT,
PIPELINE_LAYOUT_TEXTURE_CONVERSION,
PIPELINE_LAYOUT_COMPUTE,
NUM_PIPELINE_LAYOUTS
};

View File

@ -324,6 +324,41 @@ std::pair<VkPipeline, bool> ObjectCache::GetPipelineWithCacheResult(const Pipeli
return {pipeline, false};
}
VkPipeline ObjectCache::CreateComputePipeline(const ComputePipelineInfo& info)
{
VkComputePipelineCreateInfo pipeline_info = {VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
nullptr,
0,
{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
nullptr, 0, VK_SHADER_STAGE_COMPUTE_BIT, info.cs,
"main", nullptr},
info.pipeline_layout,
VK_NULL_HANDLE,
-1};
VkPipeline pipeline;
VkResult res = vkCreateComputePipelines(g_vulkan_context->GetDevice(), VK_NULL_HANDLE, 1,
&pipeline_info, nullptr, &pipeline);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "vkCreateComputePipelines failed: ");
return VK_NULL_HANDLE;
}
return pipeline;
}
VkPipeline ObjectCache::GetComputePipeline(const ComputePipelineInfo& info)
{
auto iter = m_compute_pipeline_objects.find(info);
if (iter != m_compute_pipeline_objects.end())
return iter->second;
VkPipeline pipeline = CreateComputePipeline(info);
m_compute_pipeline_objects.emplace(info, pipeline);
return pipeline;
}
std::string ObjectCache::GetDiskCacheFileName(const char* type)
{
return StringFromFormat("%svulkan-%s-%s.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
@ -477,6 +512,13 @@ void ObjectCache::DestroyPipelineCache()
}
m_pipeline_objects.clear();
for (const auto& it : m_compute_pipeline_objects)
{
if (it.second != VK_NULL_HANDLE)
vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr);
}
m_compute_pipeline_objects.clear();
vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr);
m_pipeline_cache = VK_NULL_HANDLE;
}
@ -725,6 +767,17 @@ bool ObjectCache::CreateDescriptorSetLayouts()
{0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
};
static const VkDescriptorSetLayoutBinding compute_set_bindings[] = {
{0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{5, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{6, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT},
};
static const VkDescriptorSetLayoutCreateInfo create_infos[NUM_DESCRIPTOR_SET_LAYOUTS] = {
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(ubo_set_bindings)), ubo_set_bindings},
@ -733,7 +786,9 @@ bool ObjectCache::CreateDescriptorSetLayouts()
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(ssbo_set_bindings)), ssbo_set_bindings},
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(texel_buffer_set_bindings)), texel_buffer_set_bindings}};
static_cast<u32>(ArraySize(texel_buffer_set_bindings)), texel_buffer_set_bindings},
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(compute_set_bindings)), compute_set_bindings}};
for (size_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; i++)
{
@ -774,8 +829,11 @@ bool ObjectCache::CreatePipelineLayouts()
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UNIFORM_BUFFERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS]};
VkDescriptorSetLayout compute_sets[] = {m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_COMPUTE]};
VkPushConstantRange push_constant_range = {
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, PUSH_CONSTANT_BUFFER_SIZE};
VkPushConstantRange compute_push_constant_range = {VK_SHADER_STAGE_COMPUTE_BIT, 0,
PUSH_CONSTANT_BUFFER_SIZE};
// Info for each pipeline layout
VkPipelineLayoutCreateInfo pipeline_layout_info[NUM_PIPELINE_LAYOUTS] = {
@ -794,7 +852,11 @@ bool ObjectCache::CreatePipelineLayouts()
// Texture Conversion
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(texture_conversion_sets)), texture_conversion_sets, 1,
&push_constant_range}};
&push_constant_range},
// Compute
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(compute_sets)), compute_sets, 1, &compute_push_constant_range}};
for (size_t i = 0; i < NUM_PIPELINE_LAYOUTS; i++)
{
@ -1007,6 +1069,31 @@ bool operator<(const SamplerState& lhs, const SamplerState& rhs)
return lhs.bits < rhs.bits;
}
std::size_t ComputePipelineInfoHash::operator()(const ComputePipelineInfo& key) const
{
return static_cast<std::size_t>(XXH64(&key, sizeof(key), 0));
}
bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
{
return std::memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
}
bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
{
return !operator==(lhs, rhs);
}
bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
{
return std::memcmp(&lhs, &rhs, sizeof(lhs)) < 0;
}
bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
{
return std::memcmp(&lhs, &rhs, sizeof(lhs)) > 0;
}
bool ObjectCache::CompileSharedShaders()
{
static const char PASSTHROUGH_VERTEX_SHADER_SOURCE[] = R"(

View File

@ -56,6 +56,22 @@ bool operator!=(const SamplerState& lhs, const SamplerState& rhs);
bool operator>(const SamplerState& lhs, const SamplerState& rhs);
bool operator<(const SamplerState& lhs, const SamplerState& rhs);
struct ComputePipelineInfo
{
VkPipelineLayout pipeline_layout;
VkShaderModule cs;
};
struct ComputePipelineInfoHash
{
std::size_t operator()(const ComputePipelineInfo& key) const;
};
bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
class ObjectCache
{
public:
@ -114,6 +130,12 @@ public:
// otherwise for a cache hit it will be true.
std::pair<VkPipeline, bool> GetPipelineWithCacheResult(const PipelineInfo& info);
// Creates a compute pipeline, and does not track the handle.
VkPipeline CreateComputePipeline(const ComputePipelineInfo& info);
// Find a pipeline by the specified description, if not found, attempts to create it
VkPipeline GetComputePipeline(const ComputePipelineInfo& info);
// Saves the pipeline cache to disk. Call when shutting down.
void SavePipelineCache();
@ -166,6 +188,8 @@ private:
ShaderCache<PixelShaderUid> m_ps_cache;
std::unordered_map<PipelineInfo, VkPipeline, PipelineInfoHash> m_pipeline_objects;
std::unordered_map<ComputePipelineInfo, VkPipeline, ComputePipelineInfoHash>
m_compute_pipeline_objects;
VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE;
std::string m_pipeline_cache_filename;

View File

@ -35,7 +35,7 @@ static const TBuiltInResource* GetCompilerResourceLimits();
// Compile a shader to SPIR-V via glslang
static bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage,
const char* stage_filename, const char* source_code,
size_t source_code_length, bool prepend_header);
size_t source_code_length, const char* header, size_t header_length);
// Regarding the UBO bind points, we subtract one from the binding index because
// the OpenGL backend requires UBO #0 for non-block uniforms (at least on NV).
@ -73,9 +73,32 @@ static const char SHADER_HEADER[] = R"(
#define gl_VertexID gl_VertexIndex
#define gl_InstanceID gl_InstanceIndex
)";
static const char COMPUTE_SHADER_HEADER[] = R"(
// Target GLSL 4.5.
#version 450 core
// All resources are packed into one descriptor set for compute.
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (0 + x))
#define SAMPLER_BINDING(x) layout(set = 0, binding = (1 + x))
#define TEXEL_BUFFER_BINDING(x) layout(set = 0, binding = (5 + x))
#define IMAGE_BINDING(format, x) layout(format, set = 0, binding = (7 + x))
// hlsl to glsl function translation
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix
)";
bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char* stage_filename,
const char* source_code, size_t source_code_length, bool prepend_header)
const char* source_code, size_t source_code_length, const char* header,
size_t header_length)
{
if (!InitializeGlslang())
return false;
@ -91,10 +114,10 @@ bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char
std::string full_source_code;
const char* pass_source_code = source_code;
int pass_source_code_length = static_cast<int>(source_code_length);
if (prepend_header)
if (header_length > 0)
{
full_source_code.reserve(sizeof(SHADER_HEADER) + source_code_length);
full_source_code.append(SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
full_source_code.reserve(header_length + source_code_length);
full_source_code.append(header, header_length);
full_source_code.append(source_code, source_code_length);
pass_source_code = full_source_code.c_str();
pass_source_code_length = static_cast<int>(full_source_code.length());
@ -318,21 +341,28 @@ bool CompileVertexShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header)
{
return CompileShaderToSPV(out_code, EShLangVertex, "vs", source_code, source_code_length,
prepend_header);
SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
}
bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header)
{
return CompileShaderToSPV(out_code, EShLangGeometry, "gs", source_code, source_code_length,
prepend_header);
SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
}
bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header)
{
return CompileShaderToSPV(out_code, EShLangFragment, "ps", source_code, source_code_length,
prepend_header);
SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
}
bool CompileComputeShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header)
{
return CompileShaderToSPV(out_code, EShLangCompute, "cs", source_code, source_code_length,
COMPUTE_SHADER_HEADER, sizeof(COMPUTE_SHADER_HEADER) - 1);
}
} // namespace ShaderCompiler

View File

@ -29,5 +29,9 @@ bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code,
bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header = true);
// Compile a compute shader to SPIR-V.
bool CompileComputeShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header = true);
} // namespace ShaderCompiler
} // namespace Vulkan

View File

@ -4,6 +4,7 @@
#include <algorithm>
#include "Common/Assert.h"
#include "VideoBackends/Vulkan/CommandBufferManager.h"
#include "VideoBackends/Vulkan/Texture2D.h"
#include "VideoBackends/Vulkan/VulkanContext.h"
@ -273,10 +274,132 @@ void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout
break;
}
// If we were using a compute layout, the stages need to reflect that
switch (m_compute_layout)
{
case ComputeImageLayout::Undefined:
break;
case ComputeImageLayout::ReadOnly:
barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
break;
case ComputeImageLayout::WriteOnly:
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
break;
case ComputeImageLayout::ReadWrite:
barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
break;
}
m_compute_layout = ComputeImageLayout::Undefined;
vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1,
&barrier);
m_layout = new_layout;
}
void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout)
{
_assert_(new_layout != ComputeImageLayout::Undefined);
if (m_compute_layout == new_layout)
return;
VkImageMemoryBarrier barrier = {
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType
nullptr, // const void* pNext
0, // VkAccessFlags srcAccessMask
0, // VkAccessFlags dstAccessMask
m_layout, // VkImageLayout oldLayout
VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout
VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex
VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex
m_image, // VkImage image
{static_cast<VkImageAspectFlags>(Util::IsDepthFormat(m_format) ? VK_IMAGE_ASPECT_DEPTH_BIT :
VK_IMAGE_ASPECT_COLOR_BIT),
0, m_levels, 0, m_layers} // VkImageSubresourceRange subresourceRange
};
VkPipelineStageFlags srcStageMask, dstStageMask;
switch (m_layout)
{
case VK_IMAGE_LAYOUT_UNDEFINED:
// Layout undefined therefore contents undefined, and we don't care what happens to it.
barrier.srcAccessMask = 0;
srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
break;
case VK_IMAGE_LAYOUT_PREINITIALIZED:
// Image has been pre-initialized by the host, so ensure all writes have completed.
barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
srcStageMask = VK_PIPELINE_STAGE_HOST_BIT;
break;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
// Image was being used as a color attachment, so ensure all writes have completed.
barrier.srcAccessMask =
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
break;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
// Image was being used as a depthstencil attachment, so ensure all writes have completed.
barrier.srcAccessMask =
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
break;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
// Image was being used as a shader resource, make sure all reads have finished.
barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
// Image was being used as a copy source, ensure all reads have finished.
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
// Image was being used as a copy destination, ensure all writes have finished.
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
break;
default:
srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
break;
}
switch (new_layout)
{
case ComputeImageLayout::ReadOnly:
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
break;
case ComputeImageLayout::WriteOnly:
barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
break;
case ComputeImageLayout::ReadWrite:
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
break;
default:
dstStageMask = 0;
break;
}
m_layout = barrier.newLayout;
m_compute_layout = new_layout;
vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1,
&barrier);
}
} // namespace Vulkan

View File

@ -17,6 +17,15 @@ class ObjectCache;
class Texture2D
{
public:
// Custom image layouts, mainly used for switching to/from compute
enum class ComputeImageLayout
{
Undefined,
ReadOnly,
WriteOnly,
ReadWrite
};
Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format,
VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image,
VkDeviceMemory device_memory, VkImageView view);
@ -50,6 +59,7 @@ public:
void OverrideImageLayout(VkImageLayout new_layout);
void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout);
void TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout);
private:
u32 m_width;
@ -60,6 +70,7 @@ private:
VkSampleCountFlagBits m_samples;
VkImageViewType m_view_type;
VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED;
ComputeImageLayout m_compute_layout = ComputeImageLayout::Undefined;
VkImage m_image;
VkDeviceMemory m_device_memory;

View File

@ -138,6 +138,21 @@ void TextureCache::CopyRectangleFromTexture(TCacheEntry* dst_texture,
ScaleTextureRectangle(dst_texture, dst_rect, src_texture, src_rect);
}
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
{
return m_texture_converter->SupportsTextureDecoding(format, palette_format);
}
void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format)
{
m_texture_converter->DecodeTexture(static_cast<TCacheEntry*>(entry), dst_level, data, data_size,
format, width, height, aligned_width, aligned_height,
row_stride, palette, palette_format);
}
void TextureCache::CopyTextureRectangle(TCacheEntry* dst_texture,
const MathUtil::Rectangle<int>& dst_rect,
Texture2D* src_texture,

View File

@ -66,6 +66,13 @@ public:
void CopyRectangleFromTexture(TCacheEntry* dst_texture, const MathUtil::Rectangle<int>& dst_rect,
Texture2D* src_texture, const MathUtil::Rectangle<int>& src_rect);
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
TextureFormat format, u32 width, u32 height, u32 aligned_width,
u32 aligned_height, u32 row_stride, const u8* palette,
TlutFormat palette_format) override;
private:
bool CreateRenderPasses();

View File

@ -42,8 +42,12 @@ TextureConverter::~TextureConverter()
vkDestroyShaderModule(g_vulkan_context->GetDevice(), it, nullptr);
}
if (m_texel_buffer_view_r8_uint != VK_NULL_HANDLE)
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r8_uint, nullptr);
if (m_texel_buffer_view_r16_uint != VK_NULL_HANDLE)
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r16_uint, nullptr);
if (m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE)
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r32g32_uint, nullptr);
if (m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE)
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_rgba8_unorm, nullptr);
@ -59,6 +63,12 @@ TextureConverter::~TextureConverter()
vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr);
}
for (const auto& it : m_decoding_pipelines)
{
if (it.second.compute_shader != VK_NULL_HANDLE)
vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second.compute_shader, nullptr);
}
if (m_rgb_to_yuyv_shader != VK_NULL_HANDLE)
vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_rgb_to_yuyv_shader, nullptr);
if (m_yuyv_to_rgb_shader != VK_NULL_HANDLE)
@ -103,6 +113,12 @@ bool TextureConverter::Initialize()
return false;
}
if (!CreateDecodingTexture())
{
PanicAlert("Failed to create decoding texture");
return false;
}
if (!CompileYUYVConversionShaders())
{
PanicAlert("Failed to compile YUYV conversion shaders");
@ -371,6 +387,152 @@ void TextureConverter::DecodeYUYVTextureFromMemory(TextureCache::TCacheEntry* ds
draw.EndRenderPass();
}
bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TlutFormat palette_format)
{
auto key = std::make_pair(format, palette_format);
auto iter = m_decoding_pipelines.find(key);
if (iter != m_decoding_pipelines.end())
return iter->second.valid;
TextureDecodingPipeline pipeline;
pipeline.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
pipeline.compute_shader = VK_NULL_HANDLE;
pipeline.valid = false;
if (!pipeline.base_info)
{
m_decoding_pipelines.emplace(key, pipeline);
return false;
}
std::string shader_source =
TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::Vulkan);
pipeline.compute_shader = Util::CompileAndCreateComputeShader(shader_source, true);
if (pipeline.compute_shader == VK_NULL_HANDLE)
{
m_decoding_pipelines.emplace(key, pipeline);
return false;
}
pipeline.valid = true;
m_decoding_pipelines.emplace(key, pipeline);
return true;
}
void TextureConverter::DecodeTexture(TextureCache::TCacheEntry* entry, u32 dst_level,
const u8* data, size_t data_size, TextureFormat format,
u32 width, u32 height, u32 aligned_width, u32 aligned_height,
u32 row_stride, const u8* palette, TlutFormat palette_format)
{
auto key = std::make_pair(format, palette_format);
auto iter = m_decoding_pipelines.find(key);
if (iter == m_decoding_pipelines.end())
return;
struct PushConstants
{
u32 dst_size[2];
u32 src_size[2];
u32 src_offset;
u32 src_row_stride;
u32 palette_offset;
};
// Copy to GPU-visible buffer, aligned to the data type
auto info = iter->second;
u32 bytes_per_buffer_elem =
TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);
// Calculate total data size, including palette.
// Only copy palette if it is required.
u32 total_upload_size = static_cast<u32>(data_size);
u32 palette_size = iter->second.base_info->palette_size;
u32 palette_offset = total_upload_size;
bool has_palette = palette_size > 0;
if (has_palette)
{
// Align to u16.
if ((total_upload_size % sizeof(u16)) != 0)
{
total_upload_size++;
palette_offset++;
}
total_upload_size += palette_size;
}
// Allocate space for upload, if it fails, execute the buffer.
if (!m_texel_buffer->ReserveMemory(total_upload_size, bytes_per_buffer_elem))
{
Util::ExecuteCurrentCommandsAndRestoreState(true, false);
if (!m_texel_buffer->ReserveMemory(total_upload_size, bytes_per_buffer_elem))
PanicAlert("Failed to reserve memory for encoded texture upload");
}
// Copy/commit upload buffer.
u32 texel_buffer_offset = static_cast<u32>(m_texel_buffer->GetCurrentOffset());
std::memcpy(m_texel_buffer->GetCurrentHostPointer(), data, data_size);
if (has_palette)
std::memcpy(m_texel_buffer->GetCurrentHostPointer() + palette_offset, palette, palette_size);
m_texel_buffer->CommitMemory(total_upload_size);
// Determine uniforms.
PushConstants constants = {
{width, height},
{aligned_width, aligned_height},
texel_buffer_offset / bytes_per_buffer_elem,
row_stride / bytes_per_buffer_elem,
static_cast<u32>((texel_buffer_offset + palette_offset) / sizeof(u16))};
// Determine view to use for texel buffers.
VkBufferView data_view = VK_NULL_HANDLE;
switch (iter->second.base_info->buffer_format)
{
case TextureConversionShader::BUFFER_FORMAT_R8_UINT:
data_view = m_texel_buffer_view_r8_uint;
break;
case TextureConversionShader::BUFFER_FORMAT_R16_UINT:
data_view = m_texel_buffer_view_r16_uint;
break;
case TextureConversionShader::BUFFER_FORMAT_R32G32_UINT:
data_view = m_texel_buffer_view_r32g32_uint;
break;
default:
break;
}
// Place compute shader dispatches together in the init command buffer.
// That way we don't have to pay a penalty for switching from graphics->compute,
// or end/restart our render pass.
VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentInitCommandBuffer();
// Dispatch compute to temporary texture.
ComputeShaderDispatcher dispatcher(command_buffer,
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE),
iter->second.compute_shader);
m_decoding_texture->TransitionToLayout(command_buffer, Texture2D::ComputeImageLayout::WriteOnly);
dispatcher.SetPushConstants(&constants, sizeof(constants));
dispatcher.SetStorageImage(m_decoding_texture->GetView(), m_decoding_texture->GetLayout());
dispatcher.SetTexelBuffer(0, data_view);
if (has_palette)
dispatcher.SetTexelBuffer(1, m_texel_buffer_view_r16_uint);
auto groups = TextureConversionShader::GetDispatchCount(iter->second.base_info, width, height);
dispatcher.Dispatch(groups.first, groups.second, 1);
// Copy from temporary texture to final destination.
m_decoding_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
entry->GetTexture()->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
VkImageCopy image_copy = {{VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},
{0, 0, 0},
{VK_IMAGE_ASPECT_COLOR_BIT, dst_level, 0, 1},
{0, 0, 0},
{width, height, 1}};
vkCmdCopyImage(command_buffer, m_decoding_texture->GetImage(),
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, entry->GetTexture()->GetImage(),
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy);
}
bool TextureConverter::CreateTexelBuffer()
{
// Prefer an 8MB buffer if possible, but use less if the device doesn't support this.
@ -386,9 +548,13 @@ bool TextureConverter::CreateTexelBuffer()
return false;
// Create views of the formats that we will be using.
m_texel_buffer_view_r8_uint = CreateTexelBufferView(VK_FORMAT_R8_UINT);
m_texel_buffer_view_r16_uint = CreateTexelBufferView(VK_FORMAT_R16_UINT);
m_texel_buffer_view_r32g32_uint = CreateTexelBufferView(VK_FORMAT_R32G32_UINT);
m_texel_buffer_view_rgba8_unorm = CreateTexelBufferView(VK_FORMAT_R8G8B8A8_UNORM);
return m_texel_buffer_view_r16_uint != VK_NULL_HANDLE &&
return m_texel_buffer_view_r8_uint != VK_NULL_HANDLE &&
m_texel_buffer_view_r16_uint != VK_NULL_HANDLE &&
m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE &&
m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE;
}
@ -611,6 +777,15 @@ bool TextureConverter::CreateEncodingDownloadTexture()
return m_encoding_download_texture && m_encoding_download_texture->Map();
}
bool TextureConverter::CreateDecodingTexture()
{
m_decoding_texture = Texture2D::Create(
DECODING_TEXTURE_WIDTH, DECODING_TEXTURE_HEIGHT, 1, 1, VK_FORMAT_R8G8B8A8_UNORM,
VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
return static_cast<bool>(m_decoding_texture);
}
bool TextureConverter::CompileYUYVConversionShaders()
{
static const char RGB_TO_YUYV_SHADER_SOURCE[] = R"(

View File

@ -5,11 +5,14 @@
#pragma once
#include <array>
#include <map>
#include <memory>
#include <utility>
#include "Common/CommonTypes.h"
#include "VideoBackends/Vulkan/StreamBuffer.h"
#include "VideoBackends/Vulkan/TextureCache.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VideoCommon.h"
@ -45,6 +48,12 @@ public:
void DecodeYUYVTextureFromMemory(TextureCache::TCacheEntry* dst_texture, const void* src_ptr,
u32 src_width, u32 src_stride, u32 src_height);
bool SupportsTextureDecoding(TextureFormat format, TlutFormat palette_format);
void DecodeTexture(TextureCache::TCacheEntry* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride, const u8* palette,
TlutFormat palette_format);
private:
static const u32 NUM_TEXTURE_ENCODING_SHADERS = 64;
static const u32 ENCODING_TEXTURE_WIDTH = EFB_WIDTH * 4;
@ -52,6 +61,10 @@ private:
static const VkFormat ENCODING_TEXTURE_FORMAT = VK_FORMAT_B8G8R8A8_UNORM;
static const size_t NUM_PALETTE_CONVERSION_SHADERS = 3;
// Maximum size of a texture based on BP registers.
static const u32 DECODING_TEXTURE_WIDTH = 1024;
static const u32 DECODING_TEXTURE_HEIGHT = 1024;
bool CreateTexelBuffer();
VkBufferView CreateTexelBufferView(VkFormat format) const;
@ -62,6 +75,8 @@ private:
bool CreateEncodingTexture();
bool CreateEncodingDownloadTexture();
bool CreateDecodingTexture();
bool CompileYUYVConversionShaders();
// Allocates storage in the texel command buffer of the specified size.
@ -77,7 +92,9 @@ private:
// Shared between conversion types
std::unique_ptr<StreamBuffer> m_texel_buffer;
VkBufferView m_texel_buffer_view_r8_uint = VK_NULL_HANDLE;
VkBufferView m_texel_buffer_view_r16_uint = VK_NULL_HANDLE;
VkBufferView m_texel_buffer_view_r32g32_uint = VK_NULL_HANDLE;
VkBufferView m_texel_buffer_view_rgba8_unorm = VK_NULL_HANDLE;
size_t m_texel_buffer_size = 0;
@ -91,6 +108,16 @@ private:
VkFramebuffer m_encoding_render_framebuffer = VK_NULL_HANDLE;
std::unique_ptr<StagingTexture2D> m_encoding_download_texture;
// Texture decoding - GX format in memory->RGBA8
struct TextureDecodingPipeline
{
const TextureConversionShader::DecodingShaderInfo* base_info;
VkShaderModule compute_shader;
bool valid;
};
std::map<std::pair<TextureFormat, TlutFormat>, TextureDecodingPipeline> m_decoding_pipelines;
std::unique_ptr<Texture2D> m_decoding_texture;
// XFB encoding/decoding shaders
VkShaderModule m_rgb_to_yuyv_shader = VK_NULL_HANDLE;
VkShaderModule m_yuyv_to_rgb_shader = VK_NULL_HANDLE;

View File

@ -250,6 +250,18 @@ VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code, bo
return CreateShaderModule(code.data(), code.size());
}
VkShaderModule CompileAndCreateComputeShader(const std::string& source_code, bool prepend_header)
{
ShaderCompiler::SPIRVCodeVector code;
if (!ShaderCompiler::CompileComputeShader(&code, source_code.c_str(), source_code.length(),
prepend_header))
{
return VK_NULL_HANDLE;
}
return CreateShaderModule(code.data(), code.size());
}
} // namespace Util
UtilityShaderDraw::UtilityShaderDraw(VkCommandBuffer command_buffer,
@ -670,4 +682,157 @@ bool UtilityShaderDraw::BindPipeline()
return true;
}
ComputeShaderDispatcher::ComputeShaderDispatcher(VkCommandBuffer command_buffer,
VkPipelineLayout pipeline_layout,
VkShaderModule compute_shader)
: m_command_buffer(command_buffer)
{
// Populate minimal pipeline state
m_pipeline_info.pipeline_layout = pipeline_layout;
m_pipeline_info.cs = compute_shader;
}
u8* ComputeShaderDispatcher::AllocateUniformBuffer(size_t size)
{
if (!g_object_cache->GetUtilityShaderUniformBuffer()->ReserveMemory(
size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true))
PanicAlert("Failed to allocate util uniforms");
return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer();
}
void ComputeShaderDispatcher::CommitUniformBuffer(size_t size)
{
m_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer();
m_uniform_buffer.offset = 0;
m_uniform_buffer.range = size;
m_uniform_buffer_offset =
static_cast<u32>(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset());
g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size);
}
void ComputeShaderDispatcher::SetPushConstants(const void* data, size_t data_size)
{
_assert_(static_cast<u32>(data_size) < PUSH_CONSTANT_BUFFER_SIZE);
vkCmdPushConstants(m_command_buffer, m_pipeline_info.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
0, static_cast<u32>(data_size), data);
}
void ComputeShaderDispatcher::SetSampler(size_t index, VkImageView view, VkSampler sampler)
{
m_samplers[index].sampler = sampler;
m_samplers[index].imageView = view;
m_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
void ComputeShaderDispatcher::SetStorageImage(VkImageView view, VkImageLayout image_layout)
{
m_storage_image.sampler = VK_NULL_HANDLE;
m_storage_image.imageView = view;
m_storage_image.imageLayout = image_layout;
}
void ComputeShaderDispatcher::SetTexelBuffer(size_t index, VkBufferView view)
{
m_texel_buffers[index] = view;
}
void ComputeShaderDispatcher::Dispatch(u32 groups_x, u32 groups_y, u32 groups_z)
{
BindDescriptors();
if (!BindPipeline())
return;
vkCmdDispatch(m_command_buffer, groups_x, groups_y, groups_z);
}
void ComputeShaderDispatcher::BindDescriptors()
{
VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(
g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_COMPUTE));
if (set == VK_NULL_HANDLE)
{
PanicAlert("Failed to allocate descriptor set for compute dispatch");
return;
}
// Reserve enough descriptors to write every binding.
std::array<VkWriteDescriptorSet, 7> set_writes = {};
u32 num_set_writes = 0;
if (m_uniform_buffer.buffer != VK_NULL_HANDLE)
{
set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
nullptr,
set,
0,
0,
1,
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
nullptr,
&m_uniform_buffer,
nullptr};
}
// Samplers
for (size_t i = 0; i < m_samplers.size(); i++)
{
const VkDescriptorImageInfo& info = m_samplers[i];
if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE)
{
set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
nullptr,
set,
static_cast<u32>(1 + i),
0,
1,
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
&info,
nullptr,
nullptr};
}
}
for (size_t i = 0; i < m_texel_buffers.size(); i++)
{
if (m_texel_buffers[i] != VK_NULL_HANDLE)
{
set_writes[num_set_writes++] = {
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, set, 5 + static_cast<u32>(i), 0, 1,
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, nullptr, nullptr, &m_texel_buffers[i]};
}
}
if (m_storage_image.imageView != VK_NULL_HANDLE)
{
set_writes[num_set_writes++] = {
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, set, 7, 0, 1,
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &m_storage_image, nullptr, nullptr};
}
if (num_set_writes > 0)
{
vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_set_writes, set_writes.data(), 0,
nullptr);
}
vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
m_pipeline_info.pipeline_layout, 0, 1, &set, 1, &m_uniform_buffer_offset);
}
bool ComputeShaderDispatcher::BindPipeline()
{
VkPipeline pipeline = g_object_cache->GetComputePipeline(m_pipeline_info);
if (pipeline == VK_NULL_HANDLE)
{
PanicAlert("Failed to get pipeline for backend compute dispatch");
return false;
}
vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
return true;
}
} // namespace Vulkan

View File

@ -63,6 +63,10 @@ VkShaderModule CompileAndCreateGeometryShader(const std::string& source_code,
// Compile a fragment shader and create a shader module, discarding the intermediate SPIR-V.
VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code,
bool prepend_header = true);
// Compile a compute shader and create a shader module, discarding the intermediate SPIR-V.
VkShaderModule CompileAndCreateComputeShader(const std::string& source_code,
bool prepend_header = true);
}
// Utility shader vertex format
@ -188,4 +192,41 @@ private:
PipelineInfo m_pipeline_info = {};
};
class ComputeShaderDispatcher
{
public:
ComputeShaderDispatcher(VkCommandBuffer command_buffer, VkPipelineLayout pipeline_layout,
VkShaderModule compute_shader);
u8* AllocateUniformBuffer(size_t size);
void CommitUniformBuffer(size_t size);
void SetPushConstants(const void* data, size_t data_size);
void SetSampler(size_t index, VkImageView view, VkSampler sampler);
void SetTexelBuffer(size_t index, VkBufferView view);
void SetStorageImage(VkImageView view, VkImageLayout image_layout);
void Dispatch(u32 groups_x, u32 groups_y, u32 groups_z);
private:
void BindDescriptors();
bool BindPipeline();
VkCommandBuffer m_command_buffer = VK_NULL_HANDLE;
VkDescriptorBufferInfo m_uniform_buffer = {};
u32 m_uniform_buffer_offset = 0;
std::array<VkDescriptorImageInfo, 4> m_samplers = {};
std::array<VkBufferView, 2> m_texel_buffers = {};
VkDescriptorImageInfo m_storage_image = {};
ComputePipelineInfo m_pipeline_info = {};
};
} // namespace Vulkan

View File

@ -234,6 +234,8 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsPaletteConversion = true; // Assumed support.
config->backend_info.bSupportsClipControl = true; // Assumed support.
config->backend_info.bSupportsMultithreading = true; // Assumed support.
config->backend_info.bSupportsComputeShaders = true; // Assumed support.
config->backend_info.bSupportsGPUTextureDecoding = true; // Assumed support.
config->backend_info.bSupportsInternalResolutionFrameDumps = true; // Assumed support.
config->backend_info.bSupportsPostProcessing = false; // No support yet.
config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features.

View File

@ -110,7 +110,8 @@ void TextureCacheBase::OnConfigChanged(VideoConfig& config)
if (config.iSafeTextureCache_ColorSamples != backup_config.color_samples ||
config.bTexFmtOverlayEnable != backup_config.texfmt_overlay ||
config.bTexFmtOverlayCenter != backup_config.texfmt_overlay_center ||
config.bHiresTextures != backup_config.hires_textures)
config.bHiresTextures != backup_config.hires_textures ||
config.bEnableGPUTextureDecoding != backup_config.gpu_texture_decoding)
{
Invalidate();
@ -209,6 +210,7 @@ void TextureCacheBase::SetBackupConfig(const VideoConfig& config)
backup_config.cache_hires_textures = config.bCacheHiresTextures;
backup_config.stereo_3d = config.iStereoMode > 0;
backup_config.efb_mono_depth = config.bStereoEFBMonoDepth;
backup_config.gpu_texture_decoding = config.bEnableGPUTextureDecoding;
}
TextureCacheBase::TCacheEntryBase* TextureCacheBase::ApplyPaletteToEntry(TCacheEntryBase* entry,
@ -526,6 +528,7 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
const u32 texture_size =
TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
u32 bytes_per_block = (bsw * bsh * TexDecoder_GetTexelSizeInNibbles(texformat)) / 2;
u32 additional_mips_size = 0; // not including level 0, which is texture_size
// GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in
@ -755,6 +758,17 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
// how many levels the allocated texture shall have
const u32 texLevels = hires_tex ? (u32)hires_tex->m_levels.size() : tex_levels;
// We can decode on the GPU if it is a supported format and the flag is enabled.
// Currently we don't decode RGBA8 textures from Tmem, as that would require copying from both
// banks, and if we're doing an copy we may as well just do the whole thing on the CPU, since
// there's no conversion between formats. In the future this could be extended with a separate
// shader, however.
bool decode_on_gpu =
!hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
g_texture_cache->SupportsGPUTextureDecode(static_cast<TextureFormat>(texformat),
static_cast<TlutFormat>(tlutfmt)) &&
!(from_tmem && texformat == GX_TF_RGBA8);
// create the entry/texture
TCacheEntryConfig config;
config.width = width;
@ -768,10 +782,19 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
return nullptr;
if (!hires_tex)
{
const u8* tlut = &texMem[tlutaddr];
if (decode_on_gpu)
{
u32 row_stride = bytes_per_block * (expandedWidth / bsw);
g_texture_cache->DecodeTextureOnGPU(
entry, 0, src_data, texture_size, static_cast<TextureFormat>(texformat), width, height,
expandedWidth, expandedHeight, row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
}
else
{
if (!(texformat == GX_TF_RGBA8 && from_tmem))
{
const u8* tlut = &texMem[tlutaddr];
TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut,
(TlutFormat)tlutfmt);
}
@ -781,6 +804,9 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
&texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight);
}
entry->Load(temp, width, height, expandedWidth, 0);
}
}
iter = textures_by_address.emplace(address, entry);
@ -797,9 +823,6 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
entry->is_efb_copy = false;
entry->is_custom_tex = hires_tex != nullptr;
// load texture
entry->Load(temp, width, height, expandedWidth, 0);
std::string basename = "";
if (g_ActiveConfig.bDumpTextures && !hires_tex)
{
@ -840,13 +863,26 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
const u32 expanded_mip_height = Common::AlignUp(mip_height, bsh);
const u8*& mip_src_data = from_tmem ? ((level % 2) ? ptr_odd : ptr_even) : src_data;
size_t mip_size =
TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
const u8* tlut = &texMem[tlutaddr];
if (decode_on_gpu)
{
u32 row_stride = bytes_per_block * (mip_width / bsw);
g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size,
static_cast<TextureFormat>(texformat), mip_width,
mip_height, expanded_mip_width, expanded_mip_height,
row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
}
else
{
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat,
tlut, (TlutFormat)tlutfmt);
mip_src_data +=
TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
entry->Load(temp, mip_width, mip_height, expanded_mip_width, level);
}
mip_src_data += mip_size;
if (g_ActiveConfig.bDumpTextures)
DumpTexture(entry, basename, level);

View File

@ -171,6 +171,23 @@ public:
virtual void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette,
TlutFormat format) = 0;
// Returns true if the texture data and palette formats are supported by the GPU decoder.
virtual bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
{
return false;
}
// Decodes the specified data to the GPU texture specified by entry.
// width, height are the size of the image in pixels.
// aligned_width, aligned_height are the size of the image in pixels, aligned to the block size.
// row_stride is the number of bytes for a row of blocks, not pixels.
virtual void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format)
{
}
protected:
TextureCacheBase();
@ -225,6 +242,7 @@ private:
bool copy_cache_enable;
bool stereo_3d;
bool efb_mono_depth;
bool gpu_texture_decoding;
};
BackupConfig backup_config = {};
};

View File

@ -2,9 +2,13 @@
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <array>
#include <cmath>
#include <cstdio>
#include <map>
#include <sstream>
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "Common/MathUtil.h"
#include "Common/MsgHandler.h"
@ -720,4 +724,546 @@ const char* GenerateEncodingShader(u32 format, APIType ApiType)
return text;
}
// NOTE: In these uniforms, a row refers to a row of blocks, not texels.
static const char decoding_shader_header[] = R"(
#ifdef VULKAN
layout(std140, push_constant) uniform PushConstants {
uvec2 dst_size;
uvec2 src_size;
uint src_offset;
uint src_row_stride;
uint palette_offset;
} push_constants;
#define u_dst_size (push_constants.dst_size)
#define u_src_size (push_constants.src_size)
#define u_src_offset (push_constants.src_offset)
#define u_src_row_stride (push_constants.src_row_stride)
#define u_palette_offset (push_constants.palette_offset)
TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image;
#else
uniform uvec2 u_dst_size;
uniform uvec2 u_src_size;
uniform uint u_src_offset;
uniform uint u_src_row_stride;
uniform uint u_palette_offset;
SAMPLER_BINDING(9) uniform usamplerBuffer s_input_buffer;
SAMPLER_BINDING(10) uniform usamplerBuffer s_palette_buffer;
layout(rgba8, binding = 0) uniform writeonly image2DArray output_image;
#endif
uint Swap16(uint v)
{
// Convert BE to LE.
return ((v >> 8) | (v << 8)) & 0xFFFFu;
}
uint Convert3To8(uint v)
{
// Swizzle bits: 00000123 -> 12312312
return (v << 5) | (v << 2) | (v >> 1);
}
uint Convert4To8(uint v)
{
// Swizzle bits: 00001234 -> 12341234
return (v << 4) | v;
}
uint Convert5To8(uint v)
{
// Swizzle bits: 00012345 -> 12345123
return (v << 3) | (v >> 2);
}
uint Convert6To8(uint v)
{
// Swizzle bits: 00123456 -> 12345612
return (v << 2) | (v >> 4);
}
uint GetTiledTexelOffset(uvec2 block_size, uvec2 coords)
{
uvec2 block = coords / block_size;
uvec2 offset = coords % block_size;
uint buffer_pos = u_src_offset;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * (block_size.x * block_size.y);
buffer_pos += offset.y * block_size.x;
buffer_pos += offset.x;
return buffer_pos;
}
uvec4 GetPaletteColor(uint index)
{
// Fetch and swap BE to LE.
uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x);
uvec4 color;
#if defined(PALETTE_FORMAT_IA8)
uint a = bitfieldExtract(val, 8, 8);
uint i = bitfieldExtract(val, 0, 8);
color = uvec4(i, i, i, a);
#elif defined(PALETTE_FORMAT_RGB565)
color.x = Convert5To8(bitfieldExtract(val, 11, 5));
color.y = Convert6To8(bitfieldExtract(val, 5, 6));
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
color.a = 255u;
#elif defined(PALETTE_FORMAT_RGB5A3)
if ((val & 0x8000u) != 0u)
{
color.x = Convert5To8(bitfieldExtract(val, 10, 5));
color.y = Convert5To8(bitfieldExtract(val, 5, 5));
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
color.a = 255u;
}
else
{
color.a = Convert3To8(bitfieldExtract(val, 12, 3));
color.r = Convert4To8(bitfieldExtract(val, 8, 4));
color.g = Convert4To8(bitfieldExtract(val, 4, 4));
color.b = Convert4To8(bitfieldExtract(val, 0, 4));
}
#else
// Not used.
color = uvec4(0, 0, 0, 0);
#endif
return color;
}
vec4 GetPaletteColorNormalized(uint index)
{
uvec4 color = GetPaletteColor(index);
return vec4(color) / 255.0;
}
)";
static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
{GX_TF_I4,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x8 blocks, 4 bits per pixel
// We need to do the tiling manually here because the texel size is smaller than
// the size of the buffer elements.
uint2 block = coords.xy / 8u;
uint2 offset = coords.xy % 8u;
uint buffer_pos = u_src_offset;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x / 2u;
// Select high nibble for odd texels, low for even.
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint i;
if ((coords.x & 1u) == 0u)
i = Convert4To8((val >> 4));
else
i = Convert4To8((val & 0x0Fu));
uvec4 color = uvec4(i, i, i, i);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_IA4,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint i = Convert4To8((val & 0x0Fu));
uint a = Convert4To8((val >> 4));
uvec4 color = uvec4(i, i, i, a);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_I8,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
uint i = texelFetch(s_input_buffer, int(buffer_pos)).x;
uvec4 color = uvec4(i, i, i, i);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_IA8,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint a = (val & 0xFFu);
uint i = (val >> 8);
uvec4 color = uvec4(i, i, i, a);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_RGB565,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uvec4 color;
color.x = Convert5To8(bitfieldExtract(val, 11, 5));
color.y = Convert6To8(bitfieldExtract(val, 5, 6));
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
color.a = 255u;
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_RGB5A3,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uvec4 color;
if ((val & 0x8000u) != 0u)
{
color.x = Convert5To8(bitfieldExtract(val, 10, 5));
color.y = Convert5To8(bitfieldExtract(val, 5, 5));
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
color.a = 255u;
}
else
{
color.a = Convert3To8(bitfieldExtract(val, 12, 3));
color.r = Convert4To8(bitfieldExtract(val, 8, 4));
color.g = Convert4To8(bitfieldExtract(val, 4, 4));
color.b = Convert4To8(bitfieldExtract(val, 0, 4));
}
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_RGBA8,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks
// We can't use the normal calculation function, as these are packed as the AR channels
// for the entire block, then the GB channels afterwards.
uint2 block = coords.xy / 4u;
uint2 offset = coords.xy % 4u;
uint buffer_pos = u_src_offset;
// Our buffer has 16-bit elements, so the offsets here are half what they would be in bytes.
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x;
// The two GB channels follow after the block's AR channels.
uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x;
uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x;
uvec4 color;
color.a = (val1 & 0xFFu);
color.r = (val1 >> 8);
color.g = (val2 & 0xFFu);
color.b = (val2 >> 8);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_CMPR,
{BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true,
R"(
// In the compute version of this decoder, we flatten the blocks to a one-dimension array.
// Each group is subdivided into 16, and the first thread in each group fetches the DXT data.
// All threads then calculate the possible colors for the block and write to the output image.
#define GROUP_SIZE 64u
#define BLOCK_SIZE_X 4u
#define BLOCK_SIZE_Y 4u
#define BLOCK_SIZE (BLOCK_SIZE_X * BLOCK_SIZE_Y)
#define BLOCKS_PER_GROUP (GROUP_SIZE / BLOCK_SIZE)
layout(local_size_x = GROUP_SIZE, local_size_y = 1) in;
shared uvec2 shared_temp[BLOCKS_PER_GROUP];
uint DXTBlend(uint v1, uint v2)
{
// 3/8 blend, which is close to 1/3
return ((v1 * 3u + v2 * 5u) >> 3);
}
void main()
{
uint local_thread_id = gl_LocalInvocationID.x;
uint block_in_group = local_thread_id / BLOCK_SIZE;
uint thread_in_block = local_thread_id % BLOCK_SIZE;
uint block_index = gl_WorkGroupID.x * BLOCKS_PER_GROUP + block_in_group;
// Annoyingly, we can't precalculate this as a uniform because the DXT block size differs
// from the block size of the overall texture (4 vs 8). We can however use a multiply and
// subtraction to avoid the modulo for calculating the block's X coordinate.
uint blocks_wide = u_src_size.x / BLOCK_SIZE_X;
uvec2 block_coords;
block_coords.y = block_index / blocks_wide;
block_coords.x = block_index - (block_coords.y * blocks_wide);
// Only the first thread for each block reads from the texel buffer.
if (thread_in_block == 0u)
{
// Calculate tiled block coordinates.
uvec2 tile_block_coords = block_coords / 2u;
uvec2 subtile_block_coords = block_coords % 2u;
uint buffer_pos = u_src_offset;
buffer_pos += tile_block_coords.y * u_src_row_stride;
buffer_pos += tile_block_coords.x * 4u;
buffer_pos += subtile_block_coords.y * 2u;
buffer_pos += subtile_block_coords.x;
// Read the entire DXT block to shared memory.
uvec2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy;
shared_temp[block_in_group] = raw_data;
}
// Ensure store is completed before the remaining threads in the block continue.
memoryBarrierShared();
barrier();
// Unpack colors and swap BE to LE.
uvec2 raw_data = shared_temp[block_in_group];
uint swapped = ((raw_data.x & 0xFF00FF00u) >> 8) | ((raw_data.x & 0x00FF00FFu) << 8);
uint c1 = swapped & 0xFFFFu;
uint c2 = swapped >> 16;
// Expand 5/6 bit channels to 8-bits per channel.
uint blue1 = Convert5To8(bitfieldExtract(c1, 0, 5));
uint blue2 = Convert5To8(bitfieldExtract(c2, 0, 5));
uint green1 = Convert6To8(bitfieldExtract(c1, 5, 6));
uint green2 = Convert6To8(bitfieldExtract(c2, 5, 6));
uint red1 = Convert5To8(bitfieldExtract(c1, 11, 5));
uint red2 = Convert5To8(bitfieldExtract(c2, 11, 5));
// Determine the four colors the block can use.
// It's quicker to just precalculate all four colors rather than branching on the index.
// NOTE: These must be masked with 0xFF. This is done at the normalization stage below.
uvec4 color0, color1, color2, color3;
color0 = uvec4(red1, green1, blue1, 255u);
color1 = uvec4(red2, green2, blue2, 255u);
if (c1 > c2)
{
color2 = uvec4(DXTBlend(red2, red1), DXTBlend(green2, green1), DXTBlend(blue2, blue1), 255u);
color3 = uvec4(DXTBlend(red1, red2), DXTBlend(green1, green2), DXTBlend(blue1, blue2), 255u);
}
else
{
color2 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 255u);
color3 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 0u);
}
// Calculate the texel coordinates that we will write to.
// The divides/modulo here should be turned into a shift/binary AND.
uint local_y = thread_in_block / BLOCK_SIZE_X;
uint local_x = thread_in_block % BLOCK_SIZE_X;
uint global_x = block_coords.x * BLOCK_SIZE_X + local_x;
uint global_y = block_coords.y * BLOCK_SIZE_Y + local_y;
// Use the coordinates within the block to shift the 32-bit value containing
// all 16 indices to a single 2-bit index.
uint index = bitfieldExtract(raw_data.y, int((local_y * 8u) + (6u - local_x * 2u)), 2);
// Select the un-normalized color from the precalculated color array.
// Using a switch statement here removes the need for dynamic indexing of an array.
uvec4 color;
switch (index)
{
case 0u: color = color0; break;
case 1u: color = color1; break;
case 2u: color = color2; break;
case 3u: color = color3; break;
default: color = color0; break;
}
// Normalize and write to the output image.
vec4 norm_color = vec4(color & 0xFFu) / 255.0;
imageStore(output_image, ivec3(ivec2(uvec2(global_x, global_y)), 0), norm_color);
}
)"}},
{GX_TF_C4,
{BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C4)), 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x8 blocks, 4 bits per pixel
// We need to do the tiling manually here because the texel size is smaller than
// the size of the buffer elements.
uint2 block = coords.xy / 8u;
uint2 offset = coords.xy % 8u;
uint buffer_pos = u_src_offset;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x / 2u;
// Select high nibble for odd texels, low for even.
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint index = ((coords.x & 1u) == 0u) ? (val >> 4) : (val & 0x0Fu);
vec4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_C8,
{BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C8)), 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
uint index = texelFetch(s_input_buffer, int(buffer_pos)).x;
vec4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
{GX_TF_C14X2,
{BUFFER_FORMAT_R16_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C14X2)), 8, 8, false,
R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
uint index = texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu;
vec4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}}};
static const std::array<u32, BUFFER_FORMAT_COUNT> s_buffer_bytes_per_texel = {{
1, // BUFFER_FORMAT_R8_UINT
2, // BUFFER_FORMAT_R16_UINT
8, // BUFFER_FORMAT_R32G32_UINT
}};
const DecodingShaderInfo* GetDecodingShaderInfo(u32 format)
{
auto iter = s_decoding_shader_info.find(static_cast<TextureFormat>(format));
return iter != s_decoding_shader_info.end() ? &iter->second : nullptr;
}
u32 GetBytesPerBufferElement(BufferFormat buffer_format)
{
return s_buffer_bytes_per_texel[buffer_format];
}
std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height)
{
// Flatten to a single dimension?
if (info->group_flatten)
return {(width * height + (info->group_size_x - 1)) / info->group_size_x, 1};
return {(width + (info->group_size_x - 1)) / info->group_size_x,
(height + (info->group_size_y - 1)) / info->group_size_y};
}
std::string GenerateDecodingShader(u32 format, u32 palette_format, APIType api_type)
{
const DecodingShaderInfo* info = GetDecodingShaderInfo(format);
if (!info)
return "";
std::stringstream ss;
switch (palette_format)
{
case GX_TL_IA8:
ss << "#define PALETTE_FORMAT_IA8 1\n";
break;
case GX_TL_RGB565:
ss << "#define PALETTE_FORMAT_RGB565 1\n";
break;
case GX_TL_RGB5A3:
ss << "#define PALETTE_FORMAT_RGB5A3 1\n";
break;
}
ss << decoding_shader_header;
ss << info->shader_body;
return ss.str();
}
} // namespace

View File

@ -4,6 +4,9 @@
#pragma once
#include <string>
#include <utility>
#include "Common/CommonTypes.h"
enum class APIType;
@ -13,4 +16,40 @@ namespace TextureConversionShader
u16 GetEncodedSampleCount(u32 format);
const char* GenerateEncodingShader(u32 format, APIType ApiType);
}
// View format of the input data to the texture decoding shader.
enum BufferFormat
{
BUFFER_FORMAT_R8_UINT,
BUFFER_FORMAT_R16_UINT,
BUFFER_FORMAT_R32G32_UINT,
BUFFER_FORMAT_COUNT
};
// Information required to compile and dispatch a texture decoding shader.
struct DecodingShaderInfo
{
BufferFormat buffer_format;
u32 palette_size;
u32 group_size_x;
u32 group_size_y;
bool group_flatten;
const char* shader_body;
};
// Obtain shader information for the specified texture format.
// If this format does not have a shader written for it, returns nullptr.
const DecodingShaderInfo* GetDecodingShaderInfo(u32 format);
// Determine how many bytes there are in each element of the texel buffer.
// Needed for alignment and stride calculations.
u32 GetBytesPerBufferElement(BufferFormat buffer_format);
// Determine how many thread groups should be dispatched for an image of the specified width/height.
// First is the number of X groups, second is the number of Y groups, Z is always one.
std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height);
// Returns the GLSL string containing the texture decoding shader for the specified format.
std::string GenerateDecodingShader(u32 format, u32 palette_format, APIType api_type);
} // namespace TextureConversionShader

View File

@ -81,6 +81,7 @@ void VideoConfig::Load(const std::string& ini_file)
settings->Get("DumpPath", &sDumpPath, "");
settings->Get("BitrateKbps", &iBitrateKbps, 2500);
settings->Get("InternalResolutionFrameDumps", &bInternalResolutionFrameDumps, false);
settings->Get("EnableGPUTextureDecoding", &bEnableGPUTextureDecoding, false);
settings->Get("EnablePixelLighting", &bEnablePixelLighting, false);
settings->Get("FastDepthCalc", &bFastDepthCalc, true);
settings->Get("MSAA", &iMultisamples, 1);
@ -305,6 +306,7 @@ void VideoConfig::Save(const std::string& ini_file)
settings->Set("DumpPath", sDumpPath);
settings->Set("BitrateKbps", iBitrateKbps);
settings->Set("InternalResolutionFrameDumps", bInternalResolutionFrameDumps);
settings->Set("EnableGPUTextureDecoding", bEnableGPUTextureDecoding);
settings->Set("EnablePixelLighting", bEnablePixelLighting);
settings->Set("FastDepthCalc", bFastDepthCalc);
settings->Set("MSAA", iMultisamples);

View File

@ -108,6 +108,7 @@ struct VideoConfig final
bool bInternalResolutionFrameDumps;
bool bFreeLook;
bool bBorderlessFullscreen;
bool bEnableGPUTextureDecoding;
int iBitrateKbps;
// Hacks
@ -181,6 +182,7 @@ struct VideoConfig final
bool bSupportsPrimitiveRestart;
bool bSupportsOversizedViewports;
bool bSupportsGeometryShaders;
bool bSupportsComputeShaders;
bool bSupports3DVision;
bool bSupportsEarlyZ; // needed by PixelShaderGen, so must stay in VideoCommon
bool bSupportsBindingLayout; // Needed by ShaderGen, so must stay in VideoCommon
@ -195,6 +197,7 @@ struct VideoConfig final
bool bSupportsReversedDepthRange;
bool bSupportsMultithreading;
bool bSupportsInternalResolutionFrameDumps;
bool bSupportsGPUTextureDecoding;
} backend_info;
// Utility
@ -210,6 +213,10 @@ struct VideoConfig final
return false;
return backend_info.bSupportsBBox && backend_info.bSupportsFragmentStoresAndAtomics;
}
bool UseGPUTextureDecoding() const
{
return backend_info.bSupportsGPUTextureDecoding && bEnableGPUTextureDecoding;
}
};
extern VideoConfig g_Config;