Merge pull request #4467 from stenzek/gpu-texture-decoding

VideoBackends: GPU Texture Decoding
This commit is contained in:
Markus Wick 2017-04-03 10:46:13 +02:00 committed by GitHub
commit 3bd184a255
49 changed files with 2182 additions and 225 deletions

View File

@ -286,6 +286,7 @@ public final class SettingsFragmentPresenter
BooleanSetting ignoreFormat = new BooleanSetting(SettingsFile.KEY_IGNORE_FORMAT, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, ignoreFormatValue); BooleanSetting ignoreFormat = new BooleanSetting(SettingsFile.KEY_IGNORE_FORMAT, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, ignoreFormatValue);
Setting efbToTexture = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_HACKS).getSetting(SettingsFile.KEY_EFB_TEXTURE); Setting efbToTexture = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_HACKS).getSetting(SettingsFile.KEY_EFB_TEXTURE);
Setting texCacheAccuracy = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_TEXCACHE_ACCURACY); Setting texCacheAccuracy = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_TEXCACHE_ACCURACY);
Setting gpuTextureDecoding = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_GPU_TEXTURE_DECODING);
IntSetting xfb = new IntSetting(SettingsFile.KEY_XFB, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, xfbValue); IntSetting xfb = new IntSetting(SettingsFile.KEY_XFB, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, xfbValue);
Setting fastDepth = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_HACKS).getSetting(SettingsFile.KEY_FAST_DEPTH); Setting fastDepth = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_HACKS).getSetting(SettingsFile.KEY_FAST_DEPTH);
Setting aspectRatio = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_ASPECT_RATIO); Setting aspectRatio = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_ASPECT_RATIO);
@ -297,6 +298,7 @@ public final class SettingsFragmentPresenter
sl.add(new HeaderSetting(null, null, R.string.texture_cache, 0)); sl.add(new HeaderSetting(null, null, R.string.texture_cache, 0));
sl.add(new SingleChoiceSetting(SettingsFile.KEY_TEXCACHE_ACCURACY, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.texture_cache_accuracy, R.string.texture_cache_accuracy_descrip, R.array.textureCacheAccuracyEntries, R.array.textureCacheAccuracyValues, 128, texCacheAccuracy)); sl.add(new SingleChoiceSetting(SettingsFile.KEY_TEXCACHE_ACCURACY, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.texture_cache_accuracy, R.string.texture_cache_accuracy_descrip, R.array.textureCacheAccuracyEntries, R.array.textureCacheAccuracyValues, 128, texCacheAccuracy));
sl.add(new CheckBoxSetting(SettingsFile.KEY_GPU_TEXTURE_DECODING, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.gpu_texture_decoding, R.string.gpu_texture_decoding_descrip, false, gpuTextureDecoding));
sl.add(new HeaderSetting(null, null, R.string.external_frame_buffer, 0)); sl.add(new HeaderSetting(null, null, R.string.external_frame_buffer, 0));
sl.add(new SingleChoiceSetting(SettingsFile.KEY_XFB_METHOD, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, R.string.external_frame_buffer, R.string.external_frame_buffer_descrip, R.array.externalFrameBufferEntries, R.array.externalFrameBufferValues, 0, xfb)); sl.add(new SingleChoiceSetting(SettingsFile.KEY_XFB_METHOD, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, R.string.external_frame_buffer, R.string.external_frame_buffer_descrip, R.array.externalFrameBufferEntries, R.array.externalFrameBufferValues, 0, xfb));

View File

@ -73,6 +73,7 @@ public final class SettingsFile
public static final String KEY_IGNORE_FORMAT = "EFBEmulateFormatChanges"; public static final String KEY_IGNORE_FORMAT = "EFBEmulateFormatChanges";
public static final String KEY_EFB_TEXTURE = "EFBToTextureEnable"; public static final String KEY_EFB_TEXTURE = "EFBToTextureEnable";
public static final String KEY_TEXCACHE_ACCURACY = "SafeTextureCacheColorSamples"; public static final String KEY_TEXCACHE_ACCURACY = "SafeTextureCacheColorSamples";
public static final String KEY_GPU_TEXTURE_DECODING = "EnableGPUTextureDecoding";
public static final String KEY_XFB = "UseXFB"; public static final String KEY_XFB = "UseXFB";
public static final String KEY_XFB_REAL = "UseRealXFB"; public static final String KEY_XFB_REAL = "UseRealXFB";
public static final String KEY_FAST_DEPTH = "FastDepthCalc"; public static final String KEY_FAST_DEPTH = "FastDepthCalc";

View File

@ -168,6 +168,8 @@
<string name="texture_cache">Texture Cache</string> <string name="texture_cache">Texture Cache</string>
<string name="texture_cache_accuracy">Texture Cache Accuracy</string> <string name="texture_cache_accuracy">Texture Cache Accuracy</string>
<string name="texture_cache_accuracy_descrip">The safer the selection, the less likely the emulator will be missing any texture updates from RAM.</string> <string name="texture_cache_accuracy_descrip">The safer the selection, the less likely the emulator will be missing any texture updates from RAM.</string>
<string name="gpu_texture_decoding">GPU Texture Decoding</string>
<string name="gpu_texture_decoding_descrip">Decodes textures on the GPU using compute shaders where supported. May improve performance in some scenarios.</string>
<string name="external_frame_buffer">External Frame Buffer</string> <string name="external_frame_buffer">External Frame Buffer</string>
<string name="external_frame_buffer_descrip">Determines how the XFB will be emulated.</string> <string name="external_frame_buffer_descrip">Determines how the XFB will be emulated.</string>
<string name="disable_destination_alpha">Disable Destination Alpha</string> <string name="disable_destination_alpha">Disable Destination Alpha</string>

View File

@ -72,6 +72,7 @@
<ClInclude Include="GL\GLExtensions\ARB_blend_func_extended.h" /> <ClInclude Include="GL\GLExtensions\ARB_blend_func_extended.h" />
<ClInclude Include="GL\GLExtensions\ARB_buffer_storage.h" /> <ClInclude Include="GL\GLExtensions\ARB_buffer_storage.h" />
<ClInclude Include="GL\GLExtensions\ARB_clip_control.h" /> <ClInclude Include="GL\GLExtensions\ARB_clip_control.h" />
<ClInclude Include="GL\GLExtensions\ARB_compute_shader.h" />
<ClInclude Include="GL\GLExtensions\ARB_copy_image.h" /> <ClInclude Include="GL\GLExtensions\ARB_copy_image.h" />
<ClInclude Include="GL\GLExtensions\ARB_debug_output.h" /> <ClInclude Include="GL\GLExtensions\ARB_debug_output.h" />
<ClInclude Include="GL\GLExtensions\ARB_draw_elements_base_vertex.h" /> <ClInclude Include="GL\GLExtensions\ARB_draw_elements_base_vertex.h" />
@ -83,9 +84,11 @@
<ClInclude Include="GL\GLExtensions\ARB_occlusion_query2.h" /> <ClInclude Include="GL\GLExtensions\ARB_occlusion_query2.h" />
<ClInclude Include="GL\GLExtensions\ARB_sampler_objects.h" /> <ClInclude Include="GL\GLExtensions\ARB_sampler_objects.h" />
<ClInclude Include="GL\GLExtensions\ARB_sample_shading.h" /> <ClInclude Include="GL\GLExtensions\ARB_sample_shading.h" />
<ClInclude Include="GL\GLExtensions\ARB_shader_image_load_store.h" />
<ClInclude Include="GL\GLExtensions\ARB_shader_storage_buffer_object.h" /> <ClInclude Include="GL\GLExtensions\ARB_shader_storage_buffer_object.h" />
<ClInclude Include="GL\GLExtensions\ARB_sync.h" /> <ClInclude Include="GL\GLExtensions\ARB_sync.h" />
<ClInclude Include="GL\GLExtensions\ARB_texture_multisample.h" /> <ClInclude Include="GL\GLExtensions\ARB_texture_multisample.h" />
<ClInclude Include="GL\GLExtensions\ARB_texture_storage.h" />
<ClInclude Include="GL\GLExtensions\ARB_texture_storage_multisample.h" /> <ClInclude Include="GL\GLExtensions\ARB_texture_storage_multisample.h" />
<ClInclude Include="GL\GLExtensions\ARB_uniform_buffer_object.h" /> <ClInclude Include="GL\GLExtensions\ARB_uniform_buffer_object.h" />
<ClInclude Include="GL\GLExtensions\ARB_vertex_array_object.h" /> <ClInclude Include="GL\GLExtensions\ARB_vertex_array_object.h" />

View File

@ -238,6 +238,16 @@
<ClInclude Include="NonCopyable.h" /> <ClInclude Include="NonCopyable.h" />
<ClInclude Include="Analytics.h" /> <ClInclude Include="Analytics.h" />
<ClInclude Include="Semaphore.h" /> <ClInclude Include="Semaphore.h" />
<ClInclude Include="MD5.h" />
<ClInclude Include="GL\GLExtensions\ARB_texture_storage.h">
<Filter>GL\GLExtensions</Filter>
</ClInclude>
<ClInclude Include="GL\GLExtensions\ARB_shader_image_load_store.h">
<Filter>GL\GLExtensions</Filter>
</ClInclude>
<ClInclude Include="GL\GLExtensions\ARB_compute_shader.h">
<Filter>GL\GLExtensions</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="CDUtils.cpp" /> <ClCompile Include="CDUtils.cpp" />
@ -303,6 +313,7 @@
</ClCompile> </ClCompile>
<ClCompile Include="ucrtFreadWorkaround.cpp" /> <ClCompile Include="ucrtFreadWorkaround.cpp" />
<ClCompile Include="Analytics.cpp" /> <ClCompile Include="Analytics.cpp" />
<ClCompile Include="MD5.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<Text Include="CMakeLists.txt" /> <Text Include="CMakeLists.txt" />

View File

@ -0,0 +1,53 @@
/*
** Copyright (c) 2013-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and/or associated documentation files (the
** "Materials"), to deal in the Materials without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Materials, and to
** permit persons to whom the Materials are furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be included
** in all copies or substantial portions of the Materials.
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/
#include "Common/GL/GLExtensions/gl_common.h"
// Numeric values of the compute-shader related enum tokens.
#define GL_COMPUTE_SHADER 0x91B9
#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB
#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD
#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265
#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267
#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED
#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE
#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
#define GL_COMPUTE_SHADER_BIT 0x00000020
// Function pointer types for the two compute dispatch entry points.
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y,
GLuint num_groups_z);
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect);
// The pointers are only declared here (extern); their storage is defined elsewhere.
extern PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
extern PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
// Alias the standard gl* spellings to the dol* pointers so call sites can use the usual names.
#define glDispatchCompute dolDispatchCompute
#define glDispatchComputeIndirect dolDispatchComputeIndirect

View File

@ -0,0 +1,100 @@
/*
** Copyright (c) 2013-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and/or associated documentation files (the
** "Materials"), to deal in the Materials without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Materials, and to
** permit persons to whom the Materials are furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be included
** in all copies or substantial portions of the Materials.
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/
#include "Common/GL/GLExtensions/gl_common.h"
// Barrier bit flags. Each *_BARRIER_BIT is a distinct single bit; GL_ALL_BARRIER_BITS sets
// every bit. The memory barrier typedef below takes a GLbitfield `barriers` argument.
#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002
#define GL_UNIFORM_BARRIER_BIT 0x00000004
#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008
#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
#define GL_COMMAND_BARRIER_BIT 0x00000040
#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080
#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100
#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200
#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400
#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000
#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
// Image unit limit and image binding state tokens.
#define GL_MAX_IMAGE_UNITS 0x8F38
#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39
#define GL_IMAGE_BINDING_NAME 0x8F3A
#define GL_IMAGE_BINDING_LEVEL 0x8F3B
#define GL_IMAGE_BINDING_LAYERED 0x8F3C
#define GL_IMAGE_BINDING_LAYER 0x8F3D
#define GL_IMAGE_BINDING_ACCESS 0x8F3E
// Image type tokens, in three parallel groups: unprefixed, INT_ and UNSIGNED_INT_.
#define GL_IMAGE_1D 0x904C
#define GL_IMAGE_2D 0x904D
#define GL_IMAGE_3D 0x904E
#define GL_IMAGE_2D_RECT 0x904F
#define GL_IMAGE_CUBE 0x9050
#define GL_IMAGE_BUFFER 0x9051
#define GL_IMAGE_1D_ARRAY 0x9052
#define GL_IMAGE_2D_ARRAY 0x9053
#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054
#define GL_IMAGE_2D_MULTISAMPLE 0x9055
#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056
#define GL_INT_IMAGE_1D 0x9057
#define GL_INT_IMAGE_2D 0x9058
#define GL_INT_IMAGE_3D 0x9059
#define GL_INT_IMAGE_2D_RECT 0x905A
#define GL_INT_IMAGE_CUBE 0x905B
#define GL_INT_IMAGE_BUFFER 0x905C
#define GL_INT_IMAGE_1D_ARRAY 0x905D
#define GL_INT_IMAGE_2D_ARRAY 0x905E
#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F
#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060
#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061
#define GL_UNSIGNED_INT_IMAGE_1D 0x9062
#define GL_UNSIGNED_INT_IMAGE_2D 0x9063
#define GL_UNSIGNED_INT_IMAGE_3D 0x9064
#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065
#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066
#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068
#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
// Remaining image tokens: sample limit, binding format, format compatibility and
// per-stage image uniform limits.
#define GL_MAX_IMAGE_SAMPLES 0x906D
#define GL_IMAGE_BINDING_FORMAT 0x906E
#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA
#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD
#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE
#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF
// Function pointer types for binding a texture to an image unit and issuing a memory barrier.
typedef void(APIENTRYP PFNDOLBINDIMAGETEXTUREPROC)(GLuint unit, GLuint texture, GLint level,
GLboolean layered, GLint layer, GLenum access,
GLenum format);
typedef void(APIENTRYP PFNDOLMEMORYBARRIERPROC)(GLbitfield barriers);
// The pointers are only declared here (extern); their storage is defined elsewhere.
extern PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
extern PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
// Alias the standard gl* spellings to the dol* pointers so call sites can use the usual names.
#define glBindImageTexture dolBindImageTexture
#define glMemoryBarrier dolMemoryBarrier

View File

@ -0,0 +1,41 @@
/*
** Copyright (c) 2013-2015 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and/or associated documentation files (the
** "Materials"), to deal in the Materials without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Materials, and to
** permit persons to whom the Materials are furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be included
** in all copies or substantial portions of the Materials.
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/
#include "Common/GL/GLExtensions/gl_common.h"
// The only enum token declared in this header.
#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F
// Function pointer types for the 1D/2D/3D texture storage entry points. Each takes a target,
// a level count and an internal format, followed by one, two or three dimensions.
typedef void(APIENTRYP PFNDOLTEXSTORAGE1DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width);
typedef void(APIENTRYP PFNDOLTEXSTORAGE2DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNDOLTEXSTORAGE3DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width, GLsizei height, GLsizei depth);
// The pointers are only declared here (extern); their storage is defined elsewhere.
extern PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
extern PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
extern PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
// Alias the standard gl* spellings to the dol* pointers so call sites can use the usual names.
#define glTexStorage1D dolTexStorage1D
#define glTexStorage2D dolTexStorage2D
#define glTexStorage3D dolTexStorage3D

View File

@ -653,19 +653,12 @@ PFNDOLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC
dolDrawElementsInstancedBaseVertexBaseInstance; dolDrawElementsInstancedBaseVertexBaseInstance;
PFNDOLGETINTERNALFORMATIVPROC dolGetInternalformativ; PFNDOLGETINTERNALFORMATIVPROC dolGetInternalformativ;
PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC dolGetActiveAtomicCounterBufferiv; PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC dolGetActiveAtomicCounterBufferiv;
PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC dolDrawTransformFeedbackInstanced; PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC dolDrawTransformFeedbackInstanced;
PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackStreamInstanced; PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackStreamInstanced;
// gl_4_3 // gl_4_3
PFNDOLCLEARBUFFERDATAPROC dolClearBufferData; PFNDOLCLEARBUFFERDATAPROC dolClearBufferData;
PFNDOLCLEARBUFFERSUBDATAPROC dolClearBufferSubData; PFNDOLCLEARBUFFERSUBDATAPROC dolClearBufferSubData;
PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
PFNDOLFRAMEBUFFERPARAMETERIPROC dolFramebufferParameteri; PFNDOLFRAMEBUFFERPARAMETERIPROC dolFramebufferParameteri;
PFNDOLGETFRAMEBUFFERPARAMETERIVPROC dolGetFramebufferParameteriv; PFNDOLGETFRAMEBUFFERPARAMETERIVPROC dolGetFramebufferParameteriv;
PFNDOLGETINTERNALFORMATI64VPROC dolGetInternalformati64v; PFNDOLGETINTERNALFORMATI64VPROC dolGetInternalformati64v;
@ -905,6 +898,11 @@ PFNDOLTEXIMAGE3DMULTISAMPLEPROC dolTexImage3DMultisample;
PFNDOLGETMULTISAMPLEFVPROC dolGetMultisamplefv; PFNDOLGETMULTISAMPLEFVPROC dolGetMultisamplefv;
PFNDOLSAMPLEMASKIPROC dolSampleMaski; PFNDOLSAMPLEMASKIPROC dolSampleMaski;
// ARB_texture_storage
PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
// ARB_texture_storage_multisample // ARB_texture_storage_multisample
PFNDOLTEXSTORAGE2DMULTISAMPLEPROC dolTexStorage2DMultisample; PFNDOLTEXSTORAGE2DMULTISAMPLEPROC dolTexStorage2DMultisample;
PFNDOLTEXSTORAGE3DMULTISAMPLEPROC dolTexStorage3DMultisample; PFNDOLTEXSTORAGE3DMULTISAMPLEPROC dolTexStorage3DMultisample;
@ -989,6 +987,14 @@ PFNDOLDEPTHRANGEDNVPROC dolDepthRangedNV;
PFNDOLCLEARDEPTHDNVPROC dolClearDepthdNV; PFNDOLCLEARDEPTHDNVPROC dolClearDepthdNV;
PFNDOLDEPTHBOUNDSDNVPROC dolDepthBoundsdNV; PFNDOLDEPTHBOUNDSDNVPROC dolDepthBoundsdNV;
// ARB_shader_image_load_store
PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
// ARB_compute_shader
PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
// Creates a GLFunc object that requires a feature // Creates a GLFunc object that requires a feature
#define GLFUNC_REQUIRES(x, y) \ #define GLFUNC_REQUIRES(x, y) \
{ \ { \
@ -1681,6 +1687,11 @@ const GLFunc gl_function_array[] = {
GLFUNC_REQUIRES(glGetMultisamplefv, "GL_ARB_texture_multisample"), GLFUNC_REQUIRES(glGetMultisamplefv, "GL_ARB_texture_multisample"),
GLFUNC_REQUIRES(glSampleMaski, "GL_ARB_texture_multisample"), GLFUNC_REQUIRES(glSampleMaski, "GL_ARB_texture_multisample"),
// ARB_texture_storage
GLFUNC_REQUIRES(glTexStorage1D, "GL_ARB_texture_storage !VERSION_4_2"),
GLFUNC_REQUIRES(glTexStorage2D, "GL_ARB_texture_storage !VERSION_4_2 |VERSION_GLES_3"),
GLFUNC_REQUIRES(glTexStorage3D, "GL_ARB_texture_storage !VERSION_4_2 |VERSION_GLES_3"),
// ARB_texture_storage_multisample // ARB_texture_storage_multisample
GLFUNC_REQUIRES(glTexStorage2DMultisample, GLFUNC_REQUIRES(glTexStorage2DMultisample,
"GL_ARB_texture_storage_multisample !VERSION_4_3 |VERSION_GLES_3_1"), "GL_ARB_texture_storage_multisample !VERSION_4_3 |VERSION_GLES_3_1"),
@ -1848,6 +1859,17 @@ const GLFunc gl_function_array[] = {
GLFUNC_REQUIRES(glDepthRangedNV, "GL_NV_depth_buffer_float"), GLFUNC_REQUIRES(glDepthRangedNV, "GL_NV_depth_buffer_float"),
GLFUNC_REQUIRES(glClearDepthdNV, "GL_NV_depth_buffer_float"), GLFUNC_REQUIRES(glClearDepthdNV, "GL_NV_depth_buffer_float"),
GLFUNC_REQUIRES(glDepthBoundsdNV, "GL_NV_depth_buffer_float"), GLFUNC_REQUIRES(glDepthBoundsdNV, "GL_NV_depth_buffer_float"),
// ARB_shader_image_load_store
GLFUNC_REQUIRES(glBindImageTexture,
"GL_ARB_shader_image_load_store !VERSION_4_2 |VERSION_GLES_3_1"),
GLFUNC_REQUIRES(glMemoryBarrier,
"GL_ARB_shader_image_load_store !VERSION_4_2 |VERSION_GLES_3_1"),
// ARB_compute_shader
GLFUNC_REQUIRES(glDispatchCompute, "GL_ARB_compute_shader !VERSION_4_3 |VERSION_GLES_3_1"),
GLFUNC_REQUIRES(glDispatchComputeIndirect,
"GL_ARB_compute_shader !VERSION_4_3 |VERSION_GLES_3_1"),
}; };
namespace GLExtensions namespace GLExtensions

View File

@ -12,6 +12,7 @@
#include "Common/GL/GLExtensions/ARB_blend_func_extended.h" #include "Common/GL/GLExtensions/ARB_blend_func_extended.h"
#include "Common/GL/GLExtensions/ARB_buffer_storage.h" #include "Common/GL/GLExtensions/ARB_buffer_storage.h"
#include "Common/GL/GLExtensions/ARB_clip_control.h" #include "Common/GL/GLExtensions/ARB_clip_control.h"
#include "Common/GL/GLExtensions/ARB_compute_shader.h"
#include "Common/GL/GLExtensions/ARB_copy_image.h" #include "Common/GL/GLExtensions/ARB_copy_image.h"
#include "Common/GL/GLExtensions/ARB_debug_output.h" #include "Common/GL/GLExtensions/ARB_debug_output.h"
#include "Common/GL/GLExtensions/ARB_draw_elements_base_vertex.h" #include "Common/GL/GLExtensions/ARB_draw_elements_base_vertex.h"
@ -21,9 +22,11 @@
#include "Common/GL/GLExtensions/ARB_occlusion_query2.h" #include "Common/GL/GLExtensions/ARB_occlusion_query2.h"
#include "Common/GL/GLExtensions/ARB_sample_shading.h" #include "Common/GL/GLExtensions/ARB_sample_shading.h"
#include "Common/GL/GLExtensions/ARB_sampler_objects.h" #include "Common/GL/GLExtensions/ARB_sampler_objects.h"
#include "Common/GL/GLExtensions/ARB_shader_image_load_store.h"
#include "Common/GL/GLExtensions/ARB_shader_storage_buffer_object.h" #include "Common/GL/GLExtensions/ARB_shader_storage_buffer_object.h"
#include "Common/GL/GLExtensions/ARB_sync.h" #include "Common/GL/GLExtensions/ARB_sync.h"
#include "Common/GL/GLExtensions/ARB_texture_multisample.h" #include "Common/GL/GLExtensions/ARB_texture_multisample.h"
#include "Common/GL/GLExtensions/ARB_texture_storage.h"
#include "Common/GL/GLExtensions/ARB_texture_storage_multisample.h" #include "Common/GL/GLExtensions/ARB_texture_storage_multisample.h"
#include "Common/GL/GLExtensions/ARB_uniform_buffer_object.h" #include "Common/GL/GLExtensions/ARB_uniform_buffer_object.h"
#include "Common/GL/GLExtensions/ARB_vertex_array_object.h" #include "Common/GL/GLExtensions/ARB_vertex_array_object.h"

View File

@ -66,75 +66,10 @@
#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS 0x92D9 #define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS 0x92D9
#define GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX 0x92DA #define GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX 0x92DA
#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB #define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB
#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002
#define GL_UNIFORM_BARRIER_BIT 0x00000004
#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008
#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
#define GL_COMMAND_BARRIER_BIT 0x00000040
#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080
#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100
#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200
#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400
#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000
#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
#define GL_MAX_IMAGE_UNITS 0x8F38
#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39
#define GL_IMAGE_BINDING_NAME 0x8F3A
#define GL_IMAGE_BINDING_LEVEL 0x8F3B
#define GL_IMAGE_BINDING_LAYERED 0x8F3C
#define GL_IMAGE_BINDING_LAYER 0x8F3D
#define GL_IMAGE_BINDING_ACCESS 0x8F3E
#define GL_IMAGE_1D 0x904C
#define GL_IMAGE_2D 0x904D
#define GL_IMAGE_3D 0x904E
#define GL_IMAGE_2D_RECT 0x904F
#define GL_IMAGE_CUBE 0x9050
#define GL_IMAGE_BUFFER 0x9051
#define GL_IMAGE_1D_ARRAY 0x9052
#define GL_IMAGE_2D_ARRAY 0x9053
#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054
#define GL_IMAGE_2D_MULTISAMPLE 0x9055
#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056
#define GL_INT_IMAGE_1D 0x9057
#define GL_INT_IMAGE_2D 0x9058
#define GL_INT_IMAGE_3D 0x9059
#define GL_INT_IMAGE_2D_RECT 0x905A
#define GL_INT_IMAGE_CUBE 0x905B
#define GL_INT_IMAGE_BUFFER 0x905C
#define GL_INT_IMAGE_1D_ARRAY 0x905D
#define GL_INT_IMAGE_2D_ARRAY 0x905E
#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F
#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060
#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061
#define GL_UNSIGNED_INT_IMAGE_1D 0x9062
#define GL_UNSIGNED_INT_IMAGE_2D 0x9063
#define GL_UNSIGNED_INT_IMAGE_3D 0x9064
#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065
#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066
#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068
#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
#define GL_MAX_IMAGE_SAMPLES 0x906D
#define GL_IMAGE_BINDING_FORMAT 0x906E
#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA
#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD
#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE
#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF
#define GL_COMPRESSED_RGBA_BPTC_UNORM 0x8E8C #define GL_COMPRESSED_RGBA_BPTC_UNORM 0x8E8C
#define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM 0x8E8D #define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM 0x8E8D
#define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT 0x8E8E #define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT 0x8E8E
#define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT 0x8E8F #define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT 0x8E8F
#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F
typedef void(APIENTRYP PFNDOLDRAWARRAYSINSTANCEDBASEINSTANCEPROC)(GLenum mode, GLint first, typedef void(APIENTRYP PFNDOLDRAWARRAYSINSTANCEDBASEINSTANCEPROC)(GLenum mode, GLint first,
GLsizei count, GLsizei count,
@ -152,16 +87,6 @@ typedef void(APIENTRYP PFNDOLGETINTERNALFORMATIVPROC)(GLenum target, GLenum inte
GLenum pname, GLsizei bufSize, GLint* params); GLenum pname, GLsizei bufSize, GLint* params);
typedef void(APIENTRYP PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC)(GLuint program, GLuint bufferIndex, typedef void(APIENTRYP PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC)(GLuint program, GLuint bufferIndex,
GLenum pname, GLint* params); GLenum pname, GLint* params);
typedef void(APIENTRYP PFNDOLBINDIMAGETEXTUREPROC)(GLuint unit, GLuint texture, GLint level,
GLboolean layered, GLint layer, GLenum access,
GLenum format);
typedef void(APIENTRYP PFNDOLMEMORYBARRIERPROC)(GLbitfield barriers);
typedef void(APIENTRYP PFNDOLTEXSTORAGE1DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width);
typedef void(APIENTRYP PFNDOLTEXSTORAGE2DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNDOLTEXSTORAGE3DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
GLsizei width, GLsizei height, GLsizei depth);
typedef void(APIENTRYP PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC)(GLenum mode, GLuint id, typedef void(APIENTRYP PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC)(GLenum mode, GLuint id,
GLsizei instancecount); GLsizei instancecount);
typedef void(APIENTRYP PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC)(GLenum mode, GLuint id, typedef void(APIENTRYP PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC)(GLenum mode, GLuint id,
@ -174,11 +99,6 @@ extern PFNDOLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC
dolDrawElementsInstancedBaseVertexBaseInstance; dolDrawElementsInstancedBaseVertexBaseInstance;
extern PFNDOLGETINTERNALFORMATIVPROC dolGetInternalformativ; extern PFNDOLGETINTERNALFORMATIVPROC dolGetInternalformativ;
extern PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC dolGetActiveAtomicCounterBufferiv; extern PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC dolGetActiveAtomicCounterBufferiv;
extern PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
extern PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
extern PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
extern PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
extern PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
extern PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC dolDrawTransformFeedbackInstanced; extern PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC dolDrawTransformFeedbackInstanced;
extern PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackStreamInstanced; extern PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackStreamInstanced;
@ -187,10 +107,5 @@ extern PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackSt
#define glDrawElementsInstancedBaseVertexBaseInstance dolDrawElementsInstancedBaseVertexBaseInstance #define glDrawElementsInstancedBaseVertexBaseInstance dolDrawElementsInstancedBaseVertexBaseInstance
#define glGetInternalformativ dolGetInternalformativ #define glGetInternalformativ dolGetInternalformativ
#define glGetActiveAtomicCounterBufferiv dolGetActiveAtomicCounterBufferiv #define glGetActiveAtomicCounterBufferiv dolGetActiveAtomicCounterBufferiv
#define glBindImageTexture dolBindImageTexture
#define glMemoryBarrier dolMemoryBarrier
#define glTexStorage1D dolTexStorage1D
#define glTexStorage2D dolTexStorage2D
#define glTexStorage3D dolTexStorage3D
#define glDrawTransformFeedbackInstanced dolDrawTransformFeedbackInstanced #define glDrawTransformFeedbackInstanced dolDrawTransformFeedbackInstanced
#define glDrawTransformFeedbackStreamInstanced dolDrawTransformFeedbackStreamInstanced #define glDrawTransformFeedbackStreamInstanced dolDrawTransformFeedbackStreamInstanced

View File

@ -38,24 +38,6 @@
#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69 #define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69
#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A #define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A
#define GL_MAX_ELEMENT_INDEX 0x8D6B #define GL_MAX_ELEMENT_INDEX 0x8D6B
#define GL_COMPUTE_SHADER 0x91B9
#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB
#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD
#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265
#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267
#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED
#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE
#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
#define GL_COMPUTE_SHADER_BIT 0x00000020
#define GL_DEBUG_OUTPUT_SYNCHRONOUS 0x8242 #define GL_DEBUG_OUTPUT_SYNCHRONOUS 0x8242
#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH 0x8243 #define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH 0x8243
#define GL_DEBUG_CALLBACK_FUNCTION 0x8244 #define GL_DEBUG_CALLBACK_FUNCTION 0x8244
@ -287,9 +269,6 @@ typedef void(APIENTRYP PFNDOLCLEARBUFFERDATAPROC)(GLenum target, GLenum internal
typedef void(APIENTRYP PFNDOLCLEARBUFFERSUBDATAPROC)(GLenum target, GLenum internalformat, typedef void(APIENTRYP PFNDOLCLEARBUFFERSUBDATAPROC)(GLenum target, GLenum internalformat,
GLintptr offset, GLsizeiptr size, GLintptr offset, GLsizeiptr size,
GLenum format, GLenum type, const void* data); GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y,
GLuint num_groups_z);
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect);
typedef void(APIENTRYP PFNDOLFRAMEBUFFERPARAMETERIPROC)(GLenum target, GLenum pname, GLint param); typedef void(APIENTRYP PFNDOLFRAMEBUFFERPARAMETERIPROC)(GLenum target, GLenum pname, GLint param);
typedef void(APIENTRYP PFNDOLGETFRAMEBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, typedef void(APIENTRYP PFNDOLGETFRAMEBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname,
GLint* params); GLint* params);
@ -348,8 +327,6 @@ typedef void(APIENTRYP PFNDOLVERTEXBINDINGDIVISORPROC)(GLuint bindingindex, GLui
extern PFNDOLCLEARBUFFERDATAPROC dolClearBufferData; extern PFNDOLCLEARBUFFERDATAPROC dolClearBufferData;
extern PFNDOLCLEARBUFFERSUBDATAPROC dolClearBufferSubData; extern PFNDOLCLEARBUFFERSUBDATAPROC dolClearBufferSubData;
extern PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
extern PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
extern PFNDOLFRAMEBUFFERPARAMETERIPROC dolFramebufferParameteri; extern PFNDOLFRAMEBUFFERPARAMETERIPROC dolFramebufferParameteri;
extern PFNDOLGETFRAMEBUFFERPARAMETERIVPROC dolGetFramebufferParameteriv; extern PFNDOLGETFRAMEBUFFERPARAMETERIVPROC dolGetFramebufferParameteriv;
extern PFNDOLGETINTERNALFORMATI64VPROC dolGetInternalformati64v; extern PFNDOLGETINTERNALFORMATI64VPROC dolGetInternalformati64v;
@ -378,8 +355,6 @@ extern PFNDOLVERTEXBINDINGDIVISORPROC dolVertexBindingDivisor;
#define glClearBufferData dolClearBufferData #define glClearBufferData dolClearBufferData
#define glClearBufferSubData dolClearBufferSubData #define glClearBufferSubData dolClearBufferSubData
#define glDispatchCompute dolDispatchCompute
#define glDispatchComputeIndirect dolDispatchComputeIndirect
#define glFramebufferParameteri dolFramebufferParameteri #define glFramebufferParameteri dolFramebufferParameteri
#define glGetFramebufferParameteriv dolGetFramebufferParameteriv #define glGetFramebufferParameteriv dolGetFramebufferParameteriv
#define glGetInternalformati64v dolGetInternalformati64v #define glGetInternalformati64v dolGetInternalformati64v

View File

@ -284,6 +284,10 @@ static wxString true_color_desc =
wxTRANSLATE("Forces the game to render the RGB color channels in 24-bit, thereby increasing " wxTRANSLATE("Forces the game to render the RGB color channels in 24-bit, thereby increasing "
"quality by reducing color banding.\nIt has no impact on performance and causes " "quality by reducing color banding.\nIt has no impact on performance and causes "
"few graphical issues.\n\n\nIf unsure, leave this checked."); "few graphical issues.\n\n\nIf unsure, leave this checked.");
static wxString gpu_texture_decoding_desc =
wxTRANSLATE("Enables texture decoding using the GPU instead of the CPU. This may result in "
"performance gains in some scenarios, or systems where the CPU is the bottleneck."
"\n\nIf unsure, leave this unchecked.");
#if !defined(__APPLE__) #if !defined(__APPLE__)
// Search for available resolutions - TODO: Move to Common? // Search for available resolutions - TODO: Move to Common?
@ -755,6 +759,15 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title)
slide_szr->Add(new wxStaticText(page_hacks, wxID_ANY, _("Fast")), 0, wxALIGN_CENTER_VERTICAL); slide_szr->Add(new wxStaticText(page_hacks, wxID_ANY, _("Fast")), 0, wxALIGN_CENTER_VERTICAL);
szr_safetex->Add(slide_szr, 1, wxEXPAND | wxLEFT | wxRIGHT, space5); szr_safetex->Add(slide_szr, 1, wxEXPAND | wxLEFT | wxRIGHT, space5);
if (vconfig.backend_info.bSupportsGPUTextureDecoding)
{
szr_safetex->Add(CreateCheckBox(page_hacks, _("GPU Texture Decoding"),
wxGetTranslation(gpu_texture_decoding_desc),
vconfig.bEnableGPUTextureDecoding),
1, wxEXPAND | wxLEFT | wxRIGHT, space5);
}
if (slider_pos == -1) if (slider_pos == -1)
{ {
stc_slider->Disable(); stc_slider->Disable();

View File

@ -67,6 +67,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsPrimitiveRestart = true; g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsOversizedViewports = false; g_Config.backend_info.bSupportsOversizedViewports = false;
g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupportsGeometryShaders = true;
g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = true; g_Config.backend_info.bSupports3DVision = true;
g_Config.backend_info.bSupportsPostProcessing = false; g_Config.backend_info.bSupportsPostProcessing = false;
g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsPaletteConversion = true;
@ -75,6 +76,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsReversedDepthRange = false; g_Config.backend_info.bSupportsReversedDepthRange = false;
g_Config.backend_info.bSupportsMultithreading = false; g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false; g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
IDXGIFactory* factory; IDXGIFactory* factory;
IDXGIAdapter* ad; IDXGIAdapter* ad;

View File

@ -70,6 +70,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsPrimitiveRestart = true; g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsOversizedViewports = false; g_Config.backend_info.bSupportsOversizedViewports = false;
g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupportsGeometryShaders = true;
g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = true; g_Config.backend_info.bSupports3DVision = true;
g_Config.backend_info.bSupportsPostProcessing = false; g_Config.backend_info.bSupportsPostProcessing = false;
g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsPaletteConversion = true;
@ -78,6 +79,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsReversedDepthRange = false; g_Config.backend_info.bSupportsReversedDepthRange = false;
g_Config.backend_info.bSupportsMultithreading = false; g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false; g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
IDXGIFactory* factory; IDXGIFactory* factory;
IDXGIAdapter* ad; IDXGIAdapter* ad;

View File

@ -30,6 +30,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsPrimitiveRestart = true; g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsOversizedViewports = true; g_Config.backend_info.bSupportsOversizedViewports = true;
g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupportsGeometryShaders = true;
g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = false; g_Config.backend_info.bSupports3DVision = false;
g_Config.backend_info.bSupportsEarlyZ = true; g_Config.backend_info.bSupportsEarlyZ = true;
g_Config.backend_info.bSupportsBindingLayout = true; g_Config.backend_info.bSupportsBindingLayout = true;
@ -43,6 +44,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsReversedDepthRange = true; g_Config.backend_info.bSupportsReversedDepthRange = true;
g_Config.backend_info.bSupportsMultithreading = false; g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false; g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
// aamodes: We only support 1 sample, so no MSAA // aamodes: We only support 1 sample, so no MSAA
g_Config.backend_info.Adapters.clear(); g_Config.backend_info.Adapters.clear();

View File

@ -65,7 +65,7 @@ GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_fo
} }
else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
{ {
if (g_ogl_config.bSupports3DTextureStorage) if (g_ogl_config.bSupports3DTextureStorageMultisample)
glTexStorage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, glTexStorage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth,
m_targetHeight, m_EFBLayers, false); m_targetHeight, m_EFBLayers, false);
else else
@ -74,7 +74,7 @@ GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_fo
} }
else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE) else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE)
{ {
if (g_ogl_config.bSupports2DTextureStorage) if (g_ogl_config.bSupports2DTextureStorageMultisample)
glTexStorage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth, glTexStorage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth,
m_targetHeight, false); m_targetHeight, false);
else else

View File

@ -0,0 +1,105 @@
// Copyright 2016 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/GL/GLExtensions/GLExtensions.h"
#ifndef GL_TIME_ELAPSED
#define GL_TIME_ELAPSED 0x88BF
#endif
namespace OGL
{
/*
* This class can be used to measure the time it takes for the GPU to perform a draw call
* or compute dispatch. To use:
*
* - Create an instance of GPUTimer before issuing the draw call.
* (this can be before or after any binding that needs to be done)
*
* - (optionally) call Begin(). This is not needed for a single draw call.
*
* - Issue the draw call or compute dispatch as normal.
*
* - (optionally) call End(). This is not necessary for a single draw call.
*
* - Call GetTime{Seconds,Milliseconds,Nanoseconds} to determine how long the operation
* took to execute on the GPU.
*
* NOTE: When the timer is read back, this will force a GL flush, so the more often a timer is
* used, the larger the performance impact will be. Only one timer can be active at any time, due
* to using GL_TIME_ELAPSED. This is not enforced by the class, however.
*
*/
// Scoped wrapper around one GL_TIME_ELAPSED query object.
//
// Construction generates the query and starts timing; destruction ends any in-flight
// query and deletes the query object. The elapsed time is fetched lazily by the
// GetTime*() accessors and cached until the next Begin().
class GPUTimer final
{
public:
  // Generates the query object and immediately starts timing.
  GPUTimer()
  {
    glGenQueries(1, &m_query_id);
    Begin();
  }

  // Ends the query if it is still running, then releases the query object.
  ~GPUTimer()
  {
    End();
    glDeleteQueries(1, &m_query_id);
  }

  // This instance owns m_query_id. A copy would end up deleting the same query
  // object twice, so copying is disabled.
  GPUTimer(const GPUTimer&) = delete;
  GPUTimer& operator=(const GPUTimer&) = delete;

  // (Re)starts the measurement. If a measurement is already running it is ended first.
  void Begin()
  {
    if (m_started)
      glEndQuery(GL_TIME_ELAPSED);

    glBeginQuery(GL_TIME_ELAPSED, m_query_id);
    m_started = true;

    // A new measurement invalidates whatever GetResult() cached earlier; without this
    // reset a restarted timer would keep returning the previous interval.
    m_has_result = false;
  }

  // Stops the measurement. Does nothing if the timer is not running.
  void End()
  {
    if (!m_started)
      return;

    glEndQuery(GL_TIME_ELAPSED);
    m_started = false;
  }

  // Elapsed GPU time in seconds. Ends the measurement if it is still running.
  double GetTimeSeconds()
  {
    GetResult();
    return static_cast<double>(m_result) / 1000000000.0;
  }

  // Elapsed GPU time in milliseconds. Ends the measurement if it is still running.
  double GetTimeMilliseconds()
  {
    GetResult();
    return static_cast<double>(m_result) / 1000000.0;
  }

  // Elapsed GPU time in nanoseconds. Ends the measurement if it is still running.
  // The value is read back as a 32-bit GLuint, so it cannot represent intervals
  // longer than roughly 4.29 seconds.
  u32 GetTimeNanoseconds()
  {
    GetResult();
    return m_result;
  }

private:
  // Reads the query result into m_result once and caches it until the next Begin().
  void GetResult()
  {
    if (m_has_result)
      return;

    // The result can only be queried once the query has been ended.
    if (m_started)
      End();

    glGetQueryObjectuiv(m_query_id, GL_QUERY_RESULT, &m_result);
    m_has_result = true;
  }

  GLuint m_query_id = 0;
  GLuint m_result = 0;
  bool m_started = false;
  bool m_has_result = false;
};
} // namespace OGL

View File

@ -53,6 +53,7 @@
<ItemGroup> <ItemGroup>
<ClInclude Include="BoundingBox.h" /> <ClInclude Include="BoundingBox.h" />
<ClInclude Include="FramebufferManager.h" /> <ClInclude Include="FramebufferManager.h" />
<ClInclude Include="GPUTimer.h" />
<ClInclude Include="PerfQuery.h" /> <ClInclude Include="PerfQuery.h" />
<ClInclude Include="PostProcessing.h" /> <ClInclude Include="PostProcessing.h" />
<ClInclude Include="ProgramShaderCache.h" /> <ClInclude Include="ProgramShaderCache.h" />

View File

@ -90,6 +90,9 @@
</ClInclude> </ClInclude>
<ClInclude Include="SamplerCache.h" /> <ClInclude Include="SamplerCache.h" />
<ClInclude Include="VideoBackend.h" /> <ClInclude Include="VideoBackend.h" />
<ClInclude Include="GPUTimer.h">
<Filter>GLUtil</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<Text Include="CMakeLists.txt" /> <Text Include="CMakeLists.txt" />

View File

@ -65,6 +65,8 @@ static std::string GetGLSLVersionString()
return "#version 330"; return "#version 330";
case GLSL_400: case GLSL_400:
return "#version 400"; return "#version 400";
case GLSL_430:
return "#version 430";
default: default:
// Shouldn't ever hit this // Shouldn't ever hit this
return "#version ERROR"; return "#version ERROR";
@ -103,7 +105,9 @@ void SHADER::SetProgramVariables()
} }
} }
void SHADER::SetProgramBindings() void SHADER::SetProgramBindings(bool is_compute)
{
if (!is_compute)
{ {
if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend) if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{ {
@ -124,6 +128,7 @@ void SHADER::SetProgramBindings()
glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB, "rawnorm0"); glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB, "rawnorm0");
glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB, "rawnorm1"); glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB, "rawnorm1");
glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB, "rawnorm2"); glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB, "rawnorm2");
}
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
{ {
@ -281,7 +286,7 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
if (g_ogl_config.bSupportsGLSLCache) if (g_ogl_config.bSupportsGLSLCache)
glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE); glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
shader.SetProgramBindings(); shader.SetProgramBindings(false);
glLinkProgram(pid); glLinkProgram(pid);
@ -296,10 +301,10 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length); glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length);
if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL)) if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
{ {
GLsizei charsWritten; std::string info_log;
GLchar* infoLog = new GLchar[length]; info_log.resize(length);
glGetProgramInfoLog(pid, length, &charsWritten, infoLog); glGetProgramInfoLog(pid, length, &length, &info_log[0]);
ERROR_LOG(VIDEO, "Program info log:\n%s", infoLog); ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str());
std::string filename = std::string filename =
StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++); StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
@ -308,7 +313,7 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
file << s_glsl_header << vcode << s_glsl_header << pcode; file << s_glsl_header << vcode << s_glsl_header << pcode;
if (!gcode.empty()) if (!gcode.empty())
file << s_glsl_header << gcode; file << s_glsl_header << gcode;
file << infoLog; file << info_log;
file.close(); file.close();
if (linkStatus != GL_TRUE) if (linkStatus != GL_TRUE)
@ -316,10 +321,8 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
PanicAlert("Failed to link shaders: %s\n" PanicAlert("Failed to link shaders: %s\n"
"Debug info (%s, %s, %s):\n%s", "Debug info (%s, %s, %s):\n%s",
filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer, filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
g_ogl_config.gl_version, infoLog); g_ogl_config.gl_version, info_log.c_str());
} }
delete[] infoLog;
} }
if (linkStatus != GL_TRUE) if (linkStatus != GL_TRUE)
{ {
@ -336,6 +339,73 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
return true; return true;
} }
// Compiles `code` as a compute shader and links it into a new program object.
//
// On success the program name is stored in shader.glprogid and true is returned.
// On failure (shader compilation or program linking) false is returned; if linking
// failed, the program is deleted and shader.glprogid is reset to 0 so that callers
// cannot accidentally use or re-delete the dead program name.
//
// When linking fails, or when the driver emits a non-empty info log and DEBUG_GLSL is
// set, the source and the info log are written to a bad_p_*.txt file in the dump
// directory.
bool ProgramShaderCache::CompileComputeShader(SHADER& shader, const std::string& code)
{
  // We need to enable GL_ARB_compute_shader for drivers that support the extension,
  // but not GLSL 4.3. Mesa is one example.
  std::string header;
  if (g_ActiveConfig.backend_info.bSupportsComputeShaders &&
      g_ogl_config.eSupportedGLSLVersion < GLSL_430)
  {
    header = "#extension GL_ARB_compute_shader : enable\n";
  }

  // Keep the exact text handed to the compiler so the failure dump matches it.
  const std::string full_code = header + code;

  GLuint shader_id = CompileSingleShader(GL_COMPUTE_SHADER, full_code);
  if (!shader_id)
    return false;

  GLuint pid = shader.glprogid = glCreateProgram();
  glAttachShader(pid, shader_id);
  if (g_ogl_config.bSupportsGLSLCache)
    glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);

  shader.SetProgramBindings(true);

  glLinkProgram(pid);

  // original shaders aren't needed any more
  glDeleteShader(shader_id);

  GLint linkStatus;
  glGetProgramiv(pid, GL_LINK_STATUS, &linkStatus);
  GLsizei length = 0;
  glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length);
  if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
  {
    std::string info_log;
    if (length > 0)
    {
      // GL_INFO_LOG_LENGTH counts the NUL terminator. Fetch into a buffer of that size,
      // then shrink to the number of characters actually written so the terminator
      // (and any unused padding) does not end up in the dump file.
      info_log.resize(length);
      GLsizei written = 0;
      glGetProgramInfoLog(pid, length, &written, &info_log[0]);
      if (written < 0)
        written = 0;
      info_log.resize(written);
    }
    ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str());

    std::string filename =
        StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
    std::ofstream file;
    OpenFStream(file, filename, std::ios_base::out);
    file << s_glsl_header << full_code;
    file << info_log;
    file.close();

    if (linkStatus != GL_TRUE)
    {
      PanicAlert("Failed to link shaders: %s\n"
                 "Debug info (%s, %s, %s):\n%s",
                 filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
                 g_ogl_config.gl_version, info_log.c_str());
    }
  }

  if (linkStatus != GL_TRUE)
  {
    // Link failed
    ERROR_LOG(VIDEO, "Program linking failed; see info log");

    // Don't try to use this shader: delete the program and clear the stale name.
    glDeleteProgram(pid);
    shader.glprogid = 0;
    return false;
  }

  return true;
}
GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& code) GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& code)
{ {
GLuint result = glCreateShader(type); GLuint result = glCreateShader(type);
@ -351,31 +421,43 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c
if (compileStatus != GL_TRUE || (length > 1 && DEBUG_GLSL)) if (compileStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
{ {
GLsizei charsWritten; std::string info_log;
GLchar* infoLog = new GLchar[length]; info_log.resize(length);
glGetShaderInfoLog(result, length, &charsWritten, infoLog); glGetShaderInfoLog(result, length, &length, &info_log[0]);
ERROR_LOG(VIDEO, "%s Shader info log:\n%s",
type == GL_VERTEX_SHADER ? "VS" : type == GL_FRAGMENT_SHADER ? "PS" : "GS", infoLog); const char* prefix = "";
switch (type)
{
case GL_VERTEX_SHADER:
prefix = "vs";
break;
case GL_GEOMETRY_SHADER:
prefix = "gs";
break;
case GL_FRAGMENT_SHADER:
prefix = "ps";
break;
case GL_COMPUTE_SHADER:
prefix = "cs";
break;
}
ERROR_LOG(VIDEO, "%s Shader info log:\n%s", prefix, info_log.c_str());
std::string filename = StringFromFormat( std::string filename = StringFromFormat(
"%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), "%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), prefix, num_failures++);
type == GL_VERTEX_SHADER ? "vs" : type == GL_FRAGMENT_SHADER ? "ps" : "gs", num_failures++);
std::ofstream file; std::ofstream file;
OpenFStream(file, filename, std::ios_base::out); OpenFStream(file, filename, std::ios_base::out);
file << s_glsl_header << code << infoLog; file << s_glsl_header << code << info_log;
file.close(); file.close();
if (compileStatus != GL_TRUE) if (compileStatus != GL_TRUE)
{ {
PanicAlert("Failed to compile %s shader: %s\n" PanicAlert("Failed to compile %s shader: %s\n"
"Debug info (%s, %s, %s):\n%s", "Debug info (%s, %s, %s):\n%s",
type == GL_VERTEX_SHADER ? "vertex" : type == GL_FRAGMENT_SHADER ? "pixel" : prefix, filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
"geometry", g_ogl_config.gl_version, info_log.c_str());
filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
g_ogl_config.gl_version, infoLog);
} }
delete[] infoLog;
} }
if (compileStatus != GL_TRUE) if (compileStatus != GL_TRUE)
{ {
@ -539,11 +621,9 @@ void ProgramShaderCache::CreateHeader()
std::string earlyz_string = ""; std::string earlyz_string = "";
if (g_ActiveConfig.backend_info.bSupportsEarlyZ) if (g_ActiveConfig.backend_info.bSupportsEarlyZ)
{ {
if (g_ogl_config.bSupportsEarlyFragmentTests) if (g_ogl_config.bSupportsImageLoadStore)
{ {
earlyz_string = "#define FORCE_EARLY_Z layout(early_fragment_tests) in\n"; earlyz_string = "#define FORCE_EARLY_Z layout(early_fragment_tests) in\n";
if (!is_glsles) // GLES supports this by default
earlyz_string += "#extension GL_ARB_shader_image_load_store : enable\n";
} }
else if (g_ogl_config.bSupportsConservativeDepth) else if (g_ogl_config.bSupportsConservativeDepth)
{ {
@ -569,6 +649,7 @@ void ProgramShaderCache::CreateHeader()
"%s\n" // texture buffer "%s\n" // texture buffer
"%s\n" // ES texture buffer "%s\n" // ES texture buffer
"%s\n" // ES dual source blend "%s\n" // ES dual source blend
"%s\n" // shader image load store
// Precision defines for GLSL ES // Precision defines for GLSL ES
"%s\n" "%s\n"
@ -576,6 +657,7 @@ void ProgramShaderCache::CreateHeader()
"%s\n" "%s\n"
"%s\n" "%s\n"
"%s\n" "%s\n"
"%s\n"
// Silly differences // Silly differences
"#define float2 vec2\n" "#define float2 vec2\n"
@ -638,12 +720,17 @@ void ProgramShaderCache::CreateHeader()
"" ""
, ,
g_ogl_config.bSupportsImageLoadStore &&
((!is_glsles && v < GLSL_430) || (is_glsles && v < GLSLES_310)) ?
"#extension GL_ARB_shader_image_load_store : enable" :
"",
is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "", is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "",
is_glsles ? "precision highp sampler2DArray;" : "", is_glsles ? "precision highp sampler2DArray;" : "",
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ? (is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
"precision highp usamplerBuffer;" : "precision highp usamplerBuffer;" :
"", "",
v > GLSLES_300 ? "precision highp sampler2DMS;" : ""); v > GLSLES_300 ? "precision highp sampler2DMS;" : "",
v >= GLSLES_310 ? "precision highp image2DArray;" : "");
} }
void ProgramShaderCache::ProgramShaderCacheInserter::Read(const SHADERUID& key, const u8* value, void ProgramShaderCache::ProgramShaderCacheInserter::Read(const SHADERUID& key, const u8* value,

View File

@ -46,7 +46,7 @@ struct SHADER
std::string strvprog, strpprog, strgprog; std::string strvprog, strpprog, strgprog;
void SetProgramVariables(); void SetProgramVariables();
void SetProgramBindings(); void SetProgramBindings(bool is_compute);
void Bind(); void Bind();
}; };
@ -67,6 +67,7 @@ public:
static bool CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode, static bool CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode,
const std::string& gcode = ""); const std::string& gcode = "");
static bool CompileComputeShader(SHADER& shader, const std::string& code);
static GLuint CompileSingleShader(GLuint type, const std::string& code); static GLuint CompileSingleShader(GLuint type, const std::string& code);
static void UploadConstants(); static void UploadConstants();

View File

@ -451,15 +451,16 @@ Renderer::Renderer()
g_ogl_config.bSupportViewportFloat = GLExtensions::Supports("GL_ARB_viewport_array"); g_ogl_config.bSupportViewportFloat = GLExtensions::Supports("GL_ARB_viewport_array");
g_ogl_config.bSupportsDebug = g_ogl_config.bSupportsDebug =
GLExtensions::Supports("GL_KHR_debug") || GLExtensions::Supports("GL_ARB_debug_output"); GLExtensions::Supports("GL_KHR_debug") || GLExtensions::Supports("GL_ARB_debug_output");
g_ogl_config.bSupports3DTextureStorage = g_ogl_config.bSupportsTextureStorage = GLExtensions::Supports("GL_ARB_texture_storage");
g_ogl_config.bSupports3DTextureStorageMultisample =
GLExtensions::Supports("GL_ARB_texture_storage_multisample") || GLExtensions::Supports("GL_ARB_texture_storage_multisample") ||
GLExtensions::Supports("GL_OES_texture_storage_multisample_2d_array"); GLExtensions::Supports("GL_OES_texture_storage_multisample_2d_array");
g_ogl_config.bSupports2DTextureStorage = g_ogl_config.bSupports2DTextureStorageMultisample =
GLExtensions::Supports("GL_ARB_texture_storage_multisample"); GLExtensions::Supports("GL_ARB_texture_storage_multisample");
g_ogl_config.bSupportsEarlyFragmentTests = g_ogl_config.bSupportsImageLoadStore = GLExtensions::Supports("GL_ARB_shader_image_load_store");
GLExtensions::Supports("GL_ARB_shader_image_load_store");
g_ogl_config.bSupportsConservativeDepth = GLExtensions::Supports("GL_ARB_conservative_depth"); g_ogl_config.bSupportsConservativeDepth = GLExtensions::Supports("GL_ARB_conservative_depth");
g_ogl_config.bSupportsAniso = GLExtensions::Supports("GL_EXT_texture_filter_anisotropic"); g_ogl_config.bSupportsAniso = GLExtensions::Supports("GL_EXT_texture_filter_anisotropic");
g_Config.backend_info.bSupportsComputeShaders = GLExtensions::Supports("GL_ARB_compute_shader");
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3) if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3)
{ {
@ -486,6 +487,7 @@ Renderer::Renderer()
{ {
g_ogl_config.eSupportedGLSLVersion = GLSLES_300; g_ogl_config.eSupportedGLSLVersion = GLSLES_300;
g_ogl_config.bSupportsAEP = false; g_ogl_config.bSupportsAEP = false;
g_ogl_config.bSupportsTextureStorage = true;
g_Config.backend_info.bSupportsGeometryShaders = false; g_Config.backend_info.bSupportsGeometryShaders = false;
} }
else if (GLExtensions::Version() == 310) else if (GLExtensions::Version() == 310)
@ -493,16 +495,18 @@ Renderer::Renderer()
g_ogl_config.eSupportedGLSLVersion = GLSLES_310; g_ogl_config.eSupportedGLSLVersion = GLSLES_310;
g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a"); g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a");
g_Config.backend_info.bSupportsBindingLayout = true; g_Config.backend_info.bSupportsBindingLayout = true;
g_ogl_config.bSupportsEarlyFragmentTests = true; g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsGeometryShaders = g_ogl_config.bSupportsAEP; g_Config.backend_info.bSupportsGeometryShaders = g_ogl_config.bSupportsAEP;
g_Config.backend_info.bSupportsComputeShaders = true;
g_Config.backend_info.bSupportsGSInstancing = g_Config.backend_info.bSupportsGSInstancing =
g_Config.backend_info.bSupportsGeometryShaders && g_ogl_config.SupportedESPointSize > 0; g_Config.backend_info.bSupportsGeometryShaders && g_ogl_config.SupportedESPointSize > 0;
g_Config.backend_info.bSupportsSSAA = g_ogl_config.bSupportsAEP; g_Config.backend_info.bSupportsSSAA = g_ogl_config.bSupportsAEP;
g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true; g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true;
g_ogl_config.bSupportsMSAA = true; g_ogl_config.bSupportsMSAA = true;
g_ogl_config.bSupports2DTextureStorage = true; g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupports2DTextureStorageMultisample = true;
if (g_ActiveConfig.iStereoMode > 0 && g_ActiveConfig.iMultisamples > 1 && if (g_ActiveConfig.iStereoMode > 0 && g_ActiveConfig.iMultisamples > 1 &&
!g_ogl_config.bSupports3DTextureStorage) !g_ogl_config.bSupports3DTextureStorageMultisample)
{ {
// GLES 3.1 can't support stereo rendering and MSAA // GLES 3.1 can't support stereo rendering and MSAA
OSD::AddMessage("MSAA Stereo rendering isn't supported by your GPU.", 10000); OSD::AddMessage("MSAA Stereo rendering isn't supported by your GPU.", 10000);
@ -514,8 +518,9 @@ Renderer::Renderer()
g_ogl_config.eSupportedGLSLVersion = GLSLES_320; g_ogl_config.eSupportedGLSLVersion = GLSLES_320;
g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a"); g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a");
g_Config.backend_info.bSupportsBindingLayout = true; g_Config.backend_info.bSupportsBindingLayout = true;
g_ogl_config.bSupportsEarlyFragmentTests = true; g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupportsGeometryShaders = true;
g_Config.backend_info.bSupportsComputeShaders = true;
g_Config.backend_info.bSupportsGSInstancing = g_ogl_config.SupportedESPointSize > 0; g_Config.backend_info.bSupportsGSInstancing = g_ogl_config.SupportedESPointSize > 0;
g_Config.backend_info.bSupportsPaletteConversion = true; g_Config.backend_info.bSupportsPaletteConversion = true;
g_Config.backend_info.bSupportsSSAA = true; g_Config.backend_info.bSupportsSSAA = true;
@ -524,8 +529,9 @@ Renderer::Renderer()
g_ogl_config.bSupportsGLBaseVertex = true; g_ogl_config.bSupportsGLBaseVertex = true;
g_ogl_config.bSupportsDebug = true; g_ogl_config.bSupportsDebug = true;
g_ogl_config.bSupportsMSAA = true; g_ogl_config.bSupportsMSAA = true;
g_ogl_config.bSupports2DTextureStorage = true; g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupports3DTextureStorage = true; g_ogl_config.bSupports2DTextureStorageMultisample = true;
g_ogl_config.bSupports3DTextureStorageMultisample = true;
} }
} }
else else
@ -541,8 +547,7 @@ Renderer::Renderer()
else if (GLExtensions::Version() == 300) else if (GLExtensions::Version() == 300)
{ {
g_ogl_config.eSupportedGLSLVersion = GLSL_130; g_ogl_config.eSupportedGLSLVersion = GLSL_130;
g_ogl_config.bSupportsEarlyFragmentTests = g_ogl_config.bSupportsImageLoadStore = false; // layout keyword is only supported on glsl150+
false; // layout keyword is only supported on glsl150+
g_ogl_config.bSupportsConservativeDepth = g_ogl_config.bSupportsConservativeDepth =
false; // layout keyword is only supported on glsl150+ false; // layout keyword is only supported on glsl150+
g_Config.backend_info.bSupportsGeometryShaders = g_Config.backend_info.bSupportsGeometryShaders =
@ -551,8 +556,7 @@ Renderer::Renderer()
else if (GLExtensions::Version() == 310) else if (GLExtensions::Version() == 310)
{ {
g_ogl_config.eSupportedGLSLVersion = GLSL_140; g_ogl_config.eSupportedGLSLVersion = GLSL_140;
g_ogl_config.bSupportsEarlyFragmentTests = g_ogl_config.bSupportsImageLoadStore = false; // layout keyword is only supported on glsl150+
false; // layout keyword is only supported on glsl150+
g_ogl_config.bSupportsConservativeDepth = g_ogl_config.bSupportsConservativeDepth =
false; // layout keyword is only supported on glsl150+ false; // layout keyword is only supported on glsl150+
g_Config.backend_info.bSupportsGeometryShaders = g_Config.backend_info.bSupportsGeometryShaders =
@ -566,10 +570,28 @@ Renderer::Renderer()
{ {
g_ogl_config.eSupportedGLSLVersion = GLSL_330; g_ogl_config.eSupportedGLSLVersion = GLSL_330;
} }
else if (GLExtensions::Version() >= 430)
{
// TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
g_ogl_config.eSupportedGLSLVersion = GLSL_430;
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupportsImageLoadStore = true;
g_Config.backend_info.bSupportsSSAA = true;
// Compute shaders are core in GL4.3.
g_Config.backend_info.bSupportsComputeShaders = true;
}
else else
{ {
g_ogl_config.eSupportedGLSLVersion = GLSL_400; g_ogl_config.eSupportedGLSLVersion = GLSL_400;
g_Config.backend_info.bSupportsSSAA = true; g_Config.backend_info.bSupportsSSAA = true;
if (GLExtensions::Version() == 420)
{
// Texture storage and shader image load/store are core in GL4.2.
g_ogl_config.bSupportsTextureStorage = true;
g_ogl_config.bSupportsImageLoadStore = true;
}
} }
// Desktop OpenGL can't have the Android Extension Pack // Desktop OpenGL can't have the Android Extension Pack
@ -578,12 +600,19 @@ Renderer::Renderer()
// Either method can do early-z tests. See PixelShaderGen for details. // Either method can do early-z tests. See PixelShaderGen for details.
g_Config.backend_info.bSupportsEarlyZ = g_Config.backend_info.bSupportsEarlyZ =
g_ogl_config.bSupportsEarlyFragmentTests || g_ogl_config.bSupportsConservativeDepth; g_ogl_config.bSupportsImageLoadStore || g_ogl_config.bSupportsConservativeDepth;
glGetIntegerv(GL_MAX_SAMPLES, &g_ogl_config.max_samples); glGetIntegerv(GL_MAX_SAMPLES, &g_ogl_config.max_samples);
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA) if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
g_ogl_config.max_samples = 1; g_ogl_config.max_samples = 1;
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, the required flags
// have already been set by the version check above.
g_Config.backend_info.bSupportsGPUTextureDecoding =
g_Config.backend_info.bSupportsPaletteConversion &&
g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;
if (g_ogl_config.bSupportsDebug) if (g_ogl_config.bSupportsDebug)
{ {
if (GLExtensions::Supports("GL_KHR_debug")) if (GLExtensions::Supports("GL_KHR_debug"))

View File

@ -23,6 +23,7 @@ enum GLSL_VERSION
GLSL_150, GLSL_150,
GLSL_330, GLSL_330,
GLSL_400, // and above GLSL_400, // and above
GLSL_430,
GLSLES_300, // GLES 3.0 GLSLES_300, // GLES 3.0
GLSLES_310, // GLES 3.1 GLSLES_310, // GLES 3.1
GLSLES_320, // GLES 3.2 GLSLES_320, // GLES 3.2
@ -51,10 +52,11 @@ struct VideoConfig
bool bSupportsCopySubImage; bool bSupportsCopySubImage;
u8 SupportedESPointSize; u8 SupportedESPointSize;
ES_TEXBUF_TYPE SupportedESTextureBuffer; ES_TEXBUF_TYPE SupportedESTextureBuffer;
bool bSupports2DTextureStorage; bool bSupportsTextureStorage;
bool bSupports3DTextureStorage; bool bSupports2DTextureStorageMultisample;
bool bSupportsEarlyFragmentTests; bool bSupports3DTextureStorageMultisample;
bool bSupportsConservativeDepth; bool bSupportsConservativeDepth;
bool bSupportsImageLoadStore;
bool bSupportsAniso; bool bSupportsAniso;
const char* gl_vendor; const char* gl_vendor;

View File

@ -16,6 +16,7 @@
#include "Common/StringUtil.h" #include "Common/StringUtil.h"
#include "VideoBackends/OGL/FramebufferManager.h" #include "VideoBackends/OGL/FramebufferManager.h"
#include "VideoBackends/OGL/GPUTimer.h"
#include "VideoBackends/OGL/ProgramShaderCache.h" #include "VideoBackends/OGL/ProgramShaderCache.h"
#include "VideoBackends/OGL/Render.h" #include "VideoBackends/OGL/Render.h"
#include "VideoBackends/OGL/SamplerCache.h" #include "VideoBackends/OGL/SamplerCache.h"
@ -23,6 +24,7 @@
#include "VideoBackends/OGL/TextureConverter.h" #include "VideoBackends/OGL/TextureConverter.h"
#include "VideoCommon/ImageWrite.h" #include "VideoCommon/ImageWrite.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureDecoder.h" #include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
@ -49,6 +51,26 @@ static GLuint s_palette_buffer_offset_uniform[3];
static GLuint s_palette_multiplier_uniform[3]; static GLuint s_palette_multiplier_uniform[3];
static GLuint s_palette_copy_position_uniform[3]; static GLuint s_palette_copy_position_uniform[3];
// Cached state for one GPU texture decoding program. Instances are stored in
// s_texture_decoding_program_info, keyed by (texture format, palette format).
struct TextureDecodingProgramInfo
{
// Decoder description returned by TextureConversionShader::GetDecodingShaderInfo();
// stays null when no description exists for the format.
const TextureConversionShader::DecodingShaderInfo* base_info = nullptr;
// Program object filled in by ProgramShaderCache::CompileComputeShader().
SHADER program;
// Uniform locations looked up with glGetUniformLocation() once the program is built.
// They stay at -1 until then; the dispatch code skips any location that is negative.
GLint uniform_dst_size = -1;
GLint uniform_src_size = -1;
GLint uniform_src_row_stride = -1;
GLint uniform_src_offset = -1;
GLint uniform_palette_offset = -1;
// Set to true only after the program compiled and its uniform locations were queried.
bool valid = false;
};
// Uncomment to wrap each GPU decode in a GPUTimer and log how long it took.
//#define TIME_TEXTURE_DECODING 1
// Decoder programs built so far, keyed by (texture format, palette format). Failed builds are
// stored as well (with valid == false) so that they are not retried.
static std::map<std::pair<u32, u32>, TextureDecodingProgramInfo> s_texture_decoding_program_info;
// One GL_TEXTURE_BUFFER texture per TextureConversionShader buffer format, each viewing the
// palette stream buffer with a different element type.
static std::array<GLuint, TextureConversionShader::BUFFER_FORMAT_COUNT>
s_texture_decoding_buffer_views;
// Defined near the end of this file.
static void CreateTextureDecodingResources();
static void DestroyTextureDecodingResources();
bool SaveTexture(const std::string& filename, u32 textarget, u32 tex, int virtual_width, bool SaveTexture(const std::string& filename, u32 textarget, u32 tex, int virtual_width,
int virtual_height, unsigned int level) int virtual_height, unsigned int level)
{ {
@ -119,12 +141,22 @@ TextureCache::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConf
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAX_LEVEL, config.levels - 1); glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAX_LEVEL, config.levels - 1);
if (g_ogl_config.bSupportsTextureStorage)
{
glTexStorage3D(GL_TEXTURE_2D_ARRAY, config.levels, GL_RGBA8, config.width, config.height,
config.layers);
}
if (config.rendertarget) if (config.rendertarget)
{ {
for (u32 level = 0; level <= config.levels; level++) if (!g_ogl_config.bSupportsTextureStorage)
{ {
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, config.width, config.height, config.layers, for (u32 level = 0; level < config.levels; level++)
0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); {
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, std::max(config.width >> level, 1u),
std::max(config.height >> level, 1u), config.layers, 0, GL_RGBA,
GL_UNSIGNED_BYTE, nullptr);
}
} }
glGenFramebuffers(1, &entry->framebuffer); glGenFramebuffers(1, &entry->framebuffer);
FramebufferManager::SetFramebuffer(entry->framebuffer); FramebufferManager::SetFramebuffer(entry->framebuffer);
@ -187,8 +219,16 @@ void TextureCache::TCacheEntry::Load(const u8* buffer, u32 width, u32 height, u3
if (expanded_width != width) if (expanded_width != width)
glPixelStorei(GL_UNPACK_ROW_LENGTH, expanded_width); glPixelStorei(GL_UNPACK_ROW_LENGTH, expanded_width);
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, width, height, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, if (g_ogl_config.bSupportsTextureStorage)
buffer); {
glTexSubImage3D(GL_TEXTURE_2D_ARRAY, level, 0, 0, 0, width, height, 1, GL_RGBA,
GL_UNSIGNED_BYTE, buffer);
}
else
{
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, width, height, 1, 0, GL_RGBA,
GL_UNSIGNED_BYTE, buffer);
}
if (expanded_width != width) if (expanded_width != width)
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
@ -267,26 +307,31 @@ TextureCache::TextureCache()
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{ {
s32 buffer_size = 1024 * 1024; s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1);
s32 buffer_size = buffer_size_mb * 1024 * 1024;
s32 max_buffer_size = 0; s32 max_buffer_size = 0;
// The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates // The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB
// is 65KB, we are asking for a 1MB buffer here. // buffer here. This buffer is also used as storage for undecoded textures when compute shader
// Make sure to check the maximum size and if it is below 1MB // texture decoding is enabled, in which case the requested size is 32MB.
// then use the maximum the hardware supports instead.
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size); glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size);
// Clamp the buffer size to the maximum size that the driver supports.
buffer_size = std::min(buffer_size, max_buffer_size); buffer_size = std::min(buffer_size, max_buffer_size);
s_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size); s_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size);
glGenTextures(1, &s_palette_resolv_texture); glGenTextures(1, &s_palette_resolv_texture);
glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture); glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, s_palette_stream_buffer->m_buffer); glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, s_palette_stream_buffer->m_buffer);
CreateTextureDecodingResources();
} }
} }
TextureCache::~TextureCache() TextureCache::~TextureCache()
{ {
DeleteShaders(); DeleteShaders();
DestroyTextureDecodingResources();
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{ {
@ -588,4 +633,159 @@ void TextureCache::ConvertTexture(TCacheEntryBase* _entry, TCacheEntryBase* _unc
FramebufferManager::SetFramebuffer(0); FramebufferManager::SetFramebuffer(0);
g_renderer->RestoreAPIState(); g_renderer->RestoreAPIState();
} }
// GLSL vertex shader source that derives the clip-space position purely from gl_VertexID
// (no vertex attributes are read).
// NOTE(review): nothing in the visible part of this file references this string - confirm it
// is actually used before keeping it.
static const std::string decoding_vertex_shader = R"(
void main()
{
vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);
gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);
}
)";
void CreateTextureDecodingResources()
{
static const GLenum gl_view_types[TextureConversionShader::BUFFER_FORMAT_COUNT] = {
GL_R8UI, // BUFFER_FORMAT_R8_UINT
GL_R16UI, // BUFFER_FORMAT_R16_UINT
GL_RG32UI, // BUFFER_FORMAT_R32G32_UINT
};
glGenTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
s_texture_decoding_buffer_views.data());
for (size_t i = 0; i < TextureConversionShader::BUFFER_FORMAT_COUNT; i++)
{
glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[i]);
glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], s_palette_stream_buffer->m_buffer);
}
}
void DestroyTextureDecodingResources()
{
glDeleteTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
s_texture_decoding_buffer_views.data());
s_texture_decoding_buffer_views.fill(0);
s_texture_decoding_program_info.clear();
}
// Reports whether a compute program exists that can decode the given texture/palette format
// combination. The program is built the first time a combination is queried; the outcome,
// success or failure, is cached so each combination is attempted only once.
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
{
  const auto cache_key =
      std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
  const auto cached = s_texture_decoding_program_info.find(cache_key);
  if (cached != s_texture_decoding_program_info.end())
    return cached->second.valid;

  TextureDecodingProgramInfo program_info;
  program_info.base_info = TextureConversionShader::GetDecodingShaderInfo(format);

  // Runs the build steps in order and stops at the first one that fails.
  const auto build_program = [&]() -> bool {
    if (!program_info.base_info)
      return false;

    std::string shader_source =
        TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::OpenGL);
    if (shader_source.empty())
      return false;

    return ProgramShaderCache::CompileComputeShader(program_info.program, shader_source);
  };

  if (build_program())
  {
    const GLuint program_id = program_info.program.glprogid;
    program_info.uniform_dst_size = glGetUniformLocation(program_id, "u_dst_size");
    program_info.uniform_src_size = glGetUniformLocation(program_id, "u_src_size");
    program_info.uniform_src_offset = glGetUniformLocation(program_id, "u_src_offset");
    program_info.uniform_src_row_stride = glGetUniformLocation(program_id, "u_src_row_stride");
    program_info.uniform_palette_offset = glGetUniformLocation(program_id, "u_palette_offset");
    program_info.valid = true;
  }

  // Remember the result either way, so a failed build is not retried on every query.
  s_texture_decoding_program_info.emplace(cache_key, program_info);
  return program_info.valid;
}
// Decodes undecoded texture data into mip level dst_level of entry by dispatching the cached
// compute program for (format, palette_format).
//
//   data, data_size   - undecoded texel data; copied into the palette stream buffer.
//   width, height     - passed to the shader as u_dst_size and used to size the dispatch.
//   aligned_width/height - passed to the shader as u_src_size.
//   row_stride        - source row stride in bytes; converted to buffer elements for the shader.
//   palette           - copied after the texel data, but only when the decoder declares a
//                       non-zero palette size.
//
// Does nothing when no usable program is cached for the key, so callers are expected to have
// received true from SupportsGPUTextureDecode() first.
void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
                                      size_t data_size, TextureFormat format, u32 width, u32 height,
                                      u32 aligned_width, u32 aligned_height, u32 row_stride,
                                      const u8* palette, TlutFormat palette_format)
{
  auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
  auto iter = s_texture_decoding_program_info.find(key);

  // The map also stores entries for failed builds (valid == false). Such an entry can have a
  // null base_info or a program that was never built, so it must not be used here.
  if (iter == s_texture_decoding_program_info.end() || !iter->second.valid)
    return;

#ifdef TIME_TEXTURE_DECODING
  GPUTimer timer;
#endif

  // Reference the cached entry instead of copying the whole struct on every decode.
  auto& info = iter->second;
  u32 bytes_per_buffer_elem =
      TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);

  // Only copy palette if it is required.
  bool has_palette = info.base_info->palette_size > 0;
  u32 total_upload_size = static_cast<u32>(data_size);
  u32 palette_offset = total_upload_size;
  u32 upload_alignment = bytes_per_buffer_elem;
  if (has_palette)
  {
    // Align to u16.
    if ((total_upload_size % sizeof(u16)) != 0)
    {
      total_upload_size++;
      palette_offset++;
    }

    total_upload_size += info.base_info->palette_size;

    // The palette is addressed in u16 units relative to the start of the stream buffer, so the
    // allocation itself must begin on a u16 boundary as well. With a one-byte element type the
    // element alignment alone would allow an odd start offset, and the division below would
    // then truncate. (Assumes the second argument of Map() is the required start alignment.)
    upload_alignment = std::max(upload_alignment, static_cast<u32>(sizeof(u16)));
  }

  // Allocate space in stream buffer, and copy texture + palette across.
  auto buffer = s_palette_stream_buffer->Map(total_upload_size, upload_alignment);
  memcpy(buffer.first, data, data_size);
  if (has_palette)
    memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size);
  s_palette_stream_buffer->Unmap(total_upload_size);

  info.program.Bind();

  // Calculate stride in buffer elements
  u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem;
  u32 offset_in_elements = buffer.second / bytes_per_buffer_elem;
  u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16);

  // A negative location means the uniform was not found in the program; skip it.
  if (info.uniform_dst_size >= 0)
    glUniform2ui(info.uniform_dst_size, width, height);
  if (info.uniform_src_size >= 0)
    glUniform2ui(info.uniform_src_size, aligned_width, aligned_height);
  if (info.uniform_src_offset >= 0)
    glUniform1ui(info.uniform_src_offset, offset_in_elements);
  if (info.uniform_src_row_stride >= 0)
    glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements);
  if (info.uniform_palette_offset >= 0)
    glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements);

  // Texel data is fetched through the view matching the decoder's buffer format.
  glActiveTexture(GL_TEXTURE9);
  glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[info.base_info->buffer_format]);

  if (has_palette)
  {
    // Use an R16UI view for the palette.
    glActiveTexture(GL_TEXTURE10);
    glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
  }

  auto dispatch_groups = TextureConversionShader::GetDispatchCount(info.base_info, width, height);
  glBindImageTexture(0, static_cast<TCacheEntry*>(entry)->texture, dst_level, GL_TRUE, 0,
                     GL_WRITE_ONLY, GL_RGBA8);
  glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1);
  // NOTE(review): if the decoded texture is sampled by a later draw, GL_TEXTURE_FETCH_BARRIER_BIT
  // may be needed in addition - confirm against the glMemoryBarrier documentation.
  glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);

  // Restore the texture unit state expected by the rest of the texture cache.
  TextureCache::SetStage();

#ifdef TIME_TEXTURE_DECODING
  WARN_LOG(VIDEO, "Decode texture format %u size %ux%u took %.4fms", static_cast<u32>(format),
           width, height, timer.GetTimeMilliseconds());
#endif
}
} }

View File

@ -23,6 +23,12 @@ public:
static void DisableStage(unsigned int stage); static void DisableStage(unsigned int stage);
static void SetStage(); static void SetStage();
// Returns true when a compute program for decoding (format, palette_format) is available.
// The program is built on the first query for a combination and the outcome is cached.
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
// Uploads the undecoded texture data (plus the palette, when the decoder needs one) and
// dispatches the cached compute program to write the result into mip level dst_level of entry.
void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
TextureFormat format, u32 width, u32 height, u32 aligned_width,
u32 aligned_height, u32 row_stride, const u8* palette,
TlutFormat palette_format) override;
private: private:
struct TCacheEntry : TCacheEntryBase struct TCacheEntry : TCacheEntryBase
{ {

View File

@ -101,6 +101,7 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsExclusiveFullscreen = false; g_Config.backend_info.bSupportsExclusiveFullscreen = false;
g_Config.backend_info.bSupportsOversizedViewports = true; g_Config.backend_info.bSupportsOversizedViewports = true;
g_Config.backend_info.bSupportsGeometryShaders = true; g_Config.backend_info.bSupportsGeometryShaders = true;
g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupports3DVision = false; g_Config.backend_info.bSupports3DVision = false;
g_Config.backend_info.bSupportsPostProcessing = true; g_Config.backend_info.bSupportsPostProcessing = true;
g_Config.backend_info.bSupportsSSAA = true; g_Config.backend_info.bSupportsSSAA = true;
@ -108,6 +109,11 @@ void VideoBackend::InitBackendInfo()
g_Config.backend_info.bSupportsMultithreading = false; g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = true; g_Config.backend_info.bSupportsInternalResolutionFrameDumps = true;
// TODO: There is a bug here, if texel buffers are not supported the graphics options
// will show the option when it is not supported. The only way around this would be
// creating a context when calling this function to determine what is available.
g_Config.backend_info.bSupportsGPUTextureDecoding = true;
// Overwritten in Render.cpp later // Overwritten in Render.cpp later
g_Config.backend_info.bSupportsDualSourceBlend = true; g_Config.backend_info.bSupportsDualSourceBlend = true;
g_Config.backend_info.bSupportsPrimitiveRestart = true; g_Config.backend_info.bSupportsPrimitiveRestart = true;

View File

@ -131,7 +131,9 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsOversizedViewports = true; g_Config.backend_info.bSupportsOversizedViewports = true;
g_Config.backend_info.bSupportsPrimitiveRestart = false; g_Config.backend_info.bSupportsPrimitiveRestart = false;
g_Config.backend_info.bSupportsMultithreading = false; g_Config.backend_info.bSupportsMultithreading = false;
g_Config.backend_info.bSupportsComputeShaders = false;
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false; g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
// aamodes // aamodes
g_Config.backend_info.AAModes = {1}; g_Config.backend_info.AAModes = {1};

View File

@ -91,7 +91,8 @@ bool CommandBufferManager::CreateCommandBuffers()
VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 500000}, VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 500000},
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 500000}, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 500000},
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16}, {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16},
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024}}; {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024},
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1024}};
VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
nullptr, nullptr,

View File

@ -30,6 +30,7 @@ enum DESCRIPTOR_SET_LAYOUT
DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS, DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS,
DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS, DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS,
DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS, DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS,
DESCRIPTOR_SET_LAYOUT_COMPUTE,
NUM_DESCRIPTOR_SET_LAYOUTS NUM_DESCRIPTOR_SET_LAYOUTS
}; };
@ -52,6 +53,12 @@ enum DESCRIPTOR_SET_BIND_POINT
// - Same as standard, plus 128 bytes of push constants, accessible from all stages. // - Same as standard, plus 128 bytes of push constants, accessible from all stages.
// - Texture Decoding // - Texture Decoding
// - Same as push constant, plus a single texel buffer accessible from PS. // - Same as push constant, plus a single texel buffer accessible from PS.
// - Compute
// - 1 uniform buffer [set=0, binding=0]
// - 4 combined image samplers [set=0, binding=1-4]
// - 2 texel buffers [set=0, binding=5-6]
// - 1 storage image [set=0, binding=7]
// - 128 bytes of push constants
// //
// All four pipeline layout share the first two descriptor sets (uniform buffers, PS samplers). // All four pipeline layout share the first two descriptor sets (uniform buffers, PS samplers).
// The third descriptor set (see bind points above) is used for storage or texel buffers. // The third descriptor set (see bind points above) is used for storage or texel buffers.
@ -62,6 +69,7 @@ enum PIPELINE_LAYOUT
PIPELINE_LAYOUT_BBOX, PIPELINE_LAYOUT_BBOX,
PIPELINE_LAYOUT_PUSH_CONSTANT, PIPELINE_LAYOUT_PUSH_CONSTANT,
PIPELINE_LAYOUT_TEXTURE_CONVERSION, PIPELINE_LAYOUT_TEXTURE_CONVERSION,
PIPELINE_LAYOUT_COMPUTE,
NUM_PIPELINE_LAYOUTS NUM_PIPELINE_LAYOUTS
}; };

View File

@ -324,6 +324,41 @@ std::pair<VkPipeline, bool> ObjectCache::GetPipelineWithCacheResult(const Pipeli
return {pipeline, false}; return {pipeline, false};
} }
// Builds a compute pipeline from the shader module and pipeline layout in info. The shader entry
// point is always "main". The returned handle is not recorded anywhere by this function.
// Returns VK_NULL_HANDLE, after logging the error, when pipeline creation fails.
VkPipeline ObjectCache::CreateComputePipeline(const ComputePipelineInfo& info)
{
  // Single compute stage, no specialization constants.
  VkPipelineShaderStageCreateInfo stage_info = {};
  stage_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  stage_info.pNext = nullptr;
  stage_info.flags = 0;
  stage_info.stage = VK_SHADER_STAGE_COMPUTE_BIT;
  stage_info.module = info.cs;
  stage_info.pName = "main";
  stage_info.pSpecializationInfo = nullptr;

  // No pipeline derivation: null base handle, base index of -1.
  VkComputePipelineCreateInfo create_info = {};
  create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
  create_info.pNext = nullptr;
  create_info.flags = 0;
  create_info.stage = stage_info;
  create_info.layout = info.pipeline_layout;
  create_info.basePipelineHandle = VK_NULL_HANDLE;
  create_info.basePipelineIndex = -1;

  VkPipeline pipeline;
  VkResult res = vkCreateComputePipelines(g_vulkan_context->GetDevice(), VK_NULL_HANDLE, 1,
                                          &create_info, nullptr, &pipeline);
  if (res == VK_SUCCESS)
    return pipeline;

  LOG_VULKAN_ERROR(res, "vkCreateComputePipelines failed: ");
  return VK_NULL_HANDLE;
}
// Returns the compute pipeline matching info, creating and caching it on first use.
// A failed creation is cached as VK_NULL_HANDLE too, so it is not attempted again.
VkPipeline ObjectCache::GetComputePipeline(const ComputePipelineInfo& info)
{
  const auto cached = m_compute_pipeline_objects.find(info);
  if (cached == m_compute_pipeline_objects.end())
  {
    VkPipeline created = CreateComputePipeline(info);
    m_compute_pipeline_objects.emplace(info, created);
    return created;
  }

  return cached->second;
}
std::string ObjectCache::GetDiskCacheFileName(const char* type) std::string ObjectCache::GetDiskCacheFileName(const char* type)
{ {
return StringFromFormat("%svulkan-%s-%s.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(), return StringFromFormat("%svulkan-%s-%s.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
@ -477,6 +512,13 @@ void ObjectCache::DestroyPipelineCache()
} }
m_pipeline_objects.clear(); m_pipeline_objects.clear();
for (const auto& it : m_compute_pipeline_objects)
{
if (it.second != VK_NULL_HANDLE)
vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr);
}
m_compute_pipeline_objects.clear();
vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr); vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr);
m_pipeline_cache = VK_NULL_HANDLE; m_pipeline_cache = VK_NULL_HANDLE;
} }
@ -725,6 +767,17 @@ bool ObjectCache::CreateDescriptorSetLayouts()
{0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT}, {0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
}; };
static const VkDescriptorSetLayoutBinding compute_set_bindings[] = {
{0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{5, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{6, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
{7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT},
};
static const VkDescriptorSetLayoutCreateInfo create_infos[NUM_DESCRIPTOR_SET_LAYOUTS] = { static const VkDescriptorSetLayoutCreateInfo create_infos[NUM_DESCRIPTOR_SET_LAYOUTS] = {
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(ubo_set_bindings)), ubo_set_bindings}, static_cast<u32>(ArraySize(ubo_set_bindings)), ubo_set_bindings},
@ -733,7 +786,9 @@ bool ObjectCache::CreateDescriptorSetLayouts()
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(ssbo_set_bindings)), ssbo_set_bindings}, static_cast<u32>(ArraySize(ssbo_set_bindings)), ssbo_set_bindings},
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0, {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(texel_buffer_set_bindings)), texel_buffer_set_bindings}}; static_cast<u32>(ArraySize(texel_buffer_set_bindings)), texel_buffer_set_bindings},
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(compute_set_bindings)), compute_set_bindings}};
for (size_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; i++) for (size_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; i++)
{ {
@ -774,8 +829,11 @@ bool ObjectCache::CreatePipelineLayouts()
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UNIFORM_BUFFERS], m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UNIFORM_BUFFERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS], m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS],
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS]}; m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS]};
VkDescriptorSetLayout compute_sets[] = {m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_COMPUTE]};
VkPushConstantRange push_constant_range = { VkPushConstantRange push_constant_range = {
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, PUSH_CONSTANT_BUFFER_SIZE}; VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, PUSH_CONSTANT_BUFFER_SIZE};
VkPushConstantRange compute_push_constant_range = {VK_SHADER_STAGE_COMPUTE_BIT, 0,
PUSH_CONSTANT_BUFFER_SIZE};
// Info for each pipeline layout // Info for each pipeline layout
VkPipelineLayoutCreateInfo pipeline_layout_info[NUM_PIPELINE_LAYOUTS] = { VkPipelineLayoutCreateInfo pipeline_layout_info[NUM_PIPELINE_LAYOUTS] = {
@ -794,7 +852,11 @@ bool ObjectCache::CreatePipelineLayouts()
// Texture Conversion // Texture Conversion
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0, {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(texture_conversion_sets)), texture_conversion_sets, 1, static_cast<u32>(ArraySize(texture_conversion_sets)), texture_conversion_sets, 1,
&push_constant_range}}; &push_constant_range},
// Compute
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
static_cast<u32>(ArraySize(compute_sets)), compute_sets, 1, &compute_push_constant_range}};
for (size_t i = 0; i < NUM_PIPELINE_LAYOUTS; i++) for (size_t i = 0; i < NUM_PIPELINE_LAYOUTS; i++)
{ {
@ -1007,6 +1069,31 @@ bool operator<(const SamplerState& lhs, const SamplerState& rhs)
return lhs.bits < rhs.bits; return lhs.bits < rhs.bits;
} }
// Hashes the raw bytes of the pipeline description with XXH64 (seed 0).
std::size_t ComputePipelineInfoHash::operator()(const ComputePipelineInfo& key) const
{
  const auto digest = XXH64(&key, sizeof(ComputePipelineInfo), 0);
  return static_cast<std::size_t>(digest);
}
// Two pipeline descriptions are equal when their object representations match byte for byte.
bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
{
  const int byte_order = std::memcmp(&lhs, &rhs, sizeof(ComputePipelineInfo));
  return byte_order == 0;
}
// Negation of the bytewise equality comparison.
bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
{
  const bool same = (lhs == rhs);
  return !same;
}
// Orders pipeline descriptions by a bytewise comparison of their object representations.
bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
{
  const int byte_order = std::memcmp(&lhs, &rhs, sizeof(ComputePipelineInfo));
  return byte_order < 0;
}
// Counterpart of operator<, using the same bytewise comparison.
bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
{
  const int byte_order = std::memcmp(&lhs, &rhs, sizeof(ComputePipelineInfo));
  return byte_order > 0;
}
bool ObjectCache::CompileSharedShaders() bool ObjectCache::CompileSharedShaders()
{ {
static const char PASSTHROUGH_VERTEX_SHADER_SOURCE[] = R"( static const char PASSTHROUGH_VERTEX_SHADER_SOURCE[] = R"(

View File

@ -56,6 +56,22 @@ bool operator!=(const SamplerState& lhs, const SamplerState& rhs);
bool operator>(const SamplerState& lhs, const SamplerState& rhs); bool operator>(const SamplerState& lhs, const SamplerState& rhs);
bool operator<(const SamplerState& lhs, const SamplerState& rhs); bool operator<(const SamplerState& lhs, const SamplerState& rhs);
// Describes a compute pipeline; used as the lookup key for cached compute pipelines.
// The hash functor and comparison operators for this type work on its raw bytes.
// NOTE(review): bytewise comparison assumes the struct has no padding - confirm on all targets.
struct ComputePipelineInfo
{
// Pipeline layout the pipeline is created with.
VkPipelineLayout pipeline_layout;
// Shader module used for the compute stage.
VkShaderModule cs;
};
// Hash functor that allows ComputePipelineInfo to be used as an unordered_map key.
struct ComputePipelineInfoHash
{
std::size_t operator()(const ComputePipelineInfo& key) const;
};
// Equality and ordering for ComputePipelineInfo; the definitions compare the structs with memcmp.
bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
class ObjectCache class ObjectCache
{ {
public: public:
@ -114,6 +130,12 @@ public:
// otherwise for a cache hit it will be true. // otherwise for a cache hit it will be true.
std::pair<VkPipeline, bool> GetPipelineWithCacheResult(const PipelineInfo& info); std::pair<VkPipeline, bool> GetPipelineWithCacheResult(const PipelineInfo& info);
// Creates a compute pipeline, and does not track the handle.
// Returns VK_NULL_HANDLE if creation fails.
VkPipeline CreateComputePipeline(const ComputePipelineInfo& info);
// Find a pipeline by the specified description, if not found, attempts to create it.
// The result of the attempt is cached, including VK_NULL_HANDLE when creation failed.
VkPipeline GetComputePipeline(const ComputePipelineInfo& info);
// Saves the pipeline cache to disk. Call when shutting down. // Saves the pipeline cache to disk. Call when shutting down.
void SavePipelineCache(); void SavePipelineCache();
@ -166,6 +188,8 @@ private:
ShaderCache<PixelShaderUid> m_ps_cache; ShaderCache<PixelShaderUid> m_ps_cache;
std::unordered_map<PipelineInfo, VkPipeline, PipelineInfoHash> m_pipeline_objects; std::unordered_map<PipelineInfo, VkPipeline, PipelineInfoHash> m_pipeline_objects;
std::unordered_map<ComputePipelineInfo, VkPipeline, ComputePipelineInfoHash>
m_compute_pipeline_objects;
VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE; VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE;
std::string m_pipeline_cache_filename; std::string m_pipeline_cache_filename;

View File

@ -35,7 +35,7 @@ static const TBuiltInResource* GetCompilerResourceLimits();
// Compile a shader to SPIR-V via glslang // Compile a shader to SPIR-V via glslang
static bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, static bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage,
const char* stage_filename, const char* source_code, const char* stage_filename, const char* source_code,
size_t source_code_length, bool prepend_header); size_t source_code_length, const char* header, size_t header_length);
// Regarding the UBO bind points, we subtract one from the binding index because // Regarding the UBO bind points, we subtract one from the binding index because
// the OpenGL backend requires UBO #0 for non-block uniforms (at least on NV). // the OpenGL backend requires UBO #0 for non-block uniforms (at least on NV).
@ -73,9 +73,32 @@ static const char SHADER_HEADER[] = R"(
#define gl_VertexID gl_VertexIndex #define gl_VertexID gl_VertexIndex
#define gl_InstanceID gl_InstanceIndex #define gl_InstanceID gl_InstanceIndex
)"; )";
// GLSL preamble for compute shaders. It selects GLSL 4.50 and defines binding macros that place
// every resource in descriptor set 0: uniform buffers from binding 0, samplers from binding 1,
// texel buffers from binding 5 and storage images from binding 7. It also maps HLSL-style type
// and function names onto their GLSL equivalents.
// Presumably passed as the header argument of CompileShaderToSPV for compute stages - the call
// site is not visible here.
static const char COMPUTE_SHADER_HEADER[] = R"(
// Target GLSL 4.5.
#version 450 core
// All resources are packed into one descriptor set for compute.
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (0 + x))
#define SAMPLER_BINDING(x) layout(set = 0, binding = (1 + x))
#define TEXEL_BUFFER_BINDING(x) layout(set = 0, binding = (5 + x))
#define IMAGE_BINDING(format, x) layout(format, set = 0, binding = (7 + x))
// hlsl to glsl function translation
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix
)";
bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char* stage_filename, bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char* stage_filename,
const char* source_code, size_t source_code_length, bool prepend_header) const char* source_code, size_t source_code_length, const char* header,
size_t header_length)
{ {
if (!InitializeGlslang()) if (!InitializeGlslang())
return false; return false;
@ -91,10 +114,10 @@ bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char
std::string full_source_code; std::string full_source_code;
const char* pass_source_code = source_code; const char* pass_source_code = source_code;
int pass_source_code_length = static_cast<int>(source_code_length); int pass_source_code_length = static_cast<int>(source_code_length);
if (prepend_header) if (header_length > 0)
{ {
full_source_code.reserve(sizeof(SHADER_HEADER) + source_code_length); full_source_code.reserve(header_length + source_code_length);
full_source_code.append(SHADER_HEADER, sizeof(SHADER_HEADER) - 1); full_source_code.append(header, header_length);
full_source_code.append(source_code, source_code_length); full_source_code.append(source_code, source_code_length);
pass_source_code = full_source_code.c_str(); pass_source_code = full_source_code.c_str();
pass_source_code_length = static_cast<int>(full_source_code.length()); pass_source_code_length = static_cast<int>(full_source_code.length());
@ -318,21 +341,28 @@ bool CompileVertexShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header) size_t source_code_length, bool prepend_header)
{ {
return CompileShaderToSPV(out_code, EShLangVertex, "vs", source_code, source_code_length, return CompileShaderToSPV(out_code, EShLangVertex, "vs", source_code, source_code_length,
prepend_header); SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
} }
bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code, bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header) size_t source_code_length, bool prepend_header)
{ {
return CompileShaderToSPV(out_code, EShLangGeometry, "gs", source_code, source_code_length, return CompileShaderToSPV(out_code, EShLangGeometry, "gs", source_code, source_code_length,
prepend_header); SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
} }
bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code, bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header) size_t source_code_length, bool prepend_header)
{ {
return CompileShaderToSPV(out_code, EShLangFragment, "ps", source_code, source_code_length, return CompileShaderToSPV(out_code, EShLangFragment, "ps", source_code, source_code_length,
prepend_header); SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
}
// Compiles GLSL compute shader source to SPIR-V.
//
// out_code            receives the generated SPIR-V.
// source_code         pointer to the shader source (need not be null-terminated).
// source_code_length  length of source_code in bytes.
// prepend_header      when true, COMPUTE_SHADER_HEADER is placed in front of the source;
//                     when false, the source is compiled exactly as given.
//
// Returns the result of CompileShaderToSPV.
bool CompileComputeShader(SPIRVCodeVector* out_code, const char* source_code,
                          size_t source_code_length, bool prepend_header)
{
  // CompileShaderToSPV only prepends when the header length is non-zero, so a zero length is
  // how "no header" is requested. Previously the flag was ignored and the header was always
  // prepended.
  const size_t header_length = prepend_header ? sizeof(COMPUTE_SHADER_HEADER) - 1 : 0;
  return CompileShaderToSPV(out_code, EShLangCompute, "cs", source_code, source_code_length,
                            COMPUTE_SHADER_HEADER, header_length);
}
} // namespace ShaderCompiler } // namespace ShaderCompiler

View File

@ -29,5 +29,9 @@ bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code,
bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code, bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header = true); size_t source_code_length, bool prepend_header = true);
// Compile a compute shader to SPIR-V.
bool CompileComputeShader(SPIRVCodeVector* out_code, const char* source_code,
size_t source_code_length, bool prepend_header = true);
} // namespace ShaderCompiler } // namespace ShaderCompiler
} // namespace Vulkan } // namespace Vulkan

View File

@ -4,6 +4,7 @@
#include <algorithm> #include <algorithm>
#include "Common/Assert.h"
#include "VideoBackends/Vulkan/CommandBufferManager.h" #include "VideoBackends/Vulkan/CommandBufferManager.h"
#include "VideoBackends/Vulkan/Texture2D.h" #include "VideoBackends/Vulkan/Texture2D.h"
#include "VideoBackends/Vulkan/VulkanContext.h" #include "VideoBackends/Vulkan/VulkanContext.h"
@ -273,10 +274,132 @@ void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout
break; break;
} }
// If we were using a compute layout, the stages need to reflect that
switch (m_compute_layout)
{
case ComputeImageLayout::Undefined:
break;
case ComputeImageLayout::ReadOnly:
barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
break;
case ComputeImageLayout::WriteOnly:
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
break;
case ComputeImageLayout::ReadWrite:
barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
break;
}
m_compute_layout = ComputeImageLayout::Undefined;
vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1, vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1,
&barrier); &barrier);
m_layout = new_layout; m_layout = new_layout;
} }
// Transitions the image for use by a compute shader and records the matching image memory
// barrier into command_buffer.
//
// new_layout must not be ComputeImageLayout::Undefined. ReadOnly moves the image to
// VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; WriteOnly and ReadWrite move it to
// VK_IMAGE_LAYOUT_GENERAL. The call is a no-op if the image is already in new_layout.
// On return m_layout and m_compute_layout reflect the new state.
void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout)
{
  _assert_(new_layout != ComputeImageLayout::Undefined);
  if (m_compute_layout == new_layout)
    return;

  VkImageMemoryBarrier barrier = {
      VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,  // VkStructureType            sType
      nullptr,                                 // const void*                pNext
      0,                                       // VkAccessFlags              srcAccessMask
      0,                                       // VkAccessFlags              dstAccessMask
      m_layout,                                // VkImageLayout              oldLayout
      VK_IMAGE_LAYOUT_GENERAL,                 // VkImageLayout              newLayout
      VK_QUEUE_FAMILY_IGNORED,                 // uint32_t                   srcQueueFamilyIndex
      VK_QUEUE_FAMILY_IGNORED,                 // uint32_t                   dstQueueFamilyIndex
      m_image,                                 // VkImage                    image
      {static_cast<VkImageAspectFlags>(Util::IsDepthFormat(m_format) ? VK_IMAGE_ASPECT_DEPTH_BIT :
                                                                       VK_IMAGE_ASPECT_COLOR_BIT),
       0, m_levels, 0, m_layers}  // VkImageSubresourceRange    subresourceRange
  };

  // Source side: derived from the Vulkan layout the image is currently in.
  VkPipelineStageFlags srcStageMask, dstStageMask;
  switch (m_layout)
  {
  case VK_IMAGE_LAYOUT_UNDEFINED:
    // Layout undefined therefore contents undefined, and we don't care what happens to it.
    barrier.srcAccessMask = 0;
    srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
    break;

  case VK_IMAGE_LAYOUT_PREINITIALIZED:
    // Image has been pre-initialized by the host, so ensure all writes have completed.
    barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
    srcStageMask = VK_PIPELINE_STAGE_HOST_BIT;
    break;

  case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
    // Image was being used as a color attachment, so ensure all writes have completed.
    barrier.srcAccessMask =
        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
    srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    break;

  case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
    // Image was being used as a depthstencil attachment, so ensure all writes have completed.
    barrier.srcAccessMask =
        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
    srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    break;

  case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
    // Image was being used as a shader resource, make sure all reads have finished.
    barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
    srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
    break;

  case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
    // Image was being used as a copy source, ensure all reads have finished.
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
    srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
    break;

  case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
    // Image was being used as a copy destination, ensure all writes have finished.
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
    break;

  default:
    srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
    break;
  }

  // If the image is currently in a compute layout, its last accesses came from a compute
  // shader, so the source side of the barrier must wait on the compute stage. Without this,
  // a compute->compute transition (e.g. WriteOnly -> ReadOnly) would take the source masks
  // from m_layout alone and would not synchronise against the earlier dispatch.
  switch (m_compute_layout)
  {
  case ComputeImageLayout::Undefined:
    break;

  case ComputeImageLayout::ReadOnly:
    barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
    srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    break;

  case ComputeImageLayout::WriteOnly:
    barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    break;

  case ComputeImageLayout::ReadWrite:
    barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
    srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    break;
  }

  // Destination side: what the compute shader is going to do with the image.
  switch (new_layout)
  {
  case ComputeImageLayout::ReadOnly:
    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
    barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    break;

  case ComputeImageLayout::WriteOnly:
    barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    break;

  case ComputeImageLayout::ReadWrite:
    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
    barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    break;

  default:
    // Only reachable for Undefined, which the assertion at the top rules out.
    dstStageMask = 0;
    break;
  }

  m_layout = barrier.newLayout;
  m_compute_layout = new_layout;

  vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1,
                       &barrier);
}
} // namespace Vulkan } // namespace Vulkan

View File

@ -17,6 +17,15 @@ class ObjectCache;
class Texture2D class Texture2D
{ {
public: public:
// Custom image layouts, mainly used for switching to/from compute
enum class ComputeImageLayout
{
// The image is not currently in a compute layout.
Undefined,
// Read by a compute shader; TransitionToLayout selects VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL.
ReadOnly,
// Written by a compute shader; TransitionToLayout selects VK_IMAGE_LAYOUT_GENERAL.
WriteOnly,
// Read and written by a compute shader; TransitionToLayout selects VK_IMAGE_LAYOUT_GENERAL.
ReadWrite
};
Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format, Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format,
VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image, VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image,
VkDeviceMemory device_memory, VkImageView view); VkDeviceMemory device_memory, VkImageView view);
@ -50,6 +59,7 @@ public:
void OverrideImageLayout(VkImageLayout new_layout); void OverrideImageLayout(VkImageLayout new_layout);
void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout); void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout);
void TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout);
private: private:
u32 m_width; u32 m_width;
@ -60,6 +70,7 @@ private:
VkSampleCountFlagBits m_samples; VkSampleCountFlagBits m_samples;
VkImageViewType m_view_type; VkImageViewType m_view_type;
VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED; VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED;
ComputeImageLayout m_compute_layout = ComputeImageLayout::Undefined;
VkImage m_image; VkImage m_image;
VkDeviceMemory m_device_memory; VkDeviceMemory m_device_memory;

View File

@ -138,6 +138,21 @@ void TextureCache::CopyRectangleFromTexture(TCacheEntry* dst_texture,
ScaleTextureRectangle(dst_texture, dst_rect, src_texture, src_rect); ScaleTextureRectangle(dst_texture, dst_rect, src_texture, src_rect);
} }
// Reports whether the given texture/palette format combination can be decoded on the GPU.
// Forwards directly to TextureConverter::SupportsTextureDecoding.
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
{
return m_texture_converter->SupportsTextureDecoding(format, palette_format);
}
// Decodes an encoded texture into the given cache entry on the GPU by forwarding every
// argument, unchanged, to TextureConverter::DecodeTexture.
void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format)
{
// NOTE(review): the entry is downcast without a check - presumably every entry passed here
// was created by this backend; confirm against the caller.
m_texture_converter->DecodeTexture(static_cast<TCacheEntry*>(entry), dst_level, data, data_size,
format, width, height, aligned_width, aligned_height,
row_stride, palette, palette_format);
}
void TextureCache::CopyTextureRectangle(TCacheEntry* dst_texture, void TextureCache::CopyTextureRectangle(TCacheEntry* dst_texture,
const MathUtil::Rectangle<int>& dst_rect, const MathUtil::Rectangle<int>& dst_rect,
Texture2D* src_texture, Texture2D* src_texture,

View File

@ -66,6 +66,13 @@ public:
void CopyRectangleFromTexture(TCacheEntry* dst_texture, const MathUtil::Rectangle<int>& dst_rect, void CopyRectangleFromTexture(TCacheEntry* dst_texture, const MathUtil::Rectangle<int>& dst_rect,
Texture2D* src_texture, const MathUtil::Rectangle<int>& src_rect); Texture2D* src_texture, const MathUtil::Rectangle<int>& src_rect);
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
TextureFormat format, u32 width, u32 height, u32 aligned_width,
u32 aligned_height, u32 row_stride, const u8* palette,
TlutFormat palette_format) override;
private: private:
bool CreateRenderPasses(); bool CreateRenderPasses();

View File

@ -42,8 +42,12 @@ TextureConverter::~TextureConverter()
vkDestroyShaderModule(g_vulkan_context->GetDevice(), it, nullptr); vkDestroyShaderModule(g_vulkan_context->GetDevice(), it, nullptr);
} }
if (m_texel_buffer_view_r8_uint != VK_NULL_HANDLE)
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r8_uint, nullptr);
if (m_texel_buffer_view_r16_uint != VK_NULL_HANDLE) if (m_texel_buffer_view_r16_uint != VK_NULL_HANDLE)
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r16_uint, nullptr); vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r16_uint, nullptr);
if (m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE)
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r32g32_uint, nullptr);
if (m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE) if (m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE)
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_rgba8_unorm, nullptr); vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_rgba8_unorm, nullptr);
@ -59,6 +63,12 @@ TextureConverter::~TextureConverter()
vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr); vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr);
} }
for (const auto& it : m_decoding_pipelines)
{
if (it.second.compute_shader != VK_NULL_HANDLE)
vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second.compute_shader, nullptr);
}
if (m_rgb_to_yuyv_shader != VK_NULL_HANDLE) if (m_rgb_to_yuyv_shader != VK_NULL_HANDLE)
vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_rgb_to_yuyv_shader, nullptr); vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_rgb_to_yuyv_shader, nullptr);
if (m_yuyv_to_rgb_shader != VK_NULL_HANDLE) if (m_yuyv_to_rgb_shader != VK_NULL_HANDLE)
@ -103,6 +113,12 @@ bool TextureConverter::Initialize()
return false; return false;
} }
if (!CreateDecodingTexture())
{
PanicAlert("Failed to create decoding texture");
return false;
}
if (!CompileYUYVConversionShaders()) if (!CompileYUYVConversionShaders())
{ {
PanicAlert("Failed to compile YUYV conversion shaders"); PanicAlert("Failed to compile YUYV conversion shaders");
@ -371,6 +387,152 @@ void TextureConverter::DecodeYUYVTextureFromMemory(TextureCache::TCacheEntry* ds
draw.EndRenderPass(); draw.EndRenderPass();
} }
// Returns true if a decoding compute shader is available for the given texture/palette
// format pair. The outcome (success or failure) is cached in m_decoding_pipelines, so shader
// generation and compilation is attempted at most once per pair.
bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TlutFormat palette_format)
{
  const auto cache_key = std::make_pair(format, palette_format);
  const auto cached = m_decoding_pipelines.find(cache_key);
  if (cached != m_decoding_pipelines.end())
    return cached->second.valid;

  // Start from a "not supported" entry and upgrade it only if every step succeeds.
  TextureDecodingPipeline entry;
  entry.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
  entry.compute_shader = VK_NULL_HANDLE;
  entry.valid = false;

  if (entry.base_info)
  {
    const std::string source =
        TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::Vulkan);
    entry.compute_shader = Util::CompileAndCreateComputeShader(source, true);
    entry.valid = (entry.compute_shader != VK_NULL_HANDLE);
  }

  // Remember the result either way so a failing format is not retried.
  m_decoding_pipelines.emplace(cache_key, entry);
  return entry.valid;
}
// Decodes an encoded texture on the GPU and copies the result into mip level dst_level of
// entry's texture.
//
// The encoded data (and the palette, when the format's shader info reports a non-zero palette
// size) is uploaded to the shared texel buffer, a compute shader writes the decoded image into
// m_decoding_texture, and the result is copied to the destination with vkCmdCopyImage. All
// commands are recorded into the current init command buffer.
//
// The call does nothing if no usable pipeline is cached for (format, palette_format), i.e.
// SupportsTextureDecoding has not been called for the pair or reported it as unsupported, or if
// texel buffer space cannot be reserved.
void TextureConverter::DecodeTexture(TextureCache::TCacheEntry* entry, u32 dst_level,
                                     const u8* data, size_t data_size, TextureFormat format,
                                     u32 width, u32 height, u32 aligned_width, u32 aligned_height,
                                     u32 row_stride, const u8* palette, TlutFormat palette_format)
{
  auto key = std::make_pair(format, palette_format);
  auto iter = m_decoding_pipelines.find(key);

  // Failed lookups are cached too (valid == false, possibly with a null base_info and a null
  // shader module), so the presence of an entry alone does not mean it can be used.
  if (iter == m_decoding_pipelines.end() || !iter->second.valid)
    return;

  struct PushConstants
  {
    u32 dst_size[2];
    u32 src_size[2];
    u32 src_offset;
    u32 src_row_stride;
    u32 palette_offset;
  };

  // Copy to GPU-visible buffer, aligned to the data type
  const TextureDecodingPipeline& info = iter->second;
  u32 bytes_per_buffer_elem =
      TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);

  // Calculate total data size, including palette.
  // Only copy palette if it is required.
  u32 total_upload_size = static_cast<u32>(data_size);
  u32 palette_size = info.base_info->palette_size;
  u32 palette_offset = total_upload_size;
  bool has_palette = palette_size > 0;
  if (has_palette)
  {
    // Align to u16.
    if ((total_upload_size % sizeof(u16)) != 0)
    {
      total_upload_size++;
      palette_offset++;
    }

    total_upload_size += palette_size;
  }

  // Allocate space for upload, if it fails, execute the buffer.
  if (!m_texel_buffer->ReserveMemory(total_upload_size, bytes_per_buffer_elem))
  {
    Util::ExecuteCurrentCommandsAndRestoreState(true, false);
    if (!m_texel_buffer->ReserveMemory(total_upload_size, bytes_per_buffer_elem))
    {
      // Without a reservation the copies below would write past the space we own.
      PanicAlert("Failed to reserve memory for encoded texture upload");
      return;
    }
  }

  // Copy/commit upload buffer.
  u32 texel_buffer_offset = static_cast<u32>(m_texel_buffer->GetCurrentOffset());
  std::memcpy(m_texel_buffer->GetCurrentHostPointer(), data, data_size);
  if (has_palette)
    std::memcpy(m_texel_buffer->GetCurrentHostPointer() + palette_offset, palette, palette_size);
  m_texel_buffer->CommitMemory(total_upload_size);

  // Determine uniforms.
  // Offsets are expressed in buffer elements of the view the shader reads through: the data
  // view's element size for the texture data, u16 for the palette.
  PushConstants constants = {
      {width, height},
      {aligned_width, aligned_height},
      texel_buffer_offset / bytes_per_buffer_elem,
      row_stride / bytes_per_buffer_elem,
      static_cast<u32>((texel_buffer_offset + palette_offset) / sizeof(u16))};

  // Determine view to use for texel buffers.
  VkBufferView data_view = VK_NULL_HANDLE;
  switch (info.base_info->buffer_format)
  {
  case TextureConversionShader::BUFFER_FORMAT_R8_UINT:
    data_view = m_texel_buffer_view_r8_uint;
    break;
  case TextureConversionShader::BUFFER_FORMAT_R16_UINT:
    data_view = m_texel_buffer_view_r16_uint;
    break;
  case TextureConversionShader::BUFFER_FORMAT_R32G32_UINT:
    data_view = m_texel_buffer_view_r32g32_uint;
    break;
  default:
    break;
  }

  // Place compute shader dispatches together in the init command buffer.
  // That way we don't have to pay a penalty for switching from graphics->compute,
  // or end/restart our render pass.
  VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentInitCommandBuffer();

  // Dispatch compute to temporary texture.
  ComputeShaderDispatcher dispatcher(command_buffer,
                                     g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE),
                                     info.compute_shader);
  m_decoding_texture->TransitionToLayout(command_buffer, Texture2D::ComputeImageLayout::WriteOnly);
  dispatcher.SetPushConstants(&constants, sizeof(constants));
  dispatcher.SetStorageImage(m_decoding_texture->GetView(), m_decoding_texture->GetLayout());
  dispatcher.SetTexelBuffer(0, data_view);
  if (has_palette)
    dispatcher.SetTexelBuffer(1, m_texel_buffer_view_r16_uint);
  auto groups = TextureConversionShader::GetDispatchCount(info.base_info, width, height);
  dispatcher.Dispatch(groups.first, groups.second, 1);

  // Copy from temporary texture to final destination.
  m_decoding_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
  entry->GetTexture()->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
  VkImageCopy image_copy = {{VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},
                            {0, 0, 0},
                            {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, 0, 1},
                            {0, 0, 0},
                            {width, height, 1}};
  vkCmdCopyImage(command_buffer, m_decoding_texture->GetImage(),
                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, entry->GetTexture()->GetImage(),
                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy);
}
bool TextureConverter::CreateTexelBuffer() bool TextureConverter::CreateTexelBuffer()
{ {
// Prefer an 8MB buffer if possible, but use less if the device doesn't support this. // Prefer an 8MB buffer if possible, but use less if the device doesn't support this.
@ -386,9 +548,13 @@ bool TextureConverter::CreateTexelBuffer()
return false; return false;
// Create views of the formats that we will be using. // Create views of the formats that we will be using.
m_texel_buffer_view_r8_uint = CreateTexelBufferView(VK_FORMAT_R8_UINT);
m_texel_buffer_view_r16_uint = CreateTexelBufferView(VK_FORMAT_R16_UINT); m_texel_buffer_view_r16_uint = CreateTexelBufferView(VK_FORMAT_R16_UINT);
m_texel_buffer_view_r32g32_uint = CreateTexelBufferView(VK_FORMAT_R32G32_UINT);
m_texel_buffer_view_rgba8_unorm = CreateTexelBufferView(VK_FORMAT_R8G8B8A8_UNORM); m_texel_buffer_view_rgba8_unorm = CreateTexelBufferView(VK_FORMAT_R8G8B8A8_UNORM);
return m_texel_buffer_view_r16_uint != VK_NULL_HANDLE && return m_texel_buffer_view_r8_uint != VK_NULL_HANDLE &&
m_texel_buffer_view_r16_uint != VK_NULL_HANDLE &&
m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE &&
m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE; m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE;
} }
@ -611,6 +777,15 @@ bool TextureConverter::CreateEncodingDownloadTexture()
return m_encoding_download_texture && m_encoding_download_texture->Map(); return m_encoding_download_texture && m_encoding_download_texture->Map();
} }
// Creates the intermediate texture that DecodeTexture binds as the compute shader's storage
// image. It is DECODING_TEXTURE_WIDTH x DECODING_TEXTURE_HEIGHT, RGBA8, with one level and one
// layer, and is created with storage-image and transfer-source usage.
// Returns true if the texture was created.
bool TextureConverter::CreateDecodingTexture()
{
m_decoding_texture = Texture2D::Create(
DECODING_TEXTURE_WIDTH, DECODING_TEXTURE_HEIGHT, 1, 1, VK_FORMAT_R8G8B8A8_UNORM,
VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
return static_cast<bool>(m_decoding_texture);
}
bool TextureConverter::CompileYUYVConversionShaders() bool TextureConverter::CompileYUYVConversionShaders()
{ {
static const char RGB_TO_YUYV_SHADER_SOURCE[] = R"( static const char RGB_TO_YUYV_SHADER_SOURCE[] = R"(

View File

@ -5,11 +5,14 @@
#pragma once #pragma once
#include <array> #include <array>
#include <map>
#include <memory> #include <memory>
#include <utility>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "VideoBackends/Vulkan/StreamBuffer.h" #include "VideoBackends/Vulkan/StreamBuffer.h"
#include "VideoBackends/Vulkan/TextureCache.h" #include "VideoBackends/Vulkan/TextureCache.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureDecoder.h" #include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoCommon.h"
@ -45,6 +48,12 @@ public:
void DecodeYUYVTextureFromMemory(TextureCache::TCacheEntry* dst_texture, const void* src_ptr, void DecodeYUYVTextureFromMemory(TextureCache::TCacheEntry* dst_texture, const void* src_ptr,
u32 src_width, u32 src_stride, u32 src_height); u32 src_width, u32 src_stride, u32 src_height);
bool SupportsTextureDecoding(TextureFormat format, TlutFormat palette_format);
void DecodeTexture(TextureCache::TCacheEntry* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride, const u8* palette,
TlutFormat palette_format);
private: private:
static const u32 NUM_TEXTURE_ENCODING_SHADERS = 64; static const u32 NUM_TEXTURE_ENCODING_SHADERS = 64;
static const u32 ENCODING_TEXTURE_WIDTH = EFB_WIDTH * 4; static const u32 ENCODING_TEXTURE_WIDTH = EFB_WIDTH * 4;
@ -52,6 +61,10 @@ private:
static const VkFormat ENCODING_TEXTURE_FORMAT = VK_FORMAT_B8G8R8A8_UNORM; static const VkFormat ENCODING_TEXTURE_FORMAT = VK_FORMAT_B8G8R8A8_UNORM;
static const size_t NUM_PALETTE_CONVERSION_SHADERS = 3; static const size_t NUM_PALETTE_CONVERSION_SHADERS = 3;
// Maximum size of a texture based on BP registers.
static const u32 DECODING_TEXTURE_WIDTH = 1024;
static const u32 DECODING_TEXTURE_HEIGHT = 1024;
bool CreateTexelBuffer(); bool CreateTexelBuffer();
VkBufferView CreateTexelBufferView(VkFormat format) const; VkBufferView CreateTexelBufferView(VkFormat format) const;
@ -62,6 +75,8 @@ private:
bool CreateEncodingTexture(); bool CreateEncodingTexture();
bool CreateEncodingDownloadTexture(); bool CreateEncodingDownloadTexture();
bool CreateDecodingTexture();
bool CompileYUYVConversionShaders(); bool CompileYUYVConversionShaders();
// Allocates storage in the texel command buffer of the specified size. // Allocates storage in the texel command buffer of the specified size.
@ -77,7 +92,9 @@ private:
// Shared between conversion types // Shared between conversion types
std::unique_ptr<StreamBuffer> m_texel_buffer; std::unique_ptr<StreamBuffer> m_texel_buffer;
VkBufferView m_texel_buffer_view_r8_uint = VK_NULL_HANDLE;
VkBufferView m_texel_buffer_view_r16_uint = VK_NULL_HANDLE; VkBufferView m_texel_buffer_view_r16_uint = VK_NULL_HANDLE;
VkBufferView m_texel_buffer_view_r32g32_uint = VK_NULL_HANDLE;
VkBufferView m_texel_buffer_view_rgba8_unorm = VK_NULL_HANDLE; VkBufferView m_texel_buffer_view_rgba8_unorm = VK_NULL_HANDLE;
size_t m_texel_buffer_size = 0; size_t m_texel_buffer_size = 0;
@ -91,6 +108,16 @@ private:
VkFramebuffer m_encoding_render_framebuffer = VK_NULL_HANDLE; VkFramebuffer m_encoding_render_framebuffer = VK_NULL_HANDLE;
std::unique_ptr<StagingTexture2D> m_encoding_download_texture; std::unique_ptr<StagingTexture2D> m_encoding_download_texture;
// Texture decoding - GX format in memory->RGBA8
// One cached result of SupportsTextureDecoding for a (texture format, palette format) pair.
struct TextureDecodingPipeline
{
// Shader description returned by GetDecodingShaderInfo; null when none exists for the format.
const TextureConversionShader::DecodingShaderInfo* base_info;
// Compiled decoding compute shader, or VK_NULL_HANDLE if it was not (successfully) compiled.
VkShaderModule compute_shader;
// True only when base_info is non-null and the compute shader compiled.
bool valid;
};
std::map<std::pair<TextureFormat, TlutFormat>, TextureDecodingPipeline> m_decoding_pipelines;
std::unique_ptr<Texture2D> m_decoding_texture;
// XFB encoding/decoding shaders // XFB encoding/decoding shaders
VkShaderModule m_rgb_to_yuyv_shader = VK_NULL_HANDLE; VkShaderModule m_rgb_to_yuyv_shader = VK_NULL_HANDLE;
VkShaderModule m_yuyv_to_rgb_shader = VK_NULL_HANDLE; VkShaderModule m_yuyv_to_rgb_shader = VK_NULL_HANDLE;

View File

@ -250,6 +250,18 @@ VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code, bo
return CreateShaderModule(code.data(), code.size()); return CreateShaderModule(code.data(), code.size());
} }
// Compiles compute shader source to SPIR-V and wraps the result in a shader module.
// Returns VK_NULL_HANDLE if compilation fails; the intermediate SPIR-V is discarded.
VkShaderModule CompileAndCreateComputeShader(const std::string& source_code, bool prepend_header)
{
  ShaderCompiler::SPIRVCodeVector spirv;
  const bool compiled = ShaderCompiler::CompileComputeShader(
      &spirv, source_code.c_str(), source_code.length(), prepend_header);
  if (compiled)
    return CreateShaderModule(spirv.data(), spirv.size());

  return VK_NULL_HANDLE;
}
} // namespace Util } // namespace Util
UtilityShaderDraw::UtilityShaderDraw(VkCommandBuffer command_buffer, UtilityShaderDraw::UtilityShaderDraw(VkCommandBuffer command_buffer,
@ -670,4 +682,157 @@ bool UtilityShaderDraw::BindPipeline()
return true; return true;
} }
// Prepares a compute dispatch that will record into command_buffer using the given pipeline
// layout and compute shader module. The constructor only stores its arguments; nothing is
// recorded into the command buffer here.
ComputeShaderDispatcher::ComputeShaderDispatcher(VkCommandBuffer command_buffer,
VkPipelineLayout pipeline_layout,
VkShaderModule compute_shader)
: m_command_buffer(command_buffer)
{
// Populate minimal pipeline state
m_pipeline_info.pipeline_layout = pipeline_layout;
m_pipeline_info.cs = compute_shader;
}
// Reserves `size` bytes in the object cache's utility shader uniform buffer, aligned to the
// context's uniform buffer alignment, and returns the buffer's current host pointer for the
// caller to fill in.
// NOTE(review): a failed reservation only raises a PanicAlert; the pointer is still returned.
// NOTE(review): the meaning of the three `true` flags is defined by StreamBuffer::ReserveMemory,
// which is not visible here.
u8* ComputeShaderDispatcher::AllocateUniformBuffer(size_t size)
{
if (!g_object_cache->GetUtilityShaderUniformBuffer()->ReserveMemory(
size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true))
PanicAlert("Failed to allocate util uniforms");
return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer();
}
// Records the uniform buffer binding for this dispatch and commits `size` bytes in the
// utility shader uniform buffer.
void ComputeShaderDispatcher::CommitUniformBuffer(size_t size)
{
m_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer();
// The descriptor itself uses offset 0; the real position is kept in m_uniform_buffer_offset
// and supplied as the dynamic offset when the descriptor set is bound.
m_uniform_buffer.offset = 0;
m_uniform_buffer.range = size;
// Capture the offset before CommitMemory is called on the buffer.
m_uniform_buffer_offset =
static_cast<u32>(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset());
g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size);
}
// Records a push constant update for the compute stage into the command buffer.
//
// data       pointer to the constant block.
// data_size  size of the block in bytes; must not exceed PUSH_CONSTANT_BUFFER_SIZE.
//
// Unlike the other setters this takes effect immediately (vkCmdPushConstants is recorded
// here, not in Dispatch).
void ComputeShaderDispatcher::SetPushConstants(const void* data, size_t data_size)
{
  // A block that exactly fills the push constant range is valid, so the bound is inclusive.
  // NOTE(review): assumes PUSH_CONSTANT_BUFFER_SIZE is the size of the layout's push constant
  // range - confirm against its definition.
  _assert_(static_cast<u32>(data_size) <= PUSH_CONSTANT_BUFFER_SIZE);

  vkCmdPushConstants(m_command_buffer, m_pipeline_info.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
                     0, static_cast<u32>(data_size), data);
}
// Stores the image view and sampler for sampler slot `index`; the descriptor is written later
// by BindDescriptors. The image layout is always recorded as SHADER_READ_ONLY_OPTIMAL.
// `index` is not range-checked; it must be less than the size of m_samplers.
void ComputeShaderDispatcher::SetSampler(size_t index, VkImageView view, VkSampler sampler)
{
m_samplers[index].sampler = sampler;
m_samplers[index].imageView = view;
m_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
// Stores the view and layout of the storage image for this dispatch; the descriptor is written
// later by BindDescriptors. No sampler is associated with a storage image.
void ComputeShaderDispatcher::SetStorageImage(VkImageView view, VkImageLayout image_layout)
{
m_storage_image.sampler = VK_NULL_HANDLE;
m_storage_image.imageView = view;
m_storage_image.imageLayout = image_layout;
}
// Stores the buffer view for texel buffer slot `index`; the descriptor is written later by
// BindDescriptors. `index` is not range-checked; it must be less than the size of
// m_texel_buffers.
void ComputeShaderDispatcher::SetTexelBuffer(size_t index, VkBufferView view)
{
m_texel_buffers[index] = view;
}
// Binds the descriptor set and compute pipeline, then records a dispatch of the given number
// of work groups. The dispatch is skipped if the pipeline cannot be obtained.
// NOTE(review): BindDescriptors returns void, so a descriptor set allocation failure there does
// not stop the dispatch from being recorded.
void ComputeShaderDispatcher::Dispatch(u32 groups_x, u32 groups_y, u32 groups_z)
{
BindDescriptors();
if (!BindPipeline())
return;
vkCmdDispatch(m_command_buffer, groups_x, groups_y, groups_z);
}
// Allocates a descriptor set with the compute layout, writes every resource that has been set
// on this dispatcher into it, and binds it to the compute bind point.
//
// Binding numbers: 0 = uniform buffer (dynamic), 1.. = combined image samplers,
// 5.. = uniform texel buffers, 7 = storage image. Resources left at VK_NULL_HANDLE are skipped.
// If the descriptor set cannot be allocated, a PanicAlert is raised and nothing is bound.
void ComputeShaderDispatcher::BindDescriptors()
{
  VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(
      g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_COMPUTE));
  if (set == VK_NULL_HANDLE)
  {
    PanicAlert("Failed to allocate descriptor set for compute dispatch");
    return;
  }

  // Reserve enough descriptors to write every binding: one uniform buffer, every sampler
  // slot, every texel buffer slot, and one storage image. The count is derived from the member
  // arrays so it cannot fall out of step with them (the previous fixed size of 7 was one short
  // of the 8 writes that are possible when everything is bound).
  constexpr size_t MAX_SET_WRITES = 1 + std::tuple_size<decltype(m_samplers)>::value +
                                    std::tuple_size<decltype(m_texel_buffers)>::value + 1;
  std::array<VkWriteDescriptorSet, MAX_SET_WRITES> set_writes = {};
  u32 num_set_writes = 0;

  if (m_uniform_buffer.buffer != VK_NULL_HANDLE)
  {
    set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                    nullptr,
                                    set,
                                    0,
                                    0,
                                    1,
                                    VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
                                    nullptr,
                                    &m_uniform_buffer,
                                    nullptr};
  }

  // Samplers
  for (size_t i = 0; i < m_samplers.size(); i++)
  {
    const VkDescriptorImageInfo& info = m_samplers[i];
    if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE)
    {
      set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                      nullptr,
                                      set,
                                      static_cast<u32>(1 + i),
                                      0,
                                      1,
                                      VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
                                      &info,
                                      nullptr,
                                      nullptr};
    }
  }

  // Texel buffers
  for (size_t i = 0; i < m_texel_buffers.size(); i++)
  {
    if (m_texel_buffers[i] != VK_NULL_HANDLE)
    {
      set_writes[num_set_writes++] = {
          VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,  nullptr, set,     5 + static_cast<u32>(i), 0, 1,
          VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, nullptr, nullptr, &m_texel_buffers[i]};
    }
  }

  // Storage image
  if (m_storage_image.imageView != VK_NULL_HANDLE)
  {
    set_writes[num_set_writes++] = {
        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr,          set,     7,      0, 1,
        VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,       &m_storage_image, nullptr, nullptr};
  }

  if (num_set_writes > 0)
  {
    vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_set_writes, set_writes.data(), 0,
                           nullptr);
  }

  // The single dynamic offset belongs to the dynamic uniform buffer at binding 0.
  vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                          m_pipeline_info.pipeline_layout, 0, 1, &set, 1, &m_uniform_buffer_offset);
}
bool ComputeShaderDispatcher::BindPipeline()
{
VkPipeline pipeline = g_object_cache->GetComputePipeline(m_pipeline_info);
if (pipeline == VK_NULL_HANDLE)
{
PanicAlert("Failed to get pipeline for backend compute dispatch");
return false;
}
vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
return true;
}
} // namespace Vulkan } // namespace Vulkan

View File

@ -63,6 +63,10 @@ VkShaderModule CompileAndCreateGeometryShader(const std::string& source_code,
// Compile a fragment shader and create a shader module, discarding the intermediate SPIR-V. // Compile a fragment shader and create a shader module, discarding the intermediate SPIR-V.
VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code, VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code,
bool prepend_header = true); bool prepend_header = true);
// Compile a compute shader and create a shader module, discarding the intermediate SPIR-V.
VkShaderModule CompileAndCreateComputeShader(const std::string& source_code,
bool prepend_header = true);
} }
// Utility shader vertex format // Utility shader vertex format
@ -188,4 +192,41 @@ private:
PipelineInfo m_pipeline_info = {}; PipelineInfo m_pipeline_info = {};
}; };
// Helper that gathers the state for a single compute shader invocation (uniform data, push
// constants, samplers, texel buffers and a storage image) and records it into the command
// buffer supplied at construction.
class ComputeShaderDispatcher
{
public:
ComputeShaderDispatcher(VkCommandBuffer command_buffer, VkPipelineLayout pipeline_layout,
VkShaderModule compute_shader);
// Uniform buffer interface: allocate space, fill it through the returned pointer, then commit.
// NOTE(review): the exact allocation/commit contract lives in the implementation - confirm there.
u8* AllocateUniformBuffer(size_t size);
void CommitUniformBuffer(size_t size);
void SetPushConstants(const void* data, size_t data_size);
// Resource setters. index selects a slot in the fixed-size arrays below
// (4 sampler slots, 2 texel buffer slots).
void SetSampler(size_t index, VkImageView view, VkSampler sampler);
void SetTexelBuffer(size_t index, VkBufferView view);
void SetStorageImage(VkImageView view, VkImageLayout image_layout);
// Records the dispatch with the given number of thread groups in each dimension.
void Dispatch(u32 groups_x, u32 groups_y, u32 groups_z);
private:
void BindDescriptors();
// Returns false if no pipeline could be obtained for m_pipeline_info.
bool BindPipeline();
VkCommandBuffer m_command_buffer = VK_NULL_HANDLE;
VkDescriptorBufferInfo m_uniform_buffer = {};
// Passed as the dynamic offset when the descriptor set is bound.
u32 m_uniform_buffer_offset = 0;
std::array<VkDescriptorImageInfo, 4> m_samplers = {};
// Non-null views are written as uniform texel buffers at binding (5 + index).
std::array<VkBufferView, 2> m_texel_buffers = {};
// Written as a storage image at binding 7 when its imageView is non-null.
VkDescriptorImageInfo m_storage_image = {};
ComputePipelineInfo m_pipeline_info = {};
};
} // namespace Vulkan } // namespace Vulkan

View File

@ -234,6 +234,8 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsPaletteConversion = true; // Assumed support. config->backend_info.bSupportsPaletteConversion = true; // Assumed support.
config->backend_info.bSupportsClipControl = true; // Assumed support. config->backend_info.bSupportsClipControl = true; // Assumed support.
config->backend_info.bSupportsMultithreading = true; // Assumed support. config->backend_info.bSupportsMultithreading = true; // Assumed support.
config->backend_info.bSupportsComputeShaders = true; // Assumed support.
config->backend_info.bSupportsGPUTextureDecoding = true; // Assumed support.
config->backend_info.bSupportsInternalResolutionFrameDumps = true; // Assumed support. config->backend_info.bSupportsInternalResolutionFrameDumps = true; // Assumed support.
config->backend_info.bSupportsPostProcessing = false; // No support yet. config->backend_info.bSupportsPostProcessing = false; // No support yet.
config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features. config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features.

View File

@ -110,7 +110,8 @@ void TextureCacheBase::OnConfigChanged(VideoConfig& config)
if (config.iSafeTextureCache_ColorSamples != backup_config.color_samples || if (config.iSafeTextureCache_ColorSamples != backup_config.color_samples ||
config.bTexFmtOverlayEnable != backup_config.texfmt_overlay || config.bTexFmtOverlayEnable != backup_config.texfmt_overlay ||
config.bTexFmtOverlayCenter != backup_config.texfmt_overlay_center || config.bTexFmtOverlayCenter != backup_config.texfmt_overlay_center ||
config.bHiresTextures != backup_config.hires_textures) config.bHiresTextures != backup_config.hires_textures ||
config.bEnableGPUTextureDecoding != backup_config.gpu_texture_decoding)
{ {
Invalidate(); Invalidate();
@ -209,6 +210,7 @@ void TextureCacheBase::SetBackupConfig(const VideoConfig& config)
backup_config.cache_hires_textures = config.bCacheHiresTextures; backup_config.cache_hires_textures = config.bCacheHiresTextures;
backup_config.stereo_3d = config.iStereoMode > 0; backup_config.stereo_3d = config.iStereoMode > 0;
backup_config.efb_mono_depth = config.bStereoEFBMonoDepth; backup_config.efb_mono_depth = config.bStereoEFBMonoDepth;
backup_config.gpu_texture_decoding = config.bEnableGPUTextureDecoding;
} }
TextureCacheBase::TCacheEntryBase* TextureCacheBase::ApplyPaletteToEntry(TCacheEntryBase* entry, TextureCacheBase::TCacheEntryBase* TextureCacheBase::ApplyPaletteToEntry(TCacheEntryBase* entry,
@ -526,6 +528,7 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
const u32 texture_size = const u32 texture_size =
TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat); TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
u32 bytes_per_block = (bsw * bsh * TexDecoder_GetTexelSizeInNibbles(texformat)) / 2;
u32 additional_mips_size = 0; // not including level 0, which is texture_size u32 additional_mips_size = 0; // not including level 0, which is texture_size
// GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in // GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in
@ -755,6 +758,17 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
// how many levels the allocated texture shall have // how many levels the allocated texture shall have
const u32 texLevels = hires_tex ? (u32)hires_tex->m_levels.size() : tex_levels; const u32 texLevels = hires_tex ? (u32)hires_tex->m_levels.size() : tex_levels;
// We can decode on the GPU if it is a supported format and the flag is enabled.
// Currently we don't decode RGBA8 textures from Tmem, as that would require copying from both
// banks, and if we're doing a copy we may as well just do the whole thing on the CPU, since
// there's no conversion between formats. In the future this could be extended with a separate
// shader, however.
bool decode_on_gpu =
!hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
g_texture_cache->SupportsGPUTextureDecode(static_cast<TextureFormat>(texformat),
static_cast<TlutFormat>(tlutfmt)) &&
!(from_tmem && texformat == GX_TF_RGBA8);
// create the entry/texture // create the entry/texture
TCacheEntryConfig config; TCacheEntryConfig config;
config.width = width; config.width = width;
@ -768,10 +782,19 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
return nullptr; return nullptr;
if (!hires_tex) if (!hires_tex)
{
const u8* tlut = &texMem[tlutaddr];
if (decode_on_gpu)
{
u32 row_stride = bytes_per_block * (expandedWidth / bsw);
g_texture_cache->DecodeTextureOnGPU(
entry, 0, src_data, texture_size, static_cast<TextureFormat>(texformat), width, height,
expandedWidth, expandedHeight, row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
}
else
{ {
if (!(texformat == GX_TF_RGBA8 && from_tmem)) if (!(texformat == GX_TF_RGBA8 && from_tmem))
{ {
const u8* tlut = &texMem[tlutaddr];
TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut, TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut,
(TlutFormat)tlutfmt); (TlutFormat)tlutfmt);
} }
@ -781,6 +804,9 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
&texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE]; &texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight); TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight);
} }
entry->Load(temp, width, height, expandedWidth, 0);
}
} }
iter = textures_by_address.emplace(address, entry); iter = textures_by_address.emplace(address, entry);
@ -797,9 +823,6 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
entry->is_efb_copy = false; entry->is_efb_copy = false;
entry->is_custom_tex = hires_tex != nullptr; entry->is_custom_tex = hires_tex != nullptr;
// load texture
entry->Load(temp, width, height, expandedWidth, 0);
std::string basename = ""; std::string basename = "";
if (g_ActiveConfig.bDumpTextures && !hires_tex) if (g_ActiveConfig.bDumpTextures && !hires_tex)
{ {
@ -840,13 +863,26 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
const u32 expanded_mip_height = Common::AlignUp(mip_height, bsh); const u32 expanded_mip_height = Common::AlignUp(mip_height, bsh);
const u8*& mip_src_data = from_tmem ? ((level % 2) ? ptr_odd : ptr_even) : src_data; const u8*& mip_src_data = from_tmem ? ((level % 2) ? ptr_odd : ptr_even) : src_data;
size_t mip_size =
TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
const u8* tlut = &texMem[tlutaddr]; const u8* tlut = &texMem[tlutaddr];
if (decode_on_gpu)
{
  // row_stride is the byte size of one full row of blocks, so it has to be derived from the
  // block-aligned (expanded) width, exactly as is done for level 0. Deriving it from mip_width
  // truncates the block count for mips whose width is not a multiple of the block width, and
  // yields a stride of zero once mip_width drops below bsw.
  u32 row_stride = bytes_per_block * (expanded_mip_width / bsw);
  g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size,
                                      static_cast<TextureFormat>(texformat), mip_width,
                                      mip_height, expanded_mip_width, expanded_mip_height,
                                      row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
}
else
{
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat,
tlut, (TlutFormat)tlutfmt); tlut, (TlutFormat)tlutfmt);
mip_src_data +=
TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
entry->Load(temp, mip_width, mip_height, expanded_mip_width, level); entry->Load(temp, mip_width, mip_height, expanded_mip_width, level);
}
mip_src_data += mip_size;
if (g_ActiveConfig.bDumpTextures) if (g_ActiveConfig.bDumpTextures)
DumpTexture(entry, basename, level); DumpTexture(entry, basename, level);

View File

@ -171,6 +171,23 @@ public:
virtual void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette, virtual void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette,
TlutFormat format) = 0; TlutFormat format) = 0;
// Returns true if the texture data and palette formats are supported by the GPU decoder.
// This base implementation always reports no support; being virtual, a backend that implements
// DecodeTextureOnGPU is expected to override it.
virtual bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
{
return false;
}
// Decodes the specified data to the GPU texture specified by entry.
// dst_level is the mip level of entry that receives the decoded image.
// data, data_size describe the encoded source texture data.
// width, height are the size of the image in pixels.
// aligned_width, aligned_height are the size of the image in pixels, aligned to the block size.
// row_stride is the number of bytes for a row of blocks, not pixels.
// palette, palette_format describe the TLUT used by paletted formats.
// The base implementation does nothing; Load() only calls this after
// SupportsGPUTextureDecode() has returned true for the format.
virtual void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format)
{
}
protected: protected:
TextureCacheBase(); TextureCacheBase();
@ -225,6 +242,7 @@ private:
bool copy_cache_enable; bool copy_cache_enable;
bool stereo_3d; bool stereo_3d;
bool efb_mono_depth; bool efb_mono_depth;
bool gpu_texture_decoding;
}; };
BackupConfig backup_config = {}; BackupConfig backup_config = {};
}; };

View File

@ -2,9 +2,13 @@
// Licensed under GPLv2+ // Licensed under GPLv2+
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <array>
#include <cmath> #include <cmath>
#include <cstdio> #include <cstdio>
#include <map>
#include <sstream>
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/MathUtil.h" #include "Common/MathUtil.h"
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
@ -720,4 +724,546 @@ const char* GenerateEncodingShader(u32 format, APIType ApiType)
return text; return text;
} }
// Common GLSL prologue that GenerateDecodingShader() emits in front of every per-format
// decoding shader body. It declares the decoder parameters (as push constants when VULKAN is
// defined, as plain uniforms otherwise), the input/palette texel buffers and the output image,
// and provides helpers shared by the shader bodies: Swap16, the ConvertNTo8 bit expanders,
// GetTiledTexelOffset, and GetPaletteColor/GetPaletteColorNormalized (whose decoding is selected
// by the PALETTE_FORMAT_* define emitted before this header).
// NOTE: In these uniforms, a row refers to a row of blocks, not texels.
// NOTE(review): offsets and strides appear to be measured in texel buffer elements rather than
// bytes (see the RGBA8 shader body) - confirm against the backend that fills them in.
static const char decoding_shader_header[] = R"(
#ifdef VULKAN
layout(std140, push_constant) uniform PushConstants {
uvec2 dst_size;
uvec2 src_size;
uint src_offset;
uint src_row_stride;
uint palette_offset;
} push_constants;
#define u_dst_size (push_constants.dst_size)
#define u_src_size (push_constants.src_size)
#define u_src_offset (push_constants.src_offset)
#define u_src_row_stride (push_constants.src_row_stride)
#define u_palette_offset (push_constants.palette_offset)
TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image;
#else
uniform uvec2 u_dst_size;
uniform uvec2 u_src_size;
uniform uint u_src_offset;
uniform uint u_src_row_stride;
uniform uint u_palette_offset;
SAMPLER_BINDING(9) uniform usamplerBuffer s_input_buffer;
SAMPLER_BINDING(10) uniform usamplerBuffer s_palette_buffer;
layout(rgba8, binding = 0) uniform writeonly image2DArray output_image;
#endif
uint Swap16(uint v)
{
// Convert BE to LE.
return ((v >> 8) | (v << 8)) & 0xFFFFu;
}
uint Convert3To8(uint v)
{
// Swizzle bits: 00000123 -> 12312312
return (v << 5) | (v << 2) | (v >> 1);
}
uint Convert4To8(uint v)
{
// Swizzle bits: 00001234 -> 12341234
return (v << 4) | v;
}
uint Convert5To8(uint v)
{
// Swizzle bits: 00012345 -> 12345123
return (v << 3) | (v >> 2);
}
uint Convert6To8(uint v)
{
// Swizzle bits: 00123456 -> 12345612
return (v << 2) | (v >> 4);
}
uint GetTiledTexelOffset(uvec2 block_size, uvec2 coords)
{
uvec2 block = coords / block_size;
uvec2 offset = coords % block_size;
uint buffer_pos = u_src_offset;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * (block_size.x * block_size.y);
buffer_pos += offset.y * block_size.x;
buffer_pos += offset.x;
return buffer_pos;
}
uvec4 GetPaletteColor(uint index)
{
// Fetch and swap BE to LE.
uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x);
uvec4 color;
#if defined(PALETTE_FORMAT_IA8)
uint a = bitfieldExtract(val, 8, 8);
uint i = bitfieldExtract(val, 0, 8);
color = uvec4(i, i, i, a);
#elif defined(PALETTE_FORMAT_RGB565)
color.x = Convert5To8(bitfieldExtract(val, 11, 5));
color.y = Convert6To8(bitfieldExtract(val, 5, 6));
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
color.a = 255u;
#elif defined(PALETTE_FORMAT_RGB5A3)
if ((val & 0x8000u) != 0u)
{
color.x = Convert5To8(bitfieldExtract(val, 10, 5));
color.y = Convert5To8(bitfieldExtract(val, 5, 5));
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
color.a = 255u;
}
else
{
color.a = Convert3To8(bitfieldExtract(val, 12, 3));
color.r = Convert4To8(bitfieldExtract(val, 8, 4));
color.g = Convert4To8(bitfieldExtract(val, 4, 4));
color.b = Convert4To8(bitfieldExtract(val, 0, 4));
}
#else
// Not used.
color = uvec4(0, 0, 0, 0);
#endif
return color;
}
vec4 GetPaletteColorNormalized(uint index)
{
uvec4 color = GetPaletteColor(index);
return vec4(color) / 255.0;
}
)";
// Table of GPU decoding shaders, keyed by GameCube/Wii texture format.
// Each DecodingShaderInfo entry lists, in order:
//   - the texel buffer view format used to read the source data,
//   - the palette size (0 for non-paletted formats),
//   - the compute thread group size in X and Y (must match the shader's local_size_*),
//   - whether the dispatch is flattened to a single dimension (see GetDispatchCount),
//   - the GLSL body, which is appended to decoding_shader_header.
// Formats without an entry here have no GPU decoder; GetDecodingShaderInfo returns nullptr.
static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
    // I4: 4-bit intensity, two texels per byte.
    {GX_TF_I4,
     {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
      R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x8 blocks, 4 bits per pixel
// We need to do the tiling manually here because the texel size is smaller than
// the size of the buffer elements.
uvec2 block = coords.xy / 8u;
uvec2 offset = coords.xy % 8u;
uint buffer_pos = u_src_offset;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x / 2u;
// Select high nibble for even texels, low for odd.
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint i;
if ((coords.x & 1u) == 0u)
i = Convert4To8((val >> 4));
else
i = Convert4To8((val & 0x0Fu));
uvec4 color = uvec4(i, i, i, i);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
    // IA4: 4-bit alpha (high nibble) and 4-bit intensity (low nibble) per byte.
    {GX_TF_IA4,
     {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
      R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint i = Convert4To8((val & 0x0Fu));
uint a = Convert4To8((val >> 4));
uvec4 color = uvec4(i, i, i, a);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
    // I8: 8-bit intensity.
    {GX_TF_I8,
     {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
      R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
uint i = texelFetch(s_input_buffer, int(buffer_pos)).x;
uvec4 color = uvec4(i, i, i, i);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
    // IA8: 8-bit alpha and 8-bit intensity, read as one 16-bit element.
    {GX_TF_IA8,
     {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
      R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint a = (val & 0xFFu);
uint i = (val >> 8);
uvec4 color = uvec4(i, i, i, a);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
    // RGB565: big-endian 16-bit colour.
    {GX_TF_RGB565,
     {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
      R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uvec4 color;
color.x = Convert5To8(bitfieldExtract(val, 11, 5));
color.y = Convert6To8(bitfieldExtract(val, 5, 6));
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
color.a = 255u;
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
    // RGB5A3: big-endian 16-bit; top bit selects opaque RGB555 or RGB444 with 3-bit alpha.
    {GX_TF_RGB5A3,
     {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
      R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
uvec4 color;
if ((val & 0x8000u) != 0u)
{
color.x = Convert5To8(bitfieldExtract(val, 10, 5));
color.y = Convert5To8(bitfieldExtract(val, 5, 5));
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
color.a = 255u;
}
else
{
color.a = Convert3To8(bitfieldExtract(val, 12, 3));
color.r = Convert4To8(bitfieldExtract(val, 8, 4));
color.g = Convert4To8(bitfieldExtract(val, 4, 4));
color.b = Convert4To8(bitfieldExtract(val, 0, 4));
}
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
    // RGBA8: each 4x4 block stores all AR pairs first, followed by all GB pairs.
    {GX_TF_RGBA8,
     {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
      R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks
// We can't use the normal calculation function, as these are packed as the AR channels
// for the entire block, then the GB channels afterwards.
uvec2 block = coords.xy / 4u;
uvec2 offset = coords.xy % 4u;
uint buffer_pos = u_src_offset;
// Our buffer has 16-bit elements, so the offsets here are half what they would be in bytes.
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x;
// The two GB channels follow after the block's AR channels.
uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x;
uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x;
uvec4 color;
color.a = (val1 & 0xFFu);
color.r = (val1 >> 8);
color.g = (val2 & 0xFFu);
color.b = (val2 >> 8);
vec4 norm_color = vec4(color) / 255.0;
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
    // CMPR: DXT1-style compressed blocks; dispatched as a flattened 1D grid of 64-thread groups.
    {GX_TF_CMPR,
     {BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true,
      R"(
// In the compute version of this decoder, we flatten the blocks to a one-dimension array.
// Each group is subdivided into 16, and the first thread in each group fetches the DXT data.
// All threads then calculate the possible colors for the block and write to the output image.
#define GROUP_SIZE 64u
#define BLOCK_SIZE_X 4u
#define BLOCK_SIZE_Y 4u
#define BLOCK_SIZE (BLOCK_SIZE_X * BLOCK_SIZE_Y)
#define BLOCKS_PER_GROUP (GROUP_SIZE / BLOCK_SIZE)
layout(local_size_x = GROUP_SIZE, local_size_y = 1) in;
shared uvec2 shared_temp[BLOCKS_PER_GROUP];
uint DXTBlend(uint v1, uint v2)
{
// 3/8 blend, which is close to 1/3
return ((v1 * 3u + v2 * 5u) >> 3);
}
void main()
{
uint local_thread_id = gl_LocalInvocationID.x;
uint block_in_group = local_thread_id / BLOCK_SIZE;
uint thread_in_block = local_thread_id % BLOCK_SIZE;
uint block_index = gl_WorkGroupID.x * BLOCKS_PER_GROUP + block_in_group;
// Annoyingly, we can't precalculate this as a uniform because the DXT block size differs
// from the block size of the overall texture (4 vs 8). We can however use a multiply and
// subtraction to avoid the modulo for calculating the block's X coordinate.
uint blocks_wide = u_src_size.x / BLOCK_SIZE_X;
uvec2 block_coords;
block_coords.y = block_index / blocks_wide;
block_coords.x = block_index - (block_coords.y * blocks_wide);
// Only the first thread for each block reads from the texel buffer.
if (thread_in_block == 0u)
{
// Calculate tiled block coordinates.
uvec2 tile_block_coords = block_coords / 2u;
uvec2 subtile_block_coords = block_coords % 2u;
uint buffer_pos = u_src_offset;
buffer_pos += tile_block_coords.y * u_src_row_stride;
buffer_pos += tile_block_coords.x * 4u;
buffer_pos += subtile_block_coords.y * 2u;
buffer_pos += subtile_block_coords.x;
// Read the entire DXT block to shared memory.
uvec2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy;
shared_temp[block_in_group] = raw_data;
}
// Ensure store is completed before the remaining threads in the block continue.
memoryBarrierShared();
barrier();
// Unpack colors and swap BE to LE.
uvec2 raw_data = shared_temp[block_in_group];
uint swapped = ((raw_data.x & 0xFF00FF00u) >> 8) | ((raw_data.x & 0x00FF00FFu) << 8);
uint c1 = swapped & 0xFFFFu;
uint c2 = swapped >> 16;
// Expand 5/6 bit channels to 8-bits per channel.
uint blue1 = Convert5To8(bitfieldExtract(c1, 0, 5));
uint blue2 = Convert5To8(bitfieldExtract(c2, 0, 5));
uint green1 = Convert6To8(bitfieldExtract(c1, 5, 6));
uint green2 = Convert6To8(bitfieldExtract(c2, 5, 6));
uint red1 = Convert5To8(bitfieldExtract(c1, 11, 5));
uint red2 = Convert5To8(bitfieldExtract(c2, 11, 5));
// Determine the four colors the block can use.
// It's quicker to just precalculate all four colors rather than branching on the index.
// NOTE: These must be masked with 0xFF. This is done at the normalization stage below.
uvec4 color0, color1, color2, color3;
color0 = uvec4(red1, green1, blue1, 255u);
color1 = uvec4(red2, green2, blue2, 255u);
if (c1 > c2)
{
color2 = uvec4(DXTBlend(red2, red1), DXTBlend(green2, green1), DXTBlend(blue2, blue1), 255u);
color3 = uvec4(DXTBlend(red1, red2), DXTBlend(green1, green2), DXTBlend(blue1, blue2), 255u);
}
else
{
color2 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 255u);
color3 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 0u);
}
// Calculate the texel coordinates that we will write to.
// The divides/modulo here should be turned into a shift/binary AND.
uint local_y = thread_in_block / BLOCK_SIZE_X;
uint local_x = thread_in_block % BLOCK_SIZE_X;
uint global_x = block_coords.x * BLOCK_SIZE_X + local_x;
uint global_y = block_coords.y * BLOCK_SIZE_Y + local_y;
// Use the coordinates within the block to shift the 32-bit value containing
// all 16 indices to a single 2-bit index.
uint index = bitfieldExtract(raw_data.y, int((local_y * 8u) + (6u - local_x * 2u)), 2);
// Select the un-normalized color from the precalculated color array.
// Using a switch statement here removes the need for dynamic indexing of an array.
uvec4 color;
switch (index)
{
case 0u: color = color0; break;
case 1u: color = color1; break;
case 2u: color = color2; break;
case 3u: color = color3; break;
default: color = color0; break;
}
// Normalize and write to the output image.
vec4 norm_color = vec4(color & 0xFFu) / 255.0;
imageStore(output_image, ivec3(ivec2(uvec2(global_x, global_y)), 0), norm_color);
}
)"}},
    // C4: 4-bit palette index, two texels per byte.
    {GX_TF_C4,
     {BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C4)), 8, 8, false,
      R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x8 blocks, 4 bits per pixel
// We need to do the tiling manually here because the texel size is smaller than
// the size of the buffer elements.
uvec2 block = coords.xy / 8u;
uvec2 offset = coords.xy % 8u;
uint buffer_pos = u_src_offset;
buffer_pos += block.y * u_src_row_stride;
buffer_pos += block.x * 32u;
buffer_pos += offset.y * 4u;
buffer_pos += offset.x / 2u;
// Select high nibble for even texels, low for odd.
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
uint index = ((coords.x & 1u) == 0u) ? (val >> 4) : (val & 0x0Fu);
vec4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
    // C8: 8-bit palette index.
    {GX_TF_C8,
     {BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C8)), 8, 8, false,
      R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 8x4 blocks, 8 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
uint index = texelFetch(s_input_buffer, int(buffer_pos)).x;
vec4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}},
    // C14X2: 14-bit palette index stored in a big-endian 16-bit value.
    {GX_TF_C14X2,
     {BUFFER_FORMAT_R16_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C14X2)), 8, 8, false,
      R"(
layout(local_size_x = 8, local_size_y = 8) in;
void main()
{
uvec2 coords = gl_GlobalInvocationID.xy;
// Tiled in 4x4 blocks, 16 bits per pixel
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
// Swap BE to LE before masking off the 14-bit index.
uint index = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu;
vec4 norm_color = GetPaletteColorNormalized(index);
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
}
)"}}};
// Size in bytes of one texel buffer element, indexed by BufferFormat.
// The order of the entries must match the order of the BufferFormat enumerators.
static const std::array<u32, BUFFER_FORMAT_COUNT> s_buffer_bytes_per_texel = {{
1, // BUFFER_FORMAT_R8_UINT
2, // BUFFER_FORMAT_R16_UINT
8, // BUFFER_FORMAT_R32G32_UINT
}};
// Finds the decoding shader description registered for the given texture format.
// Returns a pointer into the static shader table, or nullptr when the format has no entry.
const DecodingShaderInfo* GetDecodingShaderInfo(u32 format)
{
  const auto entry = s_decoding_shader_info.find(static_cast<TextureFormat>(format));
  if (entry == s_decoding_shader_info.end())
    return nullptr;

  return &entry->second;
}
// Returns the size in bytes of a single element of a texel buffer viewed as buffer_format.
// The value is read straight from the per-format table; buffer_format is not range-checked.
u32 GetBytesPerBufferElement(BufferFormat buffer_format)
{
  const size_t table_index = buffer_format;
  return s_buffer_bytes_per_texel[table_index];
}
std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height)
{
// Flatten to a single dimension?
if (info->group_flatten)
return {(width * height + (info->group_size_x - 1)) / info->group_size_x, 1};
return {(width + (info->group_size_x - 1)) / info->group_size_x,
(height + (info->group_size_y - 1)) / info->group_size_y};
}
// Assembles the GLSL source of the decoding shader for the given texture format.
// The result is, in order: an optional PALETTE_FORMAT_* define chosen by palette_format, the
// shared decoding header, and the format-specific shader body.
// Returns an empty string when no decoding shader exists for the format.
// api_type is accepted for interface symmetry but is not consulted here.
std::string GenerateDecodingShader(u32 format, u32 palette_format, APIType api_type)
{
  const DecodingShaderInfo* const shader_info = GetDecodingShaderInfo(format);
  if (shader_info == nullptr)
    return "";

  // Pick the define that selects the palette decoding path in the header, if any.
  const char* palette_define = "";
  switch (palette_format)
  {
  case GX_TL_IA8:
    palette_define = "#define PALETTE_FORMAT_IA8 1\n";
    break;
  case GX_TL_RGB565:
    palette_define = "#define PALETTE_FORMAT_RGB565 1\n";
    break;
  case GX_TL_RGB5A3:
    palette_define = "#define PALETTE_FORMAT_RGB5A3 1\n";
    break;
  }

  std::stringstream source;
  source << palette_define;
  source << decoding_shader_header;
  source << shader_info->shader_body;
  return source.str();
}
} // namespace } // namespace

View File

@ -4,6 +4,9 @@
#pragma once #pragma once
#include <string>
#include <utility>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
enum class APIType; enum class APIType;
@ -13,4 +16,40 @@ namespace TextureConversionShader
u16 GetEncodedSampleCount(u32 format); u16 GetEncodedSampleCount(u32 format);
const char* GenerateEncodingShader(u32 format, APIType ApiType); const char* GenerateEncodingShader(u32 format, APIType ApiType);
}
// View format of the input data to the texture decoding shader.
// The enumerator values index the bytes-per-element table behind GetBytesPerBufferElement(),
// so new formats must be added before BUFFER_FORMAT_COUNT and mirrored in that table.
enum BufferFormat
{
BUFFER_FORMAT_R8_UINT,      // 1 byte per element
BUFFER_FORMAT_R16_UINT,     // 2 bytes per element
BUFFER_FORMAT_R32G32_UINT,  // 8 bytes per element
BUFFER_FORMAT_COUNT         // Number of formats; not a valid format itself.
};
// Information required to compile and dispatch a texture decoding shader.
struct DecodingShaderInfo
{
// View format used to read the encoded source data in the shader.
BufferFormat buffer_format;
// Palette size for paletted formats (taken from TexDecoder_GetPaletteSize), 0 otherwise.
u32 palette_size;
// Thread group dimensions; GetDispatchCount() divides the image size by these.
u32 group_size_x;
u32 group_size_y;
// When true, GetDispatchCount() flattens the dispatch to a single dimension.
bool group_flatten;
// GLSL source of the shader, appended after the common decoding header.
const char* shader_body;
};
// Obtain shader information for the specified texture format.
// If this format does not have a shader written for it, returns nullptr.
const DecodingShaderInfo* GetDecodingShaderInfo(u32 format);
// Determine how many bytes there are in each element of the texel buffer.
// Needed for alignment and stride calculations.
u32 GetBytesPerBufferElement(BufferFormat buffer_format);
// Determine how many thread groups should be dispatched for an image of the specified width/height.
// First is the number of X groups, second is the number of Y groups, Z is always one.
std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height);
// Returns the GLSL string containing the texture decoding shader for the specified format.
std::string GenerateDecodingShader(u32 format, u32 palette_format, APIType api_type);
} // namespace TextureConversionShader

View File

@ -81,6 +81,7 @@ void VideoConfig::Load(const std::string& ini_file)
settings->Get("DumpPath", &sDumpPath, ""); settings->Get("DumpPath", &sDumpPath, "");
settings->Get("BitrateKbps", &iBitrateKbps, 2500); settings->Get("BitrateKbps", &iBitrateKbps, 2500);
settings->Get("InternalResolutionFrameDumps", &bInternalResolutionFrameDumps, false); settings->Get("InternalResolutionFrameDumps", &bInternalResolutionFrameDumps, false);
settings->Get("EnableGPUTextureDecoding", &bEnableGPUTextureDecoding, false);
settings->Get("EnablePixelLighting", &bEnablePixelLighting, false); settings->Get("EnablePixelLighting", &bEnablePixelLighting, false);
settings->Get("FastDepthCalc", &bFastDepthCalc, true); settings->Get("FastDepthCalc", &bFastDepthCalc, true);
settings->Get("MSAA", &iMultisamples, 1); settings->Get("MSAA", &iMultisamples, 1);
@ -305,6 +306,7 @@ void VideoConfig::Save(const std::string& ini_file)
settings->Set("DumpPath", sDumpPath); settings->Set("DumpPath", sDumpPath);
settings->Set("BitrateKbps", iBitrateKbps); settings->Set("BitrateKbps", iBitrateKbps);
settings->Set("InternalResolutionFrameDumps", bInternalResolutionFrameDumps); settings->Set("InternalResolutionFrameDumps", bInternalResolutionFrameDumps);
settings->Set("EnableGPUTextureDecoding", bEnableGPUTextureDecoding);
settings->Set("EnablePixelLighting", bEnablePixelLighting); settings->Set("EnablePixelLighting", bEnablePixelLighting);
settings->Set("FastDepthCalc", bFastDepthCalc); settings->Set("FastDepthCalc", bFastDepthCalc);
settings->Set("MSAA", iMultisamples); settings->Set("MSAA", iMultisamples);

View File

@ -108,6 +108,7 @@ struct VideoConfig final
bool bInternalResolutionFrameDumps; bool bInternalResolutionFrameDumps;
bool bFreeLook; bool bFreeLook;
bool bBorderlessFullscreen; bool bBorderlessFullscreen;
bool bEnableGPUTextureDecoding;
int iBitrateKbps; int iBitrateKbps;
// Hacks // Hacks
@ -181,6 +182,7 @@ struct VideoConfig final
bool bSupportsPrimitiveRestart; bool bSupportsPrimitiveRestart;
bool bSupportsOversizedViewports; bool bSupportsOversizedViewports;
bool bSupportsGeometryShaders; bool bSupportsGeometryShaders;
bool bSupportsComputeShaders;
bool bSupports3DVision; bool bSupports3DVision;
bool bSupportsEarlyZ; // needed by PixelShaderGen, so must stay in VideoCommon bool bSupportsEarlyZ; // needed by PixelShaderGen, so must stay in VideoCommon
bool bSupportsBindingLayout; // Needed by ShaderGen, so must stay in VideoCommon bool bSupportsBindingLayout; // Needed by ShaderGen, so must stay in VideoCommon
@ -195,6 +197,7 @@ struct VideoConfig final
bool bSupportsReversedDepthRange; bool bSupportsReversedDepthRange;
bool bSupportsMultithreading; bool bSupportsMultithreading;
bool bSupportsInternalResolutionFrameDumps; bool bSupportsInternalResolutionFrameDumps;
bool bSupportsGPUTextureDecoding;
} backend_info; } backend_info;
// Utility // Utility
@ -210,6 +213,10 @@ struct VideoConfig final
return false; return false;
return backend_info.bSupportsBBox && backend_info.bSupportsFragmentStoresAndAtomics; return backend_info.bSupportsBBox && backend_info.bSupportsFragmentStoresAndAtomics;
} }
// GPU texture decoding is used only when the backend advertises support for it and the
// user has enabled the option.
bool UseGPUTextureDecoding() const
{
  if (!backend_info.bSupportsGPUTextureDecoding)
    return false;

  return bEnableGPUTextureDecoding;
}
}; };
extern VideoConfig g_Config; extern VideoConfig g_Config;