Merge pull request #4467 from stenzek/gpu-texture-decoding
VideoBackends: GPU Texture Decoding
This commit is contained in:
commit
3bd184a255
|
@ -286,6 +286,7 @@ public final class SettingsFragmentPresenter
|
|||
BooleanSetting ignoreFormat = new BooleanSetting(SettingsFile.KEY_IGNORE_FORMAT, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, ignoreFormatValue);
|
||||
Setting efbToTexture = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_HACKS).getSetting(SettingsFile.KEY_EFB_TEXTURE);
|
||||
Setting texCacheAccuracy = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_TEXCACHE_ACCURACY);
|
||||
Setting gpuTextureDecoding = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_GPU_TEXTURE_DECODING);
|
||||
IntSetting xfb = new IntSetting(SettingsFile.KEY_XFB, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, xfbValue);
|
||||
Setting fastDepth = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_HACKS).getSetting(SettingsFile.KEY_FAST_DEPTH);
|
||||
Setting aspectRatio = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_ASPECT_RATIO);
|
||||
|
@ -297,6 +298,7 @@ public final class SettingsFragmentPresenter
|
|||
|
||||
sl.add(new HeaderSetting(null, null, R.string.texture_cache, 0));
|
||||
sl.add(new SingleChoiceSetting(SettingsFile.KEY_TEXCACHE_ACCURACY, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.texture_cache_accuracy, R.string.texture_cache_accuracy_descrip, R.array.textureCacheAccuracyEntries, R.array.textureCacheAccuracyValues, 128, texCacheAccuracy));
|
||||
sl.add(new CheckBoxSetting(SettingsFile.KEY_GPU_TEXTURE_DECODING, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.gpu_texture_decoding, R.string.gpu_texture_decoding_descrip, false, gpuTextureDecoding));
|
||||
|
||||
sl.add(new HeaderSetting(null, null, R.string.external_frame_buffer, 0));
|
||||
sl.add(new SingleChoiceSetting(SettingsFile.KEY_XFB_METHOD, SettingsFile.SECTION_GFX_HACKS, SettingsFile.SETTINGS_GFX, R.string.external_frame_buffer, R.string.external_frame_buffer_descrip, R.array.externalFrameBufferEntries, R.array.externalFrameBufferValues, 0, xfb));
|
||||
|
|
|
@ -73,6 +73,7 @@ public final class SettingsFile
|
|||
public static final String KEY_IGNORE_FORMAT = "EFBEmulateFormatChanges";
|
||||
public static final String KEY_EFB_TEXTURE = "EFBToTextureEnable";
|
||||
public static final String KEY_TEXCACHE_ACCURACY = "SafeTextureCacheColorSamples";
|
||||
public static final String KEY_GPU_TEXTURE_DECODING = "EnableGPUTextureDecoding";
|
||||
public static final String KEY_XFB = "UseXFB";
|
||||
public static final String KEY_XFB_REAL = "UseRealXFB";
|
||||
public static final String KEY_FAST_DEPTH = "FastDepthCalc";
|
||||
|
|
|
@ -168,6 +168,8 @@
|
|||
<string name="texture_cache">Texture Cache</string>
|
||||
<string name="texture_cache_accuracy">Texture Cache Accuracy</string>
|
||||
<string name="texture_cache_accuracy_descrip">The safer the selection, the less likely the emulator will be missing any texture updates from RAM.</string>
|
||||
<string name="gpu_texture_decoding">GPU Texture Decoding</string>
|
||||
<string name="gpu_texture_decoding_descrip">Decodes textures on the GPU using compute shaders where supported. May improve performance in some scenarios.</string>
|
||||
<string name="external_frame_buffer">External Frame Buffer</string>
|
||||
<string name="external_frame_buffer_descrip">Determines how the XFB will be emulated.</string>
|
||||
<string name="disable_destination_alpha">Disable Destination Alpha</string>
|
||||
|
|
|
@ -72,6 +72,7 @@
|
|||
<ClInclude Include="GL\GLExtensions\ARB_blend_func_extended.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_buffer_storage.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_clip_control.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_compute_shader.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_copy_image.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_debug_output.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_draw_elements_base_vertex.h" />
|
||||
|
@ -83,9 +84,11 @@
|
|||
<ClInclude Include="GL\GLExtensions\ARB_occlusion_query2.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_sampler_objects.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_sample_shading.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_shader_image_load_store.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_shader_storage_buffer_object.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_sync.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_texture_multisample.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_texture_storage.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_texture_storage_multisample.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_uniform_buffer_object.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_vertex_array_object.h" />
|
||||
|
|
|
@ -238,6 +238,16 @@
|
|||
<ClInclude Include="NonCopyable.h" />
|
||||
<ClInclude Include="Analytics.h" />
|
||||
<ClInclude Include="Semaphore.h" />
|
||||
<ClInclude Include="MD5.h" />
|
||||
<ClInclude Include="GL\GLExtensions\ARB_texture_storage.h">
|
||||
<Filter>GL\GLExtensions</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GL\GLExtensions\ARB_shader_image_load_store.h">
|
||||
<Filter>GL\GLExtensions</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GL\GLExtensions\ARB_compute_shader.h">
|
||||
<Filter>GL\GLExtensions</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="CDUtils.cpp" />
|
||||
|
@ -303,6 +313,7 @@
|
|||
</ClCompile>
|
||||
<ClCompile Include="ucrtFreadWorkaround.cpp" />
|
||||
<ClCompile Include="Analytics.cpp" />
|
||||
<ClCompile Include="MD5.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Text Include="CMakeLists.txt" />
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
** Copyright (c) 2013-2015 The Khronos Group Inc.
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a
|
||||
** copy of this software and/or associated documentation files (the
|
||||
** "Materials"), to deal in the Materials without restriction, including
|
||||
** without limitation the rights to use, copy, modify, merge, publish,
|
||||
** distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
** permit persons to whom the Materials are furnished to do so, subject to
|
||||
** the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included
|
||||
** in all copies or substantial portions of the Materials.
|
||||
**
|
||||
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
*/
|
||||
|
||||
// Declarations for the ARB_compute_shader extension: enumerant values, the
// function-pointer types for the two dispatch entry points, extern declarations
// of the pointers, and macros that map the standard gl* names onto them.
#include "Common/GL/GLExtensions/gl_common.h"
|
||||
|
||||
// Enumerant values for the extension.
#define GL_COMPUTE_SHADER 0x91B9
|
||||
#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB
|
||||
#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
|
||||
#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD
|
||||
#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
|
||||
#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
|
||||
#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
|
||||
#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265
|
||||
#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
|
||||
#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
|
||||
#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
|
||||
#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
|
||||
#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267
|
||||
#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC
|
||||
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED
|
||||
#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE
|
||||
#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
|
||||
#define GL_COMPUTE_SHADER_BIT 0x00000020
|
||||
|
||||
// Function-pointer types for the dispatch entry points. APIENTRYP and the GL*
// scalar types are not declared here; presumably they come from gl_common.h.
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y,
|
||||
GLuint num_groups_z);
|
||||
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect);
|
||||
|
||||
// Declared extern only: the pointer objects are defined in another translation unit.
extern PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
|
||||
extern PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
|
||||
|
||||
// Route calls written with the standard GL names through the dol* pointers.
#define glDispatchCompute dolDispatchCompute
|
||||
#define glDispatchComputeIndirect dolDispatchComputeIndirect
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
** Copyright (c) 2013-2015 The Khronos Group Inc.
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a
|
||||
** copy of this software and/or associated documentation files (the
|
||||
** "Materials"), to deal in the Materials without restriction, including
|
||||
** without limitation the rights to use, copy, modify, merge, publish,
|
||||
** distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
** permit persons to whom the Materials are furnished to do so, subject to
|
||||
** the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included
|
||||
** in all copies or substantial portions of the Materials.
|
||||
**
|
||||
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
*/
|
||||
|
||||
// Declarations for the ARB_shader_image_load_store extension: enumerant values,
// function-pointer types for glBindImageTexture / glMemoryBarrier, extern
// declarations of the pointers, and macros mapping the gl* names onto them.
#include "Common/GL/GLExtensions/gl_common.h"
|
||||
|
||||
// Barrier bit flags (no token is defined here for bit 0x00000010). Presumably
// OR-ed together to form the GLbitfield passed to glMemoryBarrier.
#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
|
||||
#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002
|
||||
#define GL_UNIFORM_BARRIER_BIT 0x00000004
|
||||
#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008
|
||||
#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
|
||||
#define GL_COMMAND_BARRIER_BIT 0x00000040
|
||||
#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080
|
||||
#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100
|
||||
#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200
|
||||
#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400
|
||||
#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
|
||||
#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000
|
||||
#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
|
||||
// Remaining enumerant values for the extension.
#define GL_MAX_IMAGE_UNITS 0x8F38
|
||||
#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39
|
||||
#define GL_IMAGE_BINDING_NAME 0x8F3A
|
||||
#define GL_IMAGE_BINDING_LEVEL 0x8F3B
|
||||
#define GL_IMAGE_BINDING_LAYERED 0x8F3C
|
||||
#define GL_IMAGE_BINDING_LAYER 0x8F3D
|
||||
#define GL_IMAGE_BINDING_ACCESS 0x8F3E
|
||||
#define GL_IMAGE_1D 0x904C
|
||||
#define GL_IMAGE_2D 0x904D
|
||||
#define GL_IMAGE_3D 0x904E
|
||||
#define GL_IMAGE_2D_RECT 0x904F
|
||||
#define GL_IMAGE_CUBE 0x9050
|
||||
#define GL_IMAGE_BUFFER 0x9051
|
||||
#define GL_IMAGE_1D_ARRAY 0x9052
|
||||
#define GL_IMAGE_2D_ARRAY 0x9053
|
||||
#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054
|
||||
#define GL_IMAGE_2D_MULTISAMPLE 0x9055
|
||||
#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056
|
||||
#define GL_INT_IMAGE_1D 0x9057
|
||||
#define GL_INT_IMAGE_2D 0x9058
|
||||
#define GL_INT_IMAGE_3D 0x9059
|
||||
#define GL_INT_IMAGE_2D_RECT 0x905A
|
||||
#define GL_INT_IMAGE_CUBE 0x905B
|
||||
#define GL_INT_IMAGE_BUFFER 0x905C
|
||||
#define GL_INT_IMAGE_1D_ARRAY 0x905D
|
||||
#define GL_INT_IMAGE_2D_ARRAY 0x905E
|
||||
#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F
|
||||
#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060
|
||||
#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061
|
||||
#define GL_UNSIGNED_INT_IMAGE_1D 0x9062
|
||||
#define GL_UNSIGNED_INT_IMAGE_2D 0x9063
|
||||
#define GL_UNSIGNED_INT_IMAGE_3D 0x9064
|
||||
#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065
|
||||
#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066
|
||||
#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
|
||||
#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068
|
||||
#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
|
||||
#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
|
||||
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
|
||||
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
|
||||
#define GL_MAX_IMAGE_SAMPLES 0x906D
|
||||
#define GL_IMAGE_BINDING_FORMAT 0x906E
|
||||
#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
|
||||
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
|
||||
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
|
||||
#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA
|
||||
#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
|
||||
#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
|
||||
#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD
|
||||
#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE
|
||||
#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF
|
||||
|
||||
// Function-pointer types for the two entry points. APIENTRYP and the GL* scalar
// types are not declared here; presumably they come from gl_common.h.
typedef void(APIENTRYP PFNDOLBINDIMAGETEXTUREPROC)(GLuint unit, GLuint texture, GLint level,
|
||||
GLboolean layered, GLint layer, GLenum access,
|
||||
GLenum format);
|
||||
typedef void(APIENTRYP PFNDOLMEMORYBARRIERPROC)(GLbitfield barriers);
|
||||
|
||||
// Declared extern only: the pointer objects are defined in another translation unit.
extern PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
|
||||
extern PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
|
||||
|
||||
// Route calls written with the standard GL names through the dol* pointers.
#define glBindImageTexture dolBindImageTexture
|
||||
#define glMemoryBarrier dolMemoryBarrier
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
** Copyright (c) 2013-2015 The Khronos Group Inc.
|
||||
**
|
||||
** Permission is hereby granted, free of charge, to any person obtaining a
|
||||
** copy of this software and/or associated documentation files (the
|
||||
** "Materials"), to deal in the Materials without restriction, including
|
||||
** without limitation the rights to use, copy, modify, merge, publish,
|
||||
** distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
** permit persons to whom the Materials are furnished to do so, subject to
|
||||
** the following conditions:
|
||||
**
|
||||
** The above copyright notice and this permission notice shall be included
|
||||
** in all copies or substantial portions of the Materials.
|
||||
**
|
||||
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
*/
|
||||
|
||||
// Declarations for the ARB_texture_storage extension: one enumerant, the
// function-pointer types for glTexStorage1D/2D/3D, extern declarations of the
// pointers, and macros mapping the gl* names onto them.
#include "Common/GL/GLExtensions/gl_common.h"
|
||||
|
||||
// The only enumerant value declared for this extension here.
#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F
|
||||
|
||||
// Function-pointer types; the three variants differ only in how many size
// parameters (width / height / depth) follow `internalformat`.
typedef void(APIENTRYP PFNDOLTEXSTORAGE1DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
|
||||
GLsizei width);
|
||||
typedef void(APIENTRYP PFNDOLTEXSTORAGE2DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
|
||||
GLsizei width, GLsizei height);
|
||||
typedef void(APIENTRYP PFNDOLTEXSTORAGE3DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
|
||||
GLsizei width, GLsizei height, GLsizei depth);
|
||||
|
||||
// Declared extern only: the pointer objects are defined in another translation unit.
extern PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
|
||||
extern PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
|
||||
extern PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
|
||||
|
||||
// Route calls written with the standard GL names through the dol* pointers.
#define glTexStorage1D dolTexStorage1D
|
||||
#define glTexStorage2D dolTexStorage2D
|
||||
#define glTexStorage3D dolTexStorage3D
|
|
@ -653,19 +653,12 @@ PFNDOLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC
|
|||
dolDrawElementsInstancedBaseVertexBaseInstance;
|
||||
PFNDOLGETINTERNALFORMATIVPROC dolGetInternalformativ;
|
||||
PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC dolGetActiveAtomicCounterBufferiv;
|
||||
PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
|
||||
PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
|
||||
PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
|
||||
PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
|
||||
PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
|
||||
PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC dolDrawTransformFeedbackInstanced;
|
||||
PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackStreamInstanced;
|
||||
|
||||
// gl_4_3
|
||||
PFNDOLCLEARBUFFERDATAPROC dolClearBufferData;
|
||||
PFNDOLCLEARBUFFERSUBDATAPROC dolClearBufferSubData;
|
||||
PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
|
||||
PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
|
||||
PFNDOLFRAMEBUFFERPARAMETERIPROC dolFramebufferParameteri;
|
||||
PFNDOLGETFRAMEBUFFERPARAMETERIVPROC dolGetFramebufferParameteriv;
|
||||
PFNDOLGETINTERNALFORMATI64VPROC dolGetInternalformati64v;
|
||||
|
@ -905,6 +898,11 @@ PFNDOLTEXIMAGE3DMULTISAMPLEPROC dolTexImage3DMultisample;
|
|||
PFNDOLGETMULTISAMPLEFVPROC dolGetMultisamplefv;
|
||||
PFNDOLSAMPLEMASKIPROC dolSampleMaski;
|
||||
|
||||
// ARB_texture_storage
|
||||
PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
|
||||
PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
|
||||
PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
|
||||
|
||||
// ARB_texture_storage_multisample
|
||||
PFNDOLTEXSTORAGE2DMULTISAMPLEPROC dolTexStorage2DMultisample;
|
||||
PFNDOLTEXSTORAGE3DMULTISAMPLEPROC dolTexStorage3DMultisample;
|
||||
|
@ -989,6 +987,14 @@ PFNDOLDEPTHRANGEDNVPROC dolDepthRangedNV;
|
|||
PFNDOLCLEARDEPTHDNVPROC dolClearDepthdNV;
|
||||
PFNDOLDEPTHBOUNDSDNVPROC dolDepthBoundsdNV;
|
||||
|
||||
// ARB_shader_image_load_store
|
||||
PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
|
||||
PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
|
||||
|
||||
// ARB_compute_shader
|
||||
PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
|
||||
PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
|
||||
|
||||
// Creates a GLFunc object that requires a feature
|
||||
#define GLFUNC_REQUIRES(x, y) \
|
||||
{ \
|
||||
|
@ -1681,6 +1687,11 @@ const GLFunc gl_function_array[] = {
|
|||
GLFUNC_REQUIRES(glGetMultisamplefv, "GL_ARB_texture_multisample"),
|
||||
GLFUNC_REQUIRES(glSampleMaski, "GL_ARB_texture_multisample"),
|
||||
|
||||
// ARB_texture_storage
|
||||
GLFUNC_REQUIRES(glTexStorage1D, "GL_ARB_texture_storage !VERSION_4_2"),
|
||||
GLFUNC_REQUIRES(glTexStorage2D, "GL_ARB_texture_storage !VERSION_4_2 |VERSION_GLES_3"),
|
||||
GLFUNC_REQUIRES(glTexStorage3D, "GL_ARB_texture_storage !VERSION_4_2 |VERSION_GLES_3"),
|
||||
|
||||
// ARB_texture_storage_multisample
|
||||
GLFUNC_REQUIRES(glTexStorage2DMultisample,
|
||||
"GL_ARB_texture_storage_multisample !VERSION_4_3 |VERSION_GLES_3_1"),
|
||||
|
@ -1848,6 +1859,17 @@ const GLFunc gl_function_array[] = {
|
|||
GLFUNC_REQUIRES(glDepthRangedNV, "GL_NV_depth_buffer_float"),
|
||||
GLFUNC_REQUIRES(glClearDepthdNV, "GL_NV_depth_buffer_float"),
|
||||
GLFUNC_REQUIRES(glDepthBoundsdNV, "GL_NV_depth_buffer_float"),
|
||||
|
||||
// ARB_shader_image_load_store
|
||||
GLFUNC_REQUIRES(glBindImageTexture,
|
||||
"GL_ARB_shader_image_load_store !VERSION_4_2 |VERSION_GLES_3_1"),
|
||||
GLFUNC_REQUIRES(glMemoryBarrier,
|
||||
"GL_ARB_shader_image_load_store !VERSION_4_2 |VERSION_GLES_3_1"),
|
||||
|
||||
// ARB_compute_shader
|
||||
GLFUNC_REQUIRES(glDispatchCompute, "GL_ARB_compute_shader !VERSION_4_3 |VERSION_GLES_3_1"),
|
||||
GLFUNC_REQUIRES(glDispatchComputeIndirect,
|
||||
"GL_ARB_compute_shader !VERSION_4_3 |VERSION_GLES_3_1"),
|
||||
};
|
||||
|
||||
namespace GLExtensions
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "Common/GL/GLExtensions/ARB_blend_func_extended.h"
|
||||
#include "Common/GL/GLExtensions/ARB_buffer_storage.h"
|
||||
#include "Common/GL/GLExtensions/ARB_clip_control.h"
|
||||
#include "Common/GL/GLExtensions/ARB_compute_shader.h"
|
||||
#include "Common/GL/GLExtensions/ARB_copy_image.h"
|
||||
#include "Common/GL/GLExtensions/ARB_debug_output.h"
|
||||
#include "Common/GL/GLExtensions/ARB_draw_elements_base_vertex.h"
|
||||
|
@ -21,9 +22,11 @@
|
|||
#include "Common/GL/GLExtensions/ARB_occlusion_query2.h"
|
||||
#include "Common/GL/GLExtensions/ARB_sample_shading.h"
|
||||
#include "Common/GL/GLExtensions/ARB_sampler_objects.h"
|
||||
#include "Common/GL/GLExtensions/ARB_shader_image_load_store.h"
|
||||
#include "Common/GL/GLExtensions/ARB_shader_storage_buffer_object.h"
|
||||
#include "Common/GL/GLExtensions/ARB_sync.h"
|
||||
#include "Common/GL/GLExtensions/ARB_texture_multisample.h"
|
||||
#include "Common/GL/GLExtensions/ARB_texture_storage.h"
|
||||
#include "Common/GL/GLExtensions/ARB_texture_storage_multisample.h"
|
||||
#include "Common/GL/GLExtensions/ARB_uniform_buffer_object.h"
|
||||
#include "Common/GL/GLExtensions/ARB_vertex_array_object.h"
|
||||
|
|
|
@ -66,75 +66,10 @@
|
|||
#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS 0x92D9
|
||||
#define GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX 0x92DA
|
||||
#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB
|
||||
#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
|
||||
#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002
|
||||
#define GL_UNIFORM_BARRIER_BIT 0x00000004
|
||||
#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008
|
||||
#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
|
||||
#define GL_COMMAND_BARRIER_BIT 0x00000040
|
||||
#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080
|
||||
#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100
|
||||
#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200
|
||||
#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400
|
||||
#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
|
||||
#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000
|
||||
#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
|
||||
#define GL_MAX_IMAGE_UNITS 0x8F38
|
||||
#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39
|
||||
#define GL_IMAGE_BINDING_NAME 0x8F3A
|
||||
#define GL_IMAGE_BINDING_LEVEL 0x8F3B
|
||||
#define GL_IMAGE_BINDING_LAYERED 0x8F3C
|
||||
#define GL_IMAGE_BINDING_LAYER 0x8F3D
|
||||
#define GL_IMAGE_BINDING_ACCESS 0x8F3E
|
||||
#define GL_IMAGE_1D 0x904C
|
||||
#define GL_IMAGE_2D 0x904D
|
||||
#define GL_IMAGE_3D 0x904E
|
||||
#define GL_IMAGE_2D_RECT 0x904F
|
||||
#define GL_IMAGE_CUBE 0x9050
|
||||
#define GL_IMAGE_BUFFER 0x9051
|
||||
#define GL_IMAGE_1D_ARRAY 0x9052
|
||||
#define GL_IMAGE_2D_ARRAY 0x9053
|
||||
#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054
|
||||
#define GL_IMAGE_2D_MULTISAMPLE 0x9055
|
||||
#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056
|
||||
#define GL_INT_IMAGE_1D 0x9057
|
||||
#define GL_INT_IMAGE_2D 0x9058
|
||||
#define GL_INT_IMAGE_3D 0x9059
|
||||
#define GL_INT_IMAGE_2D_RECT 0x905A
|
||||
#define GL_INT_IMAGE_CUBE 0x905B
|
||||
#define GL_INT_IMAGE_BUFFER 0x905C
|
||||
#define GL_INT_IMAGE_1D_ARRAY 0x905D
|
||||
#define GL_INT_IMAGE_2D_ARRAY 0x905E
|
||||
#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F
|
||||
#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060
|
||||
#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061
|
||||
#define GL_UNSIGNED_INT_IMAGE_1D 0x9062
|
||||
#define GL_UNSIGNED_INT_IMAGE_2D 0x9063
|
||||
#define GL_UNSIGNED_INT_IMAGE_3D 0x9064
|
||||
#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065
|
||||
#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066
|
||||
#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
|
||||
#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068
|
||||
#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
|
||||
#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
|
||||
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
|
||||
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
|
||||
#define GL_MAX_IMAGE_SAMPLES 0x906D
|
||||
#define GL_IMAGE_BINDING_FORMAT 0x906E
|
||||
#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
|
||||
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
|
||||
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
|
||||
#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA
|
||||
#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
|
||||
#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
|
||||
#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD
|
||||
#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE
|
||||
#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF
|
||||
#define GL_COMPRESSED_RGBA_BPTC_UNORM 0x8E8C
|
||||
#define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM 0x8E8D
|
||||
#define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT 0x8E8E
|
||||
#define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT 0x8E8F
|
||||
#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F
|
||||
|
||||
typedef void(APIENTRYP PFNDOLDRAWARRAYSINSTANCEDBASEINSTANCEPROC)(GLenum mode, GLint first,
|
||||
GLsizei count,
|
||||
|
@ -152,16 +87,6 @@ typedef void(APIENTRYP PFNDOLGETINTERNALFORMATIVPROC)(GLenum target, GLenum inte
|
|||
GLenum pname, GLsizei bufSize, GLint* params);
|
||||
typedef void(APIENTRYP PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC)(GLuint program, GLuint bufferIndex,
|
||||
GLenum pname, GLint* params);
|
||||
typedef void(APIENTRYP PFNDOLBINDIMAGETEXTUREPROC)(GLuint unit, GLuint texture, GLint level,
|
||||
GLboolean layered, GLint layer, GLenum access,
|
||||
GLenum format);
|
||||
typedef void(APIENTRYP PFNDOLMEMORYBARRIERPROC)(GLbitfield barriers);
|
||||
typedef void(APIENTRYP PFNDOLTEXSTORAGE1DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
|
||||
GLsizei width);
|
||||
typedef void(APIENTRYP PFNDOLTEXSTORAGE2DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
|
||||
GLsizei width, GLsizei height);
|
||||
typedef void(APIENTRYP PFNDOLTEXSTORAGE3DPROC)(GLenum target, GLsizei levels, GLenum internalformat,
|
||||
GLsizei width, GLsizei height, GLsizei depth);
|
||||
typedef void(APIENTRYP PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC)(GLenum mode, GLuint id,
|
||||
GLsizei instancecount);
|
||||
typedef void(APIENTRYP PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC)(GLenum mode, GLuint id,
|
||||
|
@ -174,11 +99,6 @@ extern PFNDOLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC
|
|||
dolDrawElementsInstancedBaseVertexBaseInstance;
|
||||
extern PFNDOLGETINTERNALFORMATIVPROC dolGetInternalformativ;
|
||||
extern PFNDOLGETACTIVEATOMICCOUNTERBUFFERIVPROC dolGetActiveAtomicCounterBufferiv;
|
||||
extern PFNDOLBINDIMAGETEXTUREPROC dolBindImageTexture;
|
||||
extern PFNDOLMEMORYBARRIERPROC dolMemoryBarrier;
|
||||
extern PFNDOLTEXSTORAGE1DPROC dolTexStorage1D;
|
||||
extern PFNDOLTEXSTORAGE2DPROC dolTexStorage2D;
|
||||
extern PFNDOLTEXSTORAGE3DPROC dolTexStorage3D;
|
||||
extern PFNDOLDRAWTRANSFORMFEEDBACKINSTANCEDPROC dolDrawTransformFeedbackInstanced;
|
||||
extern PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackStreamInstanced;
|
||||
|
||||
|
@ -187,10 +107,5 @@ extern PFNDOLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC dolDrawTransformFeedbackSt
|
|||
#define glDrawElementsInstancedBaseVertexBaseInstance dolDrawElementsInstancedBaseVertexBaseInstance
|
||||
#define glGetInternalformativ dolGetInternalformativ
|
||||
#define glGetActiveAtomicCounterBufferiv dolGetActiveAtomicCounterBufferiv
|
||||
#define glBindImageTexture dolBindImageTexture
|
||||
#define glMemoryBarrier dolMemoryBarrier
|
||||
#define glTexStorage1D dolTexStorage1D
|
||||
#define glTexStorage2D dolTexStorage2D
|
||||
#define glTexStorage3D dolTexStorage3D
|
||||
#define glDrawTransformFeedbackInstanced dolDrawTransformFeedbackInstanced
|
||||
#define glDrawTransformFeedbackStreamInstanced dolDrawTransformFeedbackStreamInstanced
|
||||
|
|
|
@ -38,24 +38,6 @@
|
|||
#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69
|
||||
#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A
|
||||
#define GL_MAX_ELEMENT_INDEX 0x8D6B
|
||||
#define GL_COMPUTE_SHADER 0x91B9
|
||||
#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB
|
||||
#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
|
||||
#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD
|
||||
#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
|
||||
#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
|
||||
#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
|
||||
#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265
|
||||
#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
|
||||
#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
|
||||
#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
|
||||
#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
|
||||
#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267
|
||||
#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC
|
||||
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED
|
||||
#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE
|
||||
#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
|
||||
#define GL_COMPUTE_SHADER_BIT 0x00000020
|
||||
#define GL_DEBUG_OUTPUT_SYNCHRONOUS 0x8242
|
||||
#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH 0x8243
|
||||
#define GL_DEBUG_CALLBACK_FUNCTION 0x8244
|
||||
|
@ -287,9 +269,6 @@ typedef void(APIENTRYP PFNDOLCLEARBUFFERDATAPROC)(GLenum target, GLenum internal
|
|||
typedef void(APIENTRYP PFNDOLCLEARBUFFERSUBDATAPROC)(GLenum target, GLenum internalformat,
|
||||
GLintptr offset, GLsizeiptr size,
|
||||
GLenum format, GLenum type, const void* data);
|
||||
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y,
|
||||
GLuint num_groups_z);
|
||||
typedef void(APIENTRYP PFNDOLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect);
|
||||
typedef void(APIENTRYP PFNDOLFRAMEBUFFERPARAMETERIPROC)(GLenum target, GLenum pname, GLint param);
|
||||
typedef void(APIENTRYP PFNDOLGETFRAMEBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname,
|
||||
GLint* params);
|
||||
|
@ -348,8 +327,6 @@ typedef void(APIENTRYP PFNDOLVERTEXBINDINGDIVISORPROC)(GLuint bindingindex, GLui
|
|||
|
||||
extern PFNDOLCLEARBUFFERDATAPROC dolClearBufferData;
|
||||
extern PFNDOLCLEARBUFFERSUBDATAPROC dolClearBufferSubData;
|
||||
extern PFNDOLDISPATCHCOMPUTEPROC dolDispatchCompute;
|
||||
extern PFNDOLDISPATCHCOMPUTEINDIRECTPROC dolDispatchComputeIndirect;
|
||||
extern PFNDOLFRAMEBUFFERPARAMETERIPROC dolFramebufferParameteri;
|
||||
extern PFNDOLGETFRAMEBUFFERPARAMETERIVPROC dolGetFramebufferParameteriv;
|
||||
extern PFNDOLGETINTERNALFORMATI64VPROC dolGetInternalformati64v;
|
||||
|
@ -378,8 +355,6 @@ extern PFNDOLVERTEXBINDINGDIVISORPROC dolVertexBindingDivisor;
|
|||
|
||||
#define glClearBufferData dolClearBufferData
|
||||
#define glClearBufferSubData dolClearBufferSubData
|
||||
#define glDispatchCompute dolDispatchCompute
|
||||
#define glDispatchComputeIndirect dolDispatchComputeIndirect
|
||||
#define glFramebufferParameteri dolFramebufferParameteri
|
||||
#define glGetFramebufferParameteriv dolGetFramebufferParameteriv
|
||||
#define glGetInternalformati64v dolGetInternalformati64v
|
||||
|
|
|
@ -284,6 +284,10 @@ static wxString true_color_desc =
|
|||
wxTRANSLATE("Forces the game to render the RGB color channels in 24-bit, thereby increasing "
|
||||
"quality by reducing color banding.\nIt has no impact on performance and causes "
|
||||
"few graphical issues.\n\n\nIf unsure, leave this checked.");
|
||||
// Description text for the "GPU Texture Decoding" checkbox. The literal is
// wrapped in wxTRANSLATE here and passed through wxGetTranslation() where the
// checkbox is created, so the English text also serves as the translation key.
static wxString gpu_texture_decoding_desc =
|
||||
wxTRANSLATE("Enables texture decoding using the GPU instead of the CPU. This may result in "
|
||||
"performance gains in some scenarios, or systems where the CPU is the bottleneck."
|
||||
"\n\nIf unsure, leave this unchecked.");
|
||||
|
||||
#if !defined(__APPLE__)
|
||||
// Search for available resolutions - TODO: Move to Common?
|
||||
|
@ -755,6 +759,15 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title)
|
|||
slide_szr->Add(new wxStaticText(page_hacks, wxID_ANY, _("Fast")), 0, wxALIGN_CENTER_VERTICAL);
|
||||
|
||||
szr_safetex->Add(slide_szr, 1, wxEXPAND | wxLEFT | wxRIGHT, space5);
|
||||
|
||||
if (vconfig.backend_info.bSupportsGPUTextureDecoding)
|
||||
{
|
||||
szr_safetex->Add(CreateCheckBox(page_hacks, _("GPU Texture Decoding"),
|
||||
wxGetTranslation(gpu_texture_decoding_desc),
|
||||
vconfig.bEnableGPUTextureDecoding),
|
||||
1, wxEXPAND | wxLEFT | wxRIGHT, space5);
|
||||
}
|
||||
|
||||
if (slider_pos == -1)
|
||||
{
|
||||
stc_slider->Disable();
|
||||
|
|
|
@ -67,6 +67,7 @@ void VideoBackend::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsPrimitiveRestart = true;
|
||||
g_Config.backend_info.bSupportsOversizedViewports = false;
|
||||
g_Config.backend_info.bSupportsGeometryShaders = true;
|
||||
g_Config.backend_info.bSupportsComputeShaders = false;
|
||||
g_Config.backend_info.bSupports3DVision = true;
|
||||
g_Config.backend_info.bSupportsPostProcessing = false;
|
||||
g_Config.backend_info.bSupportsPaletteConversion = true;
|
||||
|
@ -75,6 +76,7 @@ void VideoBackend::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsReversedDepthRange = false;
|
||||
g_Config.backend_info.bSupportsMultithreading = false;
|
||||
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
|
||||
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
|
||||
|
||||
IDXGIFactory* factory;
|
||||
IDXGIAdapter* ad;
|
||||
|
|
|
@ -70,6 +70,7 @@ void VideoBackend::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsPrimitiveRestart = true;
|
||||
g_Config.backend_info.bSupportsOversizedViewports = false;
|
||||
g_Config.backend_info.bSupportsGeometryShaders = true;
|
||||
g_Config.backend_info.bSupportsComputeShaders = false;
|
||||
g_Config.backend_info.bSupports3DVision = true;
|
||||
g_Config.backend_info.bSupportsPostProcessing = false;
|
||||
g_Config.backend_info.bSupportsPaletteConversion = true;
|
||||
|
@ -78,6 +79,7 @@ void VideoBackend::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsReversedDepthRange = false;
|
||||
g_Config.backend_info.bSupportsMultithreading = false;
|
||||
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
|
||||
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
|
||||
|
||||
IDXGIFactory* factory;
|
||||
IDXGIAdapter* ad;
|
||||
|
|
|
@ -30,6 +30,7 @@ void VideoBackend::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsPrimitiveRestart = true;
|
||||
g_Config.backend_info.bSupportsOversizedViewports = true;
|
||||
g_Config.backend_info.bSupportsGeometryShaders = true;
|
||||
g_Config.backend_info.bSupportsComputeShaders = false;
|
||||
g_Config.backend_info.bSupports3DVision = false;
|
||||
g_Config.backend_info.bSupportsEarlyZ = true;
|
||||
g_Config.backend_info.bSupportsBindingLayout = true;
|
||||
|
@ -43,6 +44,7 @@ void VideoBackend::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsReversedDepthRange = true;
|
||||
g_Config.backend_info.bSupportsMultithreading = false;
|
||||
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
|
||||
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
|
||||
|
||||
// aamodes: We only support 1 sample, so no MSAA
|
||||
g_Config.backend_info.Adapters.clear();
|
||||
|
|
|
@ -65,7 +65,7 @@ GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_fo
|
|||
}
|
||||
else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
|
||||
{
|
||||
if (g_ogl_config.bSupports3DTextureStorage)
|
||||
if (g_ogl_config.bSupports3DTextureStorageMultisample)
|
||||
glTexStorage3DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth,
|
||||
m_targetHeight, m_EFBLayers, false);
|
||||
else
|
||||
|
@ -74,7 +74,7 @@ GLuint FramebufferManager::CreateTexture(GLenum texture_type, GLenum internal_fo
|
|||
}
|
||||
else if (texture_type == GL_TEXTURE_2D_MULTISAMPLE)
|
||||
{
|
||||
if (g_ogl_config.bSupports2DTextureStorage)
|
||||
if (g_ogl_config.bSupports2DTextureStorageMultisample)
|
||||
glTexStorage2DMultisample(texture_type, m_msaaSamples, internal_format, m_targetWidth,
|
||||
m_targetHeight, false);
|
||||
else
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
// Copyright 2016 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Common/GL/GLExtensions/GLExtensions.h"
|
||||
|
||||
#ifndef GL_TIME_ELAPSED
|
||||
#define GL_TIME_ELAPSED 0x88BF
|
||||
#endif
|
||||
|
||||
namespace OGL
|
||||
{
|
||||
/*
|
||||
* This class can be used to measure the time it takes for the GPU to perform a draw call
|
||||
* or compute dispatch. To use:
|
||||
*
|
||||
* - Create an instance of GPUTimer before issuing the draw call.
|
||||
* (this can be before or after any binding that needs to be done)
|
||||
*
|
||||
* - (optionally) call Begin(). This is not needed for a single draw call.
|
||||
*
|
||||
* - Issue the draw call or compute dispatch as normal.
|
||||
*
|
||||
* - (optionally) call End(). This is not necessary for a single draw call.
|
||||
*
|
||||
* - Call GetTime{Seconds,Milliseconds,Nanoseconds} to determine how long the operation
|
||||
* took to execute on the GPU.
|
||||
*
|
||||
* NOTE: When the timer is read back, this will force a GL flush, so the more often a timer is used,
|
||||
* the larger of a performance impact it will have. Only one timer can be active at any time, due to
|
||||
* using GL_TIME_ELAPSED. This is not enforced by the class, however.
|
||||
*
|
||||
*/
|
||||
class GPUTimer final
|
||||
{
|
||||
public:
|
||||
GPUTimer()
|
||||
{
|
||||
glGenQueries(1, &m_query_id);
|
||||
Begin();
|
||||
}
|
||||
|
||||
~GPUTimer()
|
||||
{
|
||||
End();
|
||||
glDeleteQueries(1, &m_query_id);
|
||||
}
|
||||
|
||||
void Begin()
|
||||
{
|
||||
if (m_started)
|
||||
glEndQuery(GL_TIME_ELAPSED);
|
||||
|
||||
glBeginQuery(GL_TIME_ELAPSED, m_query_id);
|
||||
m_started = true;
|
||||
}
|
||||
|
||||
void End()
|
||||
{
|
||||
if (!m_started)
|
||||
return;
|
||||
|
||||
glEndQuery(GL_TIME_ELAPSED);
|
||||
m_started = false;
|
||||
}
|
||||
|
||||
double GetTimeSeconds()
|
||||
{
|
||||
GetResult();
|
||||
return static_cast<double>(m_result) / 1000000000.0;
|
||||
}
|
||||
|
||||
double GetTimeMilliseconds()
|
||||
{
|
||||
GetResult();
|
||||
return static_cast<double>(m_result) / 1000000.0;
|
||||
}
|
||||
|
||||
u32 GetTimeNanoseconds()
|
||||
{
|
||||
GetResult();
|
||||
return m_result;
|
||||
}
|
||||
|
||||
private:
|
||||
void GetResult()
|
||||
{
|
||||
if (m_has_result)
|
||||
return;
|
||||
|
||||
if (m_started)
|
||||
End();
|
||||
|
||||
glGetQueryObjectuiv(m_query_id, GL_QUERY_RESULT, &m_result);
|
||||
m_has_result = true;
|
||||
}
|
||||
|
||||
GLuint m_query_id;
|
||||
GLuint m_result = 0;
|
||||
bool m_started = false;
|
||||
bool m_has_result = false;
|
||||
};
|
||||
} // namespace OGL
|
|
@ -53,6 +53,7 @@
|
|||
<ItemGroup>
|
||||
<ClInclude Include="BoundingBox.h" />
|
||||
<ClInclude Include="FramebufferManager.h" />
|
||||
<ClInclude Include="GPUTimer.h" />
|
||||
<ClInclude Include="PerfQuery.h" />
|
||||
<ClInclude Include="PostProcessing.h" />
|
||||
<ClInclude Include="ProgramShaderCache.h" />
|
||||
|
@ -79,4 +80,4 @@
|
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
</Project>
|
|
@ -90,8 +90,11 @@
|
|||
</ClInclude>
|
||||
<ClInclude Include="SamplerCache.h" />
|
||||
<ClInclude Include="VideoBackend.h" />
|
||||
<ClInclude Include="GPUTimer.h">
|
||||
<Filter>GLUtil</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Text Include="CMakeLists.txt" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
</Project>
|
|
@ -65,6 +65,8 @@ static std::string GetGLSLVersionString()
|
|||
return "#version 330";
|
||||
case GLSL_400:
|
||||
return "#version 400";
|
||||
case GLSL_430:
|
||||
return "#version 430";
|
||||
default:
|
||||
// Shouldn't ever hit this
|
||||
return "#version ERROR";
|
||||
|
@ -103,27 +105,30 @@ void SHADER::SetProgramVariables()
|
|||
}
|
||||
}
|
||||
|
||||
void SHADER::SetProgramBindings()
|
||||
void SHADER::SetProgramBindings(bool is_compute)
|
||||
{
|
||||
if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
|
||||
if (!is_compute)
|
||||
{
|
||||
// So we do support extended blending
|
||||
// So we need to set a few more things here.
|
||||
// Bind our out locations
|
||||
glBindFragDataLocationIndexed(glprogid, 0, 0, "ocol0");
|
||||
glBindFragDataLocationIndexed(glprogid, 0, 1, "ocol1");
|
||||
if (g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
|
||||
{
|
||||
// So we do support extended blending
|
||||
// So we need to set a few more things here.
|
||||
// Bind our out locations
|
||||
glBindFragDataLocationIndexed(glprogid, 0, 0, "ocol0");
|
||||
glBindFragDataLocationIndexed(glprogid, 0, 1, "ocol1");
|
||||
}
|
||||
// Need to set some attribute locations
|
||||
glBindAttribLocation(glprogid, SHADER_POSITION_ATTRIB, "rawpos");
|
||||
|
||||
glBindAttribLocation(glprogid, SHADER_POSMTX_ATTRIB, "posmtx");
|
||||
|
||||
glBindAttribLocation(glprogid, SHADER_COLOR0_ATTRIB, "color0");
|
||||
glBindAttribLocation(glprogid, SHADER_COLOR1_ATTRIB, "color1");
|
||||
|
||||
glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB, "rawnorm0");
|
||||
glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB, "rawnorm1");
|
||||
glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB, "rawnorm2");
|
||||
}
|
||||
// Need to set some attribute locations
|
||||
glBindAttribLocation(glprogid, SHADER_POSITION_ATTRIB, "rawpos");
|
||||
|
||||
glBindAttribLocation(glprogid, SHADER_POSMTX_ATTRIB, "posmtx");
|
||||
|
||||
glBindAttribLocation(glprogid, SHADER_COLOR0_ATTRIB, "color0");
|
||||
glBindAttribLocation(glprogid, SHADER_COLOR1_ATTRIB, "color1");
|
||||
|
||||
glBindAttribLocation(glprogid, SHADER_NORM0_ATTRIB, "rawnorm0");
|
||||
glBindAttribLocation(glprogid, SHADER_NORM1_ATTRIB, "rawnorm1");
|
||||
glBindAttribLocation(glprogid, SHADER_NORM2_ATTRIB, "rawnorm2");
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
{
|
||||
|
@ -281,7 +286,7 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
|
|||
if (g_ogl_config.bSupportsGLSLCache)
|
||||
glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
|
||||
|
||||
shader.SetProgramBindings();
|
||||
shader.SetProgramBindings(false);
|
||||
|
||||
glLinkProgram(pid);
|
||||
|
||||
|
@ -296,10 +301,10 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
|
|||
glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length);
|
||||
if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
|
||||
{
|
||||
GLsizei charsWritten;
|
||||
GLchar* infoLog = new GLchar[length];
|
||||
glGetProgramInfoLog(pid, length, &charsWritten, infoLog);
|
||||
ERROR_LOG(VIDEO, "Program info log:\n%s", infoLog);
|
||||
std::string info_log;
|
||||
info_log.resize(length);
|
||||
glGetProgramInfoLog(pid, length, &length, &info_log[0]);
|
||||
ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str());
|
||||
|
||||
std::string filename =
|
||||
StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
|
||||
|
@ -308,7 +313,7 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
|
|||
file << s_glsl_header << vcode << s_glsl_header << pcode;
|
||||
if (!gcode.empty())
|
||||
file << s_glsl_header << gcode;
|
||||
file << infoLog;
|
||||
file << info_log;
|
||||
file.close();
|
||||
|
||||
if (linkStatus != GL_TRUE)
|
||||
|
@ -316,10 +321,8 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
|
|||
PanicAlert("Failed to link shaders: %s\n"
|
||||
"Debug info (%s, %s, %s):\n%s",
|
||||
filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
|
||||
g_ogl_config.gl_version, infoLog);
|
||||
g_ogl_config.gl_version, info_log.c_str());
|
||||
}
|
||||
|
||||
delete[] infoLog;
|
||||
}
|
||||
if (linkStatus != GL_TRUE)
|
||||
{
|
||||
|
@ -336,6 +339,73 @@ bool ProgramShaderCache::CompileShader(SHADER& shader, const std::string& vcode,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Compiles `code` as a compute shader and links it into a program whose name is stored in
// shader.glprogid. Returns true on success.
//
// Returns false if the shader fails to compile (shader.glprogid is left untouched) or if the
// program fails to link. On link failure the source and info log are written to the dump
// directory, the program is deleted and shader.glprogid is reset to 0.
bool ProgramShaderCache::CompileComputeShader(SHADER& shader, const std::string& code)
{
  // We need to enable GL_ARB_compute_shader for drivers that support the extension,
  // but not GLSL 4.3. Mesa is one example.
  std::string header;
  if (g_ActiveConfig.backend_info.bSupportsComputeShaders &&
      g_ogl_config.eSupportedGLSLVersion < GLSL_430)
  {
    header = "#extension GL_ARB_compute_shader : enable\n";
  }

  GLuint shader_id = CompileSingleShader(GL_COMPUTE_SHADER, header + code);
  if (!shader_id)
    return false;

  GLuint pid = shader.glprogid = glCreateProgram();
  glAttachShader(pid, shader_id);
  if (g_ogl_config.bSupportsGLSLCache)
    glProgramParameteri(pid, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);

  shader.SetProgramBindings(true);

  glLinkProgram(pid);

  // The shader object isn't needed any more once the program has been linked.
  glDeleteShader(shader_id);

  GLint linkStatus;
  glGetProgramiv(pid, GL_LINK_STATUS, &linkStatus);
  GLsizei length = 0;
  glGetProgramiv(pid, GL_INFO_LOG_LENGTH, &length);
  if (linkStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
  {
    std::string info_log;
    info_log.resize(length);

    // GL_INFO_LOG_LENGTH includes the terminating NUL, whereas the length written back here
    // does not. Trim the string so the NUL doesn't end up in the dump file.
    GLsizei chars_written = 0;
    glGetProgramInfoLog(pid, length, &chars_written, &info_log[0]);
    if (chars_written >= 0 && chars_written < length)
      info_log.resize(chars_written);

    ERROR_LOG(VIDEO, "Program info log:\n%s", info_log.c_str());

    std::string filename =
        StringFromFormat("%sbad_p_%d.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
    std::ofstream file;
    OpenFStream(file, filename, std::ios_base::out);
    file << s_glsl_header << code;
    file << info_log;
    file.close();

    if (linkStatus != GL_TRUE)
    {
      PanicAlert("Failed to link shaders: %s\n"
                 "Debug info (%s, %s, %s):\n%s",
                 filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
                 g_ogl_config.gl_version, info_log.c_str());
    }
  }
  if (linkStatus != GL_TRUE)
  {
    // Link failed
    ERROR_LOG(VIDEO, "Program linking failed; see info log");

    // Don't try to use this shader. Clear the stored name as well, so the caller is not left
    // holding the name of a deleted program.
    glDeleteProgram(pid);
    shader.glprogid = 0;
    return false;
  }

  return true;
}
|
||||
|
||||
GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& code)
|
||||
{
|
||||
GLuint result = glCreateShader(type);
|
||||
|
@ -351,31 +421,43 @@ GLuint ProgramShaderCache::CompileSingleShader(GLuint type, const std::string& c
|
|||
|
||||
if (compileStatus != GL_TRUE || (length > 1 && DEBUG_GLSL))
|
||||
{
|
||||
GLsizei charsWritten;
|
||||
GLchar* infoLog = new GLchar[length];
|
||||
glGetShaderInfoLog(result, length, &charsWritten, infoLog);
|
||||
ERROR_LOG(VIDEO, "%s Shader info log:\n%s",
|
||||
type == GL_VERTEX_SHADER ? "VS" : type == GL_FRAGMENT_SHADER ? "PS" : "GS", infoLog);
|
||||
std::string info_log;
|
||||
info_log.resize(length);
|
||||
glGetShaderInfoLog(result, length, &length, &info_log[0]);
|
||||
|
||||
const char* prefix = "";
|
||||
switch (type)
|
||||
{
|
||||
case GL_VERTEX_SHADER:
|
||||
prefix = "vs";
|
||||
break;
|
||||
case GL_GEOMETRY_SHADER:
|
||||
prefix = "gs";
|
||||
break;
|
||||
case GL_FRAGMENT_SHADER:
|
||||
prefix = "ps";
|
||||
break;
|
||||
case GL_COMPUTE_SHADER:
|
||||
prefix = "cs";
|
||||
break;
|
||||
}
|
||||
|
||||
ERROR_LOG(VIDEO, "%s Shader info log:\n%s", prefix, info_log.c_str());
|
||||
|
||||
std::string filename = StringFromFormat(
|
||||
"%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(),
|
||||
type == GL_VERTEX_SHADER ? "vs" : type == GL_FRAGMENT_SHADER ? "ps" : "gs", num_failures++);
|
||||
"%sbad_%s_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), prefix, num_failures++);
|
||||
std::ofstream file;
|
||||
OpenFStream(file, filename, std::ios_base::out);
|
||||
file << s_glsl_header << code << infoLog;
|
||||
file << s_glsl_header << code << info_log;
|
||||
file.close();
|
||||
|
||||
if (compileStatus != GL_TRUE)
|
||||
{
|
||||
PanicAlert("Failed to compile %s shader: %s\n"
|
||||
"Debug info (%s, %s, %s):\n%s",
|
||||
type == GL_VERTEX_SHADER ? "vertex" : type == GL_FRAGMENT_SHADER ? "pixel" :
|
||||
"geometry",
|
||||
filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
|
||||
g_ogl_config.gl_version, infoLog);
|
||||
prefix, filename.c_str(), g_ogl_config.gl_vendor, g_ogl_config.gl_renderer,
|
||||
g_ogl_config.gl_version, info_log.c_str());
|
||||
}
|
||||
|
||||
delete[] infoLog;
|
||||
}
|
||||
if (compileStatus != GL_TRUE)
|
||||
{
|
||||
|
@ -539,11 +621,9 @@ void ProgramShaderCache::CreateHeader()
|
|||
std::string earlyz_string = "";
|
||||
if (g_ActiveConfig.backend_info.bSupportsEarlyZ)
|
||||
{
|
||||
if (g_ogl_config.bSupportsEarlyFragmentTests)
|
||||
if (g_ogl_config.bSupportsImageLoadStore)
|
||||
{
|
||||
earlyz_string = "#define FORCE_EARLY_Z layout(early_fragment_tests) in\n";
|
||||
if (!is_glsles) // GLES supports this by default
|
||||
earlyz_string += "#extension GL_ARB_shader_image_load_store : enable\n";
|
||||
}
|
||||
else if (g_ogl_config.bSupportsConservativeDepth)
|
||||
{
|
||||
|
@ -569,6 +649,7 @@ void ProgramShaderCache::CreateHeader()
|
|||
"%s\n" // texture buffer
|
||||
"%s\n" // ES texture buffer
|
||||
"%s\n" // ES dual source blend
|
||||
"%s\n" // shader image load store
|
||||
|
||||
// Precision defines for GLSL ES
|
||||
"%s\n"
|
||||
|
@ -576,6 +657,7 @@ void ProgramShaderCache::CreateHeader()
|
|||
"%s\n"
|
||||
"%s\n"
|
||||
"%s\n"
|
||||
"%s\n"
|
||||
|
||||
// Silly differences
|
||||
"#define float2 vec2\n"
|
||||
|
@ -638,12 +720,17 @@ void ProgramShaderCache::CreateHeader()
|
|||
""
|
||||
|
||||
,
|
||||
g_ogl_config.bSupportsImageLoadStore &&
|
||||
((!is_glsles && v < GLSL_430) || (is_glsles && v < GLSLES_310)) ?
|
||||
"#extension GL_ARB_shader_image_load_store : enable" :
|
||||
"",
|
||||
is_glsles ? "precision highp float;" : "", is_glsles ? "precision highp int;" : "",
|
||||
is_glsles ? "precision highp sampler2DArray;" : "",
|
||||
(is_glsles && g_ActiveConfig.backend_info.bSupportsPaletteConversion) ?
|
||||
"precision highp usamplerBuffer;" :
|
||||
"",
|
||||
v > GLSLES_300 ? "precision highp sampler2DMS;" : "");
|
||||
v > GLSLES_300 ? "precision highp sampler2DMS;" : "",
|
||||
v >= GLSLES_310 ? "precision highp image2DArray;" : "");
|
||||
}
|
||||
|
||||
void ProgramShaderCache::ProgramShaderCacheInserter::Read(const SHADERUID& key, const u8* value,
|
||||
|
|
|
@ -46,7 +46,7 @@ struct SHADER
|
|||
std::string strvprog, strpprog, strgprog;
|
||||
|
||||
void SetProgramVariables();
|
||||
void SetProgramBindings();
|
||||
void SetProgramBindings(bool is_compute);
|
||||
void Bind();
|
||||
};
|
||||
|
||||
|
@ -67,6 +67,7 @@ public:
|
|||
|
||||
static bool CompileShader(SHADER& shader, const std::string& vcode, const std::string& pcode,
|
||||
const std::string& gcode = "");
|
||||
static bool CompileComputeShader(SHADER& shader, const std::string& code);
|
||||
static GLuint CompileSingleShader(GLuint type, const std::string& code);
|
||||
static void UploadConstants();
|
||||
|
||||
|
|
|
@ -451,15 +451,16 @@ Renderer::Renderer()
|
|||
g_ogl_config.bSupportViewportFloat = GLExtensions::Supports("GL_ARB_viewport_array");
|
||||
g_ogl_config.bSupportsDebug =
|
||||
GLExtensions::Supports("GL_KHR_debug") || GLExtensions::Supports("GL_ARB_debug_output");
|
||||
g_ogl_config.bSupports3DTextureStorage =
|
||||
g_ogl_config.bSupportsTextureStorage = GLExtensions::Supports("GL_ARB_texture_storage");
|
||||
g_ogl_config.bSupports3DTextureStorageMultisample =
|
||||
GLExtensions::Supports("GL_ARB_texture_storage_multisample") ||
|
||||
GLExtensions::Supports("GL_OES_texture_storage_multisample_2d_array");
|
||||
g_ogl_config.bSupports2DTextureStorage =
|
||||
g_ogl_config.bSupports2DTextureStorageMultisample =
|
||||
GLExtensions::Supports("GL_ARB_texture_storage_multisample");
|
||||
g_ogl_config.bSupportsEarlyFragmentTests =
|
||||
GLExtensions::Supports("GL_ARB_shader_image_load_store");
|
||||
g_ogl_config.bSupportsImageLoadStore = GLExtensions::Supports("GL_ARB_shader_image_load_store");
|
||||
g_ogl_config.bSupportsConservativeDepth = GLExtensions::Supports("GL_ARB_conservative_depth");
|
||||
g_ogl_config.bSupportsAniso = GLExtensions::Supports("GL_EXT_texture_filter_anisotropic");
|
||||
g_Config.backend_info.bSupportsComputeShaders = GLExtensions::Supports("GL_ARB_compute_shader");
|
||||
|
||||
if (GLInterface->GetMode() == GLInterfaceMode::MODE_OPENGLES3)
|
||||
{
|
||||
|
@ -486,6 +487,7 @@ Renderer::Renderer()
|
|||
{
|
||||
g_ogl_config.eSupportedGLSLVersion = GLSLES_300;
|
||||
g_ogl_config.bSupportsAEP = false;
|
||||
g_ogl_config.bSupportsTextureStorage = true;
|
||||
g_Config.backend_info.bSupportsGeometryShaders = false;
|
||||
}
|
||||
else if (GLExtensions::Version() == 310)
|
||||
|
@ -493,16 +495,18 @@ Renderer::Renderer()
|
|||
g_ogl_config.eSupportedGLSLVersion = GLSLES_310;
|
||||
g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a");
|
||||
g_Config.backend_info.bSupportsBindingLayout = true;
|
||||
g_ogl_config.bSupportsEarlyFragmentTests = true;
|
||||
g_ogl_config.bSupportsImageLoadStore = true;
|
||||
g_Config.backend_info.bSupportsGeometryShaders = g_ogl_config.bSupportsAEP;
|
||||
g_Config.backend_info.bSupportsComputeShaders = true;
|
||||
g_Config.backend_info.bSupportsGSInstancing =
|
||||
g_Config.backend_info.bSupportsGeometryShaders && g_ogl_config.SupportedESPointSize > 0;
|
||||
g_Config.backend_info.bSupportsSSAA = g_ogl_config.bSupportsAEP;
|
||||
g_Config.backend_info.bSupportsFragmentStoresAndAtomics = true;
|
||||
g_ogl_config.bSupportsMSAA = true;
|
||||
g_ogl_config.bSupports2DTextureStorage = true;
|
||||
g_ogl_config.bSupportsTextureStorage = true;
|
||||
g_ogl_config.bSupports2DTextureStorageMultisample = true;
|
||||
if (g_ActiveConfig.iStereoMode > 0 && g_ActiveConfig.iMultisamples > 1 &&
|
||||
!g_ogl_config.bSupports3DTextureStorage)
|
||||
!g_ogl_config.bSupports3DTextureStorageMultisample)
|
||||
{
|
||||
// GLES 3.1 can't support stereo rendering and MSAA
|
||||
OSD::AddMessage("MSAA Stereo rendering isn't supported by your GPU.", 10000);
|
||||
|
@ -514,8 +518,9 @@ Renderer::Renderer()
|
|||
g_ogl_config.eSupportedGLSLVersion = GLSLES_320;
|
||||
g_ogl_config.bSupportsAEP = GLExtensions::Supports("GL_ANDROID_extension_pack_es31a");
|
||||
g_Config.backend_info.bSupportsBindingLayout = true;
|
||||
g_ogl_config.bSupportsEarlyFragmentTests = true;
|
||||
g_ogl_config.bSupportsImageLoadStore = true;
|
||||
g_Config.backend_info.bSupportsGeometryShaders = true;
|
||||
g_Config.backend_info.bSupportsComputeShaders = true;
|
||||
g_Config.backend_info.bSupportsGSInstancing = g_ogl_config.SupportedESPointSize > 0;
|
||||
g_Config.backend_info.bSupportsPaletteConversion = true;
|
||||
g_Config.backend_info.bSupportsSSAA = true;
|
||||
|
@ -524,8 +529,9 @@ Renderer::Renderer()
|
|||
g_ogl_config.bSupportsGLBaseVertex = true;
|
||||
g_ogl_config.bSupportsDebug = true;
|
||||
g_ogl_config.bSupportsMSAA = true;
|
||||
g_ogl_config.bSupports2DTextureStorage = true;
|
||||
g_ogl_config.bSupports3DTextureStorage = true;
|
||||
g_ogl_config.bSupportsTextureStorage = true;
|
||||
g_ogl_config.bSupports2DTextureStorageMultisample = true;
|
||||
g_ogl_config.bSupports3DTextureStorageMultisample = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -541,8 +547,7 @@ Renderer::Renderer()
|
|||
else if (GLExtensions::Version() == 300)
|
||||
{
|
||||
g_ogl_config.eSupportedGLSLVersion = GLSL_130;
|
||||
g_ogl_config.bSupportsEarlyFragmentTests =
|
||||
false; // layout keyword is only supported on glsl150+
|
||||
g_ogl_config.bSupportsImageLoadStore = false; // layout keyword is only supported on glsl150+
|
||||
g_ogl_config.bSupportsConservativeDepth =
|
||||
false; // layout keyword is only supported on glsl150+
|
||||
g_Config.backend_info.bSupportsGeometryShaders =
|
||||
|
@ -551,8 +556,7 @@ Renderer::Renderer()
|
|||
else if (GLExtensions::Version() == 310)
|
||||
{
|
||||
g_ogl_config.eSupportedGLSLVersion = GLSL_140;
|
||||
g_ogl_config.bSupportsEarlyFragmentTests =
|
||||
false; // layout keyword is only supported on glsl150+
|
||||
g_ogl_config.bSupportsImageLoadStore = false; // layout keyword is only supported on glsl150+
|
||||
g_ogl_config.bSupportsConservativeDepth =
|
||||
false; // layout keyword is only supported on glsl150+
|
||||
g_Config.backend_info.bSupportsGeometryShaders =
|
||||
|
@ -566,10 +570,28 @@ Renderer::Renderer()
|
|||
{
|
||||
g_ogl_config.eSupportedGLSLVersion = GLSL_330;
|
||||
}
|
||||
else if (GLExtensions::Version() >= 430)
|
||||
{
|
||||
// TODO: We should really parse the GL_SHADING_LANGUAGE_VERSION token.
|
||||
g_ogl_config.eSupportedGLSLVersion = GLSL_430;
|
||||
g_ogl_config.bSupportsTextureStorage = true;
|
||||
g_ogl_config.bSupportsImageLoadStore = true;
|
||||
g_Config.backend_info.bSupportsSSAA = true;
|
||||
|
||||
// Compute shaders are core in GL4.3.
|
||||
g_Config.backend_info.bSupportsComputeShaders = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
g_ogl_config.eSupportedGLSLVersion = GLSL_400;
|
||||
g_Config.backend_info.bSupportsSSAA = true;
|
||||
|
||||
if (GLExtensions::Version() == 420)
|
||||
{
|
||||
// Texture storage and shader image load/store are core in GL4.2.
|
||||
g_ogl_config.bSupportsTextureStorage = true;
|
||||
g_ogl_config.bSupportsImageLoadStore = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Desktop OpenGL can't have the Android Extension Pack
|
||||
|
@ -578,12 +600,19 @@ Renderer::Renderer()
|
|||
|
||||
// Either method can do early-z tests. See PixelShaderGen for details.
|
||||
g_Config.backend_info.bSupportsEarlyZ =
|
||||
g_ogl_config.bSupportsEarlyFragmentTests || g_ogl_config.bSupportsConservativeDepth;
|
||||
g_ogl_config.bSupportsImageLoadStore || g_ogl_config.bSupportsConservativeDepth;
|
||||
|
||||
glGetIntegerv(GL_MAX_SAMPLES, &g_ogl_config.max_samples);
|
||||
if (g_ogl_config.max_samples < 1 || !g_ogl_config.bSupportsMSAA)
|
||||
g_ogl_config.max_samples = 1;
|
||||
|
||||
// We require texel buffers, image load store, and compute shaders to enable GPU texture decoding.
|
||||
// If the driver doesn't expose the extensions, but supports GL4.3/GLES3.1, it will still be
|
||||
// enabled, because the version checks above set the required support flags.
|
||||
g_Config.backend_info.bSupportsGPUTextureDecoding =
|
||||
g_Config.backend_info.bSupportsPaletteConversion &&
|
||||
g_Config.backend_info.bSupportsComputeShaders && g_ogl_config.bSupportsImageLoadStore;
|
||||
|
||||
if (g_ogl_config.bSupportsDebug)
|
||||
{
|
||||
if (GLExtensions::Supports("GL_KHR_debug"))
|
||||
|
|
|
@ -22,7 +22,8 @@ enum GLSL_VERSION
|
|||
GLSL_140,
|
||||
GLSL_150,
|
||||
GLSL_330,
|
||||
GLSL_400, // and above
|
||||
GLSL_400, // and above
|
||||
GLSL_430,
|
||||
GLSLES_300, // GLES 3.0
|
||||
GLSLES_310, // GLES 3.1
|
||||
GLSLES_320, // GLES 3.2
|
||||
|
@ -51,10 +52,11 @@ struct VideoConfig
|
|||
bool bSupportsCopySubImage;
|
||||
u8 SupportedESPointSize;
|
||||
ES_TEXBUF_TYPE SupportedESTextureBuffer;
|
||||
bool bSupports2DTextureStorage;
|
||||
bool bSupports3DTextureStorage;
|
||||
bool bSupportsEarlyFragmentTests;
|
||||
bool bSupportsTextureStorage;
|
||||
bool bSupports2DTextureStorageMultisample;
|
||||
bool bSupports3DTextureStorageMultisample;
|
||||
bool bSupportsConservativeDepth;
|
||||
bool bSupportsImageLoadStore;
|
||||
bool bSupportsAniso;
|
||||
|
||||
const char* gl_vendor;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "Common/StringUtil.h"
|
||||
|
||||
#include "VideoBackends/OGL/FramebufferManager.h"
|
||||
#include "VideoBackends/OGL/GPUTimer.h"
|
||||
#include "VideoBackends/OGL/ProgramShaderCache.h"
|
||||
#include "VideoBackends/OGL/Render.h"
|
||||
#include "VideoBackends/OGL/SamplerCache.h"
|
||||
|
@ -23,6 +24,7 @@
|
|||
#include "VideoBackends/OGL/TextureConverter.h"
|
||||
|
||||
#include "VideoCommon/ImageWrite.h"
|
||||
#include "VideoCommon/TextureConversionShader.h"
|
||||
#include "VideoCommon/TextureDecoder.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
|
||||
|
@ -49,6 +51,26 @@ static GLuint s_palette_buffer_offset_uniform[3];
|
|||
static GLuint s_palette_multiplier_uniform[3];
|
||||
static GLuint s_palette_copy_position_uniform[3];
|
||||
|
||||
struct TextureDecodingProgramInfo
|
||||
{
|
||||
const TextureConversionShader::DecodingShaderInfo* base_info = nullptr;
|
||||
SHADER program;
|
||||
GLint uniform_dst_size = -1;
|
||||
GLint uniform_src_size = -1;
|
||||
GLint uniform_src_row_stride = -1;
|
||||
GLint uniform_src_offset = -1;
|
||||
GLint uniform_palette_offset = -1;
|
||||
bool valid = false;
|
||||
};
|
||||
|
||||
//#define TIME_TEXTURE_DECODING 1
|
||||
|
||||
static std::map<std::pair<u32, u32>, TextureDecodingProgramInfo> s_texture_decoding_program_info;
|
||||
static std::array<GLuint, TextureConversionShader::BUFFER_FORMAT_COUNT>
|
||||
s_texture_decoding_buffer_views;
|
||||
static void CreateTextureDecodingResources();
|
||||
static void DestroyTextureDecodingResources();
|
||||
|
||||
bool SaveTexture(const std::string& filename, u32 textarget, u32 tex, int virtual_width,
|
||||
int virtual_height, unsigned int level)
|
||||
{
|
||||
|
@ -119,12 +141,22 @@ TextureCache::TCacheEntryBase* TextureCache::CreateTexture(const TCacheEntryConf
|
|||
|
||||
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAX_LEVEL, config.levels - 1);
|
||||
|
||||
if (g_ogl_config.bSupportsTextureStorage)
|
||||
{
|
||||
glTexStorage3D(GL_TEXTURE_2D_ARRAY, config.levels, GL_RGBA8, config.width, config.height,
|
||||
config.layers);
|
||||
}
|
||||
|
||||
if (config.rendertarget)
|
||||
{
|
||||
for (u32 level = 0; level <= config.levels; level++)
|
||||
if (!g_ogl_config.bSupportsTextureStorage)
|
||||
{
|
||||
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, config.width, config.height, config.layers,
|
||||
0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
|
||||
for (u32 level = 0; level < config.levels; level++)
|
||||
{
|
||||
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, std::max(config.width >> level, 1u),
|
||||
std::max(config.height >> level, 1u), config.layers, 0, GL_RGBA,
|
||||
GL_UNSIGNED_BYTE, nullptr);
|
||||
}
|
||||
}
|
||||
glGenFramebuffers(1, &entry->framebuffer);
|
||||
FramebufferManager::SetFramebuffer(entry->framebuffer);
|
||||
|
@ -187,8 +219,16 @@ void TextureCache::TCacheEntry::Load(const u8* buffer, u32 width, u32 height, u3
|
|||
if (expanded_width != width)
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, expanded_width);
|
||||
|
||||
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, width, height, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE,
|
||||
buffer);
|
||||
if (g_ogl_config.bSupportsTextureStorage)
|
||||
{
|
||||
glTexSubImage3D(GL_TEXTURE_2D_ARRAY, level, 0, 0, 0, width, height, 1, GL_RGBA,
|
||||
GL_UNSIGNED_BYTE, buffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
glTexImage3D(GL_TEXTURE_2D_ARRAY, level, GL_RGBA, width, height, 1, 0, GL_RGBA,
|
||||
GL_UNSIGNED_BYTE, buffer);
|
||||
}
|
||||
|
||||
if (expanded_width != width)
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
|
@ -267,26 +307,31 @@ TextureCache::TextureCache()
|
|||
|
||||
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
|
||||
{
|
||||
s32 buffer_size = 1024 * 1024;
|
||||
s32 buffer_size_mb = (g_ActiveConfig.backend_info.bSupportsGPUTextureDecoding ? 32 : 1);
|
||||
s32 buffer_size = buffer_size_mb * 1024 * 1024;
|
||||
s32 max_buffer_size = 0;
|
||||
|
||||
// The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates
|
||||
// is 65KB, we are asking for a 1MB buffer here.
|
||||
// Make sure to check the maximum size and if it is below 1MB
|
||||
// then use the maximum the hardware supports instead.
|
||||
// The minimum MAX_TEXTURE_BUFFER_SIZE that the spec mandates is 65KB, we are asking for a 1MB
|
||||
// buffer here. This buffer is also used as storage for undecoded textures when compute shader
|
||||
// texture decoding is enabled, in which case the requested size is 32MB.
|
||||
glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &max_buffer_size);
|
||||
|
||||
// Clamp the buffer size to the maximum size that the driver supports.
|
||||
buffer_size = std::min(buffer_size, max_buffer_size);
|
||||
|
||||
s_palette_stream_buffer = StreamBuffer::Create(GL_TEXTURE_BUFFER, buffer_size);
|
||||
glGenTextures(1, &s_palette_resolv_texture);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, s_palette_stream_buffer->m_buffer);
|
||||
|
||||
CreateTextureDecodingResources();
|
||||
}
|
||||
}
|
||||
|
||||
TextureCache::~TextureCache()
|
||||
{
|
||||
DeleteShaders();
|
||||
DestroyTextureDecodingResources();
|
||||
|
||||
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
|
||||
{
|
||||
|
@ -588,4 +633,159 @@ void TextureCache::ConvertTexture(TCacheEntryBase* _entry, TCacheEntryBase* _unc
|
|||
FramebufferManager::SetFramebuffer(0);
|
||||
g_renderer->RestoreAPIState();
|
||||
}
|
||||
|
||||
static const std::string decoding_vertex_shader = R"(
|
||||
void main()
|
||||
{
|
||||
vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);
|
||||
gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);
|
||||
}
|
||||
)";
|
||||
|
||||
void CreateTextureDecodingResources()
|
||||
{
|
||||
static const GLenum gl_view_types[TextureConversionShader::BUFFER_FORMAT_COUNT] = {
|
||||
GL_R8UI, // BUFFER_FORMAT_R8_UINT
|
||||
GL_R16UI, // BUFFER_FORMAT_R16_UINT
|
||||
GL_RG32UI, // BUFFER_FORMAT_R32G32_UINT
|
||||
};
|
||||
|
||||
glGenTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
|
||||
s_texture_decoding_buffer_views.data());
|
||||
for (size_t i = 0; i < TextureConversionShader::BUFFER_FORMAT_COUNT; i++)
|
||||
{
|
||||
glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[i]);
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], s_palette_stream_buffer->m_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
void DestroyTextureDecodingResources()
|
||||
{
|
||||
glDeleteTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
|
||||
s_texture_decoding_buffer_views.data());
|
||||
s_texture_decoding_buffer_views.fill(0);
|
||||
s_texture_decoding_program_info.clear();
|
||||
}
|
||||
|
||||
// Returns whether a compute program for decoding (format, palette_format) is available.
// The outcome, success or failure, is cached in s_texture_decoding_program_info, so shader
// generation and compilation are attempted at most once per format pair.
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
{
  const auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
  const auto cached = s_texture_decoding_program_info.find(key);
  if (cached != s_texture_decoding_program_info.end())
    return cached->second.valid;

  TextureDecodingProgramInfo info;
  bool usable = false;

  // Each step only runs when the previous one succeeded: shader info lookup, source
  // generation, then compilation.
  info.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
  if (info.base_info)
  {
    const std::string shader_source =
        TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::OpenGL);
    if (!shader_source.empty() &&
        ProgramShaderCache::CompileComputeShader(info.program, shader_source))
    {
      const auto program_id = info.program.glprogid;
      info.uniform_dst_size = glGetUniformLocation(program_id, "u_dst_size");
      info.uniform_src_size = glGetUniformLocation(program_id, "u_src_size");
      info.uniform_src_offset = glGetUniformLocation(program_id, "u_src_offset");
      info.uniform_src_row_stride = glGetUniformLocation(program_id, "u_src_row_stride");
      info.uniform_palette_offset = glGetUniformLocation(program_id, "u_palette_offset");
      info.valid = true;
      usable = true;
    }
  }

  // Failed attempts are stored too, so they are not retried on the next query.
  s_texture_decoding_program_info.emplace(key, info);
  return usable;
}
|
||||
|
||||
// Decodes one mip level of a texture on the GPU with a compute shader.
//
// The encoded texture data (and the palette, when the format needs one) is copied into
// s_palette_stream_buffer, the decoding program cached for (format, palette_format) is bound,
// and a compute dispatch writes the result into level dst_level of entry's texture.
//
// Does nothing when no usable program is cached for the format pair; programs are created
// and cached by SupportsGPUTextureDecode().
void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
                                      size_t data_size, TextureFormat format, u32 width, u32 height,
                                      u32 aligned_width, u32 aligned_height, u32 row_stride,
                                      const u8* palette, TlutFormat palette_format)
{
  auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
  auto iter = s_texture_decoding_program_info.find(key);
  // Failed lookups are cached as entries with valid == false (and possibly a null
  // base_info), so the entry being present is not enough to use it.
  if (iter == s_texture_decoding_program_info.end() || !iter->second.valid)
    return;

#ifdef TIME_TEXTURE_DECODING
  GPUTimer timer;
#endif

  // Copy to GPU-visible buffer, aligned to the data type.
  // Bind by reference: the cached entry is only read here, so copying it per call is wasted.
  auto& info = iter->second;
  u32 bytes_per_buffer_elem =
      TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);

  // Only copy palette if it is required.
  bool has_palette = info.base_info->palette_size > 0;
  u32 total_upload_size = static_cast<u32>(data_size);
  u32 palette_offset = total_upload_size;
  if (has_palette)
  {
    // Align to u16.
    if ((total_upload_size % sizeof(u16)) != 0)
    {
      total_upload_size++;
      palette_offset++;
    }

    total_upload_size += info.base_info->palette_size;
  }

  // Allocate space in stream buffer, and copy texture + palette across.
  // buffer.first is the write pointer, buffer.second the byte offset used for the uniforms.
  auto buffer = s_palette_stream_buffer->Map(total_upload_size, bytes_per_buffer_elem);
  memcpy(buffer.first, data, data_size);
  if (has_palette)
    memcpy(buffer.first + palette_offset, palette, info.base_info->palette_size);
  s_palette_stream_buffer->Unmap(total_upload_size);

  info.program.Bind();

  // Calculate stride in buffer elements
  u32 row_stride_in_elements = row_stride / bytes_per_buffer_elem;
  u32 offset_in_elements = buffer.second / bytes_per_buffer_elem;
  // The palette is addressed in u16 units, independent of the texture's element size.
  u32 palette_offset_in_elements = (buffer.second + palette_offset) / sizeof(u16);

  // A location of -1 means the program has no such active uniform, so it is skipped.
  if (info.uniform_dst_size >= 0)
    glUniform2ui(info.uniform_dst_size, width, height);
  if (info.uniform_src_size >= 0)
    glUniform2ui(info.uniform_src_size, aligned_width, aligned_height);
  if (info.uniform_src_offset >= 0)
    glUniform1ui(info.uniform_src_offset, offset_in_elements);
  if (info.uniform_src_row_stride >= 0)
    glUniform1ui(info.uniform_src_row_stride, row_stride_in_elements);
  if (info.uniform_palette_offset >= 0)
    glUniform1ui(info.uniform_palette_offset, palette_offset_in_elements);

  glActiveTexture(GL_TEXTURE9);
  glBindTexture(GL_TEXTURE_BUFFER, s_texture_decoding_buffer_views[info.base_info->buffer_format]);

  if (has_palette)
  {
    // Use an R16UI view for the palette.
    glActiveTexture(GL_TEXTURE10);
    glBindTexture(GL_TEXTURE_BUFFER, s_palette_resolv_texture);
  }

  auto dispatch_groups = TextureConversionShader::GetDispatchCount(info.base_info, width, height);
  glBindImageTexture(0, static_cast<TCacheEntry*>(entry)->texture, dst_level, GL_TRUE, 0,
                     GL_WRITE_ONLY, GL_RGBA8);
  glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1);
  glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);

  // The active texture unit was changed above.
  // NOTE(review): SetStage() presumably restores it — confirm.
  TextureCache::SetStage();

#ifdef TIME_TEXTURE_DECODING
  WARN_LOG(VIDEO, "Decode texture format %u size %ux%u took %.4fms", static_cast<u32>(format),
           width, height, timer.GetTimeMilliseconds());
#endif
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,12 @@ public:
|
|||
static void DisableStage(unsigned int stage);
|
||||
static void SetStage();
|
||||
|
||||
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
|
||||
void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
|
||||
TextureFormat format, u32 width, u32 height, u32 aligned_width,
|
||||
u32 aligned_height, u32 row_stride, const u8* palette,
|
||||
TlutFormat palette_format) override;
|
||||
|
||||
private:
|
||||
struct TCacheEntry : TCacheEntryBase
|
||||
{
|
||||
|
|
|
@ -101,6 +101,7 @@ void VideoBackend::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsExclusiveFullscreen = false;
|
||||
g_Config.backend_info.bSupportsOversizedViewports = true;
|
||||
g_Config.backend_info.bSupportsGeometryShaders = true;
|
||||
g_Config.backend_info.bSupportsComputeShaders = false;
|
||||
g_Config.backend_info.bSupports3DVision = false;
|
||||
g_Config.backend_info.bSupportsPostProcessing = true;
|
||||
g_Config.backend_info.bSupportsSSAA = true;
|
||||
|
@ -108,6 +109,11 @@ void VideoBackend::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsMultithreading = false;
|
||||
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = true;
|
||||
|
||||
// TODO: There is a bug here, if texel buffers are not supported the graphics options
|
||||
// will show the option when it is not supported. The only way around this would be
|
||||
// creating a context when calling this function to determine what is available.
|
||||
g_Config.backend_info.bSupportsGPUTextureDecoding = true;
|
||||
|
||||
// Overwritten in Render.cpp later
|
||||
g_Config.backend_info.bSupportsDualSourceBlend = true;
|
||||
g_Config.backend_info.bSupportsPrimitiveRestart = true;
|
||||
|
|
|
@ -131,7 +131,9 @@ void VideoSoftware::InitBackendInfo()
|
|||
g_Config.backend_info.bSupportsOversizedViewports = true;
|
||||
g_Config.backend_info.bSupportsPrimitiveRestart = false;
|
||||
g_Config.backend_info.bSupportsMultithreading = false;
|
||||
g_Config.backend_info.bSupportsComputeShaders = false;
|
||||
g_Config.backend_info.bSupportsInternalResolutionFrameDumps = false;
|
||||
g_Config.backend_info.bSupportsGPUTextureDecoding = false;
|
||||
|
||||
// aamodes
|
||||
g_Config.backend_info.AAModes = {1};
|
||||
|
|
|
@ -91,7 +91,8 @@ bool CommandBufferManager::CreateCommandBuffers()
|
|||
VkDescriptorPoolSize pool_sizes[] = {{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 500000},
|
||||
{VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 500000},
|
||||
{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 16},
|
||||
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024}};
|
||||
{VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1024},
|
||||
{VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1024}};
|
||||
|
||||
VkDescriptorPoolCreateInfo pool_create_info = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||
nullptr,
|
||||
|
|
|
@ -30,6 +30,7 @@ enum DESCRIPTOR_SET_LAYOUT
|
|||
DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS,
|
||||
DESCRIPTOR_SET_LAYOUT_SHADER_STORAGE_BUFFERS,
|
||||
DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS,
|
||||
DESCRIPTOR_SET_LAYOUT_COMPUTE,
|
||||
NUM_DESCRIPTOR_SET_LAYOUTS
|
||||
};
|
||||
|
||||
|
@ -52,6 +53,12 @@ enum DESCRIPTOR_SET_BIND_POINT
|
|||
// - Same as standard, plus 128 bytes of push constants, accessible from all stages.
|
||||
// - Texture Decoding
|
||||
// - Same as push constant, plus a single texel buffer accessible from PS.
|
||||
// - Compute
|
||||
// - 1 uniform buffer [set=0, binding=0]
|
||||
// - 4 combined image samplers [set=0, binding=1-4]
|
||||
// - 2 texel buffers [set=0, binding=5-6]
|
||||
// - 1 storage image [set=0, binding=7]
|
||||
// - 128 bytes of push constants
|
||||
//
|
||||
// All four pipeline layout share the first two descriptor sets (uniform buffers, PS samplers).
|
||||
// The third descriptor set (see bind points above) is used for storage or texel buffers.
|
||||
|
@ -62,6 +69,7 @@ enum PIPELINE_LAYOUT
|
|||
PIPELINE_LAYOUT_BBOX,
|
||||
PIPELINE_LAYOUT_PUSH_CONSTANT,
|
||||
PIPELINE_LAYOUT_TEXTURE_CONVERSION,
|
||||
PIPELINE_LAYOUT_COMPUTE,
|
||||
NUM_PIPELINE_LAYOUTS
|
||||
};
|
||||
|
||||
|
|
|
@ -324,6 +324,41 @@ std::pair<VkPipeline, bool> ObjectCache::GetPipelineWithCacheResult(const Pipeli
|
|||
return {pipeline, false};
|
||||
}
|
||||
|
||||
// Builds a compute pipeline from the shader module and pipeline layout in info. The shader
// entry point is "main". The returned handle is not tracked by the cache. Returns
// VK_NULL_HANDLE (after logging the error) when pipeline creation fails.
VkPipeline ObjectCache::CreateComputePipeline(const ComputePipelineInfo& info)
{
  VkPipelineShaderStageCreateInfo stage_info = {};
  stage_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  stage_info.pNext = nullptr;
  stage_info.flags = 0;
  stage_info.stage = VK_SHADER_STAGE_COMPUTE_BIT;
  stage_info.module = info.cs;
  stage_info.pName = "main";
  stage_info.pSpecializationInfo = nullptr;

  VkComputePipelineCreateInfo create_info = {};
  create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
  create_info.pNext = nullptr;
  create_info.flags = 0;
  create_info.stage = stage_info;
  create_info.layout = info.pipeline_layout;
  // Not derived from another pipeline.
  create_info.basePipelineHandle = VK_NULL_HANDLE;
  create_info.basePipelineIndex = -1;

  VkPipeline new_pipeline;
  const VkResult result = vkCreateComputePipelines(g_vulkan_context->GetDevice(), VK_NULL_HANDLE,
                                                   1, &create_info, nullptr, &new_pipeline);
  if (result == VK_SUCCESS)
    return new_pipeline;

  LOG_VULKAN_ERROR(result, "vkCreateComputePipelines failed: ");
  return VK_NULL_HANDLE;
}
|
||||
|
||||
// Returns the cached compute pipeline matching info, creating and caching it on a miss.
// The result of a failed creation (VK_NULL_HANDLE) is cached as well, so creation is
// attempted only once per description.
VkPipeline ObjectCache::GetComputePipeline(const ComputePipelineInfo& info)
{
  const auto cached = m_compute_pipeline_objects.find(info);
  if (cached == m_compute_pipeline_objects.end())
  {
    const VkPipeline created = CreateComputePipeline(info);
    m_compute_pipeline_objects.emplace(info, created);
    return created;
  }

  return cached->second;
}
|
||||
|
||||
std::string ObjectCache::GetDiskCacheFileName(const char* type)
|
||||
{
|
||||
return StringFromFormat("%svulkan-%s-%s.cache", File::GetUserPath(D_SHADERCACHE_IDX).c_str(),
|
||||
|
@ -477,6 +512,13 @@ void ObjectCache::DestroyPipelineCache()
|
|||
}
|
||||
m_pipeline_objects.clear();
|
||||
|
||||
for (const auto& it : m_compute_pipeline_objects)
|
||||
{
|
||||
if (it.second != VK_NULL_HANDLE)
|
||||
vkDestroyPipeline(g_vulkan_context->GetDevice(), it.second, nullptr);
|
||||
}
|
||||
m_compute_pipeline_objects.clear();
|
||||
|
||||
vkDestroyPipelineCache(g_vulkan_context->GetDevice(), m_pipeline_cache, nullptr);
|
||||
m_pipeline_cache = VK_NULL_HANDLE;
|
||||
}
|
||||
|
@ -725,6 +767,17 @@ bool ObjectCache::CreateDescriptorSetLayouts()
|
|||
{0, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_FRAGMENT_BIT},
|
||||
};
|
||||
|
||||
static const VkDescriptorSetLayoutBinding compute_set_bindings[] = {
|
||||
{0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 1, VK_SHADER_STAGE_COMPUTE_BIT},
|
||||
{1, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
|
||||
{2, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
|
||||
{3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
|
||||
{4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
|
||||
{5, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
|
||||
{6, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
|
||||
{7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, VK_SHADER_STAGE_COMPUTE_BIT},
|
||||
};
|
||||
|
||||
static const VkDescriptorSetLayoutCreateInfo create_infos[NUM_DESCRIPTOR_SET_LAYOUTS] = {
|
||||
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
|
||||
static_cast<u32>(ArraySize(ubo_set_bindings)), ubo_set_bindings},
|
||||
|
@ -733,7 +786,9 @@ bool ObjectCache::CreateDescriptorSetLayouts()
|
|||
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
|
||||
static_cast<u32>(ArraySize(ssbo_set_bindings)), ssbo_set_bindings},
|
||||
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
|
||||
static_cast<u32>(ArraySize(texel_buffer_set_bindings)), texel_buffer_set_bindings}};
|
||||
static_cast<u32>(ArraySize(texel_buffer_set_bindings)), texel_buffer_set_bindings},
|
||||
{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, 0,
|
||||
static_cast<u32>(ArraySize(compute_set_bindings)), compute_set_bindings}};
|
||||
|
||||
for (size_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; i++)
|
||||
{
|
||||
|
@ -774,8 +829,11 @@ bool ObjectCache::CreatePipelineLayouts()
|
|||
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_UNIFORM_BUFFERS],
|
||||
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_PIXEL_SHADER_SAMPLERS],
|
||||
m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_TEXEL_BUFFERS]};
|
||||
VkDescriptorSetLayout compute_sets[] = {m_descriptor_set_layouts[DESCRIPTOR_SET_LAYOUT_COMPUTE]};
|
||||
VkPushConstantRange push_constant_range = {
|
||||
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0, PUSH_CONSTANT_BUFFER_SIZE};
|
||||
VkPushConstantRange compute_push_constant_range = {VK_SHADER_STAGE_COMPUTE_BIT, 0,
|
||||
PUSH_CONSTANT_BUFFER_SIZE};
|
||||
|
||||
// Info for each pipeline layout
|
||||
VkPipelineLayoutCreateInfo pipeline_layout_info[NUM_PIPELINE_LAYOUTS] = {
|
||||
|
@ -794,7 +852,11 @@ bool ObjectCache::CreatePipelineLayouts()
|
|||
// Texture Conversion
|
||||
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
|
||||
static_cast<u32>(ArraySize(texture_conversion_sets)), texture_conversion_sets, 1,
|
||||
&push_constant_range}};
|
||||
&push_constant_range},
|
||||
|
||||
// Compute
|
||||
{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, nullptr, 0,
|
||||
static_cast<u32>(ArraySize(compute_sets)), compute_sets, 1, &compute_push_constant_range}};
|
||||
|
||||
for (size_t i = 0; i < NUM_PIPELINE_LAYOUTS; i++)
|
||||
{
|
||||
|
@ -1007,6 +1069,31 @@ bool operator<(const SamplerState& lhs, const SamplerState& rhs)
|
|||
return lhs.bits < rhs.bits;
|
||||
}
|
||||
|
||||
std::size_t ComputePipelineInfoHash::operator()(const ComputePipelineInfo& key) const
|
||||
{
|
||||
return static_cast<std::size_t>(XXH64(&key, sizeof(key), 0));
|
||||
}
|
||||
|
||||
bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
|
||||
{
|
||||
return std::memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
|
||||
}
|
||||
|
||||
bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
|
||||
{
|
||||
return !operator==(lhs, rhs);
|
||||
}
|
||||
|
||||
bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
|
||||
{
|
||||
return std::memcmp(&lhs, &rhs, sizeof(lhs)) < 0;
|
||||
}
|
||||
|
||||
bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs)
|
||||
{
|
||||
return std::memcmp(&lhs, &rhs, sizeof(lhs)) > 0;
|
||||
}
|
||||
|
||||
bool ObjectCache::CompileSharedShaders()
|
||||
{
|
||||
static const char PASSTHROUGH_VERTEX_SHADER_SOURCE[] = R"(
|
||||
|
|
|
@ -56,6 +56,22 @@ bool operator!=(const SamplerState& lhs, const SamplerState& rhs);
|
|||
bool operator>(const SamplerState& lhs, const SamplerState& rhs);
|
||||
bool operator<(const SamplerState& lhs, const SamplerState& rhs);
|
||||
|
||||
// Description of a compute pipeline: the pipeline layout plus the compute shader module.
// Used as the key of ObjectCache's compute pipeline map. Equality, ordering and hashing
// operate on the raw bytes of this struct (memcmp / XXH64).
// NOTE(review): bytewise comparison presumably relies on the struct having no padding —
// confirm before adding members.
struct ComputePipelineInfo
|
||||
{
|
||||
// Layout the pipeline is created with.
VkPipelineLayout pipeline_layout;
|
||||
// Compute shader module; CreateComputePipeline uses "main" as its entry point.
VkShaderModule cs;
|
||||
};
|
||||
|
||||
// Hash functor for ComputePipelineInfo, used by ObjectCache's unordered_map of compute
// pipelines.
struct ComputePipelineInfoHash
|
||||
{
|
||||
// Returns a hash of key's bytes.
std::size_t operator()(const ComputePipelineInfo& key) const;
|
||||
};
|
||||
|
||||
bool operator==(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
|
||||
bool operator!=(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
|
||||
bool operator<(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
|
||||
bool operator>(const ComputePipelineInfo& lhs, const ComputePipelineInfo& rhs);
|
||||
|
||||
class ObjectCache
|
||||
{
|
||||
public:
|
||||
|
@ -114,6 +130,12 @@ public:
|
|||
// otherwise for a cache hit it will be true.
|
||||
std::pair<VkPipeline, bool> GetPipelineWithCacheResult(const PipelineInfo& info);
|
||||
|
||||
// Creates a compute pipeline, and does not track the handle.
|
||||
VkPipeline CreateComputePipeline(const ComputePipelineInfo& info);
|
||||
|
||||
// Find a pipeline by the specified description, if not found, attempts to create it
|
||||
VkPipeline GetComputePipeline(const ComputePipelineInfo& info);
|
||||
|
||||
// Saves the pipeline cache to disk. Call when shutting down.
|
||||
void SavePipelineCache();
|
||||
|
||||
|
@ -166,6 +188,8 @@ private:
|
|||
ShaderCache<PixelShaderUid> m_ps_cache;
|
||||
|
||||
std::unordered_map<PipelineInfo, VkPipeline, PipelineInfoHash> m_pipeline_objects;
|
||||
std::unordered_map<ComputePipelineInfo, VkPipeline, ComputePipelineInfoHash>
|
||||
m_compute_pipeline_objects;
|
||||
VkPipelineCache m_pipeline_cache = VK_NULL_HANDLE;
|
||||
std::string m_pipeline_cache_filename;
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ static const TBuiltInResource* GetCompilerResourceLimits();
|
|||
// Compile a shader to SPIR-V via glslang
|
||||
static bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage,
|
||||
const char* stage_filename, const char* source_code,
|
||||
size_t source_code_length, bool prepend_header);
|
||||
size_t source_code_length, const char* header, size_t header_length);
|
||||
|
||||
// Regarding the UBO bind points, we subtract one from the binding index because
|
||||
// the OpenGL backend requires UBO #0 for non-block uniforms (at least on NV).
|
||||
|
@ -73,9 +73,32 @@ static const char SHADER_HEADER[] = R"(
|
|||
#define gl_VertexID gl_VertexIndex
|
||||
#define gl_InstanceID gl_InstanceIndex
|
||||
)";
|
||||
static const char COMPUTE_SHADER_HEADER[] = R"(
|
||||
// Target GLSL 4.5.
|
||||
#version 450 core
|
||||
// All resources are packed into one descriptor set for compute.
|
||||
#define UBO_BINDING(packing, x) layout(packing, set = 0, binding = (0 + x))
|
||||
#define SAMPLER_BINDING(x) layout(set = 0, binding = (1 + x))
|
||||
#define TEXEL_BUFFER_BINDING(x) layout(set = 0, binding = (5 + x))
|
||||
#define IMAGE_BINDING(format, x) layout(format, set = 0, binding = (7 + x))
|
||||
|
||||
// hlsl to glsl function translation
|
||||
#define float2 vec2
|
||||
#define float3 vec3
|
||||
#define float4 vec4
|
||||
#define uint2 uvec2
|
||||
#define uint3 uvec3
|
||||
#define uint4 uvec4
|
||||
#define int2 ivec2
|
||||
#define int3 ivec3
|
||||
#define int4 ivec4
|
||||
#define frac fract
|
||||
#define lerp mix
|
||||
)";
|
||||
|
||||
bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char* stage_filename,
|
||||
const char* source_code, size_t source_code_length, bool prepend_header)
|
||||
const char* source_code, size_t source_code_length, const char* header,
|
||||
size_t header_length)
|
||||
{
|
||||
if (!InitializeGlslang())
|
||||
return false;
|
||||
|
@ -91,10 +114,10 @@ bool CompileShaderToSPV(SPIRVCodeVector* out_code, EShLanguage stage, const char
|
|||
std::string full_source_code;
|
||||
const char* pass_source_code = source_code;
|
||||
int pass_source_code_length = static_cast<int>(source_code_length);
|
||||
if (prepend_header)
|
||||
if (header_length > 0)
|
||||
{
|
||||
full_source_code.reserve(sizeof(SHADER_HEADER) + source_code_length);
|
||||
full_source_code.append(SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
|
||||
full_source_code.reserve(header_length + source_code_length);
|
||||
full_source_code.append(header, header_length);
|
||||
full_source_code.append(source_code, source_code_length);
|
||||
pass_source_code = full_source_code.c_str();
|
||||
pass_source_code_length = static_cast<int>(full_source_code.length());
|
||||
|
@ -318,21 +341,28 @@ bool CompileVertexShader(SPIRVCodeVector* out_code, const char* source_code,
|
|||
size_t source_code_length, bool prepend_header)
|
||||
{
|
||||
return CompileShaderToSPV(out_code, EShLangVertex, "vs", source_code, source_code_length,
|
||||
prepend_header);
|
||||
SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
|
||||
}
|
||||
|
||||
bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code,
|
||||
size_t source_code_length, bool prepend_header)
|
||||
{
|
||||
return CompileShaderToSPV(out_code, EShLangGeometry, "gs", source_code, source_code_length,
|
||||
prepend_header);
|
||||
SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
|
||||
}
|
||||
|
||||
bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code,
|
||||
size_t source_code_length, bool prepend_header)
|
||||
{
|
||||
return CompileShaderToSPV(out_code, EShLangFragment, "ps", source_code, source_code_length,
|
||||
prepend_header);
|
||||
SHADER_HEADER, sizeof(SHADER_HEADER) - 1);
|
||||
}
|
||||
|
||||
// Compiles a GLSL compute shader to SPIR-V.
//
// out_code receives the generated SPIR-V. source_code / source_code_length give the shader
// text. When prepend_header is true, COMPUTE_SHADER_HEADER is placed in front of the source;
// when false, the source is compiled as-is. Returns the result of CompileShaderToSPV.
bool CompileComputeShader(SPIRVCodeVector* out_code, const char* source_code,
                          size_t source_code_length, bool prepend_header)
{
  // CompileShaderToSPV only prepends a header whose length is non-zero, so a zero length is
  // how "no header" is requested. Previously prepend_header was ignored.
  const size_t header_length = prepend_header ? sizeof(COMPUTE_SHADER_HEADER) - 1 : 0;
  return CompileShaderToSPV(out_code, EShLangCompute, "cs", source_code, source_code_length,
                            COMPUTE_SHADER_HEADER, header_length);
}
|
||||
|
||||
} // namespace ShaderCompiler
|
||||
|
|
|
@ -29,5 +29,9 @@ bool CompileGeometryShader(SPIRVCodeVector* out_code, const char* source_code,
|
|||
bool CompileFragmentShader(SPIRVCodeVector* out_code, const char* source_code,
|
||||
size_t source_code_length, bool prepend_header = true);
|
||||
|
||||
// Compile a compute shader to SPIR-V.
|
||||
bool CompileComputeShader(SPIRVCodeVector* out_code, const char* source_code,
|
||||
size_t source_code_length, bool prepend_header = true);
|
||||
|
||||
} // namespace ShaderCompiler
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "VideoBackends/Vulkan/CommandBufferManager.h"
|
||||
#include "VideoBackends/Vulkan/Texture2D.h"
|
||||
#include "VideoBackends/Vulkan/VulkanContext.h"
|
||||
|
@ -273,10 +274,132 @@ void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout
|
|||
break;
|
||||
}
|
||||
|
||||
// If we were using a compute layout, the stages need to reflect that
|
||||
switch (m_compute_layout)
|
||||
{
|
||||
case ComputeImageLayout::Undefined:
|
||||
break;
|
||||
case ComputeImageLayout::ReadOnly:
|
||||
barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
||||
srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
|
||||
break;
|
||||
case ComputeImageLayout::WriteOnly:
|
||||
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
|
||||
break;
|
||||
case ComputeImageLayout::ReadWrite:
|
||||
barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
|
||||
srcStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
|
||||
break;
|
||||
}
|
||||
m_compute_layout = ComputeImageLayout::Undefined;
|
||||
|
||||
vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1,
|
||||
&barrier);
|
||||
|
||||
m_layout = new_layout;
|
||||
}
|
||||
|
||||
// Records an image memory barrier that moves the image from its current layout into a layout
// usable by a compute shader.
//
// command_buffer: command buffer the barrier is recorded into.
// new_layout: requested compute usage; must not be ComputeImageLayout::Undefined.
//
// ReadOnly maps to VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; WriteOnly and ReadWrite map to
// VK_IMAGE_LAYOUT_GENERAL. Does nothing if the image is already in the requested compute
// layout. All mip levels and layers are transitioned.
void Texture2D::TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout)
{
  _assert_(new_layout != ComputeImageLayout::Undefined);
  if (m_compute_layout == new_layout)
    return;

  VkImageMemoryBarrier barrier = {
      VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,  // VkStructureType            sType
      nullptr,                                 // const void*                pNext
      0,                                       // VkAccessFlags              srcAccessMask
      0,                                       // VkAccessFlags              dstAccessMask
      m_layout,                                // VkImageLayout              oldLayout
      VK_IMAGE_LAYOUT_GENERAL,                 // VkImageLayout              newLayout
      VK_QUEUE_FAMILY_IGNORED,                 // uint32_t                   srcQueueFamilyIndex
      VK_QUEUE_FAMILY_IGNORED,                 // uint32_t                   dstQueueFamilyIndex
      m_image,                                 // VkImage                    image
      {static_cast<VkImageAspectFlags>(Util::IsDepthFormat(m_format) ? VK_IMAGE_ASPECT_DEPTH_BIT :
                                                                       VK_IMAGE_ASPECT_COLOR_BIT),
       0, m_levels, 0, m_layers}  // VkImageSubresourceRange    subresourceRange
  };

  // Source scope: which earlier accesses must complete, based on the current layout.
  // NOTE(review): when the image is already in a compute layout with m_layout ==
  // VK_IMAGE_LAYOUT_GENERAL, the default case applies and earlier compute shader accesses
  // are not part of the source scope — confirm this is intended.
  VkPipelineStageFlags srcStageMask, dstStageMask;
  switch (m_layout)
  {
  case VK_IMAGE_LAYOUT_UNDEFINED:
    // Layout undefined therefore contents undefined, and we don't care what happens to it.
    barrier.srcAccessMask = 0;
    srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
    break;

  case VK_IMAGE_LAYOUT_PREINITIALIZED:
    // Image has been pre-initialized by the host, so ensure all writes have completed.
    barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
    srcStageMask = VK_PIPELINE_STAGE_HOST_BIT;
    break;

  case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
    // Image was being used as a color attachment, so ensure all writes have completed.
    barrier.srcAccessMask =
        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
    srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    break;

  case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
    // Image was being used as a depthstencil attachment, so ensure all writes have completed.
    // Depth/stencil attachment accesses happen in the early/late fragment test stages; the
    // color attachment output stage does not support these access bits.
    barrier.srcAccessMask =
        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
    srcStageMask =
        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
    break;

  case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
    // Image was being used as a shader resource, make sure all reads have finished.
    barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
    srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
    break;

  case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
    // Image was being used as a copy source, ensure all reads have finished.
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
    srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
    break;

  case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
    // Image was being used as a copy destination, ensure all writes have finished.
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
    break;

  default:
    srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
    break;
  }

  // Destination scope: how the compute shader is going to access the image.
  switch (new_layout)
  {
  case ComputeImageLayout::ReadOnly:
    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
    barrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    break;
  case ComputeImageLayout::WriteOnly:
    barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    break;
  case ComputeImageLayout::ReadWrite:
    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
    barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
    dstStageMask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    break;
  default:
    dstStageMask = 0;
    break;
  }

  // Track both the Vulkan layout and the compute usage the image is now in.
  m_layout = barrier.newLayout;
  m_compute_layout = new_layout;

  vkCmdPipelineBarrier(command_buffer, srcStageMask, dstStageMask, 0, 0, nullptr, 0, nullptr, 1,
                       &barrier);
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -17,6 +17,15 @@ class ObjectCache;
|
|||
class Texture2D
|
||||
{
|
||||
public:
|
||||
// Custom image layouts, mainly used for switching to/from compute
|
||||
// Compute shader usage an image can be transitioned to via TransitionToLayout().
enum class ComputeImageLayout
|
||||
{
|
||||
// Not in a compute layout; this is the default and is restored by a regular
// VkImageLayout transition.
Undefined,
|
||||
// Shader reads only; uses VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL.
ReadOnly,
|
||||
// Shader writes only; uses VK_IMAGE_LAYOUT_GENERAL.
WriteOnly,
|
||||
// Shader reads and writes; uses VK_IMAGE_LAYOUT_GENERAL.
ReadWrite
|
||||
};
|
||||
|
||||
Texture2D(u32 width, u32 height, u32 levels, u32 layers, VkFormat format,
|
||||
VkSampleCountFlagBits samples, VkImageViewType view_type, VkImage image,
|
||||
VkDeviceMemory device_memory, VkImageView view);
|
||||
|
@ -50,6 +59,7 @@ public:
|
|||
void OverrideImageLayout(VkImageLayout new_layout);
|
||||
|
||||
void TransitionToLayout(VkCommandBuffer command_buffer, VkImageLayout new_layout);
|
||||
void TransitionToLayout(VkCommandBuffer command_buffer, ComputeImageLayout new_layout);
|
||||
|
||||
private:
|
||||
u32 m_width;
|
||||
|
@ -60,6 +70,7 @@ private:
|
|||
VkSampleCountFlagBits m_samples;
|
||||
VkImageViewType m_view_type;
|
||||
VkImageLayout m_layout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
ComputeImageLayout m_compute_layout = ComputeImageLayout::Undefined;
|
||||
|
||||
VkImage m_image;
|
||||
VkDeviceMemory m_device_memory;
|
||||
|
|
|
@ -138,6 +138,21 @@ void TextureCache::CopyRectangleFromTexture(TCacheEntry* dst_texture,
|
|||
ScaleTextureRectangle(dst_texture, dst_rect, src_texture, src_rect);
|
||||
}
|
||||
|
||||
// Reports whether the texture converter can decode this texture/palette format pair on the GPU.
// The decision is delegated entirely to TextureConverter::SupportsTextureDecoding.
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
{
  const bool converter_supports_format =
      m_texture_converter->SupportsTextureDecoding(format, palette_format);
  return converter_supports_format;
}
|
||||
|
||||
// Hands a GPU decode request over to the texture converter.
// entry is downcast to this backend's TCacheEntry; every other argument is passed through
// unchanged to TextureConverter::DecodeTexture.
void TextureCache::DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
                                      size_t data_size, TextureFormat format, u32 width, u32 height,
                                      u32 aligned_width, u32 aligned_height, u32 row_stride,
                                      const u8* palette, TlutFormat palette_format)
{
  TCacheEntry* const backend_entry = static_cast<TCacheEntry*>(entry);
  m_texture_converter->DecodeTexture(backend_entry, dst_level, data, data_size, format, width,
                                     height, aligned_width, aligned_height, row_stride, palette,
                                     palette_format);
}
|
||||
|
||||
void TextureCache::CopyTextureRectangle(TCacheEntry* dst_texture,
|
||||
const MathUtil::Rectangle<int>& dst_rect,
|
||||
Texture2D* src_texture,
|
||||
|
|
|
@ -66,6 +66,13 @@ public:
|
|||
void CopyRectangleFromTexture(TCacheEntry* dst_texture, const MathUtil::Rectangle<int>& dst_rect,
|
||||
Texture2D* src_texture, const MathUtil::Rectangle<int>& src_rect);
|
||||
|
||||
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override;
|
||||
|
||||
void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data, size_t data_size,
|
||||
TextureFormat format, u32 width, u32 height, u32 aligned_width,
|
||||
u32 aligned_height, u32 row_stride, const u8* palette,
|
||||
TlutFormat palette_format) override;
|
||||
|
||||
private:
|
||||
bool CreateRenderPasses();
|
||||
|
||||
|
|
|
@ -42,8 +42,12 @@ TextureConverter::~TextureConverter()
|
|||
vkDestroyShaderModule(g_vulkan_context->GetDevice(), it, nullptr);
|
||||
}
|
||||
|
||||
if (m_texel_buffer_view_r8_uint != VK_NULL_HANDLE)
|
||||
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r8_uint, nullptr);
|
||||
if (m_texel_buffer_view_r16_uint != VK_NULL_HANDLE)
|
||||
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r16_uint, nullptr);
|
||||
if (m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE)
|
||||
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_r32g32_uint, nullptr);
|
||||
if (m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE)
|
||||
vkDestroyBufferView(g_vulkan_context->GetDevice(), m_texel_buffer_view_rgba8_unorm, nullptr);
|
||||
|
||||
|
@ -59,6 +63,12 @@ TextureConverter::~TextureConverter()
|
|||
vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader, nullptr);
|
||||
}
|
||||
|
||||
for (const auto& it : m_decoding_pipelines)
|
||||
{
|
||||
if (it.second.compute_shader != VK_NULL_HANDLE)
|
||||
vkDestroyShaderModule(g_vulkan_context->GetDevice(), it.second.compute_shader, nullptr);
|
||||
}
|
||||
|
||||
if (m_rgb_to_yuyv_shader != VK_NULL_HANDLE)
|
||||
vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_rgb_to_yuyv_shader, nullptr);
|
||||
if (m_yuyv_to_rgb_shader != VK_NULL_HANDLE)
|
||||
|
@ -103,6 +113,12 @@ bool TextureConverter::Initialize()
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!CreateDecodingTexture())
|
||||
{
|
||||
PanicAlert("Failed to create decoding texture");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!CompileYUYVConversionShaders())
|
||||
{
|
||||
PanicAlert("Failed to compile YUYV conversion shaders");
|
||||
|
@ -371,6 +387,152 @@ void TextureConverter::DecodeYUYVTextureFromMemory(TextureCache::TCacheEntry* ds
|
|||
draw.EndRenderPass();
|
||||
}
|
||||
|
||||
// Returns true if a decoding compute shader is available for the given texture/palette format.
// The outcome (success or failure) is cached in m_decoding_pipelines, so the shader for a given
// format pair is generated and compiled at most once.
bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TlutFormat palette_format)
{
  const auto cache_key = std::make_pair(format, palette_format);
  const auto cached = m_decoding_pipelines.find(cache_key);
  if (cached != m_decoding_pipelines.end())
    return cached->second.valid;

  // Start from a "not supported" record; it is only upgraded once the shader module exists.
  TextureDecodingPipeline record;
  record.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
  record.compute_shader = VK_NULL_HANDLE;
  record.valid = false;

  if (record.base_info)
  {
    const std::string glsl =
        TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::Vulkan);
    record.compute_shader = Util::CompileAndCreateComputeShader(glsl, true);
    record.valid = record.compute_shader != VK_NULL_HANDLE;
  }

  // Failures are remembered too, so an unsupported format is not retried on every lookup.
  m_decoding_pipelines.emplace(cache_key, record);
  return record.valid;
}
|
||||
|
||||
// Decodes a texture held in guest memory format into level dst_level of entry, using a compute
// shader recorded into the init command buffer.
//
// data/data_size    - encoded texture data to upload.
// format            - source texture format; together with palette_format it selects the pipeline.
// width/height      - size of the decoded image in pixels.
// aligned_width/aligned_height - image size in pixels, aligned to the block size.
// row_stride        - number of bytes in one row of blocks.
// palette           - palette data, only read when the format's shader info has a palette size.
//
// Does nothing if no valid pipeline was prepared for the format pair by SupportsTextureDecoding,
// if the format's buffer layout has no matching texel buffer view, or if upload space cannot be
// reserved.
void TextureConverter::DecodeTexture(TextureCache::TCacheEntry* entry, u32 dst_level,
                                     const u8* data, size_t data_size, TextureFormat format,
                                     u32 width, u32 height, u32 aligned_width, u32 aligned_height,
                                     u32 row_stride, const u8* palette, TlutFormat palette_format)
{
  // Failed lookups are cached with valid == false (and possibly a null base_info), so both
  // "never prepared" and "known unsupported" must bail out before anything is dereferenced.
  const auto iter = m_decoding_pipelines.find(std::make_pair(format, palette_format));
  if (iter == m_decoding_pipelines.end() || !iter->second.valid)
    return;

  // Bind by reference; the pipeline record does not need to be copied for every decode.
  const TextureDecodingPipeline& pipeline = iter->second;

  struct PushConstants
  {
    u32 dst_size[2];
    u32 src_size[2];
    u32 src_offset;
    u32 src_row_stride;
    u32 palette_offset;
  };

  // Determine view to use for texel buffers. Done before any memory is reserved so that an
  // unhandled buffer format cannot lead to a dispatch without an input buffer bound.
  VkBufferView data_view = VK_NULL_HANDLE;
  switch (pipeline.base_info->buffer_format)
  {
  case TextureConversionShader::BUFFER_FORMAT_R8_UINT:
    data_view = m_texel_buffer_view_r8_uint;
    break;
  case TextureConversionShader::BUFFER_FORMAT_R16_UINT:
    data_view = m_texel_buffer_view_r16_uint;
    break;
  case TextureConversionShader::BUFFER_FORMAT_R32G32_UINT:
    data_view = m_texel_buffer_view_r32g32_uint;
    break;
  default:
    break;
  }
  if (data_view == VK_NULL_HANDLE)
  {
    PanicAlert("Unhandled buffer format for GPU texture decoding");
    return;
  }

  // Copy to GPU-visible buffer, aligned to the data type.
  const u32 bytes_per_buffer_elem =
      TextureConversionShader::GetBytesPerBufferElement(pipeline.base_info->buffer_format);

  // Calculate total data size, including palette.
  // Only copy palette if it is required.
  u32 total_upload_size = static_cast<u32>(data_size);
  const u32 palette_size = pipeline.base_info->palette_size;
  u32 palette_offset = total_upload_size;
  const bool has_palette = palette_size > 0;
  if (has_palette)
  {
    // Align the palette's offset within the upload to u16.
    if ((total_upload_size % sizeof(u16)) != 0)
    {
      total_upload_size++;
      palette_offset++;
    }

    total_upload_size += palette_size;
  }

  // The palette is addressed in u16 elements relative to the start of the buffer, so the base
  // of the upload must be u16-aligned as well. With a one-byte element format the allocation
  // could otherwise begin on an odd offset and the palette index below would be truncated.
  u32 upload_alignment = bytes_per_buffer_elem;
  if (has_palette && upload_alignment < sizeof(u16))
    upload_alignment = static_cast<u32>(sizeof(u16));

  // Allocate space for upload, if it fails, execute the buffer.
  if (!m_texel_buffer->ReserveMemory(total_upload_size, upload_alignment))
  {
    Util::ExecuteCurrentCommandsAndRestoreState(true, false);
    if (!m_texel_buffer->ReserveMemory(total_upload_size, upload_alignment))
    {
      // Nothing was reserved, so copying into the buffer would write past valid space.
      PanicAlert("Failed to reserve memory for encoded texture upload");
      return;
    }
  }

  // Copy/commit upload buffer.
  const u32 texel_buffer_offset = static_cast<u32>(m_texel_buffer->GetCurrentOffset());
  std::memcpy(m_texel_buffer->GetCurrentHostPointer(), data, data_size);
  if (has_palette)
    std::memcpy(m_texel_buffer->GetCurrentHostPointer() + palette_offset, palette, palette_size);
  m_texel_buffer->CommitMemory(total_upload_size);

  // Determine uniforms. Offsets and strides are expressed in buffer elements, not bytes.
  PushConstants constants = {
      {width, height},
      {aligned_width, aligned_height},
      texel_buffer_offset / bytes_per_buffer_elem,
      row_stride / bytes_per_buffer_elem,
      static_cast<u32>((texel_buffer_offset + palette_offset) / sizeof(u16))};

  // Place compute shader dispatches together in the init command buffer.
  // That way we don't have to pay a penalty for switching from graphics->compute,
  // or end/restart our render pass.
  VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentInitCommandBuffer();

  // Dispatch compute to temporary texture.
  ComputeShaderDispatcher dispatcher(command_buffer,
                                     g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_COMPUTE),
                                     pipeline.compute_shader);
  m_decoding_texture->TransitionToLayout(command_buffer, Texture2D::ComputeImageLayout::WriteOnly);
  dispatcher.SetPushConstants(&constants, sizeof(constants));
  dispatcher.SetStorageImage(m_decoding_texture->GetView(), m_decoding_texture->GetLayout());
  dispatcher.SetTexelBuffer(0, data_view);
  if (has_palette)
    dispatcher.SetTexelBuffer(1, m_texel_buffer_view_r16_uint);
  const auto groups = TextureConversionShader::GetDispatchCount(pipeline.base_info, width, height);
  dispatcher.Dispatch(groups.first, groups.second, 1);

  // Copy from temporary texture to final destination.
  m_decoding_texture->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
  entry->GetTexture()->TransitionToLayout(command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
  VkImageCopy image_copy = {{VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},
                            {0, 0, 0},
                            {VK_IMAGE_ASPECT_COLOR_BIT, dst_level, 0, 1},
                            {0, 0, 0},
                            {width, height, 1}};
  vkCmdCopyImage(command_buffer, m_decoding_texture->GetImage(),
                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, entry->GetTexture()->GetImage(),
                 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &image_copy);
}
|
||||
|
||||
bool TextureConverter::CreateTexelBuffer()
|
||||
{
|
||||
// Prefer an 8MB buffer if possible, but use less if the device doesn't support this.
|
||||
|
@ -386,9 +548,13 @@ bool TextureConverter::CreateTexelBuffer()
|
|||
return false;
|
||||
|
||||
// Create views of the formats that we will be using.
|
||||
m_texel_buffer_view_r8_uint = CreateTexelBufferView(VK_FORMAT_R8_UINT);
|
||||
m_texel_buffer_view_r16_uint = CreateTexelBufferView(VK_FORMAT_R16_UINT);
|
||||
m_texel_buffer_view_r32g32_uint = CreateTexelBufferView(VK_FORMAT_R32G32_UINT);
|
||||
m_texel_buffer_view_rgba8_unorm = CreateTexelBufferView(VK_FORMAT_R8G8B8A8_UNORM);
|
||||
return m_texel_buffer_view_r16_uint != VK_NULL_HANDLE &&
|
||||
return m_texel_buffer_view_r8_uint != VK_NULL_HANDLE &&
|
||||
m_texel_buffer_view_r16_uint != VK_NULL_HANDLE &&
|
||||
m_texel_buffer_view_r32g32_uint != VK_NULL_HANDLE &&
|
||||
m_texel_buffer_view_rgba8_unorm != VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
|
@ -611,6 +777,15 @@ bool TextureConverter::CreateEncodingDownloadTexture()
|
|||
return m_encoding_download_texture && m_encoding_download_texture->Map();
|
||||
}
|
||||
|
||||
bool TextureConverter::CreateDecodingTexture()
|
||||
{
|
||||
m_decoding_texture = Texture2D::Create(
|
||||
DECODING_TEXTURE_WIDTH, DECODING_TEXTURE_HEIGHT, 1, 1, VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_VIEW_TYPE_2D_ARRAY, VK_IMAGE_TILING_OPTIMAL,
|
||||
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
|
||||
return static_cast<bool>(m_decoding_texture);
|
||||
}
|
||||
|
||||
bool TextureConverter::CompileYUYVConversionShaders()
|
||||
{
|
||||
static const char RGB_TO_YUYV_SHADER_SOURCE[] = R"(
|
||||
|
|
|
@ -5,11 +5,14 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "VideoBackends/Vulkan/StreamBuffer.h"
|
||||
#include "VideoBackends/Vulkan/TextureCache.h"
|
||||
#include "VideoCommon/TextureConversionShader.h"
|
||||
#include "VideoCommon/TextureDecoder.h"
|
||||
#include "VideoCommon/VideoCommon.h"
|
||||
|
||||
|
@ -45,6 +48,12 @@ public:
|
|||
void DecodeYUYVTextureFromMemory(TextureCache::TCacheEntry* dst_texture, const void* src_ptr,
|
||||
u32 src_width, u32 src_stride, u32 src_height);
|
||||
|
||||
bool SupportsTextureDecoding(TextureFormat format, TlutFormat palette_format);
|
||||
void DecodeTexture(TextureCache::TCacheEntry* entry, u32 dst_level, const u8* data,
|
||||
size_t data_size, TextureFormat format, u32 width, u32 height,
|
||||
u32 aligned_width, u32 aligned_height, u32 row_stride, const u8* palette,
|
||||
TlutFormat palette_format);
|
||||
|
||||
private:
|
||||
static const u32 NUM_TEXTURE_ENCODING_SHADERS = 64;
|
||||
static const u32 ENCODING_TEXTURE_WIDTH = EFB_WIDTH * 4;
|
||||
|
@ -52,6 +61,10 @@ private:
|
|||
static const VkFormat ENCODING_TEXTURE_FORMAT = VK_FORMAT_B8G8R8A8_UNORM;
|
||||
static const size_t NUM_PALETTE_CONVERSION_SHADERS = 3;
|
||||
|
||||
// Maximum size of a texture based on BP registers.
|
||||
static const u32 DECODING_TEXTURE_WIDTH = 1024;
|
||||
static const u32 DECODING_TEXTURE_HEIGHT = 1024;
|
||||
|
||||
bool CreateTexelBuffer();
|
||||
VkBufferView CreateTexelBufferView(VkFormat format) const;
|
||||
|
||||
|
@ -62,6 +75,8 @@ private:
|
|||
bool CreateEncodingTexture();
|
||||
bool CreateEncodingDownloadTexture();
|
||||
|
||||
bool CreateDecodingTexture();
|
||||
|
||||
bool CompileYUYVConversionShaders();
|
||||
|
||||
// Allocates storage in the texel command buffer of the specified size.
|
||||
|
@ -77,7 +92,9 @@ private:
|
|||
|
||||
// Shared between conversion types
|
||||
std::unique_ptr<StreamBuffer> m_texel_buffer;
|
||||
VkBufferView m_texel_buffer_view_r8_uint = VK_NULL_HANDLE;
|
||||
VkBufferView m_texel_buffer_view_r16_uint = VK_NULL_HANDLE;
|
||||
VkBufferView m_texel_buffer_view_r32g32_uint = VK_NULL_HANDLE;
|
||||
VkBufferView m_texel_buffer_view_rgba8_unorm = VK_NULL_HANDLE;
|
||||
size_t m_texel_buffer_size = 0;
|
||||
|
||||
|
@ -91,6 +108,16 @@ private:
|
|||
VkFramebuffer m_encoding_render_framebuffer = VK_NULL_HANDLE;
|
||||
std::unique_ptr<StagingTexture2D> m_encoding_download_texture;
|
||||
|
||||
// Texture decoding - GX format in memory->RGBA8
|
||||
// Cached result of preparing a decoding shader for one (texture format, palette format) pair.
// Entries are created by SupportsTextureDecoding for both successful and failed attempts.
struct TextureDecodingPipeline
|
||||
{
|
||||
// Shader info returned by TextureConversionShader::GetDecodingShaderInfo; null when that
// lookup fails, in which case the entry is stored with valid == false.
const TextureConversionShader::DecodingShaderInfo* base_info;
|
||||
// Compiled compute shader module, or VK_NULL_HANDLE if none was compiled.
VkShaderModule compute_shader;
|
||||
// True only when compute_shader was compiled successfully.
bool valid;
|
||||
};
|
||||
std::map<std::pair<TextureFormat, TlutFormat>, TextureDecodingPipeline> m_decoding_pipelines;
|
||||
std::unique_ptr<Texture2D> m_decoding_texture;
|
||||
|
||||
// XFB encoding/decoding shaders
|
||||
VkShaderModule m_rgb_to_yuyv_shader = VK_NULL_HANDLE;
|
||||
VkShaderModule m_yuyv_to_rgb_shader = VK_NULL_HANDLE;
|
||||
|
|
|
@ -250,6 +250,18 @@ VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code, bo
|
|||
return CreateShaderModule(code.data(), code.size());
|
||||
}
|
||||
|
||||
// Compiles compute shader source to SPIR-V and wraps the result in a shader module.
// Returns VK_NULL_HANDLE if compilation fails; the intermediate SPIR-V is discarded.
VkShaderModule CompileAndCreateComputeShader(const std::string& source_code, bool prepend_header)
{
  ShaderCompiler::SPIRVCodeVector spirv;
  const bool compiled = ShaderCompiler::CompileComputeShader(
      &spirv, source_code.c_str(), source_code.length(), prepend_header);

  if (compiled)
    return CreateShaderModule(spirv.data(), spirv.size());

  return VK_NULL_HANDLE;
}
|
||||
|
||||
} // namespace Util
|
||||
|
||||
UtilityShaderDraw::UtilityShaderDraw(VkCommandBuffer command_buffer,
|
||||
|
@ -670,4 +682,157 @@ bool UtilityShaderDraw::BindPipeline()
|
|||
return true;
|
||||
}
|
||||
|
||||
// Prepares a dispatcher that records into command_buffer, using the given pipeline layout and
// compute shader module to describe the pipeline.
ComputeShaderDispatcher::ComputeShaderDispatcher(VkCommandBuffer command_buffer,
                                                 VkPipelineLayout pipeline_layout,
                                                 VkShaderModule compute_shader)
    : m_command_buffer(command_buffer)
{
  // Only the shader and layout are filled in here; all other members keep the defaults given
  // at their declaration until the corresponding setter is called.
  m_pipeline_info.cs = compute_shader;
  m_pipeline_info.pipeline_layout = pipeline_layout;
}
|
||||
|
||||
// Reserves size bytes in the shared utility-shader uniform stream buffer and returns the host
// pointer to write the uniform data to. Follow up with CommitUniformBuffer once the data has
// been written. A failed reservation is reported with a panic alert; the buffer's current host
// pointer is returned regardless.
u8* ComputeShaderDispatcher::AllocateUniformBuffer(size_t size)
{
  const auto& uniform_stream = g_object_cache->GetUtilityShaderUniformBuffer();

  const bool reserved = uniform_stream->ReserveMemory(
      size, g_vulkan_context->GetUniformBufferAlignment(), true, true, true);
  if (!reserved)
    PanicAlert("Failed to allocate util uniforms");

  return g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentHostPointer();
}
|
||||
|
||||
void ComputeShaderDispatcher::CommitUniformBuffer(size_t size)
|
||||
{
|
||||
m_uniform_buffer.buffer = g_object_cache->GetUtilityShaderUniformBuffer()->GetBuffer();
|
||||
m_uniform_buffer.offset = 0;
|
||||
m_uniform_buffer.range = size;
|
||||
m_uniform_buffer_offset =
|
||||
static_cast<u32>(g_object_cache->GetUtilityShaderUniformBuffer()->GetCurrentOffset());
|
||||
|
||||
g_object_cache->GetUtilityShaderUniformBuffer()->CommitMemory(size);
|
||||
}
|
||||
|
||||
void ComputeShaderDispatcher::SetPushConstants(const void* data, size_t data_size)
|
||||
{
|
||||
_assert_(static_cast<u32>(data_size) < PUSH_CONSTANT_BUFFER_SIZE);
|
||||
|
||||
vkCmdPushConstants(m_command_buffer, m_pipeline_info.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, static_cast<u32>(data_size), data);
|
||||
}
|
||||
|
||||
void ComputeShaderDispatcher::SetSampler(size_t index, VkImageView view, VkSampler sampler)
|
||||
{
|
||||
m_samplers[index].sampler = sampler;
|
||||
m_samplers[index].imageView = view;
|
||||
m_samplers[index].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
|
||||
}
|
||||
|
||||
// Selects the storage image the shader accesses, along with the layout the image will be in
// when the dispatch executes. Storage images have no sampler, so that field is cleared.
void ComputeShaderDispatcher::SetStorageImage(VkImageView view, VkImageLayout image_layout)
{
  VkDescriptorImageInfo& image = m_storage_image;
  image.imageLayout = image_layout;
  image.imageView = view;
  image.sampler = VK_NULL_HANDLE;
}
|
||||
|
||||
void ComputeShaderDispatcher::SetTexelBuffer(size_t index, VkBufferView view)
|
||||
{
|
||||
m_texel_buffers[index] = view;
|
||||
}
|
||||
|
||||
// Binds the descriptors and the compute pipeline, then records a dispatch of the given number
// of work groups. No dispatch is recorded if the pipeline cannot be bound.
void ComputeShaderDispatcher::Dispatch(u32 groups_x, u32 groups_y, u32 groups_z)
{
  BindDescriptors();

  const bool pipeline_bound = BindPipeline();
  if (pipeline_bound)
    vkCmdDispatch(m_command_buffer, groups_x, groups_y, groups_z);
}
|
||||
|
||||
void ComputeShaderDispatcher::BindDescriptors()
|
||||
{
|
||||
VkDescriptorSet set = g_command_buffer_mgr->AllocateDescriptorSet(
|
||||
g_object_cache->GetDescriptorSetLayout(DESCRIPTOR_SET_LAYOUT_COMPUTE));
|
||||
if (set == VK_NULL_HANDLE)
|
||||
{
|
||||
PanicAlert("Failed to allocate descriptor set for compute dispatch");
|
||||
return;
|
||||
}
|
||||
|
||||
// Reserve enough descriptors to write every binding.
|
||||
std::array<VkWriteDescriptorSet, 7> set_writes = {};
|
||||
u32 num_set_writes = 0;
|
||||
|
||||
if (m_uniform_buffer.buffer != VK_NULL_HANDLE)
|
||||
{
|
||||
set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
nullptr,
|
||||
set,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
|
||||
nullptr,
|
||||
&m_uniform_buffer,
|
||||
nullptr};
|
||||
}
|
||||
|
||||
// Samplers
|
||||
for (size_t i = 0; i < m_samplers.size(); i++)
|
||||
{
|
||||
const VkDescriptorImageInfo& info = m_samplers[i];
|
||||
if (info.imageView != VK_NULL_HANDLE && info.sampler != VK_NULL_HANDLE)
|
||||
{
|
||||
set_writes[num_set_writes++] = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
nullptr,
|
||||
set,
|
||||
static_cast<u32>(1 + i),
|
||||
0,
|
||||
1,
|
||||
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
&info,
|
||||
nullptr,
|
||||
nullptr};
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < m_texel_buffers.size(); i++)
|
||||
{
|
||||
if (m_texel_buffers[i] != VK_NULL_HANDLE)
|
||||
{
|
||||
set_writes[num_set_writes++] = {
|
||||
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, set, 5 + static_cast<u32>(i), 0, 1,
|
||||
VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, nullptr, nullptr, &m_texel_buffers[i]};
|
||||
}
|
||||
}
|
||||
|
||||
if (m_storage_image.imageView != VK_NULL_HANDLE)
|
||||
{
|
||||
set_writes[num_set_writes++] = {
|
||||
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, nullptr, set, 7, 0, 1,
|
||||
VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &m_storage_image, nullptr, nullptr};
|
||||
}
|
||||
|
||||
if (num_set_writes > 0)
|
||||
{
|
||||
vkUpdateDescriptorSets(g_vulkan_context->GetDevice(), num_set_writes, set_writes.data(), 0,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
vkCmdBindDescriptorSets(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
m_pipeline_info.pipeline_layout, 0, 1, &set, 1, &m_uniform_buffer_offset);
|
||||
}
|
||||
|
||||
bool ComputeShaderDispatcher::BindPipeline()
|
||||
{
|
||||
VkPipeline pipeline = g_object_cache->GetComputePipeline(m_pipeline_info);
|
||||
if (pipeline == VK_NULL_HANDLE)
|
||||
{
|
||||
PanicAlert("Failed to get pipeline for backend compute dispatch");
|
||||
return false;
|
||||
}
|
||||
|
||||
vkCmdBindPipeline(m_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -63,6 +63,10 @@ VkShaderModule CompileAndCreateGeometryShader(const std::string& source_code,
|
|||
// Compile a fragment shader and create a shader module, discarding the intermediate SPIR-V.
|
||||
VkShaderModule CompileAndCreateFragmentShader(const std::string& source_code,
|
||||
bool prepend_header = true);
|
||||
|
||||
// Compile a compute shader and create a shader module, discarding the intermediate SPIR-V.
|
||||
VkShaderModule CompileAndCreateComputeShader(const std::string& source_code,
|
||||
bool prepend_header = true);
|
||||
}
|
||||
|
||||
// Utility shader vertex format
|
||||
|
@ -188,4 +192,41 @@ private:
|
|||
PipelineInfo m_pipeline_info = {};
|
||||
};
|
||||
|
||||
// Helper for recording a single compute dispatch: collects the resources a compute shader
// needs (uniform buffer, push constants, samplers, texel buffers, storage image), then binds
// descriptors and pipeline and records the dispatch into the supplied command buffer.
class ComputeShaderDispatcher
|
||||
{
|
||||
public:
|
||||
// command_buffer receives all recorded commands; pipeline_layout and compute_shader describe
// the pipeline that is looked up when dispatching.
ComputeShaderDispatcher(VkCommandBuffer command_buffer, VkPipelineLayout pipeline_layout,
|
||||
VkShaderModule compute_shader);
|
||||
|
||||
// Reserves size bytes in the utility shader uniform buffer and returns a host pointer to
// write to. Call CommitUniformBuffer with the same size once the data is written.
u8* AllocateUniformBuffer(size_t size);
|
||||
void CommitUniformBuffer(size_t size);
|
||||
|
||||
// Records a compute-stage push constant update immediately (not deferred to Dispatch).
void SetPushConstants(const void* data, size_t data_size);
|
||||
|
||||
// Sets sampler slot index; the image is bound as VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL.
void SetSampler(size_t index, VkImageView view, VkSampler sampler);
|
||||
|
||||
// Sets texel buffer slot index.
void SetTexelBuffer(size_t index, VkBufferView view);
|
||||
|
||||
// Sets the storage image and the layout it will be in during the dispatch.
void SetStorageImage(VkImageView view, VkImageLayout image_layout);
|
||||
|
||||
// Binds descriptors and pipeline, then records the dispatch. Nothing is dispatched if the
// pipeline cannot be bound.
void Dispatch(u32 groups_x, u32 groups_y, u32 groups_z);
|
||||
|
||||
private:
|
||||
void BindDescriptors();
|
||||
bool BindPipeline();
|
||||
|
||||
VkCommandBuffer m_command_buffer = VK_NULL_HANDLE;
|
||||
|
||||
// Written to binding 0 as a dynamic uniform buffer; the offset is passed when binding the set.
VkDescriptorBufferInfo m_uniform_buffer = {};
|
||||
u32 m_uniform_buffer_offset = 0;
|
||||
|
||||
// Written to bindings 1 + index.
std::array<VkDescriptorImageInfo, 4> m_samplers = {};
|
||||
|
||||
// Written to bindings 5 + index.
std::array<VkBufferView, 2> m_texel_buffers = {};
|
||||
|
||||
// Written to binding 7.
VkDescriptorImageInfo m_storage_image = {};
|
||||
|
||||
ComputePipelineInfo m_pipeline_info = {};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -234,6 +234,8 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
|
|||
config->backend_info.bSupportsPaletteConversion = true; // Assumed support.
|
||||
config->backend_info.bSupportsClipControl = true; // Assumed support.
|
||||
config->backend_info.bSupportsMultithreading = true; // Assumed support.
|
||||
config->backend_info.bSupportsComputeShaders = true; // Assumed support.
|
||||
config->backend_info.bSupportsGPUTextureDecoding = true; // Assumed support.
|
||||
config->backend_info.bSupportsInternalResolutionFrameDumps = true; // Assumed support.
|
||||
config->backend_info.bSupportsPostProcessing = false; // No support yet.
|
||||
config->backend_info.bSupportsDualSourceBlend = false; // Dependent on features.
|
||||
|
|
|
@ -110,7 +110,8 @@ void TextureCacheBase::OnConfigChanged(VideoConfig& config)
|
|||
if (config.iSafeTextureCache_ColorSamples != backup_config.color_samples ||
|
||||
config.bTexFmtOverlayEnable != backup_config.texfmt_overlay ||
|
||||
config.bTexFmtOverlayCenter != backup_config.texfmt_overlay_center ||
|
||||
config.bHiresTextures != backup_config.hires_textures)
|
||||
config.bHiresTextures != backup_config.hires_textures ||
|
||||
config.bEnableGPUTextureDecoding != backup_config.gpu_texture_decoding)
|
||||
{
|
||||
Invalidate();
|
||||
|
||||
|
@ -209,6 +210,7 @@ void TextureCacheBase::SetBackupConfig(const VideoConfig& config)
|
|||
backup_config.cache_hires_textures = config.bCacheHiresTextures;
|
||||
backup_config.stereo_3d = config.iStereoMode > 0;
|
||||
backup_config.efb_mono_depth = config.bStereoEFBMonoDepth;
|
||||
backup_config.gpu_texture_decoding = config.bEnableGPUTextureDecoding;
|
||||
}
|
||||
|
||||
TextureCacheBase::TCacheEntryBase* TextureCacheBase::ApplyPaletteToEntry(TCacheEntryBase* entry,
|
||||
|
@ -526,6 +528,7 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
|
|||
|
||||
const u32 texture_size =
|
||||
TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
|
||||
u32 bytes_per_block = (bsw * bsh * TexDecoder_GetTexelSizeInNibbles(texformat)) / 2;
|
||||
u32 additional_mips_size = 0; // not including level 0, which is texture_size
|
||||
|
||||
// GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in
|
||||
|
@ -755,6 +758,17 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
|
|||
// how many levels the allocated texture shall have
|
||||
const u32 texLevels = hires_tex ? (u32)hires_tex->m_levels.size() : tex_levels;
|
||||
|
||||
// We can decode on the GPU if it is a supported format and the flag is enabled.
|
||||
// Currently we don't decode RGBA8 textures from Tmem, as that would require copying from both
|
||||
// banks, and if we're doing a copy we may as well just do the whole thing on the CPU, since
|
||||
// there's no conversion between formats. In the future this could be extended with a separate
|
||||
// shader, however.
|
||||
bool decode_on_gpu =
|
||||
!hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
|
||||
g_texture_cache->SupportsGPUTextureDecode(static_cast<TextureFormat>(texformat),
|
||||
static_cast<TlutFormat>(tlutfmt)) &&
|
||||
!(from_tmem && texformat == GX_TF_RGBA8);
|
||||
|
||||
// create the entry/texture
|
||||
TCacheEntryConfig config;
|
||||
config.width = width;
|
||||
|
@ -769,17 +783,29 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
|
|||
|
||||
if (!hires_tex)
|
||||
{
|
||||
if (!(texformat == GX_TF_RGBA8 && from_tmem))
|
||||
const u8* tlut = &texMem[tlutaddr];
|
||||
if (decode_on_gpu)
|
||||
{
|
||||
const u8* tlut = &texMem[tlutaddr];
|
||||
TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut,
|
||||
(TlutFormat)tlutfmt);
|
||||
u32 row_stride = bytes_per_block * (expandedWidth / bsw);
|
||||
g_texture_cache->DecodeTextureOnGPU(
|
||||
entry, 0, src_data, texture_size, static_cast<TextureFormat>(texformat), width, height,
|
||||
expandedWidth, expandedHeight, row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
|
||||
}
|
||||
else
|
||||
{
|
||||
u8* src_data_gb =
|
||||
&texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
|
||||
TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight);
|
||||
if (!(texformat == GX_TF_RGBA8 && from_tmem))
|
||||
{
|
||||
TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut,
|
||||
(TlutFormat)tlutfmt);
|
||||
}
|
||||
else
|
||||
{
|
||||
u8* src_data_gb =
|
||||
&texMem[bpmem.tex[stage / 4].texImage2[stage % 4].tmem_odd * TMEM_LINE_SIZE];
|
||||
TexDecoder_DecodeRGBA8FromTmem(temp, src_data, src_data_gb, expandedWidth, expandedHeight);
|
||||
}
|
||||
|
||||
entry->Load(temp, width, height, expandedWidth, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -797,9 +823,6 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
|
|||
entry->is_efb_copy = false;
|
||||
entry->is_custom_tex = hires_tex != nullptr;
|
||||
|
||||
// load texture
|
||||
entry->Load(temp, width, height, expandedWidth, 0);
|
||||
|
||||
std::string basename = "";
|
||||
if (g_ActiveConfig.bDumpTextures && !hires_tex)
|
||||
{
|
||||
|
@ -840,13 +863,26 @@ TextureCacheBase::TCacheEntryBase* TextureCacheBase::Load(const u32 stage)
|
|||
const u32 expanded_mip_height = Common::AlignUp(mip_height, bsh);
|
||||
|
||||
const u8*& mip_src_data = from_tmem ? ((level % 2) ? ptr_odd : ptr_even) : src_data;
|
||||
const u8* tlut = &texMem[tlutaddr];
|
||||
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat,
|
||||
tlut, (TlutFormat)tlutfmt);
|
||||
mip_src_data +=
|
||||
size_t mip_size =
|
||||
TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat);
|
||||
const u8* tlut = &texMem[tlutaddr];
|
||||
|
||||
entry->Load(temp, mip_width, mip_height, expanded_mip_width, level);
|
||||
if (decode_on_gpu)
|
||||
{
|
||||
u32 row_stride = bytes_per_block * (mip_width / bsw);
|
||||
g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size,
|
||||
static_cast<TextureFormat>(texformat), mip_width,
|
||||
mip_height, expanded_mip_width, expanded_mip_height,
|
||||
row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
|
||||
}
|
||||
else
|
||||
{
|
||||
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat,
|
||||
tlut, (TlutFormat)tlutfmt);
|
||||
entry->Load(temp, mip_width, mip_height, expanded_mip_width, level);
|
||||
}
|
||||
|
||||
mip_src_data += mip_size;
|
||||
|
||||
if (g_ActiveConfig.bDumpTextures)
|
||||
DumpTexture(entry, basename, level);
|
||||
|
|
|
@ -171,6 +171,23 @@ public:
|
|||
virtual void ConvertTexture(TCacheEntryBase* entry, TCacheEntryBase* unconverted, void* palette,
|
||||
TlutFormat format) = 0;
|
||||
|
||||
// Returns true if the texture data and palette formats are supported by the GPU decoder.
|
||||
// Default implementation: reports no GPU decoding support for any format.
virtual bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Decodes the specified data to the GPU texture specified by entry.
|
||||
// width, height are the size of the image in pixels.
|
||||
// aligned_width, aligned_height are the size of the image in pixels, aligned to the block size.
|
||||
// row_stride is the number of bytes for a row of blocks, not pixels.
|
||||
// Default implementation: does nothing.
virtual void DecodeTextureOnGPU(TCacheEntryBase* entry, u32 dst_level, const u8* data,
|
||||
size_t data_size, TextureFormat format, u32 width, u32 height,
|
||||
u32 aligned_width, u32 aligned_height, u32 row_stride,
|
||||
const u8* palette, TlutFormat palette_format)
|
||||
{
|
||||
// Intentionally empty.
}
|
||||
|
||||
protected:
|
||||
TextureCacheBase();
|
||||
|
||||
|
@ -225,6 +242,7 @@ private:
|
|||
bool copy_cache_enable;
|
||||
bool stereo_3d;
|
||||
bool efb_mono_depth;
|
||||
bool gpu_texture_decoding;
|
||||
};
|
||||
BackupConfig backup_config = {};
|
||||
};
|
||||
|
|
|
@ -2,9 +2,13 @@
|
|||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
|
||||
#include "Common/CommonFuncs.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/MathUtil.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
|
@ -720,4 +724,546 @@ const char* GenerateEncodingShader(u32 format, APIType ApiType)
|
|||
return text;
|
||||
}
|
||||
|
||||
// NOTE: In these uniforms, a row refers to a row of blocks, not texels.
|
||||
static const char decoding_shader_header[] = R"(
|
||||
#ifdef VULKAN
|
||||
|
||||
layout(std140, push_constant) uniform PushConstants {
|
||||
uvec2 dst_size;
|
||||
uvec2 src_size;
|
||||
uint src_offset;
|
||||
uint src_row_stride;
|
||||
uint palette_offset;
|
||||
} push_constants;
|
||||
#define u_dst_size (push_constants.dst_size)
|
||||
#define u_src_size (push_constants.src_size)
|
||||
#define u_src_offset (push_constants.src_offset)
|
||||
#define u_src_row_stride (push_constants.src_row_stride)
|
||||
#define u_palette_offset (push_constants.palette_offset)
|
||||
|
||||
TEXEL_BUFFER_BINDING(0) uniform usamplerBuffer s_input_buffer;
|
||||
TEXEL_BUFFER_BINDING(1) uniform usamplerBuffer s_palette_buffer;
|
||||
|
||||
IMAGE_BINDING(rgba8, 0) uniform writeonly image2DArray output_image;
|
||||
|
||||
#else
|
||||
|
||||
uniform uvec2 u_dst_size;
|
||||
uniform uvec2 u_src_size;
|
||||
uniform uint u_src_offset;
|
||||
uniform uint u_src_row_stride;
|
||||
uniform uint u_palette_offset;
|
||||
|
||||
SAMPLER_BINDING(9) uniform usamplerBuffer s_input_buffer;
|
||||
SAMPLER_BINDING(10) uniform usamplerBuffer s_palette_buffer;
|
||||
|
||||
layout(rgba8, binding = 0) uniform writeonly image2DArray output_image;
|
||||
|
||||
#endif
|
||||
|
||||
uint Swap16(uint v)
|
||||
{
|
||||
// Convert BE to LE.
|
||||
return ((v >> 8) | (v << 8)) & 0xFFFFu;
|
||||
}
|
||||
|
||||
uint Convert3To8(uint v)
|
||||
{
|
||||
// Swizzle bits: 00000123 -> 12312312
|
||||
return (v << 5) | (v << 2) | (v >> 1);
|
||||
}
|
||||
uint Convert4To8(uint v)
|
||||
{
|
||||
// Swizzle bits: 00001234 -> 12341234
|
||||
return (v << 4) | v;
|
||||
}
|
||||
uint Convert5To8(uint v)
|
||||
{
|
||||
// Swizzle bits: 00012345 -> 12345123
|
||||
return (v << 3) | (v >> 2);
|
||||
}
|
||||
uint Convert6To8(uint v)
|
||||
{
|
||||
// Swizzle bits: 00123456 -> 12345612
|
||||
return (v << 2) | (v >> 4);
|
||||
}
|
||||
|
||||
uint GetTiledTexelOffset(uvec2 block_size, uvec2 coords)
|
||||
{
|
||||
uvec2 block = coords / block_size;
|
||||
uvec2 offset = coords % block_size;
|
||||
uint buffer_pos = u_src_offset;
|
||||
buffer_pos += block.y * u_src_row_stride;
|
||||
buffer_pos += block.x * (block_size.x * block_size.y);
|
||||
buffer_pos += offset.y * block_size.x;
|
||||
buffer_pos += offset.x;
|
||||
return buffer_pos;
|
||||
}
|
||||
|
||||
uvec4 GetPaletteColor(uint index)
|
||||
{
|
||||
// Fetch and swap BE to LE.
|
||||
uint val = Swap16(texelFetch(s_palette_buffer, int(u_palette_offset + index)).x);
|
||||
|
||||
uvec4 color;
|
||||
#if defined(PALETTE_FORMAT_IA8)
|
||||
uint a = bitfieldExtract(val, 8, 8);
|
||||
uint i = bitfieldExtract(val, 0, 8);
|
||||
color = uvec4(i, i, i, a);
|
||||
#elif defined(PALETTE_FORMAT_RGB565)
|
||||
color.x = Convert5To8(bitfieldExtract(val, 11, 5));
|
||||
color.y = Convert6To8(bitfieldExtract(val, 5, 6));
|
||||
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
|
||||
color.a = 255u;
|
||||
|
||||
#elif defined(PALETTE_FORMAT_RGB5A3)
|
||||
if ((val & 0x8000u) != 0u)
|
||||
{
|
||||
color.x = Convert5To8(bitfieldExtract(val, 10, 5));
|
||||
color.y = Convert5To8(bitfieldExtract(val, 5, 5));
|
||||
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
|
||||
color.a = 255u;
|
||||
}
|
||||
else
|
||||
{
|
||||
color.a = Convert3To8(bitfieldExtract(val, 12, 3));
|
||||
color.r = Convert4To8(bitfieldExtract(val, 8, 4));
|
||||
color.g = Convert4To8(bitfieldExtract(val, 4, 4));
|
||||
color.b = Convert4To8(bitfieldExtract(val, 0, 4));
|
||||
}
|
||||
#else
|
||||
// Not used.
|
||||
color = uvec4(0, 0, 0, 0);
|
||||
#endif
|
||||
|
||||
return color;
|
||||
}
|
||||
|
||||
vec4 GetPaletteColorNormalized(uint index)
|
||||
{
|
||||
uvec4 color = GetPaletteColor(index);
|
||||
return vec4(color) / 255.0;
|
||||
}
|
||||
|
||||
)";
|
||||
|
||||
static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
|
||||
// I4: 4-bit intensity, two texels per byte. Read through an R8_UINT view with no palette,
// dispatched in 8x8 thread groups (not flattened).
{GX_TF_I4,
 {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
  R"(
      layout(local_size_x = 8, local_size_y = 8) in;

      void main()
      {
        uvec2 coords = gl_GlobalInvocationID.xy;

        // Tiled in 8x8 blocks, 4 bits per pixel
        // We need to do the tiling manually here because the texel size is smaller than
        // the size of the buffer elements.
        uvec2 block = coords.xy / 8u;
        uvec2 offset = coords.xy % 8u;
        uint buffer_pos = u_src_offset;
        buffer_pos += block.y * u_src_row_stride;
        buffer_pos += block.x * 32u;
        buffer_pos += offset.y * 4u;
        buffer_pos += offset.x / 2u;

        // Select high nibble for even texels, low for odd.
        uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
        uint i;
        if ((coords.x & 1u) == 0u)
          i = Convert4To8((val >> 4));
        else
          i = Convert4To8((val & 0x0Fu));

        uvec4 color = uvec4(i, i, i, i);
        vec4 norm_color = vec4(color) / 255.0;

        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
      }

      )"}},
|
||||
{GX_TF_IA4,
|
||||
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
|
||||
R"(
|
||||
layout(local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
void main()
|
||||
{
|
||||
uvec2 coords = gl_GlobalInvocationID.xy;
|
||||
|
||||
// Tiled in 8x4 blocks, 8 bits per pixel
|
||||
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
|
||||
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
|
||||
uint i = Convert4To8((val & 0x0Fu));
|
||||
uint a = Convert4To8((val >> 4));
|
||||
uvec4 color = uvec4(i, i, i, a);
|
||||
vec4 norm_color = vec4(color) / 255.0;
|
||||
|
||||
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
|
||||
}
|
||||
)"}},
|
||||
{GX_TF_I8,
|
||||
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
|
||||
R"(
|
||||
layout(local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
void main()
|
||||
{
|
||||
uvec2 coords = gl_GlobalInvocationID.xy;
|
||||
|
||||
// Tiled in 8x4 blocks, 8 bits per pixel
|
||||
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
|
||||
uint i = texelFetch(s_input_buffer, int(buffer_pos)).x;
|
||||
uvec4 color = uvec4(i, i, i, i);
|
||||
vec4 norm_color = vec4(color) / 255.0;
|
||||
|
||||
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
|
||||
}
|
||||
)"}},
|
||||
{GX_TF_IA8,
|
||||
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
|
||||
R"(
|
||||
layout(local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
void main()
|
||||
{
|
||||
uvec2 coords = gl_GlobalInvocationID.xy;
|
||||
|
||||
// Tiled in 4x4 blocks, 16 bits per pixel
|
||||
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
|
||||
uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
|
||||
uint a = (val & 0xFFu);
|
||||
uint i = (val >> 8);
|
||||
uvec4 color = uvec4(i, i, i, a);
|
||||
vec4 norm_color = vec4(color) / 255.0;
|
||||
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
|
||||
}
|
||||
)"}},
|
||||
{GX_TF_RGB565,
|
||||
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
|
||||
R"(
|
||||
layout(local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
void main()
|
||||
{
|
||||
uvec2 coords = gl_GlobalInvocationID.xy;
|
||||
|
||||
// Tiled in 4x4 blocks
|
||||
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
|
||||
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
|
||||
|
||||
uvec4 color;
|
||||
color.x = Convert5To8(bitfieldExtract(val, 11, 5));
|
||||
color.y = Convert6To8(bitfieldExtract(val, 5, 6));
|
||||
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
|
||||
color.a = 255u;
|
||||
|
||||
vec4 norm_color = vec4(color) / 255.0;
|
||||
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
|
||||
}
|
||||
|
||||
)"}},
|
||||
{GX_TF_RGB5A3,
|
||||
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
|
||||
R"(
|
||||
layout(local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
void main()
|
||||
{
|
||||
uvec2 coords = gl_GlobalInvocationID.xy;
|
||||
|
||||
// Tiled in 4x4 blocks
|
||||
uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);
|
||||
uint val = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x);
|
||||
|
||||
uvec4 color;
|
||||
if ((val & 0x8000u) != 0u)
|
||||
{
|
||||
color.x = Convert5To8(bitfieldExtract(val, 10, 5));
|
||||
color.y = Convert5To8(bitfieldExtract(val, 5, 5));
|
||||
color.z = Convert5To8(bitfieldExtract(val, 0, 5));
|
||||
color.a = 255u;
|
||||
}
|
||||
else
|
||||
{
|
||||
color.a = Convert3To8(bitfieldExtract(val, 12, 3));
|
||||
color.r = Convert4To8(bitfieldExtract(val, 8, 4));
|
||||
color.g = Convert4To8(bitfieldExtract(val, 4, 4));
|
||||
color.b = Convert4To8(bitfieldExtract(val, 0, 4));
|
||||
}
|
||||
|
||||
vec4 norm_color = vec4(color) / 255.0;
|
||||
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
|
||||
}
|
||||
|
||||
)"}},
|
||||
// RGBA8: read through an R16_UINT view with no palette, dispatched in 8x8 thread groups
// (not flattened). Each texel is assembled from two 16-bit fetches (AR, then GB).
{GX_TF_RGBA8,
 {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
  R"(
      layout(local_size_x = 8, local_size_y = 8) in;

      void main()
      {
        uvec2 coords = gl_GlobalInvocationID.xy;

        // Tiled in 4x4 blocks
        // We can't use the normal calculation function, as these are packed as the AR channels
        // for the entire block, then the GB channels afterwards.
        uvec2 block = coords.xy / 4u;
        uvec2 offset = coords.xy % 4u;
        uint buffer_pos = u_src_offset;

        // Our buffer has 16-bit elements, so the offsets here are half what they would be in bytes.
        buffer_pos += block.y * u_src_row_stride;
        buffer_pos += block.x * 32u;
        buffer_pos += offset.y * 4u;
        buffer_pos += offset.x;

        // The two GB channels follow after the block's AR channels.
        uint val1 = texelFetch(s_input_buffer, int(buffer_pos + 0u)).x;
        uint val2 = texelFetch(s_input_buffer, int(buffer_pos + 16u)).x;

        uvec4 color;
        color.a = (val1 & 0xFFu);
        color.r = (val1 >> 8);
        color.g = (val2 & 0xFFu);
        color.b = (val2 >> 8);

        vec4 norm_color = vec4(color) / 255.0;
        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
      }
      )"}},
|
||||
{GX_TF_CMPR,
|
||||
{BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true,
|
||||
R"(
|
||||
// In the compute version of this decoder, we flatten the blocks to a one-dimension array.
|
||||
// Each group is subdivided into blocks of 16 threads, and the first thread in each block fetches the DXT data.
|
||||
// All threads then calculate the possible colors for the block and write to the output image.
|
||||
|
||||
#define GROUP_SIZE 64u
|
||||
#define BLOCK_SIZE_X 4u
|
||||
#define BLOCK_SIZE_Y 4u
|
||||
#define BLOCK_SIZE (BLOCK_SIZE_X * BLOCK_SIZE_Y)
|
||||
#define BLOCKS_PER_GROUP (GROUP_SIZE / BLOCK_SIZE)
|
||||
|
||||
layout(local_size_x = GROUP_SIZE, local_size_y = 1) in;
|
||||
|
||||
shared uvec2 shared_temp[BLOCKS_PER_GROUP];
|
||||
|
||||
uint DXTBlend(uint v1, uint v2)
|
||||
{
|
||||
// 3/8 blend, which is close to 1/3
|
||||
return ((v1 * 3u + v2 * 5u) >> 3);
|
||||
}
|
||||
|
||||
void main()
|
||||
{
|
||||
uint local_thread_id = gl_LocalInvocationID.x;
|
||||
uint block_in_group = local_thread_id / BLOCK_SIZE;
|
||||
uint thread_in_block = local_thread_id % BLOCK_SIZE;
|
||||
uint block_index = gl_WorkGroupID.x * BLOCKS_PER_GROUP + block_in_group;
|
||||
|
||||
// Annoyingly, we can't precalculate this as a uniform because the DXT block size differs
|
||||
// from the block size of the overall texture (4 vs 8). We can however use a multiply and
|
||||
// subtraction to avoid the modulo for calculating the block's X coordinate.
|
||||
uint blocks_wide = u_src_size.x / BLOCK_SIZE_X;
|
||||
uvec2 block_coords;
|
||||
block_coords.y = block_index / blocks_wide;
|
||||
block_coords.x = block_index - (block_coords.y * blocks_wide);
|
||||
|
||||
// Only the first thread for each block reads from the texel buffer.
|
||||
if (thread_in_block == 0u)
|
||||
{
|
||||
// Calculate tiled block coordinates.
|
||||
uvec2 tile_block_coords = block_coords / 2u;
|
||||
uvec2 subtile_block_coords = block_coords % 2u;
|
||||
uint buffer_pos = u_src_offset;
|
||||
buffer_pos += tile_block_coords.y * u_src_row_stride;
|
||||
buffer_pos += tile_block_coords.x * 4u;
|
||||
buffer_pos += subtile_block_coords.y * 2u;
|
||||
buffer_pos += subtile_block_coords.x;
|
||||
|
||||
// Read the entire DXT block to shared memory.
|
||||
uvec2 raw_data = texelFetch(s_input_buffer, int(buffer_pos)).xy;
|
||||
shared_temp[block_in_group] = raw_data;
|
||||
}
|
||||
|
||||
// Ensure store is completed before the remaining threads in the block continue.
|
||||
memoryBarrierShared();
|
||||
barrier();
|
||||
|
||||
// Unpack colors and swap BE to LE.
|
||||
uvec2 raw_data = shared_temp[block_in_group];
|
||||
uint swapped = ((raw_data.x & 0xFF00FF00u) >> 8) | ((raw_data.x & 0x00FF00FFu) << 8);
|
||||
uint c1 = swapped & 0xFFFFu;
|
||||
uint c2 = swapped >> 16;
|
||||
|
||||
// Expand 5/6 bit channels to 8-bits per channel.
|
||||
uint blue1 = Convert5To8(bitfieldExtract(c1, 0, 5));
|
||||
uint blue2 = Convert5To8(bitfieldExtract(c2, 0, 5));
|
||||
uint green1 = Convert6To8(bitfieldExtract(c1, 5, 6));
|
||||
uint green2 = Convert6To8(bitfieldExtract(c2, 5, 6));
|
||||
uint red1 = Convert5To8(bitfieldExtract(c1, 11, 5));
|
||||
uint red2 = Convert5To8(bitfieldExtract(c2, 11, 5));
|
||||
|
||||
// Determine the four colors the block can use.
|
||||
// It's quicker to just precalculate all four colors rather than branching on the index.
|
||||
// NOTE: These must be masked with 0xFF. This is done at the normalization stage below.
|
||||
uvec4 color0, color1, color2, color3;
|
||||
color0 = uvec4(red1, green1, blue1, 255u);
|
||||
color1 = uvec4(red2, green2, blue2, 255u);
|
||||
if (c1 > c2)
|
||||
{
|
||||
color2 = uvec4(DXTBlend(red2, red1), DXTBlend(green2, green1), DXTBlend(blue2, blue1), 255u);
|
||||
color3 = uvec4(DXTBlend(red1, red2), DXTBlend(green1, green2), DXTBlend(blue1, blue2), 255u);
|
||||
}
|
||||
else
|
||||
{
|
||||
color2 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 255u);
|
||||
color3 = uvec4((red1 + red2) / 2u, (green1 + green2) / 2u, (blue1 + blue2) / 2u, 0u);
|
||||
}
|
||||
|
||||
// Calculate the texel coordinates that we will write to.
|
||||
// The divides/modulo here should be turned into a shift/binary AND.
|
||||
uint local_y = thread_in_block / BLOCK_SIZE_X;
|
||||
uint local_x = thread_in_block % BLOCK_SIZE_X;
|
||||
uint global_x = block_coords.x * BLOCK_SIZE_X + local_x;
|
||||
uint global_y = block_coords.y * BLOCK_SIZE_Y + local_y;
|
||||
|
||||
// Use the coordinates within the block to shift the 32-bit value containing
|
||||
// all 16 indices to a single 2-bit index.
|
||||
uint index = bitfieldExtract(raw_data.y, int((local_y * 8u) + (6u - local_x * 2u)), 2);
|
||||
|
||||
// Select the un-normalized color from the precalculated color array.
|
||||
// Using a switch statement here removes the need for dynamic indexing of an array.
|
||||
uvec4 color;
|
||||
switch (index)
|
||||
{
|
||||
case 0u: color = color0; break;
|
||||
case 1u: color = color1; break;
|
||||
case 2u: color = color2; break;
|
||||
case 3u: color = color3; break;
|
||||
default: color = color0; break;
|
||||
}
|
||||
|
||||
// Normalize and write to the output image.
|
||||
vec4 norm_color = vec4(color & 0xFFu) / 255.0;
|
||||
imageStore(output_image, ivec3(ivec2(uvec2(global_x, global_y)), 0), norm_color);
|
||||
}
|
||||
)"}},
|
||||
// C4: 4-bit palette index, two texels per byte. Read through an R8_UINT view, with the
// palette size taken from TexDecoder_GetPaletteSize(GX_TF_C4); 8x8 thread groups.
{GX_TF_C4,
 {BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C4)), 8, 8, false,
  R"(
      layout(local_size_x = 8, local_size_y = 8) in;

      void main()
      {
        uvec2 coords = gl_GlobalInvocationID.xy;

        // Tiled in 8x8 blocks, 4 bits per pixel
        // We need to do the tiling manually here because the texel size is smaller than
        // the size of the buffer elements.
        uvec2 block = coords.xy / 8u;
        uvec2 offset = coords.xy % 8u;
        uint buffer_pos = u_src_offset;
        buffer_pos += block.y * u_src_row_stride;
        buffer_pos += block.x * 32u;
        buffer_pos += offset.y * 4u;
        buffer_pos += offset.x / 2u;

        // Select high nibble for even texels, low for odd.
        uint val = texelFetch(s_input_buffer, int(buffer_pos)).x;
        uint index = ((coords.x & 1u) == 0u) ? (val >> 4) : (val & 0x0Fu);
        vec4 norm_color = GetPaletteColorNormalized(index);
        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
      }

      )"}},
|
||||
{GX_TF_C8,
|
||||
{BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C8)), 8, 8, false,
|
||||
R"(
|
||||
layout(local_size_x = 8, local_size_y = 8) in;
|
||||
|
||||
void main()
|
||||
{
|
||||
uvec2 coords = gl_GlobalInvocationID.xy;
|
||||
|
||||
// Tiled in 8x4 blocks, 8 bits per pixel
|
||||
uint buffer_pos = GetTiledTexelOffset(uvec2(8u, 4u), coords);
|
||||
uint index = texelFetch(s_input_buffer, int(buffer_pos)).x;
|
||||
vec4 norm_color = GetPaletteColorNormalized(index);
|
||||
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
|
||||
}
|
||||
)"}},
|
||||
// C14X2: 14-bit palette index stored in a 16-bit element. Read through an R16_UINT view,
// with the palette size taken from TexDecoder_GetPaletteSize(GX_TF_C14X2); 8x8 thread groups.
{GX_TF_C14X2,
 {BUFFER_FORMAT_R16_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C14X2)), 8, 8, false,
  R"(
      layout(local_size_x = 8, local_size_y = 8) in;

      void main()
      {
        uvec2 coords = gl_GlobalInvocationID.xy;

        // Tiled in 4x4 blocks, 16 bits per pixel
        uint buffer_pos = GetTiledTexelOffset(uvec2(4u, 4u), coords);

        // Swap BE to LE before masking, so the low 14 bits hold the palette index.
        uint index = Swap16(texelFetch(s_input_buffer, int(buffer_pos)).x) & 0x3FFFu;
        vec4 norm_color = GetPaletteColorNormalized(index);
        imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
      }
      )"}}};
|
||||
|
||||
// Size in bytes of one texel-buffer element, indexed by BufferFormat.
// The order of the entries must follow the order of the BufferFormat enumerators.
static const std::array<u32, BUFFER_FORMAT_COUNT> s_buffer_bytes_per_texel = {{
    1,  // BUFFER_FORMAT_R8_UINT: one 8-bit component
    2,  // BUFFER_FORMAT_R16_UINT: one 16-bit component
    8,  // BUFFER_FORMAT_R32G32_UINT: two 32-bit components
}};
|
||||
|
||||
// Looks up the decoding shader description registered for `format`.
// Returns a pointer into the static shader table, or nullptr when the table has no
// entry for that texture format.
const DecodingShaderInfo* GetDecodingShaderInfo(u32 format)
{
  const auto entry = s_decoding_shader_info.find(static_cast<TextureFormat>(format));
  if (entry == s_decoding_shader_info.end())
    return nullptr;

  return &entry->second;
}
|
||||
|
||||
// Returns the size in bytes of a single texel-buffer element for `buffer_format`.
// The value is read straight from the per-format table; no range check is performed,
// so `buffer_format` must be one of the real formats (not BUFFER_FORMAT_COUNT).
u32 GetBytesPerBufferElement(BufferFormat buffer_format)
{
  const u32 bytes_per_element = s_buffer_bytes_per_texel[buffer_format];
  return bytes_per_element;
}
|
||||
|
||||
std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height)
|
||||
{
|
||||
// Flatten to a single dimension?
|
||||
if (info->group_flatten)
|
||||
return {(width * height + (info->group_size_x - 1)) / info->group_size_x, 1};
|
||||
|
||||
return {(width + (info->group_size_x - 1)) / info->group_size_x,
|
||||
(height + (info->group_size_y - 1)) / info->group_size_y};
|
||||
}
|
||||
|
||||
std::string GenerateDecodingShader(u32 format, u32 palette_format, APIType api_type)
|
||||
{
|
||||
const DecodingShaderInfo* info = GetDecodingShaderInfo(format);
|
||||
if (!info)
|
||||
return "";
|
||||
|
||||
std::stringstream ss;
|
||||
switch (palette_format)
|
||||
{
|
||||
case GX_TL_IA8:
|
||||
ss << "#define PALETTE_FORMAT_IA8 1\n";
|
||||
break;
|
||||
case GX_TL_RGB565:
|
||||
ss << "#define PALETTE_FORMAT_RGB565 1\n";
|
||||
break;
|
||||
case GX_TL_RGB5A3:
|
||||
ss << "#define PALETTE_FORMAT_RGB5A3 1\n";
|
||||
break;
|
||||
}
|
||||
|
||||
ss << decoding_shader_header;
|
||||
ss << info->shader_body;
|
||||
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -4,6 +4,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
enum class APIType;
|
||||
|
@ -13,4 +16,40 @@ namespace TextureConversionShader
|
|||
u16 GetEncodedSampleCount(u32 format);
|
||||
|
||||
const char* GenerateEncodingShader(u32 format, APIType ApiType);
|
||||
}
|
||||
|
||||
// View format of the input data to the texture decoding shader.
|
||||
enum BufferFormat
{
  BUFFER_FORMAT_R8_UINT = 0,      // Elements viewed as one 8-bit unsigned component.
  BUFFER_FORMAT_R16_UINT = 1,     // Elements viewed as one 16-bit unsigned component.
  BUFFER_FORMAT_R32G32_UINT = 2,  // Elements viewed as two 32-bit unsigned components.
  BUFFER_FORMAT_COUNT = 3         // Number of formats above; not a format itself.
};
|
||||
|
||||
// Information required to compile and dispatch a texture decoding shader.
|
||||
struct DecodingShaderInfo
|
||||
{
|
||||
BufferFormat buffer_format;
|
||||
u32 palette_size;
|
||||
u32 group_size_x;
|
||||
u32 group_size_y;
|
||||
bool group_flatten;
|
||||
const char* shader_body;
|
||||
};
|
||||
|
||||
// Obtain shader information for the specified texture format.
|
||||
// If this format does not have a shader written for it, returns nullptr.
|
||||
const DecodingShaderInfo* GetDecodingShaderInfo(u32 format);
|
||||
|
||||
// Determine how many bytes there are in each element of the texel buffer.
|
||||
// Needed for alignment and stride calculations.
|
||||
u32 GetBytesPerBufferElement(BufferFormat buffer_format);
|
||||
|
||||
// Determine how many thread groups should be dispatched for an image of the specified width/height.
|
||||
// First is the number of X groups, second is the number of Y groups, Z is always one.
|
||||
std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height);
|
||||
|
||||
// Returns the GLSL string containing the texture decoding shader for the specified format.
|
||||
std::string GenerateDecodingShader(u32 format, u32 palette_format, APIType api_type);
|
||||
|
||||
} // namespace TextureConversionShader
|
||||
|
|
|
@ -81,6 +81,7 @@ void VideoConfig::Load(const std::string& ini_file)
|
|||
settings->Get("DumpPath", &sDumpPath, "");
|
||||
settings->Get("BitrateKbps", &iBitrateKbps, 2500);
|
||||
settings->Get("InternalResolutionFrameDumps", &bInternalResolutionFrameDumps, false);
|
||||
settings->Get("EnableGPUTextureDecoding", &bEnableGPUTextureDecoding, false);
|
||||
settings->Get("EnablePixelLighting", &bEnablePixelLighting, false);
|
||||
settings->Get("FastDepthCalc", &bFastDepthCalc, true);
|
||||
settings->Get("MSAA", &iMultisamples, 1);
|
||||
|
@ -305,6 +306,7 @@ void VideoConfig::Save(const std::string& ini_file)
|
|||
settings->Set("DumpPath", sDumpPath);
|
||||
settings->Set("BitrateKbps", iBitrateKbps);
|
||||
settings->Set("InternalResolutionFrameDumps", bInternalResolutionFrameDumps);
|
||||
settings->Set("EnableGPUTextureDecoding", bEnableGPUTextureDecoding);
|
||||
settings->Set("EnablePixelLighting", bEnablePixelLighting);
|
||||
settings->Set("FastDepthCalc", bFastDepthCalc);
|
||||
settings->Set("MSAA", iMultisamples);
|
||||
|
|
|
@ -108,6 +108,7 @@ struct VideoConfig final
|
|||
bool bInternalResolutionFrameDumps;
|
||||
bool bFreeLook;
|
||||
bool bBorderlessFullscreen;
|
||||
bool bEnableGPUTextureDecoding;
|
||||
int iBitrateKbps;
|
||||
|
||||
// Hacks
|
||||
|
@ -181,6 +182,7 @@ struct VideoConfig final
|
|||
bool bSupportsPrimitiveRestart;
|
||||
bool bSupportsOversizedViewports;
|
||||
bool bSupportsGeometryShaders;
|
||||
bool bSupportsComputeShaders;
|
||||
bool bSupports3DVision;
|
||||
bool bSupportsEarlyZ; // needed by PixelShaderGen, so must stay in VideoCommon
|
||||
bool bSupportsBindingLayout; // Needed by ShaderGen, so must stay in VideoCommon
|
||||
|
@ -195,6 +197,7 @@ struct VideoConfig final
|
|||
bool bSupportsReversedDepthRange;
|
||||
bool bSupportsMultithreading;
|
||||
bool bSupportsInternalResolutionFrameDumps;
|
||||
bool bSupportsGPUTextureDecoding;
|
||||
} backend_info;
|
||||
|
||||
// Utility
|
||||
|
@ -210,6 +213,10 @@ struct VideoConfig final
|
|||
return false;
|
||||
return backend_info.bSupportsBBox && backend_info.bSupportsFragmentStoresAndAtomics;
|
||||
}
|
||||
bool UseGPUTextureDecoding() const
|
||||
{
|
||||
return backend_info.bSupportsGPUTextureDecoding && bEnableGPUTextureDecoding;
|
||||
}
|
||||
};
|
||||
|
||||
extern VideoConfig g_Config;
|
||||
|
|
Loading…
Reference in New Issue