Merge pull request #5882 from degasus/efb2tex_copies

Use VideoCommon shader generators for efb2tex copies.
This commit is contained in:
Markus Wick 2017-12-02 16:28:12 +01:00 committed by GitHub
commit 7498d80a6f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 506 additions and 237 deletions

View File

@ -143,7 +143,7 @@ ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyParams&
return iter->second;
D3DBlob* bytecode = nullptr;
const char* shader = TextureConversionShader::GenerateEncodingShader(params, APIType::D3D);
const char* shader = TextureConversionShaderTiled::GenerateEncodingShader(params, APIType::D3D);
if (!D3D::CompilePixelShader(shader, &bytecode))
{
PanicAlert("Failed to compile texture encoding shader.");

View File

@ -222,7 +222,8 @@ TextureCache::~TextureCache()
void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
unsigned int cbuf_id, const float* colmat)
unsigned int cbuf_id, const float* colmat,
EFBCopyFormat dst_format, bool is_intensity)
{
auto* destination_texture = static_cast<DXTexture*>(entry->texture.get());

View File

@ -34,7 +34,8 @@ private:
bool scale_by_half) override;
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, unsigned int cbuf_id, const float* colmat) override;
bool scale_by_half, unsigned int cbuf_id, const float* colmat,
EFBCopyFormat dst_format, bool is_intensity) override;
bool CompileShaders() override { return true; }
void DeleteShaders() override {}

View File

@ -32,7 +32,8 @@ public:
}
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, unsigned int cbuf_id, const float* colmat) override
bool scale_by_half, unsigned int cbuf_id, const float* colmat,
EFBCopyFormat dst_format, bool is_intensity) override
{
}
};

View File

@ -26,11 +26,44 @@
#include "VideoCommon/ImageWrite.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureConverterShaderGen.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
namespace OGL
{
// GLSL vertex shader template used for the EFB copy programs in this file.
// It contains two "%c" placeholders that callers fill in through StringFromFormat with a
// one-character prefix for the output varying: 'f' (the fragment shader reads f_uv0 directly)
// or 'v' (a geometry shader consumes v_uv0 and re-emits it as f_uv0).
// The shader declares no vertex inputs; the position and texture coordinate are derived from
// gl_VertexID, and the source rectangle comes from the copy_position uniform, which is
// normalized by the size of the texture bound to samp9.
constexpr const char* vertex_program =
"out vec3 %c_uv0;\n"
"SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
"uniform vec4 copy_position;\n" // left, top, right, bottom
"void main()\n"
"{\n"
" vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n"
" %c_uv0 = vec3(mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, "
"0).xy), 0.0);\n"
" gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n"
"}\n";
// GLSL geometry shader used when stereo mode is enabled.
// For every layer of the array texture bound to samp9 it re-emits the incoming triangle,
// copying the xy texture coordinate from v_uv0, storing the layer index in f_uv0.z and
// routing the primitive to the matching output layer through gl_Layer.
// NOTE(review): max_vertices is 6, i.e. room for two triangles; presumably the source never
// has more than two layers - confirm against the EFB texture creation code.
constexpr const char* geometry_program = "layout(triangles) in;\n"
"layout(triangle_strip, max_vertices = 6) out;\n"
"in vec3 v_uv0[3];\n"
"out vec3 f_uv0;\n"
"SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
"void main()\n"
"{\n"
" int layers = textureSize(samp9, 0).z;\n"
" for (int layer = 0; layer < layers; ++layer) {\n"
" for (int i = 0; i < 3; ++i) {\n"
" f_uv0 = vec3(v_uv0[i].xy, layer);\n"
" gl_Position = gl_in[i].gl_Position;\n"
" gl_Layer = layer;\n"
" EmitVertex();\n"
" }\n"
" EndPrimitive();\n"
" }\n"
"}\n";
//#define TIME_TEXTURE_DECODING 1
void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
@ -124,101 +157,22 @@ bool TextureCache::CompileShaders()
" ocol0 = texcol;\n"
"}\n";
constexpr const char* color_matrix_program =
"SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
"uniform vec4 colmat[7];\n"
"in vec3 f_uv0;\n"
"out vec4 ocol0;\n"
"\n"
"void main(){\n"
" vec4 texcol = texture(samp9, f_uv0);\n"
" texcol = floor(texcol * colmat[5]) * colmat[6];\n"
" ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];\n"
"}\n";
constexpr const char* depth_matrix_program =
"SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
"uniform vec4 colmat[5];\n"
"in vec3 f_uv0;\n"
"out vec4 ocol0;\n"
"\n"
"void main(){\n"
" vec4 texcol = texture(samp9, vec3(f_uv0.xy, %s));\n"
" int depth = int(texcol.x * 16777216.0);\n"
// Convert to Z24 format
" ivec4 workspace;\n"
" workspace.r = (depth >> 16) & 255;\n"
" workspace.g = (depth >> 8) & 255;\n"
" workspace.b = depth & 255;\n"
// Convert to Z4 format
" workspace.a = (depth >> 16) & 0xF0;\n"
// Normalize components to [0.0..1.0]
" texcol = vec4(workspace) / 255.0;\n"
" ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];\n"
"}\n";
constexpr const char* vertex_program =
"out vec3 %s_uv0;\n"
"SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
"uniform vec4 copy_position;\n" // left, top, right, bottom
"void main()\n"
"{\n"
" vec2 rawpos = vec2(gl_VertexID&1, gl_VertexID&2);\n"
" %s_uv0 = vec3(mix(copy_position.xy, copy_position.zw, rawpos) / vec2(textureSize(samp9, "
"0).xy), 0.0);\n"
" gl_Position = vec4(rawpos*2.0-1.0, 0.0, 1.0);\n"
"}\n";
const std::string geo_program = g_ActiveConfig.stereo_mode != StereoMode::Off ?
"layout(triangles) in;\n"
"layout(triangle_strip, max_vertices = 6) out;\n"
"in vec3 v_uv0[3];\n"
"out vec3 f_uv0;\n"
"SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
"void main()\n"
"{\n"
" int layers = textureSize(samp9, 0).z;\n"
" for (int layer = 0; layer < layers; ++layer) {\n"
" for (int i = 0; i < 3; ++i) {\n"
" f_uv0 = vec3(v_uv0[i].xy, layer);\n"
" gl_Position = gl_in[i].gl_Position;\n"
" gl_Layer = layer;\n"
" EmitVertex();\n"
" }\n"
" EndPrimitive();\n"
" }\n"
"}\n" :
"";
const char* prefix = geo_program.empty() ? "f" : "v";
const char* depth_layer = g_ActiveConfig.bStereoEFBMonoDepth ? "0.0" : "f_uv0.z";
std::string geo_program = "";
char prefix = 'f';
if (g_ActiveConfig.stereo_mode != StereoMode::Off)
{
geo_program = geometry_program;
prefix = 'v';
}
if (!ProgramShaderCache::CompileShader(m_colorCopyProgram,
StringFromFormat(vertex_program, prefix, prefix),
color_copy_program, geo_program) ||
!ProgramShaderCache::CompileShader(m_colorMatrixProgram,
StringFromFormat(vertex_program, prefix, prefix),
color_matrix_program, geo_program) ||
!ProgramShaderCache::CompileShader(
m_depthMatrixProgram, StringFromFormat(vertex_program, prefix, prefix),
StringFromFormat(depth_matrix_program, depth_layer), geo_program))
color_copy_program, geo_program))
{
return false;
}
m_colorMatrixUniform = glGetUniformLocation(m_colorMatrixProgram.glprogid, "colmat");
m_depthMatrixUniform = glGetUniformLocation(m_depthMatrixProgram.glprogid, "colmat");
m_color_cbuf_id = UINT_MAX;
m_depth_cbuf_id = UINT_MAX;
m_colorCopyPositionUniform = glGetUniformLocation(m_colorCopyProgram.glprogid, "copy_position");
m_colorMatrixPositionUniform =
glGetUniformLocation(m_colorMatrixProgram.glprogid, "copy_position");
m_depthCopyPositionUniform = glGetUniformLocation(m_depthMatrixProgram.glprogid, "copy_position");
std::string palette_shader =
R"GLSL(
@ -320,8 +274,9 @@ bool TextureCache::CompileShaders()
void TextureCache::DeleteShaders()
{
m_colorMatrixProgram.Destroy();
m_depthMatrixProgram.Destroy();
for (auto& it : m_efb_copy_programs)
it.second.shader.Destroy();
m_efb_copy_programs.clear();
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
for (auto& shader : m_palette_shaders)
@ -382,16 +337,16 @@ void main()
void TextureCache::CreateTextureDecodingResources()
{
static const GLenum gl_view_types[TextureConversionShader::BUFFER_FORMAT_COUNT] = {
static const GLenum gl_view_types[TextureConversionShaderTiled::BUFFER_FORMAT_COUNT] = {
GL_R8UI, // BUFFER_FORMAT_R8_UINT
GL_R16UI, // BUFFER_FORMAT_R16_UINT
GL_RG32UI, // BUFFER_FORMAT_R32G32_UINT
GL_RGBA8UI, // BUFFER_FORMAT_RGBA8_UINT
};
glGenTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
glGenTextures(TextureConversionShaderTiled::BUFFER_FORMAT_COUNT,
m_texture_decoding_buffer_views.data());
for (size_t i = 0; i < TextureConversionShader::BUFFER_FORMAT_COUNT; i++)
for (size_t i = 0; i < TextureConversionShaderTiled::BUFFER_FORMAT_COUNT; i++)
{
glBindTexture(GL_TEXTURE_BUFFER, m_texture_decoding_buffer_views[i]);
glTexBuffer(GL_TEXTURE_BUFFER, gl_view_types[i], m_palette_stream_buffer->m_buffer);
@ -400,7 +355,7 @@ void TextureCache::CreateTextureDecodingResources()
void TextureCache::DestroyTextureDecodingResources()
{
glDeleteTextures(TextureConversionShader::BUFFER_FORMAT_COUNT,
glDeleteTextures(TextureConversionShaderTiled::BUFFER_FORMAT_COUNT,
m_texture_decoding_buffer_views.data());
m_texture_decoding_buffer_views.fill(0);
m_texture_decoding_program_info.clear();
@ -414,7 +369,7 @@ bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat pal
return iter->second.valid;
TextureDecodingProgramInfo info;
info.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
info.base_info = TextureConversionShaderTiled::GetDecodingShaderInfo(format);
if (!info.base_info)
{
m_texture_decoding_program_info.emplace(key, info);
@ -422,7 +377,7 @@ bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat pal
}
std::string shader_source =
TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::OpenGL);
TextureConversionShaderTiled::GenerateDecodingShader(format, palette_format, APIType::OpenGL);
if (shader_source.empty())
{
m_texture_decoding_program_info.emplace(key, info);
@ -462,7 +417,7 @@ void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u
// Copy to GPU-visible buffer, aligned to the data type.
auto info = iter->second;
u32 bytes_per_buffer_elem =
TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);
TextureConversionShaderTiled::GetBytesPerBufferElement(info.base_info->buffer_format);
// Only copy palette if it is required.
bool has_palette = info.base_info->palette_size > 0;
@ -515,7 +470,7 @@ void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u
}
auto dispatch_groups =
TextureConversionShader::GetDispatchCount(info.base_info, aligned_width, aligned_height);
TextureConversionShaderTiled::GetDispatchCount(info.base_info, aligned_width, aligned_height);
glBindImageTexture(0, static_cast<OGLTexture*>(entry->texture.get())->GetRawTexIdentifier(),
dst_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8);
glDispatchCompute(dispatch_groups.first, dispatch_groups.second, 1);
@ -531,7 +486,8 @@ void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u
void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
unsigned int cbuf_id, const float* colmat)
unsigned int cbuf_id, const float* colmat,
EFBCopyFormat dst_format, bool is_intensity)
{
auto* destination_texture = static_cast<OGLTexture*>(entry->texture.get());
g_renderer->ResetAPIState(); // reset any game specific settings
@ -554,26 +510,36 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
glViewport(0, 0, destination_texture->GetConfig().width, destination_texture->GetConfig().height);
GLuint uniform_location;
if (is_depth_copy)
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half);
auto it = m_efb_copy_programs.emplace(uid, EFBCopyShader());
EFBCopyShader& shader = it.first->second;
bool created = it.second;
if (created)
{
m_depthMatrixProgram.Bind();
if (m_depth_cbuf_id != cbuf_id)
glUniform4fv(m_depthMatrixUniform, 5, colmat);
m_depth_cbuf_id = cbuf_id;
uniform_location = m_depthCopyPositionUniform;
}
else
{
m_colorMatrixProgram.Bind();
if (m_color_cbuf_id != cbuf_id)
glUniform4fv(m_colorMatrixUniform, 7, colmat);
m_color_cbuf_id = cbuf_id;
uniform_location = m_colorMatrixPositionUniform;
ShaderCode code = TextureConversionShaderGen::GenerateShader(APIType::OpenGL, uid.GetUidData());
std::string geo_program = "";
char prefix = 'f';
if (g_ActiveConfig.stereo_mode != StereoMode::Off)
{
geo_program = geometry_program;
prefix = 'v';
}
ProgramShaderCache::CompileShader(shader.shader,
StringFromFormat(vertex_program, prefix, prefix),
code.GetBuffer(), geo_program);
shader.position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position");
}
shader.shader.Bind();
TargetRectangle R = g_renderer->ConvertEFBRectangle(src_rect);
glUniform4f(uniform_location, static_cast<float>(R.left), static_cast<float>(R.top),
glUniform4f(shader.position_uniform, static_cast<float>(R.left), static_cast<float>(R.top),
static_cast<float>(R.right), static_cast<float>(R.bottom));
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);

View File

@ -13,6 +13,7 @@
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/TextureConverterShaderGen.h"
#include "VideoCommon/VideoCommon.h"
class AbstractTexture;
@ -49,7 +50,7 @@ private:
struct TextureDecodingProgramInfo
{
const TextureConversionShader::DecodingShaderInfo* base_info = nullptr;
const TextureConversionShaderTiled::DecodingShaderInfo* base_info = nullptr;
SHADER program;
GLint uniform_dst_size = -1;
GLint uniform_src_size = -1;
@ -67,7 +68,8 @@ private:
bool scale_by_half) override;
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, unsigned int cbuf_id, const float* colmat) override;
bool scale_by_half, unsigned int cbuf_id, const float* colmat,
EFBCopyFormat dst_format, bool is_intensity) override;
bool CompileShaders() override;
void DeleteShaders() override;
@ -78,23 +80,23 @@ private:
void CreateTextureDecodingResources();
void DestroyTextureDecodingResources();
SHADER m_colorCopyProgram;
SHADER m_colorMatrixProgram;
SHADER m_depthMatrixProgram;
GLuint m_colorMatrixUniform;
GLuint m_depthMatrixUniform;
GLuint m_colorCopyPositionUniform;
GLuint m_colorMatrixPositionUniform;
GLuint m_depthCopyPositionUniform;
struct EFBCopyShader
{
SHADER shader;
GLuint position_uniform;
};
u32 m_color_cbuf_id;
u32 m_depth_cbuf_id;
std::map<TextureConversionShaderGen::TCShaderUid, EFBCopyShader> m_efb_copy_programs;
SHADER m_colorCopyProgram;
GLuint m_colorCopyPositionUniform;
std::array<PaletteShader, 3> m_palette_shaders;
std::unique_ptr<StreamBuffer> m_palette_stream_buffer;
GLuint m_palette_resolv_texture = 0;
std::map<std::pair<u32, u32>, TextureDecodingProgramInfo> m_texture_decoding_program_info;
std::array<GLuint, TextureConversionShader::BUFFER_FORMAT_COUNT> m_texture_decoding_buffer_views;
std::array<GLuint, TextureConversionShaderTiled::BUFFER_FORMAT_COUNT>
m_texture_decoding_buffer_views;
};
}

View File

@ -54,7 +54,8 @@ static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params)
if (iter != s_encoding_programs.end())
return iter->second;
const char* shader = TextureConversionShader::GenerateEncodingShader(params, APIType::OpenGL);
const char* shader =
TextureConversionShaderTiled::GenerateEncodingShader(params, APIType::OpenGL);
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && shader)

View File

@ -26,7 +26,8 @@ public:
private:
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, unsigned int cbuf_id, const float* colmat) override
bool scale_by_half, unsigned int cbuf_id, const float* colmat,
EFBCopyFormat dst_format, bool is_intensity) override
{
// TODO: If we ever want to "fake" vram textures, we would need to implement this
}

View File

@ -237,80 +237,12 @@ bool TextureCache::CompileShaders()
}
)";
static const char EFB_COLOR_TO_TEX_SOURCE[] = R"(
SAMPLER_BINDING(0) uniform sampler2DArray samp0;
layout(std140, push_constant) uniform PSBlock
{
vec4 colmat[7];
} C;
layout(location = 0) in vec3 uv0;
layout(location = 1) in vec4 col0;
layout(location = 0) out vec4 ocol0;
void main()
{
float4 texcol = texture(samp0, uv0);
texcol = floor(texcol * C.colmat[5]) * C.colmat[6];
ocol0 = texcol * mat4(C.colmat[0], C.colmat[1], C.colmat[2], C.colmat[3]) + C.colmat[4];
}
)";
static const char EFB_DEPTH_TO_TEX_SOURCE[] = R"(
SAMPLER_BINDING(0) uniform sampler2DArray samp0;
layout(std140, push_constant) uniform PSBlock
{
vec4 colmat[5];
} C;
layout(location = 0) in vec3 uv0;
layout(location = 1) in vec4 col0;
layout(location = 0) out vec4 ocol0;
void main()
{
#if MONO_DEPTH
vec4 texcol = texture(samp0, vec3(uv0.xy, 0.0f));
#else
vec4 texcol = texture(samp0, uv0);
#endif
int depth = int((1.0 - texcol.x) * 16777216.0);
// Convert to Z24 format
ivec4 workspace;
workspace.r = (depth >> 16) & 255;
workspace.g = (depth >> 8) & 255;
workspace.b = depth & 255;
// Convert to Z4 format
workspace.a = (depth >> 16) & 0xF0;
// Normalize components to [0.0..1.0]
texcol = vec4(workspace) / 255.0;
ocol0 = texcol * mat4(C.colmat[0], C.colmat[1], C.colmat[2], C.colmat[3]) + C.colmat[4];
}
)";
std::string header = g_shader_cache->GetUtilityShaderHeader();
std::string source;
std::string source = header + COPY_SHADER_SOURCE;
source = header + COPY_SHADER_SOURCE;
m_copy_shader = Util::CompileAndCreateFragmentShader(source);
source = header + EFB_COLOR_TO_TEX_SOURCE;
m_efb_color_to_tex_shader = Util::CompileAndCreateFragmentShader(source);
if (g_ActiveConfig.bStereoEFBMonoDepth)
source = header + "#define MONO_DEPTH 1\n" + EFB_DEPTH_TO_TEX_SOURCE;
else
source = header + EFB_DEPTH_TO_TEX_SOURCE;
m_efb_depth_to_tex_shader = Util::CompileAndCreateFragmentShader(source);
return m_copy_shader != VK_NULL_HANDLE && m_efb_color_to_tex_shader != VK_NULL_HANDLE &&
m_efb_depth_to_tex_shader != VK_NULL_HANDLE;
return m_copy_shader != VK_NULL_HANDLE;
}
void TextureCache::DeleteShaders()
@ -324,21 +256,18 @@ void TextureCache::DeleteShaders()
vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_copy_shader, nullptr);
m_copy_shader = VK_NULL_HANDLE;
}
if (m_efb_color_to_tex_shader != VK_NULL_HANDLE)
for (auto& shader : m_efb_copy_to_tex_shaders)
{
vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_efb_color_to_tex_shader, nullptr);
m_efb_color_to_tex_shader = VK_NULL_HANDLE;
}
if (m_efb_depth_to_tex_shader != VK_NULL_HANDLE)
{
vkDestroyShaderModule(g_vulkan_context->GetDevice(), m_efb_depth_to_tex_shader, nullptr);
m_efb_depth_to_tex_shader = VK_NULL_HANDLE;
vkDestroyShaderModule(g_vulkan_context->GetDevice(), shader.second, nullptr);
}
m_efb_copy_to_tex_shaders.clear();
}
void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
unsigned int cbuf_id, const float* colmat)
unsigned int cbuf_id, const float* colmat,
EFBCopyFormat dst_format, bool is_intensity)
{
VKTexture* texture = static_cast<VKTexture*>(entry->texture.get());
@ -378,13 +307,27 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
texture->GetRawTexIdentifier()->TransitionToLayout(command_buffer,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
UtilityShaderDraw draw(command_buffer,
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_PUSH_CONSTANT),
m_render_pass, g_shader_cache->GetPassthroughVertexShader(),
g_shader_cache->GetPassthroughGeometryShader(),
is_depth_copy ? m_efb_depth_to_tex_shader : m_efb_color_to_tex_shader);
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half);
auto it = m_efb_copy_to_tex_shaders.emplace(uid, VkShaderModule(VK_NULL_HANDLE));
VkShaderModule& shader = it.first->second;
bool created = it.second;
if (created)
{
std::string source = g_shader_cache->GetUtilityShaderHeader();
source +=
TextureConversionShaderGen::GenerateShader(APIType::Vulkan, uid.GetUidData()).GetBuffer();
shader = Util::CompileAndCreateFragmentShader(source);
}
UtilityShaderDraw draw(command_buffer,
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_STANDARD), m_render_pass,
g_shader_cache->GetPassthroughVertexShader(),
g_shader_cache->GetPassthroughGeometryShader(), shader);
draw.SetPushConstants(colmat, (is_depth_copy ? sizeof(float) * 20 : sizeof(float) * 28));
draw.SetPSSampler(0, src_texture->GetView(), src_sampler);
VkRect2D dest_region = {{0, 0}, {texture->GetConfig().width, texture->GetConfig().height}};

View File

@ -4,11 +4,13 @@
#pragma once
#include <map>
#include <memory>
#include "Common/CommonTypes.h"
#include "VideoBackends/Vulkan/StreamBuffer.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureConverterShaderGen.h"
namespace Vulkan
{
@ -53,7 +55,8 @@ private:
bool CreateRenderPasses();
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, unsigned int cbuf_id, const float* colmat) override;
bool scale_by_half, unsigned int cbuf_id, const float* colmat,
EFBCopyFormat dst_format, bool is_intensity) override;
VkRenderPass m_render_pass = VK_NULL_HANDLE;
@ -62,8 +65,7 @@ private:
std::unique_ptr<TextureConverter> m_texture_converter;
VkShaderModule m_copy_shader = VK_NULL_HANDLE;
VkShaderModule m_efb_color_to_tex_shader = VK_NULL_HANDLE;
VkShaderModule m_efb_depth_to_tex_shader = VK_NULL_HANDLE;
std::map<TextureConversionShaderGen::TCShaderUid, VkShaderModule> m_efb_copy_to_tex_shaders;
};
} // namespace Vulkan

View File

@ -389,7 +389,7 @@ bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TLUTFormat
return iter->second.valid;
TextureDecodingPipeline pipeline;
pipeline.base_info = TextureConversionShader::GetDecodingShaderInfo(format);
pipeline.base_info = TextureConversionShaderTiled::GetDecodingShaderInfo(format);
pipeline.compute_shader = VK_NULL_HANDLE;
pipeline.valid = false;
@ -400,7 +400,7 @@ bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TLUTFormat
}
std::string shader_source =
TextureConversionShader::GenerateDecodingShader(format, palette_format, APIType::Vulkan);
TextureConversionShaderTiled::GenerateDecodingShader(format, palette_format, APIType::Vulkan);
pipeline.compute_shader = Util::CompileAndCreateComputeShader(shader_source);
if (pipeline.compute_shader == VK_NULL_HANDLE)
@ -438,7 +438,7 @@ void TextureConverter::DecodeTexture(VkCommandBuffer command_buffer,
// Copy to GPU-visible buffer, aligned to the data type
auto info = iter->second;
u32 bytes_per_buffer_elem =
TextureConversionShader::GetBytesPerBufferElement(info.base_info->buffer_format);
TextureConversionShaderTiled::GetBytesPerBufferElement(info.base_info->buffer_format);
// Calculate total data size, including palette.
// Only copy palette if it is required.
@ -496,16 +496,16 @@ void TextureConverter::DecodeTexture(VkCommandBuffer command_buffer,
VkBufferView data_view = VK_NULL_HANDLE;
switch (iter->second.base_info->buffer_format)
{
case TextureConversionShader::BUFFER_FORMAT_R8_UINT:
case TextureConversionShaderTiled::BUFFER_FORMAT_R8_UINT:
data_view = m_texel_buffer_view_r8_uint;
break;
case TextureConversionShader::BUFFER_FORMAT_R16_UINT:
case TextureConversionShaderTiled::BUFFER_FORMAT_R16_UINT:
data_view = m_texel_buffer_view_r16_uint;
break;
case TextureConversionShader::BUFFER_FORMAT_R32G32_UINT:
case TextureConversionShaderTiled::BUFFER_FORMAT_R32G32_UINT:
data_view = m_texel_buffer_view_r32g32_uint;
break;
case TextureConversionShader::BUFFER_FORMAT_RGBA8_UINT:
case TextureConversionShaderTiled::BUFFER_FORMAT_RGBA8_UINT:
data_view = m_texel_buffer_view_rgba8_uint;
break;
default:
@ -522,8 +522,8 @@ void TextureConverter::DecodeTexture(VkCommandBuffer command_buffer,
dispatcher.SetTexelBuffer(0, data_view);
if (has_palette)
dispatcher.SetTexelBuffer(1, m_texel_buffer_view_r16_uint);
auto groups = TextureConversionShader::GetDispatchCount(iter->second.base_info, aligned_width,
aligned_height);
auto groups = TextureConversionShaderTiled::GetDispatchCount(iter->second.base_info,
aligned_width, aligned_height);
dispatcher.Dispatch(groups.first, groups.second, 1);
// Copy from temporary texture to final destination.
@ -691,7 +691,8 @@ bool TextureConverter::CompilePaletteConversionShaders()
VkShaderModule TextureConverter::CompileEncodingShader(const EFBCopyParams& params)
{
const char* shader = TextureConversionShader::GenerateEncodingShader(params, APIType::Vulkan);
const char* shader =
TextureConversionShaderTiled::GenerateEncodingShader(params, APIType::Vulkan);
VkShaderModule module = Util::CompileAndCreateFragmentShader(shader);
if (module == VK_NULL_HANDLE)
PanicAlert("Failed to compile texture encoding shader.");

View File

@ -114,7 +114,7 @@ private:
// Texture decoding - GX format in memory->RGBA8
struct TextureDecodingPipeline
{
const TextureConversionShader::DecodingShaderInfo* base_info;
const TextureConversionShaderTiled::DecodingShaderInfo* base_info;
VkShaderModule compute_shader;
bool valid;
};

View File

@ -39,6 +39,7 @@ set(SRCS
TextureCacheBase.cpp
TextureConfig.cpp
TextureConversionShader.cpp
TextureConverterShaderGen.cpp
TextureDecoder_Common.cpp
VertexLoader.cpp
VertexLoaderBase.cpp

View File

@ -1997,7 +1997,8 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF
entry->may_have_overlapping_textures = false;
entry->is_custom_tex = false;
CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, cbufid, colmat);
CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, cbufid, colmat, dstFormat,
isIntensity);
u64 hash = entry->CalculateHash();
entry->SetHashes(hash, hash);

View File

@ -316,7 +316,8 @@ private:
virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
const EFBRectangle& src_rect, bool scale_by_half,
unsigned int cbuf_id, const float* colmat) = 0;
unsigned int cbuf_id, const float* colmat,
EFBCopyFormat dst_format, bool is_intensity) = 0;
// Removes and unlinks texture from texture cache and returns it to the pool
TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter);

View File

@ -22,7 +22,7 @@
static char text[16384];
static bool IntensityConstantAdded = false;
namespace TextureConversionShader
namespace TextureConversionShaderTiled
{
u16 GetEncodedSampleCount(EFBCopyFormat format)
{

View File

@ -15,7 +15,7 @@ enum class EFBCopyFormat;
enum class TLUTFormat;
struct EFBCopyParams;
namespace TextureConversionShader
namespace TextureConversionShaderTiled
{
u16 GetEncodedSampleCount(EFBCopyFormat format);
@ -58,4 +58,4 @@ std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width,
std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_format,
APIType api_type);
} // namespace TextureConversionShader
} // namespace TextureConversionShaderTiled

View File

@ -0,0 +1,304 @@
// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include <array>
#include <cstring>
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/TextureConverterShaderGen.h"
#include "VideoCommon/VideoCommon.h"
#include "VideoCommon/VideoConfig.h"
namespace TextureConversionShaderGen
{
// Builds the shader UID that identifies one EFB-to-texture copy configuration.
//
// dst_format    - destination copy format requested by the game.
// is_depth_copy - true when the depth buffer is the copy source.
// is_intensity  - true when the copy performs the intensity conversion.
// scale_by_half - true when the copy is scaled by half.
//
// Besides the arguments, the UID also records whether the current EFB pixel format
// (read from bpmem) is RGBA6_Z24, i.e. whether the EFB carries an alpha channel.
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
                         bool scale_by_half)
{
  TCShaderUid uid;
  UidData* const data = uid.GetUidData<UidData>();

  // Clear the whole payload first so every bit that is not assigned below has a fixed value.
  memset(data, 0, sizeof(*data));

  const bool efb_has_alpha = bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;

  data->dst_format = dst_format;
  data->efb_has_alpha = efb_has_alpha;
  data->is_depth_copy = is_depth_copy;
  data->is_intensity = is_intensity;
  data->scale_by_half = scale_by_half;

  return uid;
}
// Generates the GLSL fragment shader for one EFB-to-texture copy configuration.
//
// api_type - target API. A declaration header is emitted for OpenGL and Vulkan only; for any
//            other value no sampler/varying declarations are written.
// uid_data - copy configuration (destination format, depth/intensity flags, EFB alpha).
//
// Returns the shader source. The per-format conversion is baked into the shader as a constant
// array "colmat" of seven vec4s:
//   colmat[0..3] - 4x4 matrix applied to the sampled texel,
//   colmat[4]    - constant added after the matrix multiply,
//   colmat[5]    - per-channel scale applied before floor() (color copies only),
//   colmat[6]    - per-channel scale applied after floor() (color copies only).
//
// NOTE(review): the generated source also depends on g_ActiveConfig.bStereoEFBMonoDepth, which
// is not one of the UidData fields. Callers that cache shaders by UID may need to drop their
// cache when that setting changes - confirm against the backends' config-change handling.
ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
{
  ShaderCode out;

  // Flat storage for the seven vec4s described above.
  std::array<float, 28> colmat = {};
  float* const const_add = &colmat[16];   // becomes colmat[4] in the shader
  float* const color_mask = &colmat[20];  // becomes colmat[5] and colmat[6] in the shader

  // Default quantization: keep all 8 bits of every channel.
  color_mask[0] = color_mask[1] = color_mask[2] = color_mask[3] = 255.0f;
  color_mask[4] = color_mask[5] = color_mask[6] = color_mask[7] = 1.0f / 255.0f;

  if (api_type == APIType::OpenGL)
    out.Write("SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
              "#define samp0 samp9\n"
              "#define uv0 f_uv0\n"
              "in vec3 uv0;\n"
              "out vec4 ocol0;\n");
  else if (api_type == APIType::Vulkan)
    out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"
              "layout(location = 0) in vec3 uv0;\n"
              "layout(location = 1) in vec4 col0;\n"
              "layout(location = 0) out vec4 ocol0;");

  // With mono depth enabled, depth copies always sample layer 0 of the source.
  bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth;
  out.Write("void main(){\n"
            " vec4 texcol = texture(samp0, %s);\n",
            mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0");

  if (uid_data->is_depth_copy)
  {
    // The shader unpacks depth into workspace.rgb = Z24 bytes (high, mid, low) and
    // workspace.a = Z4, so the matrix below only has to route those channels.
    switch (uid_data->dst_format)
    {
    case EFBCopyFormat::R4:  // Z4
      colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
      break;
    case EFBCopyFormat::R8_0x1:  // Z8
    case EFBCopyFormat::R8:      // Z8H
      colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
      break;

    case EFBCopyFormat::RA8:  // Z16
      colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
      break;

    case EFBCopyFormat::RG8:  // Z16 (reverse order)
      colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
      break;

    case EFBCopyFormat::RGBA8:  // Z24X8
      colmat[0] = colmat[5] = colmat[10] = 1.0f;
      break;

    case EFBCopyFormat::G8:  // Z8M
      colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
      break;

    case EFBCopyFormat::B8:  // Z8L
      colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
      break;

    case EFBCopyFormat::GB8:  // Z16L - copy lower 16 depth bits
      // expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits
      // stored as alpha)
      // Used e.g. in Zelda: Skyward Sword
      colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
      break;

    default:
      ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%X", static_cast<int>(uid_data->dst_format));
      colmat[2] = colmat[5] = colmat[8] = 1.0f;
      break;
    }
  }
  else if (uid_data->is_intensity)
  {
    const_add[0] = const_add[1] = const_add[2] = 16.0f / 255.0f;
    switch (uid_data->dst_format)
    {
    case EFBCopyFormat::R4:      // I4
    case EFBCopyFormat::R8_0x1:  // I8
    case EFBCopyFormat::R8:      // I8
    case EFBCopyFormat::RA4:     // IA4
    case EFBCopyFormat::RA8:     // IA8
      // TODO - verify these coefficients
      colmat[0] = 0.257f;
      colmat[1] = 0.504f;
      colmat[2] = 0.098f;
      colmat[4] = 0.257f;
      colmat[5] = 0.504f;
      colmat[6] = 0.098f;
      colmat[8] = 0.257f;
      colmat[9] = 0.504f;
      colmat[10] = 0.098f;

      if (uid_data->dst_format == EFBCopyFormat::R4 ||
          uid_data->dst_format == EFBCopyFormat::R8_0x1 ||
          uid_data->dst_format == EFBCopyFormat::R8)
      {
        // Intensity-only formats: the alpha output carries the intensity as well.
        colmat[12] = 0.257f;
        colmat[13] = 0.504f;
        colmat[14] = 0.098f;
        const_add[3] = 16.0f / 255.0f;
        if (uid_data->dst_format == EFBCopyFormat::R4)
        {
          color_mask[0] = color_mask[1] = color_mask[2] = 255.0f / 16.0f;
          color_mask[4] = color_mask[5] = color_mask[6] = 1.0f / 15.0f;
        }
      }
      else  // alpha
      {
        colmat[15] = 1;
        if (uid_data->dst_format == EFBCopyFormat::RA4)
        {
          color_mask[0] = color_mask[1] = color_mask[2] = color_mask[3] = 255.0f / 16.0f;
          color_mask[4] = color_mask[5] = color_mask[6] = color_mask[7] = 1.0f / 15.0f;
        }
      }
      break;

    default:
      ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%X",
                static_cast<int>(uid_data->dst_format));
      colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
      break;
    }
  }
  else
  {
    switch (uid_data->dst_format)
    {
    case EFBCopyFormat::R4:  // R4
      colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
      color_mask[0] = 255.0f / 16.0f;
      color_mask[4] = 1.0f / 15.0f;
      break;
    case EFBCopyFormat::R8_0x1:  // R8
    case EFBCopyFormat::R8:      // R8
      colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
      break;

    case EFBCopyFormat::RA4:  // RA4
      colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
      color_mask[0] = color_mask[3] = 255.0f / 16.0f;
      color_mask[4] = color_mask[7] = 1.0f / 15.0f;

      // Without EFB alpha, mask the sampled alpha out and force the result to 1.0.
      if (!uid_data->efb_has_alpha)
      {
        color_mask[3] = 0.0f;
        const_add[3] = 1.0f;
      }
      break;
    case EFBCopyFormat::RA8:  // RA8
      colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;

      if (!uid_data->efb_has_alpha)
      {
        color_mask[3] = 0.0f;
        const_add[3] = 1.0f;
      }
      break;

    case EFBCopyFormat::A8:  // A8
      colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;

      if (!uid_data->efb_has_alpha)
      {
        color_mask[3] = 0.0f;
        const_add[0] = 1.0f;
        const_add[1] = 1.0f;
        const_add[2] = 1.0f;
        const_add[3] = 1.0f;
      }
      break;

    case EFBCopyFormat::G8:  // G8
      colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
      break;
    case EFBCopyFormat::B8:  // B8
      colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
      break;

    case EFBCopyFormat::RG8:  // RG8
      colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
      break;

    case EFBCopyFormat::GB8:  // GB8
      colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
      break;

    case EFBCopyFormat::RGB565:  // RGB565
      colmat[0] = colmat[5] = colmat[10] = 1.0f;
      color_mask[0] = color_mask[2] = 255.0f / 8.0f;
      color_mask[4] = color_mask[6] = 1.0f / 31.0f;
      color_mask[1] = 255.0f / 4.0f;
      color_mask[5] = 1.0f / 63.0f;
      const_add[3] = 1.0f;  // set alpha to 1
      break;

    case EFBCopyFormat::RGB5A3:  // RGB5A3
      colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
      color_mask[0] = color_mask[1] = color_mask[2] = 255.0f / 8.0f;
      color_mask[4] = color_mask[5] = color_mask[6] = 1.0f / 31.0f;
      color_mask[3] = 255.0f / 32.0f;
      color_mask[7] = 1.0f / 7.0f;

      if (!uid_data->efb_has_alpha)
      {
        color_mask[3] = 0.0f;
        const_add[3] = 1.0f;
      }
      break;
    case EFBCopyFormat::RGBA8:  // RGBA8
      colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;

      if (!uid_data->efb_has_alpha)
      {
        color_mask[3] = 0.0f;
        const_add[3] = 1.0f;
      }
      break;

    case EFBCopyFormat::XFB:  // XFB copy, we just pretend it's an RGBX copy
      colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
      color_mask[3] = 0.0f;
      const_add[3] = 1.0f;
      break;

    default:
      ERROR_LOG(VIDEO, "Unknown copy color format: 0x%X", static_cast<int>(uid_data->dst_format));
      colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
      break;
    }
  }

  // Emit the table as a constant array. A separator is written after every element except the
  // last one; the previous "i < 7" test was always true for i in [0, 6] and therefore left a
  // trailing comma behind the final vec4.
  const size_t vec4_count = colmat.size() / 4;
  out.Write(" const vec4 colmat[7] = {\n");
  for (size_t i = 0; i < vec4_count; i++)
  {
    out.Write(" vec4(%f, %f, %f, %f)%s\n", colmat[i * 4 + 0], colmat[i * 4 + 1],
              colmat[i * 4 + 2], colmat[i * 4 + 3], i + 1 < vec4_count ? "," : "");
  }
  out.Write(" };\n");

  if (uid_data->is_depth_copy)
  {
    // On Vulkan the sampled depth value is inverted before it is converted.
    if (api_type == APIType::Vulkan)
      out.Write("texcol.x = 1.0 - texcol.x;\n");

    out.Write(" int depth = int(texcol.x * 16777216.0);\n"

              // Convert to Z24 format
              " ivec4 workspace;\n"
              " workspace.r = (depth >> 16) & 255;\n"
              " workspace.g = (depth >> 8) & 255;\n"
              " workspace.b = depth & 255;\n"

              // Convert to Z4 format
              " workspace.a = (depth >> 16) & 0xF0;\n"

              // Normalize components to [0.0..1.0]
              " texcol = vec4(workspace) / 255.0;\n");
  }
  else
  {
    // Quantize each channel to the bit depth selected through color_mask.
    out.Write(" texcol = floor(texcol * colmat[5]) * colmat[6];\n");
  }
  out.Write(" ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];\n"
            "}\n");

  return out;
}
} // namespace TextureConversionShaderGen

View File

@ -0,0 +1,35 @@
// Copyright 2017 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "Common/CommonTypes.h"
#include "VideoCommon/ShaderGenCommon.h"
#include "VideoCommon/TextureDecoder.h"
enum class APIType;
// Declarations for the EFB-to-texture copy shader generator: the UID payload that
// identifies one shader variant, plus the functions that build the UID and the shader.
namespace TextureConversionShaderGen
{
// Pack the UID payload to 1-byte alignment.
// NOTE(review): NumValues() reports the raw struct size, so ShaderUid presumably
// hashes/compares the struct byte-wise - confirm in ShaderGenCommon.h before
// reordering or adding members.
#pragma pack(1)
// Parameters that select one variant of the texture conversion shader.
struct UidData
{
// Returns the size of this struct in bytes.
u32 NumValues() const { return sizeof(UidData); }
// Destination format of the copy; the generator switches on this value.
EFBCopyFormat dst_format;
// When 0, the generator forces the output alpha to 1.0 for the RGB5A3 and RGBA8 formats.
// Not a GetShaderUid() parameter - presumably derived from the current EFB state; confirm.
u32 efb_has_alpha : 1;
// When 1, the generator emits the depth (Z24/Z4) conversion instead of the color mask path.
u32 is_depth_copy : 1;
// Mirrors the is_intensity argument of GetShaderUid().
// NOTE(review): presumably selects the intensity variants of the copy - confirm in the generator.
u32 is_intensity : 1;
// Mirrors the scale_by_half argument of GetShaderUid().
// NOTE(review): presumably requests a half-resolution copy - confirm in the generator.
u32 scale_by_half : 1;
};
#pragma pack()
// Shader UID type wrapping UidData.
using TCShaderUid = ShaderUid<UidData>;
// Builds the shader code for the variant described by uid_data, for the given graphics API.
ShaderCode GenerateShader(APIType api_type, const UidData* uid_data);
// Builds the UID for a copy with the given destination format and flags.
// NOTE(review): efb_has_alpha has no matching parameter here; confirm where it is filled in.
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
bool scale_by_half);
} // namespace TextureConversionShaderGen

View File

@ -76,6 +76,7 @@
<ClCompile Include="TextureCacheBase.cpp" />
<ClCompile Include="TextureConfig.cpp" />
<ClCompile Include="TextureConversionShader.cpp" />
<ClCompile Include="TextureConverterShaderGen.cpp" />
<ClCompile Include="UberShaderVertex.cpp" />
<ClCompile Include="VertexLoader.cpp" />
<ClCompile Include="VertexLoaderBase.cpp" />
@ -139,6 +140,7 @@
<ClInclude Include="TextureCacheBase.h" />
<ClInclude Include="TextureConfig.h" />
<ClInclude Include="TextureConversionShader.h" />
<ClInclude Include="TextureConverterShaderGen.h" />
<ClInclude Include="TextureDecoder.h" />
<ClInclude Include="UberShaderVertex.h" />
<ClInclude Include="VertexLoader.h" />

View File

@ -83,6 +83,9 @@
<ClCompile Include="TextureConversionShader.cpp">
<Filter>Shader Generators</Filter>
</ClCompile>
<ClCompile Include="TextureConverterShaderGen.cpp">
<Filter>Shader Generators</Filter>
</ClCompile>
<ClCompile Include="VertexShaderGen.cpp">
<Filter>Shader Generators</Filter>
</ClCompile>
@ -263,6 +266,9 @@
<ClInclude Include="TextureConversionShader.h">
<Filter>Shader Generators</Filter>
</ClInclude>
<ClInclude Include="TextureConverterShaderGen.h">
<Filter>Shader Generators</Filter>
</ClInclude>
<ClInclude Include="VertexShaderGen.h">
<Filter>Shader Generators</Filter>
</ClInclude>