Merge pull request #6239 from stenzek/d3d-videocommon-efbtotex

D3D: Use VideoCommon EFB-to-Texture Shaders
This commit is contained in:
Markus Wick 2017-12-06 09:28:52 +01:00 committed by GitHub
commit cde02b5b5f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 89 additions and 224 deletions

View File

@ -38,9 +38,7 @@ LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;
LinearDiskCache<UberShader::PixelShaderUid, u8> g_uber_ps_disk_cache; LinearDiskCache<UberShader::PixelShaderUid, u8> g_uber_ps_disk_cache;
extern std::unique_ptr<VideoCommon::AsyncShaderCompiler> g_async_compiler; extern std::unique_ptr<VideoCommon::AsyncShaderCompiler> g_async_compiler;
ID3D11PixelShader* s_ColorMatrixProgram[2] = {nullptr};
ID3D11PixelShader* s_ColorCopyProgram[2] = {nullptr}; ID3D11PixelShader* s_ColorCopyProgram[2] = {nullptr};
ID3D11PixelShader* s_DepthMatrixProgram[2] = {nullptr};
ID3D11PixelShader* s_ClearProgram = nullptr; ID3D11PixelShader* s_ClearProgram = nullptr;
ID3D11PixelShader* s_AnaglyphProgram = nullptr; ID3D11PixelShader* s_AnaglyphProgram = nullptr;
ID3D11PixelShader* s_DepthResolveProgram = nullptr; ID3D11PixelShader* s_DepthResolveProgram = nullptr;
@ -104,106 +102,6 @@ const char color_copy_program_code_msaa[] = {
"ocol0 /= SAMPLES;\n" "ocol0 /= SAMPLES;\n"
"}\n"}; "}\n"};
// HLSL source of the single-sample colour-matrix pixel shader; PixelShaderCache::Init
// compiles it into s_ColorMatrixProgram[0] ("used for color conversion").
// The shader samples Tex0 at uv0, quantizes the texel as
// floor(texcol * cColMatrix[5]) * cColMatrix[6], builds each output channel from a dot
// product with cColMatrix[0..3] and finally adds cColMatrix[4].
const char color_matrix_program_code[] = {"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n"
"in float3 uv0 : TEXCOORD0){\n"
"float4 texcol = Tex0.Sample(samp0,uv0);\n"
"texcol = floor(texcol * cColMatrix[5])*cColMatrix[6];\n"
"ocol0 = "
"float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix["
"1]),dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3]))"
" + cColMatrix[4];\n"
"}\n"};
// Format string for the multisampled colour-matrix pixel shader. The "%d" after
// "#define SAMPLES" is a printf-style placeholder: GetColorMatrixProgram fills it with
// g_ActiveConfig.iMultisamples via StringFromFormat before compiling.
// Unlike the single-sample variant, the texel is obtained by Load()-ing each of the SAMPLES
// samples at the integer texel position derived from uv0 and the texture dimensions, then
// averaging; the quantize / dot-product / add steps that follow are the same.
const char color_matrix_program_code_msaa[] = {
"#define SAMPLES %d\n"
"sampler samp0 : register(s0);\n"
"Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
"in float4 pos : SV_Position,\n"
"in float3 uv0 : TEXCOORD0){\n"
"int width, height, slices, samples;\n"
"Tex0.GetDimensions(width, height, slices, samples);\n"
"float4 texcol = 0;\n"
"for(int i = 0; i < SAMPLES; ++i)\n"
" texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
"texcol /= SAMPLES;\n"
"texcol = floor(texcol * cColMatrix[5])*cColMatrix[6];\n"
"ocol0 = "
"float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot("
"texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n"};
// HLSL source of the single-sample depth-matrix pixel shader; PixelShaderCache::Init
// compiles it into s_DepthMatrixProgram[0] ("used for depth copy").
// The sampled x component is inverted (1.0 - x) and scaled by 16777216 (2^24) into an integer
// depth, which is split into three bytes (r = bits 16-23, g = bits 8-15, b = bits 0-7); the
// alpha lane receives the top byte masked with 0xF0. The result is normalized by 255 and then
// run through the same cColMatrix dot-product / add step as the colour shader (without the
// floor() quantization).
const char depth_matrix_program[] = {"sampler samp0 : register(s0);\n"
"Texture2DArray Tex0 : register(t0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0){\n"
" float4 texcol = Tex0.Sample(samp0,uv0);\n"
" int depth = int((1.0 - texcol.x) * 16777216.0);\n"
// Convert to Z24 format
" int4 workspace;\n"
" workspace.r = (depth >> 16) & 255;\n"
" workspace.g = (depth >> 8) & 255;\n"
" workspace.b = depth & 255;\n"
// Convert to Z4 format
" workspace.a = (depth >> 16) & 0xF0;\n"
// Normalize components to [0.0..1.0]
" texcol = float4(workspace) / 255.0;\n"
// Apply color matrix
" ocol0 = "
"float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),"
"dot(texcol,cColMatrix[2]),dot(texcol,cColMatrix[3])) + "
"cColMatrix[4];\n"
"}\n"};
// Format string for the multisampled depth-matrix pixel shader. The "%d" after
// "#define SAMPLES" is a printf-style placeholder: GetDepthMatrixProgram fills it with
// g_ActiveConfig.iMultisamples via StringFromFormat before compiling.
// The texel is the average of SAMPLES Load()-ed samples at the integer position derived from
// uv0 and the texture dimensions; the averaged x component then goes through the same
// invert / scale-to-24-bit / byte-split / normalize / colour-matrix steps as the
// single-sample depth shader.
const char depth_matrix_program_msaa[] = {
"#define SAMPLES %d\n"
"sampler samp0 : register(s0);\n"
"Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
"uniform float4 cColMatrix[7] : register(c0);\n"
"void main(\n"
"out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0){\n"
" int width, height, slices, samples;\n"
" Tex0.GetDimensions(width, height, slices, samples);\n"
" float4 texcol = 0;\n"
" for(int i = 0; i < SAMPLES; ++i)\n"
" texcol += Tex0.Load(int3(uv0.x*(width), uv0.y*(height), uv0.z), i);\n"
" texcol /= SAMPLES;\n"
" int depth = int((1.0 - texcol.x) * 16777216.0);\n"
// Convert to Z24 format
" int4 workspace;\n"
" workspace.r = (depth >> 16) & 255;\n"
" workspace.g = (depth >> 8) & 255;\n"
" workspace.b = depth & 255;\n"
// Convert to Z4 format
" workspace.a = (depth >> 16) & 0xF0;\n"
// Normalize components to [0.0..1.0]
" texcol = float4(workspace) / 255.0;\n"
// Apply color matrix
" ocol0 = "
"float4(dot(texcol,cColMatrix[0]),dot(texcol,cColMatrix[1]),dot(texcol,cColMatrix[2]),dot("
"texcol,cColMatrix[3])) + cColMatrix[4];\n"
"}\n"};
const char depth_resolve_program[] = { const char depth_resolve_program[] = {
"#define SAMPLES %d\n" "#define SAMPLES %d\n"
"Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n" "Texture2DMSArray<float4, SAMPLES> Tex0 : register(t0);\n"
@ -368,49 +266,6 @@ ID3D11PixelShader* PixelShaderCache::GetColorCopyProgram(bool multisampled)
} }
} }
// Returns the colour-matrix pixel shader matching the requested sampling mode.
// Slot 0 holds the single-sample shader; slot 1 holds the MSAA variant, which is compiled
// on first request for the currently configured sample count and then reused.
ID3D11PixelShader* PixelShaderCache::GetColorMatrixProgram(bool multisampled)
{
  // The MSAA variant is only relevant when the caller asks for it AND MSAA is actually on.
  const bool want_msaa = multisampled && g_ActiveConfig.iMultisamples > 1;
  if (!want_msaa)
    return s_ColorMatrixProgram[0];

  // Already built for the current AA mode.
  if (s_ColorMatrixProgram[1])
    return s_ColorMatrixProgram[1];

  // Bake the sample count into the shader source, then compile and label the result.
  const std::string shader_source =
      StringFromFormat(color_matrix_program_code_msaa, g_ActiveConfig.iMultisamples);
  s_ColorMatrixProgram[1] = D3D::CompileAndCreatePixelShader(shader_source);
  CHECK(s_ColorMatrixProgram[1] != nullptr, "Create color matrix MSAA pixel shader");
  D3D::SetDebugObjectName(s_ColorMatrixProgram[1], "color matrix MSAA pixel shader");
  return s_ColorMatrixProgram[1];
}
// Returns the depth-matrix pixel shader matching the requested sampling mode.
// Slot 0 holds the single-sample shader; slot 1 holds the MSAA variant, which is compiled
// on first request for the currently configured sample count and then reused.
ID3D11PixelShader* PixelShaderCache::GetDepthMatrixProgram(bool multisampled)
{
  // The MSAA variant is only relevant when the caller asks for it AND MSAA is actually on.
  const bool want_msaa = multisampled && g_ActiveConfig.iMultisamples > 1;
  if (!want_msaa)
    return s_DepthMatrixProgram[0];

  // Already built for the current AA mode.
  if (s_DepthMatrixProgram[1])
    return s_DepthMatrixProgram[1];

  // Bake the sample count into the shader source, then compile and label the result.
  const std::string shader_source =
      StringFromFormat(depth_matrix_program_msaa, g_ActiveConfig.iMultisamples);
  s_DepthMatrixProgram[1] = D3D::CompileAndCreatePixelShader(shader_source);
  CHECK(s_DepthMatrixProgram[1] != nullptr, "Create depth matrix MSAA pixel shader");
  D3D::SetDebugObjectName(s_DepthMatrixProgram[1], "depth matrix MSAA pixel shader");
  return s_DepthMatrixProgram[1];
}
ID3D11PixelShader* PixelShaderCache::GetClearProgram() ID3D11PixelShader* PixelShaderCache::GetClearProgram()
{ {
return s_ClearProgram; return s_ClearProgram;
@ -490,16 +345,6 @@ void PixelShaderCache::Init()
CHECK(s_ColorCopyProgram[0] != nullptr, "Create color copy pixel shader"); CHECK(s_ColorCopyProgram[0] != nullptr, "Create color copy pixel shader");
D3D::SetDebugObjectName(s_ColorCopyProgram[0], "color copy pixel shader"); D3D::SetDebugObjectName(s_ColorCopyProgram[0], "color copy pixel shader");
// used for color conversion
s_ColorMatrixProgram[0] = D3D::CompileAndCreatePixelShader(color_matrix_program_code);
CHECK(s_ColorMatrixProgram[0] != nullptr, "Create color matrix pixel shader");
D3D::SetDebugObjectName(s_ColorMatrixProgram[0], "color matrix pixel shader");
// used for depth copy
s_DepthMatrixProgram[0] = D3D::CompileAndCreatePixelShader(depth_matrix_program);
CHECK(s_DepthMatrixProgram[0] != nullptr, "Create depth matrix pixel shader");
D3D::SetDebugObjectName(s_DepthMatrixProgram[0], "depth matrix pixel shader");
Clear(); Clear();
SETSTAT(stats.numPixelShadersCreated, 0); SETSTAT(stats.numPixelShadersCreated, 0);
@ -557,8 +402,6 @@ void PixelShaderCache::Clear()
void PixelShaderCache::InvalidateMSAAShaders() void PixelShaderCache::InvalidateMSAAShaders()
{ {
SAFE_RELEASE(s_ColorCopyProgram[1]); SAFE_RELEASE(s_ColorCopyProgram[1]);
SAFE_RELEASE(s_ColorMatrixProgram[1]);
SAFE_RELEASE(s_DepthMatrixProgram[1]);
SAFE_RELEASE(s_rgb8_to_rgba6[1]); SAFE_RELEASE(s_rgb8_to_rgba6[1]);
SAFE_RELEASE(s_rgba6_to_rgb8[1]); SAFE_RELEASE(s_rgba6_to_rgb8[1]);
SAFE_RELEASE(s_DepthResolveProgram); SAFE_RELEASE(s_DepthResolveProgram);
@ -574,8 +417,6 @@ void PixelShaderCache::Shutdown()
for (int i = 0; i < 2; ++i) for (int i = 0; i < 2; ++i)
{ {
SAFE_RELEASE(s_ColorCopyProgram[i]); SAFE_RELEASE(s_ColorCopyProgram[i]);
SAFE_RELEASE(s_ColorMatrixProgram[i]);
SAFE_RELEASE(s_DepthMatrixProgram[i]);
SAFE_RELEASE(s_rgba6_to_rgb8[i]); SAFE_RELEASE(s_rgba6_to_rgb8[i]);
SAFE_RELEASE(s_rgb8_to_rgba6[i]); SAFE_RELEASE(s_rgb8_to_rgba6[i]);
} }

View File

@ -32,9 +32,7 @@ public:
static ID3D11Buffer* GetConstantBuffer(); static ID3D11Buffer* GetConstantBuffer();
static ID3D11PixelShader* GetColorMatrixProgram(bool multisampled);
static ID3D11PixelShader* GetColorCopyProgram(bool multisampled); static ID3D11PixelShader* GetColorCopyProgram(bool multisampled);
static ID3D11PixelShader* GetDepthMatrixProgram(bool multisampled);
static ID3D11PixelShader* GetClearProgram(); static ID3D11PixelShader* GetClearProgram();
static ID3D11PixelShader* GetAnaglyphProgram(); static ID3D11PixelShader* GetAnaglyphProgram();
static ID3D11PixelShader* GetDepthResolveProgram(); static ID3D11PixelShader* GetDepthResolveProgram();

View File

@ -29,8 +29,6 @@
namespace DX11 namespace DX11
{ {
static const size_t MAX_COPY_BUFFERS = 32;
static ID3D11Buffer* s_efbcopycbuf[MAX_COPY_BUFFERS] = {0};
static std::unique_ptr<PSTextureEncoder> g_encoder; static std::unique_ptr<PSTextureEncoder> g_encoder;
void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
@ -207,17 +205,16 @@ TextureCache::TextureCache()
TextureCache::~TextureCache() TextureCache::~TextureCache()
{ {
for (unsigned int k = 0; k < MAX_COPY_BUFFERS; ++k)
SAFE_RELEASE(s_efbcopycbuf[k]);
g_encoder->Shutdown(); g_encoder->Shutdown();
g_encoder.reset(); g_encoder.reset();
SAFE_RELEASE(palette_buf); SAFE_RELEASE(palette_buf);
SAFE_RELEASE(palette_buf_srv); SAFE_RELEASE(palette_buf_srv);
SAFE_RELEASE(palette_uniform); SAFE_RELEASE(palette_uniform);
for (ID3D11PixelShader*& shader : palette_pixel_shader) for (auto*& shader : palette_pixel_shader)
SAFE_RELEASE(shader); SAFE_RELEASE(shader);
for (auto& iter : m_efb_to_tex_pixel_shaders)
SAFE_RELEASE(iter.second);
} }
void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
@ -227,19 +224,24 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
{ {
auto* destination_texture = static_cast<DXTexture*>(entry->texture.get()); auto* destination_texture = static_cast<DXTexture*>(entry->texture.get());
// When copying at half size, in multisampled mode, resolve the color/depth buffer first. bool multisampled = g_ActiveConfig.iMultisamples > 1;
// This is because multisampled texture reads go through Load, not Sample, and the linear ID3D11ShaderResourceView* efb_tex_srv;
// filter is ignored. if (multisampled)
bool multisampled = (g_ActiveConfig.iMultisamples > 1);
ID3D11ShaderResourceView* efbTexSRV = is_depth_copy ?
FramebufferManager::GetEFBDepthTexture()->GetSRV() :
FramebufferManager::GetEFBColorTexture()->GetSRV();
if (multisampled && scale_by_half)
{ {
multisampled = false; efb_tex_srv = is_depth_copy ? FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() :
efbTexSRV = is_depth_copy ? FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : FramebufferManager::GetResolvedEFBColorTexture()->GetSRV();
FramebufferManager::GetResolvedEFBColorTexture()->GetSRV();
} }
else
{
efb_tex_srv = is_depth_copy ? FramebufferManager::GetEFBDepthTexture()->GetSRV() :
FramebufferManager::GetEFBColorTexture()->GetSRV();
}
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half);
ID3D11PixelShader* pixel_shader = GetEFBToTexPixelShader(uid);
if (!pixel_shader)
return;
g_renderer->ResetAPIState(); g_renderer->ResetAPIState();
@ -249,20 +251,6 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
static_cast<float>(destination_texture->GetConfig().height)); static_cast<float>(destination_texture->GetConfig().height));
D3D::context->RSSetViewports(1, &vp); D3D::context->RSSetViewports(1, &vp);
// set transformation
if (nullptr == s_efbcopycbuf[cbuf_id])
{
const D3D11_BUFFER_DESC cbdesc =
CD3D11_BUFFER_DESC(28 * sizeof(float), D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT);
D3D11_SUBRESOURCE_DATA data;
data.pSysMem = colmat;
HRESULT hr = D3D::device->CreateBuffer(&cbdesc, &data, &s_efbcopycbuf[cbuf_id]);
CHECK(SUCCEEDED(hr), "Create efb copy constant buffer %d", cbuf_id);
D3D::SetDebugObjectName(s_efbcopycbuf[cbuf_id],
"a constant buffer used in TextureCache::CopyRenderTargetToTexture");
}
D3D::stateman->SetPixelConstants(s_efbcopycbuf[cbuf_id]);
const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(src_rect); const TargetRectangle targetSource = g_renderer->ConvertEFBRectangle(src_rect);
// TODO: try targetSource.asRECT(); // TODO: try targetSource.asRECT();
const D3D11_RECT sourcerect = const D3D11_RECT sourcerect =
@ -284,13 +272,24 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
// Create texture copy // Create texture copy
D3D::drawShadedTexQuad( D3D::drawShadedTexQuad(
efbTexSRV, &sourcerect, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(), efb_tex_srv, &sourcerect, g_renderer->GetTargetWidth(), g_renderer->GetTargetHeight(),
is_depth_copy ? PixelShaderCache::GetDepthMatrixProgram(multisampled) : pixel_shader, VertexShaderCache::GetSimpleVertexShader(),
PixelShaderCache::GetColorMatrixProgram(multisampled), VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader());
VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(),
GeometryShaderCache::GetCopyGeometryShader());
FramebufferManager::BindEFBRenderTarget(); FramebufferManager::BindEFBRenderTarget();
g_renderer->RestoreAPIState(); g_renderer->RestoreAPIState();
} }
// Looks up (or lazily builds) the EFB-to-texture pixel shader for the given conversion UID.
// On a miss the shader source is generated for the D3D API type, compiled, and the resulting
// pointer is stored in m_efb_to_tex_pixel_shaders under `uid` before being returned.
// The compile result is stored without being checked, so whatever
// D3D::CompileAndCreatePixelShader yields on failure (presumably nullptr - confirm against
// its definition) is remembered as well and returned on later lookups.
ID3D11PixelShader*
TextureCache::GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderUid& uid)
{
  const auto cached = m_efb_to_tex_pixel_shaders.find(uid);
  if (cached == m_efb_to_tex_pixel_shaders.end())
  {
    // First request for this UID: generate, compile and remember the shader.
    ShaderCode generated =
        TextureConversionShaderGen::GenerateShader(APIType::D3D, uid.GetUidData());
    ID3D11PixelShader* compiled = D3D::CompileAndCreatePixelShader(generated.GetBuffer());
    m_efb_to_tex_pixel_shaders.emplace(uid, compiled);
    return compiled;
  }

  return cached->second;
}
} }

View File

@ -4,8 +4,11 @@
#pragma once #pragma once
#include <map>
#include "VideoBackends/D3D/D3DTexture.h" #include "VideoBackends/D3D/D3DTexture.h"
#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureConverterShaderGen.h"
class AbstractTexture; class AbstractTexture;
struct TextureConfig; struct TextureConfig;
@ -39,9 +42,13 @@ private:
bool CompileShaders() override { return true; } bool CompileShaders() override { return true; }
void DeleteShaders() override {} void DeleteShaders() override {}
ID3D11PixelShader* GetEFBToTexPixelShader(const TextureConversionShaderGen::TCShaderUid& uid);
ID3D11Buffer* palette_buf; ID3D11Buffer* palette_buf;
ID3D11ShaderResourceView* palette_buf_srv; ID3D11ShaderResourceView* palette_buf_srv;
ID3D11Buffer* palette_uniform; ID3D11Buffer* palette_uniform;
ID3D11PixelShader* palette_pixel_shader[3]; ID3D11PixelShader* palette_pixel_shader[3];
std::map<TextureConversionShaderGen::TCShaderUid, ID3D11PixelShader*> m_efb_to_tex_pixel_shaders;
}; };
} }

View File

@ -72,6 +72,12 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType)
WRITE(p, "uniform float y_scale;\n"); WRITE(p, "uniform float y_scale;\n");
} }
// D3D does not have roundEven(), only round(), which is specified "to the nearest integer".
// This differs from the roundEven() behavior, but to get consistency across drivers in OpenGL
// we need to use roundEven().
if (ApiType == APIType::D3D)
WRITE(p, "#define roundEven(x) round(x)\n");
// Alpha channel in the copy is set to 1 if the EFB format does not have an alpha channel. // Alpha channel in the copy is set to 1 if the EFB format does not have an alpha channel.
WRITE(p, "float4 RGBA8ToRGB8(float4 src)\n"); WRITE(p, "float4 RGBA8ToRGB8(float4 src)\n");
WRITE(p, "{\n"); WRITE(p, "{\n");

View File

@ -32,35 +32,49 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
{ {
ShaderCode out; const bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth;
ShaderCode out;
if (api_type == APIType::OpenGL) if (api_type == APIType::OpenGL)
{
out.Write("SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" out.Write("SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
"#define samp0 samp9\n" "#define samp0 samp9\n"
"#define uv0 f_uv0\n" "#define uv0 f_uv0\n"
"in vec3 uv0;\n" "in vec3 uv0;\n"
"out vec4 ocol0;\n"); "out vec4 ocol0;\n"
"void main(){\n"
" vec4 texcol = texture(samp0, %s);\n",
mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0");
}
else if (api_type == APIType::Vulkan) else if (api_type == APIType::Vulkan)
{
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n" out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"
"layout(location = 0) in vec3 uv0;\n" "layout(location = 0) in vec3 uv0;\n"
"layout(location = 1) in vec4 col0;\n" "layout(location = 1) in vec4 col0;\n"
"layout(location = 0) out vec4 ocol0;"); "layout(location = 0) out vec4 ocol0;"
"void main(){\n"
bool mono_depth = uid_data->is_depth_copy && g_ActiveConfig.bStereoEFBMonoDepth; " vec4 texcol = texture(samp0, %s);\n",
out.Write("void main(){\n" mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0");
" vec4 texcol = texture(samp0, %s);\n", }
mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0"); else if (api_type == APIType::D3D)
{
out.Write("Texture2DArray tex0 : register(t0);\n"
"SamplerState samp0 : register(s0);\n"
"void main(out float4 ocol0 : SV_Target,\n"
" in float4 pos : SV_Position,\n"
" in float3 uv0 : TEXCOORD0) {\n"
" float4 texcol = tex0.Sample(samp0, uv0);\n");
}
if (uid_data->is_depth_copy) if (uid_data->is_depth_copy)
{ {
if (api_type == APIType::Vulkan) if (api_type == APIType::D3D || api_type == APIType::Vulkan)
out.Write("texcol.x = 1.0 - texcol.x;\n"); out.Write("texcol.x = 1.0 - texcol.x;\n");
out.Write(" int depth = int(texcol.x * 16777216.0);\n" out.Write(" int depth = int(texcol.x * 16777216.0);\n"
// Convert to Z24 format // Convert to Z24 format
" ivec4 workspace;\n" " int4 workspace;\n"
" workspace.r = (depth >> 16) & 255;\n" " workspace.r = (depth >> 16) & 255;\n"
" workspace.g = (depth >> 8) & 255;\n" " workspace.g = (depth >> 8) & 255;\n"
" workspace.b = depth & 255;\n" " workspace.b = depth & 255;\n"
@ -69,7 +83,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
" workspace.a = (depth >> 16) & 0xF0;\n" " workspace.a = (depth >> 16) & 0xF0;\n"
// Normalize components to [0.0..1.0] // Normalize components to [0.0..1.0]
" texcol = vec4(workspace) / 255.0;\n"); " texcol = float4(workspace) / 255.0;\n");
switch (uid_data->dst_format) switch (uid_data->dst_format)
{ {
case EFBCopyFormat::R4: // Z4 case EFBCopyFormat::R4: // Z4
@ -90,7 +104,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
break; break;
case EFBCopyFormat::RGBA8: // Z24X8 case EFBCopyFormat::RGBA8: // Z24X8
out.Write(" ocol0 = vec4(texcol.rgb, 0.0);\n"); out.Write(" ocol0 = float4(texcol.rgb, 0.0);\n");
break; break;
case EFBCopyFormat::G8: // Z8M case EFBCopyFormat::G8: // Z8M
@ -110,7 +124,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
default: default:
ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%X", static_cast<int>(uid_data->dst_format)); ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%X", static_cast<int>(uid_data->dst_format));
out.Write(" ocol0 = vec4(texcol.bgr, 0.0);\n"); out.Write(" ocol0 = float4(texcol.bgr, 0.0);\n");
break; break;
} }
} }
@ -165,16 +179,16 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
color_mask[4] = color_mask[5] = color_mask[6] = color_mask[7] = 1.0f / 15.0f; color_mask[4] = color_mask[5] = color_mask[6] = color_mask[7] = 1.0f / 15.0f;
} }
} }
out.Write(" const vec4 colmat[7] = {\n"); out.Write(" const float4 colmat[7] = {\n");
for (size_t i = 0; i < colmat.size() / 4; i++) for (size_t i = 0; i < colmat.size() / 4; i++)
{ {
out.Write(" vec4(%f, %f, %f, %f)%s\n", colmat[i * 4 + 0], colmat[i * 4 + 1], out.Write(" float4(%f, %f, %f, %f)%s\n", colmat[i * 4 + 0], colmat[i * 4 + 1],
colmat[i * 4 + 2], colmat[i * 4 + 3], i < 7 ? "," : ""); colmat[i * 4 + 2], colmat[i * 4 + 3], i < 7 ? "," : "");
} }
out.Write( out.Write(" };\n"
" };\n" " texcol = floor(texcol * colmat[5]) * colmat[6];\n"
" texcol = floor(texcol * colmat[5]) * colmat[6];\n" " ocol0 = float4(dot(texcol, colmat[0]), dot(texcol, colmat[1]), dot(texcol, "
" ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];\n"); "colmat[2]), dot(texcol, colmat[3])) + colmat[4];\n");
break; break;
default: default:
@ -193,7 +207,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
{ {
case EFBCopyFormat::R4: // R4 case EFBCopyFormat::R4: // R4
out.Write(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n" out.Write(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n"
" ocol0 = vec4(red, red, red, red);\n"); " ocol0 = float4(red, red, red, red);\n");
break; break;
case EFBCopyFormat::R8_0x1: // R8 case EFBCopyFormat::R8_0x1: // R8
@ -202,7 +216,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
break; break;
case EFBCopyFormat::RA4: // RA4 case EFBCopyFormat::RA4: // RA4
out.Write(" vec2 red_alpha = vec2(ivec2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n" out.Write(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n"
" ocol0 = red_alpha.rrrg;\n"); " ocol0 = red_alpha.rrrg;\n");
break; break;
@ -231,17 +245,17 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
break; break;
case EFBCopyFormat::RGB565: // RGB565 case EFBCopyFormat::RGB565: // RGB565
out.Write(" vec2 red_blue = vec2(ivec2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n" out.Write(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
" float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n" " float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n"
" ocol0 = vec4(red_blue.r, green, red_blue.g, 1.0);\n"); " ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n");
break; break;
case EFBCopyFormat::RGB5A3: // RGB5A3 case EFBCopyFormat::RGB5A3: // RGB5A3
// TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection // TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection
// will need to be implemented once we move away from floats. // will need to be implemented once we move away from floats.
out.Write(" vec3 color = vec3(ivec3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n" out.Write(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
" float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n" " float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n"
" ocol0 = vec4(color, alpha);\n"); " ocol0 = float4(color, alpha);\n");
break; break;
case EFBCopyFormat::RGBA8: // RGBA8 case EFBCopyFormat::RGBA8: // RGBA8
@ -249,7 +263,7 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
break; break;
case EFBCopyFormat::XFB: // XFB copy, we just pretend it's an RGBX copy case EFBCopyFormat::XFB: // XFB copy, we just pretend it's an RGBX copy
out.Write(" ocol0 = vec4(texcol.rgb, 1.0);\n"); out.Write(" ocol0 = float4(texcol.rgb, 1.0);\n");
break; break;
default: default: