From ae83a1b8216bc863c7a40eec1c5a845d7258546b Mon Sep 17 00:00:00 2001 From: Dwayne Slater Date: Sat, 28 Feb 2015 15:02:44 -0500 Subject: [PATCH] Fix OpenGLES 3.0 on Qualcomm's crappy driver, it can't bitshift sometimes. [fixed lint issues and grammar ~comex] --- Source/Core/VideoCommon/DriverDetails.cpp | 1 + Source/Core/VideoCommon/DriverDetails.h | 32 +++++ Source/Core/VideoCommon/LightingShaderGen.h | 6 +- Source/Core/VideoCommon/PixelShaderGen.cpp | 129 +++++++++++++++++--- Source/Core/VideoCommon/VertexShaderGen.cpp | 23 ++++ 5 files changed, 171 insertions(+), 20 deletions(-) diff --git a/Source/Core/VideoCommon/DriverDetails.cpp b/Source/Core/VideoCommon/DriverDetails.cpp index dbcd45133f..0fed3489a2 100644 --- a/Source/Core/VideoCommon/DriverDetails.cpp +++ b/Source/Core/VideoCommon/DriverDetails.cpp @@ -49,6 +49,7 @@ namespace DriverDetails {OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENTEXTURESIZE, -1.0, 65.0, true}, {OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENATTRIBUTELESS, -1.0, 94.0, true}, {OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENNEGATEDBOOLEAN,-1.0, -1.0, true}, + {OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENIVECSHIFTS, -1.0, 46.0, true}, {OS_ALL, VENDOR_ARM, DRIVER_ARM, -1, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true}, {OS_ALL, VENDOR_MESA, DRIVER_NOUVEAU, -1, BUG_BROKENUBO, 900, 916, true}, {OS_ALL, VENDOR_MESA, DRIVER_R600, -1, BUG_BROKENUBO, 900, 913, true}, diff --git a/Source/Core/VideoCommon/DriverDetails.h b/Source/Core/VideoCommon/DriverDetails.h index 3a287f6a09..ec6df9efe2 100644 --- a/Source/Core/VideoCommon/DriverDetails.h +++ b/Source/Core/VideoCommon/DriverDetails.h @@ -201,6 +201,38 @@ namespace DriverDetails // if (cond == false) BUG_BROKENNEGATEDBOOLEAN, + // Bug: Qualcomm has broken ivec to scalar and ivec to ivec bitshifts + // Affected devices: Adreno + // Started Version: -1 + // Ended Version: 46 (TODO: Test more devices, the real end is currently unknown) + // Qualcomm has broken integer vector to integer bitshifts, and integer vector to integer vector bitshifts + // A compilation error is generated when trying to compile the shaders. + // + // For example: + // Broken on Qualcomm: + // ivec4 ab = ivec4(1,1,1,1); + // ab <<= 2; + // + // Working on Qualcomm: + // ivec4 ab = ivec4(1,1,1,1); + // ab.x <<= 2; + // ab.y <<= 2; + // ab.z <<= 2; + // ab.w <<= 2; + // + // Broken on Qualcomm: + // ivec4 ab = ivec4(1,1,1,1); + // ivec4 cd = ivec4(1,2,3,4); + // ab <<= cd; + // + // Working on Qualcomm: + // ivec4 ab = ivec4(1,1,1,1); + // ivec4 cd = ivec4(1,2,3,4); + // ab.x <<= cd.x; + // ab.y <<= cd.y; + // ab.z <<= cd.z; + // ab.w <<= cd.w; + BUG_BROKENIVECSHIFTS, }; // Initializes our internal vendor, device family, and driver version diff --git a/Source/Core/VideoCommon/LightingShaderGen.h b/Source/Core/VideoCommon/LightingShaderGen.h index 60979aa97b..b21fd037f6 100644 --- a/Source/Core/VideoCommon/LightingShaderGen.h +++ b/Source/Core/VideoCommon/LightingShaderGen.h @@ -5,6 +5,7 @@ #pragma once #include "VideoCommon/ConstantManager.h" +#include "VideoCommon/DriverDetails.h" #include "VideoCommon/NativeVertexFormat.h" #include "VideoCommon/ShaderGenCommon.h" #include "VideoCommon/XFMemory.h" @@ -250,7 +251,10 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com } } object.Write("lacc = clamp(lacc, 0, 255);\n"); - object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j); + if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS)) + object.Write("%s%d = float4(irshift((mat * (lacc + irshift(lacc, 7))), 8)) / 255.0;\n", dest, j); + else + object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j); object.Write("}\n"); } } diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index c962379f54..4873afc567 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -197,6 +197,29 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T "int3 itrunc(float3 x) { return int3(trunc(x)); }\n" "int4 itrunc(float4 x) { return int4(trunc(x)); }\n\n"); + if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS)) + { + // Add functions to do shifts on scalars and ivecs. + // These functions all have the same name to enable them to be used no matter what code is generated. + // For example: tev color op code uses .rgb as a swizzle, but alpha code only uses .a. + out.Write("int ilshift(int a, int b) { return a << b; }\n" + "int irshift(int a, int b) { return a >> b; }\n" + + "int2 ilshift(int2 a, int2 b) { return int2(a.x << b.x, a.y << b.y); }\n" + "int2 ilshift(int2 a, int b) { return int2(a.x << b, a.y << b); }\n" + "int2 irshift(int2 a, int2 b) { return int2(a.x >> b.x, a.y >> b.y); }\n" + "int2 irshift(int2 a, int b) { return int2(a.x >> b, a.y >> b); }\n" + + "int3 ilshift(int3 a, int3 b) { return int3(a.x << b.x, a.y << b.y, a.z << b.z); }\n" + "int3 ilshift(int3 a, int b) { return int3(a.x << b, a.y << b, a.z << b); }\n" + "int3 irshift(int3 a, int3 b) { return int3(a.x >> b.x, a.y >> b.y, a.z >> b.z); }\n" + "int3 irshift(int3 a, int b) { return int3(a.x >> b, a.y >> b, a.z >> b); }\n" + + "int4 ilshift(int4 a, int4 b) { return int4(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w); }\n" + "int4 ilshift(int4 a, int b) { return int4(a.x << b, a.y << b, a.z << b, a.w << b); }\n" + "int4 irshift(int4 a, int4 b) { return int4(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); }\n" + "int4 irshift(int4 a, int b) { return int4(a.x >> b, a.y >> b, a.z >> b, a.w >> b); }\n\n"); + } if (ApiType == API_OPENGL) { @@ -477,7 +500,11 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T if (texcoord < numTexgen) { out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2); - out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy"); + + if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS)) + out.Write("\ttempcoord = irshift(fixpoint_uv%d, " I_INDTEXSCALE"[%d].%s);\n", texcoord, i / 2, (i & 1) ? "zw" : "xy"); + else + out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy"); } else out.Write("\ttempcoord = int2(0, 0);\n"); @@ -678,31 +705,64 @@ static inline void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, AP int mtxidx = 2*(bpmem.tevind[n].mid-1); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("\tint2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n); + if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS)) + { + out.Write("\tint2 indtevtrans%d = irshift(int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)), 3);\n", n, mtxidx, n, mtxidx+1, n); - // TODO: should use a shader uid branch for this for better performance - out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); - out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + // TODO: should use a shader uid branch for this for better performance + out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx); + out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + } + else + { + out.Write("\tint2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n); + + // TODO: should use a shader uid branch for this for better performance + out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + } } else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) { // s matrix _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n); - out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); - out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS)) + { + out.Write("\tint2 indtevtrans%d = irshift(int2(fixpoint_uv%d * iindtevcrd%d.xx), 8);\n", n, texcoord, n); + + out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx); + out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + } + else + { + out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n); + + out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + } } else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) { // t matrix _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n); - out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); - out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS)) + { + out.Write("\tint2 indtevtrans%d = irshift(int2(fixpoint_uv%d * iindtevcrd%d.yy), 8);\n", n, texcoord, n); + + out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx); + out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + } + else + { + out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n); + + out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); + } } else { @@ -741,7 +801,10 @@ static inline void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, AP out.Write("\ttevcoord.xy = wrappedcoord + indtevtrans%d;\n", n); // Emulate s24 overflows - out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); + if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS)) + out.Write("\ttevcoord.xy = irshift(ilshift(tevcoord.xy, 8), 8);\n"); + else + out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n"); } TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC; @@ -948,12 +1011,37 @@ static inline void WriteTevRegular(T& out, const char* components, int bias, int // - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255 // - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy // - a rounding bias is added before dividing by 256 - out.Write("(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]); - out.Write(" %s ", tevOpTable[op]); - out.Write("(((((tevin_a.%s<<8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+(tevin_c.%s>>7)))%s)%s)>>8)", - components, components, components, components, components, - tevScaleTableLeft[shift], tevLerpBias[2*op+(shift!=3)]); - out.Write(")%s", tevScaleTableRight[shift]); + + if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS)) + { + // Haxx - cleaner code by not having irshift and ilshift in the emitted code by omitting them if not used. + const char* leftShift = tevScaleTableLeft[shift]; + const char* rightShift = tevScaleTableRight[shift]; + + if (rightShift[0]) + out.Write("irshift(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]); + else + out.Write("((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]); + out.Write(" %s ", tevOpTable[op]); + if (leftShift[0]) + out.Write("irshift((ilshift((ilshift(tevin_a.%s, 8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+irshift(tevin_c.%s, 7))), %s)%s), 8)", + components, components, components, components, components, + leftShift+4, tevLerpBias[2*op+(shift!=3)]); + else + out.Write("irshift(((ilshift(tevin_a.%s, 8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+irshift(tevin_c.%s, 7)))%s), 8)", + components, components, components, components, components, tevLerpBias[2*op+(shift!=3)]); + if (rightShift[0]) + out.Write(", %s)", rightShift+4); + } + else + { + out.Write("(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]); + out.Write(" %s ", tevOpTable[op]); + out.Write("(((((tevin_a.%s<<8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+(tevin_c.%s>>7)))%s)%s)>>8)", + components, components, components, components, components, + tevScaleTableLeft[shift], tevLerpBias[2*op+(shift!=3)]); + out.Write(")%s", tevScaleTableRight[shift]); + } } template @@ -1116,7 +1204,10 @@ static inline void WriteFog(T& out, pixel_shader_uid_data* uid_data) } out.Write("\tint ifog = iround(fog * 256.0);\n"); - out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n"); + if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS)) + out.Write("\tprev.rgb = irshift((prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog), 8);\n"); + else + out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n"); } template diff --git a/Source/Core/VideoCommon/VertexShaderGen.cpp b/Source/Core/VideoCommon/VertexShaderGen.cpp index cb5c23826e..fea6ee3916 100644 --- a/Source/Core/VideoCommon/VertexShaderGen.cpp +++ b/Source/Core/VideoCommon/VertexShaderGen.cpp @@ -32,6 +32,29 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ _assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens); _assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans); + if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS)) + { + // Add functions to do shifts on scalars and ivecs. + // This is included in the vertex shader for lighting shader generation. + out.Write("int ilshift(int a, int b) { return a << b; }\n" + "int irshift(int a, int b) { return a >> b; }\n" + + "int2 ilshift(int2 a, int2 b) { return int2(a.x << b.x, a.y << b.y); }\n" + "int2 ilshift(int2 a, int b) { return int2(a.x << b, a.y << b); }\n" + "int2 irshift(int2 a, int2 b) { return int2(a.x >> b.x, a.y >> b.y); }\n" + "int2 irshift(int2 a, int b) { return int2(a.x >> b, a.y >> b); }\n" + + "int3 ilshift(int3 a, int3 b) { return int3(a.x << b.x, a.y << b.y, a.z << b.z); }\n" + "int3 ilshift(int3 a, int b) { return int3(a.x << b, a.y << b, a.z << b); }\n" + "int3 irshift(int3 a, int3 b) { return int3(a.x >> b.x, a.y >> b.y, a.z >> b.z); }\n" + "int3 irshift(int3 a, int b) { return int3(a.x >> b, a.y >> b, a.z >> b); }\n" + + "int4 ilshift(int4 a, int4 b) { return int4(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w); }\n" + "int4 ilshift(int4 a, int b) { return int4(a.x << b, a.y << b, a.z << b, a.w << b); }\n" + "int4 irshift(int4 a, int4 b) { return int4(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); }\n" + "int4 irshift(int4 a, int b) { return int4(a.x >> b, a.y >> b, a.z >> b, a.w >> b); }\n\n"); + } + out.Write("%s", s_lighting_struct); // uniforms