Fix OpenGL ES 3.0 on Qualcomm's crappy driver, which sometimes can't bitshift.

[fixed lint issues and grammar ~comex]
This commit is contained in:
Dwayne Slater 2015-02-28 15:02:44 -05:00 committed by comex
parent e4b5637c3a
commit ae83a1b821
5 changed files with 171 additions and 20 deletions

View File

@ -49,6 +49,7 @@ namespace DriverDetails
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENTEXTURESIZE, -1.0, 65.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENATTRIBUTELESS, -1.0, 94.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENNEGATEDBOOLEAN,-1.0, -1.0, true},
{OS_ALL, VENDOR_QUALCOMM, DRIVER_QUALCOMM, -1, BUG_BROKENIVECSHIFTS, -1.0, 46.0, true},
{OS_ALL, VENDOR_ARM, DRIVER_ARM, -1, BUG_BROKENBUFFERSTREAM, -1.0, -1.0, true},
{OS_ALL, VENDOR_MESA, DRIVER_NOUVEAU, -1, BUG_BROKENUBO, 900, 916, true},
{OS_ALL, VENDOR_MESA, DRIVER_R600, -1, BUG_BROKENUBO, 900, 913, true},

View File

@ -201,6 +201,38 @@ namespace DriverDetails
// if (cond == false)
BUG_BROKENNEGATEDBOOLEAN,
// Bug: Qualcomm has broken ivec-by-scalar and ivec-by-ivec bitshifts
// Affected devices: Adreno
// Started Version: -1
// Ended Version: 46 (TODO: Test more devices; the real end version is currently unknown)
// Shifting an integer vector by a scalar integer, or by another integer vector, is broken on Qualcomm:
// a compilation error is generated when trying to compile shaders that do so.
//
// For example:
// Broken on Qualcomm:
// ivec4 ab = ivec4(1,1,1,1);
// ab <<= 2;
//
// Working on Qualcomm:
// ivec4 ab = ivec4(1,1,1,1);
// ab.x <<= 2;
// ab.y <<= 2;
// ab.z <<= 2;
// ab.w <<= 2;
//
// Broken on Qualcomm:
// ivec4 ab = ivec4(1,1,1,1);
// ivec4 cd = ivec4(1,2,3,4);
// ab <<= cd;
//
// Working on Qualcomm:
// ivec4 ab = ivec4(1,1,1,1);
// ivec4 cd = ivec4(1,2,3,4);
// ab.x <<= cd.x;
// ab.y <<= cd.y;
// ab.z <<= cd.z;
// ab.w <<= cd.w;
BUG_BROKENIVECSHIFTS,
};
// Initializes our internal vendor, device family, and driver version

View File

@ -5,6 +5,7 @@
#pragma once
#include "VideoCommon/ConstantManager.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/ShaderGenCommon.h"
#include "VideoCommon/XFMemory.h"
@ -250,7 +251,10 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
}
}
object.Write("lacc = clamp(lacc, 0, 255);\n");
object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
object.Write("%s%d = float4(irshift((mat * (lacc + irshift(lacc, 7))), 8)) / 255.0;\n", dest, j);
else
object.Write("%s%d = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;\n", dest, j);
object.Write("}\n");
}
}

View File

@ -197,6 +197,29 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
"int3 itrunc(float3 x) { return int3(trunc(x)); }\n"
"int4 itrunc(float4 x) { return int4(trunc(x)); }\n\n");
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
// Add functions to do shifts on scalars and ivecs.
// All overloads share the same two names (ilshift/irshift) so the generated code can call them regardless of operand width.
// For example: the tev color op code uses a .rgb swizzle, while the alpha code only uses .a.
out.Write("int ilshift(int a, int b) { return a << b; }\n"
"int irshift(int a, int b) { return a >> b; }\n"
"int2 ilshift(int2 a, int2 b) { return int2(a.x << b.x, a.y << b.y); }\n"
"int2 ilshift(int2 a, int b) { return int2(a.x << b, a.y << b); }\n"
"int2 irshift(int2 a, int2 b) { return int2(a.x >> b.x, a.y >> b.y); }\n"
"int2 irshift(int2 a, int b) { return int2(a.x >> b, a.y >> b); }\n"
"int3 ilshift(int3 a, int3 b) { return int3(a.x << b.x, a.y << b.y, a.z << b.z); }\n"
"int3 ilshift(int3 a, int b) { return int3(a.x << b, a.y << b, a.z << b); }\n"
"int3 irshift(int3 a, int3 b) { return int3(a.x >> b.x, a.y >> b.y, a.z >> b.z); }\n"
"int3 irshift(int3 a, int b) { return int3(a.x >> b, a.y >> b, a.z >> b); }\n"
"int4 ilshift(int4 a, int4 b) { return int4(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w); }\n"
"int4 ilshift(int4 a, int b) { return int4(a.x << b, a.y << b, a.z << b, a.w << b); }\n"
"int4 irshift(int4 a, int4 b) { return int4(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); }\n"
"int4 irshift(int4 a, int b) { return int4(a.x >> b, a.y >> b, a.z >> b, a.w >> b); }\n\n");
}
if (ApiType == API_OPENGL)
{
@ -477,7 +500,11 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
if (texcoord < numTexgen)
{
out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2);
out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
out.Write("\ttempcoord = irshift(fixpoint_uv%d, " I_INDTEXSCALE"[%d].%s);\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
else
out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
}
else
out.Write("\ttempcoord = int2(0, 0);\n");
@ -678,31 +705,64 @@ static inline void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, AP
int mtxidx = 2*(bpmem.tevind[n].mid-1);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
out.Write("\tint2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
out.Write("\tint2 indtevtrans%d = irshift(int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)), 3);\n", n, mtxidx, n, mtxidx+1, n);
// TODO: should use a shader uid branch for this for better performance
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
// TODO: should use a shader uid branch for this for better performance
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
else
{
out.Write("\tint2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n);
// TODO: should use a shader uid branch for this for better performance
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
}
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
{ // s matrix
_assert_(bpmem.tevind[n].mid >= 5);
int mtxidx = 2*(bpmem.tevind[n].mid-5);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n);
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
out.Write("\tint2 indtevtrans%d = irshift(int2(fixpoint_uv%d * iindtevcrd%d.xx), 8);\n", n, texcoord, n);
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
else
{
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n);
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
}
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
{ // t matrix
_assert_(bpmem.tevind[n].mid >= 9);
int mtxidx = 2*(bpmem.tevind[n].mid-9);
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n);
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
out.Write("\tint2 indtevtrans%d = irshift(int2(fixpoint_uv%d * iindtevcrd%d.yy), 8);\n", n, texcoord, n);
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
else
{
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n);
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
}
}
else
{
@ -741,7 +801,10 @@ static inline void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, AP
out.Write("\ttevcoord.xy = wrappedcoord + indtevtrans%d;\n", n);
// Emulate s24 overflows
out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
out.Write("\ttevcoord.xy = irshift(ilshift(tevcoord.xy, 8), 8);\n");
else
out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
}
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
@ -948,12 +1011,37 @@ static inline void WriteTevRegular(T& out, const char* components, int bias, int
// - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255
// - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy
// - a rounding bias is added before dividing by 256
out.Write("(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
out.Write(" %s ", tevOpTable[op]);
out.Write("(((((tevin_a.%s<<8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+(tevin_c.%s>>7)))%s)%s)>>8)",
components, components, components, components, components,
tevScaleTableLeft[shift], tevLerpBias[2*op+(shift!=3)]);
out.Write(")%s", tevScaleTableRight[shift]);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
// Hack: keep the emitted shader code cleaner by only wrapping an expression in irshift/ilshift when the corresponding shift is actually used.
const char* leftShift = tevScaleTableLeft[shift];
const char* rightShift = tevScaleTableRight[shift];
if (rightShift[0])
out.Write("irshift(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
else
out.Write("((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
out.Write(" %s ", tevOpTable[op]);
if (leftShift[0])
out.Write("irshift((ilshift((ilshift(tevin_a.%s, 8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+irshift(tevin_c.%s, 7))), %s)%s), 8)",
components, components, components, components, components,
leftShift+4, tevLerpBias[2*op+(shift!=3)]);
else
out.Write("irshift(((ilshift(tevin_a.%s, 8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+irshift(tevin_c.%s, 7)))%s), 8)",
components, components, components, components, components, tevLerpBias[2*op+(shift!=3)]);
if (rightShift[0])
out.Write(", %s)", rightShift+4);
}
else
{
out.Write("(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
out.Write(" %s ", tevOpTable[op]);
out.Write("(((((tevin_a.%s<<8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+(tevin_c.%s>>7)))%s)%s)>>8)",
components, components, components, components, components,
tevScaleTableLeft[shift], tevLerpBias[2*op+(shift!=3)]);
out.Write(")%s", tevScaleTableRight[shift]);
}
}
template<class T>
@ -1116,7 +1204,10 @@ static inline void WriteFog(T& out, pixel_shader_uid_data* uid_data)
}
out.Write("\tint ifog = iround(fog * 256.0);\n");
out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n");
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
out.Write("\tprev.rgb = irshift((prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog), 8);\n");
else
out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n");
}
template<class T>

View File

@ -32,6 +32,29 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
_assert_(bpmem.genMode.numtexgens == xfmem.numTexGen.numTexGens);
_assert_(bpmem.genMode.numcolchans == xfmem.numChan.numColorChans);
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
{
// Add functions to do shifts on scalars and ivecs.
// This is included in the vertex shader for lighting shader generation.
out.Write("int ilshift(int a, int b) { return a << b; }\n"
"int irshift(int a, int b) { return a >> b; }\n"
"int2 ilshift(int2 a, int2 b) { return int2(a.x << b.x, a.y << b.y); }\n"
"int2 ilshift(int2 a, int b) { return int2(a.x << b, a.y << b); }\n"
"int2 irshift(int2 a, int2 b) { return int2(a.x >> b.x, a.y >> b.y); }\n"
"int2 irshift(int2 a, int b) { return int2(a.x >> b, a.y >> b); }\n"
"int3 ilshift(int3 a, int3 b) { return int3(a.x << b.x, a.y << b.y, a.z << b.z); }\n"
"int3 ilshift(int3 a, int b) { return int3(a.x << b, a.y << b, a.z << b); }\n"
"int3 irshift(int3 a, int3 b) { return int3(a.x >> b.x, a.y >> b.y, a.z >> b.z); }\n"
"int3 irshift(int3 a, int b) { return int3(a.x >> b, a.y >> b, a.z >> b); }\n"
"int4 ilshift(int4 a, int4 b) { return int4(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w); }\n"
"int4 ilshift(int4 a, int b) { return int4(a.x << b, a.y << b, a.z << b, a.w << b); }\n"
"int4 irshift(int4 a, int4 b) { return int4(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); }\n"
"int4 irshift(int4 a, int b) { return int4(a.x >> b, a.y >> b, a.z >> b, a.w >> b); }\n\n");
}
out.Write("%s", s_lighting_struct);
// uniforms