|
|
|
@ -197,6 +197,29 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
|
|
|
|
"int3 itrunc(float3 x) { return int3(trunc(x)); }\n"
|
|
|
|
|
"int4 itrunc(float4 x) { return int4(trunc(x)); }\n\n");
|
|
|
|
|
|
|
|
|
|
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
|
|
|
|
|
{
|
|
|
|
|
// Add functions to do shifts on scalars and ivecs.
|
|
|
|
|
// The helpers are overloaded under the same two names (ilshift/irshift) so the generated code can call them regardless of operand width.
|
|
|
|
|
// For example: tev color op code uses .rgb as a swizzle, but alpha code only uses .a.
|
|
|
|
|
out.Write("int ilshift(int a, int b) { return a << b; }\n"
|
|
|
|
|
"int irshift(int a, int b) { return a >> b; }\n"
|
|
|
|
|
|
|
|
|
|
"int2 ilshift(int2 a, int2 b) { return int2(a.x << b.x, a.y << b.y); }\n"
|
|
|
|
|
"int2 ilshift(int2 a, int b) { return int2(a.x << b, a.y << b); }\n"
|
|
|
|
|
"int2 irshift(int2 a, int2 b) { return int2(a.x >> b.x, a.y >> b.y); }\n"
|
|
|
|
|
"int2 irshift(int2 a, int b) { return int2(a.x >> b, a.y >> b); }\n"
|
|
|
|
|
|
|
|
|
|
"int3 ilshift(int3 a, int3 b) { return int3(a.x << b.x, a.y << b.y, a.z << b.z); }\n"
|
|
|
|
|
"int3 ilshift(int3 a, int b) { return int3(a.x << b, a.y << b, a.z << b); }\n"
|
|
|
|
|
"int3 irshift(int3 a, int3 b) { return int3(a.x >> b.x, a.y >> b.y, a.z >> b.z); }\n"
|
|
|
|
|
"int3 irshift(int3 a, int b) { return int3(a.x >> b, a.y >> b, a.z >> b); }\n"
|
|
|
|
|
|
|
|
|
|
"int4 ilshift(int4 a, int4 b) { return int4(a.x << b.x, a.y << b.y, a.z << b.z, a.w << b.w); }\n"
|
|
|
|
|
"int4 ilshift(int4 a, int b) { return int4(a.x << b, a.y << b, a.z << b, a.w << b); }\n"
|
|
|
|
|
"int4 irshift(int4 a, int4 b) { return int4(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w); }\n"
|
|
|
|
|
"int4 irshift(int4 a, int b) { return int4(a.x >> b, a.y >> b, a.z >> b, a.w >> b); }\n\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ApiType == API_OPENGL)
|
|
|
|
|
{
|
|
|
|
@ -477,7 +500,11 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
|
|
|
|
if (texcoord < numTexgen)
|
|
|
|
|
{
|
|
|
|
|
out.SetConstantsUsed(C_INDTEXSCALE+i/2,C_INDTEXSCALE+i/2);
|
|
|
|
|
out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
|
|
|
|
|
|
|
|
|
|
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
|
|
|
|
|
out.Write("\ttempcoord = irshift(fixpoint_uv%d, " I_INDTEXSCALE"[%d].%s);\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
|
|
|
|
|
else
|
|
|
|
|
out.Write("\ttempcoord = fixpoint_uv%d >> " I_INDTEXSCALE"[%d].%s;\n", texcoord, i / 2, (i & 1) ? "zw" : "xy");
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
out.Write("\ttempcoord = int2(0, 0);\n");
|
|
|
|
@ -678,31 +705,64 @@ static inline void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, AP
|
|
|
|
|
int mtxidx = 2*(bpmem.tevind[n].mid-1);
|
|
|
|
|
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
|
|
|
|
|
|
|
|
|
|
out.Write("\tint2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n);
|
|
|
|
|
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
|
|
|
|
|
{
|
|
|
|
|
out.Write("\tint2 indtevtrans%d = irshift(int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)), 3);\n", n, mtxidx, n, mtxidx+1, n);
|
|
|
|
|
|
|
|
|
|
// TODO: should use a shader uid branch for this for better performance
|
|
|
|
|
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
|
|
|
|
|
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
|
|
|
|
|
// TODO: should use a shader uid branch for this for better performance
|
|
|
|
|
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx);
|
|
|
|
|
out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
out.Write("\tint2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n);
|
|
|
|
|
|
|
|
|
|
// TODO: should use a shader uid branch for this for better performance
|
|
|
|
|
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
|
|
|
|
|
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
|
|
|
|
|
{ // s matrix
|
|
|
|
|
_assert_(bpmem.tevind[n].mid >= 5);
|
|
|
|
|
int mtxidx = 2*(bpmem.tevind[n].mid-5);
|
|
|
|
|
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
|
|
|
|
|
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n);
|
|
|
|
|
|
|
|
|
|
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
|
|
|
|
|
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
|
|
|
|
|
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
|
|
|
|
|
{
|
|
|
|
|
out.Write("\tint2 indtevtrans%d = irshift(int2(fixpoint_uv%d * iindtevcrd%d.xx), 8);\n", n, texcoord, n);
|
|
|
|
|
|
|
|
|
|
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx);
|
|
|
|
|
out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n);
|
|
|
|
|
|
|
|
|
|
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
|
|
|
|
|
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
|
|
|
|
|
{ // t matrix
|
|
|
|
|
_assert_(bpmem.tevind[n].mid >= 9);
|
|
|
|
|
int mtxidx = 2*(bpmem.tevind[n].mid-9);
|
|
|
|
|
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
|
|
|
|
|
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n);
|
|
|
|
|
|
|
|
|
|
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
|
|
|
|
|
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
|
|
|
|
|
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
|
|
|
|
|
{
|
|
|
|
|
out.Write("\tint2 indtevtrans%d = irshift(int2(fixpoint_uv%d * iindtevcrd%d.yy), 8);\n", n, texcoord, n);
|
|
|
|
|
|
|
|
|
|
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = irshift(indtevtrans%d, " I_INDTEXMTX"[%d].w);\n", mtxidx, n, n, mtxidx);
|
|
|
|
|
out.Write("\telse indtevtrans%d = ilshift(indtevtrans%d, -" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
out.Write("\tint2 indtevtrans%d = int2(fixpoint_uv%d * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n);
|
|
|
|
|
|
|
|
|
|
out.Write("\tif (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx);
|
|
|
|
|
out.Write("\telse indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
@ -741,7 +801,10 @@ static inline void WriteStage(T& out, pixel_shader_uid_data* uid_data, int n, AP
|
|
|
|
|
out.Write("\ttevcoord.xy = wrappedcoord + indtevtrans%d;\n", n);
|
|
|
|
|
|
|
|
|
|
// Emulate s24 overflows
|
|
|
|
|
out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
|
|
|
|
|
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
|
|
|
|
|
out.Write("\ttevcoord.xy = irshift(ilshift(tevcoord.xy, 8), 8);\n");
|
|
|
|
|
else
|
|
|
|
|
out.Write("\ttevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
|
|
|
|
@ -948,12 +1011,37 @@ static inline void WriteTevRegular(T& out, const char* components, int bias, int
|
|
|
|
|
// - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255
|
|
|
|
|
// - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy
|
|
|
|
|
// - a rounding bias is added before dividing by 256
|
|
|
|
|
out.Write("(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
|
|
|
|
|
out.Write(" %s ", tevOpTable[op]);
|
|
|
|
|
out.Write("(((((tevin_a.%s<<8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+(tevin_c.%s>>7)))%s)%s)>>8)",
|
|
|
|
|
components, components, components, components, components,
|
|
|
|
|
tevScaleTableLeft[shift], tevLerpBias[2*op+(shift!=3)]);
|
|
|
|
|
out.Write(")%s", tevScaleTableRight[shift]);
|
|
|
|
|
|
|
|
|
|
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
|
|
|
|
|
{
|
|
|
|
|
// Hack: keep the emitted code cleaner by omitting the irshift/ilshift scale wrappers when the corresponding scale string is empty.
|
|
|
|
|
const char* leftShift = tevScaleTableLeft[shift];
|
|
|
|
|
const char* rightShift = tevScaleTableRight[shift];
|
|
|
|
|
|
|
|
|
|
if (rightShift[0])
|
|
|
|
|
out.Write("irshift(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
|
|
|
|
|
else
|
|
|
|
|
out.Write("((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
|
|
|
|
|
out.Write(" %s ", tevOpTable[op]);
|
|
|
|
|
if (leftShift[0])
|
|
|
|
|
out.Write("irshift((ilshift((ilshift(tevin_a.%s, 8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+irshift(tevin_c.%s, 7))), %s)%s), 8)",
|
|
|
|
|
components, components, components, components, components,
|
|
|
|
|
leftShift+4, tevLerpBias[2*op+(shift!=3)]);
|
|
|
|
|
else
|
|
|
|
|
out.Write("irshift(((ilshift(tevin_a.%s, 8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+irshift(tevin_c.%s, 7)))%s), 8)",
|
|
|
|
|
components, components, components, components, components, tevLerpBias[2*op+(shift!=3)]);
|
|
|
|
|
if (rightShift[0])
|
|
|
|
|
out.Write(", %s)", rightShift+4);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
out.Write("(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
|
|
|
|
|
out.Write(" %s ", tevOpTable[op]);
|
|
|
|
|
out.Write("(((((tevin_a.%s<<8) + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+(tevin_c.%s>>7)))%s)%s)>>8)",
|
|
|
|
|
components, components, components, components, components,
|
|
|
|
|
tevScaleTableLeft[shift], tevLerpBias[2*op+(shift!=3)]);
|
|
|
|
|
out.Write(")%s", tevScaleTableRight[shift]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<class T>
|
|
|
|
@ -1116,7 +1204,10 @@ static inline void WriteFog(T& out, pixel_shader_uid_data* uid_data)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
out.Write("\tint ifog = iround(fog * 256.0);\n");
|
|
|
|
|
out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n");
|
|
|
|
|
if (DriverDetails::HasBug(DriverDetails::BUG_BROKENIVECSHIFTS))
|
|
|
|
|
out.Write("\tprev.rgb = irshift((prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog), 8);\n");
|
|
|
|
|
else
|
|
|
|
|
out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<class T>
|
|
|
|
|