[Android] Tegra 4 'support.' This brings up the OpenGL backend to support Tegra 4 to the point where it will run games, but it doesn't have any video output for some reason. This is a large change that doesn't actually change much functionally. Walking through the changes:

It changes the string in the Android backend select to just OpenGL ES.
Adds a check in the Android code that detects Tegra 4 and enables the option to select the OpenGL ES backend.
Adds a DriverDetails bug under BUG_ISTEGRA as a blanket case of Tegra 4 support.
The change that affects the most lines in this commit: removing all float suffixes in the pixel/vertex/util shaders, since OpenGL ES 2 doesn't support float suffixes.
Disables the shaders for reinterpreting the EFB format since Tegra 4 doesn't support integers.
Changes GLFunctions.cpp to grab the correct Tegra extension functions.
Re-adds the GLSL 1.2 'hacks' as GLSLES2 'hacks' since they are required for GLSL ES 2.
Adds a GLSLES2 to the GLSL_VERSION enum.
Disables the SamplerCache on Tegra since Tegra doesn't support samplers...
Enables glBufferSubData on Tegra since it is the only mobile GPU to correctly work with it.
Disables glDrawRangeElements on Tegra since it doesn't support it; glDrawElements is used instead.
This commit is contained in:
Ryan Houdek 2013-10-06 03:12:13 -05:00
parent 2b08172a45
commit 6bdcde9dd6
20 changed files with 404 additions and 285 deletions

View File

@ -78,7 +78,7 @@
<string name="fastmem_desc">Uses potentially unsafe optimizations for memory access.</string>
<string name="video_settings">Video</string>
<string name="software_renderer">Software Renderer</string>
<string name="opengl_es3">OpenGL ES 3</string>
<string name="opengl_es3">OpenGL ES</string>
<string name="video_backend">Video Backend</string>
<string name="video_backend_to_use">Video backend to use</string>
<string name="show_fps">Show FPS</string>

View File

@ -76,7 +76,6 @@ public final class EmulationActivity extends Activity
NativeLibrary.SetDimensions((int)screenHeight, (int)screenWidth);
else
NativeLibrary.SetDimensions((int)screenWidth, (int)screenHeight);
NativeLibrary.SetFilename(gameToEmulate.getStringExtra("SelectedGame"));
Running = true;

View File

@ -27,6 +27,7 @@ public final class VideoSettingsFragment extends PreferenceFragment
public static String m_GLVersion;
public static String m_GLVendor;
public static String m_GLRenderer;
public static String m_GLExtensions;
private Activity m_activity;
/**
@ -104,6 +105,16 @@ public final class VideoSettingsFragment extends PreferenceFragment
return mGL.glGetString(GL10.GL_RENDERER);
}
/**
* Gets the OpenGL extensions string reported by the device.
*
* Queries {@code GL10.GL_EXTENSIONS} through {@code mGL.glGetString}.
*
* @return String containing the extensions, exactly as returned by
*         glGetString; the caller null-checks it before use.
*/
public String getExtensions()
{
return mGL.glGetString(GL10.GL_EXTENSIONS);
}
private EGLConfig chooseConfig()
{
int[] attribList = new int[] {
@ -139,6 +150,7 @@ public final class VideoSettingsFragment extends PreferenceFragment
m_GLVersion = mbuffer.getVersion();
m_GLVendor = mbuffer.getVendor();
m_GLRenderer = mbuffer.getRenderer();
m_GLExtensions = mbuffer.getExtensions();
boolean mSupportsGLES3 = false;
@ -170,6 +182,14 @@ public final class VideoSettingsFragment extends PreferenceFragment
mSupportsGLES3 = true;
}
}
if (!mSupportsGLES3 &&
m_GLVendor != null && m_GLVendor.equals("NVIDIA Corporation") &&
m_GLRenderer != null && m_GLRenderer.equals("NVIDIA Tegra") &&
m_GLExtensions != null && m_GLExtensions.contains("GL_OES_depth24"))
{
// Is a Tegra 4 since it supports 24bit depth
mSupportsGLES3 = true;
}
return mSupportsGLES3;
}

View File

@ -37,7 +37,8 @@ namespace DriverDetails
{VENDOR_MESA, DRIVER_I965, BUG_BROKENUBO, 900, 920, true},
{VENDOR_ATI, DRIVER_ATI, BUG_BROKENHACKEDBUFFER, -1.0, -1.0, true},
{VENDOR_MESA, DRIVER_NOUVEAU, BUG_BROKENHACKEDBUFFER, -1.0, -1.0, true},
{VENDOR_ATI, DRIVER_ATI, BUG_BROKENPINNEDMEMORY, -1.0, -1.0, true}
{VENDOR_ATI, DRIVER_ATI, BUG_BROKENPINNEDMEMORY, -1.0, -1.0, true},
{VENDOR_TEGRA, DRIVER_NVIDIA, BUG_ISTEGRA, -1.0, -1.0, true},
};
std::map<Bug, BugInfo> m_bugs;

View File

@ -115,6 +115,13 @@ namespace DriverDetails
// Drawing on screen text causes the whole screen to swizzle in a terrible fashion
// Clearing the framebuffer causes one to never see a frame.
BUG_BROKENSWAP,
// Bug: Running on a Tegra 4 device
// Affected devices: Nvidia Tegra
// Started Version: 4
// Ended Version: 5
// Tegra 4 hardware limitations don't allow it to support OpenGL ES 3
// This is fixed in Tegra 5
BUG_ISTEGRA,
};
// Initializes our internal vendor, device family, and driver version

View File

@ -63,7 +63,7 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index,
case LIGHTDIF_CLAMP:
object.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(lightsName, index));
object.Write("lacc.%s += %sdot(ldir, _norm0)) * " LIGHT_COL";\n",
swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(", LIGHT_COL_PARAMS(lightsName, index, swizzle));
swizzle, chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(", LIGHT_COL_PARAMS(lightsName, index, swizzle));
break;
default: _assert_(0);
}
@ -76,18 +76,18 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index,
object.Write("dist2 = dot(ldir, ldir);\n"
"dist = sqrt(dist2);\n"
"ldir = ldir / dist;\n"
"attn = max(0.0f, dot(ldir, " LIGHT_DIR".xyz));\n",
"attn = max(0.0, dot(ldir, " LIGHT_DIR".xyz));\n",
LIGHT_DIR_PARAMS(lightsName, index));
// attn*attn may overflow
object.Write("attn = max(0.0f, " LIGHT_COSATT".x + " LIGHT_COSATT".y*attn + " LIGHT_COSATT".z*attn*attn) / dot(" LIGHT_DISTATT".xyz, float3(1.0f,dist,dist2));\n",
object.Write("attn = max(0.0, " LIGHT_COSATT".x + " LIGHT_COSATT".y*attn + " LIGHT_COSATT".z*attn*attn) / dot(" LIGHT_DISTATT".xyz, float3(1.0,dist,dist2));\n",
LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_DISTATT_PARAMS(lightsName, index));
}
else if (chan.attnfunc == 1)
{ // specular
object.Write("ldir = normalize(" LIGHT_POS".xyz);\n", LIGHT_POS_PARAMS(lightsName, index));
object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, " LIGHT_DIR".xyz)) : 0.0f;\n", LIGHT_DIR_PARAMS(lightsName, index));
object.Write("attn = (dot(_norm0,ldir) >= 0.0) ? max(0.0, dot(_norm0, " LIGHT_DIR".xyz)) : 0.0;\n", LIGHT_DIR_PARAMS(lightsName, index));
// attn*attn may overflow
object.Write("attn = max(0.0f, " LIGHT_COSATT".x + " LIGHT_COSATT".y*attn + " LIGHT_COSATT".z*attn*attn) / (" LIGHT_DISTATT".x + " LIGHT_DISTATT".y*attn + " LIGHT_DISTATT".z*attn*attn);\n",
object.Write("attn = max(0.0, " LIGHT_COSATT".x + " LIGHT_COSATT".y*attn + " LIGHT_COSATT".z*attn*attn) / (" LIGHT_DISTATT".x + " LIGHT_DISTATT".y*attn + " LIGHT_DISTATT".z*attn*attn);\n",
LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index), LIGHT_COSATT_PARAMS(lightsName, index),
LIGHT_DISTATT_PARAMS(lightsName, index), LIGHT_DISTATT_PARAMS(lightsName, index), LIGHT_DISTATT_PARAMS(lightsName, index));
}
@ -101,7 +101,7 @@ static void GenerateLightShader(T& object, LightingUidData& uid_data, int index,
case LIGHTDIF_CLAMP:
object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * " LIGHT_COL";\n",
swizzle,
chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0f," :"(",
chan.diffusefunc != LIGHTDIF_SIGN ? "max(0.0," :"(",
LIGHT_COL_PARAMS(lightsName, index, swizzle));
break;
default: _assert_(0);
@ -133,7 +133,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
else if (components & VB_HAS_COL0)
object.Write("mat = %s0;\n", inColorName);
else
object.Write("mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
object.Write("mat = float4(1.0, 1.0, 1.0, 1.0);\n");
}
else // from color
{
@ -154,7 +154,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
// TODO: this isn't verified. Here we want to read the ambient from the vertex,
// but the vertex itself has no color. So we don't know which value to read.
// Returning 1.0 is the same as disabled lighting, so this could be fine
object.Write("lacc = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
object.Write("lacc = float4(1.0, 1.0, 1.0, 1.0);\n");
}
else // from color
{
@ -163,7 +163,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
}
else
{
object.Write("lacc = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
object.Write("lacc = float4(1.0, 1.0, 1.0, 1.0);\n");
}
// check if alpha is different
@ -176,7 +176,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
object.Write("mat.w = %s%d.w;\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("mat.w = %s0.w;\n", inColorName);
else object.Write("mat.w = 1.0f;\n");
else object.Write("mat.w = 1.0;\n");
}
else // from color
{
@ -196,7 +196,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
object.Write("lacc.w = %s0.w;\n", inColorName);
else
// TODO: The same for alpha: We want to read from vertex, but the vertex has no color
object.Write("lacc.w = 1.0f;\n");
object.Write("lacc.w = 1.0;\n");
}
else // from color
{
@ -205,7 +205,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
}
else
{
object.Write("lacc.w = 1.0f;\n");
object.Write("lacc.w = 1.0;\n");
}
if(color.enablelighting && alpha.enablelighting)
@ -256,7 +256,7 @@ static void GenerateLightingShader(T& object, LightingUidData& uid_data, int com
GenerateLightShader<T>(object, uid_data, i, lit_index, lightsName, coloralpha);
}
}
object.Write("%s%d = mat * clamp(lacc, 0.0f, 1.0f);\n", dest, j);
object.Write("%s%d = mat * clamp(lacc, 0.0, 1.0);\n", dest, j);
object.Write("}\n");
}
}

View File

@ -29,14 +29,14 @@
static const char *tevKSelTableC[] = // KCSEL
{
"1.0f,1.0f,1.0f", // 1 = 0x00
"0.875f,0.875f,0.875f", // 7_8 = 0x01
"0.75f,0.75f,0.75f", // 3_4 = 0x02
"0.625f,0.625f,0.625f", // 5_8 = 0x03
"0.5f,0.5f,0.5f", // 1_2 = 0x04
"0.375f,0.375f,0.375f", // 3_8 = 0x05
"0.25f,0.25f,0.25f", // 1_4 = 0x06
"0.125f,0.125f,0.125f", // 1_8 = 0x07
"1.0,1.0,1.0", // 1 = 0x00
"0.875,0.875,0.875", // 7_8 = 0x01
"0.75,0.75,0.75", // 3_4 = 0x02
"0.625,0.625,0.625", // 5_8 = 0x03
"0.5,0.5,0.5", // 1_2 = 0x04
"0.375,0.375,0.375", // 3_8 = 0x05
"0.25,0.25,0.25", // 1_4 = 0x06
"0.125,0.125,0.125", // 1_8 = 0x07
"ERROR1", // 0x08
"ERROR2", // 0x09
"ERROR3", // 0x0a
@ -65,14 +65,14 @@ static const char *tevKSelTableC[] = // KCSEL
static const char *tevKSelTableA[] = // KASEL
{
"1.0f", // 1 = 0x00
"0.875f",// 7_8 = 0x01
"0.75f", // 3_4 = 0x02
"0.625f",// 5_8 = 0x03
"0.5f", // 1_2 = 0x04
"0.375f",// 3_8 = 0x05
"0.25f", // 1_4 = 0x06
"0.125f",// 1_8 = 0x07
"1.0", // 1 = 0x00
"0.875",// 7_8 = 0x01
"0.75", // 3_4 = 0x02
"0.625",// 5_8 = 0x03
"0.5", // 1_2 = 0x04
"0.375",// 3_8 = 0x05
"0.25", // 1_4 = 0x06
"0.125",// 1_8 = 0x07
"ERROR5", // 0x08
"ERROR6", // 0x09
"ERROR7", // 0x0a
@ -101,17 +101,17 @@ static const char *tevKSelTableA[] = // KASEL
static const char *tevScaleTable[] = // CS
{
"1.0f", // SCALE_1
"2.0f", // SCALE_2
"4.0f", // SCALE_4
"0.5f", // DIVIDE_2
"1.0", // SCALE_1
"2.0", // SCALE_2
"4.0", // SCALE_4
"0.5", // DIVIDE_2
};
static const char *tevBiasTable[] = // TB
{
"", // ZERO,
"+0.5f", // ADDHALF,
"-0.5f", // SUBHALF,
"+0.5", // ADDHALF,
"-0.5", // SUBHALF,
"",
};
@ -134,10 +134,10 @@ static const char *tevCInputTable[] = // CC
"(textemp.aaa)", // TEXA,
"(rastemp.rgb)", // RASC,
"(rastemp.aaa)", // RASA,
"float3(1.0f, 1.0f, 1.0f)", // ONE
"float3(0.5f, 0.5f, 0.5f)", // HALF
"float3(1.0, 1.0, 1.0)", // ONE
"float3(0.5, 0.5, 0.5)", // HALF
"(konsttemp.rgb)", //"konsttemp.rgb", // KONST
"float3(0.0f, 0.0f, 0.0f)", // ZERO
"float3(0.0, 0.0, 0.0)", // ZERO
///added extra values to map clamped values
"(cprev.rgb)", // CPREV,
"(cprev.aaa)", // APREV,
@ -151,10 +151,10 @@ static const char *tevCInputTable[] = // CC
"(textemp.aaa)", // TEXA,
"(crastemp.rgb)", // RASC,
"(crastemp.aaa)", // RASA,
"float3(1.0f, 1.0f, 1.0f)", // ONE
"float3(0.5f, 0.5f, 0.5f)", // HALF
"float3(1.0, 1.0, 1.0)", // ONE
"float3(0.5, 0.5, 0.5)", // HALF
"(ckonsttemp.rgb)", //"konsttemp.rgb", // KONST
"float3(0.0f, 0.0f, 0.0f)", // ZERO
"float3(0.0, 0.0, 0.0)", // ZERO
"PADERROR1", "PADERROR2", "PADERROR3", "PADERROR4"
};
@ -167,7 +167,7 @@ static const char *tevAInputTable[] = // CA
"textemp", // TEXA,
"rastemp", // RASA,
"konsttemp", // KONST, (hw1 had quarter)
"float4(0.0f, 0.0f, 0.0f, 0.0f)", // ZERO
"float4(0.0, 0.0, 0.0, 0.0)", // ZERO
///added extra values to map clamped values
"cprev", // APREV,
"cc0", // A0,
@ -176,7 +176,7 @@ static const char *tevAInputTable[] = // CA
"textemp", // TEXA,
"crastemp", // RASA,
"ckonsttemp", // KONST, (hw1 had quarter)
"float4(0.0f, 0.0f, 0.0f, 0.0f)", // ZERO
"float4(0.0, 0.0, 0.0, 0.0)", // ZERO
"PADERROR5", "PADERROR6", "PADERROR7", "PADERROR8",
"PADERROR9", "PADERROR10", "PADERROR11", "PADERROR12",
};
@ -189,8 +189,8 @@ static const char *tevRasTable[] =
"ERROR14", //3
"ERROR15", //4
"float4(alphabump,alphabump,alphabump,alphabump)", // use bump alpha
"(float4(alphabump,alphabump,alphabump,alphabump)*(255.0f/248.0f))", //normalized
"float4(0.0f, 0.0f, 0.0f, 0.0f)", // zero
"(float4(alphabump,alphabump,alphabump,alphabump)*(255.0/248.0))", //normalized
"float4(0.0, 0.0, 0.0, 0.0)", // zero
};
//static const char *tevTexFunc[] = { "tex2D", "texRECT" };
@ -199,11 +199,11 @@ static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rg
static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" };
static const char *tevIndAlphaSel[] = {"", "x", "y", "z"};
//static const char *tevIndAlphaScale[] = {"", "*32", "*16", "*8"};
static const char *tevIndAlphaScale[] = {"*(248.0f/255.0f)", "*(224.0f/255.0f)", "*(240.0f/255.0f)", "*(248.0f/255.0f)"};
static const char *tevIndAlphaScale[] = {"*(248.0/255.0)", "*(224.0/255.0)", "*(240.0/255.0)", "*(248.0/255.0)"};
static const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
static const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt
static const char *tevIndWrapStart[] = {"0.0f", "256.0f", "128.0f", "64.0f", "32.0f", "16.0f", "0.001f" };
static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" };
static const char *tevIndBiasAdd[] = {"-128.0", "1.0", "1.0", "1.0" }; // indexed by fmt
static const char *tevIndWrapStart[] = {"0.0", "256.0", "128.0", "64.0", "32.0", "16.0", "0.001" };
static const char *tevIndFmtScale[] = {"255.0", "31.0", "15.0", "7.0" };
struct RegisterState
{
@ -309,7 +309,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
if (ApiType == API_OPENGL)
{
out.Write("out vec4 ocol0;\n");
out.Write("COLOROUT(ocol0)\n");
if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
out.Write("out vec4 ocol1;\n");
@ -324,9 +324,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
if (xfregs.numTexGen.numTexGens < 7)
{
for (int i = 0; i < 8; ++i)
{
out.Write("VARYIN float3 uv%d_2;\n", i);
}
out.Write("VARYIN float4 clipPos_2;\n");
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
@ -428,18 +426,18 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
out.Write(",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i);
}
out.Write(" ) {\n");
out.Write("\tfloat4 clipPos = float4(0.0f, 0.0f, 0.0f, 0.0f);");
out.Write("\tfloat4 clipPos = float4(0.0, 0.0, 0.0, 0.0);");
}
}
out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"
" float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n"
" float alphabump=0.0f;\n"
" float3 tevcoord=float3(0.0f, 0.0f, 0.0f);\n"
" float2 wrappedcoord=float2(0.0f,0.0f), tempcoord=float2(0.0f,0.0f);\n"
" float4 cc0=float4(0.0f,0.0f,0.0f,0.0f), cc1=float4(0.0f,0.0f,0.0f,0.0f);\n"
" float4 cc2=float4(0.0f,0.0f,0.0f,0.0f), cprev=float4(0.0f,0.0f,0.0f,0.0f);\n"
" float4 crastemp=float4(0.0f,0.0f,0.0f,0.0f),ckonsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n\n");
out.Write(" float4 c0 = " I_COLORS"[1], c1 = " I_COLORS"[2], c2 = " I_COLORS"[3], prev = float4(0.0, 0.0, 0.0, 0.0), textemp = float4(0.0, 0.0, 0.0, 0.0), rastemp = float4(0.0, 0.0, 0.0, 0.0), konsttemp = float4(0.0, 0.0, 0.0, 0.0);\n"
" float3 comp16 = float3(1.0, 255.0, 0.0), comp24 = float3(1.0, 255.0, 255.0*255.0);\n"
" float alphabump=0.0;\n"
" float3 tevcoord=float3(0.0, 0.0, 0.0);\n"
" float2 wrappedcoord=float2(0.0,0.0), tempcoord=float2(0.0,0.0);\n"
" float4 cc0=float4(0.0,0.0,0.0,0.0), cc1=float4(0.0,0.0,0.0,0.0);\n"
" float4 cc2=float4(0.0,0.0,0.0,0.0), cprev=float4(0.0,0.0,0.0,0.0);\n"
" float4 crastemp=float4(0.0,0.0,0.0,0.0),ckonsttemp=float4(0.0,0.0,0.0,0.0);\n\n");
if (ApiType == API_OPENGL)
{
@ -453,12 +451,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
if (xfregs.numTexGen.numTexGens < 7)
{
if(numTexgen)
{
for (int i = 0; i < 8; ++i)
{
out.Write("float3 uv%d = uv%d_2;\n", i, i);
}
}
for (int i = 0; i < 8; ++i)
out.Write("float3 uv%d = uv%d_2;\n", i, i);
out.Write("float4 clipPos = clipPos_2;\n");
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
@ -516,7 +510,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
// HACK to handle cases where the tex gen is not enabled
if (numTexgen == 0)
{
out.Write("\tfloat3 uv0 = float3(0.0f, 0.0f, 0.0f);\n");
out.Write("\tfloat3 uv0 = float3(0.0, 0.0, 0.0);\n");
}
else
{
@ -527,7 +521,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
uid_data.texMtxInfo_n_projection |= xfregs.texMtxInfo[i].projection << i;
if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ)
{
out.Write("\tif (uv%d.z != 0.0f)", i);
out.Write("\tif (uv%d.z != 0.0)", i);
out.Write("\t\tuv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i);
}
@ -561,7 +555,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
out.Write("\ttempcoord = uv%d.xy * " I_INDTEXSCALE"[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy");
}
else
out.Write("\ttempcoord = float2(0.0f, 0.0f);\n");
out.Write("\ttempcoord = float2(0.0, 0.0);\n");
out.Write("float3 indtex%d = ", i);
SampleTexture<T>(out, "tempcoord", "abg", texmap, ApiType);
@ -618,7 +612,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
}
// emulation of unsigned 8 overflow when casting if needed
if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
out.Write("\tprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
out.Write("\tprev = frac(prev * (255.0/256.0)) * (256.0/255.0);\n");
AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
uid_data.Pretest = Pretest;
@ -665,9 +659,9 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
(bpmem.ztex2.op == ZTEXTURE_ADD) ? "+ zCoord" : "");
// U24 overflow emulation
out.Write("zCoord = zCoord * (16777215.0f/16777216.0f);\n");
out.Write("zCoord = zCoord * (16777215.0/16777216.0);\n");
out.Write("zCoord = frac(zCoord);\n");
out.Write("zCoord = zCoord * (16777216.0f/16777215.0f);\n");
out.Write("zCoord = zCoord * (16777216.0/16777215.0);\n");
}
if (per_pixel_depth && bpmem.UseLateDepthTest())
@ -692,7 +686,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
{
// alpha component must be 0 or the shader will not compile (Direct3D 9Ex restriction)
// Colors will be blended against the color from ocol1 in D3D 9...
out.Write("\tocol1 = float4(prev.a, prev.a, prev.a, 0.0f);\n");
out.Write("\tocol1 = float4(prev.a, prev.a, prev.a, 0.0);\n");
}
else
{
@ -722,43 +716,43 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
//table with the color compare operations
static const char *TEVCMPColorOPTable[16] =
{
"float3(0.0f, 0.0f, 0.0f)",//0
"float3(0.0f, 0.0f, 0.0f)",//1
"float3(0.0f, 0.0f, 0.0f)",//2
"float3(0.0f, 0.0f, 0.0f)",//3
"float3(0.0f, 0.0f, 0.0f)",//4
"float3(0.0f, 0.0f, 0.0f)",//5
"float3(0.0f, 0.0f, 0.0f)",//6
"float3(0.0f, 0.0f, 0.0f)",//7
" %s + ((%s.r >= %s.r + (0.25f/255.0f)) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_R8_GT 8
" %s + ((abs(%s.r - %s.r) < (0.5f/255.0f)) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_R8_EQ 9
" %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_GR16_GT 10
" %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_GR16_EQ 11
" %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_BGR24_GT 12
" %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_BGR24_EQ 13
" %s + (max(sign(%s.rgb - %s.rgb - (0.25f/255.0f)), float3(0.0f, 0.0f, 0.0f)) * %s)",//#define TEVCMP_RGB8_GT 14
" %s + ((float3(1.0f, 1.0f, 1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (0.5f/255.0f)), float3(0.0f, 0.0f, 0.0f))) * %s)"//#define TEVCMP_RGB8_EQ 15
"float3(0.0, 0.0, 0.0)",//0
"float3(0.0, 0.0, 0.0)",//1
"float3(0.0, 0.0, 0.0)",//2
"float3(0.0, 0.0, 0.0)",//3
"float3(0.0, 0.0, 0.0)",//4
"float3(0.0, 0.0, 0.0)",//5
"float3(0.0, 0.0, 0.0)",//6
"float3(0.0, 0.0, 0.0)",//7
" %s + ((%s.r >= %s.r + (0.25/255.0)) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_R8_GT 8
" %s + ((abs(%s.r - %s.r) < (0.5/255.0)) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_R8_EQ 9
" %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25/255.0))) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_GR16_GT 10
" %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5/255.0) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_GR16_EQ 11
" %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25/255.0))) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_BGR24_GT 12
" %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5/255.0) ? %s : float3(0.0, 0.0, 0.0))",//#define TEVCMP_BGR24_EQ 13
" %s + (max(sign(%s.rgb - %s.rgb - (0.25/255.0)), float3(0.0, 0.0, 0.0)) * %s)",//#define TEVCMP_RGB8_GT 14
" %s + ((float3(1.0, 1.0, 1.0) - max(sign(abs(%s.rgb - %s.rgb) - (0.5/255.0)), float3(0.0, 0.0, 0.0))) * %s)"//#define TEVCMP_RGB8_EQ 15
};
//table with the alpha compare operations
static const char *TEVCMPAlphaOPTable[16] =
{
"0.0f",//0
"0.0f",//1
"0.0f",//2
"0.0f",//3
"0.0f",//4
"0.0f",//5
"0.0f",//6
"0.0f",//7
" %s.a + ((%s.r >= (%s.r + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_R8_GT 8
" %s.a + (abs(%s.r - %s.r) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_R8_EQ 9
" %s.a + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_GR16_GT 10
" %s.a + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_GR16_EQ 11
" %s.a + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_BGR24_GT 12
" %s.a + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_BGR24_EQ 13
" %s.a + ((%s.a >= (%s.a + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_A8_GT 14
" %s.a + (abs(%s.a - %s.a) < (0.5f/255.0f) ? %s.a : 0.0f)"//#define TEVCMP_A8_EQ 15
"0.0",//0
"0.0",//1
"0.0",//2
"0.0",//3
"0.0",//4
"0.0",//5
"0.0",//6
"0.0",//7
" %s.a + ((%s.r >= (%s.r + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_R8_GT 8
" %s.a + (abs(%s.r - %s.r) < (0.5/255.0) ? %s.a : 0.0)",//#define TEVCMP_R8_EQ 9
" %s.a + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_GR16_GT 10
" %s.a + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5/255.0) ? %s.a : 0.0)",//#define TEVCMP_GR16_EQ 11
" %s.a + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_BGR24_GT 12
" %s.a + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5/255.0) ? %s.a : 0.0)",//#define TEVCMP_BGR24_EQ 13
" %s.a + ((%s.a >= (%s.a + (0.25/255.0))) ? %s.a : 0.0)",//#define TEVCMP_A8_GT 14
" %s.a + (abs(%s.a - %s.a) < (0.5/255.0) ? %s.a : 0.0)"//#define TEVCMP_A8_EQ 15
};
template<class T>
@ -821,12 +815,12 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
}
else
{
out.Write("float2 indtevtrans%d = float2(0.0f, 0.0f);\n", n);
out.Write("float2 indtevtrans%d = float2(0.0, 0.0);\n", n);
}
}
else
{
out.Write("float2 indtevtrans%d = float2(0.0f, 0.0f);\n", n);
out.Write("float2 indtevtrans%d = float2(0.0, 0.0);\n", n);
}
// ---------
@ -837,7 +831,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
if (bpmem.tevind[n].sw == ITW_OFF)
out.Write("wrappedcoord.x = uv%d.x;\n", texcoord);
else if (bpmem.tevind[n].sw == ITW_0)
out.Write("wrappedcoord.x = 0.0f;\n");
out.Write("wrappedcoord.x = 0.0;\n");
else
out.Write("wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]);
@ -845,7 +839,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
if (bpmem.tevind[n].tw == ITW_OFF)
out.Write("wrappedcoord.y = uv%d.y;\n", texcoord);
else if (bpmem.tevind[n].tw == ITW_0)
out.Write("wrappedcoord.y = 0.0f;\n");
out.Write("wrappedcoord.y = 0.0;\n");
else
out.Write("wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]);
@ -878,7 +872,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
const char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
out.Write("rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap);
out.Write("crastemp = frac(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n");
out.Write("crastemp = frac(rastemp * (255.0/256.0)) * (256.0/255.0);\n");
}
uid_data.stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1);
@ -890,7 +884,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
if(bHasTexCoord)
out.Write("tevcoord.xy = uv%d.xy;\n", texcoord);
else
out.Write("tevcoord.xy = float2(0.0f, 0.0f);\n");
out.Write("tevcoord.xy = float2(0.0, 0.0);\n");
}
const int i = bpmem.combiners[n].alphaC.tswap;
@ -911,7 +905,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
}
else
{
out.Write("textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
out.Write("textemp = float4(1.0, 1.0, 1.0, 1.0);\n");
}
@ -925,7 +919,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
out.Write("konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]);
if(kc > 7 || ka > 7)
{
out.Write("ckonsttemp = frac(konsttemp * (255.0f/256.0f)) * (256.0f/255.0f);\n");
out.Write("ckonsttemp = frac(konsttemp * (255.0/256.0)) * (256.0/255.0);\n");
}
else
{
@ -944,7 +938,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
{
if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
{
out.Write("cprev = frac(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n");
out.Write("cprev = frac(prev * (255.0/256.0)) * (256.0/255.0);\n");
RegisterStates[0].AlphaNeedOverflowControl = false;
RegisterStates[0].ColorNeedOverflowControl = false;
}
@ -963,7 +957,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
out.SetConstantsUsed(C_COLORS+1,C_COLORS+1);
if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl)
{
out.Write("cc0 = frac(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
out.Write("cc0 = frac(c0 * (255.0/256.0)) * (256.0/255.0);\n");
RegisterStates[1].AlphaNeedOverflowControl = false;
RegisterStates[1].ColorNeedOverflowControl = false;
}
@ -982,7 +976,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
out.SetConstantsUsed(C_COLORS+2,C_COLORS+2);
if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl)
{
out.Write("cc1 = frac(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
out.Write("cc1 = frac(c1 * (255.0/256.0)) * (256.0/255.0);\n");
RegisterStates[2].AlphaNeedOverflowControl = false;
RegisterStates[2].ColorNeedOverflowControl = false;
}
@ -1001,7 +995,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
out.SetConstantsUsed(C_COLORS+3,C_COLORS+3);
if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl)
{
out.Write("cc2 = frac(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n");
out.Write("cc2 = frac(c2 * (255.0/256.0)) * (256.0/255.0);\n");
RegisterStates[3].AlphaNeedOverflowControl = false;
RegisterStates[3].ColorNeedOverflowControl = false;
}
@ -1055,7 +1049,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
else if (cc.a == TEVCOLORARG_ZERO)
out.Write("%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]);
else if (cc.b == TEVCOLORARG_ZERO)
out.Write("%s*(float3(1.0f, 1.0f, 1.0f)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]);
out.Write("%s*(float3(1.0, 1.0, 1.0)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]);
else
out.Write("lerp(%s, %s, %s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]);
@ -1074,7 +1068,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
tevCInputTable[cc.c + 16]);
}
if (cc.clamp)
out.Write(", 0.0f, 1.0f)");
out.Write(", 0.0, 1.0)");
out.Write(";\n");
RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0);
@ -1102,7 +1096,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
else if (ac.a == TEVALPHAARG_ZERO)
out.Write("%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]);
else if (ac.b == TEVALPHAARG_ZERO)
out.Write("%s.a*(1.0f-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]);
out.Write("%s.a*(1.0-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]);
else
out.Write("lerp(%s.a, %s.a, %s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]);
@ -1123,7 +1117,7 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
tevAInputTable[ac.c + 8]);
}
if (ac.clamp)
out.Write(", 0.0f, 1.0f)");
out.Write(", 0.0, 1.0)");
out.Write(";\n\n");
out.Write("// TEV done\n");
}
@ -1142,12 +1136,12 @@ void SampleTexture(T& out, const char *texcoords, const char *texswap, int texma
static const char *tevAlphaFuncsTable[] =
{
"(false)", // NEVER
"(prev.a <= %s - (0.25f/255.0f))", // LESS
"(abs( prev.a - %s ) < (0.5f/255.0f))", // EQUAL
"(prev.a < %s + (0.25f/255.0f))", // LEQUAL
"(prev.a >= %s + (0.25f/255.0f))", // GREATER
"(abs( prev.a - %s ) >= (0.5f/255.0f))", // NEQUAL
"(prev.a > %s - (0.25f/255.0f))", // GEQUAL
"(prev.a <= %s - (0.25/255.0))", // LESS
"(abs( prev.a - %s ) < (0.5/255.0))", // EQUAL
"(prev.a < %s + (0.25/255.0))", // LEQUAL
"(prev.a >= %s + (0.25/255.0))", // GREATER
"(abs( prev.a - %s ) >= (0.5/255.0))", // NEQUAL
"(prev.a > %s - (0.25/255.0))", // GEQUAL
"(true)" // ALWAYS
};
@ -1188,11 +1182,11 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_T
out.Write(tevAlphaFuncsTable[compindex], alphaRef[1]);
out.Write(")) {\n");
out.Write("\t\tocol0 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
out.Write("\t\tocol0 = float4(0.0, 0.0, 0.0, 0.0);\n");
if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
out.Write("\t\tocol1 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
out.Write("\t\tocol1 = float4(0.0, 0.0, 0.0, 0.0);\n");
if(per_pixel_depth)
out.Write("\t\tdepth = 1.f;\n");
out.Write("\t\tdepth = 1.0;\n");
// HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before
// or after texturing and alpha test. PC graphics APIs have no way to support this
@ -1224,10 +1218,10 @@ static const char *tevFogFuncsTable[] =
"", // ?
"", // Linear
"", // ?
"\tfog = 1.0f - pow(2.0f, -8.0f * fog);\n", // exp
"\tfog = 1.0f - pow(2.0f, -8.0f * fog * fog);\n", // exp2
"\tfog = pow(2.0f, -8.0f * (1.0f - fog));\n", // backward exp
"\tfog = 1.0f - fog;\n fog = pow(2.0f, -8.0f * fog * fog);\n" // backward exp2
"\tfog = 1.0 - pow(2.0, -8.0 * fog);\n", // exp
"\tfog = 1.0 - pow(2.0, -8.0 * fog * fog);\n", // exp2
"\tfog = pow(2.0, -8.0 * (1.0 - fog));\n", // backward exp
"\tfog = 1.0 - fog;\n fog = pow(2.0, -8.0 * fog * fog);\n" // backward exp2
};
template<class T>
@ -1260,12 +1254,12 @@ static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data)
if (bpmem.fogRange.Base.Enabled)
{
out.SetConstantsUsed(C_FOG+2, C_FOG+2);
out.Write("\tfloat x_adjust = (2.0f * (clipPos.x / " I_FOG"[2].y)) - 1.0f - " I_FOG"[2].x;\n");
out.Write("\tfloat x_adjust = (2.0 * (clipPos.x / " I_FOG"[2].y)) - 1.0 - " I_FOG"[2].x;\n");
out.Write("\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOG"[2].z * " I_FOG"[2].z) / " I_FOG"[2].z;\n");
out.Write("\tze *= x_adjust;\n");
}
out.Write("\tfloat fog = clamp(ze - " I_FOG"[1].z, 0.0f, 1.0f);\n");
out.Write("\tfloat fog = clamp(ze - " I_FOG"[1].z, 0.0, 1.0);\n");
if (bpmem.fog.c_proj_fsel.fsel > 3)
{

View File

@ -142,7 +142,7 @@ void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
if (ApiType != API_OPENGL)
{
WRITE(p, " sampleUv = sampleUv + float2(0.0f,1.0f);\n");// still to determine the reason for this
WRITE(p, " sampleUv = sampleUv + float2(0.0,1.0);\n");// still to determine the reason for this
WRITE(p, " sampleUv = sampleUv / " I_COLORS"[0].zw;\n");
}
}
@ -204,14 +204,14 @@ void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType)
WRITE(p, " float yb = yl * %f;\n", blkH);
WRITE(p, " float yoff = uv1.y - yb;\n");
WRITE(p, " float xp = uv1.x + (yoff * " I_COLORS"[1].x);\n");
WRITE(p, " float xel = floor(xp / 2.0f);\n");
WRITE(p, " float xel = floor(xp / 2.0);\n");
WRITE(p, " float xb = floor(xel / %f);\n", blkH);
WRITE(p, " float xoff = xel - (xb * %f);\n", blkH);
WRITE(p, " float x2 = uv1.x * 2.0f;\n");
WRITE(p, " float x2 = uv1.x * 2.0;\n");
WRITE(p, " float xl = floor(x2 / %f);\n", blkW);
WRITE(p, " float xib = x2 - (xl * %f);\n", blkW);
WRITE(p, " float halfxb = floor(xb / 2.0f);\n");
WRITE(p, " float halfxb = floor(xb / 2.0);\n");
WRITE(p, " sampleUv.x = xib + (halfxb * %f);\n", blkW);
WRITE(p, " sampleUv.y = yb + xoff;\n");
@ -224,7 +224,7 @@ void Write32BitSwizzler(char*& p, u32 format, API_TYPE ApiType)
if (ApiType != API_OPENGL)
{
WRITE(p, " sampleUv = sampleUv + float2(0.0f,1.0f);\n");// still to determine the reason for this
WRITE(p, " sampleUv = sampleUv + float2(0.0,1.0);\n");// still to determine the reason for this
WRITE(p, " sampleUv = sampleUv / " I_COLORS"[0].zw;\n");
}
}
@ -246,7 +246,7 @@ void WriteSampleColor(char*& p, const char* colorComp, const char* dest, API_TYP
else
texSampleIncrementUnit = I_COLORS"[0].x";
WRITE(p, " %s = %s(samp0, sampleUv + float2(%d.0f * (%s), 0.0f)).%s;\n",
WRITE(p, " %s = %s(samp0, sampleUv + float2(%d.0 * (%s), 0.0)).%s;\n",
dest, texSampleOpName, s_incrementSampleXCount, texSampleIncrementUnit, colorComp);
}
@ -282,7 +282,7 @@ void WriteIncrementSampleX(char*& p,API_TYPE ApiType)
// Appends one shader statement to the buffer at `p`: `dest` is assigned
// floor(`src` * k), where k = 255 / 2^(8 - depth) is computed here on the
// host and baked into the shader text as a literal.
void WriteToBitDepth(char*& p, u8 depth, const char* src, const char* dest)
{
// NOTE(review): two declarations of `result` appear here (2.0f and 2.0
// variants); only one can remain in the final file.
float result = 255 / pow(2.0f, (8 - depth));
float result = 255 / pow(2.0, (8 - depth));
// NOTE(review): "%ff" is "%f" followed by a literal 'f', so (assuming WRITE
// is printf-style) the emitted constant still carries a float suffix --
// exactly what this change set out to remove for GLSL ES 2. Confirm whether
// this should be "%f".
WRITE(p, " %s = floor(%s * %ff);\n", dest, src, result);
}
@ -362,7 +362,7 @@ void WriteI4Encoder(char* p, API_TYPE ApiType)
WriteToBitDepth(p, 4, "color0", "color0");
WriteToBitDepth(p, 4, "color1", "color1");
WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n");
WRITE(p, " ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(p, ApiType);
}
@ -416,7 +416,7 @@ void WriteIA4Encoder(char* p,API_TYPE ApiType)
WriteToBitDepth(p, 4, "color0", "color0");
WriteToBitDepth(p, 4, "color1", "color1");
WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n");
WRITE(p, " ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(p, ApiType);
}
@ -432,15 +432,15 @@ void WriteRGB565Encoder(char* p,API_TYPE ApiType)
WRITE(p, " float2 texBs = float2(texSample0.b, texSample1.b);\n");
WriteToBitDepth(p, 6, "texGs", "float2 gInt");
WRITE(p, " float2 gUpper = floor(gInt / 8.0f);\n");
WRITE(p, " float2 gLower = gInt - gUpper * 8.0f;\n");
WRITE(p, " float2 gUpper = floor(gInt / 8.0);\n");
WRITE(p, " float2 gLower = gInt - gUpper * 8.0;\n");
WriteToBitDepth(p, 5, "texRs", "ocol0.br");
WRITE(p, " ocol0.br = ocol0.br * 8.0f + gUpper;\n");
WRITE(p, " ocol0.br = ocol0.br * 8.0 + gUpper;\n");
WriteToBitDepth(p, 5, "texBs", "ocol0.ga");
WRITE(p, " ocol0.ga = ocol0.ga + gLower * 32.0f;\n");
WRITE(p, " ocol0.ga = ocol0.ga + gLower * 32.0;\n");
WRITE(p, " ocol0 = ocol0 / 255.0f;\n");
WRITE(p, " ocol0 = ocol0 / 255.0;\n");
WriteEncoderEnd(p, ApiType);
}
@ -459,13 +459,13 @@ void WriteRGB5A3Encoder(char* p,API_TYPE ApiType)
WRITE(p, "if(texSample.a > 0.878f) {\n");
WriteToBitDepth(p, 5, "texSample.g", "color0");
WRITE(p, " gUpper = floor(color0 / 8.0f);\n");
WRITE(p, " gLower = color0 - gUpper * 8.0f;\n");
WRITE(p, " gUpper = floor(color0 / 8.0);\n");
WRITE(p, " gLower = color0 - gUpper * 8.0;\n");
WriteToBitDepth(p, 5, "texSample.r", "ocol0.b");
WRITE(p, " ocol0.b = ocol0.b * 4.0f + gUpper + 128.0f;\n");
WRITE(p, " ocol0.b = ocol0.b * 4.0 + gUpper + 128.0;\n");
WriteToBitDepth(p, 5, "texSample.b", "ocol0.g");
WRITE(p, " ocol0.g = ocol0.g + gLower * 32.0f;\n");
WRITE(p, " ocol0.g = ocol0.g + gLower * 32.0;\n");
WRITE(p, "} else {\n");
@ -473,9 +473,9 @@ void WriteRGB5A3Encoder(char* p,API_TYPE ApiType)
WriteToBitDepth(p, 4, "texSample.b", "ocol0.g");
WriteToBitDepth(p, 3, "texSample.a", "color0");
WRITE(p, "ocol0.b = ocol0.b + color0 * 16.0f;\n");
WRITE(p, "ocol0.b = ocol0.b + color0 * 16.0;\n");
WriteToBitDepth(p, 4, "texSample.g", "color0");
WRITE(p, "ocol0.g = ocol0.g + color0 * 16.0f;\n");
WRITE(p, "ocol0.g = ocol0.g + color0 * 16.0;\n");
WRITE(p, "}\n");
@ -487,13 +487,13 @@ void WriteRGB5A3Encoder(char* p,API_TYPE ApiType)
WRITE(p, "if(texSample.a > 0.878f) {\n");
WriteToBitDepth(p, 5, "texSample.g", "color0");
WRITE(p, " gUpper = floor(color0 / 8.0f);\n");
WRITE(p, " gLower = color0 - gUpper * 8.0f;\n");
WRITE(p, " gUpper = floor(color0 / 8.0);\n");
WRITE(p, " gLower = color0 - gUpper * 8.0;\n");
WriteToBitDepth(p, 5, "texSample.r", "ocol0.r");
WRITE(p, " ocol0.r = ocol0.r * 4.0f + gUpper + 128.0f;\n");
WRITE(p, " ocol0.r = ocol0.r * 4.0 + gUpper + 128.0;\n");
WriteToBitDepth(p, 5, "texSample.b", "ocol0.a");
WRITE(p, " ocol0.a = ocol0.a + gLower * 32.0f;\n");
WRITE(p, " ocol0.a = ocol0.a + gLower * 32.0;\n");
WRITE(p, "} else {\n");
@ -501,13 +501,13 @@ void WriteRGB5A3Encoder(char* p,API_TYPE ApiType)
WriteToBitDepth(p, 4, "texSample.b", "ocol0.a");
WriteToBitDepth(p, 3, "texSample.a", "color0");
WRITE(p, "ocol0.r = ocol0.r + color0 * 16.0f;\n");
WRITE(p, "ocol0.r = ocol0.r + color0 * 16.0;\n");
WriteToBitDepth(p, 4, "texSample.g", "color0");
WRITE(p, "ocol0.a = ocol0.a + color0 * 16.0f;\n");
WRITE(p, "ocol0.a = ocol0.a + color0 * 16.0;\n");
WRITE(p, "}\n");
WRITE(p, " ocol0 = ocol0 / 255.0f;\n");
WRITE(p, " ocol0 = ocol0 / 255.0;\n");
WriteEncoderEnd(p, ApiType);
}
@ -533,7 +533,7 @@ void WriteRGBA4443Encoder(char* p,API_TYPE ApiType)
WriteToBitDepth(p, 4, "texSample.g", "color0.a");
WriteToBitDepth(p, 4, "texSample.b", "color1.a");
WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n");
WRITE(p, " ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(p, ApiType);
}
@ -541,8 +541,8 @@ void WriteRGBA8Encoder(char* p,API_TYPE ApiType)
{
Write32BitSwizzler(p, GX_TF_RGBA8, ApiType);
WRITE(p, " float cl1 = xb - (halfxb * 2.0f);\n");
WRITE(p, " float cl0 = 1.0f - cl1;\n");
WRITE(p, " float cl1 = xb - (halfxb * 2.0);\n");
WRITE(p, " float cl0 = 1.0 - cl1;\n");
WRITE(p, " float4 texSample;\n");
WRITE(p, " float4 color0;\n");
@ -599,7 +599,7 @@ void WriteC4Encoder(char* p, const char* comp,API_TYPE ApiType)
WriteToBitDepth(p, 4, "color0", "color0");
WriteToBitDepth(p, 4, "color1", "color1");
WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n");
WRITE(p, " ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(p, ApiType);
}
@ -650,7 +650,7 @@ void WriteCC4Encoder(char* p, const char* comp,API_TYPE ApiType)
WriteToBitDepth(p, 4, "color0", "color0");
WriteToBitDepth(p, 4, "color1", "color1");
WRITE(p, " ocol0 = (color0 * 16.0f + color1) / 255.0f;\n");
WRITE(p, " ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
WriteEncoderEnd(p, ApiType);
}
@ -701,25 +701,25 @@ void WriteZ16Encoder(char* p,API_TYPE ApiType)
WriteSampleColor(p, "b", "depth", ApiType);
WRITE(p, " depth *= 16777215.0f;\n");
WRITE(p, " expanded.r = floor(depth / (256.0f * 256.0f));\n");
WRITE(p, " depth -= expanded.r * 256.0f * 256.0f;\n");
WRITE(p, " expanded.g = floor(depth / 256.0f);\n");
WRITE(p, " depth *= 16777215.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n");
WRITE(p, " expanded.g = floor(depth / 256.0);\n");
WRITE(p, " ocol0.b = expanded.g / 255.0f;\n");
WRITE(p, " ocol0.g = expanded.r / 255.0f;\n");
WRITE(p, " ocol0.b = expanded.g / 255.0;\n");
WRITE(p, " ocol0.g = expanded.r / 255.0;\n");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "b", "depth", ApiType);
WRITE(p, " depth *= 16777215.0f;\n");
WRITE(p, " expanded.r = floor(depth / (256.0f * 256.0f));\n");
WRITE(p, " depth -= expanded.r * 256.0f * 256.0f;\n");
WRITE(p, " expanded.g = floor(depth / 256.0f);\n");
WRITE(p, " depth *= 16777215.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n");
WRITE(p, " expanded.g = floor(depth / 256.0);\n");
WRITE(p, " ocol0.r = expanded.g / 255.0f;\n");
WRITE(p, " ocol0.a = expanded.r / 255.0f;\n");
WRITE(p, " ocol0.r = expanded.g / 255.0;\n");
WRITE(p, " ocol0.a = expanded.r / 255.0;\n");
WriteEncoderEnd(p, ApiType);
}
@ -735,25 +735,25 @@ void WriteZ16LEncoder(char* p,API_TYPE ApiType)
WriteSampleColor(p, "b", "depth", ApiType);
WRITE(p, " depth *= 16777215.0f;\n");
WRITE(p, " expanded.r = floor(depth / (256.0f * 256.0f));\n");
WRITE(p, " depth -= expanded.r * 256.0f * 256.0f;\n");
WRITE(p, " expanded.g = floor(depth / 256.0f);\n");
WRITE(p, " depth -= expanded.g * 256.0f;\n");
WRITE(p, " depth *= 16777215.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n");
WRITE(p, " expanded.g = floor(depth / 256.0);\n");
WRITE(p, " depth -= expanded.g * 256.0;\n");
WRITE(p, " expanded.b = depth;\n");
WRITE(p, " ocol0.b = expanded.b / 255.0f;\n");
WRITE(p, " ocol0.g = expanded.g / 255.0f;\n");
WRITE(p, " ocol0.b = expanded.b / 255.0;\n");
WRITE(p, " ocol0.g = expanded.g / 255.0;\n");
WriteIncrementSampleX(p, ApiType);
WriteSampleColor(p, "b", "depth", ApiType);
WRITE(p, " depth *= 16777215.0f;\n");
WRITE(p, " expanded.r = floor(depth / (256.0f * 256.0f));\n");
WRITE(p, " depth -= expanded.r * 256.0f * 256.0f;\n");
WRITE(p, " expanded.g = floor(depth / 256.0f);\n");
WRITE(p, " depth -= expanded.g * 256.0f;\n");
WRITE(p, " depth *= 16777215.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
WRITE(p, " depth -= expanded.r * 256.0 * 256.0;\n");
WRITE(p, " expanded.g = floor(depth / 256.0);\n");
WRITE(p, " depth -= expanded.g * 256.0;\n");
WRITE(p, " expanded.b = depth;\n");
WRITE(p, " ocol0.r = expanded.b;\n");
@ -766,7 +766,7 @@ void WriteZ24Encoder(char* p, API_TYPE ApiType)
{
Write32BitSwizzler(p, GX_TF_Z24X8, ApiType);
WRITE(p, " float cl = xb - (halfxb * 2.0f);\n");
WRITE(p, " float cl = xb - (halfxb * 2.0);\n");
WRITE(p, " float depth0;\n");
WRITE(p, " float depth1;\n");
@ -779,27 +779,27 @@ void WriteZ24Encoder(char* p, API_TYPE ApiType)
for (int i = 0; i < 2; i++)
{
WRITE(p, " depth%i *= 16777215.0f;\n", i);
WRITE(p, " depth%i *= 16777215.0;\n", i);
WRITE(p, " expanded%i.r = floor(depth%i / (256.0f * 256.0f));\n", i, i);
WRITE(p, " depth%i -= expanded%i.r * 256.0f * 256.0f;\n", i, i);
WRITE(p, " expanded%i.g = floor(depth%i / 256.0f);\n", i, i);
WRITE(p, " depth%i -= expanded%i.g * 256.0f;\n", i, i);
WRITE(p, " expanded%i.r = floor(depth%i / (256.0 * 256.0));\n", i, i);
WRITE(p, " depth%i -= expanded%i.r * 256.0 * 256.0;\n", i, i);
WRITE(p, " expanded%i.g = floor(depth%i / 256.0);\n", i, i);
WRITE(p, " depth%i -= expanded%i.g * 256.0;\n", i, i);
WRITE(p, " expanded%i.b = depth%i;\n", i, i);
}
WRITE(p, " if(cl > 0.5f) {\n");
WRITE(p, " if(cl > 0.5) {\n");
// upper 16
WRITE(p, " ocol0.b = expanded0.g / 255.0f;\n");
WRITE(p, " ocol0.g = expanded0.b / 255.0f;\n");
WRITE(p, " ocol0.r = expanded1.g / 255.0f;\n");
WRITE(p, " ocol0.a = expanded1.b / 255.0f;\n");
WRITE(p, " ocol0.b = expanded0.g / 255.0;\n");
WRITE(p, " ocol0.g = expanded0.b / 255.0;\n");
WRITE(p, " ocol0.r = expanded1.g / 255.0;\n");
WRITE(p, " ocol0.a = expanded1.b / 255.0;\n");
WRITE(p, " } else {\n");
// lower 8
WRITE(p, " ocol0.b = 1.0f;\n");
WRITE(p, " ocol0.g = expanded0.r / 255.0f;\n");
WRITE(p, " ocol0.r = 1.0f;\n");
WRITE(p, " ocol0.a = expanded1.r / 255.0f;\n");
WRITE(p, " ocol0.b = 1.0;\n");
WRITE(p, " ocol0.g = expanded0.r / 255.0;\n");
WRITE(p, " ocol0.r = 1.0;\n");
WRITE(p, " ocol0.a = expanded1.r / 255.0;\n");
WRITE(p, " }\n");
WriteEncoderEnd(p, ApiType);
@ -878,10 +878,10 @@ const char *GenerateEncodingShader(u32 format,API_TYPE ApiType)
WriteC4Encoder(p, "b", ApiType);
break;
case GX_CTF_Z8M:
WriteZ8Encoder(p, "256.0f", ApiType);
WriteZ8Encoder(p, "256.0", ApiType);
break;
case GX_CTF_Z8L:
WriteZ8Encoder(p, "65536.0f" , ApiType);
WriteZ8Encoder(p, "65536.0" , ApiType);
break;
case GX_CTF_Z16L:
WriteZ16LEncoder(p, ApiType);

View File

@ -204,7 +204,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
}
else if (api_type == API_D3D11)
{
out.Write("int posmtx = blend_indices.x * 255.0f;\n");
out.Write("int posmtx = blend_indices.x * 255.0;\n");
}
else
{
@ -236,7 +236,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
}
else
{
out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0f);\n");
out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0);\n");
if (components & VB_HAS_NRM0)
out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm0)));\n");
if (components & VB_HAS_NRM1)
@ -246,7 +246,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
}
if (!(components & VB_HAS_NRM0))
out.Write("float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n");
out.Write("float3 _norm0 = float3(0.0, 0.0, 0.0);\n");
out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");
@ -261,7 +261,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
if (components & VB_HAS_COL0)
out.Write("o.colors_0 = color0;\n");
else
out.Write("o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
out.Write("o.colors_0 = float4(1.0, 1.0, 1.0, 1.0);\n");
}
GenerateLightingShader<T>(out, uid_data.lighting, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_");
@ -283,13 +283,13 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
*/
// transform texcoords
out.Write("float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");
out.Write("float4 coord = float4(0.0, 0.0, 1.0, 1.0);\n");
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
{
TexMtxInfo& texinfo = xfregs.texMtxInfo[i];
out.Write("{\n");
out.Write("coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");
out.Write("coord = float4(0.0, 0.0, 1.0, 1.0);\n");
uid_data.texMtxInfo[i].sourcerow = xfregs.texMtxInfo[i].sourcerow;
switch (texinfo.sourcerow)
{
@ -301,7 +301,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
if (components & VB_HAS_NRM0)
{
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
out.Write("coord = float4(rawnorm0.xyz, 1.0f);\n");
out.Write("coord = float4(rawnorm0.xyz, 1.0);\n");
}
break;
case XF_SRCCOLORS_INROW:
@ -311,20 +311,20 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
if (components & VB_HAS_NRM1)
{
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
out.Write("coord = float4(rawnorm1.xyz, 1.0f);\n");
out.Write("coord = float4(rawnorm1.xyz, 1.0);\n");
}
break;
case XF_SRCBINORMAL_B_INROW:
if (components & VB_HAS_NRM2)
{
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
out.Write("coord = float4(rawnorm2.xyz, 1.0f);\n");
out.Write("coord = float4(rawnorm2.xyz, 1.0);\n");
}
break;
default:
_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) )
out.Write("coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
out.Write("coord = float4(tex%d.x, tex%d.y, 1.0, 1.0);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
break;
}
@ -340,7 +340,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
uid_data.texMtxInfo[i].embosslightshift = xfregs.texMtxInfo[i].embosslightshift;
uid_data.texMtxInfo[i].embosssourceshift = xfregs.texMtxInfo[i].embosssourceshift;
out.Write("ldir = normalize(" LIGHT_POS".xyz - pos.xyz);\n", LIGHT_POS_PARAMS(I_LIGHTS, texinfo.embosslightshift));
out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift);
out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0);\n", i, texinfo.embosssourceshift);
}
else
{
@ -399,7 +399,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
// q of output is unknown
// multiply by postmatrix
out.Write("o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0f);\n", i, i, i);
out.Write("o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0);\n", i, i, i);
}
else
{
@ -462,14 +462,14 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
{
// this results in a scale from -1..0 to -1..1 after perspective
// divide
out.Write("o.pos.z = o.pos.w + o.pos.z * 2.0f;\n");
out.Write("o.pos.z = o.pos.w + o.pos.z * 2.0;\n");
// Sonic Unleashed puts its final rendering at the near or
// far plane of the viewing frustum (actually a box; they use
// orthogonal projection for that), and we end up putting it
// just beyond, and the rendering gets clipped away. (The
// primitive gets dropped)
out.Write("o.pos.z = o.pos.z * 1048575.0f/1048576.0f;\n");
out.Write("o.pos.z = o.pos.z * 1048575.0/1048576.0;\n");
// the next steps of the OGL pipeline are:
// (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology
@ -503,7 +503,7 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
if(i < xfregs.numTexGen.numTexGens)
out.Write(" uv%d_2.xyz = o.tex%d;\n", i, i);
else
out.Write(" uv%d_2.xyz = float3(0.0f, 0.0f, 0.0f);\n", i);
out.Write(" uv%d_2.xyz = float3(0.0, 0.0, 0.0);\n", i);
}
out.Write(" clipPos_2 = o.clipPos;\n");
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)

View File

@ -200,7 +200,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms
char ps_rgba6_to_rgb8[] =
"uniform sampler2DRect samp9;\n"
"out vec4 ocol0;\n"
"COLOROUT(ocol0)\n"
"void main()\n"
"{\n"
" ivec4 src6 = ivec4(round(texture2DRect(samp9, gl_FragCoord.xy) * 63.f));\n"
@ -214,7 +214,7 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms
char ps_rgb8_to_rgba6[] =
"uniform sampler2DRect samp9;\n"
"out vec4 ocol0;\n"
"COLOROUT(ocol0)\n"
"void main()\n"
"{\n"
" ivec4 src8 = ivec4(round(texture2DRect(samp9, gl_FragCoord.xy) * 255.f));\n"
@ -226,9 +226,13 @@ FramebufferManager::FramebufferManager(int targetWidth, int targetHeight, int ms
" ocol0 = float4(dst6) / 63.f;\n"
"}";
ProgramShaderCache::CompileShader(m_pixel_format_shaders[0], vs, ps_rgb8_to_rgba6);
ProgramShaderCache::CompileShader(m_pixel_format_shaders[1], vs, ps_rgba6_to_rgb8);
if(g_ogl_config.eSupportedGLSLVersion != GLSLES2)
{
// HACK: These shaders aren't GLSL ES 2 compatible, as GLSL ES 2 doesn't support bit operations.
// It could be worked around with floor + frac + tons of additions, but I don't think it is worth it.
ProgramShaderCache::CompileShader(m_pixel_format_shaders[0], vs, ps_rgb8_to_rgba6);
ProgramShaderCache::CompileShader(m_pixel_format_shaders[1], vs, ps_rgba6_to_rgb8);
}
}
FramebufferManager::~FramebufferManager()
@ -359,6 +363,19 @@ GLuint FramebufferManager::ResolveAndGetDepthTarget(const EFBRectangle &source_r
void FramebufferManager::ReinterpretPixelData(unsigned int convtype)
{
if(g_ogl_config.eSupportedGLSLVersion == GLSLES2) {
// This feature isn't supported by glsles2
// TODO: move this to InitBackendInfo
// We have to disable both the active and the stored config. Otherwise
// this message would be shown either once per format change within a
// frame, or once per frame.
OSD::AddMessage("Format Change Emulation isn't supported by your GPU.", 10000);
g_ActiveConfig.bEFBEmulateFormatChanges = false;
g_Config.bEFBEmulateFormatChanges = false;
return;
}
g_renderer->ResetAPIState();
GLuint src_texture = 0;

View File

@ -1,9 +1,12 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "DriverDetails.h"
#include "GLFunctions.h"
#include "Log.h"
#include <dlfcn.h>
#ifdef USE_GLES3
PFNGLMAPBUFFERRANGEPROC glMapBufferRange;
PFNGLUNMAPBUFFERPROC glUnmapBuffer;
@ -67,16 +70,40 @@ namespace GLFunc
void Init()
{
self = dlopen(NULL, RTLD_LAZY);
LoadFunction("glBeginQuery", (void**)&glBeginQuery);
LoadFunction("glEndQuery", (void**)&glEndQuery);
LoadFunction("glGetQueryObjectuiv", (void**)&glGetQueryObjectuiv);
LoadFunction("glDeleteQueries", (void**)&glDeleteQueries);
LoadFunction("glGenQueries", (void**)&glGenQueries);
LoadFunction("glUnmapBuffer", (void**)&glUnmapBuffer);
if (DriverDetails::HasBug(DriverDetails::BUG_ISTEGRA))
{
LoadFunction("glUnmapBuffer", (void**)&glUnmapBuffer);
LoadFunction("glBeginQueryEXT", (void**)&glBeginQuery);
LoadFunction("glEndQueryEXT", (void**)&glEndQuery);
LoadFunction("glGetQueryObjectuivEXT", (void**)&glGetQueryObjectuiv);
LoadFunction("glDeleteQueriesEXT", (void**)&glDeleteQueries);
LoadFunction("glGenQueriesEXT", (void**)&glGenQueries);
LoadFunction("glMapBufferRangeNV", (void**)&glMapBufferRange);
LoadFunction("glBindBufferRangeNV", (void**)&glBindBufferRange);
LoadFunction("glBlitFramebufferNV", (void**)&glBlitFramebuffer);
LoadFunction("glGenVertexArraysOES", (void**)&glGenVertexArrays);
LoadFunction("glDeleteVertexArraysOES", (void**)&glDeleteVertexArrays);
LoadFunction("glBindVertexArrayOES", (void**)&glBindVertexArray);
LoadFunction("glRenderbufferStorageMultisampleNV", (void**)&glRenderbufferStorageMultisample);
LoadFunction("glGetUniformBlockIndexNV", (void**)&glGetUniformBlockIndex);
LoadFunction("glUniformBlockBindingNV", (void**)&glUniformBlockBinding);
}
else
{
LoadFunction("glBeginQuery", (void**)&glBeginQuery);
LoadFunction("glEndQuery", (void**)&glEndQuery);
LoadFunction("glGetQueryObjectuiv", (void**)&glGetQueryObjectuiv);
LoadFunction("glDeleteQueries", (void**)&glDeleteQueries);
LoadFunction("glGenQueries", (void**)&glGenQueries);
LoadFunction("glMapBufferRange", (void**)&glMapBufferRange);
LoadFunction("glBindBufferRange", (void**)&glBindBufferRange);
LoadFunction("glBlitFramebuffer", (void**)&glBlitFramebuffer);
LoadFunction("glGenVertexArrays", (void**)&glGenVertexArrays);
@ -86,24 +113,26 @@ namespace GLFunc
LoadFunction("glClientWaitSync", (void**)&glClientWaitSync);
LoadFunction("glDeleteSync", (void**)&glDeleteSync);
LoadFunction("glFenceSync", (void**)&glFenceSync);
LoadFunction("glSamplerParameterf", (void**)&glSamplerParameterf);
LoadFunction("glSamplerParameteri", (void**)&glSamplerParameteri);
LoadFunction("glSamplerParameterfv", (void**)&glSamplerParameterfv);
LoadFunction("glBindSampler", (void**)&glBindSampler);
LoadFunction("glDeleteSamplers", (void**)&glDeleteSamplers);
LoadFunction("glGenSamplers", (void**)&glGenSamplers);
}
LoadFunction("glGetProgramBinary", (void**)&glGetProgramBinary);
LoadFunction("glProgramBinary", (void**)&glProgramBinary);
LoadFunction("glProgramParameteri", (void**)&glProgramParameteri);
LoadFunction("glGetProgramBinary", (void**)&glGetProgramBinary);
LoadFunction("glProgramBinary", (void**)&glProgramBinary);
LoadFunction("glProgramParameteri", (void**)&glProgramParameteri);
LoadFunction("glDrawRangeElements", (void**)&glDrawRangeElements);
LoadFunction("glDrawRangeElements", (void**)&glDrawRangeElements);
LoadFunction("glRenderbufferStorageMultisample", (void**)&glRenderbufferStorageMultisample);
LoadFunction("glRenderbufferStorageMultisample", (void**)&glRenderbufferStorageMultisample);
LoadFunction("glGetUniformBlockIndex", (void**)&glGetUniformBlockIndex);
LoadFunction("glUniformBlockBinding", (void**)&glUniformBlockBinding);
LoadFunction("glGetUniformBlockIndex", (void**)&glGetUniformBlockIndex);
LoadFunction("glUniformBlockBinding", (void**)&glUniformBlockBinding);
}
dlclose(self);
}
}

View File

@ -518,16 +518,18 @@ void ProgramShaderCache::CreateHeader ( void )
{
GLSL_VERSION v = g_ogl_config.eSupportedGLSLVersion;
snprintf(s_glsl_header, sizeof(s_glsl_header),
"#version %s\n"
"%s\n" // default precision
"%s\n"
"%s\n" // ubo
"%s\n" // early-z
// Precision defines for GLSLES2/3
"%s\n"
"\n"// A few required defines and ones that will make our lives a lot easier
"#define ATTRIN in\n"
"#define ATTROUT out\n"
"#define VARYIN %s in\n"
"#define VARYOUT %s out\n"
"#define ATTRIN %s\n"
"#define ATTROUT %s\n"
"#define VARYIN %s\n"
"#define VARYOUT %s\n"
// Silly differences
"#define float2 vec2\n"
@ -542,18 +544,41 @@ void ProgramShaderCache::CreateHeader ( void )
"%s\n"
"%s\n"
"%s\n"
// GLSLES2 hacks
"%s\n"
"%s\n"
"%s\n"
"%s\n"
"%s\n"
"%s\n"
"%s\n"
"#define COLOROUT(name) %s\n"
, v==GLSLES3 ? "300 es" : v==GLSL_130 ? "130" : v==GLSL_140 ? "140" : "150"
, v==GLSLES3 ? "precision highp float;" : ""
, v==GLSLES2 ? "" : v==GLSLES3 ? "#version 300 es" : v==GLSL_130 ? "#version 130" : v==GLSL_140 ? "#version 140" : "#version 150"
, g_ActiveConfig.backend_info.bSupportsGLSLUBO && v<GLSL_140 ? "#extension GL_ARB_uniform_buffer_object : enable" : ""
, g_ActiveConfig.backend_info.bSupportsEarlyZ ? "#extension GL_ARB_shader_image_load_store : enable" : ""
, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "" : "centroid"
, DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "" : "centroid"
, (v==GLSLES3 || v==GLSLES2) ? "precision highp float;" : ""
, v==GLSLES3 ? "" : v<=GLSL_130 ? "#extension GL_ARB_texture_rectangle : enable" : "#define texture2DRect texture"
, v==GLSLES2 ? "attribute" : "in"
, v==GLSLES2 ? "attribute" : "out"
, v==GLSLES2 ? "varying" : DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "in" : "centroid in"
, v==GLSLES2 ? "varying" : DriverDetails::HasBug(DriverDetails::BUG_BROKENCENTROID) ? "out" : "centroid out"
, v==GLSLES2 ? "#define texture2DRect texture2D" : v==GLSLES3 ? "" : v<=GLSL_130 ? "#extension GL_ARB_texture_rectangle : enable" : "#define texture2DRect texture"
, v==GLSLES3 ? "#define texture2DRect(samp, uv) texelFetch(samp, ivec2(floor(uv)), 0)" : ""
, v==GLSLES3 ? "#define sampler2DRect sampler2D" : ""
, (v==GLSLES3 || v==GLSLES2) ? "#define sampler2DRect sampler2D" : ""
, v==GLSLES2 ? "#define texture texture2D" : ""
, v==GLSLES2 ? "#define round(x) floor((x)+0.5)" : ""
, v==GLSLES2 ? "#define out " : ""
, v==GLSLES2 ? "#define ocol0 gl_FragColor" : ""
, v==GLSLES2 ? "#define ocol1 gl_FragColor" : ""
, v==GLSLES2 ? "#extension GL_NV_uniform_buffer_object : enable" : ""
, v==GLSLES2 ? "#extension GL_NV_fragdepth : enable" : ""
, v==GLSLES2 ? "" : "out vec4 name;"
);
}

View File

@ -127,7 +127,7 @@ static const char *s_fragmentShaderSrc =
"uniform sampler2D samp8;\n"
"uniform vec4 color;\n"
"VARYIN vec2 uv0;\n"
"out vec4 ocol0;\n"
"COLOROUT(ocol0)\n"
"void main(void) {\n"
" ocol0 = texture(samp8,uv0) * color;\n"
"}\n";

View File

@ -371,6 +371,7 @@ Renderer::Renderer()
// Set default GLES3 options
GLFunc::Init();
WARN_LOG(VIDEO, "Running the OpenGL ES 3 backend!");
g_Config.backend_info.bSupportsDualSourceBlend = false;
g_Config.backend_info.bSupportsGLSLUBO = !DriverDetails::HasBug(DriverDetails::BUG_ANNIHILATEDUBOS);
g_Config.backend_info.bSupportsPrimitiveRestart = true;
@ -387,7 +388,10 @@ Renderer::Renderer()
g_ogl_config.bSupportCoverageMSAA = false; // XXX: GLES3 spec has MSAA
g_ogl_config.bSupportSampleShading = false;
g_ogl_config.bSupportOGL31 = false;
g_ogl_config.eSupportedGLSLVersion = GLSLES3;
if (DriverDetails::HasBug(DriverDetails::BUG_ISTEGRA))
g_ogl_config.eSupportedGLSLVersion = GLSLES2;
else
g_ogl_config.eSupportedGLSLVersion = GLSLES3;
#else
#ifdef __APPLE__
glewExperimental = 1;
@ -659,11 +663,11 @@ void Renderer::Init()
"ATTRIN vec3 color0;\n"
"VARYOUT vec4 c;\n"
"void main(void) {\n"
" gl_Position = vec4(rawpos, 0.0f, 1.0f);\n"
" c = vec4(color0, 1.0f);\n"
" gl_Position = vec4(rawpos, 0.0, 1.0);\n"
" c = vec4(color0, 1.0);\n"
"}\n",
"VARYIN vec4 c;\n"
"out vec4 ocol0;\n"
"COLOROUT(ocol0)\n"
"void main(void) {\n"
" ocol0 = c;\n"
"}\n");

View File

@ -13,6 +13,7 @@ enum GLSL_VERSION {
GLSL_130,
GLSL_140,
GLSL_150, // and above
GLSLES2,
GLSLES3
};

View File

@ -2,6 +2,7 @@
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "DriverDetails.h"
#include "SamplerCache.h"
namespace OGL
@ -15,11 +16,14 @@ SamplerCache::SamplerCache()
// Destructor: invokes Clear() (presumably releasing the cached sampler
// objects -- Clear() itself is not visible here).
// NOTE(review): both an unconditional Clear() and a BUG_ISTEGRA-guarded
// Clear() appear in this view. The guarded form matches the early return in
// SetSamplerState under the same flag, so confirm the unconditional call is
// meant to be gone in the final file.
SamplerCache::~SamplerCache()
{
Clear();
if (!DriverDetails::HasBug(DriverDetails::BUG_ISTEGRA))
Clear();
}
void SamplerCache::SetSamplerState(int stage, const TexMode0& tm0, const TexMode1& tm1)
{
if (DriverDetails::HasBug(DriverDetails::BUG_ISTEGRA))
return;
// TODO: can this go somewhere else?
if (m_last_max_anisotropy != g_ActiveConfig.iMaxAnisotropy)
{

View File

@ -33,7 +33,7 @@ StreamBuffer::StreamBuffer(u32 type, size_t size, StreamType uploadType)
g_Config.bHackedBufferUpload = false;
}
if(!g_ogl_config.bSupportsGLBaseVertex && (m_uploadtype & BUFFERDATA))
if(!g_ogl_config.bSupportsGLBaseVertex && (m_uploadtype & BUFFERDATA) && !DriverDetails::HasBug(DriverDetails::BUG_ISTEGRA))
m_uploadtype = BUFFERDATA;
else if(!g_ogl_config.bSupportsGLBaseVertex && (m_uploadtype & BUFFERSUBDATA))
m_uploadtype = BUFFERSUBDATA;
@ -48,7 +48,7 @@ StreamBuffer::StreamBuffer(u32 type, size_t size, StreamType uploadType)
else
m_uploadtype = MAP_AND_ORPHAN;
}
Init();
}

View File

@ -411,7 +411,7 @@ TextureCache::TextureCache()
"uniform sampler2DRect samp9;\n"
"uniform vec4 colmat[7];\n"
"VARYIN vec2 uv0;\n"
"out vec4 ocol0;\n"
"COLOROUT(ocol0)\n"
"\n"
"void main(){\n"
" vec4 texcol = texture2DRect(samp9, uv0);\n"
@ -423,12 +423,12 @@ TextureCache::TextureCache()
"uniform sampler2DRect samp9;\n"
"uniform vec4 colmat[5];\n"
"VARYIN vec2 uv0;\n"
"out vec4 ocol0;\n"
"COLOROUT(ocol0)\n"
"\n"
"void main(){\n"
" vec4 texcol = texture2DRect(samp9, uv0);\n"
" vec4 EncodedDepth = fract((texcol.r * (16777215.0f/16777216.0f)) * vec4(1.0f,256.0f,256.0f*256.0f,1.0f));\n"
" texcol = round(EncodedDepth * (16777216.0f/16777215.0f) * vec4(255.0f,255.0f,255.0f,15.0f)) / vec4(255.0f,255.0f,255.0f,15.0f);\n"
" vec4 EncodedDepth = fract((texcol.r * (16777215.0/16777216.0)) * vec4(1.0,256.0,256.0*256.0,1.0));\n"
" texcol = round(EncodedDepth * (16777216.0/16777215.0) * vec4(255.0,255.0,255.0,15.0)) / vec4(255.0,255.0,255.0,15.0);\n"
" ocol0 = texcol * mat4(colmat[0], colmat[1], colmat[2], colmat[3]) + colmat[4];"
"}\n";

View File

@ -59,7 +59,7 @@ static const char *VProgram =
"void main()\n"
"{\n"
" uv0 = tex0;\n"
" gl_Position = vec4(rawpos, 0.0f, 1.0f);\n"
" gl_Position = vec4(rawpos, 0.0, 1.0);\n"
"}\n";
void CreatePrograms()
@ -68,7 +68,7 @@ void CreatePrograms()
const char *FProgramRgbToYuyv =
"uniform sampler2DRect samp9;\n"
"VARYIN vec2 uv0;\n"
"out vec4 ocol0;\n"
"COLOROUT(ocol0)\n"
"void main()\n"
"{\n"
" vec3 c0 = texture2DRect(samp9, uv0).rgb;\n"
@ -77,26 +77,26 @@ void CreatePrograms()
" vec3 y_const = vec3(0.257,0.504,0.098);\n"
" vec3 u_const = vec3(-0.148,-0.291,0.439);\n"
" vec3 v_const = vec3(0.439,-0.368,-0.071);\n"
" vec4 const3 = vec4(0.0625,0.5,0.0625f,0.5);\n"
" vec4 const3 = vec4(0.0625,0.5,0.0625,0.5);\n"
" ocol0 = vec4(dot(c1,y_const),dot(c01,u_const),dot(c0,y_const),dot(c01, v_const)) + const3;\n"
"}\n";
const char *FProgramYuyvToRgb =
"uniform sampler2DRect samp9;\n"
"VARYIN vec2 uv0;\n"
"out vec4 ocol0;\n"
"COLOROUT(ocol0)\n"
"void main()\n"
"{\n"
" vec4 c0 = texture2DRect(samp9, uv0).rgba;\n"
" float f = step(0.5, fract(uv0.x));\n"
" float y = mix(c0.b, c0.r, f);\n"
" float yComp = 1.164f * (y - 0.0625f);\n"
" float uComp = c0.g - 0.5f;\n"
" float vComp = c0.a - 0.5f;\n"
" ocol0 = vec4(yComp + (1.596f * vComp),\n"
" yComp - (0.813f * vComp) - (0.391f * uComp),\n"
" yComp + (2.018f * uComp),\n"
" 1.0f);\n"
" float yComp = 1.164 * (y - 0.0625);\n"
" float uComp = c0.g - 0.5;\n"
" float vComp = c0.a - 0.5;\n"
" ocol0 = vec4(yComp + (1.596 * vComp),\n"
" yComp - (0.813 * vComp) - (0.391 * uComp),\n"
" yComp + (2.018 * uComp),\n"
" 1.0);\n"
"}\n";
ProgramShaderCache::CompileShader(s_rgbToYuyvProgram, VProgram, FProgramRgbToYuyv);

View File

@ -135,6 +135,24 @@ void VertexManager::Draw(u32 stride)
glDrawRangeElementsBaseVertex(GL_POINTS, 0, max_index, point_index_size, GL_UNSIGNED_SHORT, (u8*)NULL+s_offset[2], s_baseVertex);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
}
else if (DriverDetails::HasBug(DriverDetails::BUG_ISTEGRA))
{
if (triangle_index_size > 0)
{
glDrawElements(triangle_mode, triangle_index_size, GL_UNSIGNED_SHORT, (u8*)NULL+s_offset[0]);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if (line_index_size > 0)
{
glDrawElements(GL_LINES, line_index_size, GL_UNSIGNED_SHORT, (u8*)NULL+s_offset[1]);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if (point_index_size > 0)
{
glDrawElements(GL_POINTS, point_index_size, GL_UNSIGNED_SHORT, (u8*)NULL+s_offset[2]);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
} else {
if (triangle_index_size > 0)
{
@ -151,7 +169,7 @@ void VertexManager::Draw(u32 stride)
glDrawRangeElements(GL_POINTS, 0, max_index, point_index_size, GL_UNSIGNED_SHORT, (u8*)NULL+s_offset[2]);
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
}
}
}
void VertexManager::vFlush()