PixelShaderGen: Use integer math for indirect tev stage texcoord calculation.
This commit is contained in:
parent
e7a42d884f
commit
cff952c397
|
@ -478,7 +478,7 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
|
|||
case ITBA_OFF:
|
||||
AlphaBump = 0;
|
||||
break;
|
||||
case ITBA_S:
|
||||
case ITBA_S:
|
||||
AlphaBump = indmap[TextureSampler::ALP_SMP];
|
||||
break;
|
||||
case ITBA_T:
|
||||
|
|
|
@ -193,17 +193,8 @@ static const char *tevRasTable[] =
|
|||
"int4(0, 0, 0, 0)", // zero
|
||||
};
|
||||
|
||||
//static const char *tevTexFunc[] = { "tex2D", "texRECT" };
|
||||
|
||||
static const char *tevCOutputTable[] = { "iprev.rgb", "ic0.rgb", "ic1.rgb", "ic2.rgb", "icprev.rgb", "icc0.rgb", "icc1.rgb", "icc2.rgb", };
|
||||
static const char *tevAOutputTable[] = { "iprev.a", "ic0.a", "ic1.a", "ic2.a", "icprev.a", "icc0.a", "icc1.a", "icc2.a" };
|
||||
static const char *tevIndAlphaSel[] = {"", "x", "y", "z"};
|
||||
static const char *tevIndAlphaMask[] = {"0xF8", "0xE0", "0xF0", "0xF8"};
|
||||
static const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
|
||||
static const char *tevIndBiasAdd[] = {"-128", "1", "1", "1" }; // indexed by fmt
|
||||
static const char *tevIndWrapStart[] = {"0.0", "256.0", "128.0", "64.0", "32.0", "16.0", "0.001" };
|
||||
static const char *tevIndFmtScale[] = {"255.0", "31.0", "15.0", "7.0" };
|
||||
static const char *tevIndFmtMask[] = {"0xFF", "0x1F", "0x0F", "0x07" };
|
||||
|
||||
static char text[16384];
|
||||
|
||||
|
@ -386,8 +377,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
|||
" int4 irastemp = int4(0, 0, 0, 0), itextemp = int4(0, 0, 0, 0), ikonsttemp = int4(0, 0, 0, 0);\n"
|
||||
" int3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n"
|
||||
" int alphabump=0;\n"
|
||||
" float3 tevcoord=float3(0.0, 0.0, 0.0);\n"
|
||||
" float2 wrappedcoord=float2(0.0,0.0), tempcoord=float2(0.0,0.0);\n"
|
||||
" int3 tevcoord=int3(0, 0, 0);\n"
|
||||
" int2 wrappedcoord=int2(0,0); float2 tempcoord=float2(0.0,0.0);\n"
|
||||
" int4 icc0=int4(0, 0, 0, 0), icc1=int4(0, 0, 0, 0);\n"
|
||||
" int4 icc2=int4(0, 0, 0, 0), icprev=int4(0, 0, 0, 0);\n"
|
||||
" int4 icrastemp = int4(0, 0, 0, 0), ickonsttemp = int4(0, 0, 0, 0);\n\n");
|
||||
|
@ -678,15 +669,25 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
|
|||
// perform the indirect op on the incoming regular coordinates using iindtex%d as the offset coords
|
||||
if (bpmem.tevind[n].bs != ITBA_OFF)
|
||||
{
|
||||
const char *tevIndAlphaSel[] = {"", "x", "y", "z"};
|
||||
const char *tevIndAlphaMask[] = {"0xF8", "0xE0", "0xF0", "0xF8"};
|
||||
out.Write("alphabump = iindtex%d.%s & %s;\n",
|
||||
bpmem.tevind[n].bt,
|
||||
tevIndAlphaSel[bpmem.tevind[n].bs],
|
||||
tevIndAlphaMask[bpmem.tevind[n].fmt]);
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: Should we reset alphabump to 0 here?
|
||||
}
|
||||
|
||||
// format
|
||||
const char *tevIndFmtMask[] = {"0xFF", "0x1F", "0x0F", "0x07" };
|
||||
out.Write("int3 iindtevcrd%d = iindtex%d & %s;\n", n, bpmem.tevind[n].bt, tevIndFmtMask[bpmem.tevind[n].fmt]);
|
||||
|
||||
// bias - TODO: Check if this needs to be this complicated..
|
||||
const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
|
||||
const char *tevIndBiasAdd[] = {"-128", "1", "1", "1" }; // indexed by fmt
|
||||
if (bpmem.tevind[n].bias == ITB_S || bpmem.tevind[n].bias == ITB_T || bpmem.tevind[n].bias == ITB_U)
|
||||
out.Write("iindtevcrd%d.%s += int(%s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]);
|
||||
else if (bpmem.tevind[n].bias == ITB_ST || bpmem.tevind[n].bias == ITB_SU || bpmem.tevind[n].bias == ITB_TU)
|
||||
|
@ -694,14 +695,16 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
|
|||
else if (bpmem.tevind[n].bias == ITB_STU)
|
||||
out.Write("iindtevcrd%d.%s += int3(%s, %s, %s);\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt], tevIndBiasAdd[bpmem.tevind[n].fmt]);
|
||||
|
||||
// multiply by offset matrix and scale
|
||||
// multiply by offset matrix and scale - calculations are likely to overflow badly,
|
||||
// yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result
|
||||
if (bpmem.tevind[n].mid != 0)
|
||||
{
|
||||
if (bpmem.tevind[n].mid <= 3)
|
||||
{
|
||||
int mtxidx = 2*(bpmem.tevind[n].mid-1);
|
||||
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
|
||||
out.Write("float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d)), dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d)));\n",
|
||||
|
||||
out.Write("int2 indtevtrans%d = int2(round(dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d)), dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d))));\n",
|
||||
n, mtxidx, n, mtxidx+1, n);
|
||||
}
|
||||
else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
|
||||
|
@ -709,49 +712,53 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
|
|||
_assert_(bpmem.tevind[n].mid >= 5);
|
||||
int mtxidx = 2*(bpmem.tevind[n].mid-5);
|
||||
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
|
||||
out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.xx);\n", n, mtxidx, texcoord, n);
|
||||
out.Write("int2 indtevtrans%d = int2(round(" I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.xx)));\n", n, mtxidx, texcoord, n);
|
||||
}
|
||||
else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
|
||||
{ // t matrix
|
||||
_assert_(bpmem.tevind[n].mid >= 9);
|
||||
int mtxidx = 2*(bpmem.tevind[n].mid-9);
|
||||
out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx);
|
||||
out.Write("float2 indtevtrans%d = " I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.yy);\n", n, mtxidx, texcoord, n);
|
||||
out.Write("int2 indtevtrans%d = int2(round(" I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.yy)));\n", n, mtxidx, texcoord, n);
|
||||
}
|
||||
else
|
||||
{
|
||||
out.Write("float2 indtevtrans%d = float2(0.0, 0.0);\n", n);
|
||||
out.Write("int2 indtevtrans%d = int2(0, 0);\n", n);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
out.Write("float2 indtevtrans%d = float2(0.0, 0.0);\n", n);
|
||||
out.Write("int2 indtevtrans%d = int2(0, 0);\n", n);
|
||||
}
|
||||
|
||||
// ---------
|
||||
// Wrapping
|
||||
// ---------
|
||||
const char *tevIndWrapStart[] = {"0", "(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)", "1" };
|
||||
|
||||
// wrap S
|
||||
if (bpmem.tevind[n].sw == ITW_OFF)
|
||||
out.Write("wrappedcoord.x = uv%d.x;\n", texcoord);
|
||||
out.Write("wrappedcoord.x = int(round(uv%d.x*256.0));\n", texcoord);
|
||||
else if (bpmem.tevind[n].sw == ITW_0)
|
||||
out.Write("wrappedcoord.x = 0.0;\n");
|
||||
out.Write("wrappedcoord.x = 0;\n");
|
||||
else
|
||||
out.Write("wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]);
|
||||
out.Write("wrappedcoord.x = int(round(uv%d.x*256.0)) %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]);
|
||||
|
||||
// wrap T
|
||||
if (bpmem.tevind[n].tw == ITW_OFF)
|
||||
out.Write("wrappedcoord.y = uv%d.y;\n", texcoord);
|
||||
out.Write("wrappedcoord.y = int(round(uv%d.y*256.0));\n", texcoord);
|
||||
else if (bpmem.tevind[n].tw == ITW_0)
|
||||
out.Write("wrappedcoord.y = 0.0;\n");
|
||||
out.Write("wrappedcoord.y = 0;\n");
|
||||
else
|
||||
out.Write("wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]);
|
||||
out.Write("wrappedcoord.y = int(round(uv%d.y*256.0)) %% %s;\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]);
|
||||
|
||||
if (bpmem.tevind[n].fb_addprev) // add previous tevcoord
|
||||
out.Write("tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n);
|
||||
else
|
||||
out.Write("tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n);
|
||||
|
||||
// Emulate s24 overflows
|
||||
out.Write("tevcoord.xy = (tevcoord.xy << 8) >> 8;\n");
|
||||
}
|
||||
|
||||
TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
|
||||
|
@ -782,13 +789,14 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
|
|||
uid_data.stagehash[n].tevorders_enable = bpmem.tevorders[n / 2].getEnable(n & 1);
|
||||
if (bpmem.tevorders[n/2].getEnable(n&1))
|
||||
{
|
||||
int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
|
||||
if (!bHasIndStage)
|
||||
{
|
||||
// calc tevcoord
|
||||
if (bHasTexCoord)
|
||||
out.Write("tevcoord.xy = uv%d.xy;\n", texcoord);
|
||||
out.Write("tevcoord.xy = int2(round(uv%d.xy*256.0));\n", texcoord);
|
||||
else
|
||||
out.Write("tevcoord.xy = float2(0.0, 0.0);\n");
|
||||
out.Write("tevcoord.xy = int2(0, 0);\n");
|
||||
}
|
||||
|
||||
const int i = bpmem.combiners[n].alphaC.tswap;
|
||||
|
@ -801,11 +809,10 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
|
|||
uid_data.stagehash[n].tevorders_texmap= bpmem.tevorders[n/2].getTexMap(n&1);
|
||||
|
||||
const char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
|
||||
int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
|
||||
uid_data.SetTevindrefTexmap(i, texmap);
|
||||
|
||||
out.Write("itextemp = ");
|
||||
SampleTexture<T>(out, "tevcoord", texswap, texmap, ApiType);
|
||||
SampleTexture<T>(out, "(float2(tevcoord.xy)/256.0)", texswap, texmap, ApiType);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -212,25 +212,24 @@ void PixelShaderManager::SetIndMatrixChanged(int matrixidx)
|
|||
int scale = ((u32)bpmem.indmtx[matrixidx].col0.s0 << 0) |
|
||||
((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) |
|
||||
((u32)bpmem.indmtx[matrixidx].col2.s2 << 4);
|
||||
float fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f;
|
||||
float fscale = powf(2.0f, (float)(scale - 17)) / 8.0f;
|
||||
|
||||
// xyz - static matrix
|
||||
// TODO w - dynamic matrix scale / 256...... somehow / 4 works better
|
||||
// rev 2972 - now using / 256.... verify that this works
|
||||
// w - dynamic matrix scale / 128
|
||||
constants.indtexmtx[2*matrixidx][0] = bpmem.indmtx[matrixidx].col0.ma * fscale;
|
||||
constants.indtexmtx[2*matrixidx][1] = bpmem.indmtx[matrixidx].col1.mc * fscale;
|
||||
constants.indtexmtx[2*matrixidx][2] = bpmem.indmtx[matrixidx].col2.me * fscale;
|
||||
constants.indtexmtx[2*matrixidx][3] = fscale * 4.0f;
|
||||
constants.indtexmtx[2*matrixidx][3] = fscale / 128.0f;
|
||||
constants.indtexmtx[2*matrixidx+1][0] = bpmem.indmtx[matrixidx].col0.mb * fscale;
|
||||
constants.indtexmtx[2*matrixidx+1][1] = bpmem.indmtx[matrixidx].col1.md * fscale;
|
||||
constants.indtexmtx[2*matrixidx+1][2] = bpmem.indmtx[matrixidx].col2.mf * fscale;
|
||||
constants.indtexmtx[2*matrixidx+1][3] = fscale * 4.0f;
|
||||
constants.indtexmtx[2*matrixidx+1][3] = fscale / 128.0f;
|
||||
dirty = true;
|
||||
|
||||
PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n",
|
||||
matrixidx, 1024.0f*fscale,
|
||||
matrixidx, fscale,
|
||||
bpmem.indmtx[matrixidx].col0.ma * fscale, bpmem.indmtx[matrixidx].col1.mc * fscale, bpmem.indmtx[matrixidx].col2.me * fscale,
|
||||
bpmem.indmtx[matrixidx].col0.mb * fscale, bpmem.indmtx[matrixidx].col1.md * fscale, bpmem.indmtx[matrixidx].col2.mf * fscale);
|
||||
bpmem.indmtx[matrixidx].col0.mb * fscale, bpmem.indmtx[matrixidx].col1.md * fscale, bpmem.indmtx[matrixidx].col2.mf * fscale);
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue