diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp index 0c80e04c93..49899f642c 100644 --- a/Source/Core/VideoBackends/Software/Tev.cpp +++ b/Source/Core/VideoBackends/Software/Tev.cpp @@ -544,19 +544,21 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t) switch (indirect.mid & 12) { case 0: - shift = 3 + (17 - scale); - indtevtrans[0] = indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] + indmtx.col2.me * indcoord[2]; - indtevtrans[1] = indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] + indmtx.col2.mf * indcoord[2]; + // matrix values are S0.10, output format is S17.7, so divide by 8 + shift = (17 - scale); + indtevtrans[0] = (indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] + indmtx.col2.me * indcoord[2]) >> 3; + indtevtrans[1] = (indmtx.col0.mb * indcoord[0] + indmtx.col1.md * indcoord[1] + indmtx.col2.mf * indcoord[2]) >> 3; break; case 4: // s matrix - shift = 8 + (17 - scale); - indtevtrans[0] = s * indcoord[0]; - indtevtrans[1] = t * indcoord[0]; + // s is S17.7, matrix elements are divided by 256, output is S17.7, so divide by 256. - TODO: Maybe, since s is actually stored as S24, we should divide by 256*64? + shift = (17 - scale); + indtevtrans[0] = s * indcoord[0] / 256; + indtevtrans[1] = t * indcoord[0] / 256; break; case 8: // t matrix - shift = 8 + (17 - scale); - indtevtrans[0] = s * indcoord[1]; - indtevtrans[1] = t * indcoord[1]; + shift = (17 - scale); + indtevtrans[0] = s * indcoord[1] / 256; + indtevtrans[1] = t * indcoord[1] / 256; break; default: return; diff --git a/Source/Core/VideoCommon/ConstantManager.h b/Source/Core/VideoCommon/ConstantManager.h index d4c17860f5..0fb65bf917 100644 --- a/Source/Core/VideoCommon/ConstantManager.h +++ b/Source/Core/VideoCommon/ConstantManager.h @@ -17,7 +17,7 @@ struct PixelShaderConstants float4 texdims[8]; float4 zbias[2]; float4 indtexscale[2]; - float4 indtexmtx[6]; + int4 indtexmtx[6]; float4 fog[3]; // For pixel lighting diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index b59a1f99f9..fc899c5cf6 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -289,7 +289,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T DeclareUniform(out, ApiType, C_TEXDIMS, "float4", I_TEXDIMS"[8]"); DeclareUniform(out, ApiType, C_ZBIAS, "float4", I_ZBIAS"[2]"); DeclareUniform(out, ApiType, C_INDTEXSCALE, "float4", I_INDTEXSCALE"[2]"); - DeclareUniform(out, ApiType, C_INDTEXMTX, "float4", I_INDTEXMTX"[6]"); + DeclareUniform(out, ApiType, C_INDTEXMTX, "int4", I_INDTEXMTX"[6]"); DeclareUniform(out, ApiType, C_FOG, "float4", I_FOG"[3]"); // For pixel lighting - TODO: Should only be defined when per pixel lighting is enabled! @@ -704,22 +704,30 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP int mtxidx = 2*(bpmem.tevind[n].mid-1); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(round(dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d)), dot(" I_INDTEXMTX"[%d].xyz, float3(iindtevcrd%d))));\n", - n, mtxidx, n, mtxidx+1, n); + out.Write("int2 indtevtrans%d = int2(idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d), idot(" I_INDTEXMTX"[%d].xyz, iindtevcrd%d)) >> 3;\n", n, mtxidx, n, mtxidx+1, n); + + out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); } else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord) { // s matrix _assert_(bpmem.tevind[n].mid >= 5); int mtxidx = 2*(bpmem.tevind[n].mid-5); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(round(" I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.xx)));\n", n, mtxidx, texcoord, n); + out.Write("int2 indtevtrans%d = int2(int2(round(uv%d.xy*255.0)) * iindtevcrd%d.xx) >> 8;\n", n, texcoord, n); + + out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); } else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord) { // t matrix _assert_(bpmem.tevind[n].mid >= 9); int mtxidx = 2*(bpmem.tevind[n].mid-9); out.SetConstantsUsed(C_INDTEXMTX+mtxidx, C_INDTEXMTX+mtxidx); - out.Write("int2 indtevtrans%d = int2(round(" I_INDTEXMTX"[%d].ww * uv%d.xy * float3(iindtevcrd%d.yy)));\n", n, mtxidx, texcoord, n); + out.Write("int2 indtevtrans%d = int2(int2(round(uv%d.xy*255.0)) * iindtevcrd%d.yy) >> 8;\n", n, texcoord, n); + + out.Write("if (" I_INDTEXMTX"[%d].w >= 0) indtevtrans%d = indtevtrans%d >> " I_INDTEXMTX"[%d].w;\n", mtxidx, n, n, mtxidx); + out.Write("else indtevtrans%d = indtevtrans%d << (-" I_INDTEXMTX"[%d].w);\n", n, n, mtxidx); } else { diff --git a/Source/Core/VideoCommon/PixelShaderManager.cpp b/Source/Core/VideoCommon/PixelShaderManager.cpp index 29e13dd717..30d6013efc 100644 --- a/Source/Core/VideoCommon/PixelShaderManager.cpp +++ b/Source/Core/VideoCommon/PixelShaderManager.cpp @@ -210,26 +210,25 @@ void PixelShaderManager::SetIndTexScaleChanged(bool high) void PixelShaderManager::SetIndMatrixChanged(int matrixidx) { int scale = ((u32)bpmem.indmtx[matrixidx].col0.s0 << 0) | - ((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) | - ((u32)bpmem.indmtx[matrixidx].col2.s2 << 4); - float fscale = powf(2.0f, (float)(scale - 17)) / 8.0f; + ((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) | + ((u32)bpmem.indmtx[matrixidx].col2.s2 << 4); // xyz - static matrix // w - dynamic matrix scale / 128 - constants.indtexmtx[2*matrixidx][0] = bpmem.indmtx[matrixidx].col0.ma * fscale; - constants.indtexmtx[2*matrixidx][1] = bpmem.indmtx[matrixidx].col1.mc * fscale; - constants.indtexmtx[2*matrixidx][2] = bpmem.indmtx[matrixidx].col2.me * fscale; - constants.indtexmtx[2*matrixidx][3] = fscale / 128.0f; - constants.indtexmtx[2*matrixidx+1][0] = bpmem.indmtx[matrixidx].col0.mb * fscale; - constants.indtexmtx[2*matrixidx+1][1] = bpmem.indmtx[matrixidx].col1.md * fscale; - constants.indtexmtx[2*matrixidx+1][2] = bpmem.indmtx[matrixidx].col2.mf * fscale; - constants.indtexmtx[2*matrixidx+1][3] = fscale / 128.0f; + constants.indtexmtx[2*matrixidx ][0] = bpmem.indmtx[matrixidx].col0.ma; + constants.indtexmtx[2*matrixidx ][1] = bpmem.indmtx[matrixidx].col1.mc; + constants.indtexmtx[2*matrixidx ][2] = bpmem.indmtx[matrixidx].col2.me; + constants.indtexmtx[2*matrixidx ][3] = 17 - scale; + constants.indtexmtx[2*matrixidx+1][0] = bpmem.indmtx[matrixidx].col0.mb; + constants.indtexmtx[2*matrixidx+1][1] = bpmem.indmtx[matrixidx].col1.md; + constants.indtexmtx[2*matrixidx+1][2] = bpmem.indmtx[matrixidx].col2.mf; + constants.indtexmtx[2*matrixidx+1][3] = 17 - scale; dirty = true; - PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n", - matrixidx, fscale, - bpmem.indmtx[matrixidx].col0.ma * fscale, bpmem.indmtx[matrixidx].col1.mc * fscale, bpmem.indmtx[matrixidx].col2.me * fscale, - bpmem.indmtx[matrixidx].col0.mb * fscale, bpmem.indmtx[matrixidx].col1.md * fscale, bpmem.indmtx[matrixidx].col2.mf * fscale); + PRIM_LOG("indmtx%d: scale=%d, mat=(%d %d %d; %d %d %d)\n", + matrixidx, scale, + bpmem.indmtx[matrixidx].col0.ma, bpmem.indmtx[matrixidx].col1.mc, bpmem.indmtx[matrixidx].col2.me, + bpmem.indmtx[matrixidx].col0.mb, bpmem.indmtx[matrixidx].col1.md, bpmem.indmtx[matrixidx].col2.mf); }