Merge pull request #9651 from Pokechu22/oob-texcoord

Fix out of bounds tex coord behavior; always apply fb_addprev and tex coord wrapping
This commit is contained in:
JMC47 2021-05-09 15:00:40 -04:00 committed by GitHub
commit a66852d37c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 236 additions and 195 deletions

View File

@ -6,6 +6,7 @@
#include <algorithm>
#include <cmath>
#include <cstring>
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
@ -485,20 +486,16 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
// matrix multiply - results might overflow, but we don't care since we only use the lower 24 bits
// of the result.
const int indmtxid = indirect.mid & 3;
if (indmtxid)
if (indirect.matrix_index != IndMtxIndex::Off)
{
const IND_MTX& indmtx = bpmem.indmtx[indmtxid - 1];
const int scale =
((u32)indmtx.col0.s0 << 0) | ((u32)indmtx.col1.s1 << 2) | ((u32)indmtx.col2.s2 << 4);
const IND_MTX& indmtx = bpmem.indmtx[static_cast<u32>(indirect.matrix_index.Value()) - 1];
int shift;
const int shift = 17 - indmtx.GetScale();
switch (indirect.mid & 12)
switch (indirect.matrix_id)
{
case 0:
case IndMtxId::Indirect:
// matrix values are S0.10, output format is S17.7, so divide by 8
shift = (17 - scale);
indtevtrans[0] = (indmtx.col0.ma * indcoord[0] + indmtx.col1.mc * indcoord[1] +
indmtx.col2.me * indcoord[2]) >>
3;
@ -506,25 +503,29 @@ void Tev::Indirect(unsigned int stageNum, s32 s, s32 t)
indmtx.col2.mf * indcoord[2]) >>
3;
break;
case 4: // s matrix
case IndMtxId::S:
// s is S17.7, matrix elements are divided by 256, output is S17.7, so divide by 256. - TODO:
// Maybe, since s is actually stored as S24, we should divide by 256*64?
shift = (17 - scale);
indtevtrans[0] = s * indcoord[0] / 256;
indtevtrans[1] = t * indcoord[0] / 256;
break;
case 8: // t matrix
shift = (17 - scale);
case IndMtxId::T:
indtevtrans[0] = s * indcoord[1] / 256;
indtevtrans[1] = t * indcoord[1] / 256;
break;
default:
PanicAlertFmt("Invalid indirect matrix ID {}", indirect.matrix_id);
return;
}
indtevtrans[0] = shift >= 0 ? indtevtrans[0] >> shift : indtevtrans[0] << -shift;
indtevtrans[1] = shift >= 0 ? indtevtrans[1] >> shift : indtevtrans[1] << -shift;
}
else
{
// If matrix_index is Off (0), matrix_id should be Indirect (0)
ASSERT(indirect.matrix_id == IndMtxId::Indirect);
}
if (indirect.fb_addprev)
{
@ -559,9 +560,16 @@ void Tev::Draw()
const int stageNum2 = stageNum >> 1;
const int stageOdd = stageNum & 1;
const u32 texcoordSel = bpmem.tevindref.getTexCoord(stageNum);
u32 texcoordSel = bpmem.tevindref.getTexCoord(stageNum);
const u32 texmap = bpmem.tevindref.getTexMap(stageNum);
// Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does
// not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
// This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set
// the number of tex gens to 2 (bug 11462).
if (texcoordSel >= bpmem.genMode.numtexgens)
texcoordSel = 0;
const TEXSCALE& texscale = bpmem.texscale[stageNum2];
const s32 scaleS = stageOdd ? texscale.ss1 : texscale.ss0;
const s32 scaleT = stageOdd ? texscale.ts1 : texscale.ts0;
@ -592,8 +600,13 @@ void Tev::Draw()
const TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stageNum].colorC;
const TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stageNum].alphaC;
const int texcoordSel = order.getTexCoord(stageOdd);
const int texmap = order.getTexMap(stageOdd);
u32 texcoordSel = order.getTexCoord(stageOdd);
const u32 texmap = order.getTexMap(stageOdd);
// Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does
// not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
if (texcoordSel >= bpmem.genMode.numtexgens)
texcoordSel = 0;
Indirect(stageNum, Uv[texcoordSel].s, Uv[texcoordSel].t);
@ -603,8 +616,17 @@ void Tev::Draw()
// RGBA
u8 texel[4];
TextureSampler::Sample(TexCoord.s, TexCoord.t, TextureLod[stageNum], TextureLinear[stageNum],
texmap, texel);
if (bpmem.genMode.numtexgens > 0)
{
TextureSampler::Sample(TexCoord.s, TexCoord.t, TextureLod[stageNum],
TextureLinear[stageNum], texmap, texel);
}
else
{
// It seems like the result is always black when no tex coords are enabled, but further
// hardware testing is needed.
std::memset(texel, 0, 4);
}
#if ALLOW_TEV_DUMPS
if (g_ActiveConfig.bDumpTevTextureFetches)

View File

@ -300,6 +300,31 @@ struct fmt::formatter<IndTexBias> : EnumFormatter<IndTexBias::STU>
formatter() : EnumFormatter({"None", "S", "T", "ST", "U", "SU", "TU", "STU"}) {}
};
// Value of the 2-bit TevStageIndirect::matrix_index field.
// Selects which entry of bpmem.indmtx supplies the indirect matrix and its scale factor; the
// array is indexed with (value - 1), so Matrix0..Matrix2 map to bpmem.indmtx[0..2].
enum class IndMtxIndex : u32
{
// No indirect matrix is selected; the matrix multiply step is skipped entirely.
Off = 0,
Matrix0 = 1,
Matrix1 = 2,
Matrix2 = 3,
};
// fmt formatter for IndMtxIndex. The display names are listed in enumerator order
// (Off = 0 through Matrix2 = 3).
// NOTE(review): the IndMtxIndex::Matrix2 template argument is presumably the last valid
// enumerator that EnumFormatter accepts - confirm against EnumFormatter's definition.
template <>
struct fmt::formatter<IndMtxIndex> : EnumFormatter<IndMtxIndex::Matrix2>
{
formatter() : EnumFormatter({"Off", "Matrix 0", "Matrix 1", "Matrix 2"}) {}
};
// Value of the 2-bit TevStageIndirect::matrix_id field.
// Chooses how the indirect offset is produced once a matrix has been selected by matrix_index.
// The field is 2 bits wide, so the raw value 3 has no enumerator; the software renderer treats
// it as invalid.
enum class IndMtxId : u32
{
// Multiply the indirect coordinates by the elements of the selected matrix.
Indirect = 0,
// Scale the regular (s, t) coordinates by the first indirect coordinate.
S = 1,
// Scale the regular (s, t) coordinates by the second indirect coordinate.
T = 2,
};
// fmt formatter for IndMtxId. The display names are listed in enumerator order
// (Indirect = 0, S = 1, T = 2).
// NOTE(review): the IndMtxId::T template argument is presumably the last valid enumerator that
// EnumFormatter accepts - confirm against EnumFormatter's definition.
template <>
struct fmt::formatter<IndMtxId> : EnumFormatter<IndMtxId::T>
{
formatter() : EnumFormatter({"Indirect", "S", "T"}) {}
};
// Indirect texture bump alpha
enum class IndTexBumpAlpha : u32
{
@ -335,7 +360,7 @@ union IND_MTXA
{
BitField<0, 11, s32> ma;
BitField<11, 11, s32> mb;
BitField<22, 2, u32> s0; // bits 0-1 of scale factor
BitField<22, 2, u8, u32> s0; // bits 0-1 of scale factor
u32 hex;
};
@ -343,7 +368,7 @@ union IND_MTXB
{
BitField<0, 11, s32> mc;
BitField<11, 11, s32> md;
BitField<22, 2, u32> s1; // bits 2-3 of scale factor
BitField<22, 2, u8, u32> s1; // bits 2-3 of scale factor
u32 hex;
};
@ -351,7 +376,7 @@ union IND_MTXC
{
BitField<0, 11, s32> me;
BitField<11, 11, s32> mf;
BitField<22, 2, u32> s2; // bits 4-5 of scale factor
BitField<22, 2, u8, u32> s2; // bits 4-5 of scale factor
u32 hex;
};
@ -360,6 +385,7 @@ struct IND_MTX
IND_MTXA col0;
IND_MTXB col1;
IND_MTXC col2;
// Reassembles the 6-bit scale factor, which is stored as three 2-bit pieces spread over the
// matrix columns: bits 0-1 live in col0.s0, bits 2-3 in col1.s1 and bits 4-5 in col2.s2.
u8 GetScale() const
{
  const int low_pair = col0.s0;
  const int middle_pair = col1.s1;
  const int high_pair = col2.s2;
  return static_cast<u8>(low_pair | (middle_pair << 2) | (high_pair << 4));
}
};
union IND_IMASK
@ -475,8 +501,12 @@ union TevStageIndirect
BitField<4, 1, bool, u32> bias_s;
BitField<5, 1, bool, u32> bias_t;
BitField<6, 1, bool, u32> bias_u;
BitField<7, 2, IndTexBumpAlpha> bs; // Indicates which coordinate will become the 'bump alpha'
BitField<9, 4, u32> mid; // Matrix ID to multiply offsets with
BitField<7, 2, IndTexBumpAlpha> bs; // Indicates which coordinate will become the 'bump alpha'
// Indicates which indirect matrix is used when matrix_id is Indirect.
// Also always indicates which indirect matrix to use for the scale factor, even with S or T.
BitField<9, 2, IndMtxIndex> matrix_index;
// Should be set to Indirect (0) if matrix_index is Off (0)
BitField<11, 2, IndMtxId> matrix_id;
BitField<13, 3, IndTexWrap> sw; // Wrapping factor for S of regular coord
BitField<16, 3, IndTexWrap> tw; // Wrapping factor for T of regular coord
BitField<19, 1, bool, u32> lb_utclod; // Use modified or unmodified texture
@ -492,9 +522,9 @@ union TevStageIndirect
u32 fullhex;
// Reports whether this stage actually consumes the indirect texture sample.
// If the bump alpha selection (bs) is Off and no indirect matrix is selected (matrix_index is
// Off), the result of the stage is independent of the texture sample data, so sampling the
// indirect texture can be skipped.
bool IsActive() const
{
  return bs != IndTexBumpAlpha::Off || matrix_index != IndMtxIndex::Off;
}
};
template <>
struct fmt::formatter<TevStageIndirect>
@ -508,13 +538,15 @@ struct fmt::formatter<TevStageIndirect>
"Format: {}\n"
"Bias: {}\n"
"Bump alpha: {}\n"
"Offset matrix index: {}\n"
"Offset matrix ID: {}\n"
"Regular coord S wrapping factor: {}\n"
"Regular coord T wrapping factor: {}\n"
"Use modified texture coordinates for LOD computation: {}\n"
"Add texture coordinates from previous TEV stage: {}",
tevind.bt, tevind.fmt, tevind.bias, tevind.bs, tevind.mid, tevind.sw,
tevind.tw, tevind.lb_utclod ? "Yes" : "No", tevind.fb_addprev ? "Yes" : "No");
tevind.bt, tevind.fmt, tevind.bias, tevind.bs, tevind.matrix_index,
tevind.matrix_id, tevind.sw, tevind.tw, tevind.lb_utclod ? "Yes" : "No",
tevind.fb_addprev ? "Yes" : "No");
}
};
@ -600,13 +632,13 @@ struct fmt::formatter<TEXSCALE>
union RAS1_IREF
{
BitField<0, 3, u32> bi0; // Indirect tex stage 0 ntexmap
BitField<3, 3, u32> bc0; // Indirect tex stage 0 ntexmap
BitField<3, 3, u32> bc0; // Indirect tex stage 0 ntexcoord
BitField<6, 3, u32> bi1;
BitField<9, 3, u32> bc1;
BitField<12, 3, u32> bi2;
BitField<15, 3, u32> bc3; // Typo?
BitField<18, 3, u32> bi4;
BitField<21, 3, u32> bc4;
BitField<15, 3, u32> bc2;
BitField<18, 3, u32> bi3;
BitField<21, 3, u32> bc3;
u32 hex;
// Returns the 3-bit ntexcoord value of indirect stage i.
// Each stage occupies 6 consecutive bits of hex: ntexmap in the lower 3 bits and ntexcoord in
// the upper 3, hence the extra offset of 3.
u32 getTexCoord(int i) const
{
  const int coord_shift = 6 * i + 3;
  return (hex >> coord_shift) & 7;
}
@ -625,8 +657,8 @@ struct fmt::formatter<RAS1_IREF>
"Stage 1 ntexmap: {}\nStage 1 ntexcoord: {}\n"
"Stage 2 ntexmap: {}\nStage 2 ntexcoord: {}\n"
"Stage 3 ntexmap: {}\nStage 3 ntexcoord: {}",
indref.bi0, indref.bc0, indref.bi1, indref.bc1, indref.bi2, indref.bc3,
indref.bi4, indref.bc4);
indref.bi0, indref.bc0, indref.bi1, indref.bc1, indref.bi2, indref.bc2,
indref.bi3, indref.bc3);
}
};
@ -911,6 +943,8 @@ union GenMode
BitField<7, 1, u32> unused; // 1 bit unused?
BitField<8, 1, bool, u32> flat_shading; // unconfirmed
BitField<9, 1, bool, u32> multisampling;
// This value is 1 less than the actual number (0-15 map to 1-16).
// In other words there is always at least 1 tev stage
BitField<10, 4, u32> numtevstages;
BitField<14, 2, CullMode> cullmode;
BitField<16, 3, u32> numindstages;
@ -937,7 +971,7 @@ struct fmt::formatter<GenMode>
"ZFreeze: {}",
mode.numtexgens, mode.numcolchans, mode.unused,
mode.flat_shading ? "Yes" : "No", mode.multisampling ? "Yes" : "No",
mode.numtevstages, mode.cullmode, mode.numindstages,
mode.numtevstages + 1, mode.cullmode, mode.numindstages,
mode.zfreeze ? "Yes" : "No");
}
};
@ -1912,7 +1946,7 @@ struct BPMemory
GenMode genMode;
u32 display_copy_filter[4]; // 01-04
u32 unknown; // 05
// indirect matrices (set by GXSetIndTexMtx, selected by TevStageIndirect::mid)
// indirect matrices (set by GXSetIndTexMtx, selected by TevStageIndirect::matrix_index)
// abc form a 2x3 offset matrix, there's 3 such matrices
// the 3 offset matrices can either be indirect type, S-type, or T-type
// 6bit scale factor s is distributed across IND_MTXA/B/C.

View File

@ -20,6 +20,7 @@ namespace VideoCommon
// As pipelines encompass both shader UIDs and render states, changes to either of these should
// also increment the pipeline UID version. Incrementing the UID version will cause all UID
// caches to be invalidated.
// TODO: Remove PixelShaderUid hasindstage on the next UID version bump
constexpr u32 GX_PIPELINE_UID_VERSION = 2; // Last changed in PR 9122
struct GXPipelineUid

View File

@ -220,13 +220,10 @@ PixelShaderUid GetPixelShaderUid()
// indirect texture map lookup
int nIndirectStagesUsed = 0;
if (uid_data->genMode_numindstages > 0)
for (unsigned int i = 0; i < numStages; ++i)
{
for (unsigned int i = 0; i < numStages; ++i)
{
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < uid_data->genMode_numindstages)
nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
}
if (bpmem.tevind[i].IsActive())
nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
}
uid_data->nIndirectStagesUsed = nIndirectStagesUsed;
@ -238,16 +235,14 @@ PixelShaderUid GetPixelShaderUid()
for (unsigned int n = 0; n < numStages; n++)
{
int texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1);
bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
// HACK to handle cases where the tex gen is not enabled
if (!bHasTexCoord)
texcoord = bpmem.genMode.numtexgens;
uid_data->stagehash[n].tevorders_texcoord = bpmem.tevorders[n / 2].getTexCoord(n & 1);
// hasindstage previously was used as a criterion to set tevind to 0, but there are variables in
// tevind that are used even if the indirect stage is disabled, so now it is only left in to
// avoid breaking existing UIDs (in most cases, games will have 0 in tevind anyways)
// TODO: Remove hasindstage on the next UID version bump
uid_data->stagehash[n].hasindstage = bpmem.tevind[n].bt < bpmem.genMode.numindstages;
uid_data->stagehash[n].tevorders_texcoord = texcoord;
if (uid_data->stagehash[n].hasindstage)
uid_data->stagehash[n].tevind = bpmem.tevind[n].hex;
uid_data->stagehash[n].tevind = bpmem.tevind[n].hex;
TevStageCombiner::ColorCombiner& cc = bpmem.combiners[n].colorC;
TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[n].alphaC;
@ -361,7 +356,7 @@ void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& hos
uid_data->bounding_box &= host_config.bounding_box & host_config.backend_bbox;
}
void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, u32 num_texgens,
void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config, bool bounding_box)
{
// dot product for integer vectors
@ -546,8 +541,7 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
uid_data->genMode_numtexgens, uid_data->genMode_numindstages);
// Stuff that is shared between ubershaders and pixelgen.
WritePixelShaderCommonHeader(out, api_type, uid_data->genMode_numtexgens, host_config,
uid_data->bounding_box);
WritePixelShaderCommonHeader(out, api_type, host_config, uid_data->bounding_box);
if (uid_data->forced_early_z && g_ActiveConfig.backend_info.bSupportsEarlyZ)
{
@ -775,9 +769,11 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
out.Write("col1 = float4(0.0, 0.0, 0.0, 0.0);\n");
}
// HACK to handle cases where the tex gen is not enabled
if (uid_data->genMode_numtexgens == 0)
{
// TODO: This is a hack to ensure that shaders still compile when setting out of bounds tex
// coord indices to 0. Ideally, it shouldn't exist at all, but the exact behavior hasn't been
// tested.
out.Write("\tint2 fixpoint_uv0 = int2(0, 0);\n\n");
}
else
@ -796,24 +792,34 @@ ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& hos
{
if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0)
{
const u32 texcoord = uid_data->GetTevindirefCoord(i);
u32 texcoord = uid_data->GetTevindirefCoord(i);
const u32 texmap = uid_data->GetTevindirefMap(i);
if (texcoord < uid_data->genMode_numtexgens)
{
out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2);
out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2,
(i & 1) != 0 ? "zw" : "xy");
}
else
{
out.Write("\ttempcoord = int2(0, 0);\n");
}
// Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does
// not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
// This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set
// the number of tex gens to 2 (bug 11462).
if (texcoord >= uid_data->genMode_numtexgens)
texcoord = 0;
out.SetConstantsUsed(C_INDTEXSCALE + i / 2, C_INDTEXSCALE + i / 2);
out.Write("\ttempcoord = fixpoint_uv{} >> " I_INDTEXSCALE "[{}].{};\n", texcoord, i / 2,
(i & 1) ? "zw" : "xy");
out.Write("\tint3 iindtex{} = ", i);
SampleTexture(out, "float2(tempcoord)", "abg", texmap, stereo, api_type);
}
}
for (u32 i = uid_data->genMode_numindstages; i < 4; i++)
{
// Referencing a stage above the number of ind stages is undefined behavior,
// and on console produces a noise pattern (details unknown).
// TODO: This behavior is nowhere near that, but it ensures the shader still compiles.
if ((uid_data->nIndirectStagesUsed & (1U << i)) != 0)
{
out.Write("\tint3 iindtex{} = int3(0, 0, 0); // Undefined behavior on console\n", i);
}
}
for (u32 i = 0; i < numStages; i++)
{
@ -950,17 +956,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
const auto& stage = uid_data->stagehash[n];
out.Write("\n\t// TEV stage {}\n", n);
// HACK to handle cases where the tex gen is not enabled
// Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does not
// exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
u32 texcoord = stage.tevorders_texcoord;
const bool has_tex_coord = texcoord < uid_data->genMode_numtexgens;
if (!has_tex_coord)
texcoord = 0;
if (stage.hasindstage)
{
TevStageIndirect tevind;
tevind.hex = stage.tevind;
const TevStageIndirect tevind{.hex = stage.tevind};
out.Write("\t// indirect op\n");
// Perform the indirect op on the incoming regular coordinates
@ -991,7 +995,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
// TODO: Should we reset alphabump to 0 here?
}
if (tevind.mid != 0)
if (tevind.matrix_index != IndMtxIndex::Off)
{
// format
static constexpr std::array<const char*, 4> tev_ind_fmt_mask{
@ -1038,11 +1042,14 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
tev_ind_bias_add[u32(tevind.fmt.Value())]);
}
// Multiplied by 2 because each matrix has two rows.
// Note also that the 4th column of the matrix contains the scale factor.
const u32 mtxidx = 2 * (static_cast<u32>(tevind.matrix_index.Value()) - 1);
// multiply by offset matrix and scale - calculations are likely to overflow badly,
// yet it works out since we only care about the lower 23 bits (+1 sign bit) of the result
if (tevind.mid <= 3)
if (tevind.matrix_id == IndMtxId::Indirect)
{
const u32 mtxidx = 2 * (tevind.mid - 1);
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
out.Write("\tint2 indtevtrans{} = int2(idot(" I_INDTEXMTX
@ -1064,10 +1071,9 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
}
}
else if (tevind.mid <= 7 && has_tex_coord)
{ // s matrix
ASSERT(tevind.mid >= 5);
const u32 mtxidx = 2 * (tevind.mid - 5);
else if (tevind.matrix_id == IndMtxId::S)
{
ASSERT(has_tex_coord);
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.xx) >> 8;\n", n,
@ -1086,10 +1092,9 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
out.Write("\telse indtevtrans{} <<= (-" I_INDTEXMTX "[{}].w);\n", n, mtxidx);
}
}
else if (tevind.mid <= 11 && has_tex_coord)
{ // t matrix
ASSERT(tevind.mid >= 9);
const u32 mtxidx = 2 * (tevind.mid - 9);
else if (tevind.matrix_id == IndMtxId::T)
{
ASSERT(has_tex_coord);
out.SetConstantsUsed(C_INDTEXMTX + mtxidx, C_INDTEXMTX + mtxidx);
out.Write("\tint2 indtevtrans{} = int2(fixpoint_uv{} * iindtevcrd{}.yy) >> 8;\n", n,
@ -1112,20 +1117,22 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
else
{
out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n);
ASSERT(false); // Unknown value for matrix_id
}
}
else
{
out.Write("\tint2 indtevtrans{} = int2(0, 0);\n", n);
// If matrix_index is Off (0), matrix_id should be Indirect (0)
ASSERT(tevind.matrix_id == IndMtxId::Indirect);
}
// ---------
// Wrapping
// ---------
// TODO: Should the last element be 1 or (1<<7)?
static constexpr std::array<const char*, 7> tev_ind_wrap_start{
"0", "(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)", "1",
static constexpr std::array<const char*, 5> tev_ind_wrap_start{
"(256<<7)", "(128<<7)", "(64<<7)", "(32<<7)", "(16<<7)",
};
// wrap S
@ -1133,14 +1140,14 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
{
out.Write("\twrappedcoord.x = fixpoint_uv{}.x;\n", texcoord);
}
else if (tevind.sw == IndTexWrap::ITW_0)
else if (tevind.sw >= IndTexWrap::ITW_0) // 7 (Invalid) appears to behave the same as 6 (ITW_0)
{
out.Write("\twrappedcoord.x = 0;\n");
}
else
{
out.Write("\twrappedcoord.x = fixpoint_uv{}.x & ({} - 1);\n", texcoord,
tev_ind_wrap_start[u32(tevind.sw.Value())]);
tev_ind_wrap_start[u32(tevind.sw.Value()) - u32(IndTexWrap::ITW_256)]);
}
// wrap T
@ -1148,14 +1155,14 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
{
out.Write("\twrappedcoord.y = fixpoint_uv{}.y;\n", texcoord);
}
else if (tevind.tw == IndTexWrap::ITW_0)
else if (tevind.tw >= IndTexWrap::ITW_0) // 7 (Invalid) appears to behave the same as 6 (ITW_0)
{
out.Write("\twrappedcoord.y = 0;\n");
}
else
{
out.Write("\twrappedcoord.y = fixpoint_uv{}.y & ({} - 1);\n", texcoord,
tev_ind_wrap_start[u32(tevind.tw.Value())]);
tev_ind_wrap_start[u32(tevind.tw.Value()) - u32(IndTexWrap::ITW_256)]);
}
if (tevind.fb_addprev) // add previous tevcoord
@ -1191,7 +1198,7 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
out.Write("\trastemp = {}.{};\n", tev_ras_table[u32(stage.tevorders_colorchan)], rasswap);
}
if (stage.tevorders_enable)
if (stage.tevorders_enable && uid_data->genMode_numtexgens > 0)
{
// Generate swizzle string to represent the texture color channel swapping
const char texswap[5] = {
@ -1202,17 +1209,15 @@ static void WriteStage(ShaderCode& out, const pixel_shader_uid_data* uid_data, i
'\0',
};
if (!stage.hasindstage)
{
// calc tevcord
if (has_tex_coord)
out.Write("\ttevcoord.xy = fixpoint_uv{};\n", texcoord);
else
out.Write("\ttevcoord.xy = int2(0, 0);\n");
}
out.Write("\ttextemp = ");
SampleTexture(out, "float2(tevcoord.xy)", texswap, stage.tevorders_texmap, stereo, api_type);
}
else if (uid_data->genMode_numtexgens == 0)
{
// It seems like the result is always black when no tex coords are enabled, but further testing
// is needed.
out.Write("\ttextemp = int4(0, 0, 0, 0);\n");
}
else
{
out.Write("\ttextemp = int4(255, 255, 255, 255);\n");

View File

@ -66,9 +66,9 @@ struct pixel_shader_uid_data
u32 tevindref_bi1 : 3;
u32 tevindref_bc1 : 3;
u32 tevindref_bi2 : 3;
u32 tevindref_bc2 : 3;
u32 tevindref_bi3 : 3;
u32 tevindref_bc3 : 3;
u32 tevindref_bi4 : 3;
u32 tevindref_bc4 : 3;
void SetTevindrefValues(int index, u32 texcoord, u32 texmap)
{
@ -84,55 +84,39 @@ struct pixel_shader_uid_data
}
else if (index == 2)
{
tevindref_bc3 = texcoord;
tevindref_bc2 = texcoord;
tevindref_bi2 = texmap;
}
else if (index == 3)
{
tevindref_bc4 = texcoord;
tevindref_bi4 = texmap;
tevindref_bc3 = texcoord;
tevindref_bi3 = texmap;
}
}
// Returns the texture coordinate index stored for indirect stage `index` (0-3), i.e. the
// tevindref_bcN field matching that stage. Any other index yields 0.
u32 GetTevindirefCoord(int index) const
{
  switch (index)
  {
  case 0:
    return tevindref_bc0;
  case 1:
    return tevindref_bc1;
  case 2:
    return tevindref_bc2;
  case 3:
    return tevindref_bc3;
  default:
    return 0;
  }
}
// Returns the texture map index stored for indirect stage `index` (0-3), i.e. the
// tevindref_biN field matching that stage. Any other index yields 0.
u32 GetTevindirefMap(int index) const
{
  switch (index)
  {
  case 0:
    return tevindref_bi0;
  case 1:
    return tevindref_bi1;
  case 2:
    return tevindref_bi2;
  case 3:
    return tevindref_bi3;
  default:
    return 0;
  }
}
@ -149,6 +133,7 @@ struct pixel_shader_uid_data
u32 pad1 : 6;
// TODO: Clean up the swapXY mess
// TODO: remove hasindstage, as it no longer does anything useful
u32 hasindstage : 1;
u32 tevind : 21;
u32 tevksel_swap1a : 2;
@ -174,7 +159,7 @@ using PixelShaderUid = ShaderUid<pixel_shader_uid_data>;
ShaderCode GeneratePixelShaderCode(APIType api_type, const ShaderHostConfig& host_config,
const pixel_shader_uid_data* uid_data);
void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type, u32 num_texgens,
void WritePixelShaderCommonHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config, bool bounding_box);
void ClearUnusedPixelShaderUidBits(APIType api_type, const ShaderHostConfig& host_config,
PixelShaderUid* uid);

View File

@ -148,29 +148,18 @@ void PixelShaderManager::SetConstants()
for (u32 i = 0; i < (bpmem.genMode.numtevstages + 1); ++i)
{
u32 stage = bpmem.tevind[i].bt;
if (stage < bpmem.genMode.numindstages)
{
// We set some extra bits so the ubershader can quickly check if these
// features are in use.
if (bpmem.tevind[i].IsActive())
constants.pack1[stage][3] =
bpmem.tevindref.getTexCoord(stage) | bpmem.tevindref.getTexMap(stage) << 8 | 1 << 16;
// Note: a tevind of zero just happens to be a passthrough, so no need
// to set an extra bit.
constants.pack1[i][2] = bpmem.tevind[i].hex; // TODO: This match shadergen, but videosw
// will always wrap.
// Note: a tevind of zero just happens to be a passthrough, so no need
// to set an extra bit. Furthermore, wrap and add to previous apply even if there is no
// indirect stage.
constants.pack1[i][2] = bpmem.tevind[i].hex;
// The ubershader uses tevind != 0 as a condition whether to calculate texcoords,
// even when texture is disabled, instead of the stage < bpmem.genMode.numindstages.
// We set an unused bit here to indicate that the stage is active, even if it
// is just a pass-through.
constants.pack1[i][2] |= 0x80000000;
}
else
{
constants.pack1[i][2] = 0;
}
u32 stage = bpmem.tevind[i].bt;
// We use an extra bit (1 << 16) to provide a fast way of testing if this feature is in use.
// Note also that this is indexed by indirect stage, not by TEV stage.
if (bpmem.tevind[i].IsActive() && stage < bpmem.genMode.numindstages)
constants.pack1[stage][3] =
bpmem.tevindref.getTexCoord(stage) | bpmem.tevindref.getTexMap(stage) << 8 | 1 << 16;
}
dirty = true;
@ -336,9 +325,7 @@ void PixelShaderManager::SetIndTexScaleChanged(bool high)
void PixelShaderManager::SetIndMatrixChanged(int matrixidx)
{
int scale = ((u32)bpmem.indmtx[matrixidx].col0.s0 << 0) |
((u32)bpmem.indmtx[matrixidx].col1.s1 << 2) |
((u32)bpmem.indmtx[matrixidx].col2.s2 << 4);
const u8 scale = bpmem.indmtx[matrixidx].GetScale();
// xyz - static matrix
// w - dynamic matrix scale / 128

View File

@ -64,7 +64,7 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write("// Pixel UberShader for {} texgens{}{}\n", numTexgen,
early_depth ? ", early-depth" : "", per_pixel_depth ? ", per-pixel depth" : "");
WritePixelShaderCommonHeader(out, ApiType, numTexgen, host_config, bounding_box);
WritePixelShaderCommonHeader(out, ApiType, host_config, bounding_box);
WriteUberShaderCommonHeader(out, ApiType, host_config);
if (per_pixel_lighting)
WriteLightingFunction(out);
@ -148,19 +148,16 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
}
// Uniform index -> texture coordinates
// Quirk: when the tex coord is not less than the number of tex gens (i.e. the tex coord does
// not exist), then tex coord 0 is used (though sometimes glitchy effects happen on console).
// This affects the Mario portrait in Luigi's Mansion, where the developers forgot to set
// the number of tex gens to 2 (bug 11462).
if (numTexgen > 0)
{
if (ApiType != APIType::D3D)
{
out.Write("float3 selectTexCoord(uint index) {{\n");
}
else
{
out.Write("float3 selectTexCoord(uint index");
for (u32 i = 0; i < numTexgen; i++)
out.Write(", float3 tex{}", i);
out.Write(") {{\n");
}
out.Write("int2 selectTexCoord(uint index");
for (u32 i = 0; i < numTexgen; i++)
out.Write(", int2 fixpoint_uv{}", i);
out.Write(") {{\n");
if (ApiType == APIType::D3D)
{
@ -168,48 +165,51 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
for (u32 i = 0; i < numTexgen; i++)
{
out.Write(" case {}u:\n"
" return tex{};\n",
" return fixpoint_uv{};\n",
i, i);
}
out.Write(" default:\n"
" return float3(0.0, 0.0, 0.0);\n"
" return fixpoint_uv0;\n"
" }}\n");
}
else
{
out.Write(" if (index >= {}u) {{\n", numTexgen);
out.Write(" return fixpoint_uv0;\n"
" }}\n");
if (numTexgen > 4)
out.Write(" if (index < 4u) {{\n");
if (numTexgen > 2)
out.Write(" if (index < 2u) {{\n");
if (numTexgen > 1)
out.Write(" return (index == 0u) ? tex0 : tex1;\n");
out.Write(" return (index == 0u) ? fixpoint_uv0 : fixpoint_uv1;\n");
else
out.Write(" return (index == 0u) ? tex0 : float3(0.0, 0.0, 0.0);\n");
out.Write(" return fixpoint_uv0;\n");
if (numTexgen > 2)
{
out.Write(" }} else {{\n"); // >= 2
out.Write(" }} else {{\n"); // >= 2 < min(4, numTexgen)
if (numTexgen > 3)
out.Write(" return (index == 2u) ? tex2 : tex3;\n");
out.Write(" return (index == 2u) ? fixpoint_uv2 : fixpoint_uv3;\n");
else
out.Write(" return (index == 2u) ? tex2 : float3(0.0, 0.0, 0.0);\n");
out.Write(" return fixpoint_uv2;\n");
out.Write(" }}\n");
}
if (numTexgen > 4)
{
out.Write(" }} else {{\n"); // >= 4 <= 8
out.Write(" }} else {{\n"); // >= 4 < min(8, numTexgen)
if (numTexgen > 6)
out.Write(" if (index < 6u) {{\n");
if (numTexgen > 5)
out.Write(" return (index == 4u) ? tex4 : tex5;\n");
out.Write(" return (index == 4u) ? fixpoint_uv4 : fixpoint_uv5;\n");
else
out.Write(" return (index == 4u) ? tex4 : float3(0.0, 0.0, 0.0);\n");
out.Write(" return fixpoint_uv4;\n");
if (numTexgen > 6)
{
out.Write(" }} else {{\n"); // >= 6 <= 8
out.Write(" }} else {{\n"); // >= 6 < min(8, numTexgen)
if (numTexgen > 7)
out.Write(" return (index == 6u) ? tex6 : tex7;\n");
out.Write(" return (index == 6u) ? fixpoint_uv6 : fixpoint_uv7;\n");
else
out.Write(" return (index == 6u) ? tex6 : float3(0.0, 0.0, 0.0);\n");
out.Write(" return fixpoint_uv6;\n");
out.Write(" }}\n");
}
out.Write(" }}\n");
@ -287,15 +287,15 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// ======================
const auto LookupIndirectTexture = [&out, stereo](std::string_view out_var_name,
std::string_view in_index_name) {
// in_index_name is the indirect stage, not the tev stage
// bpmem_iref is packed differently from RAS1_IREF
out.Write("{{\n"
" uint iref = bpmem_iref({});\n"
" if ( iref != 0u)\n"
" {{\n"
" uint texcoord = bitfieldExtract(iref, 0, 3);\n"
" uint texmap = bitfieldExtract(iref, 8, 3);\n"
" float3 uv = getTexCoord(texcoord);\n"
" int2 fixedPoint_uv = int2((uv.z == 0.0 ? uv.xy : (uv.xy / uv.z)) * " I_TEXDIMS
"[texcoord].zw);\n"
" int2 fixedPoint_uv = getTexCoord(texcoord);\n"
"\n"
" if (({} & 1u) == 0u)\n"
" fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].xy;\n"
@ -306,6 +306,10 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"[texmap].xy, {})).abg;\n",
in_index_name, in_index_name, in_index_name, in_index_name, out_var_name,
stereo ? "float(layer)" : "0.0");
// There is always a bit set in bpmem_iref if the data is valid (matrix is not off, and the
// indirect texture stage is enabled). If the matrix is off, the result doesn't matter; if the
// indirect texture stage is disabled, the result is undefined (and produces a glitchy pattern
// on hardware, different from this).
out.Write(" }}\n"
" else\n"
" {{\n"
@ -666,21 +670,14 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"\n");
}
// Since the texture coodinate variables aren't global, we need to pass
// them to the select function in D3D.
// Since the fixed-point texture coordinate variables aren't global, we need to pass
// them to the select function. This applies to all backends.
if (numTexgen > 0)
{
if (ApiType != APIType::D3D)
{
out.Write("#define getTexCoord(index) selectTexCoord((index))\n\n");
}
else
{
out.Write("#define getTexCoord(index) selectTexCoord((index)");
for (u32 i = 0; i < numTexgen; i++)
out.Write(", tex{}", i);
out.Write(")\n\n");
}
out.Write("#define getTexCoord(index) selectTexCoord((index)");
for (u32 i = 0; i < numTexgen; i++)
out.Write(", fixpoint_uv{}", i);
out.Write(")\n\n");
}
if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan)
@ -788,11 +785,18 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
// Disable texturing when there are no texgens (for now)
if (numTexgen != 0)
{
out.Write(" uint tex_coord = {};\n",
for (u32 i = 0; i < numTexgen; i++)
{
out.Write(" int2 fixpoint_uv{} = int2(", i);
out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i);
out.Write(" * " I_TEXDIMS "[{}].zw);\n", i);
// TODO: S24 overflows here?
}
out.Write("\n"
" uint tex_coord = {};\n",
BitfieldExtract<&TwoTevStageOrders::texcoord0>("ss.order"));
out.Write(" float3 uv = getTexCoord(tex_coord);\n"
" int2 fixedPoint_uv = int2((uv.z == 0.0 ? uv.xy : (uv.xy / uv.z)) * " I_TEXDIMS
"[tex_coord].zw);\n"
out.Write(" int2 fixedPoint_uv = getTexCoord(tex_coord);\n"
"\n"
" bool texture_enabled = (ss.order & {}u) != 0u;\n",
1 << TwoTevStageOrders().enable0.StartBit());
@ -806,7 +810,10 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
out.Write(" uint fmt = {};\n", BitfieldExtract<&TevStageIndirect::fmt>("tevind"));
out.Write(" uint bias = {};\n", BitfieldExtract<&TevStageIndirect::bias>("tevind"));
out.Write(" uint bt = {};\n", BitfieldExtract<&TevStageIndirect::bt>("tevind"));
out.Write(" uint mid = {};\n", BitfieldExtract<&TevStageIndirect::mid>("tevind"));
out.Write(" uint matrix_index = {};\n",
BitfieldExtract<&TevStageIndirect::matrix_index>("tevind"));
out.Write(" uint matrix_id = {};\n",
BitfieldExtract<&TevStageIndirect::matrix_id>("tevind"));
out.Write("\n");
out.Write(" int3 indcoord;\n");
LookupIndirectTexture("indcoord", "bt");
@ -846,12 +853,12 @@ ShaderCode GenPixelShader(APIType ApiType, const ShaderHostConfig& host_config,
"\n"
" // Matrix multiply\n"
" int2 indtevtrans = int2(0, 0);\n"
" if ((mid & 3u) != 0u)\n"
" if (matrix_index != 0u)\n"
" {{\n"
" uint mtxidx = 2u * ((mid & 3u) - 1u);\n"
" uint mtxidx = 2u * (matrix_index - 1u);\n"
" int shift = " I_INDTEXMTX "[mtxidx].w;\n"
"\n"
" switch (mid >> 2)\n"
" switch (matrix_id)\n"
" {{\n"
" case 0u: // 3x2 S0.10 matrix\n"
" indtevtrans = int2(idot(" I_INDTEXMTX