Summary
  Software renderer (Tev.cpp/Tev.h): the four TEV combiner inputs are gathered
  once per stage in Tev::Draw() into `InputRegType inputs[4]` and handed to the
  regular/compare combiner helpers instead of being re-read through the LUTs in
  every helper. The regular combiner applies the scale shift to the lerp term
  and adds a rounding bias before the >>8.
  Shader generator (PixelShaderGen.cpp): the inputs are written to tevin_a..d
  once per stage and a new WriteTevRegular() emits the matching arithmetic.

Review fixes folded into the added lines (hunk line counts are unchanged):
  * Tev::DrawColorCompare loops over BLU_C..RED_C inclusive. The removed code
    wrote Reg[dest][BLU_C + i] for i = 0..2; `i < RED_C` stops one channel
    short (assuming RED_C == BLU_C + 2, as the inputs[] indexing suggests).
  * TEVCMP_RGB8_EQ shader expression: parentheses are balanced again and the
    select factor is int3(1,1,1) - {0,1}, matching `(a == b) ? c : 0`.
  * `template<class T>` parameter lists restored where they had been stripped.

diff --git a/Source/Core/VideoBackends/Software/Tev.cpp b/Source/Core/VideoBackends/Software/Tev.cpp
index 1890aaa3e3..8ee3715e03 100644
--- a/Source/Core/VideoBackends/Software/Tev.cpp
+++ b/Source/Core/VideoBackends/Software/Tev.cpp
@@ -55,14 +55,14 @@ void Tev::Init()
 	m_ColorInputLUT[14][RED_INP] = &StageKonst[RED_C]; m_ColorInputLUT[14][GRN_INP] = &StageKonst[GRN_C]; m_ColorInputLUT[14][BLU_INP] = &StageKonst[BLU_C]; // konst
 	m_ColorInputLUT[15][RED_INP] = &FixedConstants[0]; m_ColorInputLUT[15][GRN_INP] = &FixedConstants[0]; m_ColorInputLUT[15][BLU_INP] = &FixedConstants[0]; // zero
 
-	m_AlphaInputLUT[0] = Reg[0]; // prev
-	m_AlphaInputLUT[1] = Reg[1]; // c0
-	m_AlphaInputLUT[2] = Reg[2]; // c1
-	m_AlphaInputLUT[3] = Reg[3]; // c2
-	m_AlphaInputLUT[4] = TexColor; // tex
-	m_AlphaInputLUT[5] = RasColor; // ras
-	m_AlphaInputLUT[6] = StageKonst; // konst
-	m_AlphaInputLUT[7] = Zero16; // zero
+	m_AlphaInputLUT[0] = &Reg[0][ALP_C]; // prev
+	m_AlphaInputLUT[1] = &Reg[1][ALP_C]; // c0
+	m_AlphaInputLUT[2] = &Reg[2][ALP_C]; // c1
+	m_AlphaInputLUT[3] = &Reg[3][ALP_C]; // c2
+	m_AlphaInputLUT[4] = &TexColor[ALP_C]; // tex
+	m_AlphaInputLUT[5] = &RasColor[ALP_C]; // ras
+	m_AlphaInputLUT[6] = &StageKonst[ALP_C]; // konst
+	m_AlphaInputLUT[7] = &Zero16[ALP_C]; // zero
 
 	for (int comp = 0; comp < 4; comp++)
 	{
@@ -176,239 +176,150 @@ void Tev::SetRasColor(int colorChan, int swaptable)
 	}
 }
 
-void Tev::DrawColorRegular(TevStageCombiner::ColorCombiner &cc)
+void Tev::DrawColorRegular(TevStageCombiner::ColorCombiner &cc, const InputRegType inputs[4])
 {
-	InputRegType InputReg;
-
 	for (int i = 0; i < 3; i++)
 	{
-		InputReg.a = *m_ColorInputLUT[cc.a][i];
-		InputReg.b = *m_ColorInputLUT[cc.b][i];
-		InputReg.c = *m_ColorInputLUT[cc.c][i];
-		InputReg.d = *m_ColorInputLUT[cc.d][i];
+		const InputRegType& InputReg = inputs[BLU_C + i];
 
 		u16 c = InputReg.c + (InputReg.c >> 7);
 
 		s32 temp = InputReg.a * (256 - c) + (InputReg.b * c);
-		temp = cc.op?(-temp >> 8):(temp >> 8);
+		temp <<= m_ScaleLShiftLUT[cc.shift];
+		temp += (cc.shift != 3) ? 0 : (cc.op == 1) ? 127 : 128;
+		temp = cc.op ? (-temp >> 8) : (temp >> 8);
 
-		s32 result = InputReg.d + temp + m_BiasLUT[cc.bias];
-		result = result << m_ScaleLShiftLUT[cc.shift];
+		s32 result = ((InputReg.d + m_BiasLUT[cc.bias]) << m_ScaleLShiftLUT[cc.shift]) + temp;
 		result = result >> m_ScaleRShiftLUT[cc.shift];
 
 		Reg[cc.dest][BLU_C + i] = result;
 	}
 }
 
-void Tev::DrawColorCompare(TevStageCombiner::ColorCombiner &cc)
+void Tev::DrawColorCompare(TevStageCombiner::ColorCombiner &cc, const InputRegType inputs[4])
 {
-	int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here
+	for (int i = BLU_C; i <= RED_C; i++) // inclusive: every color channel gets written
+	{
+		switch ((cc.shift<<1)|cc.op|8) // encoded compare mode
+		{
+		case TEVCMP_R8_GT:
+			Reg[cc.dest][i] = inputs[i].d + ((inputs[RED_C].a > inputs[RED_C].b) ? inputs[i].c : 0);
+			break;
 
-	u32 a;
-	u32 b;
+		case TEVCMP_R8_EQ:
+			Reg[cc.dest][i] = inputs[i].d + ((inputs[RED_C].a == inputs[RED_C].b) ? inputs[i].c : 0);
+			break;
 
-	InputRegType InputReg;
+		case TEVCMP_GR16_GT:
+			{
+				u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
+				u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
+				Reg[cc.dest][i] = inputs[i].d + ((a > b) ? inputs[i].c : 0);
+			}
+			break;
 
-	switch (cmp) {
-	case TEVCMP_R8_GT:
-		{
-			a = *m_ColorInputLUT[cc.a][RED_INP] & 0xff;
-			b = *m_ColorInputLUT[cc.b][RED_INP] & 0xff;
-			for (int i = 0; i < 3; i++)
+		case TEVCMP_GR16_EQ:
 			{
-				InputReg.c = *m_ColorInputLUT[cc.c][i];
-				InputReg.d = *m_ColorInputLUT[cc.d][i];
-				Reg[cc.dest][BLU_C + i] = InputReg.d + ((a > b) ? InputReg.c : 0);
+				u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
+				u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
+				Reg[cc.dest][i] = inputs[i].d + ((a == b) ? inputs[i].c : 0);
 			}
-		}
-		break;
+			break;
 
-	case TEVCMP_R8_EQ:
-		{
-			a = *m_ColorInputLUT[cc.a][RED_INP] & 0xff;
-			b = *m_ColorInputLUT[cc.b][RED_INP] & 0xff;
-			for (int i = 0; i < 3; i++)
+		case TEVCMP_BGR24_GT:
 			{
-				InputReg.c = *m_ColorInputLUT[cc.c][i];
-				InputReg.d = *m_ColorInputLUT[cc.d][i];
-				Reg[cc.dest][BLU_C + i] = InputReg.d + ((a == b) ? InputReg.c : 0);
+				u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
+				u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
+				Reg[cc.dest][i] = inputs[i].d + ((a > b) ? inputs[i].c : 0);
 			}
-		}
-		break;
-	case TEVCMP_GR16_GT:
-		{
-			a = ((*m_ColorInputLUT[cc.a][GRN_INP] & 0xff) << 8) | (*m_ColorInputLUT[cc.a][RED_INP] & 0xff);
-			b = ((*m_ColorInputLUT[cc.b][GRN_INP] & 0xff) << 8) | (*m_ColorInputLUT[cc.b][RED_INP] & 0xff);
-			for (int i = 0; i < 3; i++)
+			break;
+
+		case TEVCMP_BGR24_EQ:
 			{
-				InputReg.c = *m_ColorInputLUT[cc.c][i];
-				InputReg.d = *m_ColorInputLUT[cc.d][i];
-				Reg[cc.dest][BLU_C + i] = InputReg.d + ((a > b) ? InputReg.c : 0);
+				u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
+				u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
+				Reg[cc.dest][i] = inputs[i].d + ((a == b) ? inputs[i].c : 0);
 			}
+			break;
+
+		case TEVCMP_RGB8_GT:
+			Reg[cc.dest][i] = inputs[i].d + ((inputs[i].a > inputs[i].b) ? inputs[i].c : 0);
+			break;
+
+		case TEVCMP_RGB8_EQ:
+			Reg[cc.dest][i] = inputs[i].d + ((inputs[i].a == inputs[i].b) ? inputs[i].c : 0);
+			break;
 		}
-		break;
-	case TEVCMP_GR16_EQ:
-		{
-			a = ((*m_ColorInputLUT[cc.a][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.a][RED_INP] & 0xff);
-			b = ((*m_ColorInputLUT[cc.b][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.b][RED_INP] & 0xff);
-			for (int i = 0; i < 3; i++)
-			{
-				InputReg.c = *m_ColorInputLUT[cc.c][i];
-				InputReg.d = *m_ColorInputLUT[cc.d][i];
-				Reg[cc.dest][BLU_C + i] = InputReg.d + ((a == b) ? InputReg.c : 0);
-			}
-		}
-		break;
-	case TEVCMP_BGR24_GT:
-		{
-			a = ((*m_ColorInputLUT[cc.a][BLU_C] & 0xff) << 16) | ((*m_ColorInputLUT[cc.a][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.a][RED_INP] & 0xff);
-			b = ((*m_ColorInputLUT[cc.b][BLU_C] & 0xff) << 16) | ((*m_ColorInputLUT[cc.b][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.b][RED_INP] & 0xff);
-			for (int i = 0; i < 3; i++)
-			{
-				InputReg.c = *m_ColorInputLUT[cc.c][i];
-				InputReg.d = *m_ColorInputLUT[cc.d][i];
-				Reg[cc.dest][BLU_C + i] = InputReg.d + ((a > b) ? InputReg.c : 0);
-			}
-		}
-		break;
-	case TEVCMP_BGR24_EQ:
-		{
-			a = ((*m_ColorInputLUT[cc.a][BLU_C] & 0xff) << 16) | ((*m_ColorInputLUT[cc.a][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.a][RED_INP] & 0xff);
-			b = ((*m_ColorInputLUT[cc.b][BLU_C] & 0xff) << 16) | ((*m_ColorInputLUT[cc.b][GRN_C] & 0xff) << 8) | (*m_ColorInputLUT[cc.b][RED_INP] & 0xff);
-			for (int i = 0; i < 3; i++)
-			{
-				InputReg.c = *m_ColorInputLUT[cc.c][i];
-				InputReg.d = *m_ColorInputLUT[cc.d][i];
-				Reg[cc.dest][BLU_C + i] = InputReg.d + ((a == b) ? InputReg.c : 0);
-			}
-		}
-		break;
-	case TEVCMP_RGB8_GT:
-		for (int i = 0; i < 3; i++)
-		{
-			InputReg.a = *m_ColorInputLUT[cc.a][i];
-			InputReg.b = *m_ColorInputLUT[cc.b][i];
-			InputReg.c = *m_ColorInputLUT[cc.c][i];
-			InputReg.d = *m_ColorInputLUT[cc.d][i];
-			Reg[cc.dest][BLU_C + i] = InputReg.d + ((InputReg.a > InputReg.b) ? InputReg.c : 0);
-		}
-		break;
-	case TEVCMP_RGB8_EQ:
-		for (int i = 0; i < 3; i++)
-		{
-			InputReg.a = *m_ColorInputLUT[cc.a][i];
-			InputReg.b = *m_ColorInputLUT[cc.b][i];
-			InputReg.c = *m_ColorInputLUT[cc.c][i];
-			InputReg.d = *m_ColorInputLUT[cc.d][i];
-			Reg[cc.dest][BLU_C + i] = InputReg.d + ((InputReg.a == InputReg.b) ? InputReg.c : 0);
-		}
-		break;
 	}
 }
 
-void Tev::DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac)
+void Tev::DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac, const InputRegType inputs[4])
 {
-	InputRegType InputReg;
-
-	InputReg.a = m_AlphaInputLUT[ac.a][ALP_C];
-	InputReg.b = m_AlphaInputLUT[ac.b][ALP_C];
-	InputReg.c = m_AlphaInputLUT[ac.c][ALP_C];
-	InputReg.d = m_AlphaInputLUT[ac.d][ALP_C];
+	const InputRegType& InputReg = inputs[ALP_C];
 
 	u16 c = InputReg.c + (InputReg.c >> 7);
 
 	s32 temp = InputReg.a * (256 - c) + (InputReg.b * c);
-	temp = ac.op?(-temp >> 8):(temp >> 8);
+	temp <<= m_ScaleLShiftLUT[ac.shift];
+	temp += (ac.shift != 3) ? 0 : (ac.op == 1) ? 127 : 128;
+	temp = ac.op ? (-temp >> 8) : (temp >> 8);
 
-	s32 result = InputReg.d + temp + m_BiasLUT[ac.bias];
-	result = result << m_ScaleLShiftLUT[ac.shift];
+	s32 result = ((InputReg.d + m_BiasLUT[ac.bias]) << m_ScaleLShiftLUT[ac.shift]) + temp;
 	result = result >> m_ScaleRShiftLUT[ac.shift];
 
 	Reg[ac.dest][ALP_C] = result;
 }
 
-void Tev::DrawAlphaCompare(TevStageCombiner::AlphaCombiner &ac)
+void Tev::DrawAlphaCompare(TevStageCombiner::AlphaCombiner& ac, const InputRegType inputs[4])
 {
-	int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here
-
-	u32 a;
-	u32 b;
-
-	InputRegType InputReg;
-
-	switch (cmp) {
+	switch ((ac.shift<<1)|ac.op|8) // encoded compare mode
+	{
 	case TEVCMP_R8_GT:
-		{
-			a = m_AlphaInputLUT[ac.a][RED_C] & 0xff;
-			b = m_AlphaInputLUT[ac.b][RED_C] & 0xff;
-			InputReg.c = m_AlphaInputLUT[ac.c][ALP_C];
-			InputReg.d = m_AlphaInputLUT[ac.d][ALP_C];
-			Reg[ac.dest][ALP_C] = InputReg.d + ((a > b) ? InputReg.c : 0);
-		}
+		Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((inputs[RED_C].a > inputs[RED_C].b) ? inputs[ALP_C].c : 0);
 		break;
 
 	case TEVCMP_R8_EQ:
-		{
-			a = m_AlphaInputLUT[ac.a][RED_C] & 0xff;
-			b = m_AlphaInputLUT[ac.b][RED_C] & 0xff;
-			InputReg.c = m_AlphaInputLUT[ac.c][ALP_C];
-			InputReg.d = m_AlphaInputLUT[ac.d][ALP_C];
-			Reg[ac.dest][ALP_C] = InputReg.d + ((a == b) ? InputReg.c : 0);
-		}
+		Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((inputs[RED_C].a == inputs[RED_C].b) ? inputs[ALP_C].c : 0);
 		break;
+
 	case TEVCMP_GR16_GT:
 		{
-			a = ((m_AlphaInputLUT[ac.a][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.a][RED_C] & 0xff);
-			b = ((m_AlphaInputLUT[ac.b][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.b][RED_C] & 0xff);
-			InputReg.c = m_AlphaInputLUT[ac.c][ALP_C];
-			InputReg.d = m_AlphaInputLUT[ac.d][ALP_C];
-			Reg[ac.dest][ALP_C] = InputReg.d + ((a > b) ? InputReg.c : 0);
+			u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
+			u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
+			Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a > b) ? inputs[ALP_C].c : 0);
 		}
 		break;
+
 	case TEVCMP_GR16_EQ:
 		{
-			a = ((m_AlphaInputLUT[ac.a][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.a][RED_C] & 0xff);
-			b = ((m_AlphaInputLUT[ac.b][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.b][RED_C] & 0xff);
-			InputReg.c = m_AlphaInputLUT[ac.c][ALP_C];
-			InputReg.d = m_AlphaInputLUT[ac.d][ALP_C];
-			Reg[ac.dest][ALP_C] = InputReg.d + ((a == b) ? InputReg.c : 0);
+			u32 a = (inputs[GRN_C].a << 8) | inputs[RED_C].a;
+			u32 b = (inputs[GRN_C].b << 8) | inputs[RED_C].b;
+			Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a == b) ? inputs[ALP_C].c : 0);
 		}
 		break;
+
 	case TEVCMP_BGR24_GT:
 		{
-			a = ((m_AlphaInputLUT[ac.a][BLU_C] & 0xff) << 16) | ((m_AlphaInputLUT[ac.a][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.a][RED_C] & 0xff);
-			b = ((m_AlphaInputLUT[ac.b][BLU_C] & 0xff) << 16) | ((m_AlphaInputLUT[ac.b][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.b][RED_C] & 0xff);
-			InputReg.c = m_AlphaInputLUT[ac.c][ALP_C];
-			InputReg.d = m_AlphaInputLUT[ac.d][ALP_C];
-			Reg[ac.dest][ALP_C] = InputReg.d + ((a > b) ? InputReg.c : 0);
+			u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
+			u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
+			Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a > b) ? inputs[ALP_C].c : 0);
 		}
 		break;
+
 	case TEVCMP_BGR24_EQ:
 		{
-			a = ((m_AlphaInputLUT[ac.a][BLU_C] & 0xff) << 16) | ((m_AlphaInputLUT[ac.a][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.a][RED_C] & 0xff);
-			b = ((m_AlphaInputLUT[ac.b][BLU_C] & 0xff) << 16) | ((m_AlphaInputLUT[ac.b][GRN_C] & 0xff) << 8) | (m_AlphaInputLUT[ac.b][RED_C] & 0xff);
-			InputReg.c = m_AlphaInputLUT[ac.c][ALP_C];
-			InputReg.d = m_AlphaInputLUT[ac.d][ALP_C];
-			Reg[ac.dest][ALP_C] = InputReg.d + ((a == b) ? InputReg.c : 0);
+			u32 a = (inputs[BLU_C].a << 16) | (inputs[GRN_C].a << 8) | inputs[RED_C].a;
+			u32 b = (inputs[BLU_C].b << 16) | (inputs[GRN_C].b << 8) | inputs[RED_C].b;
+			Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((a == b) ? inputs[ALP_C].c : 0);
 		}
 		break;
+
 	case TEVCMP_A8_GT:
-		{
-			InputReg.a = m_AlphaInputLUT[ac.a][ALP_C];
-			InputReg.b = m_AlphaInputLUT[ac.b][ALP_C];
-			InputReg.c = m_AlphaInputLUT[ac.c][ALP_C];
-			InputReg.d = m_AlphaInputLUT[ac.d][ALP_C];
-			Reg[ac.dest][ALP_C] = InputReg.d + ((InputReg.a > InputReg.b) ? InputReg.c : 0);
-		}
+		Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((inputs[ALP_C].a > inputs[ALP_C].b) ? inputs[ALP_C].c : 0);
 		break;
+
 	case TEVCMP_A8_EQ:
-		{
-			InputReg.a = m_AlphaInputLUT[ac.a][ALP_C];
-			InputReg.b = m_AlphaInputLUT[ac.b][ALP_C];
-			InputReg.c = m_AlphaInputLUT[ac.c][ALP_C];
-			InputReg.d = m_AlphaInputLUT[ac.d][ALP_C];
-			Reg[ac.dest][ALP_C] = InputReg.d + ((InputReg.a == InputReg.b) ? InputReg.c : 0);
-		}
+		Reg[ac.dest][ALP_C] = inputs[ALP_C].d + ((inputs[ALP_C].a == inputs[ALP_C].b) ? inputs[ALP_C].c : 0);
 		break;
 	}
 }
@@ -666,10 +577,23 @@ void Tev::Draw()
 		SetRasColor(order.getColorChan(stageOdd), ac.rswap * 2);
 
 		// combine inputs
+		InputRegType inputs[4];
+		for (int i = 0; i < 3; i++)
+		{
+			inputs[BLU_C + i].a = *m_ColorInputLUT[cc.a][i];
+			inputs[BLU_C + i].b = *m_ColorInputLUT[cc.b][i];
+			inputs[BLU_C + i].c = *m_ColorInputLUT[cc.c][i];
+			inputs[BLU_C + i].d = *m_ColorInputLUT[cc.d][i];
+		}
+		inputs[ALP_C].a = *m_AlphaInputLUT[ac.a];
+		inputs[ALP_C].b = *m_AlphaInputLUT[ac.b];
+		inputs[ALP_C].c = *m_AlphaInputLUT[ac.c];
+		inputs[ALP_C].d = *m_AlphaInputLUT[ac.d];
+
 		if (cc.bias != 3)
-			DrawColorRegular(cc);
+			DrawColorRegular(cc, inputs);
 		else
-			DrawColorCompare(cc);
+			DrawColorCompare(cc, inputs);
 
 		if (cc.clamp)
 		{
@@ -685,9 +609,9 @@ void Tev::Draw()
 		}
 
 		if (ac.bias != 3)
-			DrawAlphaRegular(ac);
+			DrawAlphaRegular(ac, inputs);
 		else
-			DrawAlphaCompare(ac);
+			DrawAlphaCompare(ac, inputs);
 
 		if (ac.clamp)
 			Reg[ac.dest][ALP_C] = Clamp255(Reg[ac.dest][ALP_C]);
diff --git a/Source/Core/VideoBackends/Software/Tev.h b/Source/Core/VideoBackends/Software/Tev.h
index 617dee842b..ecb5fde9f0 100644
--- a/Source/Core/VideoBackends/Software/Tev.h
+++ b/Source/Core/VideoBackends/Software/Tev.h
@@ -60,10 +60,10 @@ class Tev
 
 	void SetRasColor(int colorChan, int swaptable);
 
-	void DrawColorRegular(TevStageCombiner::ColorCombiner &cc);
-	void DrawColorCompare(TevStageCombiner::ColorCombiner &cc);
-	void DrawAlphaRegular(TevStageCombiner::AlphaCombiner &ac);
-	void DrawAlphaCompare(TevStageCombiner::AlphaCombiner &ac);
+	void DrawColorRegular(TevStageCombiner::ColorCombiner& cc, const InputRegType inputs[4]);
+	void DrawColorCompare(TevStageCombiner::ColorCombiner& cc, const InputRegType inputs[4]);
+	void DrawAlphaRegular(TevStageCombiner::AlphaCombiner& ac, const InputRegType inputs[4]);
+	void DrawAlphaCompare(TevStageCombiner::AlphaCombiner& ac, const InputRegType inputs[4]);
 
 	void Indirect(unsigned int stageNum, s32 s, s32 t);
 
diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp
index 29399a253f..85d25b2cac 100644
--- a/Source/Core/VideoCommon/PixelShaderGen.cpp
+++ b/Source/Core/VideoCommon/PixelShaderGen.cpp
@@ -90,27 +90,6 @@ static const char *tevKSelTableA[] =
 	I_KCOLORS"[3].a", // K3_A = 0x1F
 };
 
-static const char *tevScaleTable[] =
-{
-	"", // SCALE_1
-	" << 1", // SCALE_2
-	" << 2", // SCALE_4
-	" >> 1", // DIVIDE_2
-};
-
-static const char *tevBiasTable[] =
-{
-	"", // ZERO,
-	"+ 128", // ADDHALF,
-	"- 128", // SUBHALF,
-	"",
-};
-
-static const char *tevOpTable[] = {
-	"+", // TEVOP_ADD = 0,
-	"-", // TEVOP_SUB = 1,
-};
-
 static const char *tevCInputTable[] =
 {
 	"prev.rgb", // CPREV,
@@ -133,14 +112,14 @@ static const char *tevCInputTable[] =
 
 static const char *tevAInputTable[] =
 {
-	"prev", // APREV,
-	"c0", // A0,
-	"c1", // A1,
-	"c2", // A2,
-	"textemp", // TEXA,
-	"rastemp", // RASA,
-	"konsttemp", // KONST, (hw1 had quarter)
-	"int4(0,0,0,0)", // ZERO
+	"prev.a", // APREV,
+	"c0.a", // A0,
+	"c1.a", // A1,
+	"c2.a", // A2,
+	"textemp.a", // TEXA,
+	"rastemp.a", // RASA,
+	"konsttemp.a", // KONST, (hw1 had quarter)
+	"0", // ZERO
 };
 
 static const char *tevRasTable[] =
@@ -161,6 +140,7 @@ static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" };
 static char text[16384];
 
 template<class T> static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, API_TYPE ApiType, const char swapModeTable[4][5]);
+template<class T> static inline void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift);
 template<class T> static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType);
 template<class T> static inline void WriteAlphaTest(T& out, pixel_shader_uid_data& uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth);
 template<class T> static inline void WriteFog(T& out, pixel_shader_uid_data& uid_data);
@@ -343,7 +323,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
 	          "\tint3 comp16 = int3(1, 256, 0), comp24 = int3(1, 256, 256*256);\n"
 	          "\tint alphabump=0;\n"
 	          "\tint3 tevcoord=int3(0, 0, 0);\n"
-	          "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n\n");
+	          "\tint2 wrappedcoord=int2(0,0), tempcoord=int2(0,0);\n"
+	          "\tint4 tevin_a=int4(0,0,0,0),tevin_b=int4(0,0,0,0),tevin_c=int4(0,0,0,0),tevin_d=int4(0,0,0,0);\n\n"); // tev combiner inputs
 	// tev combiner inputs
 	if (ApiType == API_OPENGL)
 	{
@@ -778,44 +759,35 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
 	if (ac.dest >= GX_TEVREG0 && ac.dest <= GX_TEVREG2)
 		out.SetConstantsUsed(C_COLORS+ac.dest, C_COLORS+ac.dest);
 
+
+	out.Write("tevin_a = int4(%s, %s)&255;\n", tevCInputTable[cc.a], tevAInputTable[ac.a]);
+	out.Write("tevin_b = int4(%s, %s)&255;\n", tevCInputTable[cc.b], tevAInputTable[ac.b]);
+	out.Write("tevin_c = int4(%s, %s)&255;\n", tevCInputTable[cc.c], tevAInputTable[ac.c]);
+	out.Write("tevin_d = int4(%s, %s);\n", tevCInputTable[cc.d], tevAInputTable[ac.d]);
+
 	out.Write("\t// color combine\n");
 	out.Write("\t%s = clamp(", tevCOutputTable[cc.dest]);
-
-	// combine the color channel
-	if (cc.bias != TevBias_COMPARE) // if not compare
+	if (cc.bias != TevBias_COMPARE)
 	{
-		//normal color combiner goes here
-		if (cc.shift > TEVSCALE_1)
-			out.Write("(");
-
-		if (!(cc.d == TEVCOLORARG_ZERO && cc.op == TEVOP_ADD))
-			out.Write("%s %s ", tevCInputTable[cc.d], tevOpTable[cc.op]);
-
-		out.Write("((%s&255) * (int3(255,255,255) - (%s&255)) + (%s&255) * (%s&255)) / 255", tevCInputTable[cc.a], tevCInputTable[cc.c], tevCInputTable[cc.b], tevCInputTable[cc.c]);
-
-		out.Write(" %s", tevBiasTable[cc.bias]);
-
-		if (cc.shift > TEVSCALE_1)
-			out.Write(")%s", tevScaleTable[cc.shift]);
+		WriteTevRegular(out, "rgb", cc.bias, cc.op, cc.clamp, cc.shift);
 	}
 	else
 	{
 		const char *function_table[] =
 		{
-			"(((%s.r&255) > %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_GT
-			"(((%s.r&255) == %s.r) ? (%s&255): int3(0,0,0))", // TEVCMP_R8_EQ
-			"((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_GT
-			"((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s&255): int3(0,0,0))", // TEVCMP_GR16_EQ
-			"((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))", // TEVCMP_BGR24_GT
-			"((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s&255): int3(0,0,0))", // TEVCMP_BGR24_EQ
-			"int3(max(sign(int3((%s.rgb&255)) - int3((%s.rgb&255))), int3(0,0,0)) * (%s&255))", // TEVCMP_RGB8_GT
-			"int3((int3(255,255,255) - max(sign(abs(int3((%s.rgb&255)) - int3((%s.rgb&255)))), int3(0,0,0))) * (%s&255))" // TEVCMP_RGB8_EQ
+			"((tevin_a.r > tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_R8_GT
+			"((tevin_a.r == tevin_b.r) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_R8_EQ
+			"((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_GR16_GT
+			"((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_GR16_EQ
+			"((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_BGR24_GT
+			"((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.rgb : int3(0,0,0))", // TEVCMP_BGR24_EQ
+			"(max(sign(tevin_a.rgb - tevin_b.rgb), int3(0,0,0)) * tevin_c.rgb)", // TEVCMP_RGB8_GT
+			"((int3(1,1,1) - max(sign(abs(tevin_a.rgb - tevin_b.rgb)), int3(0,0,0))) * tevin_c.rgb)" // TEVCMP_RGB8_EQ
 		};
 
 		int mode = (cc.shift<<1)|cc.op;
-		out.Write(" %s + ", tevCInputTable[cc.d]);
-		out.Write(function_table[mode], tevCInputTable[cc.a],
-		          tevCInputTable[cc.b], tevCInputTable[cc.c]);
+		out.Write(" tevin_d.rgb + ");
+		out.Write(function_table[mode]);
 	}
 	if (cc.clamp)
 		out.Write(", int3(0,0,0), int3(255,255,255))");
@@ -825,41 +797,27 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
 
 	out.Write("\t// alpha combine\n");
 	out.Write("\t%s = clamp(", tevAOutputTable[ac.dest]);
-
-	if (ac.bias != TevBias_COMPARE) // if not compare
+	if (ac.bias != TevBias_COMPARE)
 	{
-		//normal alpha combiner goes here
-		if (ac.shift > 0)
-			out.Write("(");
-
-		if (!(ac.d == TEVALPHAARG_ZERO && ac.op == TEVOP_ADD))
-			out.Write("%s.a %s ", tevAInputTable[ac.d], tevOpTable[ac.op]);
-
-		out.Write("((%s.a&255) * (255 - (%s.a&255)) + (%s.a&255) * (%s.a&255)) / 255", tevAInputTable[ac.a], tevAInputTable[ac.c], tevAInputTable[ac.b], tevAInputTable[ac.c]);
-
-		out.Write(" %s",tevBiasTable[ac.bias]);
-
-		if (ac.shift>0)
-			out.Write(")%s", tevScaleTable[ac.shift]);
+		WriteTevRegular(out, "a", ac.bias, ac.op, ac.clamp, ac.shift);
 	}
 	else
 	{
 		const char *function_table[] =
 		{
-			"(((%s.r&255) > (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_GT
-			"(((%s.r&255) == (%s.r&255)) ? (%s.a&255) : 0)", // TEVCMP_R8_EQ
-			"((idot((%s.rgb&255), comp16) > idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_GT
-			"((idot((%s.rgb&255), comp16) == idot((%s.rgb&255), comp16)) ? (%s.a&255) : 0)", // TEVCMP_GR16_EQ
-			"((idot((%s.rgb&255), comp24) > idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_GT
-			"((idot((%s.rgb&255), comp24) == idot((%s.rgb&255), comp24)) ? (%s.a&255) : 0)", // TEVCMP_BGR24_EQ
-			"(((%s.a&255) > (%s.a&255)) ? (%s.a&255) : 0)", // TEVCMP_A8_GT
-			"(((%s.a&255) == (%s.a&255)) ? (%s.a&255) : 0)" // TEVCMP_A8_EQ
+			"((tevin_a.r > tevin_b.r) ? tevin_c.a : 0)", // TEVCMP_R8_GT
+			"((tevin_a.r == tevin_b.r) ? tevin_c.a : 0)", // TEVCMP_R8_EQ
+			"((idot(tevin_a.rgb, comp16) > idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // TEVCMP_GR16_GT
+			"((idot(tevin_a.rgb, comp16) == idot(tevin_b.rgb, comp16)) ? tevin_c.a : 0)", // TEVCMP_GR16_EQ
+			"((idot(tevin_a.rgb, comp24) > idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // TEVCMP_BGR24_GT
+			"((idot(tevin_a.rgb, comp24) == idot(tevin_b.rgb, comp24)) ? tevin_c.a : 0)", // TEVCMP_BGR24_EQ
+			"((tevin_a.a > tevin_b.a) ? tevin_c.a : 0)", // TEVCMP_A8_GT
+			"((tevin_a.a == tevin_b.a) ? tevin_c.a : 0)" // TEVCMP_A8_EQ
 		};
 
 		int mode = (ac.shift<<1)|ac.op;
-		out.Write(" %s.a + ", tevAInputTable[ac.d]);
-		out.Write(function_table[mode], tevAInputTable[ac.a],
-		          tevAInputTable[ac.b], tevAInputTable[ac.c]);
+		out.Write(" tevin_d.a + ");
+		out.Write(function_table[mode]);
 	}
 	if (ac.clamp)
 		out.Write(", 0, 255)");
@@ -869,6 +827,59 @@ static inline void WriteStage(T& out, pixel_shader_uid_data& uid_data, int n, AP
 	out.Write(";\n\n");
 }
 
+template<class T>
+static inline void WriteTevRegular(T& out, const char* components, int bias, int op, int clamp, int shift)
+{
+	const char *tevScaleTableLeft[] =
+	{
+		"", // SCALE_1
+		" << 1", // SCALE_2
+		" << 2", // SCALE_4
+		"", // DIVIDE_2
+	};
+
+	const char *tevScaleTableRight[] =
+	{
+		"", // SCALE_1
+		"", // SCALE_2
+		"", // SCALE_4
+		" >> 1", // DIVIDE_2
+	};
+
+	const char *tevLerpBias[] = // indexed by 2*op+(shift==3)
+	{
+		"",
+		" + 128",
+		"",
+		" + 127",
+	};
+
+	const char *tevBiasTable[] =
+	{
+		"", // ZERO,
+		" + 128", // ADDHALF,
+		" - 128", // SUBHALF,
+		"",
+	};
+
+	const char *tevOpTable[] = {
+		"+", // TEVOP_ADD = 0,
+		"-", // TEVOP_SUB = 1,
+	};
+
+	// Regular TEV stage: (d + bias + lerp(a,b,c)) * scale
+	// The GC/Wii GPU uses a very sophisticated algorithm for scale-lerping:
+	// - c is scaled from 0..255 to 0..256, which allows dividing the result by 256 instead of 255
+	// - if scale is bigger than one, it is moved inside the lerp calculation for increased accuracy
+	// - a rounding bias is added before dividing by 256
+	out.Write("(((tevin_d.%s%s)%s)", components, tevBiasTable[bias], tevScaleTableLeft[shift]);
+	out.Write(" %s ", tevOpTable[op]);
+	out.Write("((((tevin_a.%s*256 + (tevin_b.%s-tevin_a.%s)*(tevin_c.%s+(tevin_c.%s>>7)))%s)%s)>>8)",
+	          components, components, components, components, components,
+	          tevScaleTableLeft[shift], tevLerpBias[2*op+(shift==3)]);
+	out.Write(")%s", tevScaleTableRight[shift]);
+}
+
 template<class T>
 static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType)
 {