gsdx sw: factorize color split in split16_2x8

This commit is contained in:
Gregory Hainaut 2016-11-18 22:48:06 +01:00
parent d58e43edbf
commit e728a14c19
5 changed files with 71 additions and 132 deletions

View File

@ -375,4 +375,36 @@ void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a)
#endif
}
void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const Xmm& src)
{
	// Emits code that splits every 16-bit lane of src into its two bytes:
	//
	//   l = src & 0xFF;         (1 left shift + 1 right shift)
	//   h = (src >> 8) & 0xFF;  (1 right shift)
	//
	// src may be the same register as l or as h. In that case the aliased
	// output is produced in place, and the other output is derived from src
	// before src gets overwritten.

	// Same precedence as an if / else-if chain: aliasing with h is tested first.
	const bool src_is_h = (src == h);
	const bool src_is_l = !src_is_h && (src == l);

#if _M_SSE >= 0x500
	// AVX: the 3-operand shifts write to a separate destination, no copy needed.
	if (src_is_l) {
		// l will overwrite src, so extract h from it first
		vpsrlw(h, src, 8);
		vpsllw(l, 8);
	} else {
		vpsllw(l, src, 8);

		if (src_is_h) {
			vpsrlw(h, 8);
		} else {
			vpsrlw(h, src, 8);
		}
	}

	// Bring the low byte back down; the left shift above cleared the high byte.
	vpsrlw(l, 8);
#else
	// SSE: shifts are destructive, so copy src into every output that is not
	// already the source register.
	if (!src_is_l) {
		movdqa(l, src);
	}

	if (!src_is_h) {
		movdqa(h, src);
	}

	psllw(l, 8);
	psrlw(l, 8);
	psrlw(h, 8);
#endif
}
#endif

View File

@ -128,6 +128,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void blendr(const Xmm& b, const Xmm& a, const Xmm& mask);
void blend8(const Xmm& a, const Xmm& b);
void blend8r(const Xmm& b, const Xmm& a);
// Emits the shifts that split each 16-bit lane of src into its low byte
// (l = src & 0xFF) and its high byte (h = (src >> 8) & 0xFF).
// src may be the same register as l or as h.
void split16_2x8(const Xmm& l, const Xmm& h, const Xmm& src);
#endif

View File

@ -866,16 +866,12 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask;
vpsllw(xmm4, xmm0, 8);
vpsrlw(xmm4, 8);
vpsrlw(xmm5, xmm0, 8);
split16_2x8(xmm4, xmm5, xmm0);
// GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask;
vpsllw(xmm0, xmm1, 8);
vpsrlw(xmm0, 8);
vpsrlw(xmm1, 8);
split16_2x8(xmm0, xmm1, xmm1);
// xmm0 = rb01
// xmm1 = ga01
@ -902,16 +898,12 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i rb10 = c10 & mask;
// GSVector4i ga10 = (c10 >> 8) & mask;
vpsrlw(xmm5, xmm2, 8);
vpsllw(xmm4, xmm2, 8);
vpsrlw(xmm4, 8);
split16_2x8(xmm4, xmm5, xmm2);
// GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask;
vpsllw(xmm2, xmm3, 8);
vpsrlw(xmm3, 8);
vpsrlw(xmm2, 8);
split16_2x8(xmm2, xmm3, xmm3);
// xmm0 = rb00
// xmm1 = ga00
@ -955,9 +947,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
vpsllw(_rb, xmm0, 8);
vpsrlw(_rb, 8);
vpsrlw(_ga, xmm0, 8);
split16_2x8(_rb, _ga, xmm0);
}
// xmm2 = rb
@ -1605,9 +1595,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
// c[2] = fd & mask;
// c[3] = (fd >> 8) & mask;
vpsllw(_dst_rb, _fd, 8);
vpsrlw(_dst_rb, 8);
vpsrlw(_dst_ga, _fd, 8);
split16_2x8(_dst_rb, _dst_ga, _fd);
break;

View File

@ -857,16 +857,12 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask;
vpsllw(xmm2, xmm6, 8);
vpsrlw(xmm2, 8);
vpsrlw(xmm6, 8);
split16_2x8(xmm2, xmm6, xmm6);
// GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask;
vpsllw(xmm3, xmm4, 8);
vpsrlw(xmm3, 8);
vpsrlw(xmm4, 8);
split16_2x8(xmm3, xmm4, xmm4);
// xmm0 = uf
// xmm2 = rb00
@ -894,16 +890,12 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i rb10 = c10 & mask;
// GSVector4i ga10 = (c10 >> 8) & mask;
vpsrlw(xmm2, xmm1, 8);
vpsllw(xmm1, 8);
vpsrlw(xmm1, 8);
split16_2x8(xmm1, xmm2, xmm1);
// GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask;
vpsrlw(xmm6, xmm5, 8);
vpsllw(xmm5, 8);
vpsrlw(xmm5, 8);
split16_2x8(xmm5, xmm6, xmm5);
// xmm0 = uf
// xmm3 = rb00
@ -950,9 +942,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
vpsllw(xmm5, xmm6, 8);
vpsrlw(xmm5, 8);
vpsrlw(xmm6, 8);
split16_2x8(xmm5, xmm6, xmm6);
}
}
@ -1452,16 +1442,12 @@ return;
// GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask;
vpsllw(xmm2, xmm6, 8);
vpsrlw(xmm2, 8);
vpsrlw(xmm6, 8);
split16_2x8(xmm2, xmm6, xmm6);
// GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask;
vpsllw(xmm3, xmm4, 8);
vpsrlw(xmm3, 8);
vpsrlw(xmm4, 8);
split16_2x8(xmm3, xmm4, xmm4);
// xmm0 = uf
// xmm2 = rb00
@ -1489,16 +1475,12 @@ return;
// GSVector4i rb10 = c10 & mask;
// GSVector4i ga10 = (c10 >> 8) & mask;
vpsrlw(xmm2, xmm1, 8);
vpsllw(xmm1, 8);
vpsrlw(xmm1, 8);
split16_2x8(xmm1, xmm2, xmm1);
// GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask;
vpsrlw(xmm6, xmm5, 8);
vpsllw(xmm5, 8);
vpsrlw(xmm5, 8);
split16_2x8(xmm5, xmm6, xmm5);
// xmm0 = uf
// xmm3 = rb00
@ -1545,9 +1527,7 @@ return;
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
vpsllw(xmm5, xmm6, 8);
vpsrlw(xmm5, 8);
vpsrlw(xmm6, 8);
split16_2x8(xmm5, xmm6, xmm6);
}
if(m_sel.mmin != 1) // !round-off mode
@ -1693,16 +1673,12 @@ return;
// GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask;
vpsllw(xmm2, xmm6, 8);
vpsrlw(xmm2, 8);
vpsrlw(xmm6, 8);
split16_2x8(xmm2, xmm6, xmm6);
// GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask;
vpsllw(xmm3, xmm4, 8);
vpsrlw(xmm3, 8);
vpsrlw(xmm4, 8);
split16_2x8(xmm3, xmm4, xmm4);
// xmm0 = uf
// xmm2 = rb00
@ -1730,16 +1706,12 @@ return;
// GSVector4i rb10 = c10 & mask;
// GSVector4i ga10 = (c10 >> 8) & mask;
vpsrlw(xmm2, xmm1, 8);
vpsllw(xmm1, 8);
vpsrlw(xmm1, 8);
split16_2x8(xmm1, xmm2, xmm1);
// GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask;
vpsrlw(xmm6, xmm5, 8);
vpsllw(xmm5, 8);
vpsrlw(xmm5, 8);
split16_2x8(xmm5, xmm6, xmm5);
// xmm0 = uf
// xmm3 = rb00
@ -1786,9 +1758,7 @@ return;
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
vpsllw(xmm5, xmm6, 8);
vpsrlw(xmm5, 8);
vpsrlw(xmm6, 8);
split16_2x8(xmm5, xmm6, xmm6);
}
vmovdqa(xmm0, ptr[m_sel.lcm ? &m_local.gd->lod.f : &m_local.temp.lod.f]);
@ -2436,9 +2406,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
// c[2] = fd & mask;
// c[3] = (fd >> 8) & mask;
vpsllw(xmm0, xmm2, 8);
vpsrlw(xmm0, 8);
vpsrlw(xmm1, xmm2, 8);
split16_2x8(xmm0, xmm1, xmm2);
break;

View File

@ -867,18 +867,12 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask;
movdqa(xmm2, xmm6);
psllw(xmm2, 8);
psrlw(xmm2, 8);
psrlw(xmm6, 8);
split16_2x8(xmm2, xmm6, xmm6);
// GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask;
movdqa(xmm3, xmm4);
psllw(xmm3, 8);
psrlw(xmm3, 8);
psrlw(xmm4, 8);
split16_2x8(xmm3, xmm4, xmm4);
// xmm0 = uf
// xmm2 = rb00
@ -906,18 +900,12 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// GSVector4i rb10 = c10 & mask;
// GSVector4i ga10 = (c10 >> 8) & mask;
movdqa(xmm2, xmm1);
psllw(xmm1, 8);
psrlw(xmm1, 8);
psrlw(xmm2, 8);
split16_2x8(xmm1, xmm2, xmm1);
// GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask;
movdqa(xmm6, xmm5);
psllw(xmm5, 8);
psrlw(xmm5, 8);
psrlw(xmm6, 8);
split16_2x8(xmm5, xmm6, xmm5);
// xmm0 = uf
// xmm3 = rb00
@ -965,10 +953,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
movdqa(xmm5, xmm6);
psllw(xmm5, 8);
psrlw(xmm5, 8);
psrlw(xmm6, 8);
split16_2x8(xmm5, xmm6, xmm6);
}
}
@ -1467,18 +1452,12 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask;
movdqa(xmm2, xmm6);
psrlw(xmm6, 8);
psllw(xmm2, 8);
psrlw(xmm2, 8);
split16_2x8(xmm2, xmm6, xmm6);
// GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask;
movdqa(xmm3, xmm4);
psrlw(xmm4, 8);
psllw(xmm3, 8);
psrlw(xmm3, 8);
split16_2x8(xmm3, xmm4, xmm4);
// xmm0 = uf
// xmm2 = rb00
@ -1506,18 +1485,12 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// GSVector4i rb10 = c10 & mask;
// GSVector4i ga10 = (c10 >> 8) & mask;
movdqa(xmm2, xmm1);
psllw(xmm1, 8);
psrlw(xmm1, 8);
psrlw(xmm2, 8);
split16_2x8(xmm1, xmm2, xmm1);
// GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask;
movdqa(xmm6, xmm5);
psllw(xmm5, 8);
psrlw(xmm5, 8);
psrlw(xmm6, 8);
split16_2x8(xmm5, xmm6, xmm5);
// xmm0 = uf
// xmm3 = rb00
@ -1565,10 +1538,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
movdqa(xmm5, xmm6);
psllw(xmm5, 8);
psrlw(xmm5, 8);
psrlw(xmm6, 8);
split16_2x8(xmm5, xmm6, xmm6);
}
if(m_sel.mmin != 1) // !round-off mode
@ -1720,18 +1690,12 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// GSVector4i rb00 = c00 & mask;
// GSVector4i ga00 = (c00 >> 8) & mask;
movdqa(xmm2, xmm6);
psllw(xmm2, 8);
psrlw(xmm2, 8);
psrlw(xmm6, 8);
split16_2x8(xmm2, xmm6, xmm6);
// GSVector4i rb01 = c01 & mask;
// GSVector4i ga01 = (c01 >> 8) & mask;
movdqa(xmm3, xmm4);
psllw(xmm3, 8);
psrlw(xmm3, 8);
psrlw(xmm4, 8);
split16_2x8(xmm3, xmm4, xmm4);
// xmm0 = uf
// xmm2 = rb00
@ -1759,18 +1723,12 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// GSVector4i rb10 = c10 & mask;
// GSVector4i ga10 = (c10 >> 8) & mask;
movdqa(xmm2, xmm1);
psllw(xmm1, 8);
psrlw(xmm1, 8);
psrlw(xmm2, 8);
split16_2x8(xmm1, xmm2, xmm1);
// GSVector4i rb11 = c11 & mask;
// GSVector4i ga11 = (c11 >> 8) & mask;
movdqa(xmm6, xmm5);
psllw(xmm5, 8);
psrlw(xmm5, 8);
psrlw(xmm6, 8);
split16_2x8(xmm5, xmm6, xmm5);
// xmm0 = uf
// xmm3 = rb00
@ -1818,10 +1776,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
// c[0] = c00 & mask;
// c[1] = (c00 >> 8) & mask;
movdqa(xmm5, xmm6);
psllw(xmm5, 8);
psrlw(xmm5, 8);
psrlw(xmm6, 8);
// c00 is held in xmm6, so it is the source: xmm5 = c00 & mask, xmm6 = (c00 >> 8) & mask
split16_2x8(xmm5, xmm6, xmm6);
}
movdqa(xmm0, ptr[m_sel.lcm ? &m_local.gd->lod.f : &m_local.temp.lod.f]);
@ -2506,12 +2461,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
// c[2] = fd & mask;
// c[3] = (fd >> 8) & mask;
movdqa(xmm0, xmm2);
movdqa(xmm1, xmm2);
psllw(xmm0, 8);
psrlw(xmm0, 8);
psrlw(xmm1, 8);
split16_2x8(xmm0, xmm1, xmm2);
break;