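The fast path added in the previous commit only requires SSSE3, not SSE4: gate it on _M_SSE >= 0x301 plus a runtime cpu_info.bSSSE3 check instead of _M_SSE >= 0x401.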
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6769 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
53474403e2
commit
a6acc99a89
|
@ -1068,8 +1068,10 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
|
|||
for (int y = 0; y < height; y += 4)
|
||||
for (int x = 0; x < width; x += 8)
|
||||
{
|
||||
#if _M_SSE >= 0x401
|
||||
// SSE4 intrinsics: About 5-10% faster than SSE2 version
|
||||
#if _M_SSE >= 0x301
|
||||
if (cpu_info.bSSSE3)
|
||||
{
|
||||
// SSSE3 intrinsics: About 5-10% faster than SSE2 version
|
||||
for (int iy = 0; iy < 4; ++iy, src+=8)
|
||||
{
|
||||
const __m128i mask3210 = _mm_set_epi8(3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0);
|
||||
|
@ -1086,7 +1088,9 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
|
|||
_mm_storeu_si128(quaddst, rgba0);
|
||||
_mm_storeu_si128(quaddst+1, rgba1);
|
||||
}
|
||||
#else
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
// JSD optimized with SSE2 intrinsics.
|
||||
// Produces an ~86% speed increase over reference C implementation.
|
||||
// Each loop iteration processes 4 rows from 4 64-bit reads.
|
||||
|
@ -1165,7 +1169,7 @@ PC_TexFormat TexDecoder_Decode_RGBA(u32 * dst, const u8 * src, int width, int he
|
|||
_mm_storeu_si128(quaddst+1, rgba7);
|
||||
|
||||
src += 8;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
// Reference C implementation
|
||||
|
|
Loading…
Reference in New Issue