mirror of https://github.com/PCSX2/pcsx2.git
IPU: Fix regression from #10617
This commit is contained in:
parent
3d13c5d13c
commit
0bbde2ca52
|
@ -55,6 +55,11 @@ using r128 = __m128i;
|
||||||
return _mm_set_epi64x(0, val);
|
return _mm_set_epi64x(0, val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[maybe_unused]] __fi static r128 r128_from_u32_dup(u32 val)
|
||||||
|
{
|
||||||
|
return _mm_set1_epi32(val);
|
||||||
|
}
|
||||||
|
|
||||||
[[maybe_unused]] __fi static r128 r128_from_u32x4(u32 lo0, u32 lo1, u32 hi0, u32 hi1)
|
[[maybe_unused]] __fi static r128 r128_from_u32x4(u32 lo0, u32 lo1, u32 hi0, u32 hi1)
|
||||||
{
|
{
|
||||||
return _mm_setr_epi32(lo0, lo1, hi0, hi1);
|
return _mm_setr_epi32(lo0, lo1, hi0, hi1);
|
||||||
|
@ -150,6 +155,11 @@ using r128 = uint32x4_t;
|
||||||
return vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(val), vcreate_u64(0)));
|
return vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(val), vcreate_u64(0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[maybe_unused]] __fi static r128 r128_from_u32_dup(u32 val)
|
||||||
|
{
|
||||||
|
return vdupq_n_u32(val);
|
||||||
|
}
|
||||||
|
|
||||||
[[maybe_unused]] __fi static r128 r128_from_u32x4(u32 lo0, u32 lo1, u32 hi0, u32 hi1)
|
[[maybe_unused]] __fi static r128 r128_from_u32x4(u32 lo0, u32 lo1, u32 hi0, u32 hi1)
|
||||||
{
|
{
|
||||||
const u32 values[4] = {lo0, lo1, hi0, hi1};
|
const u32 values[4] = {lo0, lo1, hi0, hi1};
|
||||||
|
|
|
@ -346,19 +346,19 @@ __ri static void IDCT_Copy(s16* block, u8* dest, const int stride)
|
||||||
|
|
||||||
|
|
||||||
// stride = increment for dest in 16-bit units (typically either 8 [128 bits] or 16 [256 bits]).
|
// stride = increment for dest in 16-bit units (typically either 8 [128 bits] or 16 [256 bits]).
|
||||||
__ri static void IDCT_Add(s16* block, s16* dest, const int stride)
|
__ri static void IDCT_Add(const int last, s16* block, s16* dest, const int stride)
|
||||||
{
|
{
|
||||||
// on the IPU, stride is always assured to be multiples of QWC (bottom 3 bits are 0).
|
// on the IPU, stride is always assured to be multiples of QWC (bottom 3 bits are 0).
|
||||||
|
|
||||||
if ((block[0] & 7) == 4)
|
if (last != 129 || (block[0] & 7) == 4)
|
||||||
{
|
{
|
||||||
IDCT_Block(block);
|
IDCT_Block(block);
|
||||||
|
|
||||||
__m128 zero = _mm_setzero_ps();
|
const r128 zero = r128_zero();
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
{
|
{
|
||||||
_mm_store_ps((float*)dest, _mm_load_ps((float*)block));
|
r128_store(dest, r128_load(block));
|
||||||
_mm_store_ps((float*)block, zero);
|
r128_store(block, zero);
|
||||||
|
|
||||||
dest += stride;
|
dest += stride;
|
||||||
block += 8;
|
block += 8;
|
||||||
|
@ -366,14 +366,12 @@ __ri static void IDCT_Add(s16* block, s16* dest, const int stride)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
s16 DC = ((int)block[0] + 4) >> 3;
|
const u16 DC = static_cast<u16>((static_cast<s32>(block[0]) + 4) >> 3);
|
||||||
s16 dcf[2] = {DC, DC};
|
const r128 dc128 = r128_from_u32_dup(static_cast<u32>(DC) | (static_cast<u32>(DC) << 16));
|
||||||
block[0] = block[63] = 0;
|
block[0] = block[63] = 0;
|
||||||
|
|
||||||
__m128 dc128 = _mm_set_ps1(*(float*)dcf);
|
|
||||||
|
|
||||||
for (int i = 0; i < 8; ++i)
|
for (int i = 0; i < 8; ++i)
|
||||||
_mm_store_ps((float*)(dest + (stride * i)), dc128);
|
r128_store((dest + (stride * i)), dc128);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -942,20 +940,14 @@ __ri static bool slice_intra_DCT(const int cc, u8 * const dest, const int stride
|
||||||
|
|
||||||
__ri static bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
|
__ri static bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
|
||||||
{
|
{
|
||||||
int last;
|
|
||||||
|
|
||||||
if (!skip)
|
if (!skip)
|
||||||
{
|
|
||||||
std::memset(decoder.DCTblock, 0, sizeof(decoder.DCTblock));
|
std::memset(decoder.DCTblock, 0, sizeof(decoder.DCTblock));
|
||||||
}
|
|
||||||
|
|
||||||
|
int last = 0;
|
||||||
if (!get_non_intra_block(&last))
|
if (!get_non_intra_block(&last))
|
||||||
{
|
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
|
|
||||||
IDCT_Add(decoder.DCTblock, dest, stride);
|
|
||||||
|
|
||||||
|
IDCT_Add(last, decoder.DCTblock, dest, stride);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue