mirror of https://github.com/PCSX2/pcsx2.git
SPU2: Multi-isa resampling functions
This commit is contained in:
parent
f29346f0b7
commit
5eacc7c327
|
@ -280,6 +280,10 @@ set(pcsx2SPU2Sources
|
||||||
SPU2/Wavedump_wav.cpp
|
SPU2/Wavedump_wav.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# SPU2 sources that are not part of the shared SPU2 source list: they are added to each
# GS-${isa} static library in the foreach(isa ...) loop, or appended to pcsx2SPU2Sources
# in the else() branch of the DISABLE_ADVANCE_SIMD check.
set(pcsx2SPU2SourcesUnshared
|
||||||
|
SPU2/ReverbResample.cpp
|
||||||
|
)
|
||||||
|
|
||||||
# SPU2 headers
|
# SPU2 headers
|
||||||
set(pcsx2SPU2Headers
|
set(pcsx2SPU2Headers
|
||||||
SPU2/Debug.h
|
SPU2/Debug.h
|
||||||
|
@ -762,7 +766,7 @@ if(DISABLE_ADVANCE_SIMD)
|
||||||
# Note: ld64 (macOS's linker) does not act the same way when presented with .a files, unless linked with `-force_load` (cmake WHOLE_ARCHIVE).
|
# Note: ld64 (macOS's linker) does not act the same way when presented with .a files, unless linked with `-force_load` (cmake WHOLE_ARCHIVE).
|
||||||
set(is_first_isa "1")
|
set(is_first_isa "1")
|
||||||
foreach(isa "sse4" "avx" "avx2")
|
foreach(isa "sse4" "avx" "avx2")
|
||||||
add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared} ${pcsx2IPUSourcesUnshared})
|
add_library(GS-${isa} STATIC ${pcsx2GSSourcesUnshared} ${pcsx2IPUSourcesUnshared} ${pcsx2SPU2SourcesUnshared})
|
||||||
target_link_libraries(GS-${isa} PRIVATE PCSX2_FLAGS)
|
target_link_libraries(GS-${isa} PRIVATE PCSX2_FLAGS)
|
||||||
target_compile_definitions(GS-${isa} PRIVATE MULTI_ISA_UNSHARED_COMPILATION=isa_${isa} MULTI_ISA_IS_FIRST=${is_first_isa} ${pcsx2_defs_${isa}})
|
target_compile_definitions(GS-${isa} PRIVATE MULTI_ISA_UNSHARED_COMPILATION=isa_${isa} MULTI_ISA_IS_FIRST=${is_first_isa} ${pcsx2_defs_${isa}})
|
||||||
target_compile_options(GS-${isa} PRIVATE ${compile_options_${isa}})
|
target_compile_options(GS-${isa} PRIVATE ${compile_options_${isa}})
|
||||||
|
@ -778,6 +782,7 @@ if(DISABLE_ADVANCE_SIMD)
|
||||||
else()
|
else()
|
||||||
list(APPEND pcsx2GSSources ${pcsx2GSSourcesUnshared})
|
list(APPEND pcsx2GSSources ${pcsx2GSSourcesUnshared})
|
||||||
list(APPEND pcsx2IPUSources ${pcsx2IPUSourcesUnshared})
|
list(APPEND pcsx2IPUSources ${pcsx2IPUSourcesUnshared})
|
||||||
|
list(APPEND pcsx2SPU2Sources ${pcsx2SPU2SourcesUnshared})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# DebugTools sources
|
# DebugTools sources
|
||||||
|
|
|
@ -19,7 +19,6 @@
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
|
||||||
|
|
||||||
void V_Core::AnalyzeReverbPreset()
|
void V_Core::AnalyzeReverbPreset()
|
||||||
{
|
{
|
||||||
Console.WriteLn("Reverb Parameter Update for Core %d:", Index);
|
Console.WriteLn("Reverb Parameter Update for Core %d:", Index);
|
||||||
|
@ -55,193 +54,6 @@ void V_Core::AnalyzeReverbPreset()
|
||||||
Console.WriteLn("----------------------------------------------------------");
|
Console.WriteLn("----------------------------------------------------------");
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr u32 NUM_TAPS = 39;
|
|
||||||
// 39 tap filter, the 0's could be optimized out
|
|
||||||
static constexpr std::array<s16, 48> filter_down_coefs alignas(32) = {
|
|
||||||
-1,
|
|
||||||
0,
|
|
||||||
2,
|
|
||||||
0,
|
|
||||||
-10,
|
|
||||||
0,
|
|
||||||
35,
|
|
||||||
0,
|
|
||||||
-103,
|
|
||||||
0,
|
|
||||||
266,
|
|
||||||
0,
|
|
||||||
-616,
|
|
||||||
0,
|
|
||||||
1332,
|
|
||||||
0,
|
|
||||||
-2960,
|
|
||||||
0,
|
|
||||||
10246,
|
|
||||||
16384,
|
|
||||||
10246,
|
|
||||||
0,
|
|
||||||
-2960,
|
|
||||||
0,
|
|
||||||
1332,
|
|
||||||
0,
|
|
||||||
-616,
|
|
||||||
0,
|
|
||||||
266,
|
|
||||||
0,
|
|
||||||
-103,
|
|
||||||
0,
|
|
||||||
35,
|
|
||||||
0,
|
|
||||||
-10,
|
|
||||||
0,
|
|
||||||
2,
|
|
||||||
0,
|
|
||||||
-1,
|
|
||||||
};
|
|
||||||
|
|
||||||
static constexpr std::array<s16, 48> make_up_coefs()
|
|
||||||
{
|
|
||||||
std::array<s16, 48> ret = {};
|
|
||||||
|
|
||||||
for (u32 i = 0; i < NUM_TAPS; i++)
|
|
||||||
{
|
|
||||||
ret[i] = static_cast<s16>(std::clamp<s32>(filter_down_coefs[i] * 2, INT16_MIN, INT16_MAX));
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static constexpr std::array<s16, 48> filter_up_coefs alignas(32) = make_up_coefs();
|
|
||||||
|
|
||||||
// Applies filter_down_coefs to the RevbDownBuf window of the selected channel and
// returns 16-bit lane 0 of the reduced accumulator.
// The window starts NUM_TAPS entries before RevbSampleBufPos, wrapped into 0..63.
// NOTE(review): going by their names, mul16hrs/adds16/hadds16 are a rounding high
// multiply, a saturating add and a saturating horizontal add - confirm in GS/GSVector.h.
s32 __forceinline V_Core::ReverbDownsample(bool right)
{
	const int first = (RevbSampleBufPos - NUM_TAPS) & 63;

#if _M_SSE >= 0x501
	// GSVector8i path: 16 coefficients per step, element offsets 0, 16 and 32.
	auto taps = GSVector8i::load<true>(&filter_down_coefs[0]);
	auto samples = GSVector8i::load<false>(&RevbDownBuf[right][first]);
	auto sum = samples.mul16hrs(taps);

	for (int ofs = 16; ofs <= 32; ofs += 16)
	{
		taps = GSVector8i::load<true>(&filter_down_coefs[ofs]);
		samples = GSVector8i::load<false>(&RevbDownBuf[right][first + ofs]);
		sum = sum.adds16(samples.mul16hrs(taps));
	}

	// Add the ba() swizzle of the accumulator to itself before the horizontal passes.
	sum = sum.adds16(sum.ba());
#else
	// GSVector4i path: 8 coefficients per step, element offsets 0 through 32.
	auto taps = GSVector4i::load<true>(&filter_down_coefs[0]);
	auto samples = GSVector4i::load<false>(&RevbDownBuf[right][first]);
	auto sum = samples.mul16hrs(taps);

	for (int ofs = 8; ofs <= 32; ofs += 8)
	{
		taps = GSVector4i::load<true>(&filter_down_coefs[ofs]);
		samples = GSVector4i::load<false>(&RevbDownBuf[right][first + ofs]);
		sum = sum.adds16(samples.mul16hrs(taps));
	}
#endif

	// Both paths finish with the same three horizontal-add passes.
	for (int pass = 0; pass < 3; pass++)
		sum = sum.hadds16(sum);

	return sum.I16[0];
}
|
|
||||||
|
|
||||||
// Applies filter_up_coefs to both RevbUpBuf channels (index 0 and 1) and returns the
// pair of 16-bit lane-0 results as a StereoOut32.
// The window starts NUM_TAPS entries before RevbSampleBufPos, wrapped into 0..63.
// NOTE(review): going by their names, mul16hrs/adds16/hadds16 are a rounding high
// multiply, a saturating add and a saturating horizontal add - confirm in GS/GSVector.h.
StereoOut32 __forceinline V_Core::ReverbUpsample()
{
	const int first = (RevbSampleBufPos - NUM_TAPS) & 63;

#if _M_SSE >= 0x501
	// GSVector8i path: 16 coefficients per step, element offsets 0, 16 and 32.
	auto taps = GSVector8i::load<true>(&filter_up_coefs[0]);
	auto left = GSVector8i::load<false>(&RevbUpBuf[0][first]);
	auto right = GSVector8i::load<false>(&RevbUpBuf[1][first]);

	auto lsum = left.mul16hrs(taps);
	auto rsum = right.mul16hrs(taps);

	for (int ofs = 16; ofs <= 32; ofs += 16)
	{
		taps = GSVector8i::load<true>(&filter_up_coefs[ofs]);
		left = GSVector8i::load<false>(&RevbUpBuf[0][first + ofs]);
		right = GSVector8i::load<false>(&RevbUpBuf[1][first + ofs]);
		lsum = lsum.adds16(left.mul16hrs(taps));
		rsum = rsum.adds16(right.mul16hrs(taps));
	}

	// Add the ba() swizzle of each accumulator to itself before the horizontal passes.
	lsum = lsum.adds16(lsum.ba());
	rsum = rsum.adds16(rsum.ba());
#else
	// GSVector4i path: 8 coefficients per step, element offsets 0 through 32.
	auto taps = GSVector4i::load<true>(&filter_up_coefs[0]);
	auto left = GSVector4i::load<false>(&RevbUpBuf[0][first]);
	auto right = GSVector4i::load<false>(&RevbUpBuf[1][first]);

	auto lsum = left.mul16hrs(taps);
	auto rsum = right.mul16hrs(taps);

	for (int ofs = 8; ofs <= 32; ofs += 8)
	{
		taps = GSVector4i::load<true>(&filter_up_coefs[ofs]);
		left = GSVector4i::load<false>(&RevbUpBuf[0][first + ofs]);
		right = GSVector4i::load<false>(&RevbUpBuf[1][first + ofs]);
		lsum = lsum.adds16(left.mul16hrs(taps));
		rsum = rsum.adds16(right.mul16hrs(taps));
	}
#endif

	// Three horizontal-add passes per channel; the channels are independent of each other.
	for (int pass = 0; pass < 3; pass++)
		lsum = lsum.hadds16(lsum);

	for (int pass = 0; pass < 3; pass++)
		rsum = rsum.hadds16(rsum);

	return {lsum.I16[0], rsum.I16[0]};
}
|
|
||||||
|
|
||||||
__forceinline s32 V_Core::RevbGetIndexer(s32 offset)
|
__forceinline s32 V_Core::RevbGetIndexer(s32 offset)
|
||||||
{
|
{
|
||||||
u32 start = EffectsStartA & 0x3f'ffff;
|
u32 start = EffectsStartA & 0x3f'ffff;
|
||||||
|
@ -325,7 +137,7 @@ StereoOut32 V_Core::DoReverb(const StereoOut32& Input)
|
||||||
s32 in, same, diff, apf1, apf2, out;
|
s32 in, same, diff, apf1, apf2, out;
|
||||||
|
|
||||||
#define MUL(x, y) ((x) * (y) >> 15)
|
#define MUL(x, y) ((x) * (y) >> 15)
|
||||||
in = MUL(R ? Revb.IN_COEF_R : Revb.IN_COEF_L, ReverbDownsample(R));
|
in = MUL(R ? Revb.IN_COEF_R : Revb.IN_COEF_L, ReverbDownsample(*this, R));
|
||||||
|
|
||||||
same = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[same_src]) - _spu2mem[same_prv]) + _spu2mem[same_prv];
|
same = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[same_src]) - _spu2mem[same_prv]) + _spu2mem[same_prv];
|
||||||
diff = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[diff_src]) - _spu2mem[diff_prv]) + _spu2mem[diff_prv];
|
diff = MUL(Revb.IIR_VOL, in + MUL(Revb.WALL_VOL, _spu2mem[diff_src]) - _spu2mem[diff_prv]) + _spu2mem[diff_prv];
|
||||||
|
@ -356,5 +168,5 @@ StereoOut32 V_Core::DoReverb(const StereoOut32& Input)
|
||||||
|
|
||||||
RevbSampleBufPos = (RevbSampleBufPos + 1) & 63;
|
RevbSampleBufPos = (RevbSampleBufPos + 1) & 63;
|
||||||
|
|
||||||
return ReverbUpsample();
|
return ReverbUpsample(*this);
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,257 @@
|
||||||
|
#include "GS/GSVector.h"
|
||||||
|
#include "Global.h"
|
||||||
|
|
||||||
|
MULTI_ISA_UNSHARED_START
|
||||||
|
|
||||||
|
static constexpr u32 NUM_TAPS = 39;
|
||||||
|
// 39 tap filter, the 0's could be optimized out
|
||||||
|
static constexpr std::array<s16, 48> filter_down_coefs alignas(32) = {
|
||||||
|
-1,
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
0,
|
||||||
|
-10,
|
||||||
|
0,
|
||||||
|
35,
|
||||||
|
0,
|
||||||
|
-103,
|
||||||
|
0,
|
||||||
|
266,
|
||||||
|
0,
|
||||||
|
-616,
|
||||||
|
0,
|
||||||
|
1332,
|
||||||
|
0,
|
||||||
|
-2960,
|
||||||
|
0,
|
||||||
|
10246,
|
||||||
|
16384,
|
||||||
|
10246,
|
||||||
|
0,
|
||||||
|
-2960,
|
||||||
|
0,
|
||||||
|
1332,
|
||||||
|
0,
|
||||||
|
-616,
|
||||||
|
0,
|
||||||
|
266,
|
||||||
|
0,
|
||||||
|
-103,
|
||||||
|
0,
|
||||||
|
35,
|
||||||
|
0,
|
||||||
|
-10,
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
0,
|
||||||
|
-1,
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr std::array<s16, 48> make_up_coefs()
|
||||||
|
{
|
||||||
|
std::array<s16, 48> ret = {};
|
||||||
|
|
||||||
|
for (u32 i = 0; i < NUM_TAPS; i++)
|
||||||
|
{
|
||||||
|
ret[i] = static_cast<s16>(std::clamp<s32>(filter_down_coefs[i] * 2, INT16_MIN, INT16_MAX));
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr std::array<s16, 48> filter_up_coefs alignas(32) = make_up_coefs();
|
||||||
|
|
||||||
|
// Scalar form of the downsampling filter: sums the products of NUM_TAPS consecutive
// RevbDownBuf entries of the selected channel with filter_down_coefs, shifts the total
// right by 15 bits and passes it through clamp_mix().
//   core  - core whose RevbSampleBufPos / RevbDownBuf are read
//   right - selects the channel row: RevbDownBuf[right]
// NOTE(review): the window is read at index .. index + NUM_TAPS - 1 with no wrap-around,
// so the buffer presumably holds more than 64 entries per channel - confirm in defs.h.
s32 __forceinline ReverbDownsample_reference(V_Core& core, bool right)
{
	// Window start: NUM_TAPS entries before RevbSampleBufPos, wrapped into 0..63.
	const int index = (core.RevbSampleBufPos - NUM_TAPS) & 63;
	s32 out = 0;

	// The loop index has the same type as NUM_TAPS, so the bound check does not mix
	// signed and unsigned operands.
	for (u32 i = 0; i < NUM_TAPS; i++)
	{
		out += core.RevbDownBuf[right][index + i] * filter_down_coefs[i];
	}

	// Scale the accumulated products back down by 15 bits.
	out >>= 15;

	return clamp_mix(out);
}
|
||||||
|
|
||||||
|
#if _M_SSE >= 0x501
|
||||||
|
// GSVector8i variant of the downsampling filter (only compiled when _M_SSE >= 0x501).
// Walks filter_down_coefs and the RevbDownBuf window of the selected channel in three
// 16-element steps, then reduces the accumulator and returns its 16-bit lane 0.
// NOTE(review): going by their names, mul16hrs/adds16/hadds16 are a rounding high
// multiply, a saturating add and a saturating horizontal add - confirm in GS/GSVector.h.
s32 __forceinline ReverbDownsample_avx(V_Core& core, bool right)
{
	// Window start: NUM_TAPS entries before RevbSampleBufPos, wrapped into 0..63.
	const int first = (core.RevbSampleBufPos - NUM_TAPS) & 63;

	// Step 0 seeds the accumulator.
	auto taps = GSVector8i::load<true>(&filter_down_coefs[0]);
	auto samples = GSVector8i::load<false>(&core.RevbDownBuf[right][first]);
	auto sum = samples.mul16hrs(taps);

	// Steps at element offsets 16 and 32 are accumulated on top.
	for (int ofs = 16; ofs <= 32; ofs += 16)
	{
		taps = GSVector8i::load<true>(&filter_down_coefs[ofs]);
		samples = GSVector8i::load<false>(&core.RevbDownBuf[right][first + ofs]);
		sum = sum.adds16(samples.mul16hrs(taps));
	}

	// Add the ba() swizzle of the accumulator to itself (presumably its two 128-bit
	// halves swapped - confirm), then run three horizontal-add passes.
	sum = sum.adds16(sum.ba());

	for (int pass = 0; pass < 3; pass++)
		sum = sum.hadds16(sum);

	return sum.I16[0];
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// GSVector4i variant of the downsampling filter.
// Walks filter_down_coefs and the RevbDownBuf window of the selected channel in five
// 8-element steps (offsets 0..32), then reduces the accumulator with three
// horizontal-add passes and returns its 16-bit lane 0.
// NOTE(review): going by their names, mul16hrs/adds16/hadds16 are a rounding high
// multiply, a saturating add and a saturating horizontal add - confirm in GS/GSVector.h.
s32 __forceinline ReverbDownsample_sse(V_Core& core, bool right)
{
	// Window start: NUM_TAPS entries before RevbSampleBufPos, wrapped into 0..63.
	const int first = (core.RevbSampleBufPos - NUM_TAPS) & 63;

	// Step 0 seeds the accumulator.
	auto taps = GSVector4i::load<true>(&filter_down_coefs[0]);
	auto samples = GSVector4i::load<false>(&core.RevbDownBuf[right][first]);
	auto sum = samples.mul16hrs(taps);

	// Remaining steps at element offsets 8, 16, 24 and 32.
	for (int ofs = 8; ofs <= 32; ofs += 8)
	{
		taps = GSVector4i::load<true>(&filter_down_coefs[ofs]);
		samples = GSVector4i::load<false>(&core.RevbDownBuf[right][first + ofs]);
		sum = sum.adds16(samples.mul16hrs(taps));
	}

	for (int pass = 0; pass < 3; pass++)
		sum = sum.hadds16(sum);

	return sum.I16[0];
}
|
||||||
|
|
||||||
|
// Entry point for this compilation of the downsampler: forwards to the _avx variant when
// _M_SSE >= 0x501 and to the _sse variant otherwise.
s32 ReverbDownsample(V_Core& core, bool right)
{
#if _M_SSE < 0x501
	const s32 sample = ReverbDownsample_sse(core, right);
#else
	const s32 sample = ReverbDownsample_avx(core, right);
#endif
	return sample;
}
|
||||||
|
|
||||||
|
// Scalar form of the upsampling filter: for each of the two RevbUpBuf channels, sums the
// products of NUM_TAPS consecutive entries with filter_up_coefs, shifts the total right
// by 15 bits and passes it through clamp_mix().
//   core - core whose RevbSampleBufPos / RevbUpBuf are read
// Returns the two results as {channel 0, channel 1}.
// NOTE(review): the window is read at index .. index + NUM_TAPS - 1 with no wrap-around,
// so the buffer presumably holds more than 64 entries per channel - confirm in defs.h.
StereoOut32 __forceinline ReverbUpsample_reference(V_Core& core)
{
	// Window start: NUM_TAPS entries before RevbSampleBufPos, wrapped into 0..63.
	const int index = (core.RevbSampleBufPos - NUM_TAPS) & 63;
	s32 l = 0;
	s32 r = 0;

	// The loop index has the same type as NUM_TAPS, so the bound check does not mix
	// signed and unsigned operands.
	for (u32 i = 0; i < NUM_TAPS; i++)
	{
		l += core.RevbUpBuf[0][index + i] * filter_up_coefs[i];
		r += core.RevbUpBuf[1][index + i] * filter_up_coefs[i];
	}

	// Scale the accumulated products back down by 15 bits.
	l >>= 15;
	r >>= 15;

	return {clamp_mix(l), clamp_mix(r)};
}
|
||||||
|
|
||||||
|
#if _M_SSE >= 0x501
|
||||||
|
// GSVector8i variant of the upsampling filter (only compiled when _M_SSE >= 0x501).
// Runs filter_up_coefs over both RevbUpBuf channels in three 16-element steps, sharing
// each coefficient load between the channels, and returns the 16-bit lane 0 of each
// reduced accumulator as {channel 0, channel 1}.
// NOTE(review): going by their names, mul16hrs/adds16/hadds16 are a rounding high
// multiply, a saturating add and a saturating horizontal add - confirm in GS/GSVector.h.
StereoOut32 __forceinline ReverbUpsample_avx(V_Core& core)
{
	// Window start: NUM_TAPS entries before RevbSampleBufPos, wrapped into 0..63.
	const int first = (core.RevbSampleBufPos - NUM_TAPS) & 63;

	// Step 0 seeds both accumulators.
	auto taps = GSVector8i::load<true>(&filter_up_coefs[0]);
	auto left = GSVector8i::load<false>(&core.RevbUpBuf[0][first]);
	auto right = GSVector8i::load<false>(&core.RevbUpBuf[1][first]);

	auto lsum = left.mul16hrs(taps);
	auto rsum = right.mul16hrs(taps);

	// Steps at element offsets 16 and 32 are accumulated on top.
	for (int ofs = 16; ofs <= 32; ofs += 16)
	{
		taps = GSVector8i::load<true>(&filter_up_coefs[ofs]);
		left = GSVector8i::load<false>(&core.RevbUpBuf[0][first + ofs]);
		right = GSVector8i::load<false>(&core.RevbUpBuf[1][first + ofs]);
		lsum = lsum.adds16(left.mul16hrs(taps));
		rsum = rsum.adds16(right.mul16hrs(taps));
	}

	// Add the ba() swizzle of each accumulator to itself (presumably its two 128-bit
	// halves swapped - confirm) before the horizontal passes.
	lsum = lsum.adds16(lsum.ba());
	rsum = rsum.adds16(rsum.ba());

	for (int pass = 0; pass < 3; pass++)
		lsum = lsum.hadds16(lsum);

	for (int pass = 0; pass < 3; pass++)
		rsum = rsum.hadds16(rsum);

	return {lsum.I16[0], rsum.I16[0]};
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// GSVector4i variant of the upsampling filter.
// Runs filter_up_coefs over both RevbUpBuf channels in five 8-element steps
// (offsets 0..32), sharing each coefficient load between the channels, and returns the
// 16-bit lane 0 of each reduced accumulator as {channel 0, channel 1}.
// NOTE(review): going by their names, mul16hrs/adds16/hadds16 are a rounding high
// multiply, a saturating add and a saturating horizontal add - confirm in GS/GSVector.h.
StereoOut32 __forceinline ReverbUpsample_sse(V_Core& core)
{
	// Window start: NUM_TAPS entries before RevbSampleBufPos, wrapped into 0..63.
	const int first = (core.RevbSampleBufPos - NUM_TAPS) & 63;

	// Step 0 seeds both accumulators.
	auto taps = GSVector4i::load<true>(&filter_up_coefs[0]);
	auto left = GSVector4i::load<false>(&core.RevbUpBuf[0][first]);
	auto right = GSVector4i::load<false>(&core.RevbUpBuf[1][first]);

	auto lsum = left.mul16hrs(taps);
	auto rsum = right.mul16hrs(taps);

	// Remaining steps at element offsets 8, 16, 24 and 32.
	for (int ofs = 8; ofs <= 32; ofs += 8)
	{
		taps = GSVector4i::load<true>(&filter_up_coefs[ofs]);
		left = GSVector4i::load<false>(&core.RevbUpBuf[0][first + ofs]);
		right = GSVector4i::load<false>(&core.RevbUpBuf[1][first + ofs]);
		lsum = lsum.adds16(left.mul16hrs(taps));
		rsum = rsum.adds16(right.mul16hrs(taps));
	}

	for (int pass = 0; pass < 3; pass++)
		lsum = lsum.hadds16(lsum);

	for (int pass = 0; pass < 3; pass++)
		rsum = rsum.hadds16(rsum);

	return {lsum.I16[0], rsum.I16[0]};
}
|
||||||
|
|
||||||
|
// Entry point for this compilation of the upsampler: forwards to the _avx variant when
// _M_SSE >= 0x501 and to the _sse variant otherwise.
StereoOut32 ReverbUpsample(V_Core& core)
{
#if _M_SSE < 0x501
	const StereoOut32 frame = ReverbUpsample_sse(core);
#else
	const StereoOut32 frame = ReverbUpsample_avx(core);
#endif
	return frame;
}
|
||||||
|
|
||||||
|
MULTI_ISA_UNSHARED_END
|
|
@ -19,6 +19,8 @@
|
||||||
#include "SPU2/SndOut.h"
|
#include "SPU2/SndOut.h"
|
||||||
#include "SPU2/Global.h"
|
#include "SPU2/Global.h"
|
||||||
|
|
||||||
|
#include "GS/MultiISA.h"
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------
|
||||||
|
@ -486,9 +488,6 @@ struct V_Core
|
||||||
StereoOut32 DoReverb(const StereoOut32& Input);
|
StereoOut32 DoReverb(const StereoOut32& Input);
|
||||||
s32 RevbGetIndexer(s32 offset);
|
s32 RevbGetIndexer(s32 offset);
|
||||||
|
|
||||||
s32 ReverbDownsample(bool right);
|
|
||||||
StereoOut32 ReverbUpsample();
|
|
||||||
|
|
||||||
StereoOut32 ReadInput();
|
StereoOut32 ReadInput();
|
||||||
StereoOut32 ReadInput_HiFi();
|
StereoOut32 ReadInput_HiFi();
|
||||||
|
|
||||||
|
@ -537,6 +536,14 @@ struct V_Core
|
||||||
void FinishDMAwrite();
|
void FinishDMAwrite();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Declarations of the reverb resampler entry points, wrapped in MULTI_ISA_DEF.
// NOTE(review): presumably the macro repeats these declarations once per ISA namespace,
// matching the definitions placed between MULTI_ISA_UNSHARED_START/END - confirm in
// GS/MultiISA.h.
MULTI_ISA_DEF(
|
||||||
|
StereoOut32 ReverbUpsample(V_Core& core);
|
||||||
|
s32 ReverbDownsample(V_Core& core, bool right);
|
||||||
|
)
|
||||||
|
|
||||||
|
// Function pointers through which V_Core::DoReverb calls the resamplers
// (ReverbDownsample(*this, R) / ReverbUpsample(*this)). They are assigned in
// V_Core::Init from MULTI_ISA_SELECT(...).
extern StereoOut32 (*ReverbUpsample)(V_Core& core);
|
||||||
|
extern s32 (*ReverbDownsample)(V_Core& core, bool right);
|
||||||
|
|
||||||
extern V_Core Cores[2];
|
extern V_Core Cores[2];
|
||||||
extern V_SPDIF Spdif;
|
extern V_SPDIF Spdif;
|
||||||
|
|
||||||
|
|
|
@ -46,6 +46,9 @@ int PlayMode;
|
||||||
|
|
||||||
static bool has_to_call_irq[2] = { false, false };
|
static bool has_to_call_irq[2] = { false, false };
|
||||||
static bool has_to_call_irq_dma[2] = { false, false };
|
static bool has_to_call_irq_dma[2] = { false, false };
|
||||||
|
// Storage for the resampler function pointers. As namespace-scope objects without an
// initialiser they start out null; V_Core::Init assigns them from MULTI_ISA_SELECT(...).
StereoOut32 (*ReverbUpsample)(V_Core& core);
|
||||||
|
s32 (*ReverbDownsample)(V_Core& core, bool right);
|
||||||
|
|
||||||
|
|
||||||
static bool psxmode = false;
|
static bool psxmode = false;
|
||||||
|
|
||||||
|
@ -111,6 +114,9 @@ void V_Core::Init(int index)
|
||||||
if (SPU2::MsgToConsole())
|
if (SPU2::MsgToConsole())
|
||||||
SPU2::ConLog("* SPU2: Init SPU2 core %d \n", index);
|
SPU2::ConLog("* SPU2: Init SPU2 core %d \n", index);
|
||||||
|
|
||||||
|
ReverbDownsample = MULTI_ISA_SELECT(ReverbDownsample);
|
||||||
|
ReverbUpsample = MULTI_ISA_SELECT(ReverbUpsample);
|
||||||
|
|
||||||
//memset(this, 0, sizeof(V_Core));
|
//memset(this, 0, sizeof(V_Core));
|
||||||
// Explicitly initializing variables instead.
|
// Explicitly initializing variables instead.
|
||||||
Mute = false;
|
Mute = false;
|
||||||
|
|
|
@ -264,6 +264,7 @@
|
||||||
<ClCompile Include="SPU2\Mixer.cpp" />
|
<ClCompile Include="SPU2\Mixer.cpp" />
|
||||||
<ClCompile Include="SPU2\ReadInput.cpp" />
|
<ClCompile Include="SPU2\ReadInput.cpp" />
|
||||||
<ClCompile Include="SPU2\Reverb.cpp" />
|
<ClCompile Include="SPU2\Reverb.cpp" />
|
||||||
|
<ClCompile Include="SPU2\ReverbResample.cpp" />
|
||||||
<ClCompile Include="SPU2\spu2.cpp" />
|
<ClCompile Include="SPU2\spu2.cpp" />
|
||||||
<ClCompile Include="IPU\IPUdma.cpp" />
|
<ClCompile Include="IPU\IPUdma.cpp" />
|
||||||
<ClCompile Include="IPU\IPUdither.cpp" />
|
<ClCompile Include="IPU\IPUdither.cpp" />
|
||||||
|
@ -872,4 +873,4 @@
|
||||||
<Import Condition="$(Configuration.Contains(Debug)) Or $(Configuration.Contains(Devel))" Project="$(SolutionDir)3rdparty\winpixeventruntime\WinPixEventRuntime.props" />
|
<Import Condition="$(Configuration.Contains(Debug)) Or $(Configuration.Contains(Devel))" Project="$(SolutionDir)3rdparty\winpixeventruntime\WinPixEventRuntime.props" />
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets" />
|
<ImportGroup Label="ExtensionTargets" />
|
||||||
</Project>
|
</Project>
|
||||||
|
|
Loading…
Reference in New Issue