mirror of https://github.com/PCSX2/pcsx2.git
Common: Replace x86_intrin.h with generic Intrin.h
For later Apple Silicon support.
This commit is contained in:
parent
d9abe10308
commit
0bc9c7ffa1
|
@ -111,6 +111,11 @@ if(${PCSX2_TARGET_ARCHITECTURES} MATCHES "x86_64")
|
|||
endif()
|
||||
list(APPEND PCSX2_DEFS _M_X86=1)
|
||||
set(_M_X86 1)
|
||||
|
||||
# SSE4.1 is not set by MSVC, it uses _M_SSE instead.
|
||||
if(MSVC)
|
||||
list(APPEND PCSX2_DEFS __SSE4_1__=1)
|
||||
endif()
|
||||
else()
|
||||
message(FATAL_ERROR "Unsupported architecture: ${PCSX2_TARGET_ARCHITECTURES}")
|
||||
endif()
|
||||
|
|
|
@ -93,12 +93,14 @@ target_sources(common PRIVATE
|
|||
ScopedGuard.h
|
||||
SettingsInterface.h
|
||||
SettingsWrapper.h
|
||||
SingleRegisterTypes.h
|
||||
SmallString.h
|
||||
StringUtil.h
|
||||
Timer.h
|
||||
TextureDecompress.h
|
||||
Threading.h
|
||||
TraceLog.h
|
||||
VectorIntrin.h
|
||||
WAVWriter.h
|
||||
WindowInfo.h
|
||||
WrappedMemCopy.h
|
||||
|
|
|
@ -3,12 +3,13 @@
|
|||
|
||||
#include "General.h"
|
||||
#include "Console.h"
|
||||
#include "emitter/x86_intrin.h"
|
||||
#include "VectorIntrin.h"
|
||||
|
||||
static u32 PAUSE_TIME = 0;
|
||||
|
||||
static void MultiPause()
|
||||
{
|
||||
#ifdef _M_X86
|
||||
_mm_pause();
|
||||
_mm_pause();
|
||||
_mm_pause();
|
||||
|
@ -17,6 +18,27 @@ static void MultiPause()
|
|||
_mm_pause();
|
||||
_mm_pause();
|
||||
_mm_pause();
|
||||
#elif defined(_M_ARM64) && defined(_MSC_VER)
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
__isb(_ARM64_BARRIER_SY);
|
||||
#elif defined(_M_ARM64)
|
||||
__asm__ __volatile__("isb");
|
||||
__asm__ __volatile__("isb");
|
||||
__asm__ __volatile__("isb");
|
||||
__asm__ __volatile__("isb");
|
||||
__asm__ __volatile__("isb");
|
||||
__asm__ __volatile__("isb");
|
||||
__asm__ __volatile__("isb");
|
||||
__asm__ __volatile__("isb");
|
||||
#else
|
||||
#error Unknown architecture.
|
||||
#endif
|
||||
}
|
||||
|
||||
static u32 MeasurePauseTime()
|
||||
|
|
|
@ -0,0 +1,185 @@
|
|||
// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: LGPL-3.0+
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// r64 / r128 - Types that are guaranteed to fit in one register
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Note: Recompilers rely on some of these types and the registers they allocate to,
|
||||
// so be careful if you want to change them
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Pcsx2Defs.h"
|
||||
#include "Pcsx2Types.h"
|
||||
#include "VectorIntrin.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#if defined(_M_X86)
|
||||
|
||||
// Can't stick them in structs because it breaks calling convention things, yay
|
||||
using r128 = __m128i;
|
||||
|
||||
// Calling convention setting, yay
|
||||
#define RETURNS_R128 r128 __vectorcall
|
||||
#define TAKES_R128 __vectorcall
|
||||
|
||||
// And since we can't stick them in structs, we get lots of static methods, yay!
|
||||
[[maybe_unused]] __fi static r128 r128_load(const void* ptr)
|
||||
{
|
||||
return _mm_load_si128(reinterpret_cast<const r128*>(ptr));
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static void r128_store(void* ptr, r128 val)
|
||||
{
|
||||
return _mm_store_si128(reinterpret_cast<r128*>(ptr), val);
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static void r128_store_unaligned(void* ptr, r128 val)
|
||||
{
|
||||
return _mm_storeu_si128(reinterpret_cast<r128*>(ptr), val);
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static r128 r128_zero()
|
||||
{
|
||||
return _mm_setzero_si128();
|
||||
}
|
||||
|
||||
/// Expects that r64 came from r64-handling code, and not from a recompiler or something
|
||||
[[maybe_unused]] __fi static r128 r128_from_u64_dup(u64 val)
|
||||
{
|
||||
return _mm_set1_epi64x(val);
|
||||
}
|
||||
[[maybe_unused]] __fi static r128 r128_from_u64_zext(u64 val)
|
||||
{
|
||||
return _mm_set_epi64x(0, val);
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static r128 r128_from_u32x4(u32 lo0, u32 lo1, u32 hi0, u32 hi1)
|
||||
{
|
||||
return _mm_setr_epi32(lo0, lo1, hi0, hi1);
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static r128 r128_from_u128(const u128& u)
|
||||
{
|
||||
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(&u));
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static u32 r128_to_u32(r128 val)
|
||||
{
|
||||
return _mm_cvtsi128_si32(val);
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static u64 r128_to_u64(r128 val)
|
||||
{
|
||||
return _mm_cvtsi128_si64(val);
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static u128 r128_to_u128(r128 val)
|
||||
{
|
||||
alignas(16) u128 ret;
|
||||
_mm_store_si128(reinterpret_cast<r128*>(&ret), val);
|
||||
return ret;
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static void CopyQWC(void* dest, const void* src)
|
||||
{
|
||||
_mm_store_ps((float*)dest, _mm_load_ps((const float*)src));
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static void ZeroQWC(void* dest)
|
||||
{
|
||||
_mm_store_ps((float*)dest, _mm_setzero_ps());
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static void ZeroQWC(u128& dest)
|
||||
{
|
||||
_mm_store_ps((float*)&dest, _mm_setzero_ps());
|
||||
}
|
||||
|
||||
#elif defined(_M_ARM64)
|
||||
|
||||
using r128 = uint32x4_t;
|
||||
|
||||
#define RETURNS_R128 r128 __vectorcall
|
||||
#define TAKES_R128 __vectorcall
|
||||
|
||||
[[maybe_unused]] __fi static void CopyQWC(void* dest, const void* src)
|
||||
{
|
||||
vst1q_u8(static_cast<u8*>(dest), vld1q_u8(static_cast<const u8*>(src)));
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static void ZeroQWC(void* dest)
|
||||
{
|
||||
vst1q_u8(static_cast<u8*>(dest), vmovq_n_u8(0));
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static void ZeroQWC(u128& dest)
|
||||
{
|
||||
vst1q_u8(&dest._u8[0], vmovq_n_u8(0));
|
||||
}
|
||||
|
||||
|
||||
[[maybe_unused]] __fi static r128 r128_load(const void* ptr)
|
||||
{
|
||||
return vld1q_u32(reinterpret_cast<const uint32_t*>(ptr));
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static void r128_store(void* ptr, r128 value)
|
||||
{
|
||||
return vst1q_u32(reinterpret_cast<uint32_t*>(ptr), value);
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static void r128_store_unaligned(void* ptr, r128 value)
|
||||
{
|
||||
return vst1q_u32(reinterpret_cast<uint32_t*>(ptr), value);
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static r128 r128_zero()
|
||||
{
|
||||
return vmovq_n_u32(0);
|
||||
}
|
||||
|
||||
/// Expects that r64 came from r64-handling code, and not from a recompiler or something
|
||||
[[maybe_unused]] __fi static r128 r128_from_u64_dup(u64 val)
|
||||
{
|
||||
return vreinterpretq_u32_u64(vdupq_n_u64(val));
|
||||
}
|
||||
[[maybe_unused]] __fi static r128 r128_from_u64_zext(u64 val)
|
||||
{
|
||||
return vreinterpretq_u32_u64(vcombine_u64(vcreate_u64(val), vcreate_u64(0)));
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static r128 r128_from_u32x4(u32 lo0, u32 lo1, u32 hi0, u32 hi1)
|
||||
{
|
||||
const u32 values[4] = {lo0, lo1, hi0, hi1};
|
||||
return vld1q_u32(values);
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static r128 r128_from_u128(const u128& u)
|
||||
{
|
||||
return vld1q_u32(reinterpret_cast<const uint32_t*>(u._u32));
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static u32 r128_to_u32(r128 val)
|
||||
{
|
||||
return vgetq_lane_u32(val, 0);
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static u64 r128_to_u64(r128 val)
|
||||
{
|
||||
return vgetq_lane_u64(vreinterpretq_u64_u32(val), 0);
|
||||
}
|
||||
|
||||
[[maybe_unused]] __fi static u128 r128_to_u128(r128 val)
|
||||
{
|
||||
alignas(16) u128 ret;
|
||||
vst1q_u32(ret._u32, val);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#error Unknown architecture.
|
||||
|
||||
#endif
|
|
@ -0,0 +1,50 @@
|
|||
// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: LGPL-3.0+
|
||||
|
||||
// Includes appropriate intrinsic header based on platform.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(_M_X86)
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define _M_SSE 0x501
|
||||
#elif defined(__AVX__)
|
||||
#define _M_SSE 0x500
|
||||
#elif defined(__SSE4_1__)
|
||||
#define _M_SSE 0x401
|
||||
#else
|
||||
#error PCSX2 requires compiling for at least SSE 4.1
|
||||
#endif
|
||||
|
||||
// Starting with AVX, processors have fast unaligned loads
|
||||
// Reduce code duplication by not compiling multiple versions
|
||||
#if _M_SSE >= 0x500
|
||||
#define FAST_UNALIGNED 1
|
||||
#else
|
||||
#define FAST_UNALIGNED 0
|
||||
#endif
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
#elif defined(_M_ARM64)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#include <arm64_neon.h>
|
||||
#else
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <stdlib.h> // alloca
|
||||
#else
|
||||
#include <malloc.h> // alloca
|
||||
#endif
|
|
@ -118,6 +118,8 @@
|
|||
<ClInclude Include="HeapArray.h" />
|
||||
<ClInclude Include="HeterogeneousContainers.h" />
|
||||
<ClInclude Include="Image.h" />
|
||||
<ClInclude Include="SingleRegisterTypes.h" />
|
||||
<ClInclude Include="VectorIntrin.h" />
|
||||
<ClInclude Include="LRUCache.h" />
|
||||
<ClInclude Include="HTTPDownloader.h" />
|
||||
<ClInclude Include="HTTPDownloaderCurl.h">
|
||||
|
|
|
@ -354,6 +354,10 @@
|
|||
<ClInclude Include="SmallString.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="VectorIntrin.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="SingleRegisterTypes.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="Source Files">
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#include "common/General.h"
|
||||
#include "common/emitter/tools.h"
|
||||
#include "common/emitter/internal.h"
|
||||
#include "common/emitter/x86_intrin.h"
|
||||
#include "common/VectorIntrin.h"
|
||||
#include <atomic>
|
||||
|
||||
// CPU information support
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include "common/emitter/internal.h"
|
||||
#include "common/emitter/tools.h"
|
||||
#include "common/emitter/x86_intrin.h"
|
||||
#include "common/VectorIntrin.h"
|
||||
|
||||
// Mask of valid bit fields for the target CPU. Typically this is either 0xFFFF (SSE2
|
||||
// or better) or 0xFFBF (SSE1 and earlier). Code can ensure a safe/valid MXCSR by
|
||||
|
|
|
@ -1,42 +0,0 @@
|
|||
// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: LGPL-3.0+
|
||||
|
||||
#pragma once
|
||||
|
||||
// Because nobody can't agree on a single name !
|
||||
#if defined(__GNUC__)
|
||||
|
||||
// Yes there are several files for the same features!
|
||||
// x86intrin.h which is the general include provided by the compiler
|
||||
// x86_intrin.h, this file, which is compatibility layer for severals intrinsics
|
||||
#include "x86intrin.h"
|
||||
|
||||
#else
|
||||
|
||||
#include "Intrin.h"
|
||||
|
||||
#endif
|
||||
|
||||
// Rotate instruction
|
||||
#if defined(__clang__) && __clang_major__ < 9
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wunused-function"
|
||||
|
||||
// Seriously what is so complicated to provided this bunch of intrinsics in clangs.
|
||||
static unsigned int _rotr(unsigned int x, int s)
|
||||
{
|
||||
return (x >> s) | (x << (32 - s));
|
||||
}
|
||||
|
||||
static unsigned int _rotl(unsigned int x, int s)
|
||||
{
|
||||
return (x << s) | (x >> (32 - s));
|
||||
}
|
||||
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
// Not correctly defined in GCC4.8 and below ! (dunno for VS)
|
||||
#ifndef _MM_MK_INSERTPS_NDX
|
||||
#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField) << 6) | ((dstField) << 4) | (zeroMask))
|
||||
#endif
|
|
@ -55,7 +55,6 @@ endif()
|
|||
if(WIN32)
|
||||
set(MIN_WIN32 0x0A00)
|
||||
target_compile_definitions(PCSX2_FLAGS INTERFACE
|
||||
__SSE4_1__
|
||||
WINVER=${MIN_WIN32}
|
||||
_WIN32_WINNT=${MIN_WIN32}
|
||||
WIN32_LEAN_AND_MEAN
|
||||
|
@ -190,7 +189,6 @@ set(pcsx2Headers
|
|||
Memory.h
|
||||
MemoryTypes.h
|
||||
Patch.h
|
||||
PCSX2Base.h
|
||||
PerformanceMetrics.h
|
||||
PrecompiledHeader.h
|
||||
R3000A.h
|
||||
|
@ -200,7 +198,6 @@ set(pcsx2Headers
|
|||
ShaderCacheVersion.h
|
||||
Sifcmd.h
|
||||
Sif.h
|
||||
SingleRegisterTypes.h
|
||||
SIO/Sio.h
|
||||
SIO/Sio2.h
|
||||
SIO/Sio0.h
|
||||
|
|
|
@ -4,7 +4,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "Common.h"
|
||||
#include "SingleRegisterTypes.h"
|
||||
|
||||
#include "common/SingleRegisterTypes.h"
|
||||
|
||||
void resetCache();
|
||||
void writeCache8(u32 mem, u8 value);
|
||||
|
|
|
@ -6,7 +6,8 @@
|
|||
#include "Common.h"
|
||||
#include "Gif.h"
|
||||
#include "GS/GS.h"
|
||||
#include "SingleRegisterTypes.h"
|
||||
|
||||
#include "common/SingleRegisterTypes.h"
|
||||
|
||||
extern double GetVerticalFrequency();
|
||||
alignas(16) extern u8 g_RealGSMem[Ps2MemSize::GSregs];
|
||||
|
|
|
@ -5,16 +5,7 @@
|
|||
|
||||
#include "common/Pcsx2Defs.h"
|
||||
#include "common/Assertions.h"
|
||||
|
||||
#include "PCSX2Base.h"
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#if _M_SSE >= 0x500
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#include "common/VectorIntrin.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "PCSX2Base.h"
|
||||
#include "common/Pcsx2Defs.h"
|
||||
#include "common/VectorIntrin.h"
|
||||
|
||||
// For multiple-isa compilation
|
||||
#ifdef MULTI_ISA_UNSHARED_COMPILATION
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "PCSX2Base.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
|
|
@ -3,33 +3,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#ifdef __linux__
|
||||
#include <signal.h>
|
||||
#endif
|
||||
|
||||
#include "vtlb.h"
|
||||
|
||||
#include "common/emitter/x86_intrin.h"
|
||||
|
||||
// [TODO] This *could* be replaced with an assignment operator on u128 that implicitly
|
||||
// uses _mm_store and _mm_load internally. However, there are alignment concerns --
|
||||
// u128 is not alignment strict. (we would need a u128 and u128a for types known to
|
||||
// be strictly 128-bit aligned).
|
||||
static __fi void CopyQWC( void* dest, const void* src )
|
||||
{
|
||||
_mm_store_ps( (float*)dest, _mm_load_ps((const float*)src) );
|
||||
}
|
||||
|
||||
static __fi void ZeroQWC( void* dest )
|
||||
{
|
||||
_mm_store_ps( (float*)dest, _mm_setzero_ps() );
|
||||
}
|
||||
|
||||
static __fi void ZeroQWC( u128& dest )
|
||||
{
|
||||
_mm_store_ps( (float*)&dest, _mm_setzero_ps() );
|
||||
}
|
||||
|
||||
#define PSM(mem) (vtlb_GetPhyPtr((mem)&0x1fffffff)) //pcsx2 is a competition.The one with most hacks wins :D
|
||||
|
||||
#define psHs8(mem) (*(s8 *)&eeHw[(mem) & 0xffff])
|
||||
|
@ -108,7 +83,7 @@ extern void memMapVUmicro();
|
|||
#define memWrite32 vtlb_memWrite<mem32_t>
|
||||
#define memWrite64 vtlb_memWrite<mem64_t>
|
||||
|
||||
static __fi void memRead128(u32 mem, mem128_t* out) { _mm_store_si128((__m128i*)out, vtlb_memRead128(mem)); }
|
||||
static __fi void memRead128(u32 mem, mem128_t* out) { r128_store(out, vtlb_memRead128(mem)); }
|
||||
static __fi void memRead128(u32 mem, mem128_t& out) { memRead128(mem, &out); }
|
||||
|
||||
static __fi void memWrite128(u32 mem, const mem128_t* val) { vtlb_memWrite128(mem, r128_load(val)); }
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: LGPL-3.0+
|
||||
|
||||
/// Base defines and typedefs that are needed by all code in PCSX2
|
||||
/// Prefer this over including Pcsx2Defs.h to make sure everyone gets all the defines, as missing defines fail silently
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/Pcsx2Defs.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
#define _M_SSE 0x501
|
||||
#elif defined(__AVX__)
|
||||
#define _M_SSE 0x500
|
||||
#elif defined(__SSE4_1__)
|
||||
#define _M_SSE 0x401
|
||||
#else
|
||||
#error PCSX2 requires compiling for at least SSE 4.1
|
||||
#endif
|
||||
|
||||
// Starting with AVX, processors have fast unaligned loads
|
||||
// Reduce code duplication by not compiling multiple versions
|
||||
#if _M_SSE >= 0x500
|
||||
#define FAST_UNALIGNED 1
|
||||
#else
|
||||
#define FAST_UNALIGNED 0
|
||||
#endif
|
|
@ -43,9 +43,3 @@
|
|||
// We use fmt a fair bit now.
|
||||
#include "fmt/core.h"
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Begin Pcsx2 Includes: Add items here that are local to Pcsx2 but stay relatively
|
||||
// unchanged for long periods of time, or happen to be used by almost everything, so they
|
||||
// need a full recompile anyway, when modified (etc)
|
||||
|
||||
#include "PCSX2Base.h"
|
||||
|
|
|
@ -1,79 +0,0 @@
|
|||
// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
|
||||
// SPDX-License-Identifier: LGPL-3.0+
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// r64 / r128 - Types that are guaranteed to fit in one register
|
||||
// --------------------------------------------------------------------------------------
|
||||
// Note: Recompilers rely on some of these types and the registers they allocate to,
|
||||
// so be careful if you want to change them
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
|
||||
#include <immintrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
// Can't stick them in structs because it breaks calling convention things, yay
|
||||
using r128 = __m128i;
|
||||
|
||||
// Calling convention setting, yay
|
||||
#define RETURNS_R128 r128 __vectorcall
|
||||
#define TAKES_R128 __vectorcall
|
||||
|
||||
// And since we can't stick them in structs, we get lots of static methods, yay!
|
||||
__forceinline static r128 r128_load(const void* ptr)
|
||||
{
|
||||
return _mm_load_si128(reinterpret_cast<const r128*>(ptr));
|
||||
}
|
||||
|
||||
__forceinline static void r128_store(void* ptr, r128 val)
|
||||
{
|
||||
return _mm_store_si128(reinterpret_cast<r128*>(ptr), val);
|
||||
}
|
||||
|
||||
__forceinline static void r128_store_unaligned(void* ptr, r128 val)
|
||||
{
|
||||
return _mm_storeu_si128(reinterpret_cast<r128*>(ptr), val);
|
||||
}
|
||||
|
||||
__forceinline static r128 r128_zero()
|
||||
{
|
||||
return _mm_setzero_si128();
|
||||
}
|
||||
|
||||
/// Expects that r64 came from r64-handling code, and not from a recompiler or something
|
||||
__forceinline static r128 r128_from_u64_dup(u64 val)
|
||||
{
|
||||
return _mm_set1_epi64x(val);
|
||||
}
|
||||
__forceinline static r128 r128_from_u64_zext(u64 val)
|
||||
{
|
||||
return _mm_set_epi64x(0, val);
|
||||
}
|
||||
|
||||
__forceinline static r128 r128_from_u32x4(u32 lo0, u32 lo1, u32 hi0, u32 hi1)
|
||||
{
|
||||
return _mm_setr_epi32(lo0, lo1, hi0, hi1);
|
||||
}
|
||||
|
||||
__forceinline static r128 r128_from_u128(const u128& u)
|
||||
{
|
||||
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(&u));
|
||||
}
|
||||
|
||||
__forceinline static u32 r128_to_u32(r128 val)
|
||||
{
|
||||
return _mm_cvtsi128_si32(val);
|
||||
}
|
||||
|
||||
__forceinline static u64 r128_to_u64(r128 val)
|
||||
{
|
||||
return _mm_cvtsi128_si64(val);
|
||||
}
|
||||
|
||||
__forceinline static u128 r128_to_u128(r128 val)
|
||||
{
|
||||
alignas(16) u128 ret;
|
||||
_mm_store_si128(reinterpret_cast<r128*>(&ret), val);
|
||||
return ret;
|
||||
}
|
|
@ -22,7 +22,7 @@
|
|||
#include "common/StringUtil.h"
|
||||
|
||||
#ifdef _M_X86
|
||||
#include "common/emitter/x86_intrin.h"
|
||||
#include "common/emitter/tools.h"
|
||||
#endif
|
||||
|
||||
extern R5900cpu GSDumpReplayerCpu;
|
||||
|
|
|
@ -24,7 +24,6 @@
|
|||
#include "LogSink.h"
|
||||
#include "MTGS.h"
|
||||
#include "MTVU.h"
|
||||
#include "PCSX2Base.h"
|
||||
#include "PINE.h"
|
||||
#include "Patch.h"
|
||||
#include "PerformanceMetrics.h"
|
||||
|
@ -50,7 +49,6 @@
|
|||
#include "common/StringUtil.h"
|
||||
#include "common/Threading.h"
|
||||
#include "common/Timer.h"
|
||||
#include "common/emitter/tools.h"
|
||||
|
||||
#include "IconsFontAwesome5.h"
|
||||
#include "discord_rpc.h"
|
||||
|
@ -61,7 +59,7 @@
|
|||
#include <sstream>
|
||||
|
||||
#ifdef _M_X86
|
||||
#include "common/emitter/x86_intrin.h"
|
||||
#include "common/emitter/tools.h"
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
|
|
|
@ -672,7 +672,6 @@
|
|||
<ClInclude Include="IPU\IPUdma.h" />
|
||||
<ClInclude Include="Mdec.h" />
|
||||
<ClInclude Include="Patch.h" />
|
||||
<ClInclude Include="PCSX2Base.h" />
|
||||
<ClInclude Include="PrecompiledHeader.h" />
|
||||
<ClInclude Include="ps2\pgif.h" />
|
||||
<ClInclude Include="StateWrapper.h" />
|
||||
|
@ -711,7 +710,6 @@
|
|||
<ClInclude Include="Common.h" />
|
||||
<ClInclude Include="Config.h" />
|
||||
<ClInclude Include="SaveState.h" />
|
||||
<ClInclude Include="SingleRegisterTypes.h" />
|
||||
<ClInclude Include="System.h" />
|
||||
<ClInclude Include="Counters.h" />
|
||||
<ClInclude Include="Dmac.h" />
|
||||
|
|
|
@ -1418,9 +1418,6 @@
|
|||
<ClInclude Include="Patch.h">
|
||||
<Filter>Misc</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="PCSX2Base.h">
|
||||
<Filter>Misc</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="PrecompiledHeader.h">
|
||||
<Filter>Misc</Filter>
|
||||
</ClInclude>
|
||||
|
@ -1442,9 +1439,6 @@
|
|||
<ClInclude Include="SaveState.h">
|
||||
<Filter>System\Include</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="SingleRegisterTypes.h">
|
||||
<Filter>System\Include</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="System.h">
|
||||
<Filter>System\Include</Filter>
|
||||
</ClInclude>
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "Hw.h"
|
||||
#include "SingleRegisterTypes.h"
|
||||
|
||||
#include "common/SingleRegisterTypes.h"
|
||||
|
||||
// hw read functions
|
||||
template< uint page > extern mem8_t hwRead8 (u32 mem);
|
||||
|
|
|
@ -4,9 +4,10 @@
|
|||
#pragma once
|
||||
|
||||
#include "MemoryTypes.h"
|
||||
#include "SingleRegisterTypes.h"
|
||||
#include "System.h"
|
||||
|
||||
#include "common/SingleRegisterTypes.h"
|
||||
|
||||
static const uptr VTLB_AllocUpperBounds = _1gb * 2;
|
||||
|
||||
// Specialized function pointers for each read type
|
||||
|
|
|
@ -7,10 +7,6 @@
|
|||
#include "Vif_Dma.h"
|
||||
#include "newVif.h"
|
||||
|
||||
#include "common/emitter/x86_intrin.h"
|
||||
|
||||
using namespace x86Emitter;
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// VifUnpackSSE_Base
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
|
Loading…
Reference in New Issue