mirror of https://github.com/PCSX2/pcsx2.git
Common: ARM64 compatibility
This commit is contained in:
parent
6ee99d8b81
commit
4e0e8cef54
|
@ -189,6 +189,27 @@ std::vector<DarwinMisc::CPUClass> DarwinMisc::GetCPUClasses()
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static std::optional<T> sysctlbyname_T(const char* name)
|
||||||
|
{
|
||||||
|
T output = 0;
|
||||||
|
size_t output_size = sizeof(output);
|
||||||
|
if (sysctlbyname(name, &output, &output_size, nullptr, 0) != 0)
|
||||||
|
return std::nullopt;
|
||||||
|
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t HostSys::GetRuntimePageSize()
|
||||||
|
{
|
||||||
|
return sysctlbyname_T<u32>("hw.pagesize").value_or(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t HostSys::GetRuntimeCacheLineSize()
|
||||||
|
{
|
||||||
|
return static_cast<size_t>(std::max<s64>(sysctlbyname_T<s64>("hw.cachelinesize").value_or(0), 0));
|
||||||
|
}
|
||||||
|
|
||||||
static __ri vm_prot_t MachProt(const PageProtectionMode& mode)
|
static __ri vm_prot_t MachProt(const PageProtectionMode& mode)
|
||||||
{
|
{
|
||||||
vm_prot_t machmode = (mode.CanWrite()) ? VM_PROT_WRITE : 0;
|
vm_prot_t machmode = (mode.CanWrite()) ? VM_PROT_WRITE : 0;
|
||||||
|
|
|
@ -123,6 +123,12 @@ namespace HostSys
|
||||||
#else
|
#else
|
||||||
void FlushInstructionCache(void* address, u32 size);
|
void FlushInstructionCache(void* address, u32 size);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/// Returns the size of pages for the current host.
|
||||||
|
size_t GetRuntimePageSize();
|
||||||
|
|
||||||
|
/// Returns the size of a cache line for the current host.
|
||||||
|
size_t GetRuntimeCacheLineSize();
|
||||||
} // namespace HostSys
|
} // namespace HostSys
|
||||||
|
|
||||||
namespace PageFaultHandler
|
namespace PageFaultHandler
|
||||||
|
|
|
@ -134,6 +134,34 @@ void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
|
||||||
pxFailRel("Failed to unmap shared memory");
|
pxFailRel("Failed to unmap shared memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t HostSys::GetRuntimePageSize()
|
||||||
|
{
|
||||||
|
int res = sysconf(_SC_PAGESIZE);
|
||||||
|
return (res > 0) ? static_cast<size_t>(res) : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t HostSys::GetRuntimeCacheLineSize()
|
||||||
|
{
|
||||||
|
int l1i = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
|
||||||
|
int l1d = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
|
||||||
|
int res = (l1i > l1d) ? l1i : l1d;
|
||||||
|
for (int index = 0; index < 16; index++)
|
||||||
|
{
|
||||||
|
char buf[128];
|
||||||
|
snprintf(buf, sizeof(buf), "/sys/devices/system/cpu/cpu0/cache/index%d/coherency_line_size", index);
|
||||||
|
std::FILE* fp = std::fopen(buf, "rb");
|
||||||
|
if (!fp)
|
||||||
|
break;
|
||||||
|
|
||||||
|
std::fread(buf, sizeof(buf), 1, fp);
|
||||||
|
std::fclose(fp);
|
||||||
|
int val = std::atoi(buf);
|
||||||
|
res = (val > res) ? val : res;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (res > 0) ? static_cast<size_t>(res) : 0;
|
||||||
|
}
|
||||||
|
|
||||||
SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages)
|
SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages)
|
||||||
: m_base_ptr(base_ptr)
|
: m_base_ptr(base_ptr)
|
||||||
, m_size(size)
|
, m_size(size)
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "Pcsx2Types.h"
|
#include "Pcsx2Types.h"
|
||||||
|
|
||||||
|
#include <bit>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------
|
||||||
|
@ -21,11 +23,33 @@ static constexpr bool IsDebugBuild = true;
|
||||||
static constexpr bool IsDebugBuild = false;
|
static constexpr bool IsDebugBuild = false;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Defines the memory page size for the target platform at compilation. All supported platforms
|
// Defines the memory page size for the target platform at compilation.
|
||||||
// (which means Intel only right now) have a 4k granularity.
|
#if defined(OVERRIDE_HOST_PAGE_SIZE)
|
||||||
static constexpr unsigned int __pagesize = 0x1000;
|
static constexpr unsigned int __pagesize = OVERRIDE_HOST_PAGE_SIZE;
|
||||||
static constexpr unsigned int __pageshift = 12;
|
static constexpr unsigned int __pagemask = __pagesize - 1;
|
||||||
static constexpr unsigned int __pagemask = __pagesize - 1;
|
static constexpr unsigned int __pageshift = std::bit_width(__pagemask);
|
||||||
|
#elif defined(_M_ARM64)
|
||||||
|
// Apple Silicon uses 16KB pages and 128 byte cache lines.
|
||||||
|
static constexpr unsigned int __pagesize = 0x4000;
|
||||||
|
static constexpr unsigned int __pageshift = 14;
|
||||||
|
static constexpr unsigned int __pagemask = __pagesize - 1;
|
||||||
|
#else
|
||||||
|
// X86 uses a 4KB granularity and 64 byte cache lines.
|
||||||
|
static constexpr unsigned int __pagesize = 0x1000;
|
||||||
|
static constexpr unsigned int __pageshift = 12;
|
||||||
|
static constexpr unsigned int __pagemask = __pagesize - 1;
|
||||||
|
#endif
|
||||||
|
#if defined(OVERRIDE_HOST_CACHE_LINE_SIZE)
|
||||||
|
static constexpr unsigned int __cachelinesize = OVERRIDE_HOST_CACHE_LINE_SIZE;
|
||||||
|
#elif defined(_M_ARM64)
|
||||||
|
static constexpr unsigned int __cachelinesize = 128;
|
||||||
|
#else
|
||||||
|
static constexpr unsigned int __cachelinesize = 64;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// We use 4KB alignment for globals for both Apple and x86 platforms, since computing the
|
||||||
|
// address on ARM64 is a single instruction (adrp).
|
||||||
|
static constexpr unsigned int __pagealignsize = 0x1000;
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------
|
||||||
// Microsoft Visual Studio
|
// Microsoft Visual Studio
|
||||||
|
|
|
@ -5,12 +5,12 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#if defined(_M_X86)
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_M_X86)
|
|
||||||
|
|
||||||
#if defined(__AVX2__)
|
#if defined(__AVX2__)
|
||||||
#define _M_SSE 0x501
|
#define _M_SSE 0x501
|
||||||
#elif defined(__AVX__)
|
#elif defined(__AVX__)
|
||||||
|
@ -36,12 +36,8 @@
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
#elif defined(_M_ARM64)
|
#elif defined(_M_ARM64)
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
|
||||||
#include <arm64_neon.h>
|
|
||||||
#else
|
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
#include <stdlib.h> // alloca
|
#include <stdlib.h> // alloca
|
||||||
|
|
|
@ -100,6 +100,35 @@ void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
|
||||||
pxFail("Failed to unmap shared memory");
|
pxFail("Failed to unmap shared memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t HostSys::GetRuntimePageSize()
|
||||||
|
{
|
||||||
|
SYSTEM_INFO si = {};
|
||||||
|
GetSystemInfo(&si);
|
||||||
|
return si.dwPageSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t HostSys::GetRuntimeCacheLineSize()
|
||||||
|
{
|
||||||
|
DWORD size = 0;
|
||||||
|
if (!GetLogicalProcessorInformation(nullptr, &size) && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
std::unique_ptr<SYSTEM_LOGICAL_PROCESSOR_INFORMATION[]> lpi =
|
||||||
|
std::make_unique<SYSTEM_LOGICAL_PROCESSOR_INFORMATION[]>(
|
||||||
|
(size + (sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) - 1)) / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION));
|
||||||
|
if (!GetLogicalProcessorInformation(lpi.get(), &size))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
u32 max_line_size = 0;
|
||||||
|
for (u32 i = 0; i < size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); i++)
|
||||||
|
{
|
||||||
|
if (lpi[i].Relationship == RelationCache)
|
||||||
|
max_line_size = std::max<u32>(max_line_size, lpi[i].Cache.LineSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
return max_line_size;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef _M_ARM64
|
#ifdef _M_ARM64
|
||||||
|
|
||||||
void HostSys::FlushInstructionCache(void* address, u32 size)
|
void HostSys::FlushInstructionCache(void* address, u32 size)
|
||||||
|
|
|
@ -107,10 +107,20 @@ Threading::ThreadHandle& Threading::ThreadHandle::operator=(const ThreadHandle&
|
||||||
|
|
||||||
u64 Threading::ThreadHandle::GetCPUTime() const
|
u64 Threading::ThreadHandle::GetCPUTime() const
|
||||||
{
|
{
|
||||||
|
#ifndef _M_ARM64
|
||||||
u64 ret = 0;
|
u64 ret = 0;
|
||||||
if (m_native_handle)
|
if (m_native_handle)
|
||||||
QueryThreadCycleTime((HANDLE)m_native_handle, &ret);
|
QueryThreadCycleTime((HANDLE)m_native_handle, &ret);
|
||||||
return ret;
|
return ret;
|
||||||
|
#else
|
||||||
|
FILETIME user, kernel, unused;
|
||||||
|
if (!GetThreadTimes((HANDLE)m_native_handle, &unused, &unused, &kernel, &user))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
const u64 user_time = (static_cast<u64>(user.dwHighDateTime) << 32) | static_cast<u64>(user.dwLowDateTime);
|
||||||
|
const u64 kernel_time = (static_cast<u64>(kernel.dwHighDateTime) << 32) | static_cast<u64>(kernel.dwLowDateTime);
|
||||||
|
return user_time + kernel_time;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Threading::ThreadHandle::SetAffinity(u64 processor_mask) const
|
bool Threading::ThreadHandle::SetAffinity(u64 processor_mask) const
|
||||||
|
@ -198,13 +208,24 @@ Threading::ThreadHandle& Threading::Thread::operator=(Thread&& thread)
|
||||||
|
|
||||||
u64 Threading::GetThreadCpuTime()
|
u64 Threading::GetThreadCpuTime()
|
||||||
{
|
{
|
||||||
|
#ifndef _M_ARM64
|
||||||
u64 ret = 0;
|
u64 ret = 0;
|
||||||
QueryThreadCycleTime(GetCurrentThread(), &ret);
|
QueryThreadCycleTime(GetCurrentThread(), &ret);
|
||||||
return ret;
|
return ret;
|
||||||
|
#else
|
||||||
|
FILETIME user, kernel, unused;
|
||||||
|
if (!GetThreadTimes(GetCurrentThread(), &unused, &unused, &kernel, &user))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
const u64 user_time = (static_cast<u64>(user.dwHighDateTime) << 32) | static_cast<u64>(user.dwLowDateTime);
|
||||||
|
const u64 kernel_time = (static_cast<u64>(kernel.dwHighDateTime) << 32) | static_cast<u64>(kernel.dwLowDateTime);
|
||||||
|
return user_time + kernel_time;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 Threading::GetThreadTicksPerSecond()
|
u64 Threading::GetThreadTicksPerSecond()
|
||||||
{
|
{
|
||||||
|
#ifndef _M_ARM64
|
||||||
// On x86, despite what the MS documentation says, this basically appears to be rdtsc.
|
// On x86, despite what the MS documentation says, this basically appears to be rdtsc.
|
||||||
// So, the frequency is our base clock speed (and stable regardless of power management).
|
// So, the frequency is our base clock speed (and stable regardless of power management).
|
||||||
static u64 frequency = 0;
|
static u64 frequency = 0;
|
||||||
|
@ -224,6 +245,9 @@ u64 Threading::GetThreadTicksPerSecond()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return frequency;
|
return frequency;
|
||||||
|
#else
|
||||||
|
return 10000000;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void Threading::SetNameOfCurrentThread(const char* name)
|
void Threading::SetNameOfCurrentThread(const char* name)
|
||||||
|
|
|
@ -46,11 +46,12 @@
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include "AlignedMalloc.h"
|
#include "AlignedMalloc.h"
|
||||||
|
#include "Pcsx2Defs.h"
|
||||||
|
|
||||||
template <typename T, size_t max_size>
|
template <typename T, size_t max_size>
|
||||||
class ringbuffer_base
|
class ringbuffer_base
|
||||||
{
|
{
|
||||||
static const int padding_size = 64 - sizeof(size_t);
|
static const int padding_size = __cachelinesize - sizeof(size_t);
|
||||||
|
|
||||||
std::atomic<size_t> write_index_;
|
std::atomic<size_t> write_index_;
|
||||||
char padding1[padding_size]; /* force read_index and write_index to different cache lines */
|
char padding1[padding_size]; /* force read_index and write_index to different cache lines */
|
||||||
|
|
|
@ -211,7 +211,7 @@ namespace GSCapture
|
||||||
static std::unique_ptr<s16[]> s_audio_buffer;
|
static std::unique_ptr<s16[]> s_audio_buffer;
|
||||||
static std::atomic<u32> s_audio_buffer_size{0};
|
static std::atomic<u32> s_audio_buffer_size{0};
|
||||||
static u32 s_audio_buffer_write_pos = 0;
|
static u32 s_audio_buffer_write_pos = 0;
|
||||||
alignas(64) static u32 s_audio_buffer_read_pos = 0;
|
alignas(__cachelinesize) static u32 s_audio_buffer_read_pos = 0;
|
||||||
} // namespace GSCapture
|
} // namespace GSCapture
|
||||||
|
|
||||||
#ifndef USE_LINKED_FFMPEG
|
#ifndef USE_LINKED_FFMPEG
|
||||||
|
|
|
@ -278,7 +278,7 @@ namespace
|
||||||
size_t m_block_pos = 0;
|
size_t m_block_pos = 0;
|
||||||
|
|
||||||
DynamicHeapArray<u8, 64> m_block_read_buffer;
|
DynamicHeapArray<u8, 64> m_block_read_buffer;
|
||||||
alignas(64) CXzUnpacker m_unpacker = {};
|
alignas(__cachelinesize) CXzUnpacker m_unpacker = {};
|
||||||
};
|
};
|
||||||
|
|
||||||
GSDumpLzma::GSDumpLzma() = default;
|
GSDumpLzma::GSDumpLzma() = default;
|
||||||
|
|
|
@ -61,12 +61,12 @@ namespace MTGS
|
||||||
|
|
||||||
static void SetEvent();
|
static void SetEvent();
|
||||||
|
|
||||||
alignas(32) BufferedData RingBuffer;
|
alignas(__cachelinesize) BufferedData RingBuffer;
|
||||||
|
|
||||||
// note: when m_ReadPos == m_WritePos, the fifo is empty
|
// note: when m_ReadPos == m_WritePos, the fifo is empty
|
||||||
// Threading info: m_ReadPos is updated by the MTGS thread. m_WritePos is updated by the EE thread
|
// Threading info: m_ReadPos is updated by the MTGS thread. m_WritePos is updated by the EE thread
|
||||||
alignas(64) static std::atomic<unsigned int> s_ReadPos; // cur pos gs is reading from
|
alignas(__cachelinesize) static std::atomic<unsigned int> s_ReadPos; // cur pos gs is reading from
|
||||||
alignas(64) static std::atomic<unsigned int> s_WritePos; // cur pos ee thread is writing to
|
alignas(__cachelinesize) static std::atomic<unsigned int> s_WritePos; // cur pos ee thread is writing to
|
||||||
|
|
||||||
// These vars maintain instance data for sending Data Packets.
|
// These vars maintain instance data for sending Data Packets.
|
||||||
// Only one data packet can be constructed and uploaded at a time.
|
// Only one data packet can be constructed and uploaded at a time.
|
||||||
|
|
|
@ -21,9 +21,9 @@ class VU_Thread final {
|
||||||
|
|
||||||
u32 buffer[buffer_size];
|
u32 buffer[buffer_size];
|
||||||
// Note: keep atomic on separate cache line to avoid CPU conflict
|
// Note: keep atomic on separate cache line to avoid CPU conflict
|
||||||
alignas(64) std::atomic<int> m_ato_read_pos; // Only modified by VU thread
|
alignas(__cachelinesize) std::atomic<int> m_ato_read_pos; // Only modified by VU thread
|
||||||
alignas(64) std::atomic<int> m_ato_write_pos; // Only modified by EE thread
|
alignas(__cachelinesize) std::atomic<int> m_ato_write_pos; // Only modified by EE thread
|
||||||
alignas(64) int m_read_pos; // temporary read pos (local to the VU thread)
|
alignas(__cachelinesize) int m_read_pos; // temporary read pos (local to the VU thread)
|
||||||
int m_write_pos; // temporary write pos (local to the EE thread)
|
int m_write_pos; // temporary write pos (local to the EE thread)
|
||||||
Threading::WorkSema semaEvent;
|
Threading::WorkSema semaEvent;
|
||||||
std::atomic_bool m_shutdown_flag{false};
|
std::atomic_bool m_shutdown_flag{false};
|
||||||
|
|
Loading…
Reference in New Issue