mirror of https://github.com/PCSX2/pcsx2.git

Compare commits: 10 commits, a861ee113b ... 8e438a305d

| Author | SHA1 | Date |
|---|---|---|
| TellowKrinkle | 8e438a305d | |
| lightningterror | 5486eed151 | |
| Ty Lamontagne | d1721360ff | |
| TellowKrinkle | 87b795e1c6 | |
| TellowKrinkle | 2eabebc82a | |
| TellowKrinkle | 51c7a723db | |
| TellowKrinkle | d8b8af44a0 | |
| TellowKrinkle | 5a6b3cba6e | |
| TellowKrinkle | fe2f97eeb5 | |
| TellowKrinkle | 8a9fbb43e6 | |
```diff
@@ -149,6 +149,7 @@ elseif(APPLE)
 		Darwin/DarwinThreads.cpp
 		Darwin/DarwinMisc.cpp
 		Darwin/DarwinMisc.h
+		Linux/LnxHostSys.cpp
 	)
 	target_compile_options(common PRIVATE -fobjc-arc)
 	target_link_options(common PRIVATE -fobjc-link-runtime)
```
```diff
@@ -16,16 +16,10 @@
 #include <cstring>
 #include <cstdlib>
 #include <optional>
-#include <sys/mman.h>
-#include <sys/types.h>
 #include <sys/sysctl.h>
 #include <time.h>
-#include <mach/mach_init.h>
-#include <mach/mach_port.h>
 #include <mach/mach_time.h>
-#include <mach/mach_vm.h>
 #include <mach/task.h>
-#include <mach/vm_map.h>
 #include <mutex>
 #include <IOKit/pwr_mgt/IOPMLib.h>
 
@@ -201,200 +195,6 @@ size_t HostSys::GetRuntimeCacheLineSize()
 	return static_cast<size_t>(std::max<s64>(sysctlbyname_T<s64>("hw.cachelinesize").value_or(0), 0));
 }
 
-static __ri vm_prot_t MachProt(const PageProtectionMode& mode)
-{
-	vm_prot_t machmode = (mode.CanWrite()) ? VM_PROT_WRITE : 0;
-	machmode |= (mode.CanRead()) ? VM_PROT_READ : 0;
-	machmode |= (mode.CanExecute()) ? (VM_PROT_EXECUTE | VM_PROT_READ) : 0;
-	return machmode;
-}
-
-void* HostSys::Mmap(void* base, size_t size, const PageProtectionMode& mode)
-{
-	pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned");
-	if (mode.IsNone())
-		return nullptr;
-
-#ifdef __aarch64__
-	// We can't allocate executable memory with mach_vm_allocate() on Apple Silicon.
-	// Instead, we need to use MAP_JIT with mmap(), which does not support fixed mappings.
-	if (mode.CanExecute())
-	{
-		if (base)
-			return nullptr;
-
-		const u32 mmap_prot = mode.CanWrite() ? (PROT_READ | PROT_WRITE | PROT_EXEC) : (PROT_READ | PROT_EXEC);
-		const u32 flags = MAP_PRIVATE | MAP_ANON | MAP_JIT;
-		void* const res = mmap(nullptr, size, mmap_prot, flags, -1, 0);
-		return (res == MAP_FAILED) ? nullptr : res;
-	}
-#endif
-
-	kern_return_t ret = mach_vm_allocate(mach_task_self(), reinterpret_cast<mach_vm_address_t*>(&base), size,
-		base ? VM_FLAGS_FIXED : VM_FLAGS_ANYWHERE);
-	if (ret != KERN_SUCCESS)
-	{
-		DEV_LOG("mach_vm_allocate() returned {}", ret);
-		return nullptr;
-	}
-
-	ret = mach_vm_protect(mach_task_self(), reinterpret_cast<mach_vm_address_t>(base), size, false, MachProt(mode));
-	if (ret != KERN_SUCCESS)
-	{
-		DEV_LOG("mach_vm_protect() returned {}", ret);
-		mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(base), size);
-		return nullptr;
-	}
-
-	return base;
-}
-
-void HostSys::Munmap(void* base, size_t size)
-{
-	if (!base)
-		return;
-
-	mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(base), size);
-}
-
-void HostSys::MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode)
-{
-	pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned");
-
-	kern_return_t res = mach_vm_protect(mach_task_self(), reinterpret_cast<mach_vm_address_t>(baseaddr), size, false,
-		MachProt(mode));
-	if (res != KERN_SUCCESS) [[unlikely]]
-	{
-		ERROR_LOG("mach_vm_protect() failed: {}", res);
-		pxFailRel("mach_vm_protect() failed");
-	}
-}
-
-std::string HostSys::GetFileMappingName(const char* prefix)
-{
-	// name actually is not used.
-	return {};
-}
-
-void* HostSys::CreateSharedMemory(const char* name, size_t size)
-{
-	mach_vm_size_t vm_size = size;
-	mach_port_t port;
-	const kern_return_t res = mach_make_memory_entry_64(
-		mach_task_self(), &vm_size, 0, MAP_MEM_NAMED_CREATE | VM_PROT_READ | VM_PROT_WRITE, &port, MACH_PORT_NULL);
-	if (res != KERN_SUCCESS)
-	{
-		ERROR_LOG("mach_make_memory_entry_64() failed: {}", res);
-		return nullptr;
-	}
-
-	return reinterpret_cast<void*>(static_cast<uintptr_t>(port));
-}
-
-void HostSys::DestroySharedMemory(void* ptr)
-{
-	mach_port_deallocate(mach_task_self(), static_cast<mach_port_t>(reinterpret_cast<uintptr_t>(ptr)));
-}
-
-void* HostSys::MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode)
-{
-	mach_vm_address_t ptr = reinterpret_cast<mach_vm_address_t>(baseaddr);
-	const kern_return_t res = mach_vm_map(mach_task_self(), &ptr, size, 0, baseaddr ? VM_FLAGS_FIXED : VM_FLAGS_ANYWHERE,
-		static_cast<mach_port_t>(reinterpret_cast<uintptr_t>(handle)), offset, FALSE,
-		MachProt(mode), VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_NONE);
-	if (res != KERN_SUCCESS)
-	{
-		ERROR_LOG("mach_vm_map() failed: {}", res);
-		return nullptr;
-	}
-
-	return reinterpret_cast<void*>(ptr);
-}
-
-void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
-{
-	const kern_return_t res = mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(baseaddr), size);
-	if (res != KERN_SUCCESS)
-		pxFailRel("Failed to unmap shared memory");
-}
-
-#ifdef _M_ARM64
-
-void HostSys::FlushInstructionCache(void* address, u32 size)
-{
-	__builtin___clear_cache(reinterpret_cast<char*>(address), reinterpret_cast<char*>(address) + size);
-}
-
-#endif
-
-SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages)
-	: m_base_ptr(base_ptr)
-	, m_size(size)
-	, m_num_pages(num_pages)
-{
-}
-
-SharedMemoryMappingArea::~SharedMemoryMappingArea()
-{
-	pxAssertRel(m_num_mappings == 0, "No mappings left");
-
-	if (mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(m_base_ptr), m_size) != KERN_SUCCESS)
-		pxFailRel("Failed to release shared memory area");
-}
-
-std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size)
-{
-	pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned");
-
-	mach_vm_address_t alloc;
-	const kern_return_t res =
-		mach_vm_map(mach_task_self(), &alloc, size, 0, VM_FLAGS_ANYWHERE,
-			MEMORY_OBJECT_NULL, 0, false, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_NONE);
-	if (res != KERN_SUCCESS)
-	{
-		ERROR_LOG("mach_vm_map() failed: {}", res);
-		return {};
-	}
-
-	return std::unique_ptr<SharedMemoryMappingArea>(new SharedMemoryMappingArea(reinterpret_cast<u8*>(alloc), size, size / __pagesize));
-}
-
-u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* map_base, size_t map_size, const PageProtectionMode& mode)
-{
-	pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
-
-	const kern_return_t res =
-		mach_vm_map(mach_task_self(), reinterpret_cast<mach_vm_address_t*>(&map_base), map_size, 0, VM_FLAGS_OVERWRITE,
-			static_cast<mach_port_t>(reinterpret_cast<uintptr_t>(file_handle)), file_offset, false,
-			MachProt(mode), VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_NONE);
-	if (res != KERN_SUCCESS) [[unlikely]]
-	{
-		ERROR_LOG("mach_vm_map() failed: {}", res);
-		return nullptr;
-	}
-
-	m_num_mappings++;
-	return static_cast<u8*>(map_base);
-}
-
-bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size)
-{
-	pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
-
-	const kern_return_t res =
-		mach_vm_map(mach_task_self(), reinterpret_cast<mach_vm_address_t*>(&map_base), map_size, 0, VM_FLAGS_OVERWRITE,
-			MEMORY_OBJECT_NULL, 0, false, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_NONE);
-	if (res != KERN_SUCCESS) [[unlikely]]
-	{
-		ERROR_LOG("mach_vm_map() failed: {}", res);
-		return false;
-	}
-
-	m_num_mappings--;
-	return true;
-}
-
 #ifdef _M_ARM64
 
 static thread_local int s_code_write_depth = 0;
```
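Aside: the `s_code_write_depth` thread-local retained above exists because Apple Silicon `MAP_JIT` pages are toggled between writable and executable per thread rather than remapped. A minimal sketch of that underlying macOS mechanism (assuming macOS 11+; the helper name is illustrative, not from this diff):

```cpp
#include <cstring>
#include <libkern/OSCacheControl.h> // sys_icache_invalidate
#include <pthread.h>                // pthread_jit_write_protect_np

// Hypothetical helper: copy freshly-emitted code into a MAP_JIT region.
void write_code_to_jit_region(void* dst, const void* src, size_t size)
{
	pthread_jit_write_protect_np(0);  // MAP_JIT pages become writable for this thread
	std::memcpy(dst, src, size);
	pthread_jit_write_protect_np(1);  // and executable again
	sys_icache_invalidate(dst, size); // flush the icache before running the new code
}
```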
```diff
@@ -90,20 +90,11 @@ static __fi PageProtectionMode PageAccess_Any()
 // --------------------------------------------------------------------------------------
 namespace HostSys
 {
-	// Maps a block of memory for use as a recompiled code buffer.
-	// Returns NULL on allocation failure.
-	extern void* Mmap(void* base, size_t size, const PageProtectionMode& mode);
-
-	// Unmaps a block allocated by SysMmap
-	extern void Munmap(void* base, size_t size);
-
 	extern void MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode);
 
 	extern std::string GetFileMappingName(const char* prefix);
 	extern void* CreateSharedMemory(const char* name, size_t size);
 	extern void DestroySharedMemory(void* ptr);
-	extern void* MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode);
-	extern void UnmapSharedMemory(void* baseaddr, size_t size);
 
 	/// JIT write protect for Apple Silicon. Needs to be called prior to writing to any RWX pages.
 #if !defined(__APPLE__) || !defined(_M_ARM64)
@@ -146,7 +137,7 @@ namespace PageFaultHandler
 class SharedMemoryMappingArea
 {
 public:
-	static std::unique_ptr<SharedMemoryMappingArea> Create(size_t size);
+	static std::unique_ptr<SharedMemoryMappingArea> Create(size_t size, bool jit = false);
 
 	~SharedMemoryMappingArea();
```
```diff
@@ -14,8 +14,10 @@
 #include <fcntl.h>
 #include <mutex>
 #include <sys/mman.h>
-#include <ucontext.h>
 #include <unistd.h>
+#ifndef __APPLE__
+#include <ucontext.h>
+#endif
 
 #include "fmt/core.h"
 
@@ -23,12 +25,6 @@
 #include "cpuinfo.h"
 #endif
 
-// FreeBSD does not have MAP_FIXED_NOREPLACE, but does have MAP_EXCL.
-// MAP_FIXED combined with MAP_EXCL behaves like MAP_FIXED_NOREPLACE.
-#if defined(__FreeBSD__) && !defined(MAP_FIXED_NOREPLACE)
-#define MAP_FIXED_NOREPLACE (MAP_FIXED | MAP_EXCL)
-#endif
-
 static __ri uint LinuxProt(const PageProtectionMode& mode)
 {
 	u32 lnxmode = 0;
@@ -43,34 +39,6 @@ static __ri uint LinuxProt(const PageProtectionMode& mode)
 	return lnxmode;
 }
 
-void* HostSys::Mmap(void* base, size_t size, const PageProtectionMode& mode)
-{
-	pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned");
-
-	if (mode.IsNone())
-		return nullptr;
-
-	const u32 prot = LinuxProt(mode);
-
-	u32 flags = MAP_PRIVATE | MAP_ANONYMOUS;
-	if (base)
-		flags |= MAP_FIXED_NOREPLACE;
-
-	void* res = mmap(base, size, prot, flags, -1, 0);
-	if (res == MAP_FAILED)
-		return nullptr;
-
-	return res;
-}
-
-void HostSys::Munmap(void* base, size_t size)
-{
-	if (!base)
-		return;
-
-	munmap((void*)base, size);
-}
-
 void HostSys::MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode)
 {
 	pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned");
@@ -120,23 +88,7 @@ void HostSys::DestroySharedMemory(void* ptr)
 	close(static_cast<int>(reinterpret_cast<intptr_t>(ptr)));
 }
 
-void* HostSys::MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode)
-{
-	const uint lnxmode = LinuxProt(mode);
-
-	const int flags = (baseaddr != nullptr) ? (MAP_SHARED | MAP_FIXED_NOREPLACE) : MAP_SHARED;
-	void* ptr = mmap(baseaddr, size, lnxmode, flags, static_cast<int>(reinterpret_cast<intptr_t>(handle)), static_cast<off_t>(offset));
-	if (ptr == MAP_FAILED)
-		return nullptr;
-
-	return ptr;
-}
-
-void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
-{
-	if (munmap(baseaddr, size) != 0)
-		pxFailRel("Failed to unmap shared memory");
-}
+#ifndef __APPLE__
 
 size_t HostSys::GetRuntimePageSize()
 {
@@ -183,6 +135,8 @@ size_t HostSys::GetRuntimeCacheLineSize()
 #endif
 }
 
+#endif
+
 SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages)
 	: m_base_ptr(base_ptr)
 	, m_size(size)
@@ -199,11 +153,16 @@ SharedMemoryMappingArea::~SharedMemoryMappingArea()
 }
 
 
-std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size)
+std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size, bool jit)
 {
 	pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned");
 
-	void* alloc = mmap(nullptr, size, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	uint flags = MAP_ANONYMOUS | MAP_PRIVATE;
+#ifdef __APPLE__
+	if (jit)
+		flags |= MAP_JIT;
+#endif
+	void* alloc = mmap(nullptr, size, PROT_NONE, flags, -1, 0);
 	if (alloc == MAP_FAILED)
 		return nullptr;
 
@@ -214,15 +173,26 @@ u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* ma
 {
 	pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
 
-	// MAP_FIXED is okay here, since we've reserved the entire region, and *want* to overwrite the mapping.
 	const uint lnxmode = LinuxProt(mode);
-	void* const ptr = mmap(map_base, map_size, lnxmode, MAP_SHARED | MAP_FIXED,
-		static_cast<int>(reinterpret_cast<intptr_t>(file_handle)), static_cast<off_t>(file_offset));
-	if (ptr == MAP_FAILED)
-		return nullptr;
+	if (file_handle)
+	{
+		const int fd = static_cast<int>(reinterpret_cast<intptr_t>(file_handle));
+		// MAP_FIXED is okay here, since we've reserved the entire region, and *want* to overwrite the mapping.
+		void* const ptr = mmap(map_base, map_size, lnxmode, MAP_SHARED | MAP_FIXED, fd, static_cast<off_t>(file_offset));
+		if (ptr == MAP_FAILED)
+			return nullptr;
+	}
+	else
+	{
+		// macOS doesn't seem to allow MAP_JIT with MAP_FIXED
+		// So we do the MAP_JIT in the allocation, and just mprotect here
+		// Note that this will only work the first time for a given region
+		if (mprotect(map_base, map_size, lnxmode) < 0)
+			return nullptr;
+	}
 
 	m_num_mappings++;
-	return static_cast<u8*>(ptr);
+	return static_cast<u8*>(map_base);
 }
 
@@ -236,6 +206,8 @@ bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size)
 	return true;
 }
 
+#ifndef __APPLE__ // These are done in DarwinMisc
+
 namespace PageFaultHandler
 {
 	static std::recursive_mutex s_exception_handler_mutex;
@@ -370,3 +342,4 @@ bool PageFaultHandler::Install(Error* error)
 	s_installed = true;
 	return true;
 }
+#endif // __APPLE__
```
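The comments in the `Map()` hunk above carry the key constraint: macOS will not accept `MAP_JIT` together with `MAP_FIXED`, so the JIT flag has to be applied once, at reservation time, and later commits use `mprotect()` in place. A minimal self-contained sketch of that pattern (assuming a POSIX system; helper names are illustrative):

```cpp
#include <sys/mman.h>

// Reserve a JIT-capable region up front with no access rights.
void* reserve_jit_area(size_t size)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef __APPLE__
	flags |= MAP_JIT; // must be set here; cannot be added later via a MAP_FIXED remap
#endif
	void* base = mmap(nullptr, size, PROT_NONE, flags, -1, 0);
	return (base == MAP_FAILED) ? nullptr : base;
}

// Enable access to part of the reservation without replacing the mapping,
// which keeps the original MAP_JIT attribute intact.
bool commit_jit_pages(void* addr, size_t size)
{
	return mprotect(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC) == 0;
}
```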
```diff
@@ -35,22 +35,6 @@ static DWORD ConvertToWinApi(const PageProtectionMode& mode)
 	return winmode;
 }
 
-void* HostSys::Mmap(void* base, size_t size, const PageProtectionMode& mode)
-{
-	if (mode.IsNone())
-		return nullptr;
-
-	return VirtualAlloc(base, size, MEM_RESERVE | MEM_COMMIT, ConvertToWinApi(mode));
-}
-
-void HostSys::Munmap(void* base, size_t size)
-{
-	if (!base)
-		return;
-
-	VirtualFree((void*)base, 0, MEM_RELEASE);
-}
-
 void HostSys::MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode)
 {
 	pxAssert((size & (__pagesize - 1)) == 0);
@@ -77,29 +61,6 @@ void HostSys::DestroySharedMemory(void* ptr)
 	CloseHandle(static_cast<HANDLE>(ptr));
 }
 
-void* HostSys::MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode)
-{
-	void* ret = MapViewOfFileEx(static_cast<HANDLE>(handle), FILE_MAP_READ | FILE_MAP_WRITE,
-		static_cast<DWORD>(offset >> 32), static_cast<DWORD>(offset), size, baseaddr);
-	if (!ret)
-		return nullptr;
-
-	const DWORD prot = ConvertToWinApi(mode);
-	if (prot != PAGE_READWRITE)
-	{
-		DWORD old_prot;
-		if (!VirtualProtect(ret, size, prot, &old_prot))
-			pxFail("Failed to protect memory mapping");
-	}
-	return ret;
-}
-
-void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
-{
-	if (!UnmapViewOfFile(baseaddr))
-		pxFail("Failed to unmap shared memory");
-}
-
 size_t HostSys::GetRuntimePageSize()
 {
 	SYSTEM_INFO si = {};
@@ -183,7 +144,7 @@ SharedMemoryMappingArea::PlaceholderMap::iterator SharedMemoryMappingArea::FindP
 	return m_placeholder_ranges.end();
 }
 
-std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size)
+std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size, bool jit)
 {
 	pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned");
 
@@ -241,11 +202,22 @@ u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* ma
 	}
 
 	// actually do the mapping, replacing the placeholder on the range
-	if (!MapViewOfFile3(static_cast<HANDLE>(file_handle), GetCurrentProcess(),
-			map_base, file_offset, map_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0))
+	if (file_handle)
 	{
-		Console.Error("(SharedMemoryMappingArea) MapViewOfFile3() failed: %u", GetLastError());
-		return nullptr;
+		if (!MapViewOfFile3(static_cast<HANDLE>(file_handle), GetCurrentProcess(),
+				map_base, file_offset, map_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0))
+		{
+			Console.Error("(SharedMemoryMappingArea) MapViewOfFile3() failed: %u", GetLastError());
+			return nullptr;
+		}
+	}
+	else
+	{
+		if (!VirtualAlloc2(GetCurrentProcess(), map_base, map_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0))
+		{
+			Console.Error("(SharedMemoryMappingArea) VirtualAlloc2() failed: %u", GetLastError());
+			return nullptr;
+		}
 	}
 
 	const DWORD prot = ConvertToWinApi(mode);
```
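For background, the Windows `SharedMemoryMappingArea` relies on the placeholder API: the whole range is reserved once as a placeholder, and `Map()` then swaps pieces of it for real memory via `MapViewOfFile3()` or, as added above, `VirtualAlloc2()`. A hedged sketch of that pattern (my reading of the documented flags, not the exact PCSX2 code; per the docs the replace call also needs `MEM_RESERVE | MEM_COMMIT`):

```cpp
#include <windows.h>

// Reserve an address range as a placeholder: nothing is committed,
// but the range is held and can later be split and replaced piecewise.
void* reserve_placeholder(size_t size)
{
	return VirtualAlloc2(GetCurrentProcess(), nullptr, size,
		MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, nullptr, 0);
}

// Replace (part of) the placeholder with committed, anonymous memory in place.
void* commit_over_placeholder(void* base, size_t size)
{
	return VirtualAlloc2(GetCurrentProcess(), base, size,
		MEM_RESERVE | MEM_COMMIT | MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0);
}
```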
```diff
@@ -49,6 +49,7 @@
 
 
 thread_local u8* x86Ptr;
+thread_local u8* xTextPtr;
 thread_local XMMSSEType g_xmmtypes[iREGCNT_XMM] = {XMMT_INT};
 
 namespace x86Emitter
@@ -295,13 +296,27 @@ const xRegister32
 void EmitSibMagic(uint regfield, const void* address, int extraRIPOffset)
 {
 	sptr displacement = (sptr)address;
+	sptr textRelative = (sptr)address - (sptr)xTextPtr;
 	sptr ripRelative = (sptr)address - ((sptr)x86Ptr + sizeof(s8) + sizeof(s32) + extraRIPOffset);
+	// Can we use an 8-bit offset from the text pointer?
+	if (textRelative == (s8)textRelative && xTextPtr)
+	{
+		ModRM(1, regfield, RTEXTPTR.GetId());
+		xWrite<s8>((s8)textRelative);
+		return;
+	}
 	// Can we use a rip-relative address? (Prefer this over eiz because it's a byte shorter)
-	if (ripRelative == (s32)ripRelative)
+	else if (ripRelative == (s32)ripRelative)
 	{
 		ModRM(0, regfield, ModRm_UseDisp32);
 		displacement = ripRelative;
 	}
+	// How about from the text pointer?
+	else if (textRelative == (s32)textRelative && xTextPtr)
+	{
+		ModRM(2, regfield, RTEXTPTR.GetId());
+		displacement = textRelative;
+	}
 	else
 	{
 		pxAssertMsg(displacement == (s32)displacement, "SIB target is too far away, needs an indirect register");
@@ -539,6 +554,12 @@ const xRegister32
 	x86Ptr = (u8*)ptr;
 }
 
+// Assigns the current emitter text base address.
+__emitinline void xSetTextPtr(void* ptr)
+{
+	xTextPtr = (u8*)ptr;
+}
+
 // Retrieves the current emitter buffer target address.
 // This is provided instead of using x86Ptr directly, since we may in the future find
 // a need to change the storage class system for the x86Ptr 'under the hood.'
@@ -547,6 +568,12 @@ const xRegister32
 	return x86Ptr;
 }
 
+// Retrieves the current emitter text base address.
+__emitinline u8* xGetTextPtr()
+{
+	return xTextPtr;
+}
+
 __emitinline void xAlignPtr(uint bytes)
 {
 	// forward align
@@ -1229,6 +1256,9 @@ const xRegister32
 #endif
 
 	stackAlign(m_offset, true);
+
+	if (u8* ptr = xGetTextPtr())
+		xMOV64(RTEXTPTR, (sptr)ptr);
 }
 
 xScopedStackFrame::~xScopedStackFrame()
@@ -1285,11 +1315,14 @@ const xRegister32
 	{
 		return offset + base;
 	}
-	else
+	if (u8* ptr = xGetTextPtr())
 	{
-		xLEA(tmpRegister, ptr[base]);
-		return offset + tmpRegister;
+		sptr tbase = (sptr)base - (sptr)ptr;
+		if (tbase == (s32)tbase)
+			return offset + RTEXTPTR + tbase;
 	}
+	xLEA(tmpRegister, ptr[base]);
+	return offset + tmpRegister;
 }
 
 void xLoadFarAddr(const xAddressReg& dst, void* addr)
```
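The `EmitSibMagic` change above is a pure encoding-size optimization: it prefers an 8-bit displacement off the text-pointer register, then falls back to RIP-relative, then to a 32-bit text-pointer displacement. A sketch of the tradeoff and of the truncate-and-compare fit test it uses (byte counts are for a `mov eax, [mem]` form and are illustrative, not emitter output):

```cpp
// mov eax, [rbx + disp8]  -> 3 bytes (ModRM mode 1, RTEXTPTR base, 8-bit offset)
// mov eax, [rip + disp32] -> 6 bytes (ModRM mode 0, 32-bit offset)
// mov eax, [rbx + disp32] -> 6 bytes (ModRM mode 2, RTEXTPTR base, 32-bit offset)
// So the disp8 form wins whenever the target is within [-128, +127] bytes of
// xTextPtr, and the two disp32 forms cover anything within +/-2GB.
//
// A displacement fits in N bits iff truncating and sign-extending reproduces it:
static inline bool fits_s8(sptr v) { return v == (s8)v; }
static inline bool fits_s32(sptr v) { return v == (s32)v; }
```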
```diff
@@ -149,11 +149,13 @@ namespace x86Emitter
 	static const int Sib_UseDisp32 = 5; // same index value as EBP (used in Base field)
 
 	extern void xSetPtr(void* ptr);
+	extern void xSetTextPtr(void* ptr);
 	extern void xAlignPtr(uint bytes);
 	extern void xAdvancePtr(uint bytes);
 	extern void xAlignCallTarget();
 
 	extern u8* xGetPtr();
+	extern u8* xGetTextPtr();
 	extern u8* xGetAlignedCallTarget();
 
 	extern JccComparisonType xInvertCond(JccComparisonType src);
@@ -646,6 +648,8 @@ extern const xRegister32
 	calleeSavedReg1d,
 	calleeSavedReg2d;
 
+/// Holds a pointer to program text at all times so we don't need to be within 2GB of text
+static constexpr const xAddressReg& RTEXTPTR = rbx;
+
 // clang-format on
 
```
```diff
@@ -829,7 +829,7 @@ bool R5900DebugInterface::isValidAddress(u32 addr)
 			break;
 		case 8:
 		case 0xA:
-			if(lopart <= 0xFFFFF)
+			if (lopart <= 0xFFFFF)
 				return true;
 			break;
 		case 9:
```
```diff
@@ -4208,14 +4208,8 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, const boo
 	const bool alpha_mask = (m_cached_ctx.FRAME.FBMSK & 0xFF000000) == 0xFF000000;
 	bool blend_ad_alpha_masked = blend_ad && alpha_mask;
 	const bool is_basic_blend = GSConfig.AccurateBlendingUnit >= AccBlendLevel::Basic;
-	if ((is_basic_blend || (COLCLAMP.CLAMP == 0)) && features.texture_barrier && blend_ad_alpha_masked)
-	{
-		// Swap Ad with As for hw blend.
-		m_conf.ps.a_masked = 1;
-		m_conf.ps.blend_c = 0;
-		m_conf.require_one_barrier |= true;
-	}
-	else if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Medium) || m_conf.require_one_barrier) && blend_ad_alpha_masked)
+	if (blend_ad_alpha_masked && (((is_basic_blend || (COLCLAMP.CLAMP == 0)) && features.texture_barrier)
+		|| ((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Medium) || m_conf.require_one_barrier)))
 	{
 		// Swap Ad with As for hw blend.
 		m_conf.ps.a_masked = 1;
```
```diff
@@ -49,9 +49,6 @@ namespace Ps2MemSize
 
 namespace SysMemory
 {
-	static u8* TryAllocateVirtualMemory(const char* name, void* file_handle, uptr base, size_t size);
-	static u8* AllocateVirtualMemory(const char* name, void* file_handle, size_t size, size_t offset_from_base);
-
 	static bool AllocateMemoryMap();
 	static void DumpMemoryMap();
 	static void ReleaseMemoryMap();
@@ -59,6 +56,7 @@ namespace SysMemory
 	static u8* s_data_memory;
 	static void* s_data_memory_file_handle;
 	static u8* s_code_memory;
+	static std::unique_ptr<SharedMemoryMappingArea> s_memory_mapping_area;
 } // namespace SysMemory
 
 static void memAllocate();
@@ -86,77 +84,6 @@ namespace HostMemoryMap
 	}
 } // namespace HostMemoryMap
 
-u8* SysMemory::TryAllocateVirtualMemory(const char* name, void* file_handle, uptr base, size_t size)
-{
-	u8* baseptr;
-
-	if (file_handle)
-		baseptr = static_cast<u8*>(HostSys::MapSharedMemory(file_handle, 0, (void*)base, size, PageAccess_ReadWrite()));
-	else
-		baseptr = static_cast<u8*>(HostSys::Mmap((void*)base, size, PageAccess_Any()));
-
-	if (!baseptr)
-		return nullptr;
-
-	if (base != 0 && (uptr)baseptr != base)
-	{
-		if (file_handle)
-		{
-			if (baseptr)
-				HostSys::UnmapSharedMemory(baseptr, size);
-		}
-		else
-		{
-			if (baseptr)
-				HostSys::Munmap(baseptr, size);
-		}
-
-		return nullptr;
-	}
-
-	DevCon.WriteLn(Color_Gray, "%-32s @ 0x%016" PRIXPTR " -> 0x%016" PRIXPTR " %s", name,
-		baseptr, (uptr)baseptr + size, fmt::format("[{}mb]", size / _1mb).c_str());
-
-	return baseptr;
-}
-
-u8* SysMemory::AllocateVirtualMemory(const char* name, void* file_handle, size_t size, size_t offset_from_base)
-{
-	// ARM64 does not need the rec areas to be in +/- 2GB.
-#ifdef _M_X86
-	pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Virtual memory size is page aligned");
-
-	// Everything looks nicer when the start of all the sections is a nice round looking number.
-	// Also reduces the variation in the address due to small changes in code.
-	// Breaks ASLR but so does anything else that tries to make addresses constant for our debugging pleasure
-	uptr codeBase = (uptr)(void*)AllocateVirtualMemory / (1 << 28) * (1 << 28);
-
-	// The allocation is ~640mb in size, slighly under 3*2^28.
-	// We'll hope that the code generated for the PCSX2 executable stays under 512mb (which is likely)
-	// On x86-64, code can reach 8*2^28 from its address [-6*2^28, 4*2^28] is the region that allows for code in the 640mb allocation to reach 512mb of code that either starts at codeBase or 256mb before it.
-	// We start high and count down because on macOS code starts at the beginning of useable address space, so starting as far ahead as possible reduces address variations due to code size. Not sure about other platforms. Obviously this only actually affects what shows up in a debugger and won't affect performance or correctness of anything.
-	for (int offset = 4; offset >= -6; offset--)
-	{
-		uptr base = codeBase + (offset << 28) + offset_from_base;
-		if ((sptr)base < 0 || (sptr)(base + size - 1) < 0)
-		{
-			// VTLB will throw a fit if we try to put EE main memory here
-			continue;
-		}
-
-		if (u8* ret = TryAllocateVirtualMemory(name, file_handle, base, size))
-			return ret;
-
-		DevCon.Warning("%s: host memory @ 0x%016" PRIXPTR " -> 0x%016" PRIXPTR " is unavailable; attempting to map elsewhere...", name,
-			base, base + size);
-	}
-#else
-	return TryAllocateVirtualMemory(name, file_handle, 0, size);
-#endif
-
-	return nullptr;
-}
-
 bool SysMemory::AllocateMemoryMap()
 {
 	s_data_memory_file_handle = HostSys::CreateSharedMemory(HostSys::GetFileMappingName("pcsx2").c_str(), HostMemoryMap::MainSize);
@@ -167,16 +94,23 @@ bool SysMemory::AllocateMemoryMap()
 		return false;
 	}
 
-	if ((s_data_memory = AllocateVirtualMemory("Data Memory", s_data_memory_file_handle, HostMemoryMap::MainSize, 0)) == nullptr)
+	if (!(s_memory_mapping_area = SharedMemoryMappingArea::Create(HostMemoryMap::MainSize + HostMemoryMap::CodeSize, true)))
 	{
-		Host::ReportErrorAsync("Error", "Failed to map data memory at an acceptable location.");
+		Host::ReportErrorAsync("Error", "Failed to map main memory.");
 		ReleaseMemoryMap();
 		return false;
 	}
 
-	if ((s_code_memory = AllocateVirtualMemory("Code Memory", nullptr, HostMemoryMap::CodeSize, HostMemoryMap::MainSize)) == nullptr)
+	if ((s_data_memory = s_memory_mapping_area->Map(s_data_memory_file_handle, 0, s_memory_mapping_area->BasePointer(), HostMemoryMap::MainSize, PageAccess_ReadWrite())) == nullptr)
 	{
-		Host::ReportErrorAsync("Error", "Failed to allocate code memory at an acceptable location.");
+		Host::ReportErrorAsync("Error", "Failed to map data memory.");
+		ReleaseMemoryMap();
+		return false;
+	}
+
+	if ((s_code_memory = s_memory_mapping_area->Map(nullptr, 0, s_memory_mapping_area->OffsetPointer(HostMemoryMap::MainSize), HostMemoryMap::CodeSize, PageAccess_Any())) == nullptr)
+	{
+		Host::ReportErrorAsync("Error", "Failed to allocate code memory.");
 		ReleaseMemoryMap();
 		return false;
 	}
@@ -218,16 +152,18 @@ void SysMemory::ReleaseMemoryMap()
 {
 	if (s_code_memory)
 	{
-		HostSys::Munmap(s_code_memory, HostMemoryMap::CodeSize);
+		s_memory_mapping_area->Unmap(s_code_memory, HostMemoryMap::CodeSize);
 		s_code_memory = nullptr;
 	}
 
 	if (s_data_memory)
 	{
-		HostSys::UnmapSharedMemory(s_data_memory, HostMemoryMap::MainSize);
+		s_memory_mapping_area->Unmap(s_data_memory, HostMemoryMap::MainSize);
 		s_data_memory = nullptr;
 	}
 
+	s_memory_mapping_area.reset();
+
 	if (s_data_memory_file_handle)
 	{
 		HostSys::DestroySharedMemory(s_data_memory_file_handle);
```
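Net effect of the `AllocateMemoryMap()` rewrite above: instead of two separate allocations that each hunt for an acceptable address, data and code now live in one contiguous, JIT-capable `SharedMemoryMappingArea`, with the file-backed half mapped at the base and anonymous code memory immediately after it. A self-contained POSIX sketch of the same carve-one-reservation-into-two layout (sizes and the shm name are illustrative, not PCSX2's):

```cpp
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main()
{
	const size_t main_size = 16 * 1024 * 1024, code_size = 16 * 1024 * 1024;

	int fd = shm_open("/demo-mem", O_CREAT | O_RDWR, 0600);
	shm_unlink("/demo-mem"); // keep the object alive via the fd only
	ftruncate(fd, main_size);

	// Reserve one contiguous region covering both halves.
	char* base = (char*)mmap(nullptr, main_size + code_size, PROT_NONE,
		MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	// Overlay the data half with the shared file mapping (MAP_FIXED over our own reservation).
	mmap(base, main_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0);

	// Enable the code half in place, leaving the original mapping intact.
	mprotect(base + main_size, code_size, PROT_READ | PROT_WRITE);

	close(fd);
	return 0;
}
```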
```diff
@@ -364,14 +364,13 @@ struct eeProfiler
 		}
 	}
 
-	// Warning dirty ebx
-	void EmitMem()
+	void EmitMem(int addr_reg)
 	{
 		// Compact the 4GB virtual address to a 512KB virtual address
 		if (x86caps.hasBMI2)
 		{
-			xPEXT(ebx, ecx, ptr[&memMask]);
-			xADD(ptr32[(rbx * 4) + memStats], 1);
+			xPEXT(arg1regd, xRegister32(addr_reg), ptr[&memMask]);
+			xADD(ptr32[(arg1reg * 4) + memStats], 1);
 		}
 	}
 
@@ -403,7 +402,7 @@ struct eeProfiler
 	__fi void Reset() {}
 	__fi void EmitOp(eeOpcode op) {}
 	__fi void Print() {}
-	__fi void EmitMem() {}
+	__fi void EmitMem(int addrReg) {}
 	__fi void EmitConstMem(u32 add) {}
 	__fi void EmitSlowMem() {}
 	__fi void EmitFastMem() {}
```
```diff
@@ -52,6 +52,10 @@ bool _isAllocatableX86reg(int x86reg)
 	if (CHECK_FASTMEM && x86reg == 5)
 		return false;
 
+	// rbx is used to reference PCSX2 program text
+	if (xGetTextPtr() && x86reg == RTEXTPTR.GetId())
+		return false;
+
 #ifdef ENABLE_VTUNE
 	// vtune needs ebp...
 	if (!CHECK_FASTMEM && x86reg == 5)
```
```diff
@@ -175,10 +175,10 @@ static const void* _DynGen_JITCompile()
 	xFastCall((void*)iopRecRecompile, ptr32[&psxRegs.pc]);
 
 	xMOV(eax, ptr[&psxRegs.pc]);
-	xMOV(ebx, eax);
+	xMOV(edx, eax);
 	xSHR(eax, 16);
 	xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]);
-	xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);
+	xJMP(ptrNative[rdx * (wordsize / 4) + rcx]);
 
 	return retval;
 }
@@ -196,10 +196,10 @@ static const void* _DynGen_DispatcherReg()
 	u8* retval = xGetPtr();
 
 	xMOV(eax, ptr[&psxRegs.pc]);
-	xMOV(ebx, eax);
+	xMOV(edx, eax);
 	xSHR(eax, 16);
 	xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]);
-	xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);
+	xJMP(ptrNative[rdx * (wordsize / 4) + rcx]);
 
 	return retval;
 }
@@ -890,10 +890,13 @@ static void recReserve()
 		pxFailRel("Failed to allocate R3000 InstCache array.");
 	}
 
+#define R3000A_TEXTPTR (&psxRegs.GPR.r[33])
+
 void recResetIOP()
 {
 	DevCon.WriteLn("iR3000A Recompiler reset.");
 
+	xSetTextPtr(R3000A_TEXTPTR);
 	xSetPtr(SysMemory::GetIOPRec());
 	_DynGen_Dispatchers();
 	recPtr = xGetPtr();
@@ -1181,16 +1184,16 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
 	}
 	else
 	{
-		xMOV(ebx, ptr32[&psxRegs.cycle]);
-		xADD(ebx, blockCycles);
-		xMOV(ptr32[&psxRegs.cycle], ebx); // update cycles
+		xMOV(r12d, ptr32[&psxRegs.cycle]);
+		xADD(r12d, blockCycles);
+		xMOV(ptr32[&psxRegs.cycle], r12d); // update cycles
 
 		// jump if iopCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE)
 		iPsxAddEECycles(blockCycles);
 		xJLE(iopExitRecompiledCode);
 
 		// check if an event is pending
-		xSUB(ebx, ptr32[&psxRegs.iopNextEventCycle]);
+		xSUB(r12d, ptr32[&psxRegs.iopNextEventCycle]);
 		xForwardJS<u8> nointerruptpending;
 
 		xFastCall((void*)iopEventTest);
@@ -1565,6 +1568,7 @@ static void iopRecRecompile(const u32 startpc)
 		recResetIOP();
 	}
 
+	xSetTextPtr(R3000A_TEXTPTR);
 	xSetPtr(recPtr);
 	recPtr = xGetAlignedCallTarget();
 
```
```diff
@@ -21,6 +21,11 @@ extern u32 target; // branch target
 extern u32 s_nBlockCycles; // cycles of current block recompiling
 extern bool s_nBlockInterlocked; // Current block has VU0 interlocking
 
+// x86 can use shorter displacement if it fits in an s8, so offset 144 bytes into the cpuRegs
+// This will allow us to reach r1-r16 with a shorter encoding
+// TODO: Actually figure out what things are used most often, maybe rearrange the cpuRegs struct, and point at that
+#define R5900_TEXTPTR (&cpuRegs.GPR.r[9])
+
 //////////////////////////////////////////////////////////////////////////////////////////
 //
```
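A quick check of the arithmetic behind picking `GPR.r[9]` as the anchor (assuming 16-byte GPRs, which matches the EE's 128-bit registers; not from the diff itself):

```cpp
// Each EE GPR is 128 bits (16 bytes), so r[9] sits at byte offset 144.
// A signed 8-bit displacement reaches [144 - 128, 144 + 127] = [16, 271],
// i.e. r1 (offset 16) through r16 (offset 256) inclusive.
constexpr int kGprSize = 16;
static_assert(1 * kGprSize - 9 * kGprSize >= -128, "r1 reachable with disp8");
static_assert(16 * kGprSize - 9 * kGprSize <= 127, "r16 reachable with disp8");
```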
```diff
@@ -381,10 +381,10 @@ static const void* _DynGen_JITCompile()
 	// void(**base)() = (void(**)())recLUT[addr >> 16];
 	// base[addr >> 2]();
 	xMOV(eax, ptr[&cpuRegs.pc]);
-	xMOV(ebx, eax);
+	xMOV(edx, eax);
 	xSHR(eax, 16);
 	xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]);
-	xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);
+	xJMP(ptrNative[rdx * (wordsize / 4) + rcx]);
 
 	return retval;
 }
@@ -406,10 +406,10 @@ static const void* _DynGen_DispatcherReg()
 	// void(**base)() = (void(**)())recLUT[addr >> 16];
 	// base[addr >> 2]();
 	xMOV(eax, ptr[&cpuRegs.pc]);
-	xMOV(ebx, eax);
+	xMOV(edx, eax);
 	xSHR(eax, 16);
 	xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]);
-	xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);
+	xJMP(ptrNative[rdx * (wordsize / 4) + rcx]);
 
 	return retval;
 }
@@ -445,6 +445,8 @@ static const void* _DynGen_EnterRecompiledCode()
 	xSUB(rsp, stack_size);
 #endif
 
+	if (u8* ptr = xGetTextPtr())
+		xMOV64(RTEXTPTR, (sptr)ptr);
 	if (CHECK_FASTMEM)
 		xMOV(RFASTMEMBASE, ptrNative[&vtlb_private::vtlbdata.fastmem_base]);
 
@@ -585,6 +587,7 @@ static void recResetRaw()
 
 	EE::Profiler.Reset();
 
+	xSetTextPtr(R5900_TEXTPTR);
 	xSetPtr(SysMemory::GetEERec());
 	_DynGen_Dispatchers();
 	vtlb_DynGenDispatchers();
@@ -897,6 +900,7 @@ u8* recBeginThunk()
 	if (recPtr >= recPtrEnd)
 		eeRecNeedsReset = true;
 
+	xSetTextPtr(R5900_TEXTPTR);
 	xSetPtr(recPtr);
 	recPtr = xGetAlignedCallTarget();
 
@@ -2138,26 +2142,26 @@ static bool recSkipTimeoutLoop(s32 reg, bool is_timeout_loop)
 	// if new_v0 > 0 { jump to dispatcher because loop exited early }
 	// else new_v0 is 0, so exit loop
 
-	xMOV(ebx, ptr32[&cpuRegs.cycle]); // ebx = cycle
+	xMOV(r12d, ptr32[&cpuRegs.cycle]); // r12d = cycle
 	xMOV(ecx, ptr32[&cpuRegs.nextEventCycle]); // ecx = nextEventCycle
-	xCMP(ebx, ecx);
+	xCMP(r12d, ecx);
 	//xJAE((void*)DispatcherEvent); // jump to dispatcher if event immediately
 
 	// TODO: In the case where nextEventCycle < cycle because it's overflowed, tack 8
 	// cycles onto the event count, so hopefully it'll wrap around. This is pretty
 	// gross, but until we switch to 64-bit counters, not many better options.
 	xForwardJB8 not_dispatcher;
-	xADD(ebx, 8);
-	xMOV(ptr32[&cpuRegs.cycle], ebx);
+	xADD(r12d, 8);
+	xMOV(ptr32[&cpuRegs.cycle], r12d);
 	xJMP((void*)DispatcherEvent);
 	not_dispatcher.SetTarget();
 
 	xMOV(edx, ptr32[&cpuRegs.GPR.r[reg].UL[0]]); // eax = v0
-	xLEA(rax, ptrNative[rdx * 8 + rbx]); // edx = v0 * 8 + cycle
+	xLEA(rax, ptrNative[rdx * 8 + r12]); // edx = v0 * 8 + cycle
 	xCMP(rcx, rax);
 	xCMOVB(rax, rcx); // eax = new_cycles = min(v8 * 8, nextEventCycle)
 	xMOV(ptr32[&cpuRegs.cycle], eax); // writeback new_cycles
-	xSUB(eax, ebx); // new_cycles -= cycle
+	xSUB(eax, r12d); // new_cycles -= cycle
 	xSHR(eax, 3); // compute new v0 value
 	xSUB(edx, eax); // v0 -= cycle_diff
 	xMOV(ptr32[&cpuRegs.GPR.r[reg].UL[0]], edx); // write back new value of v0
@@ -2191,6 +2195,7 @@ static void recRecompile(const u32 startpc)
 		recResetRaw();
 	}
 
+	xSetTextPtr(R5900_TEXTPTR);
 	xSetPtr(recPtr);
 	recPtr = xGetAlignedCallTarget();
 
```
```diff
@@ -119,14 +119,12 @@ static void __vectorcall LogWriteQuad(u32 addr, __m128i val)
 namespace vtlb_private
 {
 	// ------------------------------------------------------------------------
-	// Prepares eax, ecx, and, ebx for Direct or Indirect operations.
-	// Returns the writeback pointer for ebx (return address from indirect handling)
+	// Prepares eax and ecx for Direct or Indirect operations.
 	//
 	static void DynGen_PrepRegs(int addr_reg, int value_reg, u32 sz, bool xmm)
 	{
-		EE::Profiler.EmitMem();
-
 		_freeX86reg(arg1regd);
+		EE::Profiler.EmitMem(addr_reg);
 		xMOV(arg1regd, xRegister32(addr_reg));
 
 		if (value_reg >= 0)
@@ -269,7 +267,7 @@ static void DynGen_HandlerTest(const GenDirectFn& gen_direct, int mode, int bits
 
 	// ------------------------------------------------------------------------
 	// Generates the various instances of the indirect dispatchers
-	// In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64), rbx: function return ptr
+	// In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64)
 	// Out: eax: result (if mode < 64)
 	static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign)
 	{
@@ -347,6 +345,7 @@ void vtlb_DynGenDispatchers()
 			for (int sign = 0; sign < (!mode && bits < 3 ? 2 : 1); sign++)
 			{
 				xSetPtr(GetIndirectDispatcherPtr(mode, bits, !!sign));
+				xSetTextPtr(R5900_TEXTPTR);
 
 				DynGen_IndirectTlbDispatcher(mode, bits, !!sign);
 			}
@@ -939,14 +938,13 @@ void vtlb_DynBackpatchLoadStore(uptr code_address, u32 code_size, u32 guest_pc,
 	u32 num_gprs = 0;
 	u32 num_fprs = 0;
 
-	const u32 rbxid = static_cast<u32>(rbx.GetId());
 	const u32 arg1id = static_cast<u32>(arg1reg.GetId());
 	const u32 arg2id = static_cast<u32>(arg2reg.GetId());
 	const u32 arg3id = static_cast<u32>(arg3reg.GetId());
 
 	for (u32 i = 0; i < iREGCNT_GPR; i++)
 	{
-		if ((gpr_bitmask & (1u << i)) && (i == rbxid || i == arg1id || i == arg2id || xRegisterBase::IsCallerSaved(i)) && (!is_load || is_xmm || data_register != i))
+		if ((gpr_bitmask & (1u << i)) && (i == arg1id || i == arg2id || xRegisterBase::IsCallerSaved(i)) && (!is_load || is_xmm || data_register != i))
 			num_gprs++;
 	}
 	for (u32 i = 0; i < iREGCNT_XMM; i++)
```
```diff
@@ -42,6 +42,7 @@ void mVUreset(microVU& mVU, bool resetReserve)
 		VU0.VI[REG_VPU_STAT].UL &= ~0x100;
 	}
 
+	xSetTextPtr(mVU.textPtr());
 	xSetPtr(mVU.cache);
 	mVUdispatcherAB(mVU);
 	mVUdispatcherCD(mVU);
```
```diff
@@ -123,6 +123,7 @@ struct microVU
 	s32 cycles; // Cycles Counter
 
 	VURegs& regs() const { return ::vuRegs[index]; }
+	void* textPtr() const { return (index && THREAD_VU1) ? (void*)&regs().VF[9] : (void*)R5900_TEXTPTR; }
 
 	__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
 	__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }
```
@ -207,15 +207,17 @@ static void mVUGenerateCopyPipelineState(mV)
|
||||||
{
|
{
|
||||||
mVU.copyPLState = xGetAlignedCallTarget();
|
mVU.copyPLState = xGetAlignedCallTarget();
|
||||||
|
|
||||||
|
xLoadFarAddr(rdx, reinterpret_cast<u8*>(&mVU.prog.lpState));
|
||||||
|
|
||||||
if (cpuinfo_has_x86_avx())
|
if (cpuinfo_has_x86_avx())
|
||||||
{
|
{
|
||||||
xVMOVAPS(ymm0, ptr[rax]);
|
xVMOVAPS(ymm0, ptr[rax]);
|
||||||
xVMOVAPS(ymm1, ptr[rax + 32u]);
|
xVMOVAPS(ymm1, ptr[rax + 32u]);
|
||||||
xVMOVAPS(ymm2, ptr[rax + 64u]);
|
xVMOVAPS(ymm2, ptr[rax + 64u]);
|
||||||
|
|
||||||
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], ymm0);
|
xVMOVUPS(ptr[rdx], ymm0);
|
||||||
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 32u], ymm1);
|
xVMOVUPS(ptr[rdx + 32u], ymm1);
|
||||||
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], ymm2);
|
xVMOVUPS(ptr[rdx + 64u], ymm2);
|
||||||
|
|
||||||
xVZEROUPPER();
|
xVZEROUPPER();
|
||||||
}
|
}
|
||||||
|
@@ -228,12 +230,12 @@ static void mVUGenerateCopyPipelineState(mV)
 		xMOVAPS(xmm4, ptr[rax + 64u]);
 		xMOVAPS(xmm5, ptr[rax + 80u]);
 
-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], xmm0);
-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 16u], xmm1);
-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 32u], xmm2);
-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 48u], xmm3);
-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], xmm4);
-		xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 80u], xmm5);
+		xMOVUPS(ptr[rdx], xmm0);
+		xMOVUPS(ptr[rdx + 16u], xmm1);
+		xMOVUPS(ptr[rdx + 32u], xmm2);
+		xMOVUPS(ptr[rdx + 48u], xmm3);
+		xMOVUPS(ptr[rdx + 64u], xmm4);
+		xMOVUPS(ptr[rdx + 80u], xmm5);
 	}
 
 	xRET();

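Both halves of `mVUGenerateCopyPipelineState` copy the same 96 bytes of pipeline state; the source address already arrives in rax, and the rewrite hoists the destination (`&mVU.prog.lpState`) into rdx once instead of re-encoding its absolute address in every store. The net effect of the emitted routine, as a plain C++ stand-in (the 96-byte size is read off the visible offsets, not stated in the diff):

#include <cstring>

// What the generated code does: 96 bytes of lpState copied from the block in
// rax to the destination in rdx (3 x 32-byte YMM moves with AVX, otherwise
// 6 x 16-byte XMM moves).
static void CopyPipelineState(void* lpState /* rdx */, const void* src /* rax */)
{
	std::memcpy(lpState, src, 96);
}
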
@@ -326,6 +328,7 @@ _mVUt void* mVUexecute(u32 startPC, u32 cycles)
 	mVU.cycles = cycles;
 	mVU.totalCycles = cycles;
 
+	xSetTextPtr(mVU.textPtr());
 	xSetPtr(mVU.prog.x86ptr); // Set x86ptr to where last program left off
 	return mVUsearchProg<vuIndex>(startPC & vuLimit, (uptr)&mVU.prog.lpState); // Find and set correct program
 }

@@ -411,6 +411,7 @@ public:
 		}
 	}
 
+	gprMap[RTEXTPTR.GetId()].usable = !xGetTextPtr();
 	gprMap[RFASTMEMBASE.GetId()].usable = !cop2mode || !CHECK_FASTMEM;
 }
 

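RTEXTPTR joins RFASTMEMBASE as a conditionally pinned register: it is handed to the allocator only while no text pointer is active. A compressed illustration of the gating idea (the types and this helper are hypothetical; the real gprMap layout isn't shown in the hunk):

struct RegMapEntry { bool usable; };

// Release each pinned base register to the allocator exactly when the
// feature that owns it is inactive, mirroring the two lines above.
static void UpdatePinnedRegs(RegMapEntry* gprMap, int textPtrId, int fastmemId,
                             const void* activeTextPtr, bool cop2mode, bool fastmemEnabled)
{
	gprMap[textPtrId].usable = (activeTextPtr == nullptr);
	gprMap[fastmemId].usable = !cop2mode || !fastmemEnabled;
}
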
@@ -1106,7 +1106,7 @@ mVUop(mVU_ILW)
 		mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
 		if (_Imm11_ != 0)
 			xADD(gprT1, _Imm11_);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 	}
 
 	const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);

@@ -1133,7 +1133,7 @@ mVUop(mVU_ILWR)
 	if (_Is_)
 	{
 		mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
-		mVUaddrFix (mVU, gprT1q);
+		mVUaddrFix (mVU, gprT1q, gprT2q);
 
 		const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
 		xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);

@@ -1170,7 +1170,7 @@ mVUop(mVU_ISW)
 		mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
 		if (_Imm11_ != 0)
 			xADD(gprT1, _Imm11_);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 	}
 
 	// If regT is dirty, the high bits might not be zero.

@@ -1201,7 +1201,7 @@ mVUop(mVU_ISWR)
 	if (_Is_)
 	{
 		mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 		is = gprT1q;
 	}
 	const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);

@@ -1257,7 +1257,7 @@ mVUop(mVU_LQ)
 		mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
 		if (_Imm11_ != 0)
 			xADD(gprT1, _Imm11_);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 	}
 
 	const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);

@@ -1281,7 +1281,7 @@ mVUop(mVU_LQD)
 		xDEC(regS);
 		xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
 		mVU.regAlloc->clearNeeded(regS);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 		is = gprT1q;
 	}
 	else

@@ -1319,7 +1319,7 @@ mVUop(mVU_LQI)
 		xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
 		xINC(regS);
 		mVU.regAlloc->clearNeeded(regS);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 		is = gprT1q;
 	}
 	if (!mVUlow.noWriteVF)

@@ -1351,7 +1351,7 @@ mVUop(mVU_SQ)
 		mVU.regAlloc->moveVIToGPR(gprT1, _It_);
 		if (_Imm11_ != 0)
 			xADD(gprT1, _Imm11_);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 	}
 
 	const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);

@@ -1375,7 +1375,7 @@ mVUop(mVU_SQD)
 		xDEC(regT);
 		xMOVZX(gprT1, xRegister16(regT));
 		mVU.regAlloc->clearNeeded(regT);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 		it = gprT1q;
 	}
 	else

@@ -1405,7 +1405,7 @@ mVUop(mVU_SQI)
 		xMOVZX(gprT1, xRegister16(regT));
 		xINC(regT);
 		mVU.regAlloc->clearNeeded(regT);
-		mVUaddrFix(mVU, gprT1q);
+		mVUaddrFix(mVU, gprT1q, gprT2q);
 	}
 	const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
 	if (_It_)

@@ -147,10 +147,10 @@ static const char branchSTR[16][8] = {
 #define gprT1b ax // Low 16-bit of gprT1 (eax)
 #define gprT2b cx // Low 16-bit of gprT2 (ecx)
 
-#define gprF0 ebx // Status Flag 0
-#define gprF1 r12d // Status Flag 1
-#define gprF2 r13d // Status Flag 2
-#define gprF3 r14d // Status Flag 3
+#define gprF0 r12d // Status Flag 0
+#define gprF1 r13d // Status Flag 1
+#define gprF2 r14d // Status Flag 2
+#define gprF3 r15d // Status Flag 3
 
 // Function Params
 #define mP microVU& mVU, int recPass

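Shifting the four status-flag registers from ebx/r12d/r13d/r14d up to r12d-r15d vacates rbx, presumably for the new text-pointer base, while staying inside the callee-saved pool, so flag values still survive calls out of generated code. For reference (an ABI fact, not PCSX2 code), the GPRs callee-saved under both the System V and Windows x86-64 conventions:

#include <cstdint>

// Registers preserved across calls by the callee in both major x86-64
// calling conventions (besides rsp): rbx, rbp, r12-r15.
static constexpr uint8_t kCalleeSavedGprs[] = {
	3,              // rbx
	5,              // rbp
	12, 13, 14, 15, // r12-r15
};
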
@@ -295,7 +295,7 @@ static void mVUwaitMTVU()
 }
 
 // Transforms the Address in gprReg to valid VU0/VU1 Address
-__fi void mVUaddrFix(mV, const xAddressReg& gprReg)
+__fi void mVUaddrFix(mV, const xAddressReg& gprReg, const xAddressReg& tmpReg)
 {
 	if (isVU1)
 	{

@@ -324,7 +324,16 @@ __fi void mVUaddrFix(mV, const xAddressReg& gprReg)
 			xFastCall((void*)mVU.waitMTVU);
 		}
 		xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
-		xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
+		sptr offset = (u128*)VU1.VF - (u128*)VU0.Mem;
+		if (offset == (s32)offset)
+		{
+			xADD(gprReg, offset);
+		}
+		else
+		{
+			xMOV64(tmpReg, offset);
+			xADD(gprReg, tmpReg);
+		}
 		jmpB.SetTarget();
 		xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
 	}

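This hunk is why every mVUaddrFix call site above gained a gprT2q argument: the VU0-to-VU1 pointer delta may no longer fit in an instruction immediate. x86-64 `ADD r64, imm32` sign-extends a 32-bit immediate, so the offset is emitted inline only when it round-trips through s32; otherwise it is materialized in the new tmpReg via `xMOV64` and added as a register. The encodability test in isolation:

#include <cstdint>

// An offset fits x86-64's sign-extended imm32 encoding iff truncating it to
// int32_t and extending back is lossless -- the same test the hunk performs.
static bool EncodableAsImm32(int64_t offset)
{
	return offset == static_cast<int64_t>(static_cast<int32_t>(offset));
}
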
@@ -23,7 +23,8 @@ void dVifRelease(int idx)
 }
 
 VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
-	: v(vif_)
+	: vifPtr(rax)
+	, v(vif_)
 	, vB(vifBlock_)
 {
 	const int wl = vB.wl ? vB.wl : 256; //0 is taken as 256 (KH2)

@@ -42,9 +43,6 @@ __fi void makeMergeMask(u32& x)
 
 __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
 {
-	const int idx = v.idx;
-	const vifStruct& vif = MTVU_VifX;
-
 	//This could have ended up copying the row when there was no row to write.1810080
 	u32 m0 = vB.mask; //The actual mask example 0x03020100
 	u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge)

@@ -52,14 +50,14 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
 
 	if ((doMask && m2) || doMode)
 	{
-		xMOVAPS(xmmRow, ptr128[&vif.MaskRow]);
+		xMOVAPS(xmmRow, ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskRow)]);
 		MSKPATH3_LOG("Moving row");
 	}
 
 	if (doMask && m3)
 	{
 		VIF_LOG("Merging Cols");
-		xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
+		xMOVAPS(xmmCol0, ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskCol)]);
 		if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
 		if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
 		if ((cS >= 4) && (m3 & 0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);

@@ -137,8 +135,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
 
 void VifUnpackSSE_Dynarec::writeBackRow() const
 {
-	const int idx = v.idx;
-	xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
+	xMOVAPS(ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskRow)], xmmRow);
 
 	VIF_LOG("nVif: writing back row reg! [doMode = %d]", doMode);
 }

@@ -239,6 +236,7 @@ void VifUnpackSSE_Dynarec::ProcessMasks()
 
 void VifUnpackSSE_Dynarec::CompileRoutine()
 {
+	const int idx = v.idx;
 	const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2)
 	const int upkNum = vB.upkType & 0xf;
 	const u8& vift = nVifT[upkNum];

@@ -252,6 +250,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
 	VIF_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
 
 	pxAssume(vCL == 0);
+	xLoadFarAddr(vifPtr, &MTVU_VifX);
 
 	// Value passed determines # of col regs we need to load
 	SetMasks(isFill ? blockSize : cycleSize);

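Taken together, these dynarec hunks swap absolute-address operands for struct-relative ones: `CompileRoutine()` loads the target vifStruct's address into vifPtr once, and `SetMasks()`/`writeBackRow()` then address MaskRow/MaskCol as `[vifPtr + offsetof(...)]`, which stays valid wherever the struct lands in a 64-bit address space. What that operand computes at runtime, in plain C++ with a stand-in struct (VifLike is hypothetical, for illustration only):

#include <cstddef>
#include <cstdint>

struct VifLike { uint32_t MaskRow[4]; uint32_t MaskCol[4]; }; // stand-in only

// Runtime meaning of ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskRow)]:
// base pointer plus a compile-time field offset.
static const void* RowAddr(const VifLike* vifPtr)
{
	return reinterpret_cast<const uint8_t*>(vifPtr) + offsetof(VifLike, MaskRow);
}
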
@@ -336,6 +335,7 @@ _vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill)
 	}
 
 	// Compile the block now
+	xSetTextPtr(nullptr);
 	xSetPtr(v.recWritePtr);
 
 	block.startPtr = (uptr)xGetAlignedCallTarget();

@@ -329,9 +329,11 @@ void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const
 {
 	xMOVAPS(xmm7, ptr[dstIndirect]);
 	int offX = std::min(curCycle, 3);
-	xPAND(regX, ptr32[nVifMask[0][offX]]);
-	xPAND(xmm7, ptr32[nVifMask[1][offX]]);
-	xPOR (regX, ptr32[nVifMask[2][offX]]);
+	sptr base = reinterpret_cast<sptr>(nVifMask[2]);
+	xLoadFarAddr(rax, nVifMask);
+	xPAND(regX, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[0][offX]) - base)]);
+	xPAND(xmm7, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[1][offX]) - base)]);
+	xPOR (regX, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[2][offX]) - base)]);
 	xPOR (regX, xmm7);
 	xMOVAPS(ptr[dstIndirect], regX);
 }

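The interpreter-path mask write gets the same treatment: a single `xLoadFarAddr` puts a mask-table address in rax, and each 128-bit operand is then addressed by its delta from a chosen base, computed with ordinary pointer arithmetic at emit time (note the operands also widen from `ptr32` to `ptr128`, matching the XMM-sized masks). The delta computation in isolation, with a stand-in table (names are illustrative; the real code uses nVifMask):

#include <cstdint>

alignas(16) static uint32_t masks[3][4][4]; // stand-in for nVifMask

// Emit-time delta of one mask row from the base captured alongside it; the
// generated code then reads the row as [rax + delta].
static intptr_t MaskDelta(const void* base, int which, int offX)
{
	return reinterpret_cast<intptr_t>(masks[which][offX]) -
	       reinterpret_cast<intptr_t>(base);
}
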
@@ -362,6 +364,7 @@ void VifUnpackSSE_Init()
 {
 	DevCon.WriteLn("Generating SSE-optimized unpacking functions for VIF interpreters...");
 
+	xSetTextPtr(nullptr);
 	xSetPtr(SysMemory::GetVIFUnpackRec());
 
 	for (int a = 0; a < 2; a++)

@@ -98,6 +98,7 @@ public:
 	bool inputMasked;
 
 protected:
+	xAddressReg vifPtr;
 	const nVifStruct& v; // vif0 or vif1
 	const nVifBlock& vB; // some pre-collected data from VifStruct
 	int vCL; // internal copy of vif->cl