Compare commits

...

10 Commits

Author SHA1 Message Date
TellowKrinkle 8e438a305d
Merge 87b795e1c6 into 5486eed151 2024-09-18 20:21:55 -03:00
lightningterror 5486eed151 GS/HW: Merge blend ad alpha mask separate conditions into one.
Duplicate code.
2024-09-19 00:19:05 +02:00
Ty Lamontagne d1721360ff
DebugInterface: Fix formatting 2024-09-18 16:57:20 -04:00
TellowKrinkle 87b795e1c6 Common: Remove unused functions 2024-08-21 03:12:32 -05:00
TellowKrinkle 2eabebc82a Core: Reserve data and code areas together
They need to stay near each other for the x86 JIT to work
2024-08-21 03:12:32 -05:00
TellowKrinkle 51c7a723db Common: Allow non-file mappings in SharedMemoryMappingArea 2024-08-21 03:12:32 -05:00
TellowKrinkle d8b8af44a0 Common: Switch back to Linux allocation routines on macOS
We don't need MAP_FIXED anymore
2024-08-21 03:12:32 -05:00
TellowKrinkle 5a6b3cba6e Core: Map sys memory anywhere 2024-08-21 03:12:32 -05:00
TellowKrinkle fe2f97eeb5 EE:Rec: Allow rec memory anywhere 2024-08-21 03:12:32 -05:00
TellowKrinkle 8a9fbb43e6 EE:Rec: Avoid rbx
Will be used for holding a pointer to the PCSX2 text section
2024-08-21 00:52:16 -05:00
26 changed files with 211 additions and 473 deletions

View File

@ -149,6 +149,7 @@ elseif(APPLE)
Darwin/DarwinThreads.cpp
Darwin/DarwinMisc.cpp
Darwin/DarwinMisc.h
Linux/LnxHostSys.cpp
)
target_compile_options(common PRIVATE -fobjc-arc)
target_link_options(common PRIVATE -fobjc-link-runtime)

View File

@ -16,16 +16,10 @@
#include <cstring>
#include <cstdlib>
#include <optional>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <time.h>
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <mach/mach_time.h>
#include <mach/mach_vm.h>
#include <mach/task.h>
#include <mach/vm_map.h>
#include <mutex>
#include <IOKit/pwr_mgt/IOPMLib.h>
@ -201,200 +195,6 @@ size_t HostSys::GetRuntimeCacheLineSize()
return static_cast<size_t>(std::max<s64>(sysctlbyname_T<s64>("hw.cachelinesize").value_or(0), 0));
}
static __ri vm_prot_t MachProt(const PageProtectionMode& mode)
{
vm_prot_t machmode = (mode.CanWrite()) ? VM_PROT_WRITE : 0;
machmode |= (mode.CanRead()) ? VM_PROT_READ : 0;
machmode |= (mode.CanExecute()) ? (VM_PROT_EXECUTE | VM_PROT_READ) : 0;
return machmode;
}
void* HostSys::Mmap(void* base, size_t size, const PageProtectionMode& mode)
{
pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned");
if (mode.IsNone())
return nullptr;
#ifdef __aarch64__
// We can't allocate executable memory with mach_vm_allocate() on Apple Silicon.
// Instead, we need to use MAP_JIT with mmap(), which does not support fixed mappings.
if (mode.CanExecute())
{
if (base)
return nullptr;
const u32 mmap_prot = mode.CanWrite() ? (PROT_READ | PROT_WRITE | PROT_EXEC) : (PROT_READ | PROT_EXEC);
const u32 flags = MAP_PRIVATE | MAP_ANON | MAP_JIT;
void* const res = mmap(nullptr, size, mmap_prot, flags, -1, 0);
return (res == MAP_FAILED) ? nullptr : res;
}
#endif
kern_return_t ret = mach_vm_allocate(mach_task_self(), reinterpret_cast<mach_vm_address_t*>(&base), size,
base ? VM_FLAGS_FIXED : VM_FLAGS_ANYWHERE);
if (ret != KERN_SUCCESS)
{
DEV_LOG("mach_vm_allocate() returned {}", ret);
return nullptr;
}
ret = mach_vm_protect(mach_task_self(), reinterpret_cast<mach_vm_address_t>(base), size, false, MachProt(mode));
if (ret != KERN_SUCCESS)
{
DEV_LOG("mach_vm_protect() returned {}", ret);
mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(base), size);
return nullptr;
}
return base;
}
void HostSys::Munmap(void* base, size_t size)
{
if (!base)
return;
mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(base), size);
}
void HostSys::MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode)
{
pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned");
kern_return_t res = mach_vm_protect(mach_task_self(), reinterpret_cast<mach_vm_address_t>(baseaddr), size, false,
MachProt(mode));
if (res != KERN_SUCCESS) [[unlikely]]
{
ERROR_LOG("mach_vm_protect() failed: {}", res);
pxFailRel("mach_vm_protect() failed");
}
}
std::string HostSys::GetFileMappingName(const char* prefix)
{
// name actually is not used.
return {};
}
void* HostSys::CreateSharedMemory(const char* name, size_t size)
{
mach_vm_size_t vm_size = size;
mach_port_t port;
const kern_return_t res = mach_make_memory_entry_64(
mach_task_self(), &vm_size, 0, MAP_MEM_NAMED_CREATE | VM_PROT_READ | VM_PROT_WRITE, &port, MACH_PORT_NULL);
if (res != KERN_SUCCESS)
{
ERROR_LOG("mach_make_memory_entry_64() failed: {}", res);
return nullptr;
}
return reinterpret_cast<void*>(static_cast<uintptr_t>(port));
}
void HostSys::DestroySharedMemory(void* ptr)
{
mach_port_deallocate(mach_task_self(), static_cast<mach_port_t>(reinterpret_cast<uintptr_t>(ptr)));
}
void* HostSys::MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode)
{
mach_vm_address_t ptr = reinterpret_cast<mach_vm_address_t>(baseaddr);
const kern_return_t res = mach_vm_map(mach_task_self(), &ptr, size, 0, baseaddr ? VM_FLAGS_FIXED : VM_FLAGS_ANYWHERE,
static_cast<mach_port_t>(reinterpret_cast<uintptr_t>(handle)), offset, FALSE,
MachProt(mode), VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_NONE);
if (res != KERN_SUCCESS)
{
ERROR_LOG("mach_vm_map() failed: {}", res);
return nullptr;
}
return reinterpret_cast<void*>(ptr);
}
void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
{
const kern_return_t res = mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(baseaddr), size);
if (res != KERN_SUCCESS)
pxFailRel("Failed to unmap shared memory");
}
#ifdef _M_ARM64
void HostSys::FlushInstructionCache(void* address, u32 size)
{
__builtin___clear_cache(reinterpret_cast<char*>(address), reinterpret_cast<char*>(address) + size);
}
#endif
SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages)
: m_base_ptr(base_ptr)
, m_size(size)
, m_num_pages(num_pages)
{
}
SharedMemoryMappingArea::~SharedMemoryMappingArea()
{
pxAssertRel(m_num_mappings == 0, "No mappings left");
if (mach_vm_deallocate(mach_task_self(), reinterpret_cast<mach_vm_address_t>(m_base_ptr), m_size) != KERN_SUCCESS)
pxFailRel("Failed to release shared memory area");
}
std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size)
{
pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned");
mach_vm_address_t alloc;
const kern_return_t res =
mach_vm_map(mach_task_self(), &alloc, size, 0, VM_FLAGS_ANYWHERE,
MEMORY_OBJECT_NULL, 0, false, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_NONE);
if (res != KERN_SUCCESS)
{
ERROR_LOG("mach_vm_map() failed: {}", res);
return {};
}
return std::unique_ptr<SharedMemoryMappingArea>(new SharedMemoryMappingArea(reinterpret_cast<u8*>(alloc), size, size / __pagesize));
}
u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* map_base, size_t map_size, const PageProtectionMode& mode)
{
pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
const kern_return_t res =
mach_vm_map(mach_task_self(), reinterpret_cast<mach_vm_address_t*>(&map_base), map_size, 0, VM_FLAGS_OVERWRITE,
static_cast<mach_port_t>(reinterpret_cast<uintptr_t>(file_handle)), file_offset, false,
MachProt(mode), VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_NONE);
if (res != KERN_SUCCESS) [[unlikely]]
{
ERROR_LOG("mach_vm_map() failed: {}", res);
return nullptr;
}
m_num_mappings++;
return static_cast<u8*>(map_base);
}
bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size)
{
pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
const kern_return_t res =
mach_vm_map(mach_task_self(), reinterpret_cast<mach_vm_address_t*>(&map_base), map_size, 0, VM_FLAGS_OVERWRITE,
MEMORY_OBJECT_NULL, 0, false, VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_NONE);
if (res != KERN_SUCCESS) [[unlikely]]
{
ERROR_LOG("mach_vm_map() failed: {}", res);
return false;
}
m_num_mappings--;
return true;
}
#ifdef _M_ARM64
static thread_local int s_code_write_depth = 0;
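
The removed mach_vm code above is replaced by the shared Linux path, but the Apple Silicon constraint it describes still applies: executable pages must come from mmap() with MAP_JIT, and writes to them have to be bracketed by the per-thread JIT write-protect toggle that the s_code_write_depth counter above belongs to. A minimal standalone sketch of that pattern (helper names are illustrative and not part of the diff; MAP_JIT also requires the allow-jit entitlement under the hardened runtime):

#include <sys/mman.h>                 // mmap(), MAP_JIT
#include <pthread.h>                  // pthread_jit_write_protect_np()
#include <libkern/OSCacheControl.h>   // sys_icache_invalidate()
#include <cstring>

// Reserve RWX JIT memory on Apple Silicon; without MAP_JIT this mmap() fails.
void* AllocJitRegion(size_t size)
{
	return mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC,
		MAP_PRIVATE | MAP_ANON | MAP_JIT, -1, 0);
}

// Copy freshly emitted code into the region and make it runnable again.
void CommitJitCode(void* dst, const void* src, size_t size)
{
	pthread_jit_write_protect_np(0);  // writable (not executable) for this thread
	std::memcpy(dst, src, size);
	pthread_jit_write_protect_np(1);  // executable again
	sys_icache_invalidate(dst, size); // flush the instruction cache before running it
}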

View File

@ -90,20 +90,11 @@ static __fi PageProtectionMode PageAccess_Any()
// --------------------------------------------------------------------------------------
namespace HostSys
{
// Maps a block of memory for use as a recompiled code buffer.
// Returns NULL on allocation failure.
extern void* Mmap(void* base, size_t size, const PageProtectionMode& mode);
// Unmaps a block allocated by SysMmap
extern void Munmap(void* base, size_t size);
extern void MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode);
extern std::string GetFileMappingName(const char* prefix);
extern void* CreateSharedMemory(const char* name, size_t size);
extern void DestroySharedMemory(void* ptr);
extern void* MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode);
extern void UnmapSharedMemory(void* baseaddr, size_t size);
/// JIT write protect for Apple Silicon. Needs to be called prior to writing to any RWX pages.
#if !defined(__APPLE__) || !defined(_M_ARM64)
@ -146,7 +137,7 @@ namespace PageFaultHandler
class SharedMemoryMappingArea
{
public:
static std::unique_ptr<SharedMemoryMappingArea> Create(size_t size);
static std::unique_ptr<SharedMemoryMappingArea> Create(size_t size, bool jit = false);
~SharedMemoryMappingArea();

View File

@ -14,8 +14,10 @@
#include <fcntl.h>
#include <mutex>
#include <sys/mman.h>
#include <ucontext.h>
#include <unistd.h>
#ifndef __APPLE__
#include <ucontext.h>
#endif
#include "fmt/core.h"
@ -23,12 +25,6 @@
#include "cpuinfo.h"
#endif
// FreeBSD does not have MAP_FIXED_NOREPLACE, but does have MAP_EXCL.
// MAP_FIXED combined with MAP_EXCL behaves like MAP_FIXED_NOREPLACE.
#if defined(__FreeBSD__) && !defined(MAP_FIXED_NOREPLACE)
#define MAP_FIXED_NOREPLACE (MAP_FIXED | MAP_EXCL)
#endif
static __ri uint LinuxProt(const PageProtectionMode& mode)
{
u32 lnxmode = 0;
@ -43,34 +39,6 @@ static __ri uint LinuxProt(const PageProtectionMode& mode)
return lnxmode;
}
void* HostSys::Mmap(void* base, size_t size, const PageProtectionMode& mode)
{
pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned");
if (mode.IsNone())
return nullptr;
const u32 prot = LinuxProt(mode);
u32 flags = MAP_PRIVATE | MAP_ANONYMOUS;
if (base)
flags |= MAP_FIXED_NOREPLACE;
void* res = mmap(base, size, prot, flags, -1, 0);
if (res == MAP_FAILED)
return nullptr;
return res;
}
void HostSys::Munmap(void* base, size_t size)
{
if (!base)
return;
munmap((void*)base, size);
}
void HostSys::MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode)
{
pxAssertMsg((size & (__pagesize - 1)) == 0, "Size is page aligned");
@ -120,23 +88,7 @@ void HostSys::DestroySharedMemory(void* ptr)
close(static_cast<int>(reinterpret_cast<intptr_t>(ptr)));
}
void* HostSys::MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode)
{
const uint lnxmode = LinuxProt(mode);
const int flags = (baseaddr != nullptr) ? (MAP_SHARED | MAP_FIXED_NOREPLACE) : MAP_SHARED;
void* ptr = mmap(baseaddr, size, lnxmode, flags, static_cast<int>(reinterpret_cast<intptr_t>(handle)), static_cast<off_t>(offset));
if (ptr == MAP_FAILED)
return nullptr;
return ptr;
}
void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
{
if (munmap(baseaddr, size) != 0)
pxFailRel("Failed to unmap shared memory");
}
#ifndef __APPLE__
size_t HostSys::GetRuntimePageSize()
{
@ -183,6 +135,8 @@ size_t HostSys::GetRuntimeCacheLineSize()
#endif
}
#endif
SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages)
: m_base_ptr(base_ptr)
, m_size(size)
@ -199,11 +153,16 @@ SharedMemoryMappingArea::~SharedMemoryMappingArea()
}
std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size)
std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size, bool jit)
{
pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned");
void* alloc = mmap(nullptr, size, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
uint flags = MAP_ANONYMOUS | MAP_PRIVATE;
#ifdef __APPLE__
if (jit)
flags |= MAP_JIT;
#endif
void* alloc = mmap(nullptr, size, PROT_NONE, flags, -1, 0);
if (alloc == MAP_FAILED)
return nullptr;
@ -214,15 +173,26 @@ u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* ma
{
pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
// MAP_FIXED is okay here, since we've reserved the entire region, and *want* to overwrite the mapping.
const uint lnxmode = LinuxProt(mode);
void* const ptr = mmap(map_base, map_size, lnxmode, MAP_SHARED | MAP_FIXED,
static_cast<int>(reinterpret_cast<intptr_t>(file_handle)), static_cast<off_t>(file_offset));
if (file_handle)
{
const int fd = static_cast<int>(reinterpret_cast<intptr_t>(file_handle));
// MAP_FIXED is okay here, since we've reserved the entire region, and *want* to overwrite the mapping.
void* const ptr = mmap(map_base, map_size, lnxmode, MAP_SHARED | MAP_FIXED, fd, static_cast<off_t>(file_offset));
if (ptr == MAP_FAILED)
return nullptr;
}
else
{
// macOS doesn't seem to allow MAP_JIT with MAP_FIXED
// So we do the MAP_JIT in the allocation, and just mprotect here
// Note that this will only work the first time for a given region
if (mprotect(map_base, map_size, lnxmode) < 0)
return nullptr;
}
m_num_mappings++;
return static_cast<u8*>(ptr);
return static_cast<u8*>(map_base);
}
bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size)
@ -236,6 +206,8 @@ bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size)
return true;
}
#ifndef __APPLE__ // These are done in DarwinMisc
namespace PageFaultHandler
{
static std::recursive_mutex s_exception_handler_mutex;
@ -370,3 +342,4 @@ bool PageFaultHandler::Install(Error* error)
s_installed = true;
return true;
}
#endif // __APPLE__
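
For reference, a condensed sketch of the reserve-then-carve pattern this file now implements on both Linux and macOS (helper names are illustrative): the whole area is reserved PROT_NONE, with MAP_JIT when a JIT area is requested, file-backed pages are then mapped over it with MAP_FIXED, and the anonymous code half is enabled with mprotect() because macOS rejects MAP_JIT combined with MAP_FIXED.

#include <sys/mman.h>
#include <sys/types.h>

using u8 = unsigned char; // matching the PCSX2 typedef

u8* ReserveArea(size_t size, bool jit)
{
	int flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef __APPLE__
	if (jit)
		flags |= MAP_JIT; // must be set at reservation time, see Map() above
#endif
	void* base = mmap(nullptr, size, PROT_NONE, flags, -1, 0);
	return (base == MAP_FAILED) ? nullptr : static_cast<u8*>(base);
}

// File-backed part: overwrite the reservation in place.
void* MapData(u8* at, size_t size, int fd, off_t offset)
{
	void* p = mmap(at, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, offset);
	return (p == MAP_FAILED) ? nullptr : p;
}

// Anonymous JIT part: only the protection of the already-reserved pages changes.
bool EnableCode(u8* at, size_t size)
{
	return mprotect(at, size, PROT_READ | PROT_WRITE | PROT_EXEC) == 0;
}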

View File

@ -35,22 +35,6 @@ static DWORD ConvertToWinApi(const PageProtectionMode& mode)
return winmode;
}
void* HostSys::Mmap(void* base, size_t size, const PageProtectionMode& mode)
{
if (mode.IsNone())
return nullptr;
return VirtualAlloc(base, size, MEM_RESERVE | MEM_COMMIT, ConvertToWinApi(mode));
}
void HostSys::Munmap(void* base, size_t size)
{
if (!base)
return;
VirtualFree((void*)base, 0, MEM_RELEASE);
}
void HostSys::MemProtect(void* baseaddr, size_t size, const PageProtectionMode& mode)
{
pxAssert((size & (__pagesize - 1)) == 0);
@ -77,29 +61,6 @@ void HostSys::DestroySharedMemory(void* ptr)
CloseHandle(static_cast<HANDLE>(ptr));
}
void* HostSys::MapSharedMemory(void* handle, size_t offset, void* baseaddr, size_t size, const PageProtectionMode& mode)
{
void* ret = MapViewOfFileEx(static_cast<HANDLE>(handle), FILE_MAP_READ | FILE_MAP_WRITE,
static_cast<DWORD>(offset >> 32), static_cast<DWORD>(offset), size, baseaddr);
if (!ret)
return nullptr;
const DWORD prot = ConvertToWinApi(mode);
if (prot != PAGE_READWRITE)
{
DWORD old_prot;
if (!VirtualProtect(ret, size, prot, &old_prot))
pxFail("Failed to protect memory mapping");
}
return ret;
}
void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
{
if (!UnmapViewOfFile(baseaddr))
pxFail("Failed to unmap shared memory");
}
size_t HostSys::GetRuntimePageSize()
{
SYSTEM_INFO si = {};
@ -183,7 +144,7 @@ SharedMemoryMappingArea::PlaceholderMap::iterator SharedMemoryMappingArea::FindP
return m_placeholder_ranges.end();
}
std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size)
std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size, bool jit)
{
pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned");
@ -241,12 +202,23 @@ u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* ma
}
// actually do the mapping, replacing the placeholder on the range
if (file_handle)
{
if (!MapViewOfFile3(static_cast<HANDLE>(file_handle), GetCurrentProcess(),
map_base, file_offset, map_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0))
{
Console.Error("(SharedMemoryMappingArea) MapViewOfFile3() failed: %u", GetLastError());
return nullptr;
}
}
else
{
if (!VirtualAlloc2(GetCurrentProcess(), map_base, map_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0))
{
Console.Error("(SharedMemoryMappingArea) VirtualAlloc2() failed: %u", GetLastError());
return nullptr;
}
}
const DWORD prot = ConvertToWinApi(mode);
if (prot != PAGE_READWRITE)
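
The Windows side builds on the placeholder mechanism introduced in Windows 10: the area is reserved as a placeholder, a placeholder covering the exact range being mapped is split off, and MapViewOfFile3() replaces it with a file view; the diff handles the anonymous code range analogously via VirtualAlloc2(). A hedged sketch of the file-backed path, with illustrative helper names:

#include <windows.h>
#include <memoryapi.h>   // VirtualAlloc2 / MapViewOfFile3 (link against onecore.lib)

using u8 = unsigned char; // matching the PCSX2 typedef

u8* ReservePlaceholder(size_t size)
{
	return static_cast<u8*>(VirtualAlloc2(GetCurrentProcess(), nullptr, size,
		MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, nullptr, 0));
}

void* MapFileIntoPlaceholder(HANDLE mapping, u8* base, size_t offset, size_t size, size_t reserved_size)
{
	// Split the placeholder so that only [base+offset, base+offset+size) is replaced.
	if (size < reserved_size)
		VirtualFree(base + offset, size, MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER);

	return MapViewOfFile3(mapping, GetCurrentProcess(), base + offset,
		offset, size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0);
}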

View File

@ -49,6 +49,7 @@
thread_local u8* x86Ptr;
thread_local u8* xTextPtr;
thread_local XMMSSEType g_xmmtypes[iREGCNT_XMM] = {XMMT_INT};
namespace x86Emitter
@ -295,13 +296,27 @@ const xRegister32
void EmitSibMagic(uint regfield, const void* address, int extraRIPOffset)
{
sptr displacement = (sptr)address;
sptr textRelative = (sptr)address - (sptr)xTextPtr;
sptr ripRelative = (sptr)address - ((sptr)x86Ptr + sizeof(s8) + sizeof(s32) + extraRIPOffset);
// Can we use an 8-bit offset from the text pointer?
if (textRelative == (s8)textRelative && xTextPtr)
{
ModRM(1, regfield, RTEXTPTR.GetId());
xWrite<s8>((s8)textRelative);
return;
}
// Can we use a rip-relative address? (Prefer this over eiz because it's a byte shorter)
if (ripRelative == (s32)ripRelative)
else if (ripRelative == (s32)ripRelative)
{
ModRM(0, regfield, ModRm_UseDisp32);
displacement = ripRelative;
}
// How about from the text pointer?
else if (textRelative == (s32)textRelative && xTextPtr)
{
ModRM(2, regfield, RTEXTPTR.GetId());
displacement = textRelative;
}
else
{
pxAssertMsg(displacement == (s32)displacement, "SIB target is too far away, needs an indirect register");
@ -539,6 +554,12 @@ const xRegister32
x86Ptr = (u8*)ptr;
}
// Assigns the current emitter text base address.
__emitinline void xSetTextPtr(void* ptr)
{
xTextPtr = (u8*)ptr;
}
// Retrieves the current emitter buffer target address.
// This is provided instead of using x86Ptr directly, since we may in the future find
// a need to change the storage class system for the x86Ptr 'under the hood.'
@ -547,6 +568,12 @@ const xRegister32
return x86Ptr;
}
// Retrieves the current emitter text base address.
__emitinline u8* xGetTextPtr()
{
return xTextPtr;
}
__emitinline void xAlignPtr(uint bytes)
{
// forward align
@ -1229,6 +1256,9 @@ const xRegister32
#endif
stackAlign(m_offset, true);
if (u8* ptr = xGetTextPtr())
xMOV64(RTEXTPTR, (sptr)ptr);
}
xScopedStackFrame::~xScopedStackFrame()
@ -1285,12 +1315,15 @@ const xRegister32
{
return offset + base;
}
else
if (u8* ptr = xGetTextPtr())
{
sptr tbase = (sptr)base - (sptr)ptr;
if (tbase == (s32)tbase)
return offset + RTEXTPTR + tbase;
}
xLEA(tmpRegister, ptr[base]);
return offset + tmpRegister;
}
}
void xLoadFarAddr(const xAddressReg& dst, void* addr)
{
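
The EmitSibMagic() change above encodes memory operands in order of preference: an 8-bit displacement from the text pointer (shortest), then a rip-relative 32-bit displacement, then a 32-bit displacement from the text pointer, and only then an absolute address. A standalone sketch of that selection logic, illustrative and outside the emitter:

#include <cstdint>

enum class AddrForm { TextDisp8, RipDisp32, TextDisp32, Absolute };

AddrForm PickAddrForm(std::intptr_t target, std::intptr_t text_ptr, std::intptr_t next_instr)
{
	const std::intptr_t text_rel = target - text_ptr;
	const std::intptr_t rip_rel = target - next_instr;
	if (text_ptr && text_rel == static_cast<std::int8_t>(text_rel))
		return AddrForm::TextDisp8;   // [RTEXTPTR + disp8], shortest encoding
	if (rip_rel == static_cast<std::int32_t>(rip_rel))
		return AddrForm::RipDisp32;   // [rip + disp32]
	if (text_ptr && text_rel == static_cast<std::int32_t>(text_rel))
		return AddrForm::TextDisp32;  // [RTEXTPTR + disp32]
	return AddrForm::Absolute;        // needs an indirect register
}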

View File

@ -149,11 +149,13 @@ namespace x86Emitter
static const int Sib_UseDisp32 = 5; // same index value as EBP (used in Base field)
extern void xSetPtr(void* ptr);
extern void xSetTextPtr(void* ptr);
extern void xAlignPtr(uint bytes);
extern void xAdvancePtr(uint bytes);
extern void xAlignCallTarget();
extern u8* xGetPtr();
extern u8* xGetTextPtr();
extern u8* xGetAlignedCallTarget();
extern JccComparisonType xInvertCond(JccComparisonType src);
@ -646,6 +648,8 @@ extern const xRegister32
calleeSavedReg1d,
calleeSavedReg2d;
/// Holds a pointer to program text at all times so we don't need to be within 2GB of text
static constexpr const xAddressReg& RTEXTPTR = rbx;
// clang-format on

View File

@ -4208,14 +4208,8 @@ void GSRendererHW::EmulateBlending(int rt_alpha_min, int rt_alpha_max, const boo
const bool alpha_mask = (m_cached_ctx.FRAME.FBMSK & 0xFF000000) == 0xFF000000;
bool blend_ad_alpha_masked = blend_ad && alpha_mask;
const bool is_basic_blend = GSConfig.AccurateBlendingUnit >= AccBlendLevel::Basic;
if ((is_basic_blend || (COLCLAMP.CLAMP == 0)) && features.texture_barrier && blend_ad_alpha_masked)
{
// Swap Ad with As for hw blend.
m_conf.ps.a_masked = 1;
m_conf.ps.blend_c = 0;
m_conf.require_one_barrier |= true;
}
else if (((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Medium) || m_conf.require_one_barrier) && blend_ad_alpha_masked)
if (blend_ad_alpha_masked && (((is_basic_blend || (COLCLAMP.CLAMP == 0)) && features.texture_barrier)
|| ((GSConfig.AccurateBlendingUnit >= AccBlendLevel::Medium) || m_conf.require_one_barrier)))
{
// Swap Ad with As for hw blend.
m_conf.ps.a_masked = 1;

View File

@ -49,9 +49,6 @@ namespace Ps2MemSize
namespace SysMemory
{
static u8* TryAllocateVirtualMemory(const char* name, void* file_handle, uptr base, size_t size);
static u8* AllocateVirtualMemory(const char* name, void* file_handle, size_t size, size_t offset_from_base);
static bool AllocateMemoryMap();
static void DumpMemoryMap();
static void ReleaseMemoryMap();
@ -59,6 +56,7 @@ namespace SysMemory
static u8* s_data_memory;
static void* s_data_memory_file_handle;
static u8* s_code_memory;
static std::unique_ptr<SharedMemoryMappingArea> s_memory_mapping_area;
} // namespace SysMemory
static void memAllocate();
@ -86,77 +84,6 @@ namespace HostMemoryMap
}
} // namespace HostMemoryMap
u8* SysMemory::TryAllocateVirtualMemory(const char* name, void* file_handle, uptr base, size_t size)
{
u8* baseptr;
if (file_handle)
baseptr = static_cast<u8*>(HostSys::MapSharedMemory(file_handle, 0, (void*)base, size, PageAccess_ReadWrite()));
else
baseptr = static_cast<u8*>(HostSys::Mmap((void*)base, size, PageAccess_Any()));
if (!baseptr)
return nullptr;
if (base != 0 && (uptr)baseptr != base)
{
if (file_handle)
{
if (baseptr)
HostSys::UnmapSharedMemory(baseptr, size);
}
else
{
if (baseptr)
HostSys::Munmap(baseptr, size);
}
return nullptr;
}
DevCon.WriteLn(Color_Gray, "%-32s @ 0x%016" PRIXPTR " -> 0x%016" PRIXPTR " %s", name,
baseptr, (uptr)baseptr + size, fmt::format("[{}mb]", size / _1mb).c_str());
return baseptr;
}
u8* SysMemory::AllocateVirtualMemory(const char* name, void* file_handle, size_t size, size_t offset_from_base)
{
// ARM64 does not need the rec areas to be in +/- 2GB.
#ifdef _M_X86
pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Virtual memory size is page aligned");
// Everything looks nicer when the start of all the sections is a nice round looking number.
// Also reduces the variation in the address due to small changes in code.
// Breaks ASLR but so does anything else that tries to make addresses constant for our debugging pleasure
uptr codeBase = (uptr)(void*)AllocateVirtualMemory / (1 << 28) * (1 << 28);
// The allocation is ~640mb in size, slightly under 3*2^28.
// We'll hope that the code generated for the PCSX2 executable stays under 512mb (which is likely)
// On x86-64, code can reach +/-8*2^28 from its address; [-6*2^28, 4*2^28] is the region that allows code in the 640mb allocation to reach 512mb of code that either starts at codeBase or 256mb before it.
// We start high and count down because on macOS code starts at the beginning of useable address space, so starting as far ahead as possible reduces address variations due to code size. Not sure about other platforms. Obviously this only actually affects what shows up in a debugger and won't affect performance or correctness of anything.
for (int offset = 4; offset >= -6; offset--)
{
uptr base = codeBase + (offset << 28) + offset_from_base;
if ((sptr)base < 0 || (sptr)(base + size - 1) < 0)
{
// VTLB will throw a fit if we try to put EE main memory here
continue;
}
if (u8* ret = TryAllocateVirtualMemory(name, file_handle, base, size))
return ret;
DevCon.Warning("%s: host memory @ 0x%016" PRIXPTR " -> 0x%016" PRIXPTR " is unavailable; attempting to map elsewhere...", name,
base, base + size);
}
#else
return TryAllocateVirtualMemory(name, file_handle, 0, size);
#endif
return nullptr;
}
bool SysMemory::AllocateMemoryMap()
{
s_data_memory_file_handle = HostSys::CreateSharedMemory(HostSys::GetFileMappingName("pcsx2").c_str(), HostMemoryMap::MainSize);
@ -167,16 +94,23 @@ bool SysMemory::AllocateMemoryMap()
return false;
}
if ((s_data_memory = AllocateVirtualMemory("Data Memory", s_data_memory_file_handle, HostMemoryMap::MainSize, 0)) == nullptr)
if (!(s_memory_mapping_area = SharedMemoryMappingArea::Create(HostMemoryMap::MainSize + HostMemoryMap::CodeSize, true)))
{
Host::ReportErrorAsync("Error", "Failed to map data memory at an acceptable location.");
Host::ReportErrorAsync("Error", "Failed to map main memory.");
ReleaseMemoryMap();
return false;
}
if ((s_code_memory = AllocateVirtualMemory("Code Memory", nullptr, HostMemoryMap::CodeSize, HostMemoryMap::MainSize)) == nullptr)
if ((s_data_memory = s_memory_mapping_area->Map(s_data_memory_file_handle, 0, s_memory_mapping_area->BasePointer(), HostMemoryMap::MainSize, PageAccess_ReadWrite())) == nullptr)
{
Host::ReportErrorAsync("Error", "Failed to allocate code memory at an acceptable location.");
Host::ReportErrorAsync("Error", "Failed to map data memory.");
ReleaseMemoryMap();
return false;
}
if ((s_code_memory = s_memory_mapping_area->Map(nullptr, 0, s_memory_mapping_area->OffsetPointer(HostMemoryMap::MainSize), HostMemoryMap::CodeSize, PageAccess_Any())) == nullptr)
{
Host::ReportErrorAsync("Error", "Failed to allocate code memory.");
ReleaseMemoryMap();
return false;
}
@ -218,16 +152,18 @@ void SysMemory::ReleaseMemoryMap()
{
if (s_code_memory)
{
HostSys::Munmap(s_code_memory, HostMemoryMap::CodeSize);
s_memory_mapping_area->Unmap(s_code_memory, HostMemoryMap::CodeSize);
s_code_memory = nullptr;
}
if (s_data_memory)
{
HostSys::UnmapSharedMemory(s_data_memory, HostMemoryMap::MainSize);
s_memory_mapping_area->Unmap(s_data_memory, HostMemoryMap::MainSize);
s_data_memory = nullptr;
}
s_memory_mapping_area.reset();
if (s_data_memory_file_handle)
{
HostSys::DestroySharedMemory(s_data_memory_file_handle);
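
Replacing AllocateVirtualMemory() with a single SharedMemoryMappingArea spanning MainSize + CodeSize is what the commit message "Reserve data and code areas together" refers to: once the two areas are contiguous, their relative distance is bounded by the size of the reservation, so rel32 addressing in the x86 JIT always reaches, wherever the OS places the block. A tiny illustrative check (the sizes are hypothetical, not the real HostMemoryMap constants):

#include <cstdint>

constexpr std::uint64_t kMainSize = 640ull << 20; // hypothetical data-area size
constexpr std::uint64_t kCodeSize = 192ull << 20; // hypothetical code-area size

// With one contiguous reservation, no two addresses in the combined area are
// farther apart than its total size, so a signed 32-bit displacement suffices.
static_assert(kMainSize + kCodeSize < (1ull << 31),
	"data and code stay within rel32 reach of each other");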

View File

@ -364,14 +364,13 @@ struct eeProfiler
}
}
// Warning dirty ebx
void EmitMem()
void EmitMem(int addr_reg)
{
// Compact the 4GB virtual address to a 512KB virtual address
if (x86caps.hasBMI2)
{
xPEXT(ebx, ecx, ptr[&memMask]);
xADD(ptr32[(rbx * 4) + memStats], 1);
xPEXT(arg1regd, xRegister32(addr_reg), ptr[&memMask]);
xADD(ptr32[(arg1reg * 4) + memStats], 1);
}
}
@ -403,7 +402,7 @@ struct eeProfiler
__fi void Reset() {}
__fi void EmitOp(eeOpcode op) {}
__fi void Print() {}
__fi void EmitMem() {}
__fi void EmitMem(int addrReg) {}
__fi void EmitConstMem(u32 add) {}
__fi void EmitSlowMem() {}
__fi void EmitFastMem() {}
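
EmitMem() now takes the address register explicitly and compacts it into arg1reg instead of clobbering rbx. As a reference for what the xPEXT above computes, a hedged sketch of the same compaction in plain C++ (the mask value is illustrative; the real one lives in memMask):

#include <immintrin.h> // _pext_u32, requires BMI2
#include <cstdint>

std::uint32_t CompactAddress(std::uint32_t addr, std::uint32_t mask)
{
	// PEXT gathers the bits of addr selected by mask into the low bits of the
	// result, e.g. mask 0x0007FFFC keeps bits 2..18 and yields a 17-bit index.
	return _pext_u32(addr, mask);
}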

View File

@ -52,6 +52,10 @@ bool _isAllocatableX86reg(int x86reg)
if (CHECK_FASTMEM && x86reg == 5)
return false;
// rbx is used to reference PCSX2 program text
if (xGetTextPtr() && x86reg == RTEXTPTR.GetId())
return false;
#ifdef ENABLE_VTUNE
// vtune needs ebp...
if (!CHECK_FASTMEM && x86reg == 5)

View File

@ -175,10 +175,10 @@ static const void* _DynGen_JITCompile()
xFastCall((void*)iopRecRecompile, ptr32[&psxRegs.pc]);
xMOV(eax, ptr[&psxRegs.pc]);
xMOV(ebx, eax);
xMOV(edx, eax);
xSHR(eax, 16);
xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]);
xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);
xJMP(ptrNative[rdx * (wordsize / 4) + rcx]);
return retval;
}
@ -196,10 +196,10 @@ static const void* _DynGen_DispatcherReg()
u8* retval = xGetPtr();
xMOV(eax, ptr[&psxRegs.pc]);
xMOV(ebx, eax);
xMOV(edx, eax);
xSHR(eax, 16);
xMOV(rcx, ptrNative[xComplexAddress(rcx, psxRecLUT, rax * wordsize)]);
xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);
xJMP(ptrNative[rdx * (wordsize / 4) + rcx]);
return retval;
}
@ -890,10 +890,13 @@ static void recReserve()
pxFailRel("Failed to allocate R3000 InstCache array.");
}
#define R3000A_TEXTPTR (&psxRegs.GPR.r[33])
void recResetIOP()
{
DevCon.WriteLn("iR3000A Recompiler reset.");
xSetTextPtr(R3000A_TEXTPTR);
xSetPtr(SysMemory::GetIOPRec());
_DynGen_Dispatchers();
recPtr = xGetPtr();
@ -1181,16 +1184,16 @@ static void iPsxBranchTest(u32 newpc, u32 cpuBranch)
}
else
{
xMOV(ebx, ptr32[&psxRegs.cycle]);
xADD(ebx, blockCycles);
xMOV(ptr32[&psxRegs.cycle], ebx); // update cycles
xMOV(r12d, ptr32[&psxRegs.cycle]);
xADD(r12d, blockCycles);
xMOV(ptr32[&psxRegs.cycle], r12d); // update cycles
// jump if iopCycleEE <= 0 (iop's timeslice timed out, so time to return control to the EE)
iPsxAddEECycles(blockCycles);
xJLE(iopExitRecompiledCode);
// check if an event is pending
xSUB(ebx, ptr32[&psxRegs.iopNextEventCycle]);
xSUB(r12d, ptr32[&psxRegs.iopNextEventCycle]);
xForwardJS<u8> nointerruptpending;
xFastCall((void*)iopEventTest);
@ -1565,6 +1568,7 @@ static void iopRecRecompile(const u32 startpc)
recResetIOP();
}
xSetTextPtr(R3000A_TEXTPTR);
xSetPtr(recPtr);
recPtr = xGetAlignedCallTarget();

View File

@ -21,6 +21,11 @@ extern u32 target; // branch target
extern u32 s_nBlockCycles; // cycles of current block recompiling
extern bool s_nBlockInterlocked; // Current block has VU0 interlocking
// x86 can use shorter displacement if it fits in an s8, so offset 144 bytes into the cpuRegs
// This will allow us to reach r1-r16 with a shorter encoding
// TODO: Actually figure out what things are used most often, maybe rearrange the cpuRegs struct, and point at that
#define R5900_TEXTPTR (&cpuRegs.GPR.r[9])
//////////////////////////////////////////////////////////////////////////////////////////
//
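
A quick check of the arithmetic behind this comment (assuming the 16-byte GPRs sit at the start of cpuRegs, which is what the offset of 144 = 9 * 16 implies): anchoring the text pointer at r[9] centres the signed 8-bit displacement window on r1..r16.

#include <cstdint>

constexpr bool FitsDisp8ForGPR(int reg)
{
	const int offset = reg * 16 - 9 * 16; // distance from the anchor at GPR.r[9]
	return offset >= -128 && offset <= 127;
}

static_assert(FitsDisp8ForGPR(1), "r1 is at -128, the very edge of disp8");
static_assert(FitsDisp8ForGPR(16), "r16 is at +112, still within disp8");
static_assert(!FitsDisp8ForGPR(17), "r17 would need a disp32");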

View File

@ -381,10 +381,10 @@ static const void* _DynGen_JITCompile()
// void(**base)() = (void(**)())recLUT[addr >> 16];
// base[addr >> 2]();
xMOV(eax, ptr[&cpuRegs.pc]);
xMOV(ebx, eax);
xMOV(edx, eax);
xSHR(eax, 16);
xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]);
xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);
xJMP(ptrNative[rdx * (wordsize / 4) + rcx]);
return retval;
}
@ -406,10 +406,10 @@ static const void* _DynGen_DispatcherReg()
// void(**base)() = (void(**)())recLUT[addr >> 16];
// base[addr >> 2]();
xMOV(eax, ptr[&cpuRegs.pc]);
xMOV(ebx, eax);
xMOV(edx, eax);
xSHR(eax, 16);
xMOV(rcx, ptrNative[xComplexAddress(rcx, recLUT, rax * wordsize)]);
xJMP(ptrNative[rbx * (wordsize / 4) + rcx]);
xJMP(ptrNative[rdx * (wordsize / 4) + rcx]);
return retval;
}
@ -445,6 +445,8 @@ static const void* _DynGen_EnterRecompiledCode()
xSUB(rsp, stack_size);
#endif
if (u8* ptr = xGetTextPtr())
xMOV64(RTEXTPTR, (sptr)ptr);
if (CHECK_FASTMEM)
xMOV(RFASTMEMBASE, ptrNative[&vtlb_private::vtlbdata.fastmem_base]);
@ -585,6 +587,7 @@ static void recResetRaw()
EE::Profiler.Reset();
xSetTextPtr(R5900_TEXTPTR);
xSetPtr(SysMemory::GetEERec());
_DynGen_Dispatchers();
vtlb_DynGenDispatchers();
@ -897,6 +900,7 @@ u8* recBeginThunk()
if (recPtr >= recPtrEnd)
eeRecNeedsReset = true;
xSetTextPtr(R5900_TEXTPTR);
xSetPtr(recPtr);
recPtr = xGetAlignedCallTarget();
@ -2138,26 +2142,26 @@ static bool recSkipTimeoutLoop(s32 reg, bool is_timeout_loop)
// if new_v0 > 0 { jump to dispatcher because loop exited early }
// else new_v0 is 0, so exit loop
xMOV(ebx, ptr32[&cpuRegs.cycle]); // ebx = cycle
xMOV(r12d, ptr32[&cpuRegs.cycle]); // r12d = cycle
xMOV(ecx, ptr32[&cpuRegs.nextEventCycle]); // ecx = nextEventCycle
xCMP(ebx, ecx);
xCMP(r12d, ecx);
//xJAE((void*)DispatcherEvent); // jump to dispatcher if event immediately
// TODO: In the case where nextEventCycle < cycle because it's overflowed, tack 8
// cycles onto the event count, so hopefully it'll wrap around. This is pretty
// gross, but until we switch to 64-bit counters, not many better options.
xForwardJB8 not_dispatcher;
xADD(ebx, 8);
xMOV(ptr32[&cpuRegs.cycle], ebx);
xADD(r12d, 8);
xMOV(ptr32[&cpuRegs.cycle], r12d);
xJMP((void*)DispatcherEvent);
not_dispatcher.SetTarget();
xMOV(edx, ptr32[&cpuRegs.GPR.r[reg].UL[0]]); // eax = v0
xLEA(rax, ptrNative[rdx * 8 + rbx]); // edx = v0 * 8 + cycle
xLEA(rax, ptrNative[rdx * 8 + r12]); // edx = v0 * 8 + cycle
xCMP(rcx, rax);
xCMOVB(rax, rcx); // eax = new_cycles = min(v8 * 8, nextEventCycle)
xMOV(ptr32[&cpuRegs.cycle], eax); // writeback new_cycles
xSUB(eax, ebx); // new_cycles -= cycle
xSUB(eax, r12d); // new_cycles -= cycle
xSHR(eax, 3); // compute new v0 value
xSUB(edx, eax); // v0 -= cycle_diff
xMOV(ptr32[&cpuRegs.GPR.r[reg].UL[0]], edx); // write back new value of v0
@ -2191,6 +2195,7 @@ static void recRecompile(const u32 startpc)
recResetRaw();
}
xSetTextPtr(R5900_TEXTPTR);
xSetPtr(recPtr);
recPtr = xGetAlignedCallTarget();

View File

@ -119,14 +119,12 @@ static void __vectorcall LogWriteQuad(u32 addr, __m128i val)
namespace vtlb_private
{
// ------------------------------------------------------------------------
// Prepares eax, ecx, and, ebx for Direct or Indirect operations.
// Returns the writeback pointer for ebx (return address from indirect handling)
// Prepares eax and ecx for Direct or Indirect operations.
//
static void DynGen_PrepRegs(int addr_reg, int value_reg, u32 sz, bool xmm)
{
EE::Profiler.EmitMem();
_freeX86reg(arg1regd);
EE::Profiler.EmitMem(addr_reg);
xMOV(arg1regd, xRegister32(addr_reg));
if (value_reg >= 0)
@ -269,7 +267,7 @@ static void DynGen_HandlerTest(const GenDirectFn& gen_direct, int mode, int bits
// ------------------------------------------------------------------------
// Generates the various instances of the indirect dispatchers
// In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64), rbx: function return ptr
// In: arg1reg: vtlb entry, arg2reg: data ptr (if mode >= 64)
// Out: eax: result (if mode < 64)
static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign)
{
@ -347,6 +345,7 @@ void vtlb_DynGenDispatchers()
for (int sign = 0; sign < (!mode && bits < 3 ? 2 : 1); sign++)
{
xSetPtr(GetIndirectDispatcherPtr(mode, bits, !!sign));
xSetTextPtr(R5900_TEXTPTR);
DynGen_IndirectTlbDispatcher(mode, bits, !!sign);
}
@ -939,14 +938,13 @@ void vtlb_DynBackpatchLoadStore(uptr code_address, u32 code_size, u32 guest_pc,
u32 num_gprs = 0;
u32 num_fprs = 0;
const u32 rbxid = static_cast<u32>(rbx.GetId());
const u32 arg1id = static_cast<u32>(arg1reg.GetId());
const u32 arg2id = static_cast<u32>(arg2reg.GetId());
const u32 arg3id = static_cast<u32>(arg3reg.GetId());
for (u32 i = 0; i < iREGCNT_GPR; i++)
{
if ((gpr_bitmask & (1u << i)) && (i == rbxid || i == arg1id || i == arg2id || xRegisterBase::IsCallerSaved(i)) && (!is_load || is_xmm || data_register != i))
if ((gpr_bitmask & (1u << i)) && (i == arg1id || i == arg2id || xRegisterBase::IsCallerSaved(i)) && (!is_load || is_xmm || data_register != i))
num_gprs++;
}
for (u32 i = 0; i < iREGCNT_XMM; i++)

View File

@ -42,6 +42,7 @@ void mVUreset(microVU& mVU, bool resetReserve)
VU0.VI[REG_VPU_STAT].UL &= ~0x100;
}
xSetTextPtr(mVU.textPtr());
xSetPtr(mVU.cache);
mVUdispatcherAB(mVU);
mVUdispatcherCD(mVU);

View File

@ -123,6 +123,7 @@ struct microVU
s32 cycles; // Cycles Counter
VURegs& regs() const { return ::vuRegs[index]; }
void* textPtr() const { return (index && THREAD_VU1) ? (void*)&regs().VF[9] : (void*)R5900_TEXTPTR; }
__fi REG_VI& getVI(uint reg) const { return regs().VI[reg]; }
__fi VECTOR& getVF(uint reg) const { return regs().VF[reg]; }

View File

@ -207,15 +207,17 @@ static void mVUGenerateCopyPipelineState(mV)
{
mVU.copyPLState = xGetAlignedCallTarget();
xLoadFarAddr(rdx, reinterpret_cast<u8*>(&mVU.prog.lpState));
if (cpuinfo_has_x86_avx())
{
xVMOVAPS(ymm0, ptr[rax]);
xVMOVAPS(ymm1, ptr[rax + 32u]);
xVMOVAPS(ymm2, ptr[rax + 64u]);
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], ymm0);
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 32u], ymm1);
xVMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], ymm2);
xVMOVUPS(ptr[rdx], ymm0);
xVMOVUPS(ptr[rdx + 32u], ymm1);
xVMOVUPS(ptr[rdx + 64u], ymm2);
xVZEROUPPER();
}
@ -228,12 +230,12 @@ static void mVUGenerateCopyPipelineState(mV)
xMOVAPS(xmm4, ptr[rax + 64u]);
xMOVAPS(xmm5, ptr[rax + 80u]);
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState)], xmm0);
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 16u], xmm1);
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 32u], xmm2);
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 48u], xmm3);
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 64u], xmm4);
xMOVUPS(ptr[reinterpret_cast<u8*>(&mVU.prog.lpState) + 80u], xmm5);
xMOVUPS(ptr[rdx], xmm0);
xMOVUPS(ptr[rdx + 16u], xmm1);
xMOVUPS(ptr[rdx + 32u], xmm2);
xMOVUPS(ptr[rdx + 48u], xmm3);
xMOVUPS(ptr[rdx + 64u], xmm4);
xMOVUPS(ptr[rdx + 80u], xmm5);
}
xRET();
@ -326,6 +328,7 @@ _mVUt void* mVUexecute(u32 startPC, u32 cycles)
mVU.cycles = cycles;
mVU.totalCycles = cycles;
xSetTextPtr(mVU.textPtr());
xSetPtr(mVU.prog.x86ptr); // Set x86ptr to where last program left off
return mVUsearchProg<vuIndex>(startPC & vuLimit, (uptr)&mVU.prog.lpState); // Find and set correct program
}

View File

@ -411,6 +411,7 @@ public:
}
}
gprMap[RTEXTPTR.GetId()].usable = !xGetTextPtr();
gprMap[RFASTMEMBASE.GetId()].usable = !cop2mode || !CHECK_FASTMEM;
}

View File

@ -1106,7 +1106,7 @@ mVUop(mVU_ILW)
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (_Imm11_ != 0)
xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT1q);
mVUaddrFix(mVU, gprT1q, gprT2q);
}
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
@ -1133,7 +1133,7 @@ mVUop(mVU_ILWR)
if (_Is_)
{
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
mVUaddrFix (mVU, gprT1q);
mVUaddrFix (mVU, gprT1q, gprT2q);
const xRegister32& regT = mVU.regAlloc->allocGPR(-1, _It_, mVUlow.backupVI);
xMOVZX(regT, ptr16[xComplexAddress(gprT2q, ptr, gprT1q)]);
@ -1170,7 +1170,7 @@ mVUop(mVU_ISW)
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (_Imm11_ != 0)
xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT1q);
mVUaddrFix(mVU, gprT1q, gprT2q);
}
// If regT is dirty, the high bits might not be zero.
@ -1201,7 +1201,7 @@ mVUop(mVU_ISWR)
if (_Is_)
{
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
mVUaddrFix(mVU, gprT1q);
mVUaddrFix(mVU, gprT1q, gprT2q);
is = gprT1q;
}
const xRegister32& regT = mVU.regAlloc->allocGPR(_It_, -1, false, true);
@ -1257,7 +1257,7 @@ mVUop(mVU_LQ)
mVU.regAlloc->moveVIToGPR(gprT1, _Is_);
if (_Imm11_ != 0)
xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT1q);
mVUaddrFix(mVU, gprT1q, gprT2q);
}
const xmm& Ft = mVU.regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W);
@ -1281,7 +1281,7 @@ mVUop(mVU_LQD)
xDEC(regS);
xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
mVU.regAlloc->clearNeeded(regS);
mVUaddrFix(mVU, gprT1q);
mVUaddrFix(mVU, gprT1q, gprT2q);
is = gprT1q;
}
else
@ -1319,7 +1319,7 @@ mVUop(mVU_LQI)
xMOVSX(gprT1, xRegister16(regS)); // TODO: Confirm
xINC(regS);
mVU.regAlloc->clearNeeded(regS);
mVUaddrFix(mVU, gprT1q);
mVUaddrFix(mVU, gprT1q, gprT2q);
is = gprT1q;
}
if (!mVUlow.noWriteVF)
@ -1351,7 +1351,7 @@ mVUop(mVU_SQ)
mVU.regAlloc->moveVIToGPR(gprT1, _It_);
if (_Imm11_ != 0)
xADD(gprT1, _Imm11_);
mVUaddrFix(mVU, gprT1q);
mVUaddrFix(mVU, gprT1q, gprT2q);
}
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
@ -1375,7 +1375,7 @@ mVUop(mVU_SQD)
xDEC(regT);
xMOVZX(gprT1, xRegister16(regT));
mVU.regAlloc->clearNeeded(regT);
mVUaddrFix(mVU, gprT1q);
mVUaddrFix(mVU, gprT1q, gprT2q);
it = gprT1q;
}
else
@ -1405,7 +1405,7 @@ mVUop(mVU_SQI)
xMOVZX(gprT1, xRegister16(regT));
xINC(regT);
mVU.regAlloc->clearNeeded(regT);
mVUaddrFix(mVU, gprT1q);
mVUaddrFix(mVU, gprT1q, gprT2q);
}
const xmm& Fs = mVU.regAlloc->allocReg(_Fs_, _XYZW_PS ? -1 : 0, _X_Y_Z_W);
if (_It_)

View File

@ -147,10 +147,10 @@ static const char branchSTR[16][8] = {
#define gprT1b ax // Low 16-bit of gprT1 (eax)
#define gprT2b cx // Low 16-bit of gprT2 (ecx)
#define gprF0 ebx // Status Flag 0
#define gprF1 r12d // Status Flag 1
#define gprF2 r13d // Status Flag 2
#define gprF3 r14d // Status Flag 3
#define gprF0 r12d // Status Flag 0
#define gprF1 r13d // Status Flag 1
#define gprF2 r14d // Status Flag 2
#define gprF3 r15d // Status Flag 3
// Function Params
#define mP microVU& mVU, int recPass

View File

@ -295,7 +295,7 @@ static void mVUwaitMTVU()
}
// Transforms the Address in gprReg to valid VU0/VU1 Address
__fi void mVUaddrFix(mV, const xAddressReg& gprReg)
__fi void mVUaddrFix(mV, const xAddressReg& gprReg, const xAddressReg& tmpReg)
{
if (isVU1)
{
@ -324,7 +324,16 @@ __fi void mVUaddrFix(mV, const xAddressReg& gprReg)
xFastCall((void*)mVU.waitMTVU);
}
xAND(xRegister32(gprReg.Id), 0x3f); // ToDo: theres a potential problem if VU0 overrides VU1's VF0/VI0 regs!
xADD(gprReg, (u128*)VU1.VF - (u128*)VU0.Mem);
sptr offset = (u128*)VU1.VF - (u128*)VU0.Mem;
if (offset == (s32)offset)
{
xADD(gprReg, offset);
}
else
{
xMOV64(tmpReg, offset);
xADD(gprReg, tmpReg);
}
jmpB.SetTarget();
xSHL(gprReg, 4); // multiply by 16 (shift left by 4)
}
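
The new branch in mVUaddrFix() exists because x86-64 ADD only accepts a sign-extended 32-bit immediate; once VU memory may live anywhere, the pointer difference between VU1.VF and VU0.Mem can exceed that range and has to be materialized in a scratch register first. A minimal sketch of the check (helper name illustrative):

#include <cstdint>

inline bool FitsSignExtendedImm32(std::int64_t value)
{
	return value == static_cast<std::int32_t>(value);
}

// Mirrors the emitter calls above:
//   if (FitsSignExtendedImm32(offset)) xADD(gprReg, offset);
//   else { xMOV64(tmpReg, offset); xADD(gprReg, tmpReg); }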

View File

@ -23,7 +23,8 @@ void dVifRelease(int idx)
}
VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
: v(vif_)
: vifPtr(rax)
, v(vif_)
, vB(vifBlock_)
{
const int wl = vB.wl ? vB.wl : 256; //0 is taken as 256 (KH2)
@ -42,9 +43,6 @@ __fi void makeMergeMask(u32& x)
__fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
{
const int idx = v.idx;
const vifStruct& vif = MTVU_VifX;
//This could have ended up copying the row when there was no row to write.
u32 m0 = vB.mask; //The actual mask example 0x03020100
u32 m3 = ((m0 & 0xaaaaaaaa) >> 1) & ~m0; //all the upper bits, so our example 0x01010000 & 0xFCFDFEFF = 0x00010000 just the cols (shifted right for maskmerge)
@ -52,14 +50,14 @@ __fi void VifUnpackSSE_Dynarec::SetMasks(int cS) const
if ((doMask && m2) || doMode)
{
xMOVAPS(xmmRow, ptr128[&vif.MaskRow]);
xMOVAPS(xmmRow, ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskRow)]);
MSKPATH3_LOG("Moving row");
}
if (doMask && m3)
{
VIF_LOG("Merging Cols");
xMOVAPS(xmmCol0, ptr128[&vif.MaskCol]);
xMOVAPS(xmmCol0, ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskCol)]);
if ((cS >= 2) && (m3 & 0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
if ((cS >= 3) && (m3 & 0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
if ((cS >= 4) && (m3 & 0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
@ -137,8 +135,7 @@ void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const
void VifUnpackSSE_Dynarec::writeBackRow() const
{
const int idx = v.idx;
xMOVAPS(ptr128[&(MTVU_VifX.MaskRow)], xmmRow);
xMOVAPS(ptr128[vifPtr + (sptr)offsetof(vifStruct, MaskRow)], xmmRow);
VIF_LOG("nVif: writing back row reg! [doMode = %d]", doMode);
}
@ -239,6 +236,7 @@ void VifUnpackSSE_Dynarec::ProcessMasks()
void VifUnpackSSE_Dynarec::CompileRoutine()
{
const int idx = v.idx;
const int wl = vB.wl ? vB.wl : 256; // 0 is taken as 256 (KH2)
const int upkNum = vB.upkType & 0xf;
const u8& vift = nVifT[upkNum];
@ -252,6 +250,7 @@ void VifUnpackSSE_Dynarec::CompileRoutine()
VIF_LOG("Compiling new block, unpack number %x, mode %x, masking %x, vNum %x", upkNum, doMode, doMask, vNum);
pxAssume(vCL == 0);
xLoadFarAddr(vifPtr, &MTVU_VifX);
// Value passed determines # of col regs we need to load
SetMasks(isFill ? blockSize : cycleSize);
@ -336,6 +335,7 @@ _vifT __fi nVifBlock* dVifCompile(nVifBlock& block, bool isFill)
}
// Compile the block now
xSetTextPtr(nullptr);
xSetPtr(v.recWritePtr);
block.startPtr = (uptr)xGetAlignedCallTarget();
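
The dynarec now keeps a pointer to the vifStruct in a register (vifPtr, loaded with xLoadFarAddr) and addresses its fields by offset instead of embedding absolute addresses, since those addresses are no longer guaranteed to be within rip-relative range of the generated code. A small illustrative sketch of the offsetof-based addressing (the struct is a hypothetical stand-in for vifStruct):

#include <cstddef>

struct ExampleVif
{
	unsigned int MaskRow[4];
	unsigned int MaskCol[4];
};

constexpr std::size_t kRowOffset = offsetof(ExampleVif, MaskRow);
constexpr std::size_t kColOffset = offsetof(ExampleVif, MaskCol);

// Emitter usage mirroring the diff:
//   xLoadFarAddr(vifPtr, &MTVU_VifX);
//   xMOVAPS(xmmRow, ptr128[vifPtr + kRowOffset]);
//   xMOVAPS(xmmCol0, ptr128[vifPtr + kColOffset]);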

View File

@ -329,9 +329,11 @@ void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const
{
xMOVAPS(xmm7, ptr[dstIndirect]);
int offX = std::min(curCycle, 3);
xPAND(regX, ptr32[nVifMask[0][offX]]);
xPAND(xmm7, ptr32[nVifMask[1][offX]]);
xPOR (regX, ptr32[nVifMask[2][offX]]);
sptr base = reinterpret_cast<sptr>(nVifMask[2]);
xLoadFarAddr(rax, nVifMask);
xPAND(regX, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[0][offX]) - base)]);
xPAND(xmm7, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[1][offX]) - base)]);
xPOR (regX, ptr128[rax + (reinterpret_cast<sptr>(nVifMask[2][offX]) - base)]);
xPOR (regX, xmm7);
xMOVAPS(ptr[dstIndirect], regX);
}
@ -362,6 +364,7 @@ void VifUnpackSSE_Init()
{
DevCon.WriteLn("Generating SSE-optimized unpacking functions for VIF interpreters...");
xSetTextPtr(nullptr);
xSetPtr(SysMemory::GetVIFUnpackRec());
for (int a = 0; a < 2; a++)

View File

@ -98,6 +98,7 @@ public:
bool inputMasked;
protected:
xAddressReg vifPtr;
const nVifStruct& v; // vif0 or vif1
const nVifBlock& vB; // some pre-collected data from VifStruct
int vCL; // internal copy of vif->cl