mirror of https://github.com/PCSX2/pcsx2.git
EE Rec/IOP Rec: Rewrite large portions
- Add fastmem - Add delay slot swapping - Add COP2 sync elision - Add block analysis and use analysis - Add GPR register caching and renaming
This commit is contained in:
parent
56501e0811
commit
1ccddb92d4
|
@ -145,6 +145,41 @@ namespace HostSys
|
|||
extern void UnmapSharedMemory(void* baseaddr, size_t size);
|
||||
}
|
||||
|
||||
class SharedMemoryMappingArea
|
||||
{
|
||||
public:
|
||||
static std::unique_ptr<SharedMemoryMappingArea> Create(size_t size);
|
||||
|
||||
~SharedMemoryMappingArea();
|
||||
|
||||
__fi size_t GetSize() const { return m_size; }
|
||||
__fi size_t GetNumPages() const { return m_num_pages; }
|
||||
|
||||
__fi u8* BasePointer() const { return m_base_ptr; }
|
||||
__fi u8* OffsetPointer(size_t offset) const { return m_base_ptr + offset; }
|
||||
__fi u8* PagePointer(size_t page) const { return m_base_ptr + __pagesize * page; }
|
||||
|
||||
u8* Map(void* file_handle, size_t file_offset, void* map_base, size_t map_size, const PageProtectionMode& mode);
|
||||
bool Unmap(void* map_base, size_t map_size);
|
||||
|
||||
private:
|
||||
SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages);
|
||||
|
||||
u8* m_base_ptr;
|
||||
size_t m_size;
|
||||
size_t m_num_pages;
|
||||
size_t m_num_mappings = 0;
|
||||
|
||||
#ifdef _WIN32
|
||||
using PlaceholderMap = std::map<size_t, size_t>;
|
||||
|
||||
PlaceholderMap::iterator FindPlaceholder(size_t page);
|
||||
|
||||
PlaceholderMap m_placeholder_ranges;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
// Safe version of Munmap -- NULLs the pointer variable immediately after free'ing it.
// `ptr` appears twice in the expansion (once as the Munmap argument, once as the
// assignment target), so it must be a plain assignable variable without side effects.
// The whole expression is cast to void, so the macro yields no value.
|
||||
#define SafeSysMunmap(ptr, size) \
|
||||
((void)(HostSys::Munmap(ptr, size), (ptr) = 0))
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
#include "fmt/core.h"
|
||||
|
||||
#include "common/Align.h"
|
||||
#include "common/PageFaultSource.h"
|
||||
#include "common/Assertions.h"
|
||||
#include "common/Console.h"
|
||||
|
@ -34,12 +35,26 @@
|
|||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#include <cerrno>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#ifndef __APPLE__
|
||||
#include <ucontext.h>
|
||||
#endif
|
||||
|
||||
extern void SignalExit(int sig);
|
||||
|
||||
static const uptr m_pagemask = getpagesize() - 1;
|
||||
|
||||
static struct sigaction s_old_sigsegv_action;
|
||||
#if defined(__APPLE__)
|
||||
static struct sigaction s_old_sigbus_action;
|
||||
#endif
|
||||
|
||||
// Linux implementation of SIGSEGV handler. Bind it using sigaction().
|
||||
static void SysPageFaultSignalFilter(int signal, siginfo_t* siginfo, void*)
|
||||
static void SysPageFaultSignalFilter(int signal, siginfo_t* siginfo, void* ctx)
|
||||
{
|
||||
// [TODO] : Add a thread ID filter to the Linux Signal handler here.
|
||||
// Rationale: On windows, the __try/__except model allows per-thread specific behavior
|
||||
|
@ -57,13 +72,20 @@ static void SysPageFaultSignalFilter(int signal, siginfo_t* siginfo, void*)
|
|||
// Note: Use of stdio functions isn't safe here. Avoid console logs,
|
||||
// assertions, file logs, or just about anything else useful.
|
||||
|
||||
#if defined(__APPLE__) && defined(__x86_64__)
|
||||
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext->__ss.__rip);
|
||||
#elif defined(__x86_64__)
|
||||
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_RIP]);
|
||||
#else
|
||||
void* const exception_pc = nullptr;
|
||||
#endif
|
||||
|
||||
// Note: This signal can be accessed by the EE or MTVU thread
|
||||
// Source_PageFault is a global variable with its own state information
|
||||
// so for now we lock this exception code unless someone can fix this better...
|
||||
std::unique_lock lock(PageFault_Mutex);
|
||||
|
||||
Source_PageFault->Dispatch(PageFaultInfo((uptr)siginfo->si_addr & ~m_pagemask));
|
||||
Source_PageFault->Dispatch(PageFaultInfo((uptr)exception_pc, (uptr)siginfo->si_addr & ~m_pagemask));
|
||||
|
||||
// resumes execution right where we left off (re-executes instruction that
|
||||
// caused the SIGSEGV).
|
||||
|
@ -89,11 +111,11 @@ void _platform_InstallSignalHandler()
|
|||
sigemptyset(&sa.sa_mask);
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
sa.sa_sigaction = SysPageFaultSignalFilter;
|
||||
#ifdef __APPLE__
|
||||
#if defined(__APPLE__)
|
||||
// MacOS uses SIGBUS for memory permission violations
|
||||
sigaction(SIGBUS, &sa, NULL);
|
||||
sigaction(SIGBUS, &sa, &s_old_sigbus_action);
|
||||
#else
|
||||
sigaction(SIGSEGV, &sa, NULL);
|
||||
sigaction(SIGSEGV, &sa, &s_old_sigsegv_action);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -210,4 +232,56 @@ void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
|
|||
pxFailRel("Failed to unmap shared memory");
|
||||
}
|
||||
|
||||
SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages)
|
||||
: m_base_ptr(base_ptr)
|
||||
, m_size(size)
|
||||
, m_num_pages(num_pages)
|
||||
{
|
||||
}
|
||||
|
||||
// Gives the reserved range back to the OS. Every Map() must have been undone first.
SharedMemoryMappingArea::~SharedMemoryMappingArea()
{
	pxAssertRel(m_num_mappings == 0, "No mappings left");

	const int release_result = munmap(m_base_ptr, m_size);
	if (release_result != 0)
	{
		pxFailRel("Failed to release shared memory area");
	}
}
|
||||
|
||||
|
||||
std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size)
|
||||
{
|
||||
pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned");
|
||||
|
||||
void* alloc = mmap(nullptr, size, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
if (alloc == MAP_FAILED)
|
||||
return nullptr;
|
||||
|
||||
return std::unique_ptr<SharedMemoryMappingArea>(new SharedMemoryMappingArea(static_cast<u8*>(alloc), size, size / __pagesize));
|
||||
}
|
||||
|
||||
// Maps `map_size` bytes of the shared-memory descriptor carried in `file_handle`,
// starting at `file_offset`, at the fixed address `map_base` inside this area.
// Returns the mapped address, or nullptr when mmap() fails.
u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* map_base, size_t map_size, const PageProtectionMode& mode)
{
	u8* const map_start = static_cast<u8*>(map_base);
	pxAssert(map_start >= m_base_ptr && map_start < (m_base_ptr + m_size));

	// MAP_FIXED silently replaces whatever is already mapped at the target, so the
	// whole range (not just its first byte) has to stay inside the reservation.
	pxAssert(map_size <= static_cast<size_t>((m_base_ptr + m_size) - map_start));

	const uint lnxmode = LinuxProt(mode);

	// The handle is a file descriptor stored in a pointer-sized value.
	const int fd = static_cast<int>(reinterpret_cast<intptr_t>(file_handle));
	void* const ptr = mmap(map_base, map_size, lnxmode, MAP_SHARED | MAP_FIXED, fd, static_cast<off_t>(file_offset));
	if (ptr == MAP_FAILED)
		return nullptr;

	m_num_mappings++;
	return static_cast<u8*>(ptr);
}
|
||||
|
||||
bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size)
|
||||
{
|
||||
pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
|
||||
|
||||
if (mmap(map_base, map_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) == MAP_FAILED)
|
||||
return false;
|
||||
|
||||
m_num_mappings--;
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -34,10 +34,12 @@
|
|||
|
||||
struct PageFaultInfo
|
||||
{
|
||||
uptr pc;
|
||||
uptr addr;
|
||||
|
||||
PageFaultInfo(uptr address)
|
||||
PageFaultInfo(uptr pc_, uptr address)
|
||||
{
|
||||
pc = pc_;
|
||||
addr = address;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -24,14 +24,8 @@
|
|||
#define NOMINMAX
|
||||
#endif
|
||||
|
||||
// Qt build requires Windows 10+, WX Windows 8.1+.
|
||||
#ifndef _WIN32_WINNT
|
||||
#ifdef PCSX2_CORE
|
||||
// We require Windows 10+.
|
||||
#define _WIN32_WINNT 0x0A00 // Windows 10
|
||||
#else
|
||||
#define _WIN32_WINNT 0x0603 // Windows 8.1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <windows.h>
|
||||
#include <VersionHelpers.h>
|
||||
|
|
|
@ -24,16 +24,24 @@
|
|||
#include "common/AlignedMalloc.h"
|
||||
#include "fmt/core.h"
|
||||
|
||||
#include "fmt/format.h"
|
||||
|
||||
// SEH filter body: forwards access violations to the page-fault event source.
// Returns EXCEPTION_CONTINUE_EXECUTION when a listener handled the fault (the faulting
// instruction is then retried), otherwise EXCEPTION_CONTINUE_SEARCH.
static long DoSysPageFaultExceptionFilter(EXCEPTION_POINTERS* eps)
{
	if (eps->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION)
		return EXCEPTION_CONTINUE_SEARCH;

	// The faulting instruction pointer is only extracted on x64; other targets report null.
#if defined(_M_AMD64)
	void* const exception_pc = reinterpret_cast<void*>(eps->ContextRecord->Rip);
#else
	void* const exception_pc = nullptr;
#endif

	// Note: This exception can be accessed by the EE or MTVU thread
	// Source_PageFault is a global variable with its own state information
	// so for now we lock this exception code unless someone can fix this better...
	std::unique_lock lock(PageFault_Mutex);

	// Dispatch exactly once, with both the faulting pc and the accessed address
	// (ExceptionInformation[1] for an access violation).
	Source_PageFault->Dispatch(PageFaultInfo((uptr)exception_pc, (uptr)eps->ExceptionRecord->ExceptionInformation[1]));
	return Source_PageFault->WasHandled() ? EXCEPTION_CONTINUE_EXECUTION : EXCEPTION_CONTINUE_SEARCH;
}
|
||||
|
||||
|
@ -148,4 +156,185 @@ void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
|
|||
pxFail("Failed to unmap shared memory");
|
||||
}
|
||||
|
||||
SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages)
|
||||
: m_base_ptr(base_ptr)
|
||||
, m_size(size)
|
||||
, m_num_pages(num_pages)
|
||||
{
|
||||
m_placeholder_ranges.emplace(0, size);
|
||||
}
|
||||
|
||||
// Releases the reserved range. Every Map() must have been undone first.
SharedMemoryMappingArea::~SharedMemoryMappingArea()
{
	pxAssertRel(m_num_mappings == 0, "No mappings left");

	// hopefully this will be okay, and we don't need to coalesce all the placeholders...
	const bool released = VirtualFreeEx(GetCurrentProcess(), m_base_ptr, 0, MEM_RELEASE) != 0;
	if (!released)
	{
		pxFailRel("Failed to release shared memory area");
	}
}
|
||||
|
||||
// Looks up the tracked placeholder range [first, second) that contains byte `offset`.
// Returns m_placeholder_ranges.end() when no tracked range contains it.
SharedMemoryMappingArea::PlaceholderMap::iterator SharedMemoryMappingArea::FindPlaceholder(size_t offset)
{
	const PlaceholderMap::iterator not_found = m_placeholder_ranges.end();
	if (m_placeholder_ranges.empty())
		return not_found;

	const auto covers = [offset](const PlaceholderMap::value_type& range) {
		return range.first <= offset && offset < range.second;
	};

	// First range starting at or after the offset; when every range starts before it,
	// begin with the final range instead.
	PlaceholderMap::iterator candidate = m_placeholder_ranges.lower_bound(offset);
	if (candidate == not_found)
		--candidate;

	if (covers(*candidate))
		return candidate;

	// Otherwise the only remaining possibility is the range immediately before it.
	if (candidate == m_placeholder_ranges.begin())
		return not_found;

	--candidate;
	return covers(*candidate) ? candidate : not_found;
}
|
||||
|
||||
std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size)
|
||||
{
|
||||
pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned");
|
||||
|
||||
void* alloc = VirtualAlloc2(GetCurrentProcess(), nullptr, size, MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, nullptr, 0);
|
||||
if (!alloc)
|
||||
return nullptr;
|
||||
|
||||
return std::unique_ptr<SharedMemoryMappingArea>(new SharedMemoryMappingArea(static_cast<u8*>(alloc), size, size / __pagesize));
|
||||
}
|
||||
|
||||
// Maps `map_size` bytes of the section `file_handle`, starting at `file_offset`, at the
// fixed address `map_base` inside this area, then applies protection `mode`.
//
// The target range must lie within one tracked placeholder. That placeholder is split
// so the range becomes a placeholder of its own, which the view then replaces.
// Returns the mapped address, or nullptr when MapViewOfFile3() fails.
u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* map_base, size_t map_size, const PageProtectionMode& mode)
{
	pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));

	const size_t map_offset = static_cast<u8*>(map_base) - m_base_ptr;
	pxAssert(Common::IsAlignedPow2(map_offset, __pagesize));
	pxAssert(Common::IsAlignedPow2(map_size, __pagesize));

	// should be a placeholder. unless there's some other mapping we didn't free.
	PlaceholderMap::iterator phit = FindPlaceholder(map_offset);
	pxAssertMsg(phit != m_placeholder_ranges.end(), "Page we're mapping is a placeholder");
	pxAssertMsg(map_offset >= phit->first && map_offset < phit->second, "Page is in returned placeholder range");
	pxAssertMsg((map_offset + map_size) <= phit->second, "Page range is in returned placeholder range");

	// do we need to split to the left? (i.e. is there a placeholder before this range)
	const size_t old_ph_end = phit->second;
	if (map_offset != phit->first)
	{
		// shrink the tracked range so it ends where the new mapping starts
		phit->second = map_offset;

		// split it (i.e. left..start and start..end are now separated)
		if (!VirtualFreeEx(GetCurrentProcess(), OffsetPointer(phit->first),
				(map_offset - phit->first), MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER))
		{
			pxFailRel("Failed to left split placeholder for map");
		}
	}
	else
	{
		// start of the placeholder is getting used, we'll split it right below if there's anything left over
		m_placeholder_ranges.erase(phit);
	}

	// do we need to split to the right? (i.e. is there a placeholder after this range)
	if ((map_offset + map_size) != old_ph_end)
	{
		// split out end..ph_end
		m_placeholder_ranges.emplace(map_offset + map_size, old_ph_end);

		if (!VirtualFreeEx(GetCurrentProcess(), OffsetPointer(map_offset), map_size,
				MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER))
		{
			pxFailRel("Failed to right split placeholder for map");
		}
	}

	// actually do the mapping, replacing the placeholder on the range
	if (!MapViewOfFile3(static_cast<HANDLE>(file_handle), GetCurrentProcess(),
			map_base, file_offset, map_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0))
	{
		// Log first so GetLastError() still reflects the MapViewOfFile3 failure.
		Console.Error("(SharedMemoryMappingArea) MapViewOfFile3() failed: %u", GetLastError());

		// The range was removed from the tracking map above but has not been replaced by
		// a view. Presumably it is still a (split) placeholder as far as the OS is
		// concerned, so record it again to keep later Map()/Unmap() lookups consistent.
		m_placeholder_ranges.emplace(map_offset, map_offset + map_size);
		return nullptr;
	}

	// The view is created read/write; tighten or change it only when the caller asked
	// for something else.
	const DWORD prot = ConvertToWinApi(mode);
	if (prot != PAGE_READWRITE)
	{
		DWORD old_prot;
		if (!VirtualProtect(map_base, map_size, prot, &old_prot))
			pxFail("Failed to protect memory mapping");
	}

	m_num_mappings++;
	return static_cast<u8*>(map_base);
}
|
||||
|
||||
bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size)
|
||||
{
|
||||
pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
|
||||
|
||||
const size_t map_offset = static_cast<u8*>(map_base) - m_base_ptr;
|
||||
pxAssert(Common::IsAlignedPow2(map_offset, __pagesize));
|
||||
pxAssert(Common::IsAlignedPow2(map_size, __pagesize));
|
||||
|
||||
const size_t page = map_offset / __pagesize;
|
||||
|
||||
// unmap the specified range
|
||||
if (!UnmapViewOfFile2(GetCurrentProcess(), map_base, MEM_PRESERVE_PLACEHOLDER))
|
||||
{
|
||||
Console.Error("(SharedMemoryMappingArea) UnmapViewOfFile2() failed: %u", GetLastError());
|
||||
return false;
|
||||
}
|
||||
|
||||
// can we coalesce to the left?
|
||||
PlaceholderMap::iterator left_it = (map_offset > 0) ? FindPlaceholder(map_offset - 1) : m_placeholder_ranges.end();
|
||||
if (left_it != m_placeholder_ranges.end())
|
||||
{
|
||||
// the left placeholder should end at our start
|
||||
pxAssert(map_offset == left_it->second);
|
||||
left_it->second = map_offset + map_size;
|
||||
|
||||
// combine placeholders before and the range we're unmapping, i.e. to the left
|
||||
if (!VirtualFreeEx(GetCurrentProcess(), OffsetPointer(left_it->first),
|
||||
left_it->second - left_it->first, MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS))
|
||||
{
|
||||
pxFail("Failed to coalesce placeholders left for unmap");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// this is a new placeholder
|
||||
left_it = m_placeholder_ranges.emplace(map_offset, map_offset + map_size).first;
|
||||
}
|
||||
|
||||
// can we coalesce to the right?
|
||||
PlaceholderMap::iterator right_it = ((map_offset + map_size) < m_size) ? FindPlaceholder(map_offset + map_size) : m_placeholder_ranges.end();
|
||||
if (right_it != m_placeholder_ranges.end())
|
||||
{
|
||||
// should start at our end
|
||||
pxAssert(right_it->first == (map_offset + map_size));
|
||||
left_it->second = right_it->second;
|
||||
m_placeholder_ranges.erase(right_it);
|
||||
|
||||
// combine our placeholder and the next, i.e. to the right
|
||||
if (!VirtualFreeEx(GetCurrentProcess(), OffsetPointer(left_it->first),
|
||||
left_it->second - left_it->first, MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS))
|
||||
{
|
||||
pxFail("Failed to coalescae placeholders right for unmap");
|
||||
}
|
||||
}
|
||||
|
||||
m_num_mappings--;
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -35,6 +35,7 @@ AdvancedSystemSettingsWidget::AdvancedSystemSettingsWidget(SettingsDialog* dialo
|
|||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeCache, "EmuCore/CPU/Recompiler", "EnableEECache", false);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeINTCSpinDetection, "EmuCore/Speedhacks", "IntcStat", true);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeWaitLoopDetection, "EmuCore/Speedhacks", "WaitLoop", true);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeFastmem, "EmuCore/CPU/Recompiler", "EnableFastmem", true);
|
||||
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0Recompiler, "EmuCore/CPU/Recompiler", "EnableVU0", true);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1Recompiler, "EmuCore/CPU/Recompiler", "EnableVU1", true);
|
||||
|
@ -60,6 +61,9 @@ AdvancedSystemSettingsWidget::AdvancedSystemSettingsWidget(SettingsDialog* dialo
|
|||
dialog->registerWidgetHelp(m_ui.eeINTCSpinDetection, tr("INTC Spin Detection"), tr("Checked"),
|
||||
tr("Huge speedup for some games, with almost no compatibility side effects."));
|
||||
|
||||
dialog->registerWidgetHelp(m_ui.eeFastmem, tr("Enable Fast Memory Access"), tr("Checked"),
|
||||
tr("Uses backpatching to avoid register flushing on every memory access."));
|
||||
|
||||
dialog->registerWidgetHelp(m_ui.vu0Recompiler, tr("Enable VU0 Recompiler"), tr("Checked"),
|
||||
tr("Enables VU0 Recompiler."));
|
||||
|
||||
|
|
|
@ -32,13 +32,6 @@
|
|||
<string>EmotionEngine (MIPS-IV)</string>
|
||||
</property>
|
||||
<layout class="QGridLayout" name="gridLayout_4">
|
||||
<item row="0" column="0">
|
||||
<widget class="QCheckBox" name="eeRecompiler">
|
||||
<property name="text">
|
||||
<string>Enable Recompiler</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0">
|
||||
<widget class="QCheckBox" name="eeWaitLoopDetection">
|
||||
<property name="text">
|
||||
|
@ -46,6 +39,20 @@
|
|||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="1">
|
||||
<widget class="QCheckBox" name="eeINTCSpinDetection">
|
||||
<property name="text">
|
||||
<string>INTC Spin Detection</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="0">
|
||||
<widget class="QCheckBox" name="eeRecompiler">
|
||||
<property name="text">
|
||||
<string>Enable Recompiler</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="1">
|
||||
<widget class="QCheckBox" name="eeCache">
|
||||
<property name="text">
|
||||
|
@ -53,10 +60,10 @@
|
|||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="1">
|
||||
<widget class="QCheckBox" name="eeINTCSpinDetection">
|
||||
<item row="3" column="0">
|
||||
<widget class="QCheckBox" name="eeFastmem">
|
||||
<property name="text">
|
||||
<string>INTC Spin Detection</string>
|
||||
<string>Enable Fast Memory Access</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
|
|
@ -45,6 +45,7 @@ GameFixSettingsWidget::GameFixSettingsWidget(SettingsDialog* dialog, QWidget* pa
|
|||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.VIF1StallHack, "EmuCore/Gamefixes", "VIF1StallHack", false);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.VuAddSubHack, "EmuCore/Gamefixes", "VuAddSubHack", false);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.IbitHack, "EmuCore/Gamefixes", "IbitHack", false);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.FullVU0SyncHack, "EmuCore/Gamefixes", "FullVU0SyncHack", false);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.VUSyncHack, "EmuCore/Gamefixes", "VUSyncHack", false);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.VUOverflowHack, "EmuCore/Gamefixes", "VUOverflowHack", false);
|
||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.XgKickHack, "EmuCore/Gamefixes", "XgKickHack", false);
|
||||
|
|
|
@ -113,6 +113,13 @@
|
|||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="FullVU0SyncHack">
|
||||
<property name="text">
|
||||
<string>Full VU0 Synchronization (Correct But Slower)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="IbitHack">
|
||||
<property name="text">
|
||||
|
|
|
@ -1698,12 +1698,9 @@ if(WIN32)
|
|||
strmiids.lib
|
||||
opengl32.lib
|
||||
comsuppw.lib
|
||||
)
|
||||
if(PCSX2_CORE)
|
||||
target_link_libraries(PCSX2_FLAGS INTERFACE
|
||||
OneCore.lib
|
||||
)
|
||||
else()
|
||||
if(NOT PCSX2_CORE)
|
||||
target_link_libraries(PCSX2_FLAGS INTERFACE
|
||||
pthreads4w
|
||||
)
|
||||
|
|
|
@ -46,6 +46,7 @@ enum GamefixId
|
|||
Fix_VUOverflow,
|
||||
Fix_XGKick,
|
||||
Fix_BlitInternalFPS,
|
||||
Fix_FullVU0Sync,
|
||||
|
||||
GamefixId_COUNT
|
||||
};
|
||||
|
@ -382,6 +383,8 @@ struct Pcsx2Config
|
|||
|
||||
bool
|
||||
EnableEECache : 1;
|
||||
bool
|
||||
EnableFastmem : 1;
|
||||
BITFIELD_END
|
||||
|
||||
RecompilerOptions();
|
||||
|
@ -845,7 +848,8 @@ struct Pcsx2Config
|
|||
VUSyncHack : 1, // Makes microVU run behind the EE to avoid VU register reading/writing sync issues. Useful for M-Bit games
|
||||
VUOverflowHack : 1, // Tries to simulate overflow flag checks (not really possible on x86 without soft floats)
|
||||
XgKickHack : 1, // Erementar Gerad, adds more delay to VU XGkick instructions. Corrects the color of some graphics, but breaks Tri-ace games and others.
|
||||
BlitInternalFPSHack : 1; // Disables privileged register write-based FPS detection.
|
||||
BlitInternalFPSHack : 1, // Disables privileged register write-based FPS detection.
|
||||
FullVU0SyncHack : 1; // Forces tight VU0 sync on every COP2 instruction.
|
||||
BITFIELD_END
|
||||
|
||||
GamefixOptions();
|
||||
|
@ -1146,6 +1150,7 @@ namespace EmuFolders
|
|||
#define CHECK_EEREC (EmuConfig.Cpu.Recompiler.EnableEE)
|
||||
#define CHECK_CACHE (EmuConfig.Cpu.Recompiler.EnableEECache)
|
||||
#define CHECK_IOPREC (EmuConfig.Cpu.Recompiler.EnableIOP)
|
||||
#define CHECK_FASTMEM (EmuConfig.Cpu.Recompiler.EnableEE && EmuConfig.Cpu.Recompiler.EnableFastmem)
|
||||
|
||||
//------------ SPECIAL GAME FIXES!!! ---------------
|
||||
#define CHECK_VUADDSUBHACK (EmuConfig.Gamefixes.VuAddSubHack) // Special Fix for Tri-ace games, they use an encryption algorithm that requires VU addi opcode to be bit-accurate.
|
||||
|
@ -1161,6 +1166,7 @@ namespace EmuFolders
|
|||
#define CHECK_VIF1STALLHACK (EmuConfig.Gamefixes.VIF1StallHack) // Like above, processes FIFO data before the stall is allowed (to make sure data goes over).
|
||||
#define CHECK_GIFFIFOHACK (EmuConfig.Gamefixes.GIFFIFOHack) // Enabled the GIF FIFO (more correct but slower)
|
||||
#define CHECK_VUOVERFLOWHACK (EmuConfig.Gamefixes.VUOverflowHack) // Special Fix for Superman Returns, they check for overflows on PS2 floats which we can't do without soft floats.
|
||||
#define CHECK_FULLVU0SYNCHACK (EmuConfig.Gamefixes.FullVU0SyncHack)
|
||||
|
||||
//------------ Advanced Options!!! ---------------
|
||||
#define CHECK_VU_OVERFLOW (EmuConfig.Cpu.Recompiler.vuOverflow)
|
||||
|
|
|
@ -298,8 +298,8 @@ void iDumpBlock( int startpc, u8 * ptr )
|
|||
|
||||
// write the instruction info
|
||||
|
||||
std::fprintf(eff, "\n\nlive0 - %x, live2 - %x, lastuse - %x\nxmm - %x, used - %x\n",
|
||||
EEINST_LIVE0, EEINST_LIVE2, EEINST_LASTUSE, EEINST_XMM, EEINST_USED
|
||||
std::fprintf(eff, "\n\nlive0 - %x, lastuse - %x\nxmm - %x, used - %x\n",
|
||||
EEINST_LIVE, EEINST_LASTUSE, EEINST_XMM, EEINST_USED
|
||||
);
|
||||
|
||||
memzero(used);
|
||||
|
|
|
@ -3801,6 +3801,8 @@ void FullscreenUI::DrawAdvancedSettingsPage()
|
|||
"EmuCore/Speedhacks", "IntcStat", true);
|
||||
DrawToggleSetting(bsi, "Enable Wait Loop Detection", "Moderate speedup for some games, with no known side effects.",
|
||||
"EmuCore/Speedhacks", "WaitLoop", true);
|
||||
DrawToggleSetting(bsi, "Enable Fast Memory Access", "Uses backpatching to avoid register flushing on every memory access.",
|
||||
"EmuCore/CPU/Recompiler", "EnableFastmem", true);
|
||||
DrawToggleSetting(bsi, "Enable VU0 Recompiler (Micro Mode)",
|
||||
"New Vector Unit recompiler with much improved compatibility. Recommended.", "EmuCore/CPU/Recompiler", "EnableVU0", true);
|
||||
DrawToggleSetting(bsi, "Enable VU1 Recompiler", "New Vector Unit recompiler with much improved compatibility. Recommended.",
|
||||
|
@ -3857,6 +3859,8 @@ void FullscreenUI::DrawGameFixesSettingsPage()
|
|||
"EmuCore/Gamefixes", "VuAddSubHack", false);
|
||||
DrawToggleSetting(bsi, "VU I bit Hack avoid constant recompilation in some games",
|
||||
"Scarface The World Is Yours, Crash Tag Team Racing.", "EmuCore/Gamefixes", "IbitHack", false);
|
||||
DrawToggleSetting(
|
||||
bsi, "Full VU0 Synchronization", "Forces tight VU0 sync on every COP2 instruction.", "EmuCore/Gamefixes", "FullVU0SyncHack", false);
|
||||
DrawToggleSetting(bsi, "VU Sync (Run behind)", "To avoid sync problems when reading or writing VU registers.", "EmuCore/Gamefixes",
|
||||
"VUSyncHack", false);
|
||||
DrawToggleSetting(
|
||||
|
|
|
@ -404,6 +404,10 @@ void CommonHost::UpdateLogging(SettingsInterface& si)
|
|||
DevConWriterEnabled = any_logging_sinks && (IsDevBuild || si.GetBoolValue("Logging", "EnableVerbose", false));
|
||||
SysConsole.eeConsole.Enabled = any_logging_sinks && si.GetBoolValue("Logging", "EnableEEConsole", false);
|
||||
SysConsole.iopConsole.Enabled = any_logging_sinks && si.GetBoolValue("Logging", "EnableIOPConsole", false);
|
||||
SysTrace.IOP.R3000A.Enabled = true;
|
||||
SysTrace.IOP.COP2.Enabled = true;
|
||||
SysTrace.IOP.Memory.Enabled = true;
|
||||
SysTrace.SIF.Enabled = true;
|
||||
|
||||
// Input Recording Logs
|
||||
SysConsole.recordingConsole.Enabled = any_logging_sinks && si.GetBoolValue("Logging", "EnableInputRecordingLogs", true);
|
||||
|
|
|
@ -963,6 +963,7 @@ void mmap_MarkCountedRamPage( u32 paddr )
|
|||
|
||||
m_PageProtectInfo[rampage].Mode = ProtMode_Write;
|
||||
HostSys::MemProtect( &eeMem->Main[rampage<<__pageshift], __pagesize, PageAccess_ReadOnly() );
|
||||
vtlb_UpdateFastmemProtection(rampage << __pageshift, __pagesize, PageAccess_ReadOnly());
|
||||
}
|
||||
|
||||
// offset - offset of address relative to psM.
|
||||
|
@ -980,6 +981,7 @@ static __fi void mmap_ClearCpuBlock( uint offset )
|
|||
"Attempted to clear a block that is already under manual protection." );
|
||||
|
||||
HostSys::MemProtect( &eeMem->Main[rampage<<__pageshift], __pagesize, PageAccess_ReadWrite() );
|
||||
vtlb_UpdateFastmemProtection(rampage << __pageshift, __pagesize, PageAccess_ReadWrite());
|
||||
m_PageProtectInfo[rampage].Mode = ProtMode_Manual;
|
||||
Cpu->Clear( m_PageProtectInfo[rampage].ReverseRamMap, __pagesize );
|
||||
}
|
||||
|
@ -988,13 +990,38 @@ void mmap_PageFaultHandler::OnPageFaultEvent( const PageFaultInfo& info, bool& h
|
|||
{
	// Handles a host page fault: either lifts write protection from a recompiled-code
	// page (clearing its blocks), or, for faults inside the fastmem area, asks vtlb to
	// backpatch the faulting load/store. `handled` is set only when the fault was dealt with.
	pxAssert( eeMem );

	u32 vaddr;
	if (CHECK_FASTMEM && vtlb_GetGuestAddress(info.addr, &vaddr))
	{
		// this was inside the fastmem area. check if it's a code page
		// fprintf(stderr, "Fault on fastmem %p vaddr %08X\n", info.addr, vaddr);

		const uptr ptr = (uptr)PSM(vaddr);
		const uptr offset = (ptr - (uptr)eeMem->Main);

		// Only consult the page-protection table for offsets inside main RAM, the same
		// bound the non-fastmem path below enforces before touching it. Without this a
		// guest address resolving outside eeMem->Main would index the table out of range.
		// NOTE(review): assumes m_PageProtectInfo covers exactly Ps2MemSize::MainRam -- confirm.
		const bool in_main_ram = ptr && offset < Ps2MemSize::MainRam;
		if (in_main_ram && m_PageProtectInfo[offset >> __pageshift].Mode == ProtMode_Write)
		{
			// fprintf(stderr, "Not backpatching code write at %08X\n", vaddr);
			mmap_ClearCpuBlock(offset);
			handled = true;
		}
		else
		{
			// fprintf(stderr, "Trying backpatching vaddr %08X\n", vaddr);
			if (vtlb_BackpatchLoadStore(info.pc, info.addr))
				handled = true;
		}
	}
	else
	{
		// get bad virtual address
		uptr offset = info.addr - (uptr)eeMem->Main;
		if (offset >= Ps2MemSize::MainRam)
			return;

		mmap_ClearCpuBlock(offset);
		handled = true;
	}
}
|
||||
|
||||
// Clears all block tracking statuses, manual protection flags, and write protection.
|
||||
// This does not clear any recompiler blocks. It is assumed (and necessary) for the caller
|
||||
|
@ -1005,4 +1032,5 @@ void mmap_ResetBlockTracking()
|
|||
//DbgCon.WriteLn( "vtlb/mmap: Block Tracking reset..." );
|
||||
memzero( m_PageProtectInfo );
|
||||
if (eeMem) HostSys::MemProtect( eeMem->Main, Ps2MemSize::MainRam, PageAccess_ReadWrite() );
|
||||
vtlb_UpdateFastmemProtection(0, Ps2MemSize::MainRam, PageAccess_ReadWrite());
|
||||
}
|
||||
|
|
|
@ -155,6 +155,7 @@ Pcsx2Config::RecompilerOptions::RecompilerOptions()
|
|||
EnableIOP = true;
|
||||
EnableVU0 = true;
|
||||
EnableVU1 = true;
|
||||
EnableFastmem = true;
|
||||
|
||||
// vu and fpu clamping default to standard overflow.
|
||||
vuOverflow = true;
|
||||
|
@ -211,6 +212,7 @@ void Pcsx2Config::RecompilerOptions::LoadSave(SettingsWrapper& wrap)
|
|||
SettingsWrapBitBool(EnableEECache);
|
||||
SettingsWrapBitBool(EnableVU0);
|
||||
SettingsWrapBitBool(EnableVU1);
|
||||
SettingsWrapBitBool(EnableFastmem);
|
||||
|
||||
SettingsWrapBitBool(vuOverflow);
|
||||
SettingsWrapBitBool(vuExtraOverflow);
|
||||
|
@ -864,7 +866,8 @@ static const char* const tbl_GamefixNames[] =
|
|||
"VUSync",
|
||||
"VUOverflow",
|
||||
"XGKick",
|
||||
"BlitInternalFPS"
|
||||
"BlitInternalFPS",
|
||||
"FullVU0Sync",
|
||||
};
|
||||
|
||||
const char* EnumToString(GamefixId id)
|
||||
|
@ -907,6 +910,7 @@ void Pcsx2Config::GamefixOptions::Set(GamefixId id, bool enabled)
|
|||
case Fix_VUSync: VUSyncHack = enabled; break;
|
||||
case Fix_VUOverflow: VUOverflowHack = enabled; break;
|
||||
case Fix_BlitInternalFPS: BlitInternalFPSHack = enabled; break;
|
||||
case Fix_FullVU0Sync: FullVU0SyncHack = enabled; break;
|
||||
jNO_DEFAULT;
|
||||
}
|
||||
}
|
||||
|
@ -934,6 +938,7 @@ bool Pcsx2Config::GamefixOptions::Get(GamefixId id) const
|
|||
case Fix_VUSync: return VUSyncHack;
|
||||
case Fix_VUOverflow: return VUOverflowHack;
|
||||
case Fix_BlitInternalFPS: return BlitInternalFPSHack;
|
||||
case Fix_FullVU0Sync: return FullVU0SyncHack;
|
||||
jNO_DEFAULT;
|
||||
}
|
||||
return false; // unreachable, but we still need to suppress warnings >_<
|
||||
|
@ -961,6 +966,7 @@ void Pcsx2Config::GamefixOptions::LoadSave(SettingsWrapper& wrap)
|
|||
SettingsWrapBitBool(VUSyncHack);
|
||||
SettingsWrapBitBool(VUOverflowHack);
|
||||
SettingsWrapBitBool(BlitInternalFPSHack);
|
||||
SettingsWrapBitBool(FullVU0SyncHack);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -46,12 +46,6 @@ namespace Exception
|
|||
public:
|
||||
explicit CancelInstruction() { }
|
||||
};
|
||||
|
||||
class FailedToAllocateRegister
|
||||
{
|
||||
public:
|
||||
explicit FailedToAllocateRegister() { }
|
||||
};
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
|
|
@ -109,12 +109,18 @@ void RecompiledCodeReserve::Reset()
|
|||
|
||||
void RecompiledCodeReserve::AllowModification()
|
||||
{
|
||||
// Apple Silicon enforces write protection in hardware.
|
||||
#if !defined(__APPLE__) || !defined(_M_ARM64)
|
||||
HostSys::MemProtect(m_baseptr, m_size, PageAccess_Any());
|
||||
#endif
|
||||
}
|
||||
|
||||
void RecompiledCodeReserve::ForbidModification()
|
||||
{
|
||||
// Apple Silicon enforces write protection in hardware.
|
||||
#if !defined(__APPLE__) || !defined(_M_ARM64)
|
||||
HostSys::MemProtect(m_baseptr, m_size, PageProtectionMode().Read().Execute());
|
||||
#endif
|
||||
}
|
||||
|
||||
// Sets the abbreviated name used by the profiler. Name should be under 10 characters long.
|
||||
|
|
|
@ -113,6 +113,10 @@ public:
|
|||
|
||||
VirtualMemoryBumpAllocator& BumpAllocator() { return m_bumpAllocator; }
|
||||
|
||||
const eeMemoryReserve& EEMemory() const { return m_ee; }
|
||||
const iopMemoryReserve& IOPMemory() const { return m_iop; }
|
||||
const vuMemoryReserve& VUMemory() const { return m_vu; }
|
||||
|
||||
bool Allocate();
|
||||
void Reset();
|
||||
void Release();
|
||||
|
|
|
@ -1475,6 +1475,7 @@ void VMManager::Execute()
|
|||
// We need to switch the cpus out, and reset the new ones if so.
|
||||
s_cpu_provider_pack->ApplyConfig();
|
||||
SysClearExecutionCache();
|
||||
vtlb_ResetFastmem();
|
||||
}
|
||||
|
||||
// Execute until we're asked to stop.
|
||||
|
@ -1553,6 +1554,9 @@ void VMManager::CheckForCPUConfigChanges(const Pcsx2Config& old_config)
|
|||
SysClearExecutionCache();
|
||||
memBindConditionalHandlers();
|
||||
|
||||
if (EmuConfig.Cpu.Recompiler.EnableFastmem != old_config.Cpu.Recompiler.EnableFastmem)
|
||||
vtlb_ResetFastmem();
|
||||
|
||||
// did we toggle recompilers?
|
||||
if (EmuConfig.Cpu.CpusChanged(old_config.Cpu))
|
||||
{
|
||||
|
|
|
@ -71,7 +71,7 @@
|
|||
<Link>
|
||||
<LargeAddressAware>Yes</LargeAddressAware>
|
||||
<AdditionalDependencies>comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;rpcrt4.lib;iphlpapi.lib;dsound.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>dxguid.lib;dinput8.lib;hid.lib;PowrProf.lib;d3dcompiler.lib;d3d11.lib;dxgi.lib;strmiids.lib;opengl32.lib;comsuppw.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>dxguid.lib;dinput8.lib;hid.lib;PowrProf.lib;d3dcompiler.lib;d3d11.lib;dxgi.lib;strmiids.lib;opengl32.lib;comsuppw.lib;OneCore.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
|
|
462
pcsx2/vtlb.cpp
462
pcsx2/vtlb.cpp
|
@ -42,6 +42,13 @@
|
|||
|
||||
#include "fmt/core.h"
|
||||
|
||||
#include <map>
|
||||
#include <unordered_set>
|
||||
#include <unordered_map>
|
||||
|
||||
#define FASTMEM_LOG(...)
|
||||
//#define FASTMEM_LOG(...) Console.WriteLn(__VA_ARGS__)
|
||||
|
||||
using namespace R5900;
|
||||
using namespace vtlb_private;
|
||||
|
||||
|
@ -60,6 +67,36 @@ static vtlbHandler UnmappedVirtHandler1;
|
|||
static vtlbHandler UnmappedPhyHandler0;
|
||||
static vtlbHandler UnmappedPhyHandler1;
|
||||
|
||||
struct FastmemVirtualMapping
|
||||
{
|
||||
u32 offset;
|
||||
u32 size;
|
||||
};
|
||||
|
||||
struct LoadstoreBackpatchInfo
|
||||
{
|
||||
u32 guest_pc;
|
||||
u32 gpr_bitmask;
|
||||
u32 fpr_bitmask;
|
||||
u8 code_size;
|
||||
u8 address_register;
|
||||
u8 data_register;
|
||||
u8 size_in_bits;
|
||||
bool is_signed;
|
||||
bool is_load;
|
||||
bool is_fpr;
|
||||
};
|
||||
|
||||
static constexpr size_t FASTMEM_AREA_SIZE = 0x100000000ULL;
|
||||
static constexpr u32 FASTMEM_PAGE_COUNT = FASTMEM_AREA_SIZE / VTLB_PAGE_SIZE;
|
||||
static constexpr u32 NO_FASTMEM_MAPPING = 0xFFFFFFFFu;
|
||||
|
||||
static std::unique_ptr<SharedMemoryMappingArea> s_fastmem_area;
|
||||
static std::vector<u32> s_fastmem_virtual_mapping; // maps vaddr -> mainmem offset
|
||||
static std::unordered_multimap<u32, u32> s_fastmem_physical_mapping; // maps mainmem offset -> vaddr
|
||||
static std::unordered_map<uptr, LoadstoreBackpatchInfo> s_fastmem_backpatch_info;
|
||||
static std::unordered_set<u32> s_fastmem_faulting_pcs;
|
||||
|
||||
vtlb_private::VTLBPhysical vtlb_private::VTLBPhysical::fromPointer(sptr ptr) {
|
||||
pxAssertMsg(ptr >= 0, "Address too high");
|
||||
return VTLBPhysical(ptr);
|
||||
|
@ -659,6 +696,341 @@ __fi u32 vtlb_V2P(u32 vaddr)
|
|||
return paddr;
|
||||
}
|
||||
|
||||
static constexpr bool vtlb_MismatchedHostPageSize()
|
||||
{
|
||||
return (__pagesize != VTLB_PAGE_SIZE);
|
||||
}
|
||||
|
||||
// Returns true if the given address sits on a host page boundary.
// Always true when host and VTLB page sizes match.
static bool vtlb_IsHostAligned(u32 paddr)
{
	if constexpr (vtlb_MismatchedHostPageSize())
		return ((paddr & __pagemask) == 0);
	else
		return true;
}
|
||||
|
||||
// Converts a VTLB page index into the index of the host page containing it.
// The identity function when host and VTLB page sizes match.
static u32 vtlb_HostPage(u32 page)
{
	if constexpr (vtlb_MismatchedHostPageSize())
		return page >> (__pageshift - VTLB_PAGE_BITS);
	else
		return page;
}
|
||||
|
||||
// Rounds an offset down to the start of its host page.
// Returns the offset unchanged when host and VTLB page sizes match.
static u32 vtlb_HostAlignOffset(u32 offset)
{
	if constexpr (vtlb_MismatchedHostPageSize())
		return offset & ~__pagemask;
	else
		return offset;
}
|
||||
|
||||
// Decides whether the host page containing VTLB page `page` can be backed by a
// single host mapping. That is the case when every VTLB page of the group maps
// to consecutive main-memory offsets and the first one is host-page aligned.
// When host and VTLB page sizes match, every page trivially qualifies.
static bool vtlb_IsHostCoalesced(u32 page)
{
	if constexpr (__pagesize != VTLB_PAGE_SIZE)
	{
		constexpr u32 group_shift = __pageshift - VTLB_PAGE_BITS;
		constexpr u32 group_size = (1u << group_shift);

		// First VTLB page of the host page, and what it currently maps to.
		const u32 first_page = page & ~(group_size - 1);
		const u32 first_offset = s_fastmem_virtual_mapping[first_page];

		// An unmapped entry (all ones) is rejected here as well, since it is not aligned.
		if ((first_offset & __pagemask) != 0)
			return false;

		for (u32 idx = 0; idx < group_size; idx++)
		{
			const u32 wanted_offset = first_offset + idx * VTLB_PAGE_SIZE;
			if (s_fastmem_virtual_mapping[first_page + idx] != wanted_offset)
				return false;
		}

		return true;
	}
	else
	{
		return true;
	}
}
|
||||
|
||||
// Translates a host pointer into an offset within the shared main-memory file.
//
// ptr            - host address of a VTLB page
// mainmem_offset - receives the offset of `ptr` within main memory
// mainmem_size   - receives the number of bytes from `ptr` to the end of its region
// prot           - receives the protection the page should be mapped with
//
// Returns false (outputs untouched) when the whole page does not lie inside the
// EE, IOP or VU regions; such pages must be handled by the slow path.
static bool vtlb_GetMainMemoryOffsetFromPtr(uptr ptr, u32* mainmem_offset, u32* mainmem_size, PageProtectionMode* prot)
{
	SysMainMemory& sysmem = GetVmMemory();
	const uptr ptr_end = ptr + VTLB_PAGE_SIZE;

	// The complete page [ptr, ptr_end) has to fit inside the region.
	const auto page_within = [ptr, ptr_end](uptr region_begin, uptr region_end) {
		return (ptr >= region_begin && ptr_end <= region_end);
	};

	// EE memory and ROMs.
	if (page_within((uptr)eeMem->Main, (uptr)eeMem->ZeroRead))
	{
		const u32 ee_offset = static_cast<u32>(ptr - (uptr)eeMem->Main);

		// Anything past main RAM is always writable; main RAM pages are writable
		// unless their page info says ProtMode_Write.
		const bool can_write = (ee_offset >= Ps2MemSize::MainRam) || (mmap_GetRamPageInfo(ee_offset) != ProtMode_Write);

		*mainmem_offset = (ee_offset + HostMemoryMap::EEmemOffset);
		*mainmem_size = (offsetof(EEVM_MemoryAllocMess, ZeroRead) - ee_offset);
		*prot = PageProtectionMode().Read().Write(can_write);
		return true;
	}

	// IOP memory.
	if (page_within((uptr)iopMem->Main, (uptr)iopMem->P))
	{
		const u32 iop_offset = static_cast<u32>(ptr - (uptr)iopMem->Main);
		*mainmem_offset = iop_offset + HostMemoryMap::IOPmemOffset;
		*mainmem_size = (offsetof(IopVM_MemoryAllocMess, P) - iop_offset);
		*prot = PageProtectionMode().Read().Write();
		return true;
	}

	// VU memory - covers data and code for VU0/VU1. In practice only the data
	// matters, because code accesses go through a handler.
	if (page_within((uptr)sysmem.VUMemory().GetPtr(), (uptr)sysmem.VUMemory().GetPtrEnd()))
	{
		const u32 vu_offset = static_cast<u32>(ptr - (uptr)sysmem.VUMemory().GetPtr());
		*mainmem_offset = vu_offset + HostMemoryMap::VUmemOffset;
		*mainmem_size = sysmem.VUMemory().GetSize() - vu_offset;
		*prot = PageProtectionMode().Read().Write();
		return true;
	}

	// Some mappings remain unknown at this point; currently the IOP memory ends up
	// physically mapped as 8MB rather than 2MB. That should not be virtual mapped
	// anyway, so fall back to slowmem for those.
	return false;
}
|
||||
|
||||
// Looks up guest physical address `paddr` in the physical map and, when it is
// backed by plain memory, resolves it via vtlb_GetMainMemoryOffsetFromPtr().
// Returns false for addresses outside the physical map and for handler-backed
// pages (handlers do not live in the shared memory).
static bool vtlb_GetMainMemoryOffset(u32 paddr, u32* mainmem_offset, u32* mainmem_size, PageProtectionMode* prot)
{
	if (paddr < VTLB_PMAP_SZ)
	{
		const VTLBPhysical& entry = vtlbdata.pmap[paddr >> VTLB_PAGE_BITS];
		if (!entry.isHandler())
			return vtlb_GetMainMemoryOffsetFromPtr(entry.raw(), mainmem_offset, mainmem_size, prot);
	}

	return false;
}
|
||||
|
||||
// Maps the VTLB page at guest virtual address `vaddr` to `mainmem_offset` in
// the shared main-memory file, replacing whatever the page mapped to before.
//
// vaddr          - guest virtual address of the page
// mainmem_offset - offset of the backing page within main memory
// mode           - protection to apply to the host mapping
//
// The forward table (s_fastmem_virtual_mapping) and the reverse multimap
// (s_fastmem_physical_mapping) are kept in sync. The host mapping itself is
// only created once the whole host page is coalesced (see vtlb_IsHostCoalesced).
// On a host mapping failure the page is left unmapped and an error is logged.
static void vtlb_CreateFastmemMapping(u32 vaddr, u32 mainmem_offset, const PageProtectionMode& mode)
{
	FASTMEM_LOG("Create fastmem mapping @ vaddr %08X mainmem %08X", vaddr, mainmem_offset);

	const u32 page = vaddr / VTLB_PAGE_SIZE;
	const u32 old_offset = s_fastmem_virtual_mapping[page];

	if (old_offset == mainmem_offset)
	{
		// current mapping is fine
		return;
	}

	if (old_offset != NO_FASTMEM_MAPPING)
	{
		// current mapping needs to be removed; sample the coalesced state before
		// the table entry is cleared, since clearing it breaks the group.
		const bool was_coalesced = vtlb_IsHostCoalesced(page);

		s_fastmem_virtual_mapping[page] = NO_FASTMEM_MAPPING;
		if (was_coalesced && !s_fastmem_area->Unmap(s_fastmem_area->PagePointer(vtlb_HostPage(page)), __pagesize))
			Console.Error("Failed to unmap vaddr %08X", vaddr);

		// Remove the reverse mapping. It is keyed by the offset the page USED to
		// map to, not by the new one; looking it up under the new offset would
		// leave a stale alias behind that later protection updates would act on.
		auto range = s_fastmem_physical_mapping.equal_range(old_offset);
		for (auto it = range.first; it != range.second;)
		{
			if (it->second == vaddr)
				it = s_fastmem_physical_mapping.erase(it);
			else
				++it;
		}
	}

	s_fastmem_virtual_mapping[page] = mainmem_offset;
	if (vtlb_IsHostCoalesced(page))
	{
		const u32 host_page = vtlb_HostPage(page);
		const u32 host_offset = vtlb_HostAlignOffset(mainmem_offset);

		if (!s_fastmem_area->Map(GetVmMemory().MainMemory()->GetFileHandle(), host_offset,
				s_fastmem_area->PagePointer(host_page), __pagesize, mode))
		{
			Console.Error("Failed to map vaddr %08X to mainmem offset %08X", vtlb_HostAlignOffset(vaddr), host_offset);
			s_fastmem_virtual_mapping[page] = NO_FASTMEM_MAPPING;
			return;
		}
	}

	s_fastmem_physical_mapping.emplace(mainmem_offset, vaddr);
}
|
||||
|
||||
// Drops the fastmem mapping of the VTLB page at guest virtual address `vaddr`,
// if there is one: clears the forward table entry, unmaps the host page when
// it was coalesced, and removes the matching entry from the reverse map.
static void vtlb_RemoveFastmemMapping(u32 vaddr)
{
	const u32 page = vaddr / VTLB_PAGE_SIZE;
	const u32 mapped_offset = s_fastmem_virtual_mapping[page];
	if (mapped_offset == NO_FASTMEM_MAPPING)
		return;

	// Has to be sampled before the table entry is cleared below.
	const bool host_mapped = vtlb_IsHostCoalesced(page);
	FASTMEM_LOG("Remove fastmem mapping @ vaddr %08X mainmem %08X", vaddr, mapped_offset);
	s_fastmem_virtual_mapping[page] = NO_FASTMEM_MAPPING;

	if (host_mapped)
	{
		u8* const host_ptr = s_fastmem_area->PagePointer(vtlb_HostPage(page));
		if (!s_fastmem_area->Unmap(host_ptr, __pagesize))
			Console.Error("Failed to unmap vaddr %08X", vtlb_HostAlignOffset(vaddr));
	}

	// remove from reverse map
	const auto aliases = s_fastmem_physical_mapping.equal_range(mapped_offset);
	auto alias = aliases.first;
	while (alias != aliases.second)
	{
		if (alias->second == vaddr)
			alias = s_fastmem_physical_mapping.erase(alias);
		else
			++alias;
	}
}
|
||||
|
||||
// Removes the fastmem mappings of every VTLB page in [vaddr, vaddr + size).
// Both arguments must be VTLB-page aligned and size must be non-zero.
static void vtlb_RemoveFastmemMappings(u32 vaddr, u32 size)
{
	pxAssert((vaddr & VTLB_PAGE_MASK) == 0);
	pxAssert(size > 0 && (size & VTLB_PAGE_MASK) == 0);

	u32 page_vaddr = vaddr;
	for (u32 remaining = size / VTLB_PAGE_SIZE; remaining > 0; remaining--)
	{
		vtlb_RemoveFastmemMapping(page_vaddr);
		page_vaddr += VTLB_PAGE_SIZE;
	}
}
|
||||
|
||||
static void vtlb_RemoveFastmemMappings()
|
||||
{
|
||||
if (s_fastmem_virtual_mapping.empty())
|
||||
{
|
||||
// not initialized yet
|
||||
return;
|
||||
}
|
||||
|
||||
for (u32 page = 0; page < FASTMEM_PAGE_COUNT; page++)
|
||||
{
|
||||
if (s_fastmem_virtual_mapping[page] == NO_FASTMEM_MAPPING)
|
||||
continue;
|
||||
|
||||
s_fastmem_virtual_mapping[page] = NO_FASTMEM_MAPPING;
|
||||
|
||||
if (!vtlb_IsHostAligned(page << VTLB_PAGE_BITS))
|
||||
continue;
|
||||
|
||||
if (!s_fastmem_area->Unmap(s_fastmem_area->PagePointer(vtlb_HostPage(page)), __pagesize))
|
||||
Console.Error("Failed to unmap vaddr %08X", page * __pagesize);
|
||||
}
|
||||
|
||||
s_fastmem_physical_mapping.clear();
|
||||
}
|
||||
|
||||
// Translates a host address inside the fastmem area into the equivalent
// address inside the main-memory mapping.
//
// addr - in: host address to resolve; out: resolved address (only written on success)
//
// Returns false when fastmem is not allocated, when the address lies outside
// the fastmem area, or when the guest page it refers to has no fastmem mapping.
bool vtlb_ResolveFastmemMapping(uptr* addr)
{
	const uptr uaddr = *addr;
	const uptr fastmem_start = (uptr)vtlbdata.fastmem_base;

	// Without an allocated area fastmem_start is 0, which would make every low
	// host address look like a fastmem address and index an empty table below.
	if (fastmem_start == 0 || s_fastmem_virtual_mapping.empty())
		return false;

	const uptr fastmem_end = fastmem_start + 0xFFFFFFFFu;
	if (uaddr < fastmem_start || uaddr > fastmem_end)
		return false;

	const u32 vaddr = static_cast<u32>(uaddr - fastmem_start);
	FASTMEM_LOG("Trying to resolve %p (vaddr %08X)", (void*)uaddr, vaddr);

	const u32 vpage = vaddr / VTLB_PAGE_SIZE;
	const u32 page_offset = s_fastmem_virtual_mapping[vpage];
	if (page_offset == NO_FASTMEM_MAPPING)
	{
		FASTMEM_LOG("%08X is not virtual mapped", vaddr);
		return false;
	}

	// Re-apply the offset within the page on top of the page's main-memory offset.
	const u32 mainmem_offset = page_offset + (vaddr & VTLB_PAGE_MASK);
	FASTMEM_LOG("Resolved %p (vaddr %08X) to mainmem offset %08X", (void*)uaddr, vaddr, mainmem_offset);
	*addr = ((uptr)GetVmMemory().MainMemory()->GetBase()) + mainmem_offset;
	return true;
}
|
||||
|
||||
// Converts a host address inside the fastmem area into the guest address it
// represents. Returns false (leaving *guest_addr untouched) for host addresses
// outside [fastmem_base, fastmem_base + 0xFFFFFFFF].
bool vtlb_GetGuestAddress(uptr host_addr, u32* guest_addr)
{
	const uptr area_first = (uptr)vtlbdata.fastmem_base;
	const uptr area_last = area_first + 0xFFFFFFFFu;

	const bool inside_area = (host_addr >= area_first && host_addr <= area_last);
	if (inside_area)
		*guest_addr = static_cast<u32>(host_addr - area_first);

	return inside_area;
}
|
||||
|
||||
// Applies `prot` to every fastmem alias of the guest physical range
// [paddr, paddr + size). Does nothing when fastmem is disabled or when the
// range is not backed by main memory.
//
// paddr - VTLB-page-aligned guest physical start address
// size  - non-zero, VTLB-page-aligned length; clamped to the end of the backing region
// prot  - protection to apply to each host-aligned alias
void vtlb_UpdateFastmemProtection(u32 paddr, u32 size, const PageProtectionMode& prot)
{
	if (!CHECK_FASTMEM)
		return;

	pxAssert((paddr & VTLB_PAGE_MASK) == 0);
	pxAssert(size > 0 && (size & VTLB_PAGE_MASK) == 0);

	u32 region_offset, region_size;
	PageProtectionMode region_prot;
	if (!vtlb_GetMainMemoryOffset(paddr, &region_offset, &region_size, &region_prot))
		return;

	FASTMEM_LOG("UpdateFastmemProtection %08X mmoffset %08X %08X", paddr, region_offset, size);

	const u32 page_count = std::min(size, region_size) / VTLB_PAGE_SIZE;
	for (u32 n = 0; n < page_count; n++)
	{
		const u32 page_offset = region_offset + n * VTLB_PAGE_SIZE;

		// walk every virtual alias of this main-memory page
		const auto aliases = s_fastmem_physical_mapping.equal_range(page_offset);
		for (auto alias = aliases.first; alias != aliases.second; ++alias)
		{
			FASTMEM_LOG(" valias %08X (size %u)", alias->second, VTLB_PAGE_SIZE);

			// Only host-aligned aliases are touched; protection is applied per host page.
			if (!vtlb_IsHostAligned(alias->second))
				continue;

			HostSys::MemProtect(s_fastmem_area->OffsetPointer(alias->second), __pagesize, prot);
		}
	}
}
|
||||
|
||||
void vtlb_ClearLoadStoreInfo()
|
||||
{
|
||||
s_fastmem_backpatch_info.clear();
|
||||
s_fastmem_faulting_pcs.clear();
|
||||
}
|
||||
|
||||
// Records the details of a load/store emitted at host address `code_address`
// so that vtlb_BackpatchLoadStore() can find them later. Any entry previously
// recorded for the same code address is replaced. `code_size` must fit in a u8.
void vtlb_AddLoadStoreInfo(uptr code_address, u32 code_size, u32 guest_pc, u32 gpr_bitmask, u32 fpr_bitmask, u8 address_register, u8 data_register, u8 size_in_bits, bool is_signed, bool is_load, bool is_fpr)
{
	pxAssert(code_size < std::numeric_limits<u8>::max());

	const LoadstoreBackpatchInfo entry{
		guest_pc,
		gpr_bitmask,
		fpr_bitmask,
		static_cast<u8>(code_size),
		address_register,
		data_register,
		size_in_bits,
		is_signed,
		is_load,
		is_fpr,
	};

	// Insert, or overwrite whatever was stored for this code address before.
	s_fastmem_backpatch_info.insert_or_assign(code_address, entry);
}
|
||||
|
||||
// Handles a fault raised by a load/store: if `fault_address` lies in the
// fastmem area and a load/store was recorded at `code_address`, the
// instruction is backpatched, its block is invalidated, and its guest PC is
// added to the faulting set.
//
// Returns true when the fault was handled this way, false otherwise.
bool vtlb_BackpatchLoadStore(uptr code_address, uptr fault_address)
{
	const uptr area_first = (uptr)vtlbdata.fastmem_base;
	const uptr area_last = area_first + 0xFFFFFFFFu;
	const bool inside_area = (fault_address >= area_first && fault_address <= area_last);
	if (!inside_area)
		return false;

	const auto entry = s_fastmem_backpatch_info.find(code_address);
	if (entry == s_fastmem_backpatch_info.end())
		return false;

	const LoadstoreBackpatchInfo& ls = entry->second;
	const u32 guest_addr = static_cast<u32>(fault_address - area_first);
	vtlb_DynBackpatchLoadStore(code_address, ls.code_size, ls.guest_pc, guest_addr,
		ls.gpr_bitmask, ls.fpr_bitmask, ls.address_register, ls.data_register,
		ls.size_in_bits, ls.is_signed, ls.is_load, ls.is_fpr);

	// queue block for recompilation later
	Cpu->Clear(ls.guest_pc, 1);

	// remember the pc as faulting, so that no further fastmem loadstore is emitted for it
	s_fastmem_faulting_pcs.insert(ls.guest_pc);

	// the entry has served its purpose; `ls` must not be used past this point
	s_fastmem_backpatch_info.erase(entry);
	return true;
}
|
||||
|
||||
// Tells whether a load/store at `guest_pc` has previously faulted and been
// backpatched (see vtlb_BackpatchLoadStore).
bool vtlb_IsFaultingPC(u32 guest_pc)
{
	return (s_fastmem_faulting_pcs.count(guest_pc) != 0);
}
|
||||
|
||||
//virtual mappings
|
||||
//TODO: Add invalid paddr checks
|
||||
void vtlb_VMap(u32 vaddr,u32 paddr,u32 size)
|
||||
|
@ -667,6 +1039,23 @@ void vtlb_VMap(u32 vaddr,u32 paddr,u32 size)
|
|||
verify(0==(paddr&VTLB_PAGE_MASK));
|
||||
verify(0==(size&VTLB_PAGE_MASK) && size>0);
|
||||
|
||||
if (CHECK_FASTMEM)
|
||||
{
|
||||
const u32 num_pages = size / VTLB_PAGE_SIZE;
|
||||
u32 current_vaddr = vaddr;
|
||||
u32 current_paddr = paddr;
|
||||
|
||||
for (u32 i = 0; i < num_pages; i++, current_vaddr += VTLB_PAGE_SIZE, current_paddr += VTLB_PAGE_SIZE)
|
||||
{
|
||||
u32 hoffset, hsize;
|
||||
PageProtectionMode mode;
|
||||
if (vtlb_GetMainMemoryOffset(current_paddr, &hoffset, &hsize, &mode))
|
||||
vtlb_CreateFastmemMapping(current_vaddr, hoffset, mode);
|
||||
else
|
||||
vtlb_RemoveFastmemMapping(current_vaddr);
|
||||
}
|
||||
}
|
||||
|
||||
while (size > 0)
|
||||
{
|
||||
VTLBVirtual vmv;
|
||||
|
@ -696,6 +1085,22 @@ void vtlb_VMapBuffer(u32 vaddr,void* buffer,u32 size)
|
|||
verify(0==(vaddr&VTLB_PAGE_MASK));
|
||||
verify(0==(size&VTLB_PAGE_MASK) && size>0);
|
||||
|
||||
if (CHECK_FASTMEM)
|
||||
{
|
||||
if (buffer == eeMem->Scratch && size == Ps2MemSize::Scratch)
|
||||
{
|
||||
u32 fm_vaddr = vaddr;
|
||||
u32 fm_hostoffset = HostMemoryMap::EEmemOffset + offsetof(EEVM_MemoryAllocMess, Scratch);
|
||||
PageProtectionMode mode = PageProtectionMode().Read().Write();
|
||||
for (u32 i = 0; i < (Ps2MemSize::Scratch / VTLB_PAGE_SIZE); i++, fm_vaddr += VTLB_PAGE_SIZE, fm_hostoffset += VTLB_PAGE_SIZE)
|
||||
vtlb_CreateFastmemMapping(fm_vaddr, fm_hostoffset, mode);
|
||||
}
|
||||
else
|
||||
{
|
||||
vtlb_RemoveFastmemMappings(vaddr, size);
|
||||
}
|
||||
}
|
||||
|
||||
uptr bu8 = (uptr)buffer;
|
||||
while (size > 0)
|
||||
{
|
||||
|
@ -711,6 +1116,8 @@ void vtlb_VMapUnmap(u32 vaddr,u32 size)
|
|||
verify(0==(vaddr&VTLB_PAGE_MASK));
|
||||
verify(0==(size&VTLB_PAGE_MASK) && size>0);
|
||||
|
||||
vtlb_RemoveFastmemMappings(vaddr, size);
|
||||
|
||||
while (size > 0)
|
||||
{
|
||||
|
||||
|
@ -775,11 +1182,45 @@ void vtlb_Init()
|
|||
// This function should probably be part of the COP0 rather than here in VTLB.
|
||||
void vtlb_Reset()
|
||||
{
|
||||
vtlb_RemoveFastmemMappings();
|
||||
for(int i=0; i<48; i++) UnmapTLB(i);
|
||||
}
|
||||
|
||||
void vtlb_Shutdown()
|
||||
{
|
||||
vtlb_RemoveFastmemMappings();
|
||||
s_fastmem_backpatch_info.clear();
|
||||
s_fastmem_faulting_pcs.clear();
|
||||
}
|
||||
|
||||
void vtlb_ResetFastmem()
|
||||
{
|
||||
DevCon.WriteLn("Resetting fastmem mappings...");
|
||||
|
||||
vtlb_RemoveFastmemMappings();
|
||||
s_fastmem_backpatch_info.clear();
|
||||
s_fastmem_faulting_pcs.clear();
|
||||
|
||||
if (!CHECK_FASTMEM || !CHECK_EEREC || !vtlbdata.vmap)
|
||||
return;
|
||||
|
||||
// we need to go through and look at the vtlb pointers, to remap the host area
|
||||
for (size_t i = 0; i < VTLB_VMAP_ITEMS; i++)
|
||||
{
|
||||
const VTLBVirtual& vm = vtlbdata.vmap[i];
|
||||
const u32 vaddr = static_cast<u32>(i) << VTLB_PAGE_BITS;
|
||||
if (vm.isHandler(vaddr))
|
||||
{
|
||||
// Handlers should be unmapped.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if it's a physical mapping to our main memory area.
|
||||
u32 mainmem_offset, mainmem_size;
|
||||
PageProtectionMode prot;
|
||||
if (vtlb_GetMainMemoryOffsetFromPtr(vm.assumePtr(vaddr), &mainmem_offset, &mainmem_size, &prot))
|
||||
vtlb_CreateFastmemMapping(vaddr, mainmem_offset, prot);
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr size_t VMAP_SIZE = sizeof(VTLBVirtual) * VTLB_VMAP_ITEMS;
|
||||
|
@ -804,6 +1245,19 @@ void vtlb_Core_Alloc()
|
|||
HostSys::MemProtect(vmap, VMAP_SIZE, PageProtectionMode().Read().Write());
|
||||
vtlbdata.vmap = vmap;
|
||||
}
|
||||
|
||||
if (!vtlbdata.fastmem_base)
|
||||
{
|
||||
pxAssert(!s_fastmem_area);
|
||||
s_fastmem_area = SharedMemoryMappingArea::Create(FASTMEM_AREA_SIZE);
|
||||
if (!s_fastmem_area)
|
||||
pxFailRel("Failed to allocate fastmem area");
|
||||
|
||||
s_fastmem_virtual_mapping.resize(FASTMEM_PAGE_COUNT, NO_FASTMEM_MAPPING);
|
||||
vtlbdata.fastmem_base = (uptr)s_fastmem_area->BasePointer();
|
||||
Console.WriteLn(Color_StrongGreen, "Fastmem area: %p - %p",
|
||||
vtlbdata.fastmem_base, vtlbdata.fastmem_base + (FASTMEM_AREA_SIZE - 1));
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr size_t PPMAP_SIZE = sizeof(*vtlbdata.ppmap) * VTLB_VMAP_ITEMS;
|
||||
|
@ -840,6 +1294,14 @@ void vtlb_Core_Free()
|
|||
HostSys::MemProtect(vtlbdata.ppmap, PPMAP_SIZE, PageProtectionMode());
|
||||
vtlbdata.ppmap = nullptr;
|
||||
}
|
||||
|
||||
vtlb_RemoveFastmemMappings();
|
||||
vtlb_ClearLoadStoreInfo();
|
||||
|
||||
vtlbdata.fastmem_base = 0;
|
||||
decltype(s_fastmem_physical_mapping)().swap(s_fastmem_physical_mapping);
|
||||
decltype(s_fastmem_virtual_mapping)().swap(s_fastmem_virtual_mapping);
|
||||
s_fastmem_area.reset();
|
||||
}
|
||||
|
||||
static std::string GetHostVmErrorMsg()
|
||||
|
|
32
pcsx2/vtlb.h
32
pcsx2/vtlb.h
|
@ -57,6 +57,7 @@ extern void vtlb_Alloc_Ppmap();
|
|||
extern void vtlb_Init();
|
||||
extern void vtlb_Shutdown();
|
||||
extern void vtlb_Reset();
|
||||
extern void vtlb_ResetFastmem();
|
||||
|
||||
extern vtlbHandler vtlb_NewHandler();
|
||||
|
||||
|
@ -82,6 +83,15 @@ extern void vtlb_DynV2P();
|
|||
extern void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz);
|
||||
extern void vtlb_VMapBuffer(u32 vaddr,void* buffer,u32 sz);
|
||||
extern void vtlb_VMapUnmap(u32 vaddr,u32 sz);
|
||||
extern bool vtlb_ResolveFastmemMapping(uptr* addr);
|
||||
extern bool vtlb_GetGuestAddress(uptr host_addr, u32* guest_addr);
|
||||
extern void vtlb_UpdateFastmemProtection(u32 paddr, u32 size, const PageProtectionMode& prot);
|
||||
extern bool vtlb_BackpatchLoadStore(uptr code_address, uptr fault_address);
|
||||
|
||||
extern void vtlb_ClearLoadStoreInfo();
|
||||
extern void vtlb_AddLoadStoreInfo(uptr code_address, u32 code_size, u32 guest_pc, u32 gpr_bitmask, u32 fpr_bitmask, u8 address_register, u8 data_register, u8 size_in_bits, bool is_signed, bool is_load, bool is_fpr);
|
||||
extern void vtlb_DynBackpatchLoadStore(uptr code_address, u32 code_size, u32 guest_pc, u32 guest_addr, u32 gpr_bitmask, u32 fpr_bitmask, u8 address_register, u8 data_register, u8 size_in_bits, bool is_signed, bool is_load, bool is_fpr);
|
||||
extern bool vtlb_IsFaultingPC(u32 guest_pc);
|
||||
|
||||
//Memory functions
|
||||
|
||||
|
@ -101,13 +111,14 @@ extern DataType vtlb_ramRead(u32 mem);
|
|||
template <typename DataType>
|
||||
extern bool vtlb_ramWrite(u32 mem, const DataType& value);
|
||||
|
||||
extern void vtlb_DynGenWrite(u32 sz);
|
||||
extern void vtlb_DynGenReadNonQuad(u32 bits, bool sign);
|
||||
extern int vtlb_DynGenReadQuad(u32 sz, int gpr);
|
||||
using vtlb_ReadRegAllocCallback = int(*)();
|
||||
extern int vtlb_DynGenReadNonQuad(u32 bits, bool sign, bool xmm, int addr_reg, vtlb_ReadRegAllocCallback dest_reg_alloc = nullptr);
|
||||
extern int vtlb_DynGenReadNonQuad_Const(u32 bits, bool sign, bool xmm, u32 addr_const, vtlb_ReadRegAllocCallback dest_reg_alloc = nullptr);
|
||||
extern int vtlb_DynGenReadQuad(u32 bits, int addr_reg, vtlb_ReadRegAllocCallback dest_reg_alloc = nullptr);
|
||||
extern int vtlb_DynGenReadQuad_Const(u32 bits, u32 addr_const, vtlb_ReadRegAllocCallback dest_reg_alloc = nullptr);
|
||||
|
||||
extern void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const );
|
||||
extern int vtlb_DynGenReadQuad_Const( u32 bits, u32 addr_const, int gpr );
|
||||
extern void vtlb_DynGenReadNonQuad_Const( u32 bits, bool sign, u32 addr_const );
|
||||
extern void vtlb_DynGenWrite(u32 sz, bool xmm, int addr_reg, int value_reg);
|
||||
extern void vtlb_DynGenWrite_Const(u32 bits, bool xmm, u32 addr_const, int value_reg);
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// VtlbMemoryReserve
|
||||
|
@ -125,7 +136,7 @@ public:
|
|||
// --------------------------------------------------------------------------------------
|
||||
// eeMemoryReserve
|
||||
// --------------------------------------------------------------------------------------
|
||||
class eeMemoryReserve : private VtlbMemoryReserve
|
||||
class eeMemoryReserve : public VtlbMemoryReserve
|
||||
{
|
||||
typedef VtlbMemoryReserve _parent;
|
||||
|
||||
|
@ -142,7 +153,7 @@ public:
|
|||
// --------------------------------------------------------------------------------------
|
||||
// iopMemoryReserve
|
||||
// --------------------------------------------------------------------------------------
|
||||
class iopMemoryReserve : private VtlbMemoryReserve
|
||||
class iopMemoryReserve : public VtlbMemoryReserve
|
||||
{
|
||||
typedef VtlbMemoryReserve _parent;
|
||||
|
||||
|
@ -159,7 +170,7 @@ public:
|
|||
// --------------------------------------------------------------------------------------
|
||||
// vuMemoryReserve
|
||||
// --------------------------------------------------------------------------------------
|
||||
class vuMemoryReserve : private VtlbMemoryReserve
|
||||
class vuMemoryReserve : public VtlbMemoryReserve
|
||||
{
|
||||
typedef VtlbMemoryReserve _parent;
|
||||
|
||||
|
@ -253,10 +264,13 @@ namespace vtlb_private
|
|||
|
||||
u32* ppmap; //4MB (allocated by vtlb_init) // PS2 virtual to PS2 physical
|
||||
|
||||
uptr fastmem_base;
|
||||
|
||||
MapData()
|
||||
{
|
||||
vmap = NULL;
|
||||
ppmap = NULL;
|
||||
fastmem_base = 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ namespace COP0 {
|
|||
// this should be a conditional Jump -- JZ or JNZ normally.
|
||||
static void _setupBranchTest()
|
||||
{
|
||||
_eeFlushAllUnused();
|
||||
_eeFlushAllDirty();
|
||||
|
||||
// COP0 branch conditionals are based on the following equation:
|
||||
// (((psHu16(DMAC_STAT) | ~psHu16(DMAC_PCR)) & 0x3ff) == 0x3ff)
|
||||
|
@ -64,26 +64,32 @@ static void _setupBranchTest()
|
|||
|
||||
void recBC0F()
|
||||
{
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
const bool swap = TrySwapDelaySlot(0, 0, 0);
|
||||
_setupBranchTest();
|
||||
recDoBranchImm(JE32(0));
|
||||
recDoBranchImm(branchTo, JE32(0), false, swap);
|
||||
}
|
||||
|
||||
void recBC0T()
|
||||
{
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
const bool swap = TrySwapDelaySlot(0, 0, 0);
|
||||
_setupBranchTest();
|
||||
recDoBranchImm(JNE32(0));
|
||||
recDoBranchImm(branchTo, JNE32(0), false, swap);
|
||||
}
|
||||
|
||||
void recBC0FL()
|
||||
{
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
_setupBranchTest();
|
||||
recDoBranchImm_Likely(JE32(0));
|
||||
recDoBranchImm(branchTo, JE32(0), true, false);
|
||||
}
|
||||
|
||||
void recBC0TL()
|
||||
{
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
_setupBranchTest();
|
||||
recDoBranchImm_Likely(JNE32(0));
|
||||
recDoBranchImm(branchTo, JNE32(0), true, false);
|
||||
}
|
||||
|
||||
void recTLBR() { recCall(Interp::TLBR); }
|
||||
|
@ -118,7 +124,7 @@ void recDI()
|
|||
// Jak X, Namco 50th anniversary, Spongebob the Movie, Spongebob Battle for Bikini Bottom,
|
||||
// The Incredibles, The Incredibles rize of the underminer, Soukou kihei armodyne, Garfield Saving Arlene, Tales of Fandom Vol. 2.
|
||||
if (!g_recompilingDelaySlot)
|
||||
recompileNextInstruction(0); // DI execution is delayed by one instruction
|
||||
recompileNextInstruction(false, false); // DI execution is delayed by one instruction
|
||||
|
||||
xMOV(eax, ptr[&cpuRegs.CP0.n.Status]);
|
||||
xTEST(eax, 0x20006); // EXL | ERL | EDI
|
||||
|
@ -152,13 +158,12 @@ void recMFC0()
|
|||
x86SetJ8(skipInc);
|
||||
xADD(ptr[&cpuRegs.CP0.n.Count], eax);
|
||||
xMOV(ptr[&cpuRegs.lastCOP0Cycle], ecx);
|
||||
xMOV(eax, ptr[&cpuRegs.CP0.r[_Rd_]]);
|
||||
|
||||
if (!_Rt_)
|
||||
return;
|
||||
|
||||
_deleteEEreg(_Rt_, 0);
|
||||
eeSignExtendTo(_Rt_);
|
||||
const int regt = _Rt_ ? _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE) : -1;
|
||||
xMOVSX(xRegister64(regt), ptr32[&cpuRegs.CP0.r[_Rd_]]);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -169,22 +174,25 @@ void recMFC0()
|
|||
{
|
||||
if (0 == (_Imm_ & 1)) // MFPS, register value ignored
|
||||
{
|
||||
xMOV(eax, ptr[&cpuRegs.PERF.n.pccr]);
|
||||
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
|
||||
xMOVSX(xRegister64(regt), ptr32[&cpuRegs.PERF.n.pccr]);
|
||||
}
|
||||
else if (0 == (_Imm_ & 2)) // MFPC 0, only LSB of register matters
|
||||
{
|
||||
iFlushCall(FLUSH_INTERPRETER);
|
||||
xFastCall((void*)COP0_UpdatePCCR);
|
||||
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]);
|
||||
|
||||
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
|
||||
xMOVSX(xRegister64(regt), ptr32[&cpuRegs.PERF.n.pcr0]);
|
||||
}
|
||||
else // MFPC 1
|
||||
{
|
||||
iFlushCall(FLUSH_INTERPRETER);
|
||||
xFastCall((void*)COP0_UpdatePCCR);
|
||||
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]);
|
||||
|
||||
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
|
||||
xMOVSX(xRegister64(regt), ptr32[&cpuRegs.PERF.n.pcr1]);
|
||||
}
|
||||
_deleteEEreg(_Rt_, 0);
|
||||
eeSignExtendTo(_Rt_);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -193,10 +201,9 @@ void recMFC0()
|
|||
COP0_LOG("MFC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF);
|
||||
return;
|
||||
}
|
||||
_eeOnWriteReg(_Rt_, 1);
|
||||
_deleteEEreg(_Rt_, 0);
|
||||
xMOV(eax, ptr[&cpuRegs.CP0.r[_Rd_]]);
|
||||
eeSignExtendTo(_Rt_);
|
||||
|
||||
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
|
||||
xMOVSX(xRegister64(regt), ptr32[&cpuRegs.CP0.r[_Rd_]]);
|
||||
}
|
||||
|
||||
void recMTC0()
|
||||
|
@ -260,15 +267,15 @@ void recMTC0()
|
|||
switch (_Rd_)
|
||||
{
|
||||
case 12:
|
||||
_eeMoveGPRtoR(arg1reg, _Rt_);
|
||||
iFlushCall(FLUSH_INTERPRETER);
|
||||
_eeMoveGPRtoR(ecx, _Rt_);
|
||||
xFastCall((void*)WriteCP0Status, ecx);
|
||||
xFastCall((void*)WriteCP0Status);
|
||||
break;
|
||||
|
||||
case 16:
|
||||
_eeMoveGPRtoR(arg1reg, _Rt_);
|
||||
iFlushCall(FLUSH_INTERPRETER);
|
||||
_eeMoveGPRtoR(ecx, _Rt_);
|
||||
xFastCall((void*)WriteCP0Config, ecx);
|
||||
xFastCall((void*)WriteCP0Config);
|
||||
break;
|
||||
|
||||
case 9:
|
||||
|
|
1002
pcsx2/x86/iCore.cpp
1002
pcsx2/x86/iCore.cpp
File diff suppressed because it is too large
Load Diff
|
@ -22,86 +22,72 @@
|
|||
// Namespace Note : iCore32 contains all of the Register Allocation logic, in addition to a handful
|
||||
// of utility functions for emitting frequent code.
|
||||
|
||||
//#define RALOG(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define RALOG(...)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Shared Register allocation flags (apply to X86, XMM, MMX, etc).
|
||||
|
||||
#define MODE_READ 1
|
||||
#define MODE_WRITE 2
|
||||
#define MODE_READHALF 4 // read only low 64 bits
|
||||
#define MODE_VUXY 8 // vector only has xy valid (real zw are in mem), not the same as MODE_READHALF
|
||||
#define MODE_VUZ 0x10 // z only doesn't work for now
|
||||
#define MODE_VUXYZ (MODE_VUZ | MODE_VUXY) // vector only has xyz valid (real w is in memory)
|
||||
#define MODE_NOFLUSH 0x20 // can't flush reg to mem
|
||||
#define MODE_NOFRAME 0x40 // when allocating x86regs, don't use ebp reg
|
||||
#define MODE_8BITREG 0x80 // when allocating x86regs, use only eax, ecx, edx, and ebx
|
||||
#define MODE_CALLEESAVED 0x20 // can't flush reg to mem
|
||||
|
||||
#define PROCESS_EE_XMM 0x02
|
||||
|
||||
// currently only used in FPU
|
||||
#define PROCESS_EE_S 0x04 // S is valid, otherwise take from mem
|
||||
#define PROCESS_EE_T 0x08 // T is valid, otherwise take from mem
|
||||
#define PROCESS_EE_D 0x10 // D is valid, otherwise take from mem
|
||||
|
||||
// not used in VU recs
|
||||
#define PROCESS_EE_MODEWRITES 0x10 // if s is a reg, set if not in cpuRegs
|
||||
#define PROCESS_EE_MODEWRITET 0x20 // if t is a reg, set if not in cpuRegs
|
||||
#define PROCESS_EE_LO 0x40 // lo reg is valid
|
||||
#define PROCESS_EE_HI 0x80 // hi reg is valid
|
||||
#define PROCESS_EE_ACC 0x40 // acc reg is valid
|
||||
|
||||
// used in VU recs
|
||||
#define PROCESS_VU_UPDATEFLAGS 0x10
|
||||
#define PROCESS_VU_COP2 0x80 // simple cop2
|
||||
|
||||
#define EEREC_S (((info) >> 8) & 0xf)
|
||||
#define EEREC_T (((info) >> 12) & 0xf)
|
||||
#define EEREC_D (((info) >> 16) & 0xf)
|
||||
#define EEREC_LO (((info) >> 20) & 0xf)
|
||||
#define EEREC_HI (((info) >> 24) & 0xf)
|
||||
#define EEREC_ACC (((info) >> 20) & 0xf)
|
||||
#define EEREC_TEMP (((info) >> 24) & 0xf)
|
||||
#define VUREC_FMAC ((info)&0x80000000)
|
||||
|
||||
#define PROCESS_EE_SET_S(reg) ((reg) << 8)
|
||||
#define PROCESS_EE_SET_T(reg) ((reg) << 12)
|
||||
#define PROCESS_EE_SET_D(reg) ((reg) << 16)
|
||||
#define PROCESS_EE_SET_LO(reg) ((reg) << 20)
|
||||
#define PROCESS_EE_SET_HI(reg) ((reg) << 24)
|
||||
#define PROCESS_EE_SET_ACC(reg) ((reg) << 20)
|
||||
|
||||
#define PROCESS_VU_SET_ACC(reg) PROCESS_EE_SET_ACC(reg)
|
||||
#define PROCESS_VU_SET_TEMP(reg) ((reg) << 24)
|
||||
|
||||
#define PROCESS_VU_SET_FMAC() 0x80000000
|
||||
#define PROCESS_EE_SET_S(reg) (((reg) << 8) | PROCESS_EE_S)
|
||||
#define PROCESS_EE_SET_T(reg) (((reg) << 12) | PROCESS_EE_T)
|
||||
#define PROCESS_EE_SET_D(reg) (((reg) << 16) | PROCESS_EE_D)
|
||||
#define PROCESS_EE_SET_LO(reg) (((reg) << 20) | PROCESS_EE_LO)
|
||||
#define PROCESS_EE_SET_HI(reg) (((reg) << 24) | PROCESS_EE_HI)
|
||||
#define PROCESS_EE_SET_ACC(reg) (((reg) << 20) | PROCESS_EE_ACC)
|
||||
|
||||
// special info not related to above flags
|
||||
#define PROCESS_CONSTS 1
|
||||
#define PROCESS_CONSTT 2
|
||||
|
||||
// XMM caching helpers
|
||||
#define XMMINFO_READLO 0x001
|
||||
#define XMMINFO_READHI 0x002
|
||||
#define XMMINFO_WRITELO 0x004
|
||||
#define XMMINFO_WRITEHI 0x008
|
||||
#define XMMINFO_WRITED 0x010
|
||||
#define XMMINFO_READD 0x020
|
||||
#define XMMINFO_READS 0x040
|
||||
#define XMMINFO_READT 0x080
|
||||
#define XMMINFO_READACC 0x200
|
||||
#define XMMINFO_WRITEACC 0x400
|
||||
#define XMMINFO_WRITET 0x800
|
||||
|
||||
#define XMMINFO_64BITOP 0x1000
|
||||
#define XMMINFO_FORCEREGS 0x2000
|
||||
#define XMMINFO_FORCEREGT 0x4000
|
||||
#define XMMINFO_NORENAME 0x8000 // disables renaming of Rs to Rt in Rt = Rs op imm
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// X86 (32-bit) Register Allocation Tools
|
||||
|
||||
#define X86TYPE_TEMP 0
|
||||
#define X86TYPE_GPR 1
|
||||
#define X86TYPE_VI 2
|
||||
#define X86TYPE_MEMOFFSET 3
|
||||
#define X86TYPE_VIMEMOFFSET 4
|
||||
#define X86TYPE_VUQREAD 5
|
||||
#define X86TYPE_VUPREAD 6
|
||||
#define X86TYPE_VUQWRITE 7
|
||||
#define X86TYPE_VUPWRITE 8
|
||||
#define X86TYPE_PSX 9
|
||||
#define X86TYPE_PCWRITEBACK 10
|
||||
#define X86TYPE_PSX_PCWRITEBACK 12
|
||||
#define X86TYPE_VITEMP 13
|
||||
#define X86TYPE_FNARG 14 // function parameter, max is 4
|
||||
|
||||
#define X86TYPE_VU1 0x80
|
||||
|
||||
//#define X86_ISVI(type) ((type&~X86TYPE_VU1) == X86TYPE_VI)
|
||||
static __fi int X86_ISVI(int type)
|
||||
{
|
||||
return ((type & ~X86TYPE_VU1) == X86TYPE_VI);
|
||||
}
|
||||
#define X86TYPE_FPRC 2
|
||||
#define X86TYPE_VIREG 3
|
||||
#define X86TYPE_PCWRITEBACK 4
|
||||
#define X86TYPE_PSX 5
|
||||
#define X86TYPE_PSX_PCWRITEBACK 6
|
||||
|
||||
struct _x86regs
|
||||
{
|
||||
|
@ -116,79 +102,83 @@ struct _x86regs
|
|||
|
||||
extern _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR];
|
||||
|
||||
uptr _x86GetAddr(int type, int reg);
|
||||
bool _isAllocatableX86reg(int x86reg);
|
||||
void _initX86regs();
|
||||
int _getFreeX86reg(int mode);
|
||||
int _allocX86reg(x86Emitter::xRegister32 x86reg, int type, int reg, int mode);
|
||||
void _deleteX86reg(int type, int reg, int flush);
|
||||
int _allocX86reg(int type, int reg, int mode);
|
||||
int _checkX86reg(int type, int reg, int mode);
|
||||
bool _hasX86reg(int type, int reg, int required_mode = 0);
|
||||
void _addNeededX86reg(int type, int reg);
|
||||
void _clearNeededX86regs();
|
||||
void _freeX86reg(const x86Emitter::xRegister32& x86reg);
|
||||
void _freeX86reg(int x86reg);
|
||||
void _freeX86regWithoutWriteback(int x86reg);
|
||||
void _freeX86regs();
|
||||
void _flushCachedRegs();
|
||||
void _flushX86regs();
|
||||
void _flushConstRegs();
|
||||
void _flushConstReg(int reg);
|
||||
void _validateRegs();
|
||||
void _writebackX86Reg(int x86reg);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// XMM (128-bit) Register Allocation Tools
|
||||
|
||||
#define XMM_CONV_VU(VU) (VU == &VU1)
|
||||
|
||||
#define XMMTYPE_TEMP 0 // has to be 0
|
||||
#define XMMTYPE_VFREG 1
|
||||
#define XMMTYPE_ACC 2
|
||||
#define XMMTYPE_FPREG 3
|
||||
#define XMMTYPE_FPACC 4
|
||||
#define XMMTYPE_GPRREG 5
|
||||
#define XMMTYPE_GPRREG X86TYPE_GPR
|
||||
#define XMMTYPE_FPREG 6
|
||||
#define XMMTYPE_FPACC 7
|
||||
#define XMMTYPE_VFREG 8
|
||||
|
||||
// lo and hi regs
|
||||
#define XMMGPR_LO 33
|
||||
#define XMMGPR_HI 32
|
||||
#define XMMFPU_ACC 32
|
||||
|
||||
enum : int
|
||||
{
|
||||
DELETE_REG_FREE = 0,
|
||||
DELETE_REG_FLUSH = 1,
|
||||
DELETE_REG_FLUSH_AND_FREE = 2,
|
||||
DELETE_REG_FREE_NO_WRITEBACK = 3
|
||||
};
|
||||
|
||||
struct _xmmregs
|
||||
{
|
||||
u8 inuse;
|
||||
u8 reg;
|
||||
s8 reg;
|
||||
u8 type;
|
||||
u8 mode;
|
||||
u8 needed;
|
||||
u8 VU; // 0 = VU0, 1 = VU1
|
||||
u16 counter;
|
||||
};
|
||||
|
||||
void _cop2BackupRegs();
|
||||
void _cop2RestoreRegs();
|
||||
void _initXMMregs();
|
||||
int _getFreeXMMreg();
|
||||
int _allocTempXMMreg(XMMSSEType type, int xmmreg);
|
||||
int _allocFPtoXMMreg(int xmmreg, int fpreg, int mode);
|
||||
int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode);
|
||||
int _allocFPACCtoXMMreg(int xmmreg, int mode);
|
||||
int _getFreeXMMreg(u32 maxreg = iREGCNT_XMM);
|
||||
int _allocTempXMMreg(XMMSSEType type);
|
||||
int _allocFPtoXMMreg(int fpreg, int mode);
|
||||
int _allocGPRtoXMMreg(int gprreg, int mode);
|
||||
int _allocFPACCtoXMMreg(int mode);
|
||||
void _reallocateXMMreg(int xmmreg, int newtype, int newreg, int newmode, bool writeback = true);
|
||||
int _checkXMMreg(int type, int reg, int mode);
|
||||
bool _hasXMMreg(int type, int reg, int required_mode = 0);
|
||||
void _addNeededFPtoXMMreg(int fpreg);
|
||||
void _addNeededFPACCtoXMMreg();
|
||||
void _addNeededGPRtoX86reg(int gprreg);
|
||||
void _addNeededPSXtoX86reg(int gprreg);
|
||||
void _addNeededGPRtoXMMreg(int gprreg);
|
||||
void _clearNeededXMMregs();
|
||||
//void _deleteACCtoXMMreg(int vu, int flush);
|
||||
void _deleteGPRtoX86reg(int reg, int flush);
|
||||
void _deletePSXtoX86reg(int reg, int flush);
|
||||
void _deleteGPRtoXMMreg(int reg, int flush);
|
||||
void _deleteFPtoXMMreg(int reg, int flush);
|
||||
void _freeXMMreg(u32 xmmreg);
|
||||
void _clearNeededCOP2Regs();
|
||||
u16 _freeXMMregsCOP2();
|
||||
//void _moveXMMreg(int xmmreg); // instead of freeing, moves it to a diff location
|
||||
void _freeXMMreg(int xmmreg);
|
||||
void _freeXMMregWithoutWriteback(int xmmreg);
|
||||
void _writebackXMMreg(int xmmreg);
|
||||
int _allocVFtoXMMreg(int vfreg, int mode);
|
||||
void mVUFreeCOP2XMMreg(int hostreg);
|
||||
void _flushCOP2regs();
|
||||
void _flushXMMreg(int xmmreg);
|
||||
void _flushXMMregs();
|
||||
u8 _hasFreeXMMreg();
|
||||
void _freeXMMregs();
|
||||
int _getNumXMMwrite();
|
||||
void _signExtendSFtoM(uptr mem);
|
||||
|
||||
// returns new index of reg, lower 32 bits already in mmx
|
||||
// shift is used when the data is in the top bits of the mmx reg to begin with
|
||||
// a negative shift is for sign extension
|
||||
int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy); // returns true if reg destroyed
|
||||
|
||||
//////////////////////
|
||||
// Instruction Info //
|
||||
|
@ -205,54 +195,99 @@ int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy); // returns t
|
|||
// 3/ EEINST_LIVE* is cleared when register is written. And set again when register is read.
|
||||
// My guess: the purpose is to detect the usage hole in the flow
|
||||
|
||||
#define EEINST_LIVE0 1 // if var is ever used (read or write)
|
||||
#define EEINST_LIVE2 4 // if cur var's next 64 bits are needed
|
||||
#define EEINST_LIVE 1 // if var is ever used (read or write)
|
||||
#define EEINST_LASTUSE 8 // if var isn't written/read anymore
|
||||
//#define EEINST_MMX 0x10 // removed
|
||||
#define EEINST_XMM 0x20 // var will be used in xmm ops
|
||||
#define EEINST_USED 0x40
|
||||
|
||||
#define EEINSTINFO_COP1 1
|
||||
#define EEINSTINFO_COP2 2
|
||||
|
||||
#define EEINST_COP2_DENORMALIZE_STATUS_FLAG 0x100
|
||||
#define EEINST_COP2_NORMALIZE_STATUS_FLAG 0x200
|
||||
#define EEINST_COP2_STATUS_FLAG 0x400
|
||||
#define EEINST_COP2_MAC_FLAG 0x800
|
||||
#define EEINST_COP2_CLIP_FLAG 0x1000
|
||||
#define EEINST_COP2_FINISH_VU0_MICRO 0x2000
|
||||
#define EEINST_COP2_SYNC_VU0 0x2000
|
||||
#define EEINST_COP2_FINISH_VU0 0x4000
|
||||
#define EEINST_COP2_FLUSH_VU0_REGISTERS 0x8000
|
||||
|
||||
struct EEINST
|
||||
{
|
||||
u16 info; // extra info, if 1 inst is COP1, 2 inst is COP2. Also uses EEINST_XMM
|
||||
u8 regs[34]; // includes HI/LO (HI=32, LO=33)
|
||||
u8 fpuregs[33]; // ACC=32
|
||||
u8 vfregs[33]; // ACC=32
|
||||
u8 viregs[16];
|
||||
|
||||
// uses XMMTYPE_ flags; if type == XMMTYPE_TEMP, not used
|
||||
u8 writeType[3], writeReg[3]; // reg written in this inst, 0 if no reg
|
||||
u8 readType[4], readReg[4];
|
||||
|
||||
// valid if info & EEINSTINFO_COP2
|
||||
int cycle; // cycle of inst (at offset from block)
|
||||
_VURegsNum vuregs;
|
||||
};
|
||||
|
||||
extern EEINST* g_pCurInstInfo; // info for the cur instruction
|
||||
extern void _recClearInst(EEINST* pinst);
|
||||
|
||||
// returns the number of insts + 1 until written (0 if not written)
|
||||
extern u32 _recIsRegWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg);
|
||||
// returns the number of insts + 1 until used (0 if not used)
|
||||
//extern u32 _recIsRegUsed(EEINST* pinst, int size, u8 xmmtype, u8 reg);
|
||||
extern u32 _recIsRegReadOrWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg);
|
||||
|
||||
extern void _recFillRegister(EEINST& pinst, int type, int reg, int write);
|
||||
|
||||
static __fi bool EEINST_ISLIVE64(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0)); }
|
||||
static __fi bool EEINST_ISLIVEXMM(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0 | EEINST_LIVE2)); }
|
||||
static __fi bool EEINST_ISLIVE2(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & EEINST_LIVE2); }
|
||||
// If unset, values which are not live will not be written back to memory.
|
||||
// Tends to break stuff at the moment.
|
||||
#define EE_WRITE_DEAD_VALUES 1
|
||||
|
||||
static __fi bool FPUINST_ISLIVE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0); }
|
||||
/// Returns true if the register is used later in the block, and this isn't the last instruction to use it.
|
||||
/// In other words, the register is worth keeping in a host register/caching it.
|
||||
static __fi bool EEINST_USEDTEST(u32 reg)
|
||||
{
|
||||
return (g_pCurInstInfo->regs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
|
||||
}
|
||||
|
||||
/// Returns true if the register is used later in the block as an XMM/128-bit value.
|
||||
static __fi bool EEINST_XMMUSEDTEST(u32 reg)
|
||||
{
|
||||
return (g_pCurInstInfo->regs[reg] & (EEINST_USED | EEINST_XMM | EEINST_LASTUSE)) == (EEINST_USED | EEINST_XMM);
|
||||
}
|
||||
|
||||
/// Returns true if the specified VF register is used later in the block.
|
||||
static __fi bool COP2INST_USEDTEST(u32 reg)
|
||||
{
|
||||
return (g_pCurInstInfo->vfregs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
|
||||
}
|
||||
|
||||
/// Returns true if the value should be computed/written back.
|
||||
/// Basically, this means it's either used before it's overwritten, or not overwritten by the end of the block.
|
||||
static __fi bool EEINST_LIVETEST(u32 reg)
|
||||
{
|
||||
return EE_WRITE_DEAD_VALUES || ((g_pCurInstInfo->regs[reg] & EEINST_LIVE) != 0);
|
||||
}
|
||||
|
||||
/// Returns true if the register can be renamed into another.
|
||||
static __fi bool EEINST_RENAMETEST(u32 reg)
|
||||
{
|
||||
return (reg == 0 || !EEINST_USEDTEST(reg) || !EEINST_LIVETEST(reg));
|
||||
}
|
||||
|
||||
/// Tells whether FPU register `reg` carries the EEINST_LIVE flag for the current instruction.
static __fi bool FPUINST_ISLIVE(u32 reg)
{
	return (g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE) != 0;
}
|
||||
/// Tells whether the current instruction is flagged as the last use (EEINST_LASTUSE) of FPU register `reg`.
static __fi bool FPUINST_LASTUSE(u32 reg)
{
	return (g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE) != 0;
}
|
||||
|
||||
/// Returns true if the register is used later in the block, and this isn't the last instruction to use it.
|
||||
/// In other words, the register is worth keeping in a host register/caching it.
|
||||
static __fi bool FPUINST_USEDTEST(u32 reg)
|
||||
{
|
||||
return (g_pCurInstInfo->fpuregs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
|
||||
}
|
||||
|
||||
/// Returns true if the value should be computed/written back.
|
||||
static __fi bool FPUINST_LIVETEST(u32 reg)
|
||||
{
|
||||
return EE_WRITE_DEAD_VALUES || FPUINST_ISLIVE(reg);
|
||||
}
|
||||
|
||||
/// Returns true if the register can be renamed into another.
|
||||
static __fi bool FPUINST_RENAMETEST(u32 reg)
|
||||
{
|
||||
return (!EEINST_USEDTEST(reg) || !EEINST_LIVETEST(reg));
|
||||
}
|
||||
|
||||
extern _xmmregs xmmregs[iREGCNT_XMM], s_saveXMMregs[iREGCNT_XMM];
|
||||
|
||||
extern thread_local u8* j8Ptr[32]; // deprecated item. use local u8* vars instead.
|
||||
|
@ -261,47 +296,32 @@ extern thread_local u32* j32Ptr[32]; // depreciated item. use local u32* vars i
|
|||
extern u16 g_x86AllocCounter;
|
||||
extern u16 g_xmmAllocCounter;
|
||||
|
||||
// allocates only if later insts use XMM, otherwise checks
|
||||
int _allocCheckGPRtoXMM(EEINST* pinst, int gprreg, int mode);
|
||||
int _allocCheckFPUtoXMM(EEINST* pinst, int fpureg, int mode);
|
||||
|
||||
// allocates only if later insts use this register
|
||||
int _allocCheckGPRtoX86(EEINST* pinst, int gprreg, int mode);
|
||||
int _allocIfUsedGPRtoX86(int gprreg, int mode);
|
||||
int _allocIfUsedGPRtoXMM(int gprreg, int mode);
|
||||
int _allocIfUsedFPUtoXMM(int fpureg, int mode);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// iFlushCall / _psxFlushCall Parameters
|
||||
|
||||
// Flushing vs. Freeing, as understood by Air (I could be wrong still....)
|
||||
|
||||
// "Freeing" registers means that the contents of the registers are flushed to memory.
|
||||
// This is good for any sort of C code function that plans to modify the actual
|
||||
// registers. When the Recs resume, they'll reload the registers with values saved
|
||||
// as needed. (similar to a "FreezeXMMRegs")
|
||||
|
||||
// "Flushing" means that in addition to the standard free (which is actually a flush)
|
||||
// the register allocations are additionally wiped. This should only be necessary if
|
||||
// the code being called is going to modify register allocations -- ie, be doing
|
||||
// some kind of recompiling of its own.
|
||||
|
||||
#define FLUSH_CACHED_REGS 0x001
|
||||
#define FLUSH_NONE 0x000 // frees caller saved registers
|
||||
#define FLUSH_CONSTANT_REGS 0x001
|
||||
#define FLUSH_FLUSH_XMM 0x002
|
||||
#define FLUSH_FREE_XMM 0x004 // both flushes and frees
|
||||
#define FLUSH_FLUSH_ALLX86 0x020 // flush x86
|
||||
#define FLUSH_FREE_TEMPX86 0x040 // flush and free temporary x86 regs
|
||||
#define FLUSH_FREE_ALLX86 0x080 // free all x86 regs
|
||||
#define FLUSH_ALL_X86 0x020 // flush x86
|
||||
#define FLUSH_FREE_TEMP_X86 0x040 // flush and free temporary x86 regs
|
||||
#define FLUSH_FREE_NONTEMP_X86 0x080 // free all x86 regs, except temporary
|
||||
#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs
|
||||
#define FLUSH_PC 0x200 // program counter
|
||||
#define FLUSH_CAUSE 0x000 // disabled for now: cause register, only the branch delay bit
|
||||
//#define FLUSH_CAUSE 0x000 // disabled for now: cause register, only the branch delay bit
|
||||
#define FLUSH_CODE 0x800 // opcode for interpreter
|
||||
|
||||
#define FLUSH_EVERYTHING 0x1ff
|
||||
//#define FLUSH_EXCEPTION 0x1ff // will probably do this totally differently actually
|
||||
#define FLUSH_INTERPRETER 0xfff
|
||||
#define FLUSH_FULLVTLB FLUSH_NOCONST
|
||||
#define FLUSH_FULLVTLB 0x000
|
||||
|
||||
// no freeing, used when callee won't destroy xmm regs
|
||||
#define FLUSH_NODESTROY (FLUSH_CACHED_REGS | FLUSH_FLUSH_XMM | FLUSH_FLUSH_ALLX86)
|
||||
// used when regs aren't going to be changed by callee
|
||||
#define FLUSH_NOCONST (FLUSH_FREE_XMM | FLUSH_FREE_TEMPX86)
|
||||
#define FLUSH_NODESTROY (FLUSH_CONSTANT_REGS | FLUSH_FLUSH_XMM | FLUSH_ALL_X86)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -126,23 +126,18 @@ void recCFC1(void)
|
|||
return;
|
||||
EE::Profiler.EmitOp(eeOpcode::CFC1);
|
||||
|
||||
_eeOnWriteReg(_Rt_, 1);
|
||||
|
||||
if (_Fs_ >= 16)
|
||||
xMOV(eax, ptr[&fpuRegs.fprc[31]]);
|
||||
else
|
||||
xMOV(eax, ptr[&fpuRegs.fprc[0]]);
|
||||
_deleteEEreg(_Rt_, 0);
|
||||
|
||||
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
|
||||
if (_Fs_ >= 16)
|
||||
{
|
||||
xAND(eax, 0x0083c078); //remove always-zero bits
|
||||
xOR(eax, 0x01000001); //set always-one bits
|
||||
xMOV(xRegister32(regt), ptr32[&fpuRegs.fprc[31]]);
|
||||
xAND(xRegister32(regt), 0x0083c078); //remove always-zero bits
|
||||
xOR(xRegister32(regt), 0x01000001); //set always-one bits
|
||||
xMOVSX(xRegister64(regt), xRegister32(regt));
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOVSX(xRegister64(regt), ptr32[&fpuRegs.fprc[0]]);
|
||||
}
|
||||
|
||||
xCDQ();
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
|
||||
}
|
||||
|
||||
void recCTC1()
|
||||
|
@ -163,7 +158,10 @@ void recCTC1()
|
|||
{
|
||||
xMOVSS(ptr[&fpuRegs.fprc[_Fs_]], xRegisterSSE(mmreg));
|
||||
}
|
||||
|
||||
else if ((mmreg = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ)) >= 0)
|
||||
{
|
||||
xMOV(ptr32[&fpuRegs.fprc[_Fs_]], xRegister32(mmreg));
|
||||
}
|
||||
else
|
||||
{
|
||||
_deleteGPRtoXMMreg(_Rt_, 1);
|
||||
|
@ -184,36 +182,42 @@ void recMFC1()
|
|||
{
|
||||
if (!_Rt_)
|
||||
return;
|
||||
|
||||
EE::Profiler.EmitOp(eeOpcode::MFC1);
|
||||
|
||||
_eeOnWriteReg(_Rt_, 1);
|
||||
const int xmmregt = _allocIfUsedGPRtoXMM(_Rt_, MODE_READ | MODE_WRITE);
|
||||
const int regs = _allocIfUsedFPUtoXMM(_Fs_, MODE_READ);
|
||||
if (regs >= 0 && xmmregt >= 0)
|
||||
{
|
||||
// if we're in xmm, we shouldn't be const
|
||||
pxAssert(!GPR_IS_CONST1(_Rt_));
|
||||
|
||||
const int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
|
||||
// both in xmm, sign extend and insert lower bits
|
||||
const int temp = _allocTempXMMreg(XMMT_FPS);
|
||||
xMOVAPS(xRegisterSSE(temp), xRegisterSSE(regs));
|
||||
xPSRA.D(xRegisterSSE(temp), 31);
|
||||
xMOVSS(xRegisterSSE(xmmregt), xRegisterSSE(regs));
|
||||
xINSERTPS(xRegisterSSE(xmmregt), xRegisterSSE(temp), _MM_MK_INSERTPS_NDX(0, 1, 0));
|
||||
_freeXMMreg(temp);
|
||||
return;
|
||||
}
|
||||
|
||||
// storing to a gpr..
|
||||
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
|
||||
|
||||
// shouldn't be const after we're writing.
|
||||
pxAssert(!GPR_IS_CONST1(_Rt_));
|
||||
|
||||
if (regs >= 0)
|
||||
{
|
||||
_deleteGPRtoXMMreg(_Rt_, 2);
|
||||
_signExtendXMMtoM((uptr)&cpuRegs.GPR.r[_Rt_].UL[0], regs, 0);
|
||||
// xmm -> gpr
|
||||
xMOVD(xRegister32(regt), xRegisterSSE(regs));
|
||||
xMOVSX(xRegister64(regt), xRegister32(regt));
|
||||
}
|
||||
else
|
||||
{
|
||||
const int regt = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
|
||||
|
||||
if (regt >= 0)
|
||||
{
|
||||
if (xmmregs[regt].mode & MODE_WRITE)
|
||||
{
|
||||
xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rt_].UL[2]], xRegisterSSE(regt));
|
||||
}
|
||||
xmmregs[regt].inuse = 0;
|
||||
}
|
||||
|
||||
_deleteEEreg(_Rt_, 0);
|
||||
xMOV(eax, ptr[&fpuRegs.fpr[_Fs_].UL]);
|
||||
|
||||
xCDQ();
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
|
||||
// mem -> gpr
|
||||
xMOVSX(xRegister64(regt), ptr32[&fpuRegs.fpr[_Fs_].UL]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -228,44 +232,60 @@ void recMTC1()
|
|||
EE::Profiler.EmitOp(eeOpcode::MTC1);
|
||||
if (GPR_IS_CONST1(_Rt_))
|
||||
{
|
||||
_deleteFPtoXMMreg(_Fs_, 0);
|
||||
xMOV(ptr32[&fpuRegs.fpr[_Fs_].UL], g_cpuConstRegs[_Rt_].UL[0]);
|
||||
const int xmmreg = _allocIfUsedFPUtoXMM(_Fs_, MODE_WRITE);
|
||||
if (xmmreg >= 0)
|
||||
{
|
||||
// common case: mtc1 zero, fnn
|
||||
if (g_cpuConstRegs[_Rt_].UL[0] == 0)
|
||||
{
|
||||
xPXOR(xRegisterSSE(xmmreg), xRegisterSSE(xmmreg));
|
||||
}
|
||||
else
|
||||
{
|
||||
int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
|
||||
|
||||
if (mmreg >= 0)
|
||||
// may as well flush the constant register, since we're needing it in a gpr anyway
|
||||
const int x86reg = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
|
||||
xMOVDZX(xRegisterSSE(xmmreg), xRegister32(x86reg));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pxAssert(!_hasXMMreg(XMMTYPE_FPREG, _Fs_));
|
||||
xMOV(ptr32[&fpuRegs.fpr[_Fs_].UL], g_cpuConstRegs[_Rt_].UL[0]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const int xmmgpr = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
|
||||
if (xmmgpr >= 0)
|
||||
{
|
||||
if (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE)
|
||||
{
|
||||
// transfer the reg directly
|
||||
_deleteGPRtoXMMreg(_Rt_, 2);
|
||||
_deleteFPtoXMMreg(_Fs_, 2);
|
||||
_allocFPtoXMMreg(mmreg, _Fs_, MODE_WRITE);
|
||||
_deleteFPtoXMMreg(_Fs_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
_reallocateXMMreg(xmmgpr, XMMTYPE_FPREG, _Fs_, MODE_WRITE);
|
||||
}
|
||||
else
|
||||
{
|
||||
int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE);
|
||||
|
||||
if (mmreg2 >= 0)
|
||||
xMOVSS(xRegisterSSE(mmreg2), xRegisterSSE(mmreg));
|
||||
const int xmmreg2 = _allocIfUsedFPUtoXMM(_Fs_, MODE_WRITE);
|
||||
if (xmmreg2 >= 0)
|
||||
xMOVSS(xRegisterSSE(xmmreg2), xRegisterSSE(xmmgpr));
|
||||
else
|
||||
xMOVSS(ptr[&fpuRegs.fpr[_Fs_].UL], xRegisterSSE(mmreg));
|
||||
xMOVSS(ptr[&fpuRegs.fpr[_Fs_].UL], xRegisterSSE(xmmgpr));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE);
|
||||
// may as well cache it..
|
||||
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
|
||||
const int mmreg2 = _allocIfUsedFPUtoXMM(_Fs_, MODE_WRITE);
|
||||
|
||||
if (mmreg2 >= 0)
|
||||
{
|
||||
xMOVSSZX(xRegisterSSE(mmreg2), ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
xMOVDZX(xRegisterSSE(mmreg2), xRegister32(regt));
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
xMOV(ptr[&fpuRegs.fpr[_Fs_].UL], eax);
|
||||
xMOV(ptr32[&fpuRegs.fpr[_Fs_].UL], xRegister32(regt));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -311,12 +331,33 @@ REC_FPUFUNC(RSQRT_S);
|
|||
// Clamp Functions (Converts NaN's and Infinities to Normal Numbers)
|
||||
//------------------------------------------------------------------
|
||||
|
||||
alignas(16) static u64 FPU_FLOAT_TEMP[2];
|
||||
static int fpuCopyToTempForClamp(int fpureg, int xmmreg)
|
||||
{
|
||||
if (FPUINST_USEDTEST(fpureg))
|
||||
{
|
||||
const int tempreg = _allocTempXMMreg(XMMT_FPS);
|
||||
xMOVSS(xRegisterSSE(tempreg), xRegisterSSE(xmmreg));
|
||||
return tempreg;
|
||||
}
|
||||
|
||||
// flush back the original value, before we mess with it below
|
||||
if (FPUINST_LIVETEST(fpureg))
|
||||
_flushXMMreg(xmmreg);
|
||||
|
||||
// turn it into a temp, so in case the liveness was incorrect, we don't reuse it after clamp
|
||||
_reallocateXMMreg(xmmreg, XMMTYPE_TEMP, 0, 0, true);
|
||||
return xmmreg;
|
||||
}
|
||||
|
||||
static void fpuFreeIfTemp(int xmmreg)
|
||||
{
|
||||
if (xmmregs[xmmreg].inuse && xmmregs[xmmreg].type == XMMTYPE_TEMP)
|
||||
_freeXMMreg(xmmreg);
|
||||
}
|
||||
|
||||
__fi void fpuFloat3(int regd) // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -Inf -> -fMax
|
||||
{
|
||||
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
if (t1reg >= 0)
|
||||
{
|
||||
const int t1reg = _allocTempXMMreg(XMMT_FPS);
|
||||
xMOVSS(xRegisterSSE(t1reg), xRegisterSSE(regd));
|
||||
xAND.PS(xRegisterSSE(t1reg), ptr[&s_neg[0]]);
|
||||
xMIN.SS(xRegisterSSE(regd), ptr[&g_maxvals[0]]);
|
||||
|
@ -324,19 +365,6 @@ __fi void fpuFloat3(int regd) // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -I
|
|||
xOR.PS(xRegisterSSE(regd), xRegisterSSE(t1reg));
|
||||
_freeXMMreg(t1reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.Error("fpuFloat2() allocation error");
|
||||
t1reg = (regd == 0) ? 1 : 0; // get a temp reg thats not regd
|
||||
xMOVAPS(ptr[&FPU_FLOAT_TEMP[0]], xRegisterSSE(t1reg)); // backup data in t1reg to a temp address
|
||||
xMOVSS(xRegisterSSE(t1reg), xRegisterSSE(regd));
|
||||
xAND.PS(xRegisterSSE(t1reg), ptr[&s_neg[0]]);
|
||||
xMIN.SS(xRegisterSSE(regd), ptr[&g_maxvals[0]]);
|
||||
xMAX.SS(xRegisterSSE(regd), ptr[&g_minvals[0]]);
|
||||
xOR.PS(xRegisterSSE(regd), xRegisterSSE(t1reg));
|
||||
xMOVAPS(xRegisterSSE(t1reg), ptr[&FPU_FLOAT_TEMP[0]]); // restore t1reg data
|
||||
}
|
||||
}
|
||||
|
||||
__fi void fpuFloat(int regd) // +/-NaN -> +fMax, +Inf -> +fMax, -Inf -> -fMax
|
||||
{
|
||||
|
@ -396,34 +424,31 @@ FPURECOMPILE_CONSTCODE(ABS_S, XMMINFO_WRITED | XMMINFO_READS);
|
|||
//------------------------------------------------------------------
|
||||
void FPU_ADD_SUB(int regd, int regt, int issub)
|
||||
{
|
||||
int tempecx = _allocX86reg(ecx, X86TYPE_TEMP, 0, 0); //receives regd
|
||||
int temp2 = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); //receives regt
|
||||
int xmmtemp = _allocTempXMMreg(XMMT_FPS, -1); //temporary for anding with regd/regt
|
||||
|
||||
xMOVD(xRegister32(tempecx), xRegisterSSE(regd));
|
||||
xMOVD(xRegister32(temp2), xRegisterSSE(regt));
|
||||
const int xmmtemp = _allocTempXMMreg(XMMT_FPS); //temporary for anding with regd/regt
|
||||
xMOVD(ecx, xRegisterSSE(regd)); // ecx receives regd
|
||||
xMOVD(eax, xRegisterSSE(regt)); // eax receives regt
|
||||
|
||||
//mask the exponents
|
||||
xSHR(xRegister32(tempecx), 23);
|
||||
xSHR(xRegister32(temp2), 23);
|
||||
xAND(xRegister32(tempecx), 0xff);
|
||||
xAND(xRegister32(temp2), 0xff);
|
||||
xSHR(ecx, 23);
|
||||
xSHR(eax, 23);
|
||||
xAND(ecx, 0xff);
|
||||
xAND(eax, 0xff);
|
||||
|
||||
xSUB(xRegister32(tempecx), xRegister32(temp2)); //tempecx = exponent difference
|
||||
xCMP(xRegister32(tempecx), 25);
|
||||
xSUB(ecx, eax); //tempecx = exponent difference
|
||||
xCMP(ecx, 25);
|
||||
j8Ptr[0] = JGE8(0);
|
||||
xCMP(xRegister32(tempecx), 0);
|
||||
xCMP(ecx, 0);
|
||||
j8Ptr[1] = JG8(0);
|
||||
j8Ptr[2] = JE8(0);
|
||||
xCMP(xRegister32(tempecx), -25);
|
||||
xCMP(ecx, -25);
|
||||
j8Ptr[3] = JLE8(0);
|
||||
|
||||
//diff = -24 .. -1 , expd < expt
|
||||
xNEG(xRegister32(tempecx));
|
||||
xDEC(xRegister32(tempecx));
|
||||
xMOV(xRegister32(temp2), 0xffffffff);
|
||||
xSHL(xRegister32(temp2), cl); //temp2 = 0xffffffff << tempecx
|
||||
xMOVDZX(xRegisterSSE(xmmtemp), xRegister32(temp2));
|
||||
xNEG(ecx);
|
||||
xDEC(ecx);
|
||||
xMOV(eax, 0xffffffff);
|
||||
xSHL(eax, cl); //temp2 = 0xffffffff << tempecx
|
||||
xMOVDZX(xRegisterSSE(xmmtemp), eax);
|
||||
xAND.PS(xRegisterSSE(regd), xRegisterSSE(xmmtemp));
|
||||
if (issub)
|
||||
xSUB.SS(xRegisterSSE(regd), xRegisterSSE(regt));
|
||||
|
@ -443,10 +468,10 @@ void FPU_ADD_SUB(int regd, int regt, int issub)
|
|||
|
||||
x86SetJ8(j8Ptr[1]);
|
||||
//diff = 1 .. 24, expt < expd
|
||||
xDEC(xRegister32(tempecx));
|
||||
xMOV(xRegister32(temp2), 0xffffffff);
|
||||
xSHL(xRegister32(temp2), cl); //temp2 = 0xffffffff << tempecx
|
||||
xMOVDZX(xRegisterSSE(xmmtemp), xRegister32(temp2));
|
||||
xDEC(ecx);
|
||||
xMOV(eax, 0xffffffff);
|
||||
xSHL(eax, cl); //temp2 = 0xffffffff << tempecx
|
||||
xMOVDZX(xRegisterSSE(xmmtemp), eax);
|
||||
xAND.PS(xRegisterSSE(xmmtemp), xRegisterSSE(regt));
|
||||
if (issub)
|
||||
xSUB.SS(xRegisterSSE(regd), xRegisterSSE(xmmtemp));
|
||||
|
@ -476,8 +501,6 @@ void FPU_ADD_SUB(int regd, int regt, int issub)
|
|||
x86SetJ8(j8Ptr[7]);
|
||||
|
||||
_freeXMMreg(xmmtemp);
|
||||
_freeX86reg(temp2);
|
||||
_freeX86reg(tempecx);
|
||||
}
|
||||
|
||||
void FPU_ADD(int regd, int regt)
|
||||
|
@ -550,7 +573,7 @@ static void (*recComOpXMM_to_XMM_REV[])(x86SSERegType, x86SSERegType) = { //reve
|
|||
|
||||
int recCommutativeOp(int info, int regd, int op)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
|
||||
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
|
||||
{
|
||||
|
@ -667,7 +690,7 @@ FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC | XMMINFO_READS | XMMINFO_READT)
|
|||
|
||||
static void _setupBranchTest()
|
||||
{
|
||||
_eeFlushAllUnused();
|
||||
_eeFlushAllDirty();
|
||||
|
||||
// COP1 branch conditionals are based on the following equation:
|
||||
// (fpuRegs.fprc[31] & 0x00800000)
|
||||
|
@ -680,29 +703,35 @@ static void _setupBranchTest()
|
|||
void recBC1F()
|
||||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::BC1F);
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
const bool swap = TrySwapDelaySlot(0, 0, 0);
|
||||
_setupBranchTest();
|
||||
recDoBranchImm(JNZ32(0));
|
||||
recDoBranchImm(branchTo, JNZ32(0), false, swap);
|
||||
}
|
||||
|
||||
void recBC1T()
|
||||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::BC1T);
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
const bool swap = TrySwapDelaySlot(0, 0, 0);
|
||||
_setupBranchTest();
|
||||
recDoBranchImm(JZ32(0));
|
||||
recDoBranchImm(branchTo, JZ32(0), false, swap);
|
||||
}
|
||||
|
||||
void recBC1FL()
|
||||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::BC1FL);
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
_setupBranchTest();
|
||||
recDoBranchImm_Likely(JNZ32(0));
|
||||
recDoBranchImm(branchTo, JNZ32(0), true, false);
|
||||
}
|
||||
|
||||
void recBC1TL()
|
||||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::BC1TL);
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
_setupBranchTest();
|
||||
recDoBranchImm_Likely(JZ32(0));
|
||||
recDoBranchImm(branchTo, JZ32(0), true, false);
|
||||
}
|
||||
//------------------------------------------------------------------
|
||||
|
||||
|
@ -713,49 +742,62 @@ void recBC1TL()
|
|||
void recC_EQ_xmm(int info)
|
||||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::CEQ_F);
|
||||
int tempReg;
|
||||
int t0reg;
|
||||
|
||||
//Console.WriteLn("recC_EQ_xmm()");
|
||||
|
||||
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
|
||||
{
|
||||
case PROCESS_EE_S:
|
||||
fpuFloat3(EEREC_S);
|
||||
t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
if (t0reg >= 0)
|
||||
{
|
||||
const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
|
||||
fpuFloat3(regs);
|
||||
|
||||
const int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
fpuFloat3(t0reg);
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(t0reg));
|
||||
|
||||
xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(t0reg));
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
fpuFreeIfTemp(regs);
|
||||
}
|
||||
else
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
break;
|
||||
|
||||
case PROCESS_EE_T:
|
||||
fpuFloat3(EEREC_T);
|
||||
t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
if (t0reg >= 0)
|
||||
{
|
||||
const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
|
||||
fpuFloat3(regt);
|
||||
|
||||
const int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
fpuFloat3(t0reg);
|
||||
xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
|
||||
xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(regt));
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
fpuFreeIfTemp(regt);
|
||||
}
|
||||
else
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_T), ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
break;
|
||||
|
||||
case (PROCESS_EE_S | PROCESS_EE_T):
|
||||
fpuFloat3(EEREC_S);
|
||||
fpuFloat3(EEREC_T);
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_T));
|
||||
{
|
||||
const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
|
||||
fpuFloat3(regs);
|
||||
|
||||
const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
|
||||
fpuFloat3(regt);
|
||||
|
||||
xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(regt));
|
||||
|
||||
fpuFreeIfTemp(regs);
|
||||
fpuFreeIfTemp(regt);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
Console.WriteLn(Color_Magenta, "recC_EQ_xmm: Default");
|
||||
tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
|
||||
xMOV(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
xCMP(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
xMOV(eax, ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
xCMP(eax, ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
|
||||
j8Ptr[0] = JZ8(0);
|
||||
xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC);
|
||||
|
@ -763,9 +805,6 @@ void recC_EQ_xmm(int info)
|
|||
x86SetJ8(j8Ptr[0]);
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC);
|
||||
x86SetJ8(j8Ptr[1]);
|
||||
|
||||
if (tempReg >= 0)
|
||||
_freeX86reg(tempReg);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -790,59 +829,62 @@ void recC_F()
|
|||
void recC_LE_xmm(int info)
|
||||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::CLE_F);
|
||||
int tempReg; //tempX86reg
|
||||
int t0reg; //tempXMMreg
|
||||
|
||||
//Console.WriteLn("recC_LE_xmm()");
|
||||
|
||||
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
|
||||
{
|
||||
case PROCESS_EE_S:
|
||||
fpuFloat3(EEREC_S);
|
||||
t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
if (t0reg >= 0)
|
||||
{
|
||||
const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
|
||||
fpuFloat3(regs);
|
||||
|
||||
const int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
fpuFloat3(t0reg);
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(t0reg));
|
||||
|
||||
xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(t0reg));
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
fpuFreeIfTemp(regs);
|
||||
}
|
||||
else
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
break;
|
||||
|
||||
case PROCESS_EE_T:
|
||||
fpuFloat3(EEREC_T);
|
||||
t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
if (t0reg >= 0)
|
||||
{
|
||||
const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
|
||||
fpuFloat3(regt);
|
||||
|
||||
const int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
fpuFloat3(t0reg);
|
||||
xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_T), ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
|
||||
j8Ptr[0] = JAE8(0);
|
||||
xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC);
|
||||
j8Ptr[1] = JMP8(0);
|
||||
x86SetJ8(j8Ptr[0]);
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC);
|
||||
x86SetJ8(j8Ptr[1]);
|
||||
return;
|
||||
xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(regt));
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
fpuFreeIfTemp(regt);
|
||||
}
|
||||
break;
|
||||
|
||||
case (PROCESS_EE_S | PROCESS_EE_T):
|
||||
fpuFloat3(EEREC_S);
|
||||
fpuFloat3(EEREC_T);
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_T));
|
||||
{
|
||||
const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
|
||||
fpuFloat3(regs);
|
||||
|
||||
const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
|
||||
fpuFloat3(regt);
|
||||
|
||||
xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(regt));
|
||||
|
||||
fpuFreeIfTemp(regs);
|
||||
fpuFreeIfTemp(regt);
|
||||
}
|
||||
break;
|
||||
|
||||
default: // Untested and incorrect, but this case is never reached AFAIK (cottonvibes)
|
||||
Console.WriteLn(Color_Magenta, "recC_LE_xmm: Default");
|
||||
tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
|
||||
xMOV(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
xCMP(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
xMOV(eax, ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
xCMP(eax, ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
|
||||
j8Ptr[0] = JLE8(0);
|
||||
xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC);
|
||||
|
@ -850,9 +892,6 @@ void recC_LE_xmm(int info)
|
|||
x86SetJ8(j8Ptr[0]);
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC);
|
||||
x86SetJ8(j8Ptr[1]);
|
||||
|
||||
if (tempReg >= 0)
|
||||
_freeX86reg(tempReg);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -870,61 +909,62 @@ FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS | XMMINFO_READT);
|
|||
void recC_LT_xmm(int info)
|
||||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::CLT_F);
|
||||
int tempReg;
|
||||
int t0reg;
|
||||
|
||||
//Console.WriteLn("recC_LT_xmm()");
|
||||
|
||||
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
|
||||
{
|
||||
case PROCESS_EE_S:
|
||||
fpuFloat3(EEREC_S);
|
||||
t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
if (t0reg >= 0)
|
||||
{
|
||||
const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
|
||||
fpuFloat3(regs);
|
||||
|
||||
const int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
fpuFloat3(t0reg);
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(t0reg));
|
||||
|
||||
xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(t0reg));
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
fpuFreeIfTemp(regs);
|
||||
}
|
||||
else
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
break;
|
||||
|
||||
case PROCESS_EE_T:
|
||||
fpuFloat3(EEREC_T);
|
||||
t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
if (t0reg >= 0)
|
||||
{
|
||||
const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
|
||||
fpuFloat3(regt);
|
||||
|
||||
const int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
fpuFloat3(t0reg);
|
||||
xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_T), ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
|
||||
j8Ptr[0] = JA8(0);
|
||||
xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC);
|
||||
j8Ptr[1] = JMP8(0);
|
||||
x86SetJ8(j8Ptr[0]);
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC);
|
||||
x86SetJ8(j8Ptr[1]);
|
||||
return;
|
||||
xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(regt));
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
fpuFreeIfTemp(regt);
|
||||
}
|
||||
break;
|
||||
|
||||
case (PROCESS_EE_S | PROCESS_EE_T):
|
||||
// Clamp NaNs
|
||||
// Note: This fixes a crash in Rule of Rose.
|
||||
fpuFloat3(EEREC_S);
|
||||
fpuFloat3(EEREC_T);
|
||||
xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_T));
|
||||
{
|
||||
const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
|
||||
fpuFloat3(regs);
|
||||
|
||||
const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
|
||||
fpuFloat3(regt);
|
||||
|
||||
xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(regt));
|
||||
|
||||
fpuFreeIfTemp(regs);
|
||||
fpuFreeIfTemp(regt);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
Console.WriteLn(Color_Magenta, "recC_LT_xmm: Default");
|
||||
tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
|
||||
xMOV(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
xCMP(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
xMOV(eax, ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
xCMP(eax, ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
|
||||
j8Ptr[0] = JL8(0);
|
||||
xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC);
|
||||
|
@ -932,9 +972,6 @@ void recC_LT_xmm(int info)
|
|||
x86SetJ8(j8Ptr[0]);
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC);
|
||||
x86SetJ8(j8Ptr[1]);
|
||||
|
||||
if (tempReg >= 0)
|
||||
_freeX86reg(tempReg);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -957,13 +994,19 @@ FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS | XMMINFO_READT);
|
|||
// Emits the conversion for the opcode tagged eeOpcode::CVTS_F in the profiler.
// `info` is a bitmask tested against PROCESS_EE_D / PROCESS_EE_S (and, in the
// older condition, PROCESS_EE_MODEWRITES).
//
// Reading the PROCESS_EE_D-based structure:
//   - PROCESS_EE_D set:   convert into EEREC_D, sourcing from EEREC_S when
//                         PROCESS_EE_S is set, otherwise from fpuRegs.fpr[_Fs_].
//   - PROCESS_EE_D clear: convert fpuRegs.fpr[_Fs_] into a temporary XMM
//                         register, store it to fpuRegs.fpr[_Fd_], and free the
//                         temporary.
//
// NOTE(review): this text looks like a rendered diff without +/- markers. The
// first `if (!(info & PROCESS_EE_S) || ...)` line and the lone xCVTDQ2PS at the
// top of the final else-block are presumably removed (old) lines -- confirm
// against the real file before relying on the control flow as printed.
void recCVT_S_xmm(int info)
|
||||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::CVTS_F);
|
||||
if (!(info & PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info & PROCESS_EE_MODEWRITES)))
|
||||
if (info & PROCESS_EE_D)
|
||||
{
|
||||
if (info & PROCESS_EE_S)
|
||||
xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
else
|
||||
xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
// No destination register path: convert through a scratch XMM register and
// write the result straight to fpuRegs.fpr[_Fd_].
const int temp = _allocTempXMMreg(XMMT_FPS);
|
||||
xCVTSI2SS(xRegisterSSE(temp), ptr32[&fpuRegs.fpr[_Fs_]]);
|
||||
xMOVSS(ptr32[&fpuRegs.fpr[_Fd_]], xRegisterSSE(temp));
|
||||
_freeXMMreg(temp);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -998,7 +1041,7 @@ void recCVT_W()
|
|||
}
|
||||
|
||||
//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
|
||||
_deleteFPtoXMMreg(_Fd_, 2);
|
||||
_deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
|
||||
xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative
|
||||
|
||||
|
@ -1018,23 +1061,22 @@ void recDIVhelper1(int regd, int regt) // Sets flags
|
|||
{
|
||||
u8 *pjmp1, *pjmp2;
|
||||
u32 *ajmp32, *bjmp32;
|
||||
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
|
||||
const int t1reg = _allocTempXMMreg(XMMT_FPS);
|
||||
|
||||
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
|
||||
|
||||
/*--- Check for divide by zero ---*/
|
||||
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
|
||||
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regt));
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set)
|
||||
xMOVMSKPS(eax, xRegisterSSE(t1reg));
|
||||
xAND(eax, 1); //Check sign (if regt == zero, sign will be set)
|
||||
ajmp32 = JZ32(0); //Skip if not set
|
||||
|
||||
/*--- Check for 0/0 ---*/
|
||||
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
|
||||
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd));
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set)
|
||||
xMOVMSKPS(eax, xRegisterSSE(t1reg));
|
||||
xAND(eax, 1); //Check sign (if regd == zero, sign will be set)
|
||||
pjmp1 = JZ8(0); //Skip if not set
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 )
|
||||
pjmp2 = JMP8(0);
|
||||
|
@ -1059,7 +1101,6 @@ void recDIVhelper1(int regd, int regt) // Sets flags
|
|||
x86SetJ32(bjmp32);
|
||||
|
||||
_freeXMMreg(t1reg);
|
||||
_freeX86reg(tempReg);
|
||||
}
|
||||
|
||||
void recDIVhelper2(int regd, int regt) // Doesn't set flags
|
||||
|
@ -1075,7 +1116,7 @@ void recDIV_S_xmm(int info)
|
|||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::DIV_F);
|
||||
bool roundmodeFlag = false;
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
//Console.WriteLn("DIV");
|
||||
|
||||
if (CHECK_FPUNEGDIVHACK)
|
||||
|
@ -1181,7 +1222,7 @@ FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT);
|
|||
//------------------------------------------------------------------
|
||||
void recMADDtemp(int info, int regd)
|
||||
{
|
||||
const int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
const int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
|
||||
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
|
||||
{
|
||||
|
@ -1203,7 +1244,7 @@ void recMADDtemp(int info, int regd)
|
|||
FPU_ADD(regd, t0reg);
|
||||
}
|
||||
}
|
||||
else if (regd == EEREC_ACC)
|
||||
else if ((info & PROCESS_EE_ACC) && regd == EEREC_ACC)
|
||||
{
|
||||
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_S); fpuFloat2(t0reg); }
|
||||
|
@ -1306,7 +1347,7 @@ void recMADDtemp(int info, int regd)
|
|||
FPU_ADD(regd, t0reg);
|
||||
}
|
||||
}
|
||||
else if (regd == EEREC_ACC)
|
||||
else if ((info & PROCESS_EE_ACC) && regd == EEREC_ACC)
|
||||
{
|
||||
xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(EEREC_T); }
|
||||
|
@ -1335,7 +1376,7 @@ void recMADDtemp(int info, int regd)
|
|||
default:
|
||||
if (regd == EEREC_ACC)
|
||||
{
|
||||
const int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
const int t1reg = _allocTempXMMreg(XMMT_FPS);
|
||||
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
xMOVSSZX(xRegisterSSE(t1reg), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(t1reg); }
|
||||
|
@ -1433,7 +1474,7 @@ FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED | XMMINFO_READS);
|
|||
//------------------------------------------------------------------
|
||||
void recMSUBtemp(int info, int regd)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
|
||||
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
|
||||
{
|
||||
|
@ -1559,7 +1600,7 @@ void recMSUBtemp(int info, int regd)
|
|||
default:
|
||||
if (regd == EEREC_ACC)
|
||||
{
|
||||
const int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
const int t1reg = _allocTempXMMreg(XMMT_FPS);
|
||||
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]);
|
||||
xMOVSSZX(xRegisterSSE(t1reg), ptr[&fpuRegs.fpr[_Ft_]]);
|
||||
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(t1reg); }
|
||||
|
@ -1663,7 +1704,7 @@ void recSUBhelper(int regd, int regt)
|
|||
|
||||
void recSUBop(int info, int regd)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
|
||||
//xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO|FPUflagU)); // Clear O and U flags
|
||||
|
||||
|
@ -1761,19 +1802,15 @@ void recSQRT_S_xmm(int info)
|
|||
|
||||
if (CHECK_FPU_EXTRA_FLAGS)
|
||||
{
|
||||
int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
|
||||
|
||||
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
|
||||
|
||||
/*--- Check for negative SQRT ---*/
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(EEREC_D));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign
|
||||
xMOVMSKPS(eax, xRegisterSSE(EEREC_D));
|
||||
xAND(eax, 1); //Check sign
|
||||
u8* pjmp = JZ8(0); //Skip if none are
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags
|
||||
xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_pos[0]]); // Make EEREC_D Positive
|
||||
x86SetJ8(pjmp);
|
||||
|
||||
_freeX86reg(tempReg);
|
||||
}
|
||||
else
|
||||
xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_pos[0]]); // Make EEREC_D Positive
|
||||
|
@ -1800,14 +1837,13 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re
|
|||
u8 *pjmp1, *pjmp2;
|
||||
u32 *pjmp32;
|
||||
u8 *qjmp1, *qjmp2;
|
||||
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
|
||||
int t1reg = _allocTempXMMreg(XMMT_FPS);
|
||||
|
||||
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
|
||||
|
||||
/*--- (first) Check for negative SQRT ---*/
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t0reg));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign
|
||||
xMOVMSKPS(eax, xRegisterSSE(t0reg));
|
||||
xAND(eax, 1); //Check sign
|
||||
pjmp2 = JZ8(0); //Skip if not set
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags
|
||||
xAND.PS(xRegisterSSE(t0reg), ptr[&s_pos[0]]); // Make t0reg Positive
|
||||
|
@ -1816,14 +1852,14 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re
|
|||
/*--- Check for zero ---*/
|
||||
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
|
||||
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(t0reg));
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign (if t0reg == zero, sign will be set)
|
||||
xMOVMSKPS(eax, xRegisterSSE(t1reg));
|
||||
xAND(eax, 1); //Check sign (if t0reg == zero, sign will be set)
|
||||
pjmp1 = JZ8(0); //Skip if not set
|
||||
/*--- Check for 0/0 ---*/
|
||||
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
|
||||
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd));
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set)
|
||||
xMOVMSKPS(eax, xRegisterSSE(t1reg));
|
||||
xAND(eax, 1); //Check sign (if regd == zero, sign will be set)
|
||||
qjmp1 = JZ8(0); //Skip if not set
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 )
|
||||
qjmp2 = JMP8(0);
|
||||
|
@ -1850,7 +1886,6 @@ void recRSQRThelper1(int regd, int t0reg) // Preforms the RSQRT function when re
|
|||
x86SetJ32(pjmp32);
|
||||
|
||||
_freeXMMreg(t1reg);
|
||||
_freeX86reg(tempReg);
|
||||
}
|
||||
|
||||
void recRSQRThelper2(int regd, int t0reg) // Performs the RSQRT function when regd <- Fs and t0reg <- Ft (Doesn't set flags)
|
||||
|
@ -1872,7 +1907,7 @@ void recRSQRT_S_xmm(int info)
|
|||
// iFPUd (Full mode) sets roundmode to nearest for rSQRT.
|
||||
// Should this do the same, or should Full mode leave roundmode alone? --air
|
||||
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_FPS);
|
||||
//Console.WriteLn("FPU: RSQRT");
|
||||
|
||||
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
|
||||
|
|
|
@ -288,7 +288,7 @@ void SetMaxValue(int regd)
|
|||
|
||||
// ALLOC_S(sreg): assigns the result of _allocTempXMMreg(XMMT_FPS...) to `sreg`
// and then invokes GET_S(sreg), all wrapped in do { ... } while (0) so the
// macro behaves as a single statement.
// NOTE(review): this text looks like a rendered diff without +/- markers; the
// two `(sreg) = _allocTempXMMreg(...)` lines are presumably the old (two-argument)
// and new (one-argument) forms of the same statement -- confirm against the
// real file.
#define ALLOC_S(sreg) \
|
||||
do { \
|
||||
(sreg) = _allocTempXMMreg(XMMT_FPS, -1); \
|
||||
(sreg) = _allocTempXMMreg(XMMT_FPS); \
|
||||
GET_S(sreg); \
|
||||
} while (0)
|
||||
|
||||
|
@ -302,7 +302,7 @@ void SetMaxValue(int regd)
|
|||
|
||||
// ALLOC_T(treg): assigns the result of _allocTempXMMreg(XMMT_FPS...) to `treg`
// and then invokes GET_T(treg), all wrapped in do { ... } while (0) so the
// macro behaves as a single statement.
// NOTE(review): this text looks like a rendered diff without +/- markers; the
// two `(treg) = _allocTempXMMreg(...)` lines are presumably the old (two-argument)
// and new (one-argument) forms of the same statement -- confirm against the
// real file.
#define ALLOC_T(treg) \
|
||||
do { \
|
||||
(treg) = _allocTempXMMreg(XMMT_FPS, -1); \
|
||||
(treg) = _allocTempXMMreg(XMMT_FPS); \
|
||||
GET_T(treg); \
|
||||
} while (0)
|
||||
|
||||
|
@ -316,7 +316,7 @@ void SetMaxValue(int regd)
|
|||
|
||||
// ALLOC_ACC(areg): assigns the result of _allocTempXMMreg(XMMT_FPS...) to `areg`
// and then invokes GET_ACC(areg), all wrapped in do { ... } while (0) so the
// macro behaves as a single statement.
// NOTE(review): this text looks like a rendered diff without +/- markers; the
// two `(areg) = _allocTempXMMreg(...)` lines are presumably the old (two-argument)
// and new (one-argument) forms of the same statement -- confirm against the
// real file.
#define ALLOC_ACC(areg) \
|
||||
do { \
|
||||
(areg) = _allocTempXMMreg(XMMT_FPS, -1); \
|
||||
(areg) = _allocTempXMMreg(XMMT_FPS); \
|
||||
GET_ACC(areg); \
|
||||
} while (0)
|
||||
|
||||
|
@ -355,34 +355,31 @@ FPURECOMPILE_CONSTCODE(ABS_S, XMMINFO_WRITED | XMMINFO_READS);
|
|||
//------------------------------------------------------------------
|
||||
void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they are floats
|
||||
{
|
||||
int tempecx = _allocX86reg(ecx, X86TYPE_TEMP, 0, 0); //receives regd
|
||||
int temp2 = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); //receives regt
|
||||
int xmmtemp = _allocTempXMMreg(XMMT_FPS, -1); //temporary for anding with regd/regt
|
||||
|
||||
xMOVD(xRegister32(tempecx), xRegisterSSE(tempd));
|
||||
xMOVD(xRegister32(temp2), xRegisterSSE(tempt));
|
||||
const int xmmtemp = _allocTempXMMreg(XMMT_FPS); //temporary for anding with regd/regt
|
||||
xMOVD(ecx, xRegisterSSE(tempd)); //receives regd
|
||||
xMOVD(eax, xRegisterSSE(tempt)); //receives regt
|
||||
|
||||
//mask the exponents
|
||||
xSHR(xRegister32(tempecx), 23);
|
||||
xSHR(xRegister32(temp2), 23);
|
||||
xAND(xRegister32(tempecx), 0xff);
|
||||
xAND(xRegister32(temp2), 0xff);
|
||||
xSHR(ecx, 23);
|
||||
xSHR(eax, 23);
|
||||
xAND(ecx, 0xff);
|
||||
xAND(eax, 0xff);
|
||||
|
||||
xSUB(xRegister32(tempecx), xRegister32(temp2)); //tempecx = exponent difference
|
||||
xCMP(xRegister32(tempecx), 25);
|
||||
xSUB(ecx, eax); //tempecx = exponent difference
|
||||
xCMP(ecx, 25);
|
||||
j8Ptr[0] = JGE8(0);
|
||||
xCMP(xRegister32(tempecx), 0);
|
||||
xCMP(ecx, 0);
|
||||
j8Ptr[1] = JG8(0);
|
||||
j8Ptr[2] = JE8(0);
|
||||
xCMP(xRegister32(tempecx), -25);
|
||||
xCMP(ecx, -25);
|
||||
j8Ptr[3] = JLE8(0);
|
||||
|
||||
//diff = -24 .. -1 , expd < expt
|
||||
xNEG(xRegister32(tempecx));
|
||||
xDEC(xRegister32(tempecx));
|
||||
xMOV(xRegister32(temp2), 0xffffffff);
|
||||
xSHL(xRegister32(temp2), cl); //temp2 = 0xffffffff << tempecx
|
||||
xMOVDZX(xRegisterSSE(xmmtemp), xRegister32(temp2));
|
||||
xNEG(ecx);
|
||||
xDEC(ecx);
|
||||
xMOV(eax, 0xffffffff);
|
||||
xSHL(eax, cl); //temp2 = 0xffffffff << tempecx
|
||||
xMOVDZX(xRegisterSSE(xmmtemp), eax);
|
||||
xAND.PS(xRegisterSSE(tempd), xRegisterSSE(xmmtemp));
|
||||
j8Ptr[4] = JMP8(0);
|
||||
|
||||
|
@ -393,10 +390,10 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a
|
|||
|
||||
x86SetJ8(j8Ptr[1]);
|
||||
//diff = 1 .. 24, expt < expd
|
||||
xDEC(xRegister32(tempecx));
|
||||
xMOV(xRegister32(temp2), 0xffffffff);
|
||||
xSHL(xRegister32(temp2), cl); //temp2 = 0xffffffff << tempecx
|
||||
xMOVDZX(xRegisterSSE(xmmtemp), xRegister32(temp2));
|
||||
xDEC(ecx);
|
||||
xMOV(eax, 0xffffffff);
|
||||
xSHL(eax, cl); //temp2 = 0xffffffff << tempecx
|
||||
xMOVDZX(xRegisterSSE(xmmtemp), eax);
|
||||
xAND.PS(xRegisterSSE(tempt), xRegisterSSE(xmmtemp));
|
||||
j8Ptr[6] = JMP8(0);
|
||||
|
||||
|
@ -412,8 +409,6 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a
|
|||
x86SetJ8(j8Ptr[6]);
|
||||
|
||||
_freeXMMreg(xmmtemp);
|
||||
_freeX86reg(temp2);
|
||||
_freeX86reg(tempecx);
|
||||
}
|
||||
|
||||
void FPU_MUL(int info, int regd, int sreg, int treg, bool acc)
|
||||
|
@ -554,10 +549,21 @@ FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS | XMMINFO_READT);
|
|||
// Emits the conversion for the opcode tagged eeOpcode::CVTS_F in the profiler.
// `info` is a bitmask tested against PROCESS_EE_D / PROCESS_EE_S (and, in the
// older condition, PROCESS_EE_MODEWRITES).
//
// Reading the PROCESS_EE_D-based structure:
//   - PROCESS_EE_D set:   convert into EEREC_D, sourcing from EEREC_S when
//                         PROCESS_EE_S is set, otherwise from fpuRegs.fpr[_Fs_].
//   - PROCESS_EE_D clear: convert fpuRegs.fpr[_Fs_] into a temporary XMM
//                         register, store it to fpuRegs.fpr[_Fd_], and free the
//                         temporary.
//
// NOTE(review): this text looks like a rendered diff without +/- markers. The
// leading `if (!(info & PROCESS_EE_S) || ...)` / xCVTSI2SS / `else` lines are
// presumably removed (old) lines -- confirm against the real file before
// relying on the control flow as printed.
void recCVT_S_xmm(int info)
|
||||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::CVTS_F);
|
||||
if (!(info & PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info & PROCESS_EE_MODEWRITES)))
|
||||
xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]);
|
||||
else
|
||||
|
||||
if (info & PROCESS_EE_D)
|
||||
{
|
||||
if (info & PROCESS_EE_S)
|
||||
xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
else
|
||||
xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
// No destination register path: convert through a scratch XMM register and
// write the result straight to fpuRegs.fpr[_Fd_].
const int temp = _allocTempXMMreg(XMMT_FPS);
|
||||
xCVTSI2SS(xRegisterSSE(temp), ptr32[&fpuRegs.fpr[_Fs_]]);
|
||||
xMOVSS(ptr32[&fpuRegs.fpr[_Fd_]], xRegisterSSE(temp));
|
||||
_freeXMMreg(temp);
|
||||
}
|
||||
}
|
||||
|
||||
FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS);
|
||||
|
@ -581,7 +587,7 @@ void recCVT_W() //called from iFPU.cpp's recCVT_W
|
|||
}
|
||||
|
||||
//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
|
||||
_deleteFPtoXMMreg(_Fd_, 2);
|
||||
_deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
|
||||
xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x8000 0000 if negative
|
||||
|
||||
|
@ -601,23 +607,22 @@ void recDIVhelper1(int regd, int regt) // Sets flags
|
|||
{
|
||||
u8 *pjmp1, *pjmp2;
|
||||
u32 *ajmp32, *bjmp32;
|
||||
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
|
||||
const int t1reg = _allocTempXMMreg(XMMT_FPS);
|
||||
|
||||
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
|
||||
|
||||
//--- Check for divide by zero ---
|
||||
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
|
||||
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regt));
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set)
|
||||
xMOVMSKPS(eax, xRegisterSSE(t1reg));
|
||||
xAND(eax, 1); //Check sign (if regt == zero, sign will be set)
|
||||
ajmp32 = JZ32(0); //Skip if not set
|
||||
|
||||
//--- Check for 0/0 ---
|
||||
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
|
||||
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd));
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set)
|
||||
xMOVMSKPS(eax, xRegisterSSE(t1reg));
|
||||
xAND(eax, 1); //Check sign (if regd == zero, sign will be set)
|
||||
pjmp1 = JZ8(0); //Skip if not set
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 )
|
||||
pjmp2 = JMP8(0);
|
||||
|
@ -642,7 +647,6 @@ void recDIVhelper1(int regd, int regt) // Sets flags
|
|||
x86SetJ32(bjmp32);
|
||||
|
||||
_freeXMMreg(t1reg);
|
||||
_freeX86reg(tempReg);
|
||||
}
|
||||
|
||||
void recDIVhelper2(int regd, int regt) // Doesn't set flags
|
||||
|
@ -951,8 +955,7 @@ void recSQRT_S_xmm(int info)
|
|||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::SQRT_F);
|
||||
int roundmodeFlag = 0;
|
||||
const int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
|
||||
const int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
const int t1reg = _allocTempXMMreg(XMMT_FPS);
|
||||
//Console.WriteLn("FPU: SQRT");
|
||||
|
||||
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
|
||||
|
@ -972,8 +975,8 @@ void recSQRT_S_xmm(int info)
|
|||
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
|
||||
|
||||
//--- Check for negative SQRT --- (sqrt(-0) = 0, unlike what the docs say)
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(EEREC_D));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign
|
||||
xMOVMSKPS(eax, xRegisterSSE(EEREC_D));
|
||||
xAND(eax, 1); //Check sign
|
||||
u8* pjmp = JZ8(0); //Skip if none are
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags
|
||||
xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_const.pos[0]]); // Make EEREC_D Positive
|
||||
|
@ -994,7 +997,6 @@ void recSQRT_S_xmm(int info)
|
|||
if (roundmodeFlag == 1)
|
||||
xLDMXCSR(g_sseMXCSR);
|
||||
|
||||
_freeX86reg(tempReg);
|
||||
_freeXMMreg(t1reg);
|
||||
}
|
||||
|
||||
|
@ -1010,14 +1012,13 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg
|
|||
u8 *pjmp1, *pjmp2;
|
||||
u8 *qjmp1, *qjmp2;
|
||||
u32* pjmp32;
|
||||
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
||||
int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
|
||||
int t1reg = _allocTempXMMreg(XMMT_FPS);
|
||||
|
||||
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
|
||||
|
||||
//--- (first) Check for negative SQRT ---
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(regt));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign
|
||||
xMOVMSKPS(eax, xRegisterSSE(regt));
|
||||
xAND(eax, 1); //Check sign
|
||||
pjmp2 = JZ8(0); //Skip if not set
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags
|
||||
xAND.PS(xRegisterSSE(regt), ptr[&s_const.pos[0]]); // Make regt Positive
|
||||
|
@ -1026,15 +1027,15 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg
|
|||
//--- Check for zero ---
|
||||
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
|
||||
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regt));
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set)
|
||||
xMOVMSKPS(eax, xRegisterSSE(t1reg));
|
||||
xAND(eax, 1); //Check sign (if regt == zero, sign will be set)
|
||||
pjmp1 = JZ8(0); //Skip if not set
|
||||
|
||||
//--- Check for 0/0 ---
|
||||
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
|
||||
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd));
|
||||
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg));
|
||||
xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set)
|
||||
xMOVMSKPS(eax, xRegisterSSE(t1reg));
|
||||
xAND(eax, 1); //Check sign (if regd == zero, sign will be set)
|
||||
qjmp1 = JZ8(0); //Skip if not set
|
||||
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 )
|
||||
qjmp2 = JMP8(0);
|
||||
|
@ -1055,7 +1056,6 @@ void recRSQRThelper1(int regd, int regt) // Preforms the RSQRT function when reg
|
|||
x86SetJ32(pjmp32);
|
||||
|
||||
_freeXMMreg(t1reg);
|
||||
_freeX86reg(tempReg);
|
||||
}
|
||||
|
||||
void recRSQRThelper2(int regd, int regt) // Performs the RSQRT function when regd <- Fs and regt <- Ft (Doesn't set flags)
|
||||
|
|
|
@ -56,11 +56,14 @@ REC_FUNC_DEL(PSLLW, _Rd_);
|
|||
|
||||
void recPLZCW()
|
||||
{
|
||||
int regs = -1;
|
||||
int x86regs = -1;
|
||||
int xmmregs = -1;
|
||||
|
||||
if (!_Rd_)
|
||||
return;
|
||||
|
||||
// TODO(Stenzek): Don't flush to memory at the end here. Careful of Rs == Rd.
|
||||
|
||||
EE::Profiler.EmitOp(eeOpcode::PLZCW);
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
|
@ -78,16 +81,20 @@ void recPLZCW()
|
|||
|
||||
_eeOnWriteReg(_Rd_, 0);
|
||||
|
||||
if ((regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0)
|
||||
if ((xmmregs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0)
|
||||
{
|
||||
xMOVD(eax, xRegisterSSE(regs));
|
||||
xMOVD(eax, xRegisterSSE(xmmregs));
|
||||
}
|
||||
else if ((x86regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ)) >= 0)
|
||||
{
|
||||
xMOV(eax, xRegister32(x86regs));
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
}
|
||||
|
||||
_deleteEEreg(_Rd_, 0);
|
||||
_deleteEEreg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
|
||||
// Count the number of leading bits (MSB) that match the sign bit, excluding the sign
|
||||
// bit itself.
|
||||
|
@ -115,11 +122,14 @@ void recPLZCW()
|
|||
|
||||
// second word
|
||||
|
||||
if (regs >= 0)
|
||||
if (xmmregs >= 0)
|
||||
{
|
||||
xPSHUF.D(xRegisterSSE(regs & 0xf), xRegisterSSE(regs & 0xf), 0xe1);
|
||||
xMOVD(eax, xRegisterSSE(regs & 0xf));
|
||||
xPSHUF.D(xRegisterSSE(regs & 0xf), xRegisterSSE(regs & 0xf), 0xe1);
|
||||
xPEXTR.D(eax, xRegisterSSE(xmmregs), 1);
|
||||
}
|
||||
else if (x86regs >= 0)
|
||||
{
|
||||
xMOV(rax, xRegister64(x86regs));
|
||||
xSHR(rax, 32);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -158,7 +168,7 @@ void recPMFHL()
|
|||
{
|
||||
case 0x00: // LW
|
||||
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0x88);
|
||||
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO), 0x88);
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -167,7 +177,7 @@ void recPMFHL()
|
|||
break;
|
||||
|
||||
case 0x01: // UW
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xdd);
|
||||
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO), 0xdd);
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -182,7 +192,7 @@ void recPMFHL()
|
|||
break;
|
||||
|
||||
case 0x03: // LH
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xPSHUF.LW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0x88);
|
||||
xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO), 0x88);
|
||||
xPSHUF.HW(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x88);
|
||||
|
@ -452,7 +462,7 @@ void recPPACW()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
if (EEREC_D == EEREC_T)
|
||||
{
|
||||
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S), 0x88);
|
||||
|
@ -492,7 +502,7 @@ void recPPACH()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xPSHUF.LW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S), 0x88);
|
||||
xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
|
||||
xPSHUF.HW(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x88);
|
||||
|
@ -518,28 +528,19 @@ void recPPACB()
|
|||
int info = eeRecompileCodeXMM((_Rs_ != 0 ? XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED);
|
||||
if (_Rs_ == 0)
|
||||
{
|
||||
if (_hasFreeXMMreg())
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
const int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
xPSLL.W(xRegisterSSE(EEREC_D), 8);
|
||||
xPXOR(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
|
||||
xPSRL.W(xRegisterSSE(EEREC_D), 8);
|
||||
xPACK.USWB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
xPSLL.W(xRegisterSSE(EEREC_D), 8);
|
||||
xPSRL.W(xRegisterSSE(EEREC_D), 8);
|
||||
xPACK.USWB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
|
||||
xPSRL.DQ(xRegisterSSE(EEREC_D), 8);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
const int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
|
@ -563,8 +564,8 @@ void recPEXT5()
|
|||
EE::Profiler.EmitOp(eeOpcode::PEXT5);
|
||||
|
||||
int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); // for bit 5..9
|
||||
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); // for bit 15
|
||||
|
@ -602,8 +603,8 @@ void recPPAC5()
|
|||
EE::Profiler.EmitOp(eeOpcode::PPAC5);
|
||||
|
||||
int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); // for bit 10..14
|
||||
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); // for bit 15
|
||||
|
@ -671,7 +672,7 @@ void recPCGTB()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPCMP.GTB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -696,7 +697,7 @@ void recPCGTH()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPCMP.GTW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -722,7 +723,7 @@ void recPCGTW()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPCMP.GTD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -783,9 +784,9 @@ void recPADDSW()
|
|||
EE::Profiler.EmitOp(eeOpcode::PADDSW);
|
||||
|
||||
int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t2reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t2reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
// The idea is:
|
||||
// s = x + y; (wrap-arounded)
|
||||
|
@ -843,7 +844,7 @@ void recPSUBSB()
|
|||
xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
else if (EEREC_D == EEREC_T)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -870,7 +871,7 @@ void recPSUBSH()
|
|||
xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
else if (EEREC_D == EEREC_T)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -894,9 +895,9 @@ void recPSUBSW()
|
|||
EE::Profiler.EmitOp(eeOpcode::PSUBSW);
|
||||
|
||||
int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t2reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t2reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
// The idea is:
|
||||
// s = x - y; (wrap-arounded)
|
||||
|
@ -1050,7 +1051,7 @@ void recPSUBB()
|
|||
xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
else if (EEREC_D == EEREC_T)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -1077,7 +1078,7 @@ void recPSUBH()
|
|||
xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
else if (EEREC_D == EEREC_T)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -1104,7 +1105,7 @@ void recPSUBW()
|
|||
xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
else if (EEREC_D == EEREC_T)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -1138,7 +1139,7 @@ void recPEXTLW()
|
|||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
else if (EEREC_D == EEREC_S)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -1172,7 +1173,7 @@ void recPEXTLB()
|
|||
xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
else if (EEREC_D == EEREC_S)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -1206,7 +1207,7 @@ void recPEXTLH()
|
|||
xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
else if (EEREC_D == EEREC_S)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -1264,7 +1265,7 @@ void recPABSW() //needs clamping
|
|||
EE::Profiler.EmitOp(eeOpcode::PABSW);
|
||||
|
||||
int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
|
||||
xPSLL.D(xRegisterSSE(t0reg), 31);
|
||||
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffffffff if equal to 0x80000000
|
||||
|
@ -1284,7 +1285,7 @@ void recPABSH()
|
|||
EE::Profiler.EmitOp(eeOpcode::PABSH);
|
||||
|
||||
int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
|
||||
xPSLL.W(xRegisterSSE(t0reg), 15);
|
||||
xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffff if equal to 0x8000
|
||||
|
@ -1337,7 +1338,7 @@ void recPADSBH()
|
|||
}
|
||||
else
|
||||
{
|
||||
const int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
const int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
|
||||
|
@ -1387,8 +1388,8 @@ void recPADDUW()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
xPCMP.EQB(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
|
||||
xPSLL.D(xRegisterSSE(t0reg), 31); // 0x80000000
|
||||
|
@ -1432,7 +1433,7 @@ void recPSUBUB()
|
|||
xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
else if (EEREC_D == EEREC_T)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -1459,7 +1460,7 @@ void recPSUBUH()
|
|||
xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
else if (EEREC_D == EEREC_T)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -1482,8 +1483,8 @@ void recPSUBUW()
|
|||
EE::Profiler.EmitOp(eeOpcode::PSUBUW);
|
||||
|
||||
int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
xPCMP.EQB(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
|
||||
xPSLL.D(xRegisterSSE(t0reg), 31); // 0x80000000
|
||||
|
@ -1545,7 +1546,7 @@ void recPEXTUH()
|
|||
xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
else if (EEREC_D == EEREC_S)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -1614,7 +1615,7 @@ void recPEXTUB()
|
|||
xPUNPCK.HBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
else if (EEREC_D == EEREC_S)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
xPUNPCK.HBW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -1649,7 +1650,7 @@ void recPEXTUW()
|
|||
xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
else if (EEREC_D == EEREC_S)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
|
@ -1910,8 +1911,8 @@ void recPSLLVW()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
// shamt is 5-bit
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
|
@ -1967,8 +1968,8 @@ void recPSRLVW()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
// shamt is 5-bit
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
|
@ -2134,7 +2135,7 @@ void recPHMADH()
|
|||
EE::Profiler.EmitOp(eeOpcode::PHMADH);
|
||||
|
||||
int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
xPSRL.D(xRegisterSSE(t0reg), 16);
|
||||
|
@ -2181,8 +2182,8 @@ void recPMSUBH()
|
|||
EE::Profiler.EmitOp(eeOpcode::PMSUBH);
|
||||
|
||||
int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_READLO | XMMINFO_READHI | XMMINFO_WRITELO | XMMINFO_WRITEHI);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
if (!_Rd_)
|
||||
{
|
||||
|
@ -2247,7 +2248,7 @@ void recPHMSBH()
|
|||
EE::Profiler.EmitOp(eeOpcode::PHMSBH);
|
||||
|
||||
int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
xPCMP.EQD(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO));
|
||||
xPSRL.D(xRegisterSSE(EEREC_LO), 16);
|
||||
|
@ -2316,7 +2317,7 @@ void recPINTH()
|
|||
int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
if (EEREC_D == EEREC_S)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVHL.PS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
if (EEREC_D != EEREC_T)
|
||||
xMOVQZX(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
|
@ -2360,7 +2361,7 @@ void recPMULTH()
|
|||
EE::Profiler.EmitOp(eeOpcode::PMULTH);
|
||||
|
||||
int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_WRITELO | XMMINFO_WRITEHI);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S));
|
||||
xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S));
|
||||
|
@ -2506,8 +2507,8 @@ void recPMADDH()
|
|||
EE::Profiler.EmitOp(eeOpcode::PMADDH);
|
||||
|
||||
int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_READLO | XMMINFO_READHI | XMMINFO_WRITELO | XMMINFO_WRITEHI);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
if (!_Rd_)
|
||||
{
|
||||
|
@ -2616,8 +2617,8 @@ void recPSRAVW()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
int t1reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
// shamt is 5-bit
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
|
@ -2699,7 +2700,7 @@ void recPINTEH()
|
|||
else if (EEREC_D == EEREC_T)
|
||||
{
|
||||
pxAssert(EEREC_D != EEREC_S);
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xPSLL.D(xRegisterSSE(EEREC_D), 16);
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
xPSRL.D(xRegisterSSE(EEREC_D), 16);
|
||||
|
@ -2708,7 +2709,7 @@ void recPINTEH()
|
|||
}
|
||||
else
|
||||
{
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
|
||||
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
xPSLL.D(xRegisterSSE(t0reg), 16);
|
||||
|
@ -2767,7 +2768,7 @@ void recPMULTUW()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xd8);
|
||||
xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg));
|
||||
xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg));
|
||||
|
@ -2833,7 +2834,7 @@ void recPMADDUW()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xd8);
|
||||
xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg));
|
||||
xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg));
|
||||
|
@ -2902,7 +2903,7 @@ void recPNOR()
|
|||
{
|
||||
if (EEREC_D == EEREC_T)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
|
||||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
_freeXMMreg(t0reg);
|
||||
|
@ -2919,7 +2920,7 @@ void recPNOR()
|
|||
{
|
||||
if (EEREC_D == EEREC_S)
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
|
||||
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
|
||||
_freeXMMreg(t0reg);
|
||||
|
@ -2932,7 +2933,7 @@ void recPNOR()
|
|||
}
|
||||
else
|
||||
{
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
int t0reg = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
if (EEREC_D == EEREC_S)
|
||||
xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
|
||||
|
|
|
@ -104,6 +104,7 @@ static EEINST* s_psaveInstInfo = NULL;
|
|||
|
||||
u32 s_psxBlockCycles = 0; // cycles of current block recompiling
|
||||
static u32 s_savenBlockCycles = 0;
|
||||
static bool s_recompilingDelaySlot = false;
|
||||
|
||||
static void iPsxBranchTest(u32 newpc, u32 cpuBranch);
|
||||
void psxRecompileNextInstruction(int delayslot);
|
||||
|
@ -119,7 +120,58 @@ static u32 psxdump = 0;
|
|||
|
||||
#define PSXREC_CLEARM(mem) \
|
||||
(((mem) < g_psxMaxRecMem && (psxRecLUT[(mem) >> 16] + (mem))) ? \
|
||||
psxRecClearMem(mem) : 4)
|
||||
psxRecClearMem(mem) : \
|
||||
4)
|
||||
|
||||
#ifdef DUMP_BLOCKS
|
||||
static ZydisFormatterFunc s_old_print_address;
|
||||
|
||||
// Zydis formatter hook (only built when DUMP_BLOCKS is defined) that prints an
// absolute address operand symbolically when it refers to known IOP state.
//
// The absolute address is computed with ZydisCalcAbsoluteAddress(). If it falls
// inside iopMem->Main..iopMem->P, inside psxRegs.GPR..psxRegs.CP0, or equals the
// address of psxRegs.pc, psxRegs.cycle or g_nextEventCycle, a symbol token with
// the text "&<name>" is appended to the formatter buffer. Any other address is
// passed on to the previously saved hook, s_old_print_address.
//
// Returns the ZyanStatus of the failing Zydis call (via ZYAN_CHECK), of
// ZyanStringAppendFormat(), or of s_old_print_address().
static ZyanStatus ZydisFormatterPrintAddressAbsolute(const ZydisFormatter* formatter,
|
||||
ZydisFormatterBuffer* buffer, ZydisFormatterContext* context)
|
||||
{
|
||||
ZyanU64 address;
|
||||
ZYAN_CHECK(ZydisCalcAbsoluteAddress(context->instruction, context->operand,
|
||||
context->runtime_address, &address));
|
||||

||||
// buf receives the symbolic name; len stays 0 when no known range matched.
// NOTE(review): snprintf returns int, so a negative error return would be
// stored in the u32 as a large positive value and pass the len > 0 test below.
char buf[128];
|
||||
u32 len = 0;
|
||||

||||
// Local shorthand: widen a pointer/address to u64 for comparison with `address`.
#define A(x) ((u64)(x))
|
||||

||||
if (address >= A(iopMem->Main) && address < A(iopMem->P))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "iopMem+0x%08X", static_cast<u32>(address - A(iopMem->Main)));
|
||||
}
|
||||
else if (address >= A(&psxRegs.GPR) && address < A(&psxRegs.CP0))
|
||||
{
|
||||
// The register index is the byte offset from &psxRegs (not &psxRegs.GPR) divided by 4.
// NOTE(review): this assumes GPR is the first member of psxRegs and that each
// entry is 4 bytes wide - confirm against the psxRegs declaration.
len = snprintf(buf, sizeof(buf), "psxRegs.GPR.%s", R3000A::disRNameGPR[static_cast<u32>(address - A(&psxRegs)) / 4u]);
|
||||
}
|
||||
else if (address == A(&psxRegs.pc))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "psxRegs.pc");
|
||||
}
|
||||
else if (address == A(&psxRegs.cycle))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "psxRegs.cycle");
|
||||
}
|
||||
else if (address == A(&g_nextEventCycle))
|
||||
{
|
||||
len = snprintf(buf, sizeof(buf), "g_nextEventCycle");
|
||||
}
|
||||

||||
#undef A
|
||||

||||
// A name was produced: emit it as a symbol token, prefixed with '&'.
if (len > 0)
|
||||
{
|
||||
ZYAN_CHECK(ZydisFormatterBufferAppend(buffer, ZYDIS_TOKEN_SYMBOL));
|
||||
ZyanString* string;
|
||||
ZYAN_CHECK(ZydisFormatterBufferGetString(buffer, &string));
|
||||
return ZyanStringAppendFormat(string, "&%s", buf);
|
||||
}
|
||||

||||
// Unknown address: defer to the formatter's original address printer.
return s_old_print_address(formatter, buffer, context);
|
||||
}
|
||||
#endif
|
||||
|
||||
// =====================================================================================================
|
||||
// Dynamically Compiled Dispatchers - R3000A style
|
||||
|
@ -197,9 +249,9 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
|
|||
|
||||
{ // Properly scope the frame prologue/epilogue
|
||||
#ifdef ENABLE_VTUNE
|
||||
xScopedStackFrame frame(true);
|
||||
xScopedStackFrame frame(true, true);
|
||||
#else
|
||||
xScopedStackFrame frame(IsDevBuild);
|
||||
xScopedStackFrame frame(false, true);
|
||||
#endif
|
||||
|
||||
xJMP((void*)iopDispatcherReg);
|
||||
|
@ -266,7 +318,7 @@ static void iIopDumpBlock(int startpc, u8* ptr)
|
|||
}
|
||||
|
||||
// write the instruction info
|
||||
std::fprintf(f, "\n\nlive0 - %x, lastuse - %x used - %x\n", EEINST_LIVE0, EEINST_LASTUSE, EEINST_USED);
|
||||
std::fprintf(f, "\n\nlive0 - %x, lastuse - %x used - %x\n", EEINST_LIVE, EEINST_LASTUSE, EEINST_USED);
|
||||
|
||||
memzero(used);
|
||||
numused = 0;
|
||||
|
@ -325,85 +377,14 @@ static void iIopDumpBlock(int startpc, u8* ptr)
|
|||
}
|
||||
|
||||
int status = std::system(fmt::format("objdump -D -b binary -mi386 -M intel --no-show-raw-insn {} >> {}; rm {}",
|
||||
"mydump1", filename.c_str(), "mydump1").c_str());
|
||||
"mydump1", filename.c_str(), "mydump1")
|
||||
.c_str());
|
||||
|
||||
if (!WIFEXITED(status))
|
||||
Console.Error("IOP dump didn't terminate normally");
|
||||
#endif
|
||||
}
|
||||
|
||||
u8 _psxLoadWritesRs(u32 tempcode)
|
||||
{
|
||||
switch (tempcode >> 26)
|
||||
{
|
||||
case 32: case 33: case 34: case 35: case 36: case 37: case 38:
|
||||
return ((tempcode >> 21) & 0x1f) == ((tempcode >> 16) & 0x1f); // rs==rt
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
u8 _psxIsLoadStore(u32 tempcode)
|
||||
{
|
||||
switch (tempcode >> 26)
|
||||
{
|
||||
case 32: case 33: case 34: case 35: case 36: case 37: case 38:
|
||||
// 4 byte stores
|
||||
case 40: case 41: case 42: case 43: case 46:
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void _psxFlushAllUnused()
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 34; ++i)
|
||||
{
|
||||
if (psxpc < s_nEndBlock)
|
||||
{
|
||||
if ((g_pCurInstInfo[1].regs[i] & EEINST_USED))
|
||||
continue;
|
||||
}
|
||||
else if ((g_pCurInstInfo[0].regs[i] & EEINST_USED))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i < 32 && PSX_IS_CONST1(i))
|
||||
{
|
||||
_psxFlushConstReg(i);
|
||||
}
|
||||
else
|
||||
{
|
||||
_deleteX86reg(X86TYPE_PSX, i, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int _psxFlushUnusedConstReg()
|
||||
{
|
||||
int i;
|
||||
for (i = 1; i < 32; ++i)
|
||||
{
|
||||
if ((g_psxHasConstReg & (1 << i)) && !(g_psxFlushedConstReg & (1 << i)) &&
|
||||
!_recIsRegWritten(g_pCurInstInfo + 1, (s_nEndBlock - psxpc) / 4, XMMTYPE_GPRREG, i))
|
||||
{
|
||||
|
||||
// check if will be written in the future
|
||||
xMOV(ptr32[&psxRegs.GPR.r[i]], g_psxConstRegs[i]);
|
||||
g_psxFlushedConstReg |= 1 << i;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void _psxFlushCachedRegs()
|
||||
{
|
||||
_psxFlushConstRegs();
|
||||
}
|
||||
|
||||
void _psxFlushConstReg(int reg)
|
||||
{
|
||||
if (PSX_IS_CONST1(reg) && !(g_psxFlushedConstReg & (1 << reg)))
|
||||
|
@ -415,6 +396,8 @@ void _psxFlushConstReg(int reg)
|
|||
|
||||
void _psxFlushConstRegs()
|
||||
{
|
||||
// TODO: Combine flushes
|
||||
|
||||
int i;
|
||||
|
||||
// flush constants
|
||||
|
@ -442,66 +425,88 @@ void _psxDeleteReg(int reg, int flush)
|
|||
if (!reg)
|
||||
return;
|
||||
if (flush && PSX_IS_CONST1(reg))
|
||||
{
|
||||
_psxFlushConstReg(reg);
|
||||
return;
|
||||
}
|
||||
|
||||
PSX_DEL_CONST(reg);
|
||||
_deleteX86reg(X86TYPE_PSX, reg, flush ? 0 : 2);
|
||||
_deletePSXtoX86reg(reg, flush ? DELETE_REG_FREE : DELETE_REG_FREE_NO_WRITEBACK);
|
||||
}
|
||||
|
||||
// Emits code that copies the value of IOP GPR `fromgpr` into the host register `to`.
//
// Three sources are tried in order:
//   1. the GPR is currently a known constant -> move the constant as an immediate;
//   2. the GPR lives in a host register      -> register-to-register move;
//   3. otherwise                             -> load from psxRegs.GPR.r[fromgpr].
void _psxMoveGPRtoR(const xRegister32& to, int fromgpr)
|
||||
{
|
||||
if (PSX_IS_CONST1(fromgpr))
|
||||
{
|
||||
xMOV(to, g_psxConstRegs[fromgpr]);
|
||||
}
|
||||
else
|
||||
{
|
||||
// check x86
|
||||
// When EEINST_USEDTEST(fromgpr) holds, a host register is allocated for the GPR
// in read mode; otherwise only an existing mapping is looked up. A negative
// result is treated below as "no host register".
// (presumably USEDTEST means the GPR is used again later - confirm against its definition)
const int reg = EEINST_USEDTEST(fromgpr) ? _allocX86reg(X86TYPE_PSX, fromgpr, MODE_READ) : _checkX86reg(X86TYPE_PSX, fromgpr, MODE_READ);
|
||||
if (reg >= 0)
|
||||
xMOV(to, xRegister32(reg));
|
||||
else
|
||||
xMOV(to, ptr[&psxRegs.GPR.r[fromgpr]]);
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
void _psxMoveGPRtoM(uptr to, int fromgpr)
|
||||
{
|
||||
if (PSX_IS_CONST1(fromgpr))
|
||||
xMOV(ptr32[(u32*)(to)], g_psxConstRegs[fromgpr] );
|
||||
else {
|
||||
// check x86
|
||||
xMOV(eax, ptr[&psxRegs.GPR.r[ fromgpr ] ]);
|
||||
xMOV(ptr[(void*)(to)], eax);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr)
|
||||
{
|
||||
if( PSX_IS_CONST1(fromgpr) )
|
||||
xMOV(ptr32[xAddressReg(to)], g_psxConstRegs[fromgpr] );
|
||||
else {
|
||||
// check x86
|
||||
xMOV(ptr32[(u32*)(to)], g_psxConstRegs[fromgpr]);
|
||||
}
|
||||
else
|
||||
{
|
||||
const int reg = EEINST_USEDTEST(fromgpr) ? _allocX86reg(X86TYPE_PSX, fromgpr, MODE_READ) : _checkX86reg(X86TYPE_PSX, fromgpr, MODE_READ);
|
||||
if (reg >= 0)
|
||||
{
|
||||
xMOV(ptr32[(u32*)(to)], xRegister32(reg));
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(eax, ptr[&psxRegs.GPR.r[fromgpr]]);
|
||||
xMOV(ptr[xAddressReg(to)], eax);
|
||||
xMOV(ptr32[(u32*)(to)], eax);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void _psxFlushCall(int flushtype)
|
||||
{
|
||||
// x86-32 ABI : These registers are not preserved across calls:
|
||||
_freeX86reg(eax);
|
||||
_freeX86reg(ecx);
|
||||
_freeX86reg(edx);
|
||||
// Free registers that are not saved across function calls (x86-32 ABI):
|
||||
for (u32 i = 0; i < iREGCNT_GPR; i++)
|
||||
{
|
||||
if (!x86regs[i].inuse)
|
||||
continue;
|
||||
|
||||
if (xRegisterBase::IsCallerSaved(i) ||
|
||||
((flushtype & FLUSH_FREE_NONTEMP_X86) && x86regs[i].type != X86TYPE_TEMP) ||
|
||||
((flushtype & FLUSH_FREE_TEMP_X86) && x86regs[i].type == X86TYPE_TEMP))
|
||||
{
|
||||
_freeX86reg(i);
|
||||
}
|
||||
}
|
||||
|
||||
if (flushtype & FLUSH_ALL_X86)
|
||||
_flushX86regs();
|
||||
|
||||
if (flushtype & FLUSH_CONSTANT_REGS)
|
||||
_psxFlushConstRegs();
|
||||
|
||||
if ((flushtype & FLUSH_PC) /*&& !g_cpuFlushedPC*/)
|
||||
{
|
||||
xMOV(ptr32[&psxRegs.pc], psxpc);
|
||||
//g_cpuFlushedPC = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (flushtype & FLUSH_CACHED_REGS)
|
||||
_psxFlushConstRegs();
|
||||
// Flushes all pending IOP register state: every GPR index 0..31 that
// PSX_IS_CONST1 reports as constant is passed to _psxFlushConstReg(), and then
// _flushX86regs() is called for the host register cache.
void _psxFlushAllDirty()
|
||||
{
|
||||
// TODO: Combine flushes
|
||||
for (u32 i = 0; i < 32; ++i)
|
||||
{
|
||||
if (PSX_IS_CONST1(i))
|
||||
_psxFlushConstReg(i);
|
||||
}
|
||||

||||
_flushX86regs();
|
||||
}
|
||||
|
||||
void psxSaveBranchState()
|
||||
|
@ -538,41 +543,235 @@ void _psxOnWriteReg(int reg)
|
|||
PSX_DEL_CONST(reg);
|
||||
}
|
||||
|
||||
// Tries to recompile the instruction at psxpc ahead of the instruction that is
// currently being recompiled (the "swap" of a branch with its delay slot).
//
// rs, rt, rd are GPR indices belonging to the calling instruction; 0 means
// "no register" for the purpose of the hazard checks. As used below:
//   - rs / rt must not be overwritten by the instruction at psxpc;
//   - rd must be neither read (as rs/rt) nor written by the instruction at psxpc.
// (presumably rs/rt are the branch's sources and rd its link register - confirm with callers)
//
// Returns true if the instruction at psxpc was recompiled here via
// psxRecompileNextInstruction(true, true); returns false, with nothing emitted,
// when already inside a delay slot or when the opcode is not on the allow-list
// or conflicts with rs/rt/rd.
bool psxTrySwapDelaySlot(u32 rs, u32 rt, u32 rd)
|
||||
{
|
||||
#if 1
|
||||
// Never swap while a delay slot is itself being recompiled.
if (s_recompilingDelaySlot)
|
||||
return false;
|
||||

||||
// An all-zero encoding is swapped unconditionally, with no register checks.
const u32 opcode_encoded = iopMemRead32(psxpc);
|
||||
if (opcode_encoded == 0)
|
||||
{
|
||||
psxRecompileNextInstruction(true, true);
|
||||
return true;
|
||||
}
|
||||

||||
// Register fields of the candidate instruction (bits 25..21, 20..16, 15..11).
const u32 opcode_rs = ((opcode_encoded >> 21) & 0x1F);
|
||||
const u32 opcode_rt = ((opcode_encoded >> 16) & 0x1F);
|
||||
const u32 opcode_rd = ((opcode_encoded >> 11) & 0x1F);
|
||||

||||
// Dispatch on the primary opcode (bits 31..26). Anything not listed is unsafe.
switch (opcode_encoded >> 26)
|
||||
{
|
||||
case 8: // ADDI
|
||||
case 9: // ADDIU
|
||||
case 10: // SLTI
|
||||
case 11: // SLTIU
|
||||
case 12: // ANDI
|
||||
case 13: // ORI
|
||||
case 14: // XORI
|
||||
case 15: // LUI
|
||||
case 32: // LB
|
||||
case 33: // LH
|
||||
case 34: // LWL
|
||||
case 35: // LW
|
||||
case 36: // LBU
|
||||
case 37: // LHU
|
||||
case 38: // LWR
|
||||
case 39: // LWU
|
||||
case 40: // SB
|
||||
case 41: // SH
|
||||
case 42: // SWL
|
||||
case 43: // SW
|
||||
case 46: // SWR
|
||||
{
|
||||
// Immediate/load/store forms: the rt field is treated as the written register.
// Unsafe if it matches the caller's rs or rt, or if the caller's rd matches
// either register field of this instruction.
if ((rs != 0 && rs == opcode_rt) || (rt != 0 && rt == opcode_rt) || (rd != 0 && (rd == opcode_rs || rd == opcode_rt)))
|
||||
goto is_unsafe;
|
||||
}
|
||||
break;
|
||||

||||
// Coprocessor 2 load/store: swapped without any GPR hazard check.
case 50: // LWC2
|
||||
case 58: // SWC2
|
||||
break;
|
||||

||||
case 0: // SPECIAL
|
||||
{
|
||||
// Dispatch on the function field (bits 5..0).
switch (opcode_encoded & 0x3F)
|
||||
{
|
||||
case 0: // SLL
|
||||
case 2: // SRL
|
||||
case 3: // SRA
|
||||
case 4: // SLLV
|
||||
case 6: // SRLV
|
||||
case 7: // SRAV
|
||||
case 32: // ADD
|
||||
case 33: // ADDU
|
||||
case 34: // SUB
|
||||
case 35: // SUBU
|
||||
case 36: // AND
|
||||
case 37: // OR
|
||||
case 38: // XOR
|
||||
case 39: // NOR
|
||||
case 42: // SLT
|
||||
case 43: // SLTU
|
||||
{
|
||||
// Three-register forms: the rd field is the written register.
if ((rs != 0 && rs == opcode_rd) || (rt != 0 && rt == opcode_rd) || (rd != 0 && (rd == opcode_rs || rd == opcode_rt)))
|
||||
goto is_unsafe;
|
||||
}
|
||||
break;
|
||||

||||
// These are swapped without any GPR hazard check.
case 15: // SYNC
|
||||
case 24: // MULT
|
||||
case 25: // MULTU
|
||||
case 26: // DIV
|
||||
case 27: // DIVU
|
||||
break;
|
||||

||||
// Every other SPECIAL function is never swapped.
default:
|
||||
goto is_unsafe;
|
||||
}
|
||||
}
|
||||
break;
|
||||

||||
case 16: // COP0
|
||||
case 17: // COP1
|
||||
case 18: // COP2
|
||||
case 19: // COP3
|
||||
{
|
||||
// Dispatch on the coprocessor sub-opcode (bits 25..21); the mnemonics below
// name the COP0 forms but the cases apply to all four coprocessor opcodes.
switch ((opcode_encoded >> 21) & 0x1F)
|
||||
{
|
||||
case 0: // MFC0
|
||||
case 2: // CFC0
|
||||
{
|
||||
// Moves from a coprocessor: the rt field is the written GPR.
if ((rs != 0 && rs == opcode_rt) || (rt != 0 && rt == opcode_rt) || (rd != 0 && rd == opcode_rt))
|
||||
goto is_unsafe;
|
||||
}
|
||||
break;
|
||||

||||
// Moves to a coprocessor: swapped without any GPR hazard check.
case 4: // MTC0
|
||||
case 6: // CTC0
|
||||
break;
|
||||

||||
default:
|
||||
{
|
||||
// swap when it's GTE
|
||||
// i.e. remaining sub-opcodes are only swapped for primary opcode 18 (COP2).
if ((opcode_encoded >> 26) != 18)
|
||||
goto is_unsafe;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||

||||
// Any other primary opcode is never swapped.
default:
|
||||
goto is_unsafe;
|
||||
}
|
||||

||||
// Safe: recompile the instruction at psxpc now and report the swap.
RALOG("Swapping delay slot %08X %s\n", psxpc, disR3000AF(iopMemRead32(psxpc), psxpc));
|
||||
psxRecompileNextInstruction(true, true);
|
||||
return true;
|
||||

||||
is_unsafe:
|
||||
RALOG("NOT SWAPPING delay slot %08X %s\n", psxpc, disR3000AF(iopMemRead32(psxpc), psxpc));
|
||||
return false;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Tries to reuse the host register that currently holds GPR `from` as the host
// register for GPR `to`, so that no separate move into a new register is needed.
//
//   to       - destination GPR index
//   from     - source GPR index currently cached in host register `fromx86`
//   fromx86  - index into x86regs[] for `from`; a negative value means "not cached"
//   other    - the instruction's other source GPR index
//   xmminfo  - instruction flags; XMMINFO_NORENAME disables renaming
//
// Returns `fromx86` when the rename was performed, or -1 when it was not.
int psxTryRenameReg(int to, int from, int fromx86, int other, int xmminfo)
|
||||
{
|
||||
// can't rename when in form Rd = Rs op Rt and Rd == Rs or Rd == Rt
|
||||
// Also refused when renaming is disabled, `from` has no host register, or
// EEINST_RENAMETEST(from) fails.
// (presumably RENAMETEST means `from` is not needed afterwards - confirm against its definition)
if ((xmminfo & XMMINFO_NORENAME) || fromx86 < 0 || to == from || to == other || !EEINST_RENAMETEST(from))
|
||||
return -1;
|
||||

||||
RALOG("Renaming %s to %s\n", R3000A::disRNameGPR[from], R3000A::disRNameGPR[to]);
|
||||

||||
// flush back when it's been modified
|
||||
// (only when the host register is in write mode and EEINST_LIVETEST(from) holds)
if (x86regs[fromx86].mode & MODE_WRITE && EEINST_LIVETEST(from))
|
||||
_writebackX86Reg(fromx86);
|
||||

||||
// remove all references to renamed-to register
|
||||
// The old mapping of `to` is freed without writeback and its constant flag is
// cleared, since `to` is about to receive a new value.
_deletePSXtoX86reg(to, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
PSX_DEL_CONST(to);
|
||||

||||
// and do the actual rename, new register has been modified.
|
||||
x86regs[fromx86].reg = to;
|
||||
x86regs[fromx86].mode |= MODE_READ | MODE_WRITE;
|
||||
return fromx86;
|
||||
}
|
||||
|
||||
// rd = rs op rt
|
||||
void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode)
|
||||
void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode, int xmminfo)
|
||||
{
|
||||
if (!_Rd_)
|
||||
return;
|
||||
|
||||
// for now, don't support xmm
|
||||
|
||||
_deleteX86reg(X86TYPE_PSX, _Rs_, 1);
|
||||
_deleteX86reg(X86TYPE_PSX, _Rt_, 1);
|
||||
_deleteX86reg(X86TYPE_PSX, _Rd_, 0);
|
||||
|
||||
if (PSX_IS_CONST2(_Rs_, _Rt_))
|
||||
{
|
||||
_deletePSXtoX86reg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
PSX_SET_CONST(_Rd_);
|
||||
constcode();
|
||||
return;
|
||||
}
|
||||
|
||||
if (PSX_IS_CONST1(_Rs_))
|
||||
// we have to put these up here, because the register allocator below will wipe out const flags
|
||||
// for the destination register when/if it switches it to write mode.
|
||||
const bool s_is_const = PSX_IS_CONST1(_Rs_);
|
||||
const bool t_is_const = PSX_IS_CONST1(_Rt_);
|
||||
const bool d_is_const = PSX_IS_CONST1(_Rd_);
|
||||
const bool s_is_used = EEINST_USEDTEST(_Rs_);
|
||||
const bool t_is_used = EEINST_USEDTEST(_Rt_);
|
||||
|
||||
if (!s_is_const)
|
||||
_addNeededGPRtoX86reg(_Rs_);
|
||||
if (!t_is_const)
|
||||
_addNeededGPRtoX86reg(_Rt_);
|
||||
if (!d_is_const)
|
||||
_addNeededGPRtoX86reg(_Rd_);
|
||||
|
||||
u32 info = 0;
|
||||
int regs = _checkX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
|
||||
if (regs < 0 && ((!s_is_const && s_is_used) || _Rs_ == _Rd_))
|
||||
regs = _allocX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
|
||||
if (regs >= 0)
|
||||
info |= PROCESS_EE_SET_S(regs);
|
||||
|
||||
int regt = _checkX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
|
||||
if (regt < 0 && ((!t_is_const && t_is_used) || _Rt_ == _Rd_))
|
||||
regt = _allocX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
|
||||
if (regt >= 0)
|
||||
info |= PROCESS_EE_SET_T(regt);
|
||||
|
||||
// If S is no longer live, swap D for S. Saves the move.
|
||||
int regd = psxTryRenameReg(_Rd_, _Rs_, regs, _Rt_, xmminfo);
|
||||
if (regd < 0)
|
||||
{
|
||||
constscode(0);
|
||||
// TODO: If not live, write direct to memory.
|
||||
regd = _allocX86reg(X86TYPE_PSX, _Rd_, MODE_WRITE);
|
||||
}
|
||||
if (regd >= 0)
|
||||
info |= PROCESS_EE_SET_D(regd);
|
||||
|
||||
_validateRegs();
|
||||
|
||||
if (s_is_const && regs < 0)
|
||||
{
|
||||
// This *must* go inside the if, because of when _Rs_ = _Rd_
|
||||
PSX_DEL_CONST(_Rd_);
|
||||
constscode(info /*| PROCESS_CONSTS*/);
|
||||
return;
|
||||
}
|
||||
|
||||
if (PSX_IS_CONST1(_Rt_))
|
||||
if (t_is_const && regt < 0)
|
||||
{
|
||||
consttcode(0);
|
||||
PSX_DEL_CONST(_Rd_);
|
||||
consttcode(info /*| PROCESS_CONSTT*/);
|
||||
return;
|
||||
}
|
||||
|
||||
noconstcode(0);
|
||||
PSX_DEL_CONST(_Rd_);
|
||||
noconstcode(info);
|
||||
}
|
||||
|
||||
static void psxRecompileIrxImport()
|
||||
|
@ -619,7 +818,7 @@ static void psxRecompileIrxImport()
|
|||
}
|
||||
|
||||
// rt = rs op imm16
|
||||
void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
|
||||
void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode, int xmminfo)
|
||||
{
|
||||
if (!_Rt_)
|
||||
{
|
||||
|
@ -629,75 +828,157 @@ void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
|
|||
return;
|
||||
}
|
||||
|
||||
// for now, don't support xmm
|
||||
|
||||
_deleteX86reg(X86TYPE_PSX, _Rs_, 1);
|
||||
_deleteX86reg(X86TYPE_PSX, _Rt_, 0);
|
||||
|
||||
if (PSX_IS_CONST1(_Rs_))
|
||||
{
|
||||
_deletePSXtoX86reg(_Rt_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
PSX_SET_CONST(_Rt_);
|
||||
constcode();
|
||||
return;
|
||||
}
|
||||
|
||||
noconstcode(0);
|
||||
_addNeededPSXtoX86reg(_Rs_);
|
||||
_addNeededPSXtoX86reg(_Rt_);
|
||||
|
||||
u32 info = 0;
|
||||
|
||||
const bool s_is_used = EEINST_USEDTEST(_Rs_);
|
||||
const int regs = s_is_used ? _allocX86reg(X86TYPE_PSX, _Rs_, MODE_READ) : _checkX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
|
||||
if (regs >= 0)
|
||||
info |= PROCESS_EE_SET_S(regs);
|
||||
|
||||
int regt = psxTryRenameReg(_Rt_, _Rs_, regs, 0, xmminfo);
|
||||
if (regt < 0)
|
||||
{
|
||||
regt = _allocX86reg(X86TYPE_PSX, _Rt_, MODE_WRITE);
|
||||
}
|
||||
if (regt >= 0)
|
||||
info |= PROCESS_EE_SET_T(regt);
|
||||
|
||||
_validateRegs();
|
||||
|
||||
PSX_DEL_CONST(_Rt_);
|
||||
noconstcode(info);
|
||||
}
|
||||
|
||||
// rd = rt op sa
|
||||
void psxRecompileCodeConst2(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
|
||||
void psxRecompileCodeConst2(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode, int xmminfo)
|
||||
{
|
||||
if (!_Rd_)
|
||||
return;
|
||||
|
||||
// for now, don't support xmm
|
||||
|
||||
_deleteX86reg(X86TYPE_PSX, _Rt_, 1);
|
||||
_deleteX86reg(X86TYPE_PSX, _Rd_, 0);
|
||||
|
||||
if (PSX_IS_CONST1(_Rt_))
|
||||
{
|
||||
_deletePSXtoX86reg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
PSX_SET_CONST(_Rd_);
|
||||
constcode();
|
||||
return;
|
||||
}
|
||||
|
||||
noconstcode(0);
|
||||
_addNeededPSXtoX86reg(_Rt_);
|
||||
_addNeededPSXtoX86reg(_Rd_);
|
||||
|
||||
u32 info = 0;
|
||||
const bool s_is_used = EEINST_USEDTEST(_Rt_);
|
||||
const int regt = s_is_used ? _allocX86reg(X86TYPE_PSX, _Rt_, MODE_READ) : _checkX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
|
||||
if (regt >= 0)
|
||||
info |= PROCESS_EE_SET_T(regt);
|
||||
|
||||
int regd = psxTryRenameReg(_Rd_, _Rt_, regt, 0, xmminfo);
|
||||
if (regd < 0)
|
||||
{
|
||||
regd = _allocX86reg(X86TYPE_PSX, _Rd_, MODE_WRITE);
|
||||
}
|
||||
if (regd >= 0)
|
||||
info |= PROCESS_EE_SET_D(regd);
|
||||
|
||||
_validateRegs();
|
||||
|
||||
PSX_DEL_CONST(_Rd_);
|
||||
noconstcode(info);
|
||||
}
|
||||
|
||||
// rd = rt MULT rs (SPECIAL)
|
||||
void psxRecompileCodeConst3(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode, int LOHI)
|
||||
{
|
||||
_deleteX86reg(X86TYPE_PSX, _Rs_, 1);
|
||||
_deleteX86reg(X86TYPE_PSX, _Rt_, 1);
|
||||
|
||||
if (LOHI)
|
||||
{
|
||||
_deleteX86reg(X86TYPE_PSX, PSX_HI, 1);
|
||||
_deleteX86reg(X86TYPE_PSX, PSX_LO, 1);
|
||||
}
|
||||
|
||||
if (PSX_IS_CONST2(_Rs_, _Rt_))
|
||||
{
|
||||
if (LOHI)
|
||||
{
|
||||
_deletePSXtoX86reg(PSX_LO, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
_deletePSXtoX86reg(PSX_HI, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
}
|
||||
|
||||
constcode();
|
||||
return;
|
||||
}
|
||||
|
||||
if (PSX_IS_CONST1(_Rs_))
|
||||
// we have to put these up here, because the register allocator below will wipe out const flags
|
||||
// for the destination register when/if it switches it to write mode.
|
||||
const bool s_is_const = PSX_IS_CONST1(_Rs_);
|
||||
const bool t_is_const = PSX_IS_CONST1(_Rt_);
|
||||
const bool s_is_used = EEINST_USEDTEST(_Rs_);
|
||||
const bool t_is_used = EEINST_USEDTEST(_Rt_);
|
||||
|
||||
if (!s_is_const)
|
||||
_addNeededGPRtoX86reg(_Rs_);
|
||||
if (!t_is_const)
|
||||
_addNeededGPRtoX86reg(_Rt_);
|
||||
if (LOHI)
|
||||
{
|
||||
constscode(0);
|
||||
if (EEINST_LIVETEST(PSX_LO))
|
||||
_addNeededPSXtoX86reg(PSX_LO);
|
||||
if (EEINST_LIVETEST(PSX_HI))
|
||||
_addNeededPSXtoX86reg(PSX_HI);
|
||||
}
|
||||
|
||||
u32 info = 0;
|
||||
int regs = _checkX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
|
||||
if (regs < 0 && !s_is_const && s_is_used)
|
||||
regs = _allocX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
|
||||
if (regs >= 0)
|
||||
info |= PROCESS_EE_SET_S(regs);
|
||||
|
||||
// need at least one in a register
|
||||
int regt = _checkX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
|
||||
if (regs < 0 || (regt < 0 && !t_is_const && t_is_used))
|
||||
regt = _allocX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
|
||||
if (regt >= 0)
|
||||
info |= PROCESS_EE_SET_T(regt);
|
||||
|
||||
if (LOHI)
|
||||
{
|
||||
// going to destroy lo/hi, so invalidate if we're writing it back to state
|
||||
const bool lo_is_used = EEINST_USEDTEST(PSX_LO);
|
||||
const int reglo = lo_is_used ? _allocX86reg(X86TYPE_PSX, PSX_LO, MODE_WRITE) : -1;
|
||||
if (reglo >= 0)
|
||||
info |= PROCESS_EE_SET_LO(reglo) | PROCESS_EE_LO;
|
||||
else
|
||||
_deletePSXtoX86reg(PSX_LO, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
|
||||
const bool hi_is_live = EEINST_USEDTEST(PSX_HI);
|
||||
const int reghi = hi_is_live ? _allocX86reg(X86TYPE_PSX, PSX_HI, MODE_WRITE) : -1;
|
||||
if (reghi >= 0)
|
||||
info |= PROCESS_EE_SET_HI(reghi) | PROCESS_EE_HI;
|
||||
else
|
||||
_deletePSXtoX86reg(PSX_HI, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
}
|
||||
|
||||
_validateRegs();
|
||||
|
||||
if (s_is_const && regs < 0)
|
||||
{
|
||||
// This *must* go inside the if, because of when _Rs_ = _Rd_
|
||||
constscode(info /*| PROCESS_CONSTS*/);
|
||||
return;
|
||||
}
|
||||
|
||||
if (PSX_IS_CONST1(_Rt_))
|
||||
if (t_is_const && regt < 0)
|
||||
{
|
||||
consttcode(0);
|
||||
consttcode(info /*| PROCESS_CONSTT*/);
|
||||
return;
|
||||
}
|
||||
|
||||
noconstcode(0);
|
||||
noconstcode(info);
|
||||
}
|
||||
|
||||
static u8* m_recBlockAlloc = NULL;
|
||||
|
@ -730,10 +1011,14 @@ static void recAlloc()
|
|||
}
|
||||
|
||||
u8* curpos = m_recBlockAlloc;
|
||||
recRAM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::IopRam / 4) * sizeof(BASEBLOCK);
|
||||
recROM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom / 4) * sizeof(BASEBLOCK);
|
||||
recROM1 = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom1 / 4) * sizeof(BASEBLOCK);
|
||||
recROM2 = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom2 / 4) * sizeof(BASEBLOCK);
|
||||
recRAM = (BASEBLOCK*)curpos;
|
||||
curpos += (Ps2MemSize::IopRam / 4) * sizeof(BASEBLOCK);
|
||||
recROM = (BASEBLOCK*)curpos;
|
||||
curpos += (Ps2MemSize::Rom / 4) * sizeof(BASEBLOCK);
|
||||
recROM1 = (BASEBLOCK*)curpos;
|
||||
curpos += (Ps2MemSize::Rom1 / 4) * sizeof(BASEBLOCK);
|
||||
recROM2 = (BASEBLOCK*)curpos;
|
||||
curpos += (Ps2MemSize::Rom2 / 4) * sizeof(BASEBLOCK);
|
||||
|
||||
|
||||
if (!s_pInstCache)
|
||||
|
@ -929,35 +1214,39 @@ void psxSetBranchReg(u32 reg)
|
|||
|
||||
if (reg != 0xffffffff)
|
||||
{
|
||||
_allocX86reg(calleeSavedReg2d, X86TYPE_PSX_PCWRITEBACK, 0, MODE_WRITE);
|
||||
_psxMoveGPRtoR(calleeSavedReg2d, reg);
|
||||
const bool swap = psxTrySwapDelaySlot(reg, 0, 0);
|
||||
|
||||
psxRecompileNextInstruction(1);
|
||||
|
||||
if (x86regs[calleeSavedReg2d.GetId()].inuse)
|
||||
int wbreg = -1;
|
||||
if (!swap)
|
||||
{
|
||||
pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PSX_PCWRITEBACK);
|
||||
xMOV(ptr32[&psxRegs.pc], calleeSavedReg2d);
|
||||
x86regs[calleeSavedReg2d.GetId()].inuse = 0;
|
||||
#ifdef PCSX2_DEBUG
|
||||
xOR(calleeSavedReg2d, calleeSavedReg2d);
|
||||
#endif
|
||||
wbreg = _allocX86reg(X86TYPE_PCWRITEBACK, 0, MODE_WRITE | MODE_CALLEESAVED);
|
||||
_psxMoveGPRtoR(xRegister32(wbreg), reg);
|
||||
|
||||
psxRecompileNextInstruction(true, false);
|
||||
|
||||
if (x86regs[wbreg].inuse && x86regs[wbreg].type == X86TYPE_PCWRITEBACK)
|
||||
{
|
||||
xMOV(ptr32[&psxRegs.pc], xRegister32(wbreg));
|
||||
x86regs[wbreg].inuse = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(eax, ptr32[&psxRegs.pcWriteback]);
|
||||
xMOV(ptr32[&psxRegs.pc], eax);
|
||||
|
||||
#ifdef PCSX2_DEBUG
|
||||
xOR(eax, eax);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef PCSX2_DEBUG
|
||||
xForwardJNZ8 skipAssert;
|
||||
xWrite8(0xcc);
|
||||
skipAssert.SetTarget();
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
if (PSX_IS_DIRTY_CONST(reg) || _hasX86reg(X86TYPE_PSX, reg, 0))
|
||||
{
|
||||
const int x86reg = _allocX86reg(X86TYPE_PSX, reg, MODE_READ);
|
||||
xMOV(ptr32[&psxRegs.pc], xRegister32(x86reg));
|
||||
}
|
||||
else
|
||||
{
|
||||
_psxMoveGPRtoM((uptr)&psxRegs.pc, reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_psxFlushCall(FLUSH_EVERYTHING);
|
||||
|
@ -1239,17 +1528,47 @@ static void psxEncodeMemcheck()
|
|||
bool store = (opcode.flags & IS_STORE) != 0;
|
||||
switch (opcode.flags & MEMTYPE_MASK)
|
||||
{
|
||||
case MEMTYPE_BYTE: psxRecMemcheck(op, 8, store); break;
|
||||
case MEMTYPE_HALF: psxRecMemcheck(op, 16, store); break;
|
||||
case MEMTYPE_WORD: psxRecMemcheck(op, 32, store); break;
|
||||
case MEMTYPE_DWORD: psxRecMemcheck(op, 64, store); break;
|
||||
case MEMTYPE_BYTE:
|
||||
psxRecMemcheck(op, 8, store);
|
||||
break;
|
||||
case MEMTYPE_HALF:
|
||||
psxRecMemcheck(op, 16, store);
|
||||
break;
|
||||
case MEMTYPE_WORD:
|
||||
psxRecMemcheck(op, 32, store);
|
||||
break;
|
||||
case MEMTYPE_DWORD:
|
||||
psxRecMemcheck(op, 64, store);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void psxRecompileNextInstruction(int delayslot)
|
||||
void psxRecompileNextInstruction(bool delayslot, bool swapped_delayslot)
|
||||
{
|
||||
// pblock isn't used elsewhere in this function.
|
||||
//BASEBLOCK* pblock = PSX_GETBLOCK(psxpc);
|
||||
#ifdef DUMP_BLOCKS
|
||||
const bool dump_block = true;
|
||||
|
||||
const u8* instStart = x86Ptr;
|
||||
ZydisDecoder disas_decoder;
|
||||
ZydisFormatter disas_formatter;
|
||||
ZydisDecodedInstruction disas_instruction;
|
||||
|
||||
if (dump_block)
|
||||
{
|
||||
fprintf(stderr, "Compiling %s%s\n", delayslot ? "delay slot " : "", disR3000AF(iopMemRead32(psxpc), psxpc));
|
||||
if (!delayslot)
|
||||
{
|
||||
ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64);
|
||||
ZydisFormatterInit(&disas_formatter, ZYDIS_FORMATTER_STYLE_INTEL);
|
||||
s_old_print_address = (ZydisFormatterFunc)&ZydisFormatterPrintAddressAbsolute;
|
||||
ZydisFormatterSetHook(&disas_formatter, ZYDIS_FORMATTER_FUNC_PRINT_ADDRESS_ABS, (const void**)&s_old_print_address);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
const int old_code = psxRegs.code;
|
||||
EEINST* old_inst_info = g_pCurInstInfo;
|
||||
s_recompilingDelaySlot = delayslot;
|
||||
|
||||
// add breakpoint
|
||||
if (!delayslot)
|
||||
|
@ -1257,11 +1576,9 @@ void psxRecompileNextInstruction(int delayslot)
|
|||
psxEncodeBreakpoint();
|
||||
psxEncodeMemcheck();
|
||||
}
|
||||
|
||||
if (IsDebugBuild)
|
||||
else
|
||||
{
|
||||
xNOP();
|
||||
xMOV(eax, psxpc);
|
||||
_clearNeededX86regs();
|
||||
}
|
||||
|
||||
psxRegs.code = iopMemRead32(psxpc);
|
||||
|
@ -1274,7 +1591,31 @@ void psxRecompileNextInstruction(int delayslot)
|
|||
rpsxBSC[psxRegs.code >> 26]();
|
||||
s_psxBlockCycles += g_iopCyclePenalty;
|
||||
|
||||
if (!swapped_delayslot)
|
||||
_clearNeededX86regs();
|
||||
|
||||
if (swapped_delayslot)
|
||||
{
|
||||
psxRegs.code = old_code;
|
||||
g_pCurInstInfo = old_inst_info;
|
||||
}
|
||||
|
||||
#ifdef DUMP_BLOCKS
|
||||
if (dump_block && !delayslot)
|
||||
{
|
||||
const u8* instPtr = instStart;
|
||||
ZyanUSize instLength = static_cast<ZyanUSize>(x86Ptr - instStart);
|
||||
while (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&disas_decoder, instPtr, instLength, &disas_instruction)))
|
||||
{
|
||||
char buffer[256];
|
||||
if (ZYAN_SUCCESS(ZydisFormatterFormatInstruction(&disas_formatter, &disas_instruction, buffer, sizeof(buffer), (ZyanU64)instPtr)))
|
||||
std::fprintf(stderr, " %016" PRIX64 " %s\n", (u64)instPtr, buffer);
|
||||
|
||||
instPtr += disas_instruction.length;
|
||||
instLength -= disas_instruction.length;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void PreBlockCheck(u32 blockpc)
|
||||
|
@ -1370,8 +1711,7 @@ static void iopRecRecompile(const u32 startpc)
|
|||
|
||||
s_pCurBlock = PSX_GETBLOCK(startpc);
|
||||
|
||||
pxAssert(s_pCurBlock->GetFnptr() == (uptr)iopJITCompile
|
||||
|| s_pCurBlock->GetFnptr() == (uptr)iopJITCompileInBlock);
|
||||
pxAssert(s_pCurBlock->GetFnptr() == (uptr)iopJITCompile || s_pCurBlock->GetFnptr() == (uptr)iopJITCompileInBlock);
|
||||
|
||||
s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));
|
||||
|
||||
|
@ -1408,9 +1748,7 @@ static void iopRecRecompile(const u32 startpc)
|
|||
while (1)
|
||||
{
|
||||
BASEBLOCK* pblock = PSX_GETBLOCK(i);
|
||||
if (i != startpc
|
||||
&& pblock->GetFnptr() != (uptr)iopJITCompile
|
||||
&& pblock->GetFnptr() != (uptr)iopJITCompileInBlock)
|
||||
if (i != startpc && pblock->GetFnptr() != (uptr)iopJITCompile && pblock->GetFnptr() != (uptr)iopJITCompileInBlock)
|
||||
{
|
||||
// branch = 3
|
||||
willbranch3 = 1;
|
||||
|
@ -1449,7 +1787,10 @@ static void iopRecRecompile(const u32 startpc)
|
|||
goto StartRecomp;
|
||||
|
||||
// branches
|
||||
case 4: case 5: case 6: case 7:
|
||||
case 4:
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
s_branchTo = _Imm_ * 4 + i + 4;
|
||||
if (s_branchTo > startpc && s_branchTo < i)
|
||||
s_nEndBlock = s_branchTo;
|
||||
|
@ -1525,7 +1866,7 @@ StartRecomp:
|
|||
g_pCurInstInfo = s_pInstCache;
|
||||
while (!psxbranch && psxpc < s_nEndBlock)
|
||||
{
|
||||
psxRecompileNextInstruction(0);
|
||||
psxRecompileNextInstruction(false, false);
|
||||
}
|
||||
|
||||
if (IsDebugBuild && (psxdump & 1))
|
||||
|
|
|
@ -34,25 +34,17 @@ static const int psxInstCycles_Load = 0;
|
|||
|
||||
extern uptr psxRecLUT[];
|
||||
|
||||
u8 _psxLoadWritesRs(u32 tempcode);
|
||||
u8 _psxIsLoadStore(u32 tempcode);
|
||||
|
||||
void _psxFlushAllUnused();
|
||||
int _psxFlushUnusedConstReg();
|
||||
void _psxFlushCachedRegs();
|
||||
void _psxFlushConstReg(int reg);
|
||||
void _psxFlushConstRegs();
|
||||
|
||||
void _psxDeleteReg(int reg, int flush);
|
||||
void _psxFlushCall(int flushtype);
|
||||
void _psxFlushAllDirty();
|
||||
|
||||
void _psxOnWriteReg(int reg);
|
||||
|
||||
void _psxMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr);
|
||||
#if 0
|
||||
void _psxMoveGPRtoM(uptr to, int fromgpr);
|
||||
void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr);
|
||||
#endif
|
||||
|
||||
extern u32 psxpc; // recompiler pc
|
||||
extern int psxbranch; // set for branch
|
||||
|
@ -63,13 +55,14 @@ void psxLoadBranchState();
|
|||
|
||||
extern void psxSetBranchReg(u32 reg);
|
||||
extern void psxSetBranchImm(u32 imm);
|
||||
extern void psxRecompileNextInstruction(int delayslot);
|
||||
extern void psxRecompileNextInstruction(bool delayslot, bool swapped_delayslot);
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// IOP Constant Propagation Defines, Vars, and API - From here down!
|
||||
|
||||
#define PSX_IS_CONST1(reg) ((reg) < 32 && (g_psxHasConstReg & (1 << (reg))))
|
||||
#define PSX_IS_CONST2(reg1, reg2) ((g_psxHasConstReg & (1 << (reg1))) && (g_psxHasConstReg & (1 << (reg2))))
|
||||
#define PSX_IS_DIRTY_CONST(reg) ((reg) < 32 && (g_psxHasConstReg & (1 << (reg))) && (!(g_psxFlushedConstReg & (1 << (reg)))))
|
||||
#define PSX_SET_CONST(reg) \
|
||||
{ \
|
||||
if ((reg) < 32) \
|
||||
|
@ -91,28 +84,31 @@ extern u32 g_psxHasConstReg, g_psxFlushedConstReg;
|
|||
typedef void (*R3000AFNPTR)();
|
||||
typedef void (*R3000AFNPTR_INFO)(int info);
|
||||
|
||||
bool psxTrySwapDelaySlot(u32 rs, u32 rt, u32 rd);
|
||||
int psxTryRenameReg(int to, int from, int fromx86, int other, int xmminfo);
|
||||
|
||||
//
|
||||
// non mmx/xmm version, slower
|
||||
//
|
||||
// rd = rs op rt
|
||||
#define PSXRECOMPILE_CONSTCODE0(fn) \
|
||||
#define PSXRECOMPILE_CONSTCODE0(fn, info) \
|
||||
void rpsx##fn(void) \
|
||||
{ \
|
||||
psxRecompileCodeConst0(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_); \
|
||||
psxRecompileCodeConst0(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_, info); \
|
||||
}
|
||||
|
||||
// rt = rs op imm16
|
||||
#define PSXRECOMPILE_CONSTCODE1(fn) \
|
||||
#define PSXRECOMPILE_CONSTCODE1(fn, info) \
|
||||
void rpsx##fn(void) \
|
||||
{ \
|
||||
psxRecompileCodeConst1(rpsx##fn##_const, rpsx##fn##_); \
|
||||
psxRecompileCodeConst1(rpsx##fn##_const, rpsx##fn##_, info); \
|
||||
}
|
||||
|
||||
// rd = rt op sa
|
||||
#define PSXRECOMPILE_CONSTCODE2(fn) \
|
||||
#define PSXRECOMPILE_CONSTCODE2(fn, info) \
|
||||
void rpsx##fn(void) \
|
||||
{ \
|
||||
psxRecompileCodeConst2(rpsx##fn##_const, rpsx##fn##_); \
|
||||
psxRecompileCodeConst2(rpsx##fn##_const, rpsx##fn##_, info); \
|
||||
}
|
||||
|
||||
// [lo,hi] = rt op rs
|
||||
|
@ -130,11 +126,11 @@ typedef void (*R3000AFNPTR_INFO)(int info);
|
|||
}
|
||||
|
||||
// rd = rs op rt
|
||||
void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode);
|
||||
void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode, int xmminfo);
|
||||
// rt = rs op imm16
|
||||
void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode);
|
||||
void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode, int xmminfo);
|
||||
// rd = rt op sa
|
||||
void psxRecompileCodeConst2(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode);
|
||||
void psxRecompileCodeConst2(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode, int xmminfo);
|
||||
// [lo,hi] = rt op rs
|
||||
void psxRecompileCodeConst3(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode, int LOHI);
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -21,6 +21,9 @@
|
|||
#include "iCore.h"
|
||||
#include "R5900_Profiler.h"
|
||||
|
||||
// Register containing a pointer to our fastmem (4GB) area
|
||||
#define RFASTMEMBASE x86Emitter::rbp
|
||||
|
||||
extern u32 maxrecmem;
|
||||
extern u32 pc; // recompiler pc
|
||||
extern int g_branch; // set for branch
|
||||
|
@ -61,11 +64,16 @@ extern bool s_nBlockInterlocked; // Current block has VU0 interlocking
|
|||
|
||||
extern bool g_recompilingDelaySlot;
|
||||
|
||||
// Used for generating backpatch thunks for fastmem.
|
||||
u8* recBeginThunk();
|
||||
u8* recEndThunk();
|
||||
|
||||
// used when processing branches
|
||||
bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd);
|
||||
void SaveBranchState();
|
||||
void LoadBranchState();
|
||||
|
||||
void recompileNextInstruction(int delayslot);
|
||||
void recompileNextInstruction(bool delayslot, bool swapped_delay_slot);
|
||||
void SetBranchReg(u32 reg);
|
||||
void SetBranchImm(u32 imm);
|
||||
|
||||
|
@ -78,8 +86,7 @@ namespace R5900
|
|||
{
|
||||
namespace Dynarec
|
||||
{
|
||||
extern void recDoBranchImm(u32* jmpSkip, bool isLikely = false);
|
||||
extern void recDoBranchImm_Likely(u32* jmpSkip);
|
||||
extern void recDoBranchImm(u32 branchTo, u32* jmpSkip, bool isLikely = false, bool swappedDelaySlot = false);
|
||||
} // namespace Dynarec
|
||||
} // namespace R5900
|
||||
|
||||
|
@ -88,6 +95,7 @@ namespace R5900
|
|||
|
||||
#define GPR_IS_CONST1(reg) (EE_CONST_PROP && (reg) < 32 && (g_cpuHasConstReg & (1 << (reg))))
|
||||
#define GPR_IS_CONST2(reg1, reg2) (EE_CONST_PROP && (g_cpuHasConstReg & (1 << (reg1))) && (g_cpuHasConstReg & (1 << (reg2))))
|
||||
#define GPR_IS_DIRTY_CONST(reg) (EE_CONST_PROP && (reg) < 32 && (g_cpuHasConstReg & (1 << (reg))) && (!(g_cpuFlushedConstReg & (1 << (reg)))))
|
||||
#define GPR_SET_CONST(reg) \
|
||||
{ \
|
||||
if ((reg) < 32) \
|
||||
|
@ -106,29 +114,23 @@ namespace R5900
|
|||
alignas(16) extern GPR_reg64 g_cpuConstRegs[32];
|
||||
extern u32 g_cpuHasConstReg, g_cpuFlushedConstReg;
|
||||
|
||||
// gets a memory pointer to the constant reg
|
||||
u32* _eeGetConstReg(int reg);
|
||||
|
||||
// finds where the GPR is stored and moves lower 32 bits to EAX
|
||||
void _eeMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr);
|
||||
void _eeMoveGPRtoR(const x86Emitter::xRegister64& to, int fromgpr);
|
||||
void _eeMoveGPRtoM(uptr to, int fromgpr);
|
||||
void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr);
|
||||
void _signExtendToMem(void* mem);
|
||||
void eeSignExtendTo(int gpr, bool onlyupper = false);
|
||||
void _eeMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr, bool allow_preload = true);
|
||||
void _eeMoveGPRtoR(const x86Emitter::xRegister64& to, int fromgpr, bool allow_preload = true);
|
||||
void _eeMoveGPRtoM(uptr to, int fromgpr); // 32-bit only
|
||||
|
||||
void _eeFlushAllUnused();
|
||||
void _eeFlushAllDirty();
|
||||
void _eeOnWriteReg(int reg, int signext);
|
||||
|
||||
// totally deletes from const, xmm, and mmx entries
|
||||
// if flush is 1, also flushes to memory
|
||||
// if 0, only flushes if not an xmm reg (used when overwriting lower 64bits of reg)
|
||||
void _deleteEEreg(int reg, int flush);
|
||||
void _deleteEEreg128(int reg);
|
||||
|
||||
void _flushEEreg(int reg, bool clear = false);
|
||||
|
||||
// allocates memory on the instruction size and returns the pointer
|
||||
u32* recGetImm64(u32 hi, u32 lo);
|
||||
int _eeTryRenameReg(int to, int from, int fromx86, int other, int xmminfo);
|
||||
|
||||
//////////////////////////////////////
|
||||
// Templates for code recompilation //
|
||||
|
@ -141,14 +143,27 @@ typedef void (*R5900FNPTR_INFO)(int info);
|
|||
void rec##fn(void) \
|
||||
{ \
|
||||
EE::Profiler.EmitOp(eeOpcode::fn); \
|
||||
eeRecompileCode0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, xmminfo); \
|
||||
eeRecompileCode0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, (xmminfo)); \
|
||||
}
|
||||
|
||||
#define EERECOMPILE_CODEX(codename, fn) \
|
||||
#define EERECOMPILE_CODERC0(fn, xmminfo) \
|
||||
void rec##fn(void) \
|
||||
{ \
|
||||
EE::Profiler.EmitOp(eeOpcode::fn); \
|
||||
codename(rec##fn##_const, rec##fn##_); \
|
||||
eeRecompileCodeRC0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, (xmminfo)); \
|
||||
}
|
||||
|
||||
#define EERECOMPILE_CODEX(codename, fn, xmminfo) \
|
||||
void rec##fn(void) \
|
||||
{ \
|
||||
EE::Profiler.EmitOp(eeOpcode::fn); \
|
||||
codename(rec##fn##_const, rec##fn##_, (xmminfo)); \
|
||||
}
|
||||
|
||||
#define EERECOMPILE_CODEI(codename, fn, xmminfo) \
|
||||
void rec##fn(void) \
|
||||
{ \
|
||||
EE::Profiler.EmitOp(eeOpcode::fn); \
|
||||
codename(rec##fn##_const, rec##fn##_, (xmminfo)); \
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -156,66 +171,11 @@ typedef void (*R5900FNPTR_INFO)(int info);
|
|||
//
|
||||
|
||||
// rd = rs op rt
|
||||
void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode, int xmminfo);
|
||||
void eeRecompileCodeRC0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode, int xmminfo);
|
||||
// rt = rs op imm16
|
||||
void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode);
|
||||
void eeRecompileCodeRC1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode, int xmminfo);
|
||||
// rd = rt op sa
|
||||
void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode);
|
||||
// rt op rs (SPECIAL)
|
||||
void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode);
|
||||
|
||||
//
|
||||
// non mmx/xmm version, slower
|
||||
//
|
||||
// rd = rs op rt
|
||||
#define EERECOMPILE_CONSTCODE0(fn) \
|
||||
void rec##fn(void) \
|
||||
{ \
|
||||
eeRecompileCodeConst0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_); \
|
||||
}
|
||||
|
||||
// rt = rs op imm16
|
||||
#define EERECOMPILE_CONSTCODE1(fn) \
|
||||
void rec##fn(void) \
|
||||
{ \
|
||||
eeRecompileCodeConst1(rec##fn##_const, rec##fn##_); \
|
||||
}
|
||||
|
||||
// rd = rt op sa
|
||||
#define EERECOMPILE_CONSTCODE2(fn) \
|
||||
void rec##fn(void) \
|
||||
{ \
|
||||
eeRecompileCodeConst2(rec##fn##_const, rec##fn##_); \
|
||||
}
|
||||
|
||||
// rd = rt op rs
|
||||
#define EERECOMPILE_CONSTCODESPECIAL(fn, mult) \
|
||||
void rec##fn(void) \
|
||||
{ \
|
||||
eeRecompileCodeConstSPECIAL(rec##fn##_const, rec##fn##_, mult); \
|
||||
}
|
||||
|
||||
// rd = rs op rt
|
||||
void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode);
|
||||
// rt = rs op imm16
|
||||
void eeRecompileCodeConst1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode);
|
||||
// rd = rt op sa
|
||||
void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode);
|
||||
// rd = rt MULT rs (SPECIAL)
|
||||
void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode, int MULT);
|
||||
|
||||
// XMM caching helpers
|
||||
#define XMMINFO_READLO 0x001
|
||||
#define XMMINFO_READHI 0x002
|
||||
#define XMMINFO_WRITELO 0x004
|
||||
#define XMMINFO_WRITEHI 0x008
|
||||
#define XMMINFO_WRITED 0x010
|
||||
#define XMMINFO_READD 0x020
|
||||
#define XMMINFO_READS 0x040
|
||||
#define XMMINFO_READT 0x080
|
||||
#define XMMINFO_READD_LO 0x100 // if set and XMMINFO_READD is set, reads only low 64 bits of D
|
||||
#define XMMINFO_READACC 0x200
|
||||
#define XMMINFO_WRITEACC 0x400
|
||||
void eeRecompileCodeRC2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode, int xmminfo);
|
||||
|
||||
#define FPURECOMPILE_CONSTCODE(fn, xmminfo) \
|
||||
void rec##fn(void) \
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -72,3 +72,5 @@ namespace R5900
|
|||
void Run(u32 start, u32 end, EEINST* inst_cache) override;
|
||||
};
|
||||
} // namespace R5900
|
||||
|
||||
void recBackpropBSC(u32 code, EEINST* prev, EEINST* pinst);
|
||||
|
|
|
@ -31,17 +31,18 @@ namespace Dynarec {
|
|||
// Parameters:
|
||||
// jmpSkip - This parameter is the result of the appropriate J32 instruction
|
||||
// (usually JZ32 or JNZ32).
|
||||
void recDoBranchImm(u32* jmpSkip, bool isLikely)
|
||||
void recDoBranchImm(u32 branchTo, u32* jmpSkip, bool isLikely, bool swappedDelaySlot)
|
||||
{
|
||||
// All R5900 branches use this format:
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
// First up is the Branch Taken Path : Save the recompiler's state, compile the
|
||||
// DelaySlot, and issue a BranchTest insertion. The state is reloaded below for
|
||||
// the "did not branch" path (maintains consts, register allocations, and other optimizations).
|
||||
|
||||
if (!swappedDelaySlot)
|
||||
{
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
// Jump target when the branch is *not* taken, skips the branchtest code
|
||||
|
@ -50,18 +51,17 @@ void recDoBranchImm(u32* jmpSkip, bool isLikely)
|
|||
|
||||
// if it's a likely branch then we'll need to skip the delay slot here, since
|
||||
// MIPS cancels the delay slot instruction when branches aren't taken.
|
||||
if (!swappedDelaySlot)
|
||||
{
|
||||
LoadBranchState();
|
||||
if (!isLikely)
|
||||
{
|
||||
pc -= 4; // instruction rewinder for delay slot, if non-likely.
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
SetBranchImm(pc); // start a new recompiled block.
|
||||
}
|
||||
|
||||
void recDoBranchImm_Likely(u32* jmpSkip)
|
||||
{
|
||||
recDoBranchImm(jmpSkip, true);
|
||||
SetBranchImm(pc); // start a new recompiled block.
|
||||
}
|
||||
|
||||
namespace OpcodeImpl {
|
||||
|
@ -95,6 +95,7 @@ void recMFSA()
|
|||
if (!_Rd_)
|
||||
return;
|
||||
|
||||
// TODO(Stenzek): Make these less rubbish
|
||||
mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE);
|
||||
if (mmreg >= 0)
|
||||
{
|
||||
|
@ -102,10 +103,9 @@ void recMFSA()
|
|||
}
|
||||
else
|
||||
{
|
||||
xMOV(eax, ptr[&cpuRegs.sa]);
|
||||
xMOV(rax, ptr32[&cpuRegs.sa]);
|
||||
_deleteEEreg(_Rd_, 0);
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0);
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -124,6 +124,10 @@ void recMTSA()
|
|||
{
|
||||
xMOVSS(ptr[&cpuRegs.sa], xRegisterSSE(mmreg));
|
||||
}
|
||||
else if ((mmreg = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ)) >= 0)
|
||||
{
|
||||
xMOV(ptr[&cpuRegs.sa], xRegister32(mmreg));
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "VU.h"
|
||||
#include "common/emitter/x86emitter.h"
|
||||
#include "R3000A.h"
|
||||
#include "x86/iR3000A.h"
|
||||
|
||||
using namespace x86Emitter;
|
||||
|
||||
|
@ -29,7 +30,7 @@ using namespace x86Emitter;
|
|||
extern u32 g_psxConstRegs[32];
|
||||
|
||||
// X86 caching
|
||||
static int g_x86checknext;
|
||||
static uint g_x86checknext;
|
||||
|
||||
// use special x86 register allocation for ia32
|
||||
|
||||
|
@ -40,92 +41,19 @@ void _initX86regs()
|
|||
g_x86checknext = 0;
|
||||
}
|
||||
|
||||
// Translates an (allocation type, guest register) pair into the address of the
// emulator-side variable that backs it.
//
//   type - one of the X86TYPE_* kinds. The X86TYPE_VU1 bit is masked off before
//          dispatch and, for the VU kinds, selects VU1 rather than VU0.
//   reg  - guest register index; only consulted by the kinds that index an array
//          (GPR, VI, PSX).
//
// Returns the address as a uptr. The two MEMOFFSET kinds yield 0.
uptr _x86GetAddr(int type, int reg)
{
	const bool useVU1 = (type & X86TYPE_VU1) != 0;

	switch (type & ~X86TYPE_VU1)
	{
		case X86TYPE_GPR:
			return (uptr)&cpuRegs.GPR.r[reg];

		case X86TYPE_VI:
			return useVU1 ? (uptr)&VU1.VI[reg] : (uptr)&VU0.VI[reg];

		case X86TYPE_MEMOFFSET:
			return 0;

		case X86TYPE_VIMEMOFFSET:
			return 0;

		// Q/P reads go through the VI file, writes go to the dedicated q/p members.
		case X86TYPE_VUQREAD:
			return useVU1 ? (uptr)&VU1.VI[REG_Q] : (uptr)&VU0.VI[REG_Q];

		case X86TYPE_VUPREAD:
			return useVU1 ? (uptr)&VU1.VI[REG_P] : (uptr)&VU0.VI[REG_P];

		case X86TYPE_VUQWRITE:
			return useVU1 ? (uptr)&VU1.q : (uptr)&VU0.q;

		case X86TYPE_VUPWRITE:
			return useVU1 ? (uptr)&VU1.p : (uptr)&VU0.p;

		case X86TYPE_PSX:
			return (uptr)&psxRegs.GPR.r[reg];

		case X86TYPE_PCWRITEBACK:
			return (uptr)&cpuRegs.pcWriteback;

		case X86TYPE_PSX_PCWRITEBACK:
			return (uptr)&psxRegs.pcWriteback;

		jNO_DEFAULT;
	}

	// Only reachable if the default case falls out of the switch.
	return 0;
}
|
||||
|
||||
int _getFreeX86reg(int mode)
|
||||
{
|
||||
int tempi = -1;
|
||||
u32 bestcount = 0x10000;
|
||||
|
||||
int maxreg = (mode & MODE_8BITREG) ? 4 : iREGCNT_GPR;
|
||||
|
||||
for (uint i = 0; i < iREGCNT_GPR; i++)
|
||||
{
|
||||
int reg = (g_x86checknext + i) % iREGCNT_GPR;
|
||||
if (reg == 0 || reg == esp.GetId() || reg == ebp.GetId())
|
||||
const int reg = (g_x86checknext + i) % iREGCNT_GPR;
|
||||
if (x86regs[reg].inuse || !_isAllocatableX86reg(reg))
|
||||
continue;
|
||||
if (reg >= maxreg)
|
||||
|
||||
if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(reg))
|
||||
continue;
|
||||
//if( (mode&MODE_NOFRAME) && reg==EBP ) continue;
|
||||
|
||||
if (x86regs[reg].inuse == 0)
|
||||
{
|
||||
|
@ -134,20 +62,26 @@ int _getFreeX86reg(int mode)
|
|||
}
|
||||
}
|
||||
|
||||
for (int i = 1; i < maxreg; i++)
|
||||
for (uint i = 0; i < iREGCNT_GPR; i++)
|
||||
{
|
||||
if (i == esp.GetId() || i == ebp.GetId())
|
||||
if (!_isAllocatableX86reg(i))
|
||||
continue;
|
||||
//if( (mode&MODE_NOFRAME) && i==EBP ) continue;
|
||||
|
||||
if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(i))
|
||||
continue;
|
||||
|
||||
// should have checked inuse in the previous loop.
|
||||
pxAssert(x86regs[i].inuse);
|
||||
|
||||
if (x86regs[i].needed)
|
||||
continue;
|
||||
|
||||
if (x86regs[i].type != X86TYPE_TEMP)
|
||||
{
|
||||
|
||||
if (x86regs[i].counter < bestcount)
|
||||
{
|
||||
tempi = i;
|
||||
tempi = static_cast<int>(i);
|
||||
bestcount = x86regs[i].counter;
|
||||
}
|
||||
continue;
|
||||
|
@ -163,22 +97,15 @@ int _getFreeX86reg(int mode)
|
|||
return tempi;
|
||||
}
|
||||
|
||||
pxFailDev("x86 register allocation error");
|
||||
throw Exception::FailedToAllocateRegister();
|
||||
}
|
||||
|
||||
void _flushCachedRegs()
|
||||
{
|
||||
_flushConstRegs();
|
||||
_flushXMMregs();
|
||||
pxFailRel("x86 register allocation error");
|
||||
return -1;
|
||||
}
|
||||
|
||||
void _flushConstReg(int reg)
|
||||
{
|
||||
if (GPR_IS_CONST1(reg) && !(g_cpuFlushedConstReg & (1 << reg)))
|
||||
{
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[reg].UL[0]], g_cpuConstRegs[reg].UL[0]);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[reg].UL[1]], g_cpuConstRegs[reg].UL[1]);
|
||||
xWriteImm64ToMem(&cpuRegs.GPR.r[reg].UD[0], rax, g_cpuConstRegs[reg].SD[0]);
|
||||
g_cpuFlushedConstReg |= (1 << reg);
|
||||
if (reg == 0)
|
||||
DevCon.Warning("Flushing r0!");
|
||||
|
@ -187,243 +114,367 @@ void _flushConstReg(int reg)
|
|||
|
||||
void _flushConstRegs()
|
||||
{
|
||||
s32 zero_cnt = 0, minusone_cnt = 0;
|
||||
s32 eaxval = 1; // 0, -1
|
||||
u32 done[4] = {0, 0, 0, 0};
|
||||
u8* rewindPtr;
|
||||
|
||||
// flush constants
|
||||
|
||||
// flush 0 and -1 first
|
||||
// ignore r0
|
||||
for (int i = 1, j = 0; i < 32; j++ && ++i, j %= 2)
|
||||
int zero_reg_count = 0;
|
||||
int minusone_reg_count = 0;
|
||||
for (u32 i = 0; i < 32; i++)
|
||||
{
|
||||
if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1 << i))
|
||||
continue;
|
||||
if (g_cpuConstRegs[i].SL[j] != 0)
|
||||
if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1u << i))
|
||||
continue;
|
||||
|
||||
if (eaxval != 0)
|
||||
if (g_cpuConstRegs[i].SD[0] == 0)
|
||||
zero_reg_count++;
|
||||
else if (g_cpuConstRegs[i].SD[0] == -1)
|
||||
minusone_reg_count++;
|
||||
}
|
||||
|
||||
// if we have more than one of zero/minus-one, precompute
|
||||
bool rax_is_zero = false;
|
||||
if (zero_reg_count > 1)
|
||||
{
|
||||
xXOR(eax, eax);
|
||||
eaxval = 0;
|
||||
}
|
||||
|
||||
xMOV(ptr[&cpuRegs.GPR.r[i].SL[j]], eax);
|
||||
done[j] |= 1 << i;
|
||||
zero_cnt++;
|
||||
}
|
||||
|
||||
rewindPtr = x86Ptr;
|
||||
|
||||
for (int i = 1, j = 0; i < 32; j++ && ++i, j %= 2)
|
||||
for (u32 i = 0; i < 32; i++)
|
||||
{
|
||||
if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1 << i))
|
||||
continue;
|
||||
if (g_cpuConstRegs[i].SL[j] != -1)
|
||||
if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1u << i))
|
||||
continue;
|
||||
|
||||
if (eaxval > 0)
|
||||
if (g_cpuConstRegs[i].SD[0] == 0)
|
||||
{
|
||||
xXOR(eax, eax);
|
||||
eaxval = 0;
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[i].UD[0]], rax);
|
||||
g_cpuFlushedConstReg |= 1u << i;
|
||||
}
|
||||
if (eaxval == 0)
|
||||
}
|
||||
rax_is_zero = true;
|
||||
}
|
||||
if (minusone_reg_count > 1)
|
||||
{
|
||||
xNOT(eax);
|
||||
eaxval = -1;
|
||||
}
|
||||
|
||||
xMOV(ptr[&cpuRegs.GPR.r[i].SL[j]], eax);
|
||||
done[j + 2] |= 1 << i;
|
||||
minusone_cnt++;
|
||||
}
|
||||
|
||||
if (minusone_cnt == 1 && !zero_cnt) // not worth it for one byte
|
||||
{
|
||||
x86SetPtr(rewindPtr);
|
||||
}
|
||||
if (!rax_is_zero)
|
||||
xMOV(rax, -1);
|
||||
else
|
||||
{
|
||||
done[0] |= done[2];
|
||||
done[1] |= done[3];
|
||||
}
|
||||
xNOT(rax);
|
||||
|
||||
for (int i = 1; i < 32; ++i)
|
||||
for (u32 i = 0; i < 32; i++)
|
||||
{
|
||||
if (GPR_IS_CONST1(i))
|
||||
{
|
||||
if (!(g_cpuFlushedConstReg & (1 << i)))
|
||||
{
|
||||
if (!(done[0] & (1 << i)))
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[i].UL[0]], g_cpuConstRegs[i].UL[0]);
|
||||
if (!(done[1] & (1 << i)))
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[i].UL[1]], g_cpuConstRegs[i].UL[1]);
|
||||
if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1u << i))
|
||||
continue;
|
||||
|
||||
g_cpuFlushedConstReg |= 1 << i;
|
||||
}
|
||||
if (g_cpuHasConstReg == g_cpuFlushedConstReg)
|
||||
break;
|
||||
if (g_cpuConstRegs[i].SD[0] == -1)
|
||||
{
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[i].UD[0]], rax);
|
||||
g_cpuFlushedConstReg |= 1u << i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int _allocX86reg(xRegister32 x86reg, int type, int reg, int mode)
|
||||
// and whatever's left over..
|
||||
for (u32 i = 0; i < 32; i++)
|
||||
{
|
||||
uint i;
|
||||
pxAssertDev(reg >= 0 && reg < 32, "Register index out of bounds.");
|
||||
pxAssertDev(x86reg != esp && x86reg != ebp, "Allocation of ESP/EBP is not allowed!");
|
||||
if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1u << i))
|
||||
continue;
|
||||
|
||||
// don't alloc EAX and ESP,EBP if MODE_NOFRAME
|
||||
int oldmode = mode;
|
||||
//int noframe = mode & MODE_NOFRAME;
|
||||
uint maxreg = (mode & MODE_8BITREG) ? 4 : iREGCNT_GPR;
|
||||
mode &= ~(MODE_NOFRAME | MODE_8BITREG);
|
||||
int readfromreg = -1;
|
||||
xWriteImm64ToMem(&cpuRegs.GPR.r[i].UD[0], rax, g_cpuConstRegs[i].UD[0]);
|
||||
g_cpuFlushedConstReg |= 1u << i;
|
||||
}
|
||||
}
|
||||
|
||||
static const char* GetModeString(int mode)
|
||||
{
|
||||
return ((mode & MODE_READ)) ? ((mode & MODE_WRITE) ? "readwrite" : "read") : "write";
|
||||
}
|
||||
|
||||
// Consistency check over the host GPR and XMM allocation tables.
// The whole body is compiled only when PCSX2_DEVBUILD is defined; otherwise the
// function is a no-op.
void _validateRegs()
{
#ifdef PCSX2_DEVBUILD
	// For each guest register, locate its cached copy in the host GPRs and in the
	// host XMMs, then make sure the two copies cannot disagree about who is dirty.
	for (s8 guestreg = 0; guestreg < 32; guestreg++)
	{
		u32 gprreg = 0, gprmode = 0;
		u32 fprreg = 0, fprmode = 0;

		for (u32 slot = 0; slot < iREGCNT_GPR; slot++)
		{
			const auto& entry = x86regs[slot];
			if (!entry.inuse || entry.type != X86TYPE_GPR || entry.reg != guestreg)
				continue;

			// a second hit means the guest register is cached twice
			pxAssertMsg(gprreg == 0 && gprmode == 0, "register is not already allocated in a GPR");
			gprreg = slot;
			gprmode = entry.mode;
		}

		for (u32 slot = 0; slot < iREGCNT_XMM; slot++)
		{
			const auto& entry = xmmregs[slot];
			if (!entry.inuse || entry.type != XMMTYPE_GPRREG || entry.reg != guestreg)
				continue;

			pxAssertMsg(fprreg == 0 && fprmode == 0, "register is not already allocated in a XMM");
			fprreg = slot;
			fprmode = entry.mode;
		}

		const bool anyDirty = ((gprmode | fprmode) & MODE_WRITE) != 0;
		if (anyDirty)
		{
			pxAssertMsg((gprmode & MODE_WRITE) != (fprmode & MODE_WRITE), "only one of gpr or fps is in write state");
		}

		// whichever side is dirty, the other side must not hold the register at all
		if (gprmode & MODE_WRITE)
		{
			pxAssertMsg(fprmode == 0, "when writing to the gpr, fpr is invalid");
		}
		if (fprmode & MODE_WRITE)
		{
			pxAssertMsg(gprmode == 0, "when writing to the fpr, gpr is invalid");
		}
	}
#endif
}
|
||||
|
||||
int _allocX86reg(int type, int reg, int mode)
|
||||
{
|
||||
if (type == X86TYPE_GPR || type == X86TYPE_PSX)
|
||||
{
|
||||
pxAssertDev(reg >= 0 && reg < 34, "Register index out of bounds.");
|
||||
}
|
||||
|
||||
int hostXMMreg = (type == X86TYPE_GPR) ? _checkXMMreg(XMMTYPE_GPRREG, reg, 0) : -1;
|
||||
if (type != X86TYPE_TEMP)
|
||||
{
|
||||
if (maxreg < iREGCNT_GPR)
|
||||
{
|
||||
// make sure reg isn't in the higher regs
|
||||
|
||||
for (i = maxreg; i < iREGCNT_GPR; ++i)
|
||||
for (int i = 0; i < static_cast<int>(iREGCNT_GPR); i++)
|
||||
{
|
||||
if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg)
|
||||
continue;
|
||||
|
||||
if (mode & MODE_READ)
|
||||
{
|
||||
readfromreg = i;
|
||||
x86regs[i].inuse = 0;
|
||||
break;
|
||||
}
|
||||
else if (mode & MODE_WRITE)
|
||||
{
|
||||
x86regs[i].inuse = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
pxAssert(type != X86TYPE_GPR || !GPR_IS_CONST1(reg) || (GPR_IS_CONST1(reg) && g_cpuFlushedConstReg & (1u << reg)));
|
||||
|
||||
for (i = 1; i < maxreg; i++)
|
||||
{
|
||||
if ((int)i == esp.GetId() || (int)i == ebp.GetId())
|
||||
continue;
|
||||
if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg)
|
||||
continue;
|
||||
|
||||
// We're in a for loop until i<maxreg. This will never happen.
|
||||
/*if( i >= maxreg ) {
|
||||
if (x86regs[i].mode & MODE_READ) readfromreg = i;
|
||||
|
||||
mode |= x86regs[i].mode&MODE_WRITE;
|
||||
x86regs[i].inuse = 0;
|
||||
break;
|
||||
}*/
|
||||
|
||||
if (!x86reg.IsEmpty())
|
||||
{
|
||||
// requested specific reg, so return that instead
|
||||
if (i != (uint)x86reg.GetId())
|
||||
{
|
||||
if (x86regs[i].mode & MODE_READ)
|
||||
readfromreg = i;
|
||||
mode |= x86regs[i].mode & MODE_WRITE;
|
||||
x86regs[i].inuse = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (type != X86TYPE_TEMP && !(x86regs[i].mode & MODE_READ) && (mode & MODE_READ))
|
||||
{
|
||||
// can't go from write to read
|
||||
pxAssert(!((x86regs[i].mode & (MODE_READ | MODE_WRITE)) == MODE_WRITE && (mode & (MODE_READ | MODE_WRITE)) == MODE_READ));
|
||||
// if (type != X86TYPE_TEMP && !(x86regs[i].mode & MODE_READ) && (mode & MODE_READ))
|
||||
|
||||
if (type == X86TYPE_GPR)
|
||||
_flushConstReg(reg);
|
||||
{
|
||||
RALOG("Changing host reg %d for guest reg %d from %s to %s mode\n", i, reg, GetModeString(x86regs[i].mode), GetModeString(x86regs[i].mode | mode));
|
||||
|
||||
if (X86_ISVI(type) && reg < 16)
|
||||
xMOVZX(xRegister32(i), ptr16[(u16*)(_x86GetAddr(type, reg))]);
|
||||
else
|
||||
xMOV(xRegister32(i), ptr[(void*)(_x86GetAddr(type, reg))]);
|
||||
|
||||
x86regs[i].mode |= MODE_READ;
|
||||
if (mode & MODE_WRITE)
|
||||
{
|
||||
if (GPR_IS_CONST1(reg))
|
||||
{
|
||||
RALOG("Clearing constant value for guest reg %d on change to write mode\n", reg);
|
||||
GPR_DEL_CONST(reg);
|
||||
}
|
||||
|
||||
x86regs[i].needed = 1;
|
||||
x86regs[i].mode |= mode;
|
||||
if (hostXMMreg >= 0)
|
||||
{
|
||||
// ensure upper bits get written
|
||||
RALOG("Invalidating host XMM reg %d for guest reg %d due to GPR write transition\n", hostXMMreg, reg);
|
||||
pxAssert(!(xmmregs[hostXMMreg].mode & MODE_WRITE));
|
||||
_freeXMMreg(hostXMMreg);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (type == X86TYPE_PSX)
|
||||
{
|
||||
RALOG("Changing host reg %d for guest PSX reg %d from %s to %s mode\n", i, reg, GetModeString(x86regs[i].mode), GetModeString(x86regs[i].mode | mode));
|
||||
|
||||
if (mode & MODE_WRITE)
|
||||
{
|
||||
if (PSX_IS_CONST1(reg))
|
||||
{
|
||||
RALOG("Clearing constant value for guest PSX reg %d on change to write mode\n", reg);
|
||||
PSX_DEL_CONST(reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (type == X86TYPE_VIREG)
|
||||
{
|
||||
// keep VI temporaries separate
|
||||
if (reg < 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
x86regs[i].counter = g_x86AllocCounter++;
|
||||
x86regs[i].mode |= mode & ~MODE_CALLEESAVED;
|
||||
x86regs[i].needed = true;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
if (x86reg.IsEmpty())
|
||||
x86reg = xRegister32(_getFreeX86reg(oldmode));
|
||||
else
|
||||
_freeX86reg(x86reg);
|
||||
const int regnum = _getFreeX86reg(mode);
|
||||
xRegister64 new_reg(regnum);
|
||||
x86regs[regnum].type = type;
|
||||
x86regs[regnum].reg = reg;
|
||||
x86regs[regnum].mode = mode & ~MODE_CALLEESAVED;
|
||||
x86regs[regnum].counter = g_x86AllocCounter++;
|
||||
x86regs[regnum].needed = true;
|
||||
x86regs[regnum].inuse = true;
|
||||
|
||||
x86regs[x86reg.GetId()].type = type;
|
||||
x86regs[x86reg.GetId()].reg = reg;
|
||||
x86regs[x86reg.GetId()].mode = mode;
|
||||
x86regs[x86reg.GetId()].needed = 1;
|
||||
x86regs[x86reg.GetId()].inuse = 1;
|
||||
if (type == X86TYPE_GPR)
|
||||
{
|
||||
RALOG("Allocating host reg %d to guest reg %d in %s mode\n", regnum, reg, GetModeString(mode));
|
||||
}
|
||||
|
||||
if (mode & MODE_READ)
|
||||
{
|
||||
if (readfromreg >= 0)
|
||||
xMOV(x86reg, xRegister32(readfromreg));
|
||||
else
|
||||
switch (type)
|
||||
{
|
||||
if (type == X86TYPE_GPR)
|
||||
{
|
||||
|
||||
if (reg == 0)
|
||||
{
|
||||
xXOR(x86reg, x86reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
_flushConstReg(reg);
|
||||
_deleteGPRtoXMMreg(reg, 1);
|
||||
|
||||
_eeMoveGPRtoR(x86reg, reg);
|
||||
|
||||
_deleteGPRtoXMMreg(reg, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (X86_ISVI(type) && reg < 16)
|
||||
case X86TYPE_GPR:
|
||||
{
|
||||
if (reg == 0)
|
||||
xXOR(x86reg, x86reg);
|
||||
else
|
||||
xMOVZX(x86reg, ptr16[(u16*)(_x86GetAddr(type, reg))]);
|
||||
{
|
||||
xXOR(xRegister32(new_reg), xRegister32(new_reg)); // 32-bit is smaller and zexts anyway
|
||||
}
|
||||
else
|
||||
xMOV(x86reg, ptr[(void*)(_x86GetAddr(type, reg))]);
|
||||
{
|
||||
if (hostXMMreg >= 0)
|
||||
{
|
||||
// is in a XMM. we don't need to free the XMM since we're not writing, and it's still valid
|
||||
RALOG("Copying %d from XMM %d to GPR %d on read\n", reg, hostXMMreg, regnum);
|
||||
xMOVD(new_reg, xRegisterSSE(hostXMMreg)); // actually MOVQ
|
||||
|
||||
// if the XMM was dirty, just get rid of it, we don't want to try to sync the values up...
|
||||
if (xmmregs[hostXMMreg].mode & MODE_WRITE)
|
||||
{
|
||||
RALOG("Freeing dirty XMM %d for GPR %d\n", hostXMMreg, reg);
|
||||
_freeXMMreg(hostXMMreg);
|
||||
}
|
||||
}
|
||||
else if (GPR_IS_CONST1(reg))
|
||||
{
|
||||
xMOV64(new_reg, g_cpuConstRegs[reg].SD[0]);
|
||||
g_cpuFlushedConstReg |= (1u << reg);
|
||||
x86regs[regnum].mode |= MODE_WRITE; // reg is dirty
|
||||
|
||||
RALOG("Writing constant value %lld from guest reg %d to host reg %d\n", g_cpuConstRegs[reg].SD[0], reg, regnum);
|
||||
}
|
||||
else
|
||||
{
|
||||
// not loaded
|
||||
RALOG("Loading guest reg %d to GPR %d\n", reg, regnum);
|
||||
xMOV(new_reg, ptr64[&cpuRegs.GPR.r[reg].UD[0]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case X86TYPE_FPRC:
|
||||
RALOG("Loading guest reg FPCR %d to GPR %d\n", reg, regnum);
|
||||
xMOV(xRegister32(regnum), ptr32[&fpuRegs.fprc[reg]]);
|
||||
break;
|
||||
|
||||
case X86TYPE_PSX:
|
||||
{
|
||||
const xRegister32 new_reg32(regnum);
|
||||
if (reg == 0)
|
||||
{
|
||||
xXOR(new_reg32, new_reg32);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (PSX_IS_CONST1(reg))
|
||||
{
|
||||
xMOV(new_reg32, g_psxConstRegs[reg]);
|
||||
g_psxFlushedConstReg |= (1u << reg);
|
||||
x86regs[regnum].mode |= MODE_WRITE; // reg is dirty
|
||||
|
||||
RALOG("Writing constant value %d from guest PSX reg %d to host reg %d\n", g_psxConstRegs[reg], reg, regnum);
|
||||
}
|
||||
else
|
||||
{
|
||||
RALOG("Loading guest PSX reg %d to GPR %d\n", reg, regnum);
|
||||
xMOV(new_reg32, ptr32[&psxRegs.GPR.r[reg]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Need to port all the code
|
||||
// return x86reg;
|
||||
return x86reg.GetId();
|
||||
if (type == X86TYPE_GPR && (mode & MODE_WRITE))
|
||||
{
|
||||
if (reg < 32 && GPR_IS_CONST1(reg))
|
||||
{
|
||||
RALOG("Clearing constant value for guest reg %d on write allocation\n", reg);
|
||||
GPR_DEL_CONST(reg);
|
||||
}
|
||||
if (hostXMMreg >= 0)
|
||||
{
|
||||
// writing, so kill the xmm allocation. gotta ensure the upper bits gets stored first.
|
||||
RALOG("Invalidating %d from XMM %d because of GPR %d write\n", reg, hostXMMreg, regnum);
|
||||
_freeXMMreg(hostXMMreg);
|
||||
}
|
||||
}
|
||||
else if (type == X86TYPE_PSX && (mode & MODE_WRITE))
|
||||
{
|
||||
if (reg < 32 && PSX_IS_CONST1(reg))
|
||||
{
|
||||
RALOG("Clearing constant value for guest PSX reg %d on write allocation\n", reg);
|
||||
PSX_DEL_CONST(reg);
|
||||
}
|
||||
}
|
||||
|
||||
// Console.WriteLn("Allocating reg %d", regnum);
|
||||
return regnum;
|
||||
}
|
||||
|
||||
void _writebackX86Reg(int x86reg)
|
||||
{
|
||||
switch (x86regs[x86reg].type)
|
||||
{
|
||||
case X86TYPE_GPR:
|
||||
RALOG("Writing back GPR reg %d for guest reg %d P2\n", x86reg, x86regs[x86reg].reg);
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[x86regs[x86reg].reg].UD[0]], xRegister64(x86reg));
|
||||
break;
|
||||
|
||||
case X86TYPE_FPRC:
|
||||
RALOG("Writing back GPR reg %d for guest reg FPCR %d P2\n", x86reg, x86regs[x86reg].reg);
|
||||
xMOV(ptr32[&fpuRegs.fprc[x86regs[x86reg].reg]], xRegister32(x86reg));
|
||||
break;
|
||||
|
||||
case X86TYPE_VIREG:
|
||||
RALOG("Writing back VI reg %d for guest reg %d P2\n", x86reg, x86regs[x86reg].reg);
|
||||
xMOV(ptr16[&VU0.VI[x86regs[x86reg].reg].UL], xRegister16(x86reg));
|
||||
break;
|
||||
|
||||
case X86TYPE_PCWRITEBACK:
|
||||
RALOG("Writing back PC writeback in host reg %d\n", x86reg);
|
||||
xMOV(ptr32[&cpuRegs.pcWriteback], xRegister32(x86reg));
|
||||
break;
|
||||
|
||||
case X86TYPE_PSX:
|
||||
RALOG("Writing back PSX GPR reg %d for guest reg %d P2\n", x86reg, x86regs[x86reg].reg);
|
||||
xMOV(ptr32[&psxRegs.GPR.r[x86regs[x86reg].reg]], xRegister32(x86reg));
|
||||
break;
|
||||
|
||||
case X86TYPE_PSX_PCWRITEBACK:
|
||||
RALOG("Writing back PSX PC writeback in host reg %d\n", x86reg);
|
||||
xMOV(ptr32[&psxRegs.pcWriteback], xRegister32(x86reg));
|
||||
break;
|
||||
|
||||
default:
|
||||
abort();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int _checkX86reg(int type, int reg, int mode)
|
||||
{
|
||||
uint i;
|
||||
|
||||
for (i = 0; i < iREGCNT_GPR; i++)
|
||||
for (uint i = 0; i < iREGCNT_GPR; i++)
|
||||
{
|
||||
if (x86regs[i].inuse && x86regs[i].reg == reg && x86regs[i].type == type)
|
||||
{
|
||||
// shouldn't have dirty constants...
|
||||
pxAssert((type != X86TYPE_GPR || !GPR_IS_DIRTY_CONST(reg)) &&
|
||||
(type != X86TYPE_PSX || !PSX_IS_DIRTY_CONST(reg)));
|
||||
|
||||
if (!(x86regs[i].mode & MODE_READ) && (mode & MODE_READ))
|
||||
if ((type == X86TYPE_GPR || type == X86TYPE_PSX) && !(x86regs[i].mode & MODE_READ) && (mode & MODE_READ))
|
||||
pxFailRel("Somehow ended up with an allocated x86 without mode");
|
||||
|
||||
// ensure constants get deleted once we alloc as write
|
||||
if (mode & MODE_WRITE)
|
||||
{
|
||||
if (X86_ISVI(type))
|
||||
xMOVZX(xRegister32(i), ptr16[(u16*)(_x86GetAddr(type, reg))]);
|
||||
else
|
||||
xMOV(xRegister32(i), ptr[(void*)(_x86GetAddr(type, reg))]);
|
||||
if (type == X86TYPE_GPR)
|
||||
{
|
||||
// go through the alloc path instead, because we might need to invalidate an xmm.
|
||||
return _allocX86reg(X86TYPE_GPR, reg, mode);
|
||||
}
|
||||
else if (type == X86TYPE_PSX)
|
||||
{
|
||||
pxAssert(!PSX_IS_DIRTY_CONST(reg));
|
||||
PSX_DEL_CONST(reg);
|
||||
}
|
||||
}
|
||||
|
||||
x86regs[i].mode |= mode;
|
||||
|
@ -438,9 +489,7 @@ int _checkX86reg(int type, int reg, int mode)
|
|||
|
||||
void _addNeededX86reg(int type, int reg)
|
||||
{
|
||||
uint i;
|
||||
|
||||
for (i = 0; i < iREGCNT_GPR; i++)
|
||||
for (uint i = 0; i < iREGCNT_GPR; i++)
|
||||
{
|
||||
if (!x86regs[i].inuse || x86regs[i].reg != reg || x86regs[i].type != type)
|
||||
continue;
|
||||
|
@ -452,9 +501,7 @@ void _addNeededX86reg(int type, int reg)
|
|||
|
||||
void _clearNeededX86regs()
|
||||
{
|
||||
uint i;
|
||||
|
||||
for (i = 0; i < iREGCNT_GPR; i++)
|
||||
for (uint i = 0; i < iREGCNT_GPR; i++)
|
||||
{
|
||||
if (x86regs[i].needed)
|
||||
{
|
||||
|
@ -465,44 +512,6 @@ void _clearNeededX86regs()
|
|||
}
|
||||
}
|
||||
|
||||
void _deleteX86reg(int type, int reg, int flush)
|
||||
{
|
||||
uint i;
|
||||
|
||||
for (i = 0; i < iREGCNT_GPR; i++)
|
||||
{
|
||||
if (x86regs[i].inuse && x86regs[i].reg == reg && x86regs[i].type == type)
|
||||
{
|
||||
switch (flush)
|
||||
{
|
||||
case 0:
|
||||
_freeX86reg(i);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
if (x86regs[i].mode & MODE_WRITE)
|
||||
{
|
||||
|
||||
if (X86_ISVI(type) && x86regs[i].reg < 16)
|
||||
xMOV(ptr[(void*)(_x86GetAddr(type, x86regs[i].reg))], xRegister16(i));
|
||||
else
|
||||
xMOV(ptr[(void*)(_x86GetAddr(type, x86regs[i].reg))], xRegister32(i));
|
||||
|
||||
// get rid of MODE_WRITE since don't want to flush again
|
||||
x86regs[i].mode &= ~MODE_WRITE;
|
||||
x86regs[i].mode |= MODE_READ;
|
||||
}
|
||||
return;
|
||||
|
||||
case 2:
|
||||
x86regs[i].inuse = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Temporary solution to support eax/ebx... type
|
||||
void _freeX86reg(const x86Emitter::xRegister32& x86reg)
|
||||
{
|
||||
_freeX86reg(x86reg.GetId());
|
||||
|
@ -514,17 +523,33 @@ void _freeX86reg(int x86reg)
|
|||
|
||||
if (x86regs[x86reg].inuse && (x86regs[x86reg].mode & MODE_WRITE))
|
||||
{
|
||||
_writebackX86Reg(x86reg);
|
||||
x86regs[x86reg].mode &= ~MODE_WRITE;
|
||||
}
|
||||
|
||||
if (X86_ISVI(x86regs[x86reg].type) && x86regs[x86reg].reg < 16)
|
||||
_freeX86regWithoutWriteback(x86reg);
|
||||
}
|
||||
|
||||
void _freeX86regWithoutWriteback(int x86reg)
|
||||
{
|
||||
xMOV(ptr[(void*)(_x86GetAddr(x86regs[x86reg].type, x86regs[x86reg].reg))], xRegister16(x86reg));
|
||||
}
|
||||
else
|
||||
xMOV(ptr[(void*)(_x86GetAddr(x86regs[x86reg].type, x86regs[x86reg].reg))], xRegister32(x86reg));
|
||||
}
|
||||
pxAssert(x86reg >= 0 && x86reg < (int)iREGCNT_GPR);
|
||||
|
||||
x86regs[x86reg].inuse = 0;
|
||||
|
||||
if (x86regs[x86reg].type == X86TYPE_VIREG)
|
||||
{
|
||||
RALOG("Freeing VI reg %d in host GPR %d\n", x86regs[x86reg].reg, x86reg);
|
||||
//mVUFreeCOP2GPR(x86reg);
|
||||
abort();
|
||||
}
|
||||
else if (x86regs[x86reg].inuse && x86regs[x86reg].type == X86TYPE_GPR)
|
||||
{
|
||||
RALOG("Freeing X86 register %d (was guest %d)...\n", x86reg, x86regs[x86reg].reg);
|
||||
}
|
||||
else if (x86regs[x86reg].inuse)
|
||||
{
|
||||
RALOG("Freeing X86 register %d...\n", x86reg);
|
||||
}
|
||||
}
|
||||
|
||||
void _freeX86regs()
|
||||
|
@ -533,12 +558,18 @@ void _freeX86regs()
|
|||
_freeX86reg(i);
|
||||
}
|
||||
|
||||
// Misc
|
||||
|
||||
void _signExtendSFtoM(uptr mem)
|
||||
void _flushX86regs()
|
||||
{
|
||||
xLAHF();
|
||||
xSAR(ax, 15);
|
||||
xCWDE();
|
||||
xMOV(ptr[(void*)(mem)], eax);
|
||||
for (u32 i = 0; i < iREGCNT_GPR; ++i)
|
||||
{
|
||||
if (x86regs[i].inuse && x86regs[i].mode & MODE_WRITE)
|
||||
{
|
||||
// shouldn't be const, because if we got to write mode, we should've flushed then
|
||||
pxAssert(x86regs[i].type != X86TYPE_GPR || !GPR_IS_DIRTY_CONST(x86regs[i].reg));
|
||||
|
||||
RALOG("Flushing x86 reg %u in _eeFlushAllDirty()\n", i);
|
||||
_writebackX86Reg(i);
|
||||
x86regs[i].mode = (x86regs[i].mode & ~MODE_WRITE) | MODE_READ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -22,10 +22,8 @@
|
|||
|
||||
using namespace x86Emitter;
|
||||
|
||||
namespace R5900 {
|
||||
namespace Dynarec {
|
||||
namespace OpcodeImpl {
|
||||
|
||||
namespace R5900::Dynarec::OpcodeImpl
|
||||
{
|
||||
/*********************************************************
|
||||
* Register arithmetic *
|
||||
* Format: OP rd, rs, rt *
|
||||
|
@ -54,50 +52,109 @@ REC_FUNC_DEL(SLTU, _Rd_);
|
|||
|
||||
#else
|
||||
|
||||
static void recMoveStoD(int info)
|
||||
{
|
||||
if (info & PROCESS_EE_S)
|
||||
xMOV(xRegister32(EEREC_D), xRegister32(EEREC_S));
|
||||
else
|
||||
xMOV(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
}
|
||||
|
||||
static void recMoveStoD64(int info)
|
||||
{
|
||||
if (info & PROCESS_EE_S)
|
||||
xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
|
||||
else
|
||||
xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
}
|
||||
|
||||
static void recMoveTtoD(int info)
|
||||
{
|
||||
if (info & PROCESS_EE_T)
|
||||
xMOV(xRegister32(EEREC_D), xRegister32(EEREC_T));
|
||||
else
|
||||
xMOV(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
}
|
||||
|
||||
static void recMoveTtoD64(int info)
|
||||
{
|
||||
if (info & PROCESS_EE_T)
|
||||
xMOV(xRegister64(EEREC_D), xRegister64(EEREC_T));
|
||||
else
|
||||
xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
}
|
||||
|
||||
//// ADD
|
||||
void recADD_const()
|
||||
static void recADD_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].SD[0] = s64(s32(g_cpuConstRegs[_Rs_].UL[0] + g_cpuConstRegs[_Rt_].UL[0]));
|
||||
}
|
||||
|
||||
void recADD_constv(int info, int creg, u32 vreg)
|
||||
// s is constant
|
||||
static void recADD_consts(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
s32 cval = g_cpuConstRegs[creg].SL[0];
|
||||
|
||||
xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].SL[0]]);
|
||||
if (cval)
|
||||
xADD(eax, cval);
|
||||
eeSignExtendTo(_Rd_, _Rd_ == vreg && !cval);
|
||||
}
|
||||
|
||||
// s is constant
|
||||
void recADD_consts(int info)
|
||||
{
|
||||
recADD_constv(info, _Rs_, _Rt_);
|
||||
const s32 cval = g_cpuConstRegs[_Rs_].SL[0];
|
||||
recMoveTtoD(info);
|
||||
if (cval != 0)
|
||||
xADD(xRegister32(EEREC_D), cval);
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
|
||||
// t is constant
|
||||
void recADD_constt(int info)
|
||||
{
|
||||
recADD_constv(info, _Rt_, _Rs_);
|
||||
}
|
||||
|
||||
// nothing is constant
|
||||
void recADD_(int info)
|
||||
static void recADD_constt(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]);
|
||||
if (_Rs_ == _Rt_)
|
||||
xADD(eax, eax);
|
||||
else
|
||||
xADD(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]);
|
||||
eeSignExtendTo(_Rd_);
|
||||
const s32 cval = g_cpuConstRegs[_Rt_].SL[0];
|
||||
recMoveStoD(info);
|
||||
if (cval != 0)
|
||||
xADD(xRegister32(EEREC_D), cval);
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(ADD, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT);
|
||||
// nothing is constant
|
||||
static void recADD_(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
if ((info & PROCESS_EE_S) && (info & PROCESS_EE_T))
|
||||
{
|
||||
if (EEREC_D == EEREC_S)
|
||||
{
|
||||
xADD(xRegister32(EEREC_D), xRegister32(EEREC_T));
|
||||
}
|
||||
else if (EEREC_D == EEREC_T)
|
||||
{
|
||||
xADD(xRegister32(EEREC_D), xRegister32(EEREC_S));
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(xRegister32(EEREC_D), xRegister32(EEREC_S));
|
||||
xADD(xRegister32(EEREC_D), xRegister32(EEREC_T));
|
||||
}
|
||||
}
|
||||
else if (info & PROCESS_EE_S)
|
||||
{
|
||||
xMOV(xRegister32(EEREC_D), xRegister32(EEREC_S));
|
||||
xADD(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
}
|
||||
else if (info & PROCESS_EE_T)
|
||||
{
|
||||
xMOV(xRegister32(EEREC_D), xRegister32(EEREC_T));
|
||||
xADD(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
xADD(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
}
|
||||
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
|
||||
EERECOMPILE_CODERC0(ADD, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT);
|
||||
|
||||
//// ADDU
|
||||
void recADDU(void)
|
||||
|
@ -111,77 +168,67 @@ void recDADD_const(void)
|
|||
g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] + g_cpuConstRegs[_Rt_].UD[0];
|
||||
}
|
||||
|
||||
void recDADD_constv(int info, int creg, u32 vreg)
|
||||
// s is constant
|
||||
static void recDADD_consts(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
GPR_reg64 cval = g_cpuConstRegs[creg];
|
||||
|
||||
if (_Rd_ == vreg)
|
||||
{
|
||||
if (!cval.SD[0])
|
||||
return; // no-op
|
||||
xImm64Op(xADD, ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax, cval.SD[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cval.SD[0])
|
||||
{
|
||||
xMOV64(rax, cval.SD[0]);
|
||||
xADD(rax, ptr64[&cpuRegs.GPR.r[vreg].SD[0]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(rax, ptr64[&cpuRegs.GPR.r[vreg].SD[0]]);
|
||||
}
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax);
|
||||
}
|
||||
const s64 cval = g_cpuConstRegs[_Rs_].SD[0];
|
||||
recMoveTtoD64(info);
|
||||
if (cval != 0)
|
||||
xImm64Op(xADD, xRegister64(EEREC_D), rax, cval);
|
||||
}
|
||||
|
||||
void recDADD_consts(int info)
|
||||
{
|
||||
recDADD_constv(info, _Rs_, _Rt_);
|
||||
}
|
||||
|
||||
void recDADD_constt(int info)
|
||||
{
|
||||
recDADD_constv(info, _Rt_, _Rs_);
|
||||
}
|
||||
|
||||
void recDADD_(int info)
|
||||
// t is constant
|
||||
static void recDADD_constt(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
u32 rs = _Rs_, rt = _Rt_;
|
||||
if (_Rd_ == _Rt_)
|
||||
rs = _Rt_, rt = _Rs_;
|
||||
|
||||
if (_Rd_ == _Rs_ && _Rs_ == _Rt_)
|
||||
{
|
||||
xSHL(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 1);
|
||||
return;
|
||||
const s64 cval = g_cpuConstRegs[_Rt_].SD[0];
|
||||
recMoveStoD64(info);
|
||||
if (cval != 0)
|
||||
xImm64Op(xADD, xRegister64(EEREC_D), rax, cval);
|
||||
}
|
||||
|
||||
xMOV(rax, ptr64[&cpuRegs.GPR.r[rt].SD[0]]);
|
||||
// nothing is constant
|
||||
static void recDADD_(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
if (_Rd_ == rs)
|
||||
if ((info & PROCESS_EE_S) && (info & PROCESS_EE_T))
|
||||
{
|
||||
xADD(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax);
|
||||
return;
|
||||
if (EEREC_D == EEREC_S)
|
||||
{
|
||||
xADD(xRegister64(EEREC_D), xRegister64(EEREC_T));
|
||||
}
|
||||
else if (rs == rt)
|
||||
else if (EEREC_D == EEREC_T)
|
||||
{
|
||||
xADD(rax, rax);
|
||||
xADD(xRegister64(EEREC_D), xRegister64(EEREC_S));
|
||||
}
|
||||
else
|
||||
{
|
||||
xADD(rax, ptr32[&cpuRegs.GPR.r[rs].SD[0]]);
|
||||
xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
|
||||
xADD(xRegister64(EEREC_D), xRegister64(EEREC_T));
|
||||
}
|
||||
}
|
||||
else if (info & PROCESS_EE_S)
|
||||
{
|
||||
xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
|
||||
xADD(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
}
|
||||
else if (info & PROCESS_EE_T)
|
||||
{
|
||||
xMOV(xRegister64(EEREC_D), xRegister64(EEREC_T));
|
||||
xADD(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
xADD(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
}
|
||||
}
|
||||
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(DADD, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT);
|
||||
EERECOMPILE_CODERC0(DADD, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT | XMMINFO_64BITOP);
|
||||
|
||||
//// DADDU
|
||||
void recDADDU(void)
|
||||
|
@ -191,50 +238,92 @@ void recDADDU(void)
|
|||
|
||||
//// SUB
|
||||
|
||||
void recSUB_const()
|
||||
static void recSUB_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].SD[0] = s64(s32(g_cpuConstRegs[_Rs_].UL[0] - g_cpuConstRegs[_Rt_].UL[0]));
|
||||
}
|
||||
|
||||
void recSUB_consts(int info)
|
||||
static void recSUB_consts(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
s32 sval = g_cpuConstRegs[_Rs_].SL[0];
|
||||
|
||||
const s32 sval = g_cpuConstRegs[_Rs_].SL[0];
|
||||
xMOV(eax, sval);
|
||||
|
||||
if (info & PROCESS_EE_T)
|
||||
xSUB(eax, xRegister32(EEREC_T));
|
||||
else
|
||||
xSUB(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]);
|
||||
eeSignExtendTo(_Rd_);
|
||||
|
||||
xMOVSX(xRegister64(EEREC_D), eax);
|
||||
}
|
||||
|
||||
void recSUB_constt(int info)
|
||||
static void recSUB_constt(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
s32 tval = g_cpuConstRegs[_Rt_].SL[0];
|
||||
const s32 tval = g_cpuConstRegs[_Rt_].SL[0];
|
||||
recMoveStoD(info);
|
||||
if (tval != 0)
|
||||
xSUB(xRegister32(EEREC_D), tval);
|
||||
|
||||
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]);
|
||||
if (tval)
|
||||
xSUB(eax, tval);
|
||||
eeSignExtendTo(_Rd_, _Rd_ == _Rs_ && !tval);
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
|
||||
void recSUB_(int info)
|
||||
static void recSUB_(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
if (_Rs_ == _Rt_)
|
||||
{
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 0);
|
||||
xXOR(xRegister32(EEREC_D), xRegister32(EEREC_D));
|
||||
return;
|
||||
}
|
||||
|
||||
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]);
|
||||
xSUB(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]);
|
||||
eeSignExtendTo(_Rd_);
|
||||
// a bit messier here because it's not commutative..
|
||||
if ((info & PROCESS_EE_S) && (info & PROCESS_EE_T))
|
||||
{
|
||||
if (EEREC_D == EEREC_S)
|
||||
{
|
||||
xSUB(xRegister32(EEREC_D), xRegister32(EEREC_T));
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
else if (EEREC_D == EEREC_T)
|
||||
{
|
||||
// D might equal T
|
||||
xMOV(eax, xRegister32(EEREC_S));
|
||||
xSUB(eax, xRegister32(EEREC_T));
|
||||
xMOVSX(xRegister64(EEREC_D), eax);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(xRegister32(EEREC_D), xRegister32(EEREC_S));
|
||||
xSUB(xRegister32(EEREC_D), xRegister32(EEREC_T));
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
}
|
||||
else if (info & PROCESS_EE_S)
|
||||
{
|
||||
xMOV(xRegister32(EEREC_D), xRegister32(EEREC_S));
|
||||
xSUB(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
else if (info & PROCESS_EE_T)
|
||||
{
|
||||
// D might equal T
|
||||
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
xSUB(eax, xRegister32(EEREC_T));
|
||||
xMOVSX(xRegister64(EEREC_D), eax);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
xSUB(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(SUB, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(SUB, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
|
||||
//// SUBU
|
||||
void recSUBU(void)
|
||||
|
@ -243,74 +332,79 @@ void recSUBU(void)
|
|||
}
|
||||
|
||||
//// DSUB
|
||||
void recDSUB_const()
|
||||
static void recDSUB_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] - g_cpuConstRegs[_Rt_].UD[0];
|
||||
}
|
||||
|
||||
void recDSUB_consts(int info)
|
||||
static void recDSUB_consts(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
GPR_reg64 sval = g_cpuConstRegs[_Rs_];
|
||||
// gross, because if d == t, we can't destroy t
|
||||
const s64 sval = g_cpuConstRegs[_Rs_].SD[0];
|
||||
const xRegister64 regd((info & PROCESS_EE_T && EEREC_D == EEREC_T) ? rax.GetId() : EEREC_D);
|
||||
xMOV64(regd, sval);
|
||||
|
||||
if (!sval.SD[0] && _Rd_ == _Rt_)
|
||||
{
|
||||
xNEG(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]]);
|
||||
return;
|
||||
}
|
||||
if (info & PROCESS_EE_T)
|
||||
xSUB(regd, xRegister64(EEREC_T));
|
||||
else
|
||||
{
|
||||
xMOV64(rax, sval.SD[0]);
|
||||
xSUB(regd, ptr64[&cpuRegs.GPR.r[_Rt_].SD[0]]);
|
||||
|
||||
// emitter will eliminate redundant moves.
|
||||
xMOV(xRegister64(EEREC_D), regd);
|
||||
}
|
||||
|
||||
xSUB(rax, ptr32[&cpuRegs.GPR.r[_Rt_].SD[0]]);
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax);
|
||||
}
|
||||
|
||||
void recDSUB_constt(int info)
|
||||
static void recDSUB_constt(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
GPR_reg64 tval = g_cpuConstRegs[_Rt_];
|
||||
|
||||
if (_Rd_ == _Rs_)
|
||||
{
|
||||
xImm64Op(xSUB, ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax, tval.SD[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(rax, ptr64[&cpuRegs.GPR.r[_Rs_].SD[0]]);
|
||||
if (tval.SD[0])
|
||||
{
|
||||
xImm64Op(xSUB, rax, rdx, tval.SD[0]);
|
||||
}
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax);
|
||||
}
|
||||
const s64 tval = g_cpuConstRegs[_Rt_].SD[0];
|
||||
recMoveStoD64(info);
|
||||
if (tval != 0)
|
||||
xImm64Op(xSUB, xRegister64(EEREC_D), rax, tval);
|
||||
}
|
||||
|
||||
void recDSUB_(int info)
|
||||
static void recDSUB_(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
if (_Rs_ == _Rt_)
|
||||
{
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 0);
|
||||
xXOR(xRegister32(EEREC_D), xRegister32(EEREC_D));
|
||||
return;
|
||||
}
|
||||
else if (_Rd_ == _Rs_)
|
||||
|
||||
// a bit messier here because it's not commutative..
|
||||
if ((info & PROCESS_EE_S) && (info & PROCESS_EE_T))
|
||||
{
|
||||
xMOV(rax, ptr64[&cpuRegs.GPR.r[_Rt_].SD[0]]);
|
||||
xSUB(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax);
|
||||
// D might equal T
|
||||
const xRegister64 regd(EEREC_D == EEREC_T ? rax.GetId() : EEREC_D);
|
||||
xMOV(regd, xRegister64(EEREC_S));
|
||||
xSUB(regd, xRegister64(EEREC_T));
|
||||
xMOV(xRegister64(EEREC_D), regd);
|
||||
}
|
||||
else if (info & PROCESS_EE_S)
|
||||
{
|
||||
xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
|
||||
xSUB(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
}
|
||||
else if (info & PROCESS_EE_T)
|
||||
{
|
||||
// D might equal T
|
||||
const xRegister64 regd(EEREC_D == EEREC_T ? rax.GetId() : EEREC_D);
|
||||
xMOV(regd, ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
xSUB(regd, xRegister64(EEREC_T));
|
||||
xMOV(xRegister64(EEREC_D), regd);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(rax, ptr64[&cpuRegs.GPR.r[_Rs_].SD[0]]);
|
||||
xSUB(rax, ptr64[&cpuRegs.GPR.r[_Rt_].SD[0]]);
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax);
|
||||
xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
xSUB(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
}
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(DSUB, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(DSUB, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);
|
||||
|
||||
//// DSUBU
|
||||
void recDSUBU(void)
|
||||
|
@ -329,15 +423,15 @@ namespace
|
|||
};
|
||||
} // namespace
|
||||
|
||||
static void recLogicalOp_constv(LogicalOp op, int info, int creg, u32 vreg)
|
||||
static void recLogicalOp_constv(LogicalOp op, int info, int creg, u32 vreg, int regv)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
xImpl_G1Logic bad{};
|
||||
const xImpl_G1Logic& xOP = op == LogicalOp::AND ? xAND
|
||||
: op == LogicalOp::OR ? xOR
|
||||
: op == LogicalOp::XOR ? xXOR
|
||||
: op == LogicalOp::NOR ? xOR : bad;
|
||||
const xImpl_G1Logic& xOP = op == LogicalOp::AND ? xAND : op == LogicalOp::OR ? xOR :
|
||||
op == LogicalOp::XOR ? xXOR :
|
||||
op == LogicalOp::NOR ? xOR :
|
||||
bad;
|
||||
s64 fixedInput, fixedOutput, identityInput;
|
||||
bool hasFixed = true;
|
||||
switch (op)
|
||||
|
@ -369,29 +463,18 @@ static void recLogicalOp_constv(LogicalOp op, int info, int creg, u32 vreg)
|
|||
|
||||
if (hasFixed && cval.SD[0] == fixedInput)
|
||||
{
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], fixedOutput);
|
||||
}
|
||||
else if (_Rd_ == vreg)
|
||||
{
|
||||
if (cval.SD[0] != identityInput)
|
||||
xImm64Op(xOP, ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax, cval.UD[0]);
|
||||
if (op == LogicalOp::NOR)
|
||||
xNOT(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]]);
|
||||
xMOV64(xRegister64(EEREC_D), fixedOutput);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cval.SD[0] != identityInput)
|
||||
{
|
||||
xMOV64(rax, cval.SD[0]);
|
||||
xOP(rax, ptr32[&cpuRegs.GPR.r[vreg].UD[0]]);
|
||||
}
|
||||
if (regv >= 0)
|
||||
xMOV(xRegister64(EEREC_D), xRegister64(regv));
|
||||
else
|
||||
{
|
||||
xMOV(rax, ptr32[&cpuRegs.GPR.r[vreg].UD[0]]);
|
||||
}
|
||||
xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[vreg].UD[0]]);
|
||||
if (cval.SD[0] != identityInput)
|
||||
xImm64Op(xOP, xRegister64(EEREC_D), rax, cval.UD[0]);
|
||||
if (op == LogicalOp::NOR)
|
||||
xNOT(rax);
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
xNOT(xRegister64(EEREC_D));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -400,208 +483,234 @@ static void recLogicalOp(LogicalOp op, int info)
|
|||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
xImpl_G1Logic bad{};
|
||||
const xImpl_G1Logic& xOP = op == LogicalOp::AND ? xAND
|
||||
: op == LogicalOp::OR ? xOR
|
||||
: op == LogicalOp::XOR ? xXOR
|
||||
: op == LogicalOp::NOR ? xOR : bad;
|
||||
const xImpl_G1Logic& xOP = op == LogicalOp::AND ? xAND : op == LogicalOp::OR ? xOR :
|
||||
op == LogicalOp::XOR ? xXOR :
|
||||
op == LogicalOp::NOR ? xOR :
|
||||
bad;
|
||||
pxAssert(&xOP != &bad);
|
||||
|
||||
// swap because it's commutative and Rd might be Rt
|
||||
u32 rs = _Rs_, rt = _Rt_;
|
||||
int regs = (info & PROCESS_EE_S) ? EEREC_S : -1, regt = (info & PROCESS_EE_T) ? EEREC_T : -1;
|
||||
if (_Rd_ == _Rt_)
|
||||
rs = _Rt_, rt = _Rs_;
|
||||
{
|
||||
std::swap(rs, rt);
|
||||
std::swap(regs, regt);
|
||||
}
|
||||
|
||||
if (op == LogicalOp::XOR && rs == rt)
|
||||
{
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], 0);
|
||||
}
|
||||
else if (_Rd_ == rs)
|
||||
{
|
||||
if (rs != rt)
|
||||
{
|
||||
xMOV(rax, ptr64[&cpuRegs.GPR.r[rt].UD[0]]);
|
||||
xOP(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
}
|
||||
if (op == LogicalOp::NOR)
|
||||
xNOT(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]]);
|
||||
xXOR(xRegister32(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(rax, ptr64[&cpuRegs.GPR.r[rs].UD[0]]);
|
||||
if (rs != rt)
|
||||
xOP(rax, ptr64[&cpuRegs.GPR.r[rt].UD[0]]);
|
||||
if (regs >= 0)
|
||||
xMOV(xRegister64(EEREC_D), xRegister64(regs));
|
||||
else
|
||||
xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[rs].UD[0]]);
|
||||
|
||||
if (regt >= 0)
|
||||
xOP(xRegister64(EEREC_D), xRegister64(regt));
|
||||
else
|
||||
xOP(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[rt].UD[0]]);
|
||||
|
||||
if (op == LogicalOp::NOR)
|
||||
xNOT(rax);
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
xNOT(xRegister64(EEREC_D));
|
||||
}
|
||||
}
|
||||
|
||||
//// AND
|
||||
void recAND_const()
|
||||
static void recAND_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] & g_cpuConstRegs[_Rt_].UD[0];
|
||||
}
|
||||
|
||||
void recAND_consts(int info)
|
||||
static void recAND_consts(int info)
|
||||
{
|
||||
recLogicalOp_constv(LogicalOp::AND, info, _Rs_, _Rt_);
|
||||
recLogicalOp_constv(LogicalOp::AND, info, _Rs_, _Rt_, (info & PROCESS_EE_T) ? EEREC_T : -1);
|
||||
}
|
||||
|
||||
void recAND_constt(int info)
|
||||
static void recAND_constt(int info)
|
||||
{
|
||||
recLogicalOp_constv(LogicalOp::AND, info, _Rt_, _Rs_);
|
||||
recLogicalOp_constv(LogicalOp::AND, info, _Rt_, _Rs_, (info & PROCESS_EE_S) ? EEREC_S : -1);
|
||||
}
|
||||
|
||||
void recAND_(int info)
|
||||
static void recAND_(int info)
|
||||
{
|
||||
recLogicalOp(LogicalOp::AND, info);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(AND, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(AND, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);
|
||||
|
||||
//// OR
|
||||
void recOR_const()
|
||||
static void recOR_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] | g_cpuConstRegs[_Rt_].UD[0];
|
||||
}
|
||||
|
||||
void recOR_consts(int info)
|
||||
static void recOR_consts(int info)
|
||||
{
|
||||
recLogicalOp_constv(LogicalOp::OR, info, _Rs_, _Rt_);
|
||||
recLogicalOp_constv(LogicalOp::OR, info, _Rs_, _Rt_, (info & PROCESS_EE_T) ? EEREC_T : -1);
|
||||
}
|
||||
|
||||
void recOR_constt(int info)
|
||||
static void recOR_constt(int info)
|
||||
{
|
||||
recLogicalOp_constv(LogicalOp::OR, info, _Rt_, _Rs_);
|
||||
recLogicalOp_constv(LogicalOp::OR, info, _Rt_, _Rs_, (info & PROCESS_EE_S) ? EEREC_S : -1);
|
||||
}
|
||||
|
||||
void recOR_(int info)
|
||||
static void recOR_(int info)
|
||||
{
|
||||
recLogicalOp(LogicalOp::OR, info);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(OR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(OR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);
|
||||
|
||||
//// XOR
|
||||
void recXOR_const()
|
||||
static void recXOR_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] ^ g_cpuConstRegs[_Rt_].UD[0];
|
||||
}
|
||||
|
||||
void recXOR_consts(int info)
|
||||
static void recXOR_consts(int info)
|
||||
{
|
||||
recLogicalOp_constv(LogicalOp::XOR, info, _Rs_, _Rt_);
|
||||
recLogicalOp_constv(LogicalOp::XOR, info, _Rs_, _Rt_, (info & PROCESS_EE_T) ? EEREC_T : -1);
|
||||
}
|
||||
|
||||
void recXOR_constt(int info)
|
||||
static void recXOR_constt(int info)
|
||||
{
|
||||
recLogicalOp_constv(LogicalOp::XOR, info, _Rt_, _Rs_);
|
||||
recLogicalOp_constv(LogicalOp::XOR, info, _Rt_, _Rs_, (info & PROCESS_EE_S) ? EEREC_S : -1);
|
||||
}
|
||||
|
||||
void recXOR_(int info)
|
||||
static void recXOR_(int info)
|
||||
{
|
||||
recLogicalOp(LogicalOp::XOR, info);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(XOR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(XOR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);
|
||||
|
||||
//// NOR
|
||||
void recNOR_const()
|
||||
static void recNOR_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = ~(g_cpuConstRegs[_Rs_].UD[0] | g_cpuConstRegs[_Rt_].UD[0]);
|
||||
}
|
||||
|
||||
void recNOR_consts(int info)
|
||||
static void recNOR_consts(int info)
|
||||
{
|
||||
recLogicalOp_constv(LogicalOp::NOR, info, _Rs_, _Rt_);
|
||||
recLogicalOp_constv(LogicalOp::NOR, info, _Rs_, _Rt_, (info & PROCESS_EE_T) ? EEREC_T : -1);
|
||||
}
|
||||
|
||||
void recNOR_constt(int info)
|
||||
static void recNOR_constt(int info)
|
||||
{
|
||||
recLogicalOp_constv(LogicalOp::NOR, info, _Rt_, _Rs_);
|
||||
recLogicalOp_constv(LogicalOp::NOR, info, _Rt_, _Rs_, (info & PROCESS_EE_S) ? EEREC_S : -1);
|
||||
}
|
||||
|
||||
void recNOR_(int info)
|
||||
static void recNOR_(int info)
|
||||
{
|
||||
recLogicalOp(LogicalOp::NOR, info);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(NOR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(NOR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);
|
||||
|
||||
//// SLT - test with silent hill, lemans
|
||||
void recSLT_const()
|
||||
static void recSLT_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].SD[0] < g_cpuConstRegs[_Rt_].SD[0];
|
||||
}
|
||||
|
||||
void recSLTs_const(int info, int sign, int st)
|
||||
static void recSLTs_const(int info, int sign, int st)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
GPR_reg64 cval = g_cpuConstRegs[st ? _Rt_ : _Rs_];
|
||||
const s64 cval = g_cpuConstRegs[st ? _Rt_ : _Rs_].SD[0];
|
||||
|
||||
const xImpl_Set& SET = st ? (sign ? xSETL : xSETB) : (sign ? xSETG : xSETA);
|
||||
|
||||
xXOR(eax, eax);
|
||||
xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[st ? _Rs_ : _Rt_].UD[0]], rdx, cval.UD[0]);
|
||||
SET(al);
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
// If Rd == Rs or Rt, we can't xor it before it's used.
|
||||
// So, allocate a temporary register first, and then reallocate it to Rd.
|
||||
const xRegister32 dreg((_Rd_ == (st ? _Rs_ : _Rt_)) ? _allocX86reg(X86TYPE_TEMP, 0, 0) : EEREC_D);
|
||||
const int regs = st ? ((info & PROCESS_EE_S) ? EEREC_S : -1) : ((info & PROCESS_EE_T) ? EEREC_T : -1);
|
||||
xXOR(dreg, dreg);
|
||||
|
||||
if (regs >= 0)
|
||||
xImm64Op(xCMP, xRegister64(regs), rcx, cval);
|
||||
else
|
||||
xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[st ? _Rs_ : _Rt_].UD[0]], rcx, cval);
|
||||
SET(xRegister8(dreg));
|
||||
|
||||
if (dreg.GetId() != EEREC_D)
|
||||
{
|
||||
std::swap(x86regs[dreg.GetId()], x86regs[EEREC_D]);
|
||||
_freeX86reg(EEREC_D);
|
||||
}
|
||||
}
|
||||
|
||||
void recSLTs_(int info, int sign)
|
||||
static void recSLTs_(int info, int sign)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
const xImpl_Set& SET = sign ? xSETL : xSETB;
|
||||
|
||||
xXOR(eax, eax);
|
||||
xMOV(rdx, ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
xCMP(rdx, ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
SET(al);
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
// need to keep Rs/Rt around.
|
||||
const xRegister32 dreg((_Rd_ == _Rt_ || _Rd_ == _Rs_) ? _allocX86reg(X86TYPE_TEMP, 0, 0) : EEREC_D);
|
||||
|
||||
// force Rs into a register, may as well cache it since we're loading anyway.
|
||||
const int regs = (info & PROCESS_EE_S) ? EEREC_S : _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
|
||||
xXOR(dreg, dreg);
|
||||
if (info & PROCESS_EE_T)
|
||||
xCMP(xRegister64(regs), xRegister64(EEREC_T));
|
||||
else
|
||||
xCMP(xRegister64(regs), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
|
||||
SET(xRegister8(dreg));
|
||||
|
||||
if (dreg.GetId() != EEREC_D)
|
||||
{
|
||||
std::swap(x86regs[dreg.GetId()], x86regs[EEREC_D]);
|
||||
_freeX86reg(EEREC_D);
|
||||
}
|
||||
}
|
||||
|
||||
void recSLT_consts(int info)
|
||||
static void recSLT_consts(int info)
|
||||
{
|
||||
recSLTs_const(info, 1, 0);
|
||||
}
|
||||
|
||||
void recSLT_constt(int info)
|
||||
static void recSLT_constt(int info)
|
||||
{
|
||||
recSLTs_const(info, 1, 1);
|
||||
}
|
||||
|
||||
void recSLT_(int info)
|
||||
static void recSLT_(int info)
|
||||
{
|
||||
recSLTs_(info, 1);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(SLT, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(SLT, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_NORENAME);
|
||||
|
||||
// SLTU - test with silent hill, lemans
|
||||
void recSLTU_const()
|
||||
static void recSLTU_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] < g_cpuConstRegs[_Rt_].UD[0];
|
||||
}
|
||||
|
||||
void recSLTU_consts(int info)
|
||||
static void recSLTU_consts(int info)
|
||||
{
|
||||
recSLTs_const(info, 0, 0);
|
||||
}
|
||||
|
||||
void recSLTU_constt(int info)
|
||||
static void recSLTU_constt(int info)
|
||||
{
|
||||
recSLTs_const(info, 0, 1);
|
||||
}
|
||||
|
||||
void recSLTU_(int info)
|
||||
static void recSLTU_(int info)
|
||||
{
|
||||
recSLTs_(info, 0);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(SLTU, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(SLTU, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_NORENAME);
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace OpcodeImpl
|
||||
} // namespace Dynarec
|
||||
} // namespace R5900
|
||||
} // namespace R5900::Dynarec::OpcodeImpl
|
||||
|
|
|
@ -22,10 +22,8 @@
|
|||
|
||||
using namespace x86Emitter;
|
||||
|
||||
namespace R5900 {
|
||||
namespace Dynarec {
|
||||
namespace OpcodeImpl {
|
||||
|
||||
namespace R5900::Dynarec::OpcodeImpl
|
||||
{
|
||||
/*********************************************************
|
||||
* Arithmetic with immediate operand *
|
||||
* Format: OP rt, rs, immediate *
|
||||
|
@ -48,34 +46,37 @@ REC_FUNC_DEL(SLTIU, _Rt_);
|
|||
|
||||
#else
|
||||
|
||||
static void recMoveStoT(int info)
|
||||
{
|
||||
if (info & PROCESS_EE_S)
|
||||
xMOV(xRegister32(EEREC_T), xRegister32(EEREC_S));
|
||||
else
|
||||
xMOV(xRegister32(EEREC_T), ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
}
|
||||
|
||||
static void recMoveStoT64(int info)
|
||||
{
|
||||
if (info & PROCESS_EE_S)
|
||||
xMOV(xRegister64(EEREC_T), xRegister64(EEREC_S));
|
||||
else
|
||||
xMOV(xRegister64(EEREC_T), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
}
|
||||
|
||||
//// ADDI
|
||||
void recADDI_const(void)
|
||||
static void recADDI_const(void)
|
||||
{
|
||||
g_cpuConstRegs[_Rt_].SD[0] = s64(s32(g_cpuConstRegs[_Rs_].UL[0] + u32(s32(_Imm_))));
|
||||
}
|
||||
|
||||
void recADDI_(int info)
|
||||
static void recADDI_(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
if (_Rt_ == _Rs_)
|
||||
{
|
||||
// must perform the ADD unconditionally, to maintain flags status:
|
||||
xADD(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _Imm_);
|
||||
_signExtendSFtoM((uptr)&cpuRegs.GPR.r[_Rt_].UL[1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
|
||||
if (_Imm_ != 0)
|
||||
xADD(eax, _Imm_);
|
||||
|
||||
eeSignExtendTo(_Rt_);
|
||||
}
|
||||
recMoveStoT(info);
|
||||
xADD(xRegister32(EEREC_T), _Imm_);
|
||||
xMOVSX(xRegister64(EEREC_T), xRegister32(EEREC_T));
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode1, ADDI);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC1, ADDI, XMMINFO_WRITET | XMMINFO_READS);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recADDIU()
|
||||
|
@ -84,33 +85,19 @@ void recADDIU()
|
|||
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recDADDI_const()
|
||||
static void recDADDI_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] + u64(s64(_Imm_));
|
||||
}
|
||||
|
||||
void recDADDI_(int info)
|
||||
static void recDADDI_(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
if (_Rt_ == _Rs_)
|
||||
{
|
||||
xADD(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], _Imm_);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
|
||||
if (_Imm_ != 0)
|
||||
{
|
||||
xADD(rax, _Imm_);
|
||||
recMoveStoT64(info);
|
||||
xADD(xRegister64(EEREC_T), _Imm_);
|
||||
}
|
||||
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
|
||||
}
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode1, DADDI);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC1, DADDI, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP);
|
||||
|
||||
//// DADDIU
|
||||
void recDADDIU()
|
||||
|
@ -119,43 +106,64 @@ void recDADDIU()
|
|||
}
|
||||
|
||||
//// SLTIU
|
||||
void recSLTIU_const()
|
||||
static void recSLTIU_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] < (u64)(_Imm_);
|
||||
}
|
||||
|
||||
extern void recSLTmemconstt(int regd, int regs, u32 mem, int sign);
|
||||
extern u32 s_sltone;
|
||||
|
||||
void recSLTIU_(int info)
|
||||
static void recSLTIU_(int info)
|
||||
{
|
||||
xXOR(eax, eax);
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
// TODO(Stenzek): this can be made to suck less by turning Rs into a temp and reallocating Rt.
|
||||
const xRegister32 dreg((_Rt_ == _Rs_) ? _allocX86reg(X86TYPE_TEMP, 0, 0) : EEREC_T);
|
||||
xXOR(dreg, dreg);
|
||||
|
||||
if (info & PROCESS_EE_S)
|
||||
xCMP(xRegister64(EEREC_S), _Imm_);
|
||||
else
|
||||
xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], _Imm_);
|
||||
xSETB(al);
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
|
||||
|
||||
xSETB(xRegister8(dreg));
|
||||
|
||||
if (dreg.GetId() != EEREC_T)
|
||||
{
|
||||
std::swap(x86regs[dreg.GetId()], x86regs[EEREC_T]);
|
||||
_freeX86reg(EEREC_T);
|
||||
}
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode1, SLTIU);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC1, SLTIU, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP | XMMINFO_NORENAME);
|
||||
|
||||
//// SLTI
|
||||
void recSLTI_const()
|
||||
static void recSLTI_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].SD[0] < (s64)(_Imm_);
|
||||
}
|
||||
|
||||
void recSLTI_(int info)
|
||||
static void recSLTI_(int info)
|
||||
{
|
||||
// test silent hill if modding
|
||||
xXOR(eax, eax);
|
||||
const xRegister32 dreg((_Rt_ == _Rs_) ? _allocX86reg(X86TYPE_TEMP, 0, 0) : EEREC_T);
|
||||
xXOR(dreg, dreg);
|
||||
|
||||
if (info & PROCESS_EE_S)
|
||||
xCMP(xRegister64(EEREC_S), _Imm_);
|
||||
else
|
||||
xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], _Imm_);
|
||||
xSETL(al);
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
|
||||
|
||||
xSETL(xRegister8(dreg));
|
||||
|
||||
if (dreg.GetId() != EEREC_T)
|
||||
{
|
||||
std::swap(x86regs[dreg.GetId()], x86regs[EEREC_T]);
|
||||
_freeX86reg(EEREC_T);
|
||||
}
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode1, SLTI);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC1, SLTI, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP | XMMINFO_NORENAME);
|
||||
|
||||
//// ANDI
|
||||
void recANDI_const()
|
||||
static void recANDI_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] & (u64)_ImmU_; // Zero-extended Immediate
|
||||
}
|
||||
|
@ -173,79 +181,62 @@ namespace
|
|||
static void recLogicalOpI(int info, LogicalOp op)
|
||||
{
|
||||
xImpl_G1Logic bad{};
|
||||
const xImpl_G1Logic& xOP = op == LogicalOp::AND ? xAND
|
||||
: op == LogicalOp::OR ? xOR
|
||||
: op == LogicalOp::XOR ? xXOR : bad;
|
||||
const xImpl_G1Logic& xOP = op == LogicalOp::AND ? xAND : op == LogicalOp::OR ? xOR :
|
||||
op == LogicalOp::XOR ? xXOR :
|
||||
bad;
|
||||
pxAssert(&xOP != &bad);
|
||||
|
||||
if (_ImmU_ != 0)
|
||||
{
|
||||
if (_Rt_ == _Rs_)
|
||||
{
|
||||
if (op == LogicalOp::AND)
|
||||
xOP(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], _ImmU_);
|
||||
else
|
||||
xOP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _ImmU_);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
xOP(rax, _ImmU_);
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
|
||||
}
|
||||
recMoveStoT64(info);
|
||||
xOP(xRegister64(EEREC_T), _ImmU_);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op == LogicalOp::AND)
|
||||
{
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], 0);
|
||||
xXOR(xRegister32(EEREC_T), xRegister32(EEREC_T));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (_Rt_ != _Rs_)
|
||||
{
|
||||
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
|
||||
}
|
||||
recMoveStoT64(info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void recANDI_(int info)
|
||||
static void recANDI_(int info)
|
||||
{
|
||||
recLogicalOpI(info, LogicalOp::AND);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode1, ANDI);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC1, ANDI, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recORI_const()
|
||||
static void recORI_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] | (u64)_ImmU_; // Zero-extended Immediate
|
||||
}
|
||||
|
||||
void recORI_(int info)
|
||||
static void recORI_(int info)
|
||||
{
|
||||
recLogicalOpI(info, LogicalOp::OR);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode1, ORI);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC1, ORI, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recXORI_const()
|
||||
static void recXORI_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] ^ (u64)_ImmU_; // Zero-extended Immediate
|
||||
}
|
||||
|
||||
void recXORI_(int info)
|
||||
static void recXORI_(int info)
|
||||
{
|
||||
recLogicalOpI(info, LogicalOp::XOR);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode1, XORI);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC1, XORI, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP);
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace OpcodeImpl
|
||||
} // namespace Dynarec
|
||||
} // namespace R5900
|
||||
} // namespace R5900::Dynarec::OpcodeImpl
|
||||
|
|
|
@ -24,10 +24,8 @@
|
|||
|
||||
using namespace x86Emitter;
|
||||
|
||||
namespace R5900 {
|
||||
namespace Dynarec {
|
||||
namespace OpcodeImpl {
|
||||
|
||||
namespace R5900::Dynarec::OpcodeImpl
|
||||
{
|
||||
/*********************************************************
|
||||
* Register branch logic *
|
||||
* Format: OP rs, rt, offset *
|
||||
|
@ -55,135 +53,62 @@ REC_SYS_DEL(BGEZALL, 31);
|
|||
|
||||
#else
|
||||
|
||||
void recSetBranchEQ(int info, int bne, int process)
|
||||
static void recSetBranchEQ(int bne, int process)
|
||||
{
|
||||
if (info & PROCESS_EE_XMM)
|
||||
{
|
||||
int t0reg;
|
||||
// TODO(Stenzek): This is suboptimal if the registers are in XMMs.
|
||||
// If the constant register is already in a host register, we don't need the immediate...
|
||||
|
||||
if (process & PROCESS_CONSTS)
|
||||
{
|
||||
if ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_))
|
||||
{
|
||||
_deleteGPRtoXMMreg(_Rt_, 1);
|
||||
xmmregs[EEREC_T].inuse = 0;
|
||||
t0reg = EEREC_T;
|
||||
}
|
||||
_eeFlushAllDirty();
|
||||
|
||||
_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH_AND_FREE);
|
||||
const int regt = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
|
||||
if (regt >= 0)
|
||||
xImm64Op(xCMP, xRegister64(regt), rax, g_cpuConstRegs[_Rs_].UD[0]);
|
||||
else
|
||||
{
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
}
|
||||
|
||||
_flushConstReg(_Rs_);
|
||||
xPCMP.EQD(xRegisterSSE(t0reg), ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
|
||||
|
||||
if (t0reg != EEREC_T)
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
else if (process & PROCESS_CONSTT)
|
||||
{
|
||||
if ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_))
|
||||
{
|
||||
_deleteGPRtoXMMreg(_Rs_, 1);
|
||||
xmmregs[EEREC_S].inuse = 0;
|
||||
t0reg = EEREC_S;
|
||||
}
|
||||
else
|
||||
{
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
}
|
||||
|
||||
_flushConstReg(_Rt_);
|
||||
xPCMP.EQD(xRegisterSSE(t0reg), ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
|
||||
if (t0reg != EEREC_S)
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
if ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_))
|
||||
{
|
||||
_deleteGPRtoXMMreg(_Rs_, 1);
|
||||
xmmregs[EEREC_S].inuse = 0;
|
||||
t0reg = EEREC_S;
|
||||
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
}
|
||||
else if ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_))
|
||||
{
|
||||
_deleteGPRtoXMMreg(_Rt_, 1);
|
||||
xmmregs[EEREC_T].inuse = 0;
|
||||
t0reg = EEREC_T;
|
||||
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
}
|
||||
else
|
||||
{
|
||||
t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||
xMOVQZX(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
|
||||
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
|
||||
}
|
||||
|
||||
if (t0reg != EEREC_S && t0reg != EEREC_T)
|
||||
_freeXMMreg(t0reg);
|
||||
}
|
||||
|
||||
xMOVMSKPS(eax, xRegisterSSE(t0reg));
|
||||
|
||||
_eeFlushAllUnused();
|
||||
|
||||
xAND(al, 3);
|
||||
xCMP(al, 0x3);
|
||||
|
||||
if (bne)
|
||||
j32Ptr[1] = JE32(0);
|
||||
else
|
||||
j32Ptr[0] = j32Ptr[1] = JNE32(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
_eeFlushAllUnused();
|
||||
|
||||
if (process & PROCESS_CONSTS)
|
||||
{
|
||||
xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax, g_cpuConstRegs[_Rs_].UD[0]);
|
||||
}
|
||||
else if (process & PROCESS_CONSTT)
|
||||
{
|
||||
_eeFlushAllDirty();
|
||||
|
||||
_deleteGPRtoXMMreg(_Rs_, DELETE_REG_FLUSH_AND_FREE);
|
||||
const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
if (regs >= 0)
|
||||
xImm64Op(xCMP, xRegister64(regs), rax, g_cpuConstRegs[_Rt_].UD[0]);
|
||||
else
|
||||
xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], rax, g_cpuConstRegs[_Rt_].UD[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
xCMP(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
// force S into register, since we need to load it, may as well cache.
|
||||
_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH_AND_FREE);
|
||||
const int regs = _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
const int regt = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
|
||||
_eeFlushAllDirty();
|
||||
|
||||
if (regt >= 0)
|
||||
xCMP(xRegister64(regs), xRegister64(regt));
|
||||
else
|
||||
xCMP(xRegister64(regs), ptr64[&cpuRegs.GPR.r[_Rt_]]);
|
||||
}
|
||||
|
||||
if (bne)
|
||||
{
|
||||
j32Ptr[1] = JE32(0);
|
||||
}
|
||||
j32Ptr[0] = JE32(0);
|
||||
else
|
||||
{
|
||||
j32Ptr[0] = j32Ptr[1] = JNE32(0);
|
||||
}
|
||||
j32Ptr[0] = JNE32(0);
|
||||
}
|
||||
|
||||
_clearNeededXMMregs();
|
||||
}
|
||||
|
||||
void recSetBranchL(int ltz)
|
||||
static void recSetBranchL(int ltz)
|
||||
{
|
||||
int regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
|
||||
const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
const int regsxmm = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
|
||||
_eeFlushAllDirty();
|
||||
|
||||
if (regs >= 0)
|
||||
if (regsxmm >= 0)
|
||||
{
|
||||
xMOVMSKPS(eax, xRegisterSSE(regs));
|
||||
|
||||
_eeFlushAllUnused();
|
||||
|
||||
xMOVMSKPS(eax, xRegisterSSE(regsxmm));
|
||||
xTEST(al, 2);
|
||||
|
||||
if (ltz)
|
||||
|
@ -194,17 +119,19 @@ void recSetBranchL(int ltz)
|
|||
return;
|
||||
}
|
||||
|
||||
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0);
|
||||
if (regs >= 0)
|
||||
xCMP(xRegister64(regs), 0);
|
||||
else
|
||||
xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], 0);
|
||||
|
||||
if (ltz)
|
||||
j32Ptr[0] = JGE32(0);
|
||||
else
|
||||
j32Ptr[0] = JL32(0);
|
||||
|
||||
_clearNeededXMMregs();
|
||||
}
|
||||
|
||||
//// BEQ
|
||||
void recBEQ_const()
|
||||
static void recBEQ_const()
|
||||
{
|
||||
u32 branchTo;
|
||||
|
||||
|
@ -213,48 +140,62 @@ void recBEQ_const()
|
|||
else
|
||||
branchTo = pc + 4;
|
||||
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
|
||||
void recBEQ_process(int info, int process)
|
||||
static void recBEQ_process(int process)
|
||||
{
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
if (_Rs_ == _Rt_)
|
||||
{
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
else
|
||||
{
|
||||
recSetBranchEQ(info, 0, process);
|
||||
const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0);
|
||||
|
||||
recSetBranchEQ(0, process);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
x86SetJ32(j32Ptr[1]);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
// recopy the next inst
|
||||
pc -= 4;
|
||||
LoadBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(pc);
|
||||
}
|
||||
}
|
||||
|
||||
void recBEQ_(int info) { recBEQ_process(info, 0); }
|
||||
void recBEQ_consts(int info) { recBEQ_process(info, PROCESS_CONSTS); }
|
||||
void recBEQ_constt(int info) { recBEQ_process(info, PROCESS_CONSTT); }
|
||||
|
||||
EERECOMPILE_CODE0(BEQ, XMMINFO_READS | XMMINFO_READT);
|
||||
void recBEQ()
|
||||
{
|
||||
// prefer using the host register over an immediate, it'll be smaller code.
|
||||
if (GPR_IS_CONST2(_Rs_, _Rt_))
|
||||
recBEQ_const();
|
||||
else if (GPR_IS_CONST1(_Rs_) && _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ) < 0)
|
||||
recBEQ_process(PROCESS_CONSTS);
|
||||
else if (GPR_IS_CONST1(_Rt_) && _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ) < 0)
|
||||
recBEQ_process(PROCESS_CONSTT);
|
||||
else
|
||||
recBEQ_process(0);
|
||||
}
|
||||
|
||||
//// BNE
|
||||
void recBNE_const()
|
||||
static void recBNE_const()
|
||||
{
|
||||
u32 branchTo;
|
||||
|
||||
|
@ -263,51 +204,65 @@ void recBNE_const()
|
|||
else
|
||||
branchTo = pc + 4;
|
||||
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
|
||||
void recBNE_process(int info, int process)
|
||||
static void recBNE_process(int process)
|
||||
{
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
if (_Rs_ == _Rt_)
|
||||
{
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(pc);
|
||||
return;
|
||||
}
|
||||
|
||||
recSetBranchEQ(info, 1, process);
|
||||
const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0);
|
||||
|
||||
recSetBranchEQ(1, process);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[1]);
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
// recopy the next inst
|
||||
pc -= 4;
|
||||
LoadBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(pc);
|
||||
}
|
||||
|
||||
void recBNE_(int info) { recBNE_process(info, 0); }
|
||||
void recBNE_consts(int info) { recBNE_process(info, PROCESS_CONSTS); }
|
||||
void recBNE_constt(int info) { recBNE_process(info, PROCESS_CONSTT); }
|
||||
|
||||
EERECOMPILE_CODE0(BNE, XMMINFO_READS | XMMINFO_READT);
|
||||
void recBNE()
|
||||
{
|
||||
if (GPR_IS_CONST2(_Rs_, _Rt_))
|
||||
recBNE_const();
|
||||
else if (GPR_IS_CONST1(_Rs_) && _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ) < 0)
|
||||
recBNE_process(PROCESS_CONSTS);
|
||||
else if (GPR_IS_CONST1(_Rt_) && _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ) < 0)
|
||||
recBNE_process(PROCESS_CONSTT);
|
||||
else
|
||||
recBNE_process(0);
|
||||
}
|
||||
|
||||
//// BEQL
|
||||
void recBEQL_const()
|
||||
static void recBEQL_const()
|
||||
{
|
||||
if (g_cpuConstRegs[_Rs_].SD[0] == g_cpuConstRegs[_Rt_].SD[0])
|
||||
{
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
else
|
||||
|
@ -316,35 +271,40 @@ void recBEQL_const()
|
|||
}
|
||||
}
|
||||
|
||||
void recBEQL_process(int info, int process)
|
||||
static void recBEQL_process(int process)
|
||||
{
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
recSetBranchEQ(info, 0, process);
|
||||
recSetBranchEQ(0, process);
|
||||
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
x86SetJ32(j32Ptr[1]);
|
||||
|
||||
LoadBranchState();
|
||||
SetBranchImm(pc);
|
||||
}
|
||||
|
||||
void recBEQL_(int info) { recBEQL_process(info, 0); }
|
||||
void recBEQL_consts(int info) { recBEQL_process(info, PROCESS_CONSTS); }
|
||||
void recBEQL_constt(int info) { recBEQL_process(info, PROCESS_CONSTT); }
|
||||
|
||||
EERECOMPILE_CODE0(BEQL, XMMINFO_READS | XMMINFO_READT);
|
||||
void recBEQL()
|
||||
{
|
||||
if (GPR_IS_CONST2(_Rs_, _Rt_))
|
||||
recBEQL_const();
|
||||
else if (GPR_IS_CONST1(_Rs_) && _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ) < 0)
|
||||
recBEQL_process(PROCESS_CONSTS);
|
||||
else if (GPR_IS_CONST1(_Rt_) && _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ) < 0)
|
||||
recBEQL_process(PROCESS_CONSTT);
|
||||
else
|
||||
recBEQL_process(0);
|
||||
}
|
||||
|
||||
//// BNEL
|
||||
void recBNEL_const()
|
||||
static void recBNEL_const()
|
||||
{
|
||||
if (g_cpuConstRegs[_Rs_].SD[0] != g_cpuConstRegs[_Rt_].SD[0])
|
||||
{
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
else
|
||||
|
@ -353,29 +313,34 @@ void recBNEL_const()
|
|||
}
|
||||
}
|
||||
|
||||
void recBNEL_process(int info, int process)
|
||||
static void recBNEL_process(int process)
|
||||
{
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
recSetBranchEQ(info, 0, process);
|
||||
recSetBranchEQ(0, process);
|
||||
|
||||
SaveBranchState();
|
||||
SetBranchImm(pc + 4);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
x86SetJ32(j32Ptr[1]);
|
||||
|
||||
// recopy the next inst
|
||||
LoadBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
|
||||
void recBNEL_(int info) { recBNEL_process(info, 0); }
|
||||
void recBNEL_consts(int info) { recBNEL_process(info, PROCESS_CONSTS); }
|
||||
void recBNEL_constt(int info) { recBNEL_process(info, PROCESS_CONSTT); }
|
||||
|
||||
EERECOMPILE_CODE0(BNEL, XMMINFO_READS | XMMINFO_READT);
|
||||
void recBNEL()
|
||||
{
|
||||
if (GPR_IS_CONST2(_Rs_, _Rt_))
|
||||
recBNEL_const();
|
||||
else if (GPR_IS_CONST1(_Rs_) && _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ) < 0)
|
||||
recBNEL_process(PROCESS_CONSTS);
|
||||
else if (GPR_IS_CONST1(_Rt_) && _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ) < 0)
|
||||
recBNEL_process(PROCESS_CONSTT);
|
||||
else
|
||||
recBNEL_process(0);
|
||||
}
|
||||
|
||||
/*********************************************************
|
||||
* Register branch logic *
|
||||
|
@ -402,36 +367,43 @@ void recBLTZAL()
|
|||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeOnWriteReg(31, 0);
|
||||
_eeFlushAllUnused();
|
||||
_eeFlushAllDirty();
|
||||
|
||||
_deleteEEreg(31, 0);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0);
|
||||
xMOV64(rax, pc + 4);
|
||||
xMOV(ptr64[&cpuRegs.GPR.n.ra.UD[0]], rax);
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
if (!(g_cpuConstRegs[_Rs_].SD[0] < 0))
|
||||
branchTo = pc + 4;
|
||||
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
return;
|
||||
}
|
||||
|
||||
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
|
||||
|
||||
recSetBranchL(1);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
SaveBranchState();
|
||||
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
// recopy the next inst
|
||||
pc -= 4;
|
||||
LoadBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(pc);
|
||||
}
|
||||
|
@ -444,36 +416,43 @@ void recBGEZAL()
|
|||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeOnWriteReg(31, 0);
|
||||
_eeFlushAllUnused();
|
||||
_eeFlushAllDirty();
|
||||
|
||||
_deleteEEreg(31, 0);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0);
|
||||
xMOV64(rax, pc + 4);
|
||||
xMOV(ptr64[&cpuRegs.GPR.n.ra.UD[0]], rax);
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0))
|
||||
branchTo = pc + 4;
|
||||
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
return;
|
||||
}
|
||||
|
||||
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
|
||||
|
||||
recSetBranchL(0);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
SaveBranchState();
|
||||
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
// recopy the next inst
|
||||
pc -= 4;
|
||||
LoadBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(pc);
|
||||
}
|
||||
|
@ -486,11 +465,11 @@ void recBLTZALL()
|
|||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeOnWriteReg(31, 0);
|
||||
_eeFlushAllUnused();
|
||||
_eeFlushAllDirty();
|
||||
|
||||
_deleteEEreg(31, 0);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0);
|
||||
xMOV64(rax, pc + 4);
|
||||
xMOV(ptr64[&cpuRegs.GPR.n.ra.UD[0]], rax);
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
|
@ -498,7 +477,7 @@ void recBLTZALL()
|
|||
SetBranchImm(pc + 4);
|
||||
else
|
||||
{
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
return;
|
||||
|
@ -507,7 +486,7 @@ void recBLTZALL()
|
|||
recSetBranchL(1);
|
||||
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
@ -524,11 +503,11 @@ void recBGEZALL()
|
|||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeOnWriteReg(31, 0);
|
||||
_eeFlushAllUnused();
|
||||
_eeFlushAllDirty();
|
||||
|
||||
_deleteEEreg(31, 0);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[0]], pc + 4);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0);
|
||||
xMOV64(rax, pc + 4);
|
||||
xMOV(ptr64[&cpuRegs.GPR.n.ra.UD[0]], rax);
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
|
@ -536,7 +515,7 @@ void recBGEZALL()
|
|||
SetBranchImm(pc + 4);
|
||||
else
|
||||
{
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
return;
|
||||
|
@ -545,7 +524,7 @@ void recBGEZALL()
|
|||
recSetBranchL(0);
|
||||
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
@ -562,43 +541,44 @@ void recBLEZ()
|
|||
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeFlushAllUnused();
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
if (!(g_cpuConstRegs[_Rs_].SD[0] <= 0))
|
||||
branchTo = pc + 4;
|
||||
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
return;
|
||||
}
|
||||
|
||||
_flushEEreg(_Rs_);
|
||||
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
|
||||
const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
_eeFlushAllDirty();
|
||||
|
||||
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0);
|
||||
j8Ptr[0] = JL8(0);
|
||||
j32Ptr[1] = JG32(0);
|
||||
if (regs >= 0)
|
||||
xCMP(xRegister64(regs), 0);
|
||||
else
|
||||
xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], 0);
|
||||
|
||||
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], 0);
|
||||
j32Ptr[2] = JNZ32(0);
|
||||
|
||||
x86SetJ8(j8Ptr[0]);
|
||||
|
||||
_clearNeededXMMregs();
|
||||
j32Ptr[0] = JG32(0);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[1]);
|
||||
x86SetJ32(j32Ptr[2]);
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
// recopy the next inst
|
||||
pc -= 4;
|
||||
LoadBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(pc);
|
||||
}
|
||||
|
@ -610,43 +590,44 @@ void recBGTZ()
|
|||
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeFlushAllUnused();
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
if (!(g_cpuConstRegs[_Rs_].SD[0] > 0))
|
||||
branchTo = pc + 4;
|
||||
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
return;
|
||||
}
|
||||
|
||||
_flushEEreg(_Rs_);
|
||||
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
|
||||
const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
_eeFlushAllDirty();
|
||||
|
||||
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0);
|
||||
j8Ptr[0] = JG8(0);
|
||||
j32Ptr[1] = JL32(0);
|
||||
if (regs >= 0)
|
||||
xCMP(xRegister64(regs), 0);
|
||||
else
|
||||
xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], 0);
|
||||
|
||||
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], 0);
|
||||
j32Ptr[2] = JZ32(0);
|
||||
|
||||
x86SetJ8(j8Ptr[0]);
|
||||
|
||||
_clearNeededXMMregs();
|
||||
j32Ptr[0] = JLE32(0);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[1]);
|
||||
x86SetJ32(j32Ptr[2]);
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
// recopy the next inst
|
||||
pc -= 4;
|
||||
LoadBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(pc);
|
||||
}
|
||||
|
@ -658,31 +639,37 @@ void recBLTZ()
|
|||
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeFlushAllUnused();
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
if (!(g_cpuConstRegs[_Rs_].SD[0] < 0))
|
||||
branchTo = pc + 4;
|
||||
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
return;
|
||||
}
|
||||
|
||||
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
|
||||
_eeFlushAllDirty();
|
||||
recSetBranchL(1);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
// recopy the next inst
|
||||
pc -= 4;
|
||||
LoadBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(pc);
|
||||
}
|
||||
|
@ -694,31 +681,38 @@ void recBGEZ()
|
|||
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeFlushAllUnused();
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0))
|
||||
branchTo = pc + 4;
|
||||
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
return;
|
||||
}
|
||||
|
||||
const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
|
||||
_eeFlushAllDirty();
|
||||
|
||||
recSetBranchL(0);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
||||
if (!swap)
|
||||
{
|
||||
// recopy the next inst
|
||||
pc -= 4;
|
||||
LoadBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
}
|
||||
|
||||
SetBranchImm(pc);
|
||||
}
|
||||
|
@ -728,9 +722,7 @@ void recBLTZL()
|
|||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::BLTZL);
|
||||
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeFlushAllUnused();
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
|
@ -738,16 +730,17 @@ void recBLTZL()
|
|||
SetBranchImm(pc + 4);
|
||||
else
|
||||
{
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
_eeFlushAllDirty();
|
||||
recSetBranchL(1);
|
||||
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
@ -762,9 +755,7 @@ void recBGEZL()
|
|||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::BGEZL);
|
||||
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeFlushAllUnused();
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
|
@ -772,16 +763,17 @@ void recBGEZL()
|
|||
SetBranchImm(pc + 4);
|
||||
else
|
||||
{
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
_eeFlushAllDirty();
|
||||
recSetBranchL(0);
|
||||
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
@ -802,9 +794,7 @@ void recBLEZL()
|
|||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::BLEZL);
|
||||
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeFlushAllUnused();
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
|
@ -812,32 +802,27 @@ void recBLEZL()
|
|||
SetBranchImm(pc + 4);
|
||||
else
|
||||
{
|
||||
_clearNeededXMMregs();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
_flushEEreg(_Rs_);
|
||||
const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
_eeFlushAllDirty();
|
||||
|
||||
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0);
|
||||
j32Ptr[0] = JL32(0);
|
||||
j32Ptr[1] = JG32(0);
|
||||
if (regs >= 0)
|
||||
xCMP(xRegister64(regs), 0);
|
||||
else
|
||||
xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], 0);
|
||||
|
||||
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], 0);
|
||||
j32Ptr[2] = JNZ32(0);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
||||
_clearNeededXMMregs();
|
||||
j32Ptr[0] = JG32(0);
|
||||
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[1]);
|
||||
x86SetJ32(j32Ptr[2]);
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
||||
LoadBranchState();
|
||||
SetBranchImm(pc);
|
||||
|
@ -848,9 +833,7 @@ void recBGTZL()
|
|||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::BGTZL);
|
||||
|
||||
u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
_eeFlushAllUnused();
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
|
@ -859,31 +842,27 @@ void recBGTZL()
|
|||
else
|
||||
{
|
||||
_clearNeededXMMregs();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
_flushEEreg(_Rs_);
|
||||
const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
_eeFlushAllDirty();
|
||||
|
||||
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], 0);
|
||||
j32Ptr[0] = JG32(0);
|
||||
j32Ptr[1] = JL32(0);
|
||||
if (regs >= 0)
|
||||
xCMP(xRegister64(regs), 0);
|
||||
else
|
||||
xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], 0);
|
||||
|
||||
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], 0);
|
||||
j32Ptr[2] = JZ32(0);
|
||||
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
||||
_clearNeededXMMregs();
|
||||
j32Ptr[0] = JLE32(0);
|
||||
|
||||
SaveBranchState();
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
SetBranchImm(branchTo);
|
||||
|
||||
x86SetJ32(j32Ptr[1]);
|
||||
x86SetJ32(j32Ptr[2]);
|
||||
x86SetJ32(j32Ptr[0]);
|
||||
|
||||
LoadBranchState();
|
||||
SetBranchImm(pc);
|
||||
|
@ -891,6 +870,4 @@ void recBGTZL()
|
|||
|
||||
#endif
|
||||
|
||||
} // namespace OpcodeImpl
|
||||
} // namespace Dynarec
|
||||
} // namespace R5900
|
||||
} // namespace R5900::Dynarec::OpcodeImpl
|
||||
|
|
|
@ -14,8 +14,6 @@
|
|||
*/
|
||||
|
||||
|
||||
// recompiler reworked to add dynamic linking zerofrog(@gmail.com) Jan06
|
||||
|
||||
#include "PrecompiledHeader.h"
|
||||
|
||||
#include "Common.h"
|
||||
|
@ -24,9 +22,8 @@
|
|||
|
||||
using namespace x86Emitter;
|
||||
|
||||
namespace R5900 {
|
||||
namespace Dynarec {
|
||||
namespace OpcodeImpl {
|
||||
namespace R5900::Dynarec::OpcodeImpl
|
||||
{
|
||||
|
||||
/*********************************************************
|
||||
* Jump to target *
|
||||
|
@ -50,7 +47,7 @@ void recJ()
|
|||
|
||||
// SET_FPUSTATE;
|
||||
u32 newpc = (_InstrucTarget_ << 2) + (pc & 0xf0000000);
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
if (EmuConfig.Gamefixes.GoemonTlbHack)
|
||||
SetBranchImm(vtlb_V2P(newpc));
|
||||
else
|
||||
|
@ -76,7 +73,7 @@ void recJAL()
|
|||
xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0);
|
||||
}
|
||||
|
||||
recompileNextInstruction(1);
|
||||
recompileNextInstruction(true, false);
|
||||
if (EmuConfig.Gamefixes.GoemonTlbHack)
|
||||
SetBranchImm(vtlb_V2P(newpc));
|
||||
else
|
||||
|
@ -101,16 +98,9 @@ void recJALR()
|
|||
{
|
||||
EE::Profiler.EmitOp(eeOpcode::JALR);
|
||||
|
||||
int newpc = pc + 4;
|
||||
_allocX86reg(calleeSavedReg2d, X86TYPE_PCWRITEBACK, 0, MODE_WRITE);
|
||||
_eeMoveGPRtoR(calleeSavedReg2d, _Rs_);
|
||||
const u32 newpc = pc + 4;
|
||||
const bool swap = (EmuConfig.Gamefixes.GoemonTlbHack || _Rd_ == _Rs_) ? false : TrySwapDelaySlot(_Rs_, 0, _Rd_);
|
||||
|
||||
if (EmuConfig.Gamefixes.GoemonTlbHack)
|
||||
{
|
||||
xMOV(ecx, calleeSavedReg2d);
|
||||
vtlb_DynV2P();
|
||||
xMOV(calleeSavedReg2d, eax);
|
||||
}
|
||||
// uncomment when there are NO instructions that need to call interpreter
|
||||
// int mmreg;
|
||||
// if (GPR_IS_CONST1(_Rs_))
|
||||
|
@ -129,6 +119,19 @@ void recJALR()
|
|||
// }
|
||||
// }
|
||||
|
||||
int wbreg = -1;
|
||||
if (!swap)
|
||||
{
|
||||
wbreg = _allocX86reg(X86TYPE_PCWRITEBACK, 0, MODE_WRITE | MODE_CALLEESAVED);
|
||||
_eeMoveGPRtoR(xRegister32(wbreg), _Rs_);
|
||||
|
||||
if (EmuConfig.Gamefixes.GoemonTlbHack)
|
||||
{
|
||||
xMOV(ecx, xRegister32(wbreg));
|
||||
vtlb_DynV2P();
|
||||
xMOV(xRegister32(wbreg), eax);
|
||||
}
|
||||
}
|
||||
|
||||
if (_Rd_)
|
||||
{
|
||||
|
@ -136,36 +139,46 @@ void recJALR()
|
|||
if (EE_CONST_PROP)
|
||||
{
|
||||
GPR_SET_CONST(_Rd_);
|
||||
g_cpuConstRegs[_Rd_].UL[0] = newpc;
|
||||
g_cpuConstRegs[_Rd_].UL[1] = 0;
|
||||
g_cpuConstRegs[_Rd_].UD[0] = newpc;
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], newpc);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0);
|
||||
xWriteImm64ToMem(&cpuRegs.GPR.r[_Rd_].UD[0], rax, newpc);
|
||||
}
|
||||
}
|
||||
|
||||
_clearNeededXMMregs();
|
||||
recompileNextInstruction(1);
|
||||
|
||||
if (x86regs[calleeSavedReg2d.GetId()].inuse)
|
||||
if (!swap)
|
||||
{
|
||||
pxAssert(x86regs[calleeSavedReg2d.GetId()].type == X86TYPE_PCWRITEBACK);
|
||||
xMOV(ptr[&cpuRegs.pc], calleeSavedReg2d);
|
||||
x86regs[calleeSavedReg2d.GetId()].inuse = 0;
|
||||
recompileNextInstruction(true, false);
|
||||
|
||||
// the next instruction may have flushed the register.. so reload it if so.
|
||||
if (x86regs[wbreg].inuse && x86regs[wbreg].type == X86TYPE_PCWRITEBACK)
|
||||
{
|
||||
xMOV(ptr[&cpuRegs.pc], xRegister32(wbreg));
|
||||
x86regs[wbreg].inuse = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(eax, ptr[&cpuRegs.pcWriteback]);
|
||||
xMOV(ptr[&cpuRegs.pc], eax);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (GPR_IS_DIRTY_CONST(_Rs_) || _hasX86reg(X86TYPE_GPR, _Rs_, 0))
|
||||
{
|
||||
const int x86reg = _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
xMOV(ptr32[&cpuRegs.pc], xRegister32(x86reg));
|
||||
}
|
||||
else
|
||||
{
|
||||
_eeMoveGPRtoM((uptr)&cpuRegs.pc, _Rs_);
|
||||
}
|
||||
}
|
||||
|
||||
SetBranchReg(0xffffffff);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace OpcodeImpl
|
||||
} // namespace Dynarec
|
||||
} // namespace R5900
|
||||
} // namespace R5900::Dynarec::OpcodeImpl
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -22,9 +22,8 @@
|
|||
|
||||
using namespace x86Emitter;
|
||||
|
||||
namespace R5900 {
|
||||
namespace Dynarec {
|
||||
namespace OpcodeImpl {
|
||||
namespace R5900::Dynarec::OpcodeImpl
|
||||
{
|
||||
|
||||
/*********************************************************
|
||||
* Shift arithmetic with constant shift *
|
||||
|
@ -56,11 +55,6 @@ static void xCopy64(u64* dst, u64* src)
|
|||
xMOV(ptr64[dst], rax);
|
||||
}
|
||||
|
||||
static void xCMPToZero64(u64* mem)
|
||||
{
|
||||
xCMP(ptr64[mem], 0);
|
||||
}
|
||||
|
||||
/*********************************************************
|
||||
* Load higher 16 bits of the first word in GPR with imm *
|
||||
* Format: OP rt, immediate *
|
||||
|
@ -69,22 +63,13 @@ static void xCMPToZero64(u64* mem)
|
|||
//// LUI
|
||||
void recLUI()
|
||||
{
|
||||
int mmreg;
|
||||
if (!_Rt_)
|
||||
return;
|
||||
|
||||
_eeOnWriteReg(_Rt_, 1);
|
||||
|
||||
if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE)) >= 0)
|
||||
{
|
||||
if (xmmregs[mmreg].mode & MODE_WRITE)
|
||||
{
|
||||
xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rt_].UL[2]], xRegisterSSE(mmreg));
|
||||
}
|
||||
xmmregs[mmreg].inuse = 0;
|
||||
}
|
||||
|
||||
_deleteEEreg(_Rt_, 0);
|
||||
// need to flush the upper 64 bits for xmm
|
||||
GPR_DEL_CONST(_Rt_);
|
||||
_deleteGPRtoX86reg(_Rt_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH_AND_FREE);
|
||||
|
||||
if (EE_CONST_PROP)
|
||||
{
|
||||
|
@ -93,363 +78,300 @@ void recLUI()
|
|||
}
|
||||
else
|
||||
{
|
||||
xMOV(eax, (s32)(cpuRegs.code << 16));
|
||||
eeSignExtendTo(_Rt_);
|
||||
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
|
||||
xMOV64(xRegister64(regt), (s64)(s32)(cpuRegs.code << 16));
|
||||
}
|
||||
|
||||
EE::Profiler.EmitOp(eeOpcode::LUI);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recMFHILO(int hi)
|
||||
static void recMFHILO(bool hi, bool upper)
|
||||
{
|
||||
int reghi, regd, xmmhilo;
|
||||
if (!_Rd_)
|
||||
return;
|
||||
|
||||
xmmhilo = hi ? XMMGPR_HI : XMMGPR_LO;
|
||||
reghi = _checkXMMreg(XMMTYPE_GPRREG, xmmhilo, MODE_READ);
|
||||
|
||||
// kill any constants on rd, lower 64 bits get written regardless of upper
|
||||
_eeOnWriteReg(_Rd_, 0);
|
||||
|
||||
regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE);
|
||||
|
||||
if (reghi >= 0)
|
||||
const int reg = hi ? XMMGPR_HI : XMMGPR_LO;
|
||||
const int xmmd = EEINST_XMMUSEDTEST(_Rd_) ? _allocGPRtoXMMreg(_Rd_, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE);
|
||||
const int xmmhilo = EEINST_XMMUSEDTEST(reg) ? _allocGPRtoXMMreg(reg, MODE_READ) : _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ);
|
||||
if (xmmd >= 0)
|
||||
{
|
||||
if (regd >= 0)
|
||||
if (xmmhilo >= 0)
|
||||
{
|
||||
pxAssert(regd != reghi);
|
||||
|
||||
xmmregs[regd].inuse = 0;
|
||||
|
||||
xMOVQ(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], xRegisterSSE(reghi));
|
||||
|
||||
if (xmmregs[regd].mode & MODE_WRITE)
|
||||
if (upper)
|
||||
xMOVHL.PS(xRegisterSSE(xmmd), xRegisterSSE(xmmhilo));
|
||||
else
|
||||
xMOVSD(xRegisterSSE(xmmd), xRegisterSSE(xmmhilo));
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rd_].UL[2]], xRegisterSSE(regd));
|
||||
const int gprhilo = upper ? -1 : _allocIfUsedGPRtoX86(reg, MODE_READ);
|
||||
if (gprhilo >= 0)
|
||||
xPINSR.Q(xRegisterSSE(xmmd), xRegister64(gprhilo), 0);
|
||||
else
|
||||
xPINSR.Q(xRegisterSSE(xmmd), ptr64[hi ? &cpuRegs.HI.UD[static_cast<u8>(upper)] : &cpuRegs.LO.UD[static_cast<u8>(upper)]], 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
_deleteEEreg(_Rd_, 0);
|
||||
xMOVQ(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(reghi));
|
||||
// try rename {hi,lo} -> rd
|
||||
const int gprreg = upper ? -1 : _checkX86reg(X86TYPE_GPR, reg, MODE_READ);
|
||||
if (gprreg >= 0 && _eeTryRenameReg(_Rd_, reg, gprreg, -1, 0) >= 0)
|
||||
return;
|
||||
|
||||
const int gprd = _allocIfUsedGPRtoX86(_Rd_, MODE_WRITE);
|
||||
if (gprd >= 0 && xmmhilo >= 0)
|
||||
{
|
||||
pxAssert(gprreg < 0);
|
||||
if (upper)
|
||||
xPEXTR.Q(xRegister64(gprd), xRegisterSSE(xmmhilo), 1);
|
||||
else
|
||||
xMOVD(xRegister64(gprd), xRegisterSSE(xmmhilo));
|
||||
}
|
||||
else if (gprd < 0 && xmmhilo >= 0)
|
||||
{
|
||||
pxAssert(gprreg < 0);
|
||||
if (upper)
|
||||
xPEXTR.Q(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(xmmhilo), 1);
|
||||
else
|
||||
xMOVQ(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(xmmhilo));
|
||||
}
|
||||
else if (gprd >= 0)
|
||||
{
|
||||
if (gprreg >= 0)
|
||||
xMOV(xRegister64(gprd), xRegister64(gprreg));
|
||||
else
|
||||
xMOV(xRegister64(gprd), ptr64[hi ? &cpuRegs.HI.UD[static_cast<u8>(upper)] : &cpuRegs.LO.UD[static_cast<u8>(upper)]]);
|
||||
}
|
||||
else if (gprreg >= 0)
|
||||
{
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegister64(gprreg));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (regd >= 0)
|
||||
{
|
||||
if (EEINST_ISLIVE2(_Rd_))
|
||||
xMOVL.PS(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]);
|
||||
else
|
||||
xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]);
|
||||
}
|
||||
else
|
||||
{
|
||||
_deleteEEreg(_Rd_, 0);
|
||||
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], hi ? &cpuRegs.HI.UD[0] : &cpuRegs.LO.UD[0]);
|
||||
xMOV(rax, ptr64[hi ? &cpuRegs.HI.UD[static_cast<u8>(upper)] : &cpuRegs.LO.UD[static_cast<u8>(upper)]]);
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void recMTHILO(int hi)
|
||||
static void recMTHILO(bool hi, bool upper)
|
||||
{
|
||||
int reghi, regs, xmmhilo;
|
||||
uptr addrhilo;
|
||||
const int reg = hi ? XMMGPR_HI : XMMGPR_LO;
|
||||
_eeOnWriteReg(reg, 0);
|
||||
|
||||
xmmhilo = hi ? XMMGPR_HI : XMMGPR_LO;
|
||||
addrhilo = hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0];
|
||||
|
||||
regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
|
||||
reghi = _checkXMMreg(XMMTYPE_GPRREG, xmmhilo, MODE_READ | MODE_WRITE);
|
||||
|
||||
if (reghi >= 0)
|
||||
const int xmms = EEINST_XMMUSEDTEST(_Rs_) ? _allocGPRtoXMMreg(_Rs_, MODE_READ) : _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
|
||||
const int xmmhilo = EEINST_XMMUSEDTEST(reg) ? _allocGPRtoXMMreg(reg, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ | MODE_WRITE);
|
||||
if (xmms >= 0)
|
||||
{
|
||||
if (regs >= 0)
|
||||
if (xmmhilo >= 0)
|
||||
{
|
||||
pxAssert(reghi != regs);
|
||||
|
||||
_deleteGPRtoXMMreg(_Rs_, 0);
|
||||
xPUNPCK.HQDQ(xRegisterSSE(reghi), xRegisterSSE(reghi));
|
||||
xPUNPCK.LQDQ(xRegisterSSE(regs), xRegisterSSE(reghi));
|
||||
|
||||
// swap regs
|
||||
xmmregs[regs] = xmmregs[reghi];
|
||||
xmmregs[reghi].inuse = 0;
|
||||
xmmregs[regs].mode |= MODE_WRITE;
|
||||
if (upper)
|
||||
xMOVLH.PS(xRegisterSSE(xmmhilo), xRegisterSSE(xmms));
|
||||
else
|
||||
xMOVSD(xRegisterSSE(xmmhilo), xRegisterSSE(xmms));
|
||||
}
|
||||
else
|
||||
{
|
||||
_flushConstReg(_Rs_);
|
||||
xMOVL.PS(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
xmmregs[reghi].mode |= MODE_WRITE;
|
||||
const int gprhilo = upper ? -1 : _allocIfUsedGPRtoX86(reg, MODE_WRITE);
|
||||
if (gprhilo >= 0)
|
||||
xMOVD(xRegister64(gprhilo), xRegisterSSE(xmms)); // actually movq
|
||||
else
|
||||
xMOVQ(ptr64[hi ? &cpuRegs.HI.UD[static_cast<u8>(upper)] : &cpuRegs.LO.UD[static_cast<u8>(upper)]], xRegisterSSE(xmms));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (regs >= 0)
|
||||
// try rename rs -> {hi,lo}
|
||||
const int gprs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
if (gprs >= 0 && !upper && _eeTryRenameReg(reg, _Rs_, gprs, -1, 0) >= 0)
|
||||
return;
|
||||
|
||||
if (xmmhilo >= 0)
|
||||
{
|
||||
xMOVQ(ptr[(void*)(addrhilo)], xRegisterSSE(regs));
|
||||
if (gprs >= 0)
|
||||
{
|
||||
xPINSR.Q(xRegisterSSE(xmmhilo), xRegister64(gprs), static_cast<u8>(upper));
|
||||
}
|
||||
else if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
_eeMoveGPRtoR(rax, _Rs_);
|
||||
xPINSR.Q(xRegisterSSE(xmmhilo), rax, static_cast<u8>(upper));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
xWriteImm64ToMem((u64*)addrhilo, rax, g_cpuConstRegs[_Rs_].UD[0]);
|
||||
xPINSR.Q(xRegisterSSE(xmmhilo), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], static_cast<u8>(upper));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
_eeMoveGPRtoR(ecx, _Rs_);
|
||||
_flushEEreg(_Rs_);
|
||||
xCopy64((u64*)addrhilo, &cpuRegs.GPR.r[_Rs_].UD[0]);
|
||||
}
|
||||
const int gprreg = upper ? -1 : _allocIfUsedGPRtoX86(reg, MODE_WRITE);
|
||||
if (gprreg >= 0)
|
||||
_eeMoveGPRtoR(xRegister64(gprreg), _Rs_);
|
||||
else
|
||||
_eeMoveGPRtoM((uptr)(hi ? &cpuRegs.HI.UD[static_cast<u8>(upper)] : &cpuRegs.LO.UD[static_cast<u8>(upper)]), _Rs_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void recMFHI()
|
||||
{
|
||||
recMFHILO(1);
|
||||
recMFHILO(true, false);
|
||||
EE::Profiler.EmitOp(eeOpcode::MFHI);
|
||||
}
|
||||
|
||||
void recMFLO()
|
||||
{
|
||||
recMFHILO(0);
|
||||
recMFHILO(false, false);
|
||||
EE::Profiler.EmitOp(eeOpcode::MFLO);
|
||||
}
|
||||
|
||||
void recMTHI()
|
||||
{
|
||||
recMTHILO(1);
|
||||
recMTHILO(true, false);
|
||||
EE::Profiler.EmitOp(eeOpcode::MTHI);
|
||||
}
|
||||
|
||||
void recMTLO()
|
||||
{
|
||||
recMTHILO(0);
|
||||
recMTHILO(false, false);
|
||||
EE::Profiler.EmitOp(eeOpcode::MTLO);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recMFHILO1(int hi)
|
||||
{
|
||||
int reghi, regd, xmmhilo;
|
||||
if (!_Rd_)
|
||||
return;
|
||||
|
||||
xmmhilo = hi ? XMMGPR_HI : XMMGPR_LO;
|
||||
reghi = _checkXMMreg(XMMTYPE_GPRREG, xmmhilo, MODE_READ);
|
||||
|
||||
_eeOnWriteReg(_Rd_, 0);
|
||||
|
||||
regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE);
|
||||
|
||||
if (reghi >= 0)
|
||||
{
|
||||
if (regd >= 0)
|
||||
{
|
||||
xMOVHL.PS(xRegisterSSE(regd), xRegisterSSE(reghi));
|
||||
xmmregs[regd].mode |= MODE_WRITE;
|
||||
}
|
||||
else
|
||||
{
|
||||
_deleteEEreg(_Rd_, 0);
|
||||
xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(reghi));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (regd >= 0)
|
||||
{
|
||||
if (EEINST_ISLIVE2(_Rd_))
|
||||
{
|
||||
xPUNPCK.HQDQ(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]);
|
||||
xPSHUF.D(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[1] : (uptr)&cpuRegs.LO.UD[1])]);
|
||||
}
|
||||
|
||||
xmmregs[regd].mode |= MODE_WRITE;
|
||||
}
|
||||
else
|
||||
{
|
||||
_deleteEEreg(_Rd_, 0);
|
||||
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], hi ? &cpuRegs.HI.UD[1] : &cpuRegs.LO.UD[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void recMTHILO1(int hi)
|
||||
{
|
||||
int reghi, regs, xmmhilo;
|
||||
uptr addrhilo;
|
||||
|
||||
xmmhilo = hi ? XMMGPR_HI : XMMGPR_LO;
|
||||
addrhilo = hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0];
|
||||
|
||||
regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
|
||||
reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, xmmhilo, MODE_WRITE | MODE_READ);
|
||||
|
||||
if (reghi >= 0)
|
||||
{
|
||||
if (regs >= 0)
|
||||
{
|
||||
xPUNPCK.LQDQ(xRegisterSSE(reghi), xRegisterSSE(regs));
|
||||
}
|
||||
else
|
||||
{
|
||||
_flushEEreg(_Rs_);
|
||||
xPUNPCK.LQDQ(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (regs >= 0)
|
||||
{
|
||||
xMOVQ(ptr[(void*)(addrhilo + 8)], xRegisterSSE(regs));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
xWriteImm64ToMem((u64*)(addrhilo + 8), rax, g_cpuConstRegs[_Rs_].UD[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
_flushEEreg(_Rs_);
|
||||
xCopy64((u64*)(addrhilo + 8), &cpuRegs.GPR.r[_Rs_].UD[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void recMFHI1()
|
||||
{
|
||||
recMFHILO1(1);
|
||||
recMFHILO(true, true);
|
||||
EE::Profiler.EmitOp(eeOpcode::MFHI1);
|
||||
}
|
||||
|
||||
void recMFLO1()
|
||||
{
|
||||
recMFHILO1(0);
|
||||
recMFHILO(false, true);
|
||||
EE::Profiler.EmitOp(eeOpcode::MFLO1);
|
||||
}
|
||||
|
||||
void recMTHI1()
|
||||
{
|
||||
recMTHILO1(1);
|
||||
recMTHILO(true, true);
|
||||
EE::Profiler.EmitOp(eeOpcode::MTHI1);
|
||||
}
|
||||
|
||||
void recMTLO1()
|
||||
{
|
||||
recMTHILO1(0);
|
||||
recMTHILO(false, true);
|
||||
EE::Profiler.EmitOp(eeOpcode::MTLO1);
|
||||
}
|
||||
|
||||
//// MOVZ
|
||||
void recMOVZtemp_const()
|
||||
// if (rt == 0) then rd <- rs
|
||||
static void recMOVZtemp_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0];
|
||||
}
|
||||
|
||||
void recMOVZtemp_consts(int info)
|
||||
static void recMOVZtemp_consts(int info)
|
||||
{
|
||||
xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
|
||||
j8Ptr[0] = JNZ8(0);
|
||||
// we need the constant anyway, so just force it into a register
|
||||
const int regs = (info & PROCESS_EE_S) ? EEREC_S : _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
if (info & PROCESS_EE_T)
|
||||
xTEST(xRegister64(EEREC_T), xRegister64(EEREC_T));
|
||||
else
|
||||
xCMP(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], 0);
|
||||
|
||||
xWriteImm64ToMem(&cpuRegs.GPR.r[_Rd_].UD[0], rax, g_cpuConstRegs[_Rs_].UD[0]);
|
||||
|
||||
x86SetJ8(j8Ptr[0]);
|
||||
xCMOVE(xRegister64(EEREC_D), xRegister64(regs));
|
||||
}
|
||||
|
||||
void recMOVZtemp_constt(int info)
|
||||
static void recMOVZtemp_constt(int info)
|
||||
{
|
||||
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
|
||||
if (info & PROCESS_EE_S)
|
||||
xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
|
||||
else
|
||||
xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
}
|
||||
|
||||
void recMOVZtemp_(int info)
|
||||
static void recMOVZtemp_(int info)
|
||||
{
|
||||
xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
|
||||
j8Ptr[0] = JNZ8(0);
|
||||
if (info & PROCESS_EE_T)
|
||||
xTEST(xRegister64(EEREC_T), xRegister64(EEREC_T));
|
||||
else
|
||||
xCMP(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], 0);
|
||||
|
||||
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
|
||||
|
||||
x86SetJ8(j8Ptr[0]);
|
||||
if (info & PROCESS_EE_S)
|
||||
xCMOVE(xRegister64(EEREC_D), xRegister64(EEREC_S));
|
||||
else
|
||||
xCMOVE(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(MOVZtemp, XMMINFO_READS | XMMINFO_READD | XMMINFO_READD | XMMINFO_WRITED);
|
||||
// Specify READD here, because we might not write to it, and want to preserve the value.
|
||||
static EERECOMPILE_CODERC0(MOVZtemp, XMMINFO_READS | XMMINFO_READT | XMMINFO_READD | XMMINFO_WRITED | XMMINFO_NORENAME);
|
||||
|
||||
void recMOVZ()
|
||||
{
|
||||
if (_Rs_ == _Rd_)
|
||||
return;
|
||||
|
||||
if (GPR_IS_CONST1(_Rt_))
|
||||
{
|
||||
if (g_cpuConstRegs[_Rt_].UD[0] != 0)
|
||||
if (GPR_IS_CONST1(_Rt_) && g_cpuConstRegs[_Rt_].UD[0] != 0)
|
||||
return;
|
||||
}
|
||||
else
|
||||
_deleteEEreg(_Rd_, 1);
|
||||
|
||||
recMOVZtemp();
|
||||
}
|
||||
|
||||
//// MOVN
|
||||
void recMOVNtemp_const()
|
||||
static void recMOVNtemp_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0];
|
||||
}
|
||||
|
||||
void recMOVNtemp_consts(int info)
|
||||
static void recMOVNtemp_consts(int info)
|
||||
{
|
||||
xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
|
||||
j8Ptr[0] = JZ8(0);
|
||||
// we need the constant anyway, so just force it into a register
|
||||
const int regs = (info & PROCESS_EE_S) ? EEREC_S : _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
if (info & PROCESS_EE_T)
|
||||
xTEST(xRegister64(EEREC_T), xRegister64(EEREC_T));
|
||||
else
|
||||
xCMP(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], 0);
|
||||
|
||||
xWriteImm64ToMem(&cpuRegs.GPR.r[_Rd_].UD[0], rax, g_cpuConstRegs[_Rs_].UD[0]);
|
||||
|
||||
x86SetJ8(j8Ptr[0]);
|
||||
xCMOVNE(xRegister64(EEREC_D), xRegister64(regs));
|
||||
}
|
||||
|
||||
void recMOVNtemp_constt(int info)
|
||||
static void recMOVNtemp_constt(int info)
|
||||
{
|
||||
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
|
||||
if (info & PROCESS_EE_S)
|
||||
xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
|
||||
else
|
||||
xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
}
|
||||
|
||||
void recMOVNtemp_(int info)
|
||||
static void recMOVNtemp_(int info)
|
||||
{
|
||||
xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
|
||||
j8Ptr[0] = JZ8(0);
|
||||
if (info & PROCESS_EE_T)
|
||||
xTEST(xRegister64(EEREC_T), xRegister64(EEREC_T));
|
||||
else
|
||||
xCMP(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], 0);
|
||||
|
||||
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
|
||||
|
||||
x86SetJ8(j8Ptr[0]);
|
||||
if (info & PROCESS_EE_S)
|
||||
xCMOVNE(xRegister64(EEREC_D), xRegister64(EEREC_S));
|
||||
else
|
||||
xCMOVNE(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(MOVNtemp, XMMINFO_READS | XMMINFO_READD | XMMINFO_READD | XMMINFO_WRITED);
|
||||
static EERECOMPILE_CODERC0(MOVNtemp, XMMINFO_READS | XMMINFO_READT | XMMINFO_READD | XMMINFO_WRITED | XMMINFO_NORENAME);
|
||||
|
||||
void recMOVN()
|
||||
{
|
||||
if (_Rs_ == _Rd_)
|
||||
return;
|
||||
|
||||
if (GPR_IS_CONST1(_Rt_))
|
||||
{
|
||||
if (g_cpuConstRegs[_Rt_].UD[0] == 0)
|
||||
if (GPR_IS_CONST1(_Rt_) && g_cpuConstRegs[_Rt_].UD[0] == 0)
|
||||
return;
|
||||
}
|
||||
else
|
||||
_deleteEEreg(_Rd_, 1);
|
||||
|
||||
recMOVNtemp();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace OpcodeImpl
|
||||
} // namespace Dynarec
|
||||
} // namespace R5900
|
||||
} // namespace R5900::Dynarec::OpcodeImpl
|
||||
|
|
|
@ -24,9 +24,8 @@ using namespace x86Emitter;
|
|||
|
||||
namespace Interp = R5900::Interpreter::OpcodeImpl;
|
||||
|
||||
namespace R5900 {
|
||||
namespace Dynarec {
|
||||
namespace OpcodeImpl {
|
||||
namespace R5900::Dynarec::OpcodeImpl
|
||||
{
|
||||
|
||||
/*********************************************************
|
||||
* Register mult/div & Register trap logic *
|
||||
|
@ -51,283 +50,293 @@ REC_FUNC_DEL(MADDU1, _Rd_);
|
|||
|
||||
#else
|
||||
|
||||
// if upper is 1, write in upper 64 bits of LO/HI
|
||||
void recWritebackHILO(int info, int writed, int upper)
|
||||
static void recWritebackHILO(int info, bool writed, bool upper)
|
||||
{
|
||||
int savedlo = 0;
|
||||
uptr loaddr = (uptr)&cpuRegs.LO.UL[upper ? 2 : 0];
|
||||
const uptr hiaddr = (uptr)&cpuRegs.HI.UL[upper ? 2 : 0];
|
||||
const u8 testlive = upper ? EEINST_LIVE2 : EEINST_LIVE0;
|
||||
// writeback low 32 bits, sign extended to 64 bits
|
||||
bool eax_sign_extended = false;
|
||||
|
||||
if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
|
||||
xMOVSX(rcx, edx);
|
||||
// case 1: LO is already in an XMM - use the xmm
|
||||
// case 2: LO is used as an XMM later in the block - use or allocate the XMM
|
||||
// case 3: LO is used as a GPR later in the block - use XMM if upper, otherwise use GPR, so it can be renamed
|
||||
// case 4: LO is already in a GPR - write to the GPR, or write to memory if upper
|
||||
// case 4: LO is not used - writeback to memory
|
||||
|
||||
if (g_pCurInstInfo->regs[XMMGPR_LO] & testlive)
|
||||
if (EEINST_LIVETEST(XMMGPR_LO))
|
||||
{
|
||||
int reglo = 0;
|
||||
if ((reglo = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_LO, MODE_READ)) >= 0)
|
||||
const bool loused = EEINST_USEDTEST(XMMGPR_LO);
|
||||
const bool lousedxmm = loused && (upper || EEINST_XMMUSEDTEST(XMMGPR_LO));
|
||||
const int xmmlo = lousedxmm ? _allocGPRtoXMMreg(XMMGPR_LO, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_LO, MODE_WRITE);
|
||||
if (xmmlo >= 0)
|
||||
{
|
||||
if (xmmregs[reglo].mode & MODE_WRITE)
|
||||
{
|
||||
if (upper)
|
||||
xMOVQ(ptr[(void*)(loaddr - 8)], xRegisterSSE(reglo));
|
||||
else
|
||||
xMOVH.PS(ptr[(void*)(loaddr + 8)], xRegisterSSE(reglo));
|
||||
}
|
||||
|
||||
xmmregs[reglo].inuse = 0;
|
||||
reglo = -1;
|
||||
}
|
||||
|
||||
_signExtendToMem((void*)loaddr);
|
||||
savedlo = 1;
|
||||
}
|
||||
|
||||
if (writed && _Rd_)
|
||||
{
|
||||
_eeOnWriteReg(_Rd_, 1);
|
||||
|
||||
int regd = -1;
|
||||
if (g_pCurInstInfo->regs[_Rd_] & EEINST_XMM)
|
||||
{
|
||||
if (savedlo)
|
||||
{
|
||||
regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE | MODE_READ);
|
||||
if (regd >= 0)
|
||||
{
|
||||
xMOVL.PS(xRegisterSSE(regd), ptr[(void*)(loaddr)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (regd < 0)
|
||||
{
|
||||
_deleteEEreg(_Rd_, 0);
|
||||
|
||||
if (!savedlo)
|
||||
// we use CDQE over MOVSX because it's shorter.
|
||||
xCDQE();
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
}
|
||||
}
|
||||
|
||||
if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
|
||||
{
|
||||
int reghi = 0;
|
||||
if ((reghi = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_HI, MODE_READ)) >= 0)
|
||||
{
|
||||
if (xmmregs[reghi].mode & MODE_WRITE)
|
||||
{
|
||||
if (upper)
|
||||
xMOVQ(ptr[(void*)(hiaddr - 8)], xRegisterSSE(reghi));
|
||||
else
|
||||
xMOVH.PS(ptr[(void*)(hiaddr + 8)], xRegisterSSE(reghi));
|
||||
}
|
||||
|
||||
xmmregs[reghi].inuse = 0;
|
||||
reghi = -1;
|
||||
}
|
||||
|
||||
xMOV(ptr[(void*)(hiaddr)], rcx);
|
||||
}
|
||||
}
|
||||
|
||||
void recWritebackConstHILO(u64 res, int writed, int upper)
|
||||
{
|
||||
uptr loaddr = (uptr)&cpuRegs.LO.UL[upper ? 2 : 0];
|
||||
uptr hiaddr = (uptr)&cpuRegs.HI.UL[upper ? 2 : 0];
|
||||
u8 testlive = upper ? EEINST_LIVE2 : EEINST_LIVE0;
|
||||
|
||||
if (g_pCurInstInfo->regs[XMMGPR_LO] & testlive)
|
||||
{
|
||||
int reglo = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_LO, MODE_WRITE | MODE_READ);
|
||||
|
||||
if (reglo >= 0)
|
||||
{
|
||||
u32* mem_ptr = recGetImm64(res & 0x80000000 ? -1 : 0, (u32)res);
|
||||
if (upper)
|
||||
xMOVH.PS(xRegisterSSE(reglo), ptr[mem_ptr]);
|
||||
else
|
||||
xMOVL.PS(xRegisterSSE(reglo), ptr[mem_ptr]);
|
||||
xPINSR.Q(xRegisterSSE(xmmlo), rax, static_cast<u8>(upper));
|
||||
}
|
||||
else
|
||||
{
|
||||
xWriteImm64ToMem((u64*)loaddr, rax, (s64)(s32)(res & 0xffffffff));
|
||||
}
|
||||
}
|
||||
|
||||
if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
|
||||
const int gprlo = upper ? -1 : (loused ? _allocX86reg(X86TYPE_GPR, XMMGPR_LO, MODE_WRITE) : _checkX86reg(X86TYPE_GPR, XMMGPR_LO, MODE_WRITE));
|
||||
if (gprlo >= 0)
|
||||
{
|
||||
|
||||
int reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_HI, MODE_WRITE | MODE_READ);
|
||||
|
||||
if (reghi >= 0)
|
||||
{
|
||||
u32* mem_ptr = recGetImm64((res >> 63) ? -1 : 0, res >> 32);
|
||||
if (upper)
|
||||
xMOVH.PS(xRegisterSSE(reghi), ptr[mem_ptr]);
|
||||
else
|
||||
xMOVL.PS(xRegisterSSE(reghi), ptr[mem_ptr]);
|
||||
xMOVSX(xRegister64(gprlo), eax);
|
||||
}
|
||||
else
|
||||
{
|
||||
_deleteEEreg(XMMGPR_HI, 0);
|
||||
xWriteImm64ToMem((u64*)hiaddr, rax, (s64)res >> 32);
|
||||
xCDQE();
|
||||
eax_sign_extended = true;
|
||||
xMOV(ptr64[&cpuRegs.LO.UD[upper]], rax);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!writed || !_Rd_)
|
||||
return;
|
||||
g_cpuConstRegs[_Rd_].SD[0] = (s32)(res & 0xffffffffULL); //that is the difference
|
||||
if (EEINST_LIVETEST(XMMGPR_HI))
|
||||
{
|
||||
const bool hiused = EEINST_USEDTEST(XMMGPR_HI);
|
||||
const bool hiusedxmm = hiused && (upper || EEINST_XMMUSEDTEST(XMMGPR_HI));
|
||||
const int xmmhi = hiusedxmm ? _allocGPRtoXMMreg(XMMGPR_HI, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_HI, MODE_WRITE);
|
||||
if (xmmhi >= 0)
|
||||
{
|
||||
xMOVSX(rdx, edx);
|
||||
xPINSR.Q(xRegisterSSE(xmmhi), rdx, static_cast<u8>(upper));
|
||||
}
|
||||
else
|
||||
{
|
||||
const int gprhi = upper ? -1 : (hiused ? _allocX86reg(X86TYPE_GPR, XMMGPR_HI, MODE_WRITE) : _checkX86reg(X86TYPE_GPR, XMMGPR_HI, MODE_WRITE));
|
||||
if (gprhi >= 0)
|
||||
{
|
||||
xMOVSX(xRegister64(gprhi), edx);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOVSX(rdx, edx);
|
||||
xMOV(ptr64[&cpuRegs.HI.UD[upper]], rdx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// writeback lo to Rd if present
|
||||
if (writed && _Rd_ && EEINST_LIVETEST(_Rd_))
|
||||
{
|
||||
// TODO: This can be made optimal by keeping it in an xmm.
|
||||
// But currently the templates aren't hooked up for that - we'd need a "allow xmm" flag.
|
||||
if (info & PROCESS_EE_D)
|
||||
{
|
||||
if (eax_sign_extended)
|
||||
xMOV(xRegister64(EEREC_D), rax);
|
||||
else
|
||||
xMOVSX(xRegister64(EEREC_D), eax);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!eax_sign_extended)
|
||||
xCDQE();
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void recWritebackConstHILO(u64 res, bool writed, int upper)
|
||||
{
|
||||
// It's not often that MULT/DIV are entirely constant. So while the MOV64s here are not optimal
|
||||
// by any means, it's not something that's going to be hit often enough to worry about a cache.
|
||||
// Except for apparently when it's getting set to all-zeros, but that'll be fine with immediates.
|
||||
const s64 loval = static_cast<s64>(static_cast<s32>(static_cast<u32>(res)));
|
||||
const s64 hival = static_cast<s64>(static_cast<s32>(static_cast<u32>(res >> 32)));
|
||||
|
||||
if (EEINST_LIVETEST(XMMGPR_LO))
|
||||
{
|
||||
const bool lolive = EEINST_USEDTEST(XMMGPR_LO);
|
||||
const bool lolivexmm = lolive && (upper || EEINST_XMMUSEDTEST(XMMGPR_LO));
|
||||
const int xmmlo = lolivexmm ? _allocGPRtoXMMreg(XMMGPR_LO, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_LO, MODE_WRITE);
|
||||
if (xmmlo >= 0)
|
||||
{
|
||||
xMOV64(rax, loval);
|
||||
xPINSR.Q(xRegisterSSE(xmmlo), rax, static_cast<u8>(upper));
|
||||
}
|
||||
else
|
||||
{
|
||||
const int gprlo = upper ? -1 : (lolive ? _allocX86reg(X86TYPE_GPR, XMMGPR_LO, MODE_WRITE) : _checkX86reg(X86TYPE_GPR, XMMGPR_LO, MODE_WRITE));
|
||||
if (gprlo >= 0)
|
||||
xImm64Op(xMOV, xRegister64(gprlo), rax, loval);
|
||||
else
|
||||
xImm64Op(xMOV, ptr64[&cpuRegs.LO.UD[upper]], rax, loval);
|
||||
}
|
||||
}
|
||||
|
||||
if (EEINST_LIVETEST(XMMGPR_HI))
|
||||
{
|
||||
const bool hilive = EEINST_USEDTEST(XMMGPR_HI);
|
||||
const bool hilivexmm = hilive && (upper || EEINST_XMMUSEDTEST(XMMGPR_HI));
|
||||
const int xmmhi = hilivexmm ? _allocGPRtoXMMreg(XMMGPR_HI, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_HI, MODE_WRITE);
|
||||
if (xmmhi >= 0)
|
||||
{
|
||||
xMOV64(rax, hival);
|
||||
xPINSR.Q(xRegisterSSE(xmmhi), rax, static_cast<u8>(upper));
|
||||
}
|
||||
else
|
||||
{
|
||||
const int gprhi = upper ? -1 : (hilive ? _allocX86reg(X86TYPE_GPR, XMMGPR_HI, MODE_WRITE) : _checkX86reg(X86TYPE_GPR, XMMGPR_HI, MODE_WRITE));
|
||||
if (gprhi >= 0)
|
||||
xImm64Op(xMOV, xRegister64(gprhi), rax, hival);
|
||||
else
|
||||
xImm64Op(xMOV, ptr64[&cpuRegs.HI.UD[upper]], rax, hival);
|
||||
}
|
||||
}
|
||||
|
||||
// writeback lo to Rd if present
|
||||
if (writed && _Rd_ && EEINST_LIVETEST(_Rd_))
|
||||
{
|
||||
_eeOnWriteReg(_Rd_, 0);
|
||||
|
||||
const int regd = _checkX86reg(X86TYPE_GPR, _Rd_, MODE_WRITE);
|
||||
if (regd >= 0)
|
||||
xImm64Op(xMOV, xRegister64(regd), rax, loval);
|
||||
else
|
||||
xImm64Op(xMOV, ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax, loval);
|
||||
}
|
||||
}
|
||||
|
||||
//// MULT
|
||||
void recMULT_const()
|
||||
static void recMULT_const()
|
||||
{
|
||||
s64 res = (s64)g_cpuConstRegs[_Rs_].SL[0] * (s64)g_cpuConstRegs[_Rt_].SL[0];
|
||||
|
||||
recWritebackConstHILO(res, 1, 0);
|
||||
}
|
||||
|
||||
void recMULTUsuper(int info, int upper, int process);
|
||||
void recMULTsuper(int info, int upper, int process)
|
||||
static void recMULTsuper(int info, bool sign, bool upper, int process)
|
||||
{
|
||||
// TODO(Stenzek): Use MULX where available.
|
||||
if (process & PROCESS_CONSTS)
|
||||
{
|
||||
xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
|
||||
xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
if (info & PROCESS_EE_T)
|
||||
sign ? xMUL(xRegister32(EEREC_T)) : xUMUL(xRegister32(EEREC_T));
|
||||
else
|
||||
sign ? xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]) : xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
}
|
||||
else if (process & PROCESS_CONSTT)
|
||||
{
|
||||
xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
|
||||
xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
if (info & PROCESS_EE_S)
|
||||
sign ? xMUL(xRegister32(EEREC_S)) : xUMUL(xRegister32(EEREC_S));
|
||||
else
|
||||
sign ? xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]) : xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
// S is more likely to be in a register than T (so put T in eax).
|
||||
if (info & PROCESS_EE_T)
|
||||
xMOV(eax, xRegister32(EEREC_T));
|
||||
else
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
|
||||
if (info & PROCESS_EE_S)
|
||||
sign ? xMUL(xRegister32(EEREC_S)) : xUMUL(xRegister32(EEREC_S));
|
||||
else
|
||||
sign ? xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]) : xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
}
|
||||
|
||||
recWritebackHILO(info, 1, upper);
|
||||
}
|
||||
|
||||
void recMULT_(int info)
|
||||
static void recMULT_(int info)
|
||||
{
|
||||
recMULTsuper(info, 0, 0);
|
||||
recMULTsuper(info, true, false, 0);
|
||||
}
|
||||
|
||||
void recMULT_consts(int info)
|
||||
static void recMULT_consts(int info)
|
||||
{
|
||||
recMULTsuper(info, 0, PROCESS_CONSTS);
|
||||
recMULTsuper(info, true, false, PROCESS_CONSTS);
|
||||
}
|
||||
|
||||
void recMULT_constt(int info)
|
||||
static void recMULT_constt(int info)
|
||||
{
|
||||
recMULTsuper(info, 0, PROCESS_CONSTT);
|
||||
recMULTsuper(info, true, false, PROCESS_CONSTT);
|
||||
}
|
||||
|
||||
// don't set XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI
|
||||
EERECOMPILE_CODE0(MULT, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
|
||||
// lo/hi allocation are taken care of in recWritebackHILO().
|
||||
EERECOMPILE_CODERC0(MULT, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
|
||||
|
||||
//// MULTU
|
||||
void recMULTU_const()
|
||||
static void recMULTU_const()
|
||||
{
|
||||
u64 res = (u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0];
|
||||
const u64 res = (u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0];
|
||||
|
||||
recWritebackConstHILO(res, 1, 0);
|
||||
}
|
||||
|
||||
void recMULTUsuper(int info, int upper, int process)
|
||||
static void recMULTU_(int info)
|
||||
{
|
||||
if (process & PROCESS_CONSTS)
|
||||
{
|
||||
xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
|
||||
xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
}
|
||||
else if (process & PROCESS_CONSTT)
|
||||
{
|
||||
xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
|
||||
xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
recMULTsuper(info, false, false, 0);
|
||||
}
|
||||
|
||||
recWritebackHILO(info, 1, upper);
|
||||
static void recMULTU_consts(int info)
|
||||
{
|
||||
recMULTsuper(info, false, false, PROCESS_CONSTS);
|
||||
}
|
||||
|
||||
void recMULTU_(int info)
|
||||
static void recMULTU_constt(int info)
|
||||
{
|
||||
recMULTUsuper(info, 0, 0);
|
||||
}
|
||||
|
||||
void recMULTU_consts(int info)
|
||||
{
|
||||
recMULTUsuper(info, 0, PROCESS_CONSTS);
|
||||
}
|
||||
|
||||
void recMULTU_constt(int info)
|
||||
{
|
||||
recMULTUsuper(info, 0, PROCESS_CONSTT);
|
||||
recMULTsuper(info, false, false, PROCESS_CONSTT);
|
||||
}
|
||||
|
||||
// don't specify XMMINFO_WRITELO or XMMINFO_WRITEHI, that is taken care of
|
||||
EERECOMPILE_CODE0(MULTU, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
|
||||
EERECOMPILE_CODERC0(MULTU, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recMULT1_const()
|
||||
static void recMULT1_const()
|
||||
{
|
||||
s64 res = (s64)g_cpuConstRegs[_Rs_].SL[0] * (s64)g_cpuConstRegs[_Rt_].SL[0];
|
||||
|
||||
recWritebackConstHILO((u64)res, 1, 1);
|
||||
}
|
||||
|
||||
void recMULT1_(int info)
|
||||
static void recMULT1_(int info)
|
||||
{
|
||||
recMULTsuper(info, 1, 0);
|
||||
recMULTsuper(info, true, true, 0);
|
||||
}
|
||||
|
||||
void recMULT1_consts(int info)
|
||||
static void recMULT1_consts(int info)
|
||||
{
|
||||
recMULTsuper(info, 1, PROCESS_CONSTS);
|
||||
recMULTsuper(info, true, true, PROCESS_CONSTS);
|
||||
}
|
||||
|
||||
void recMULT1_constt(int info)
|
||||
static void recMULT1_constt(int info)
|
||||
{
|
||||
recMULTsuper(info, 1, PROCESS_CONSTT);
|
||||
recMULTsuper(info, true, true, PROCESS_CONSTT);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(MULT1, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
|
||||
EERECOMPILE_CODERC0(MULT1, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recMULTU1_const()
|
||||
static void recMULTU1_const()
|
||||
{
|
||||
u64 res = (u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0];
|
||||
|
||||
recWritebackConstHILO(res, 1, 1);
|
||||
}
|
||||
|
||||
void recMULTU1_(int info)
|
||||
static void recMULTU1_(int info)
|
||||
{
|
||||
recMULTUsuper(info, 1, 0);
|
||||
recMULTsuper(info, false, true, 0);
|
||||
}
|
||||
|
||||
void recMULTU1_consts(int info)
|
||||
static void recMULTU1_consts(int info)
|
||||
{
|
||||
recMULTUsuper(info, 1, PROCESS_CONSTS);
|
||||
recMULTsuper(info, false, true, PROCESS_CONSTS);
|
||||
}
|
||||
|
||||
void recMULTU1_constt(int info)
|
||||
static void recMULTU1_constt(int info)
|
||||
{
|
||||
recMULTUsuper(info, 1, PROCESS_CONSTT);
|
||||
recMULTsuper(info, false, true, PROCESS_CONSTT);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(MULTU1, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
|
||||
EERECOMPILE_CODERC0(MULTU1, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
|
||||
|
||||
//// DIV
|
||||
|
||||
void recDIVconst(int upper)
|
||||
static void recDIVconst(int upper)
|
||||
{
|
||||
s32 quot, rem;
|
||||
if (g_cpuConstRegs[_Rs_].UL[0] == 0x80000000 && g_cpuConstRegs[_Rt_].SL[0] == -1)
|
||||
|
@ -348,29 +357,36 @@ void recDIVconst(int upper)
|
|||
recWritebackConstHILO((u64)quot | ((u64)rem << 32), 0, upper);
|
||||
}
|
||||
|
||||
void recDIV_const()
|
||||
static void recDIV_const()
|
||||
{
|
||||
recDIVconst(0);
|
||||
}
|
||||
|
||||
void recDIVsuper(int info, int sign, int upper, int process)
|
||||
static void recDIVsuper(int info, bool sign, bool upper, int process)
|
||||
{
|
||||
const xRegister32 divisor((info & PROCESS_EE_T) ? EEREC_T : ecx.GetId());
|
||||
if (!(info & PROCESS_EE_T))
|
||||
{
|
||||
if (process & PROCESS_CONSTT)
|
||||
xMOV(ecx, g_cpuConstRegs[_Rt_].UL[0]);
|
||||
xMOV(divisor, g_cpuConstRegs[_Rt_].UL[0]);
|
||||
else
|
||||
xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
xMOV(divisor, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
}
|
||||
|
||||
// can't use edx, it's part of the dividend
|
||||
pxAssert(divisor.GetId() != edx.GetId());
|
||||
|
||||
if (process & PROCESS_CONSTS)
|
||||
xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
|
||||
else
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
_eeMoveGPRtoR(rax, _Rs_);
|
||||
|
||||
u8* end1;
|
||||
if (sign) //test for overflow (x86 will just throw an exception)
|
||||
{
|
||||
xCMP(eax, 0x80000000);
|
||||
u8* cont1 = JNE8(0);
|
||||
xCMP(ecx, 0xffffffff);
|
||||
xCMP(divisor, 0xffffffff);
|
||||
u8* cont2 = JNE8(0);
|
||||
//overflow case:
|
||||
xXOR(edx, edx); //EAX remains 0x80000000
|
||||
|
@ -380,7 +396,7 @@ void recDIVsuper(int info, int sign, int upper, int process)
|
|||
x86SetJ8(cont2);
|
||||
}
|
||||
|
||||
xCMP(ecx, 0);
|
||||
xCMP(divisor, 0);
|
||||
u8* cont3 = JNE8(0);
|
||||
//divide by zero
|
||||
xMOV(edx, eax);
|
||||
|
@ -398,12 +414,12 @@ void recDIVsuper(int info, int sign, int upper, int process)
|
|||
if (sign)
|
||||
{
|
||||
xCDQ();
|
||||
xDIV(ecx);
|
||||
xDIV(divisor);
|
||||
}
|
||||
else
|
||||
{
|
||||
xXOR(edx, edx);
|
||||
xUDIV(ecx);
|
||||
xUDIV(divisor);
|
||||
}
|
||||
|
||||
if (sign)
|
||||
|
@ -411,28 +427,29 @@ void recDIVsuper(int info, int sign, int upper, int process)
|
|||
x86SetJ8(end2);
|
||||
|
||||
// need to execute regardless of bad divide
|
||||
recWritebackHILO(info, 0, upper);
|
||||
recWritebackHILO(info, false, upper);
|
||||
}
|
||||
|
||||
void recDIV_(int info)
|
||||
static void recDIV_(int info)
|
||||
{
|
||||
recDIVsuper(info, 1, 0, 0);
|
||||
}
|
||||
|
||||
void recDIV_consts(int info)
|
||||
static void recDIV_consts(int info)
|
||||
{
|
||||
recDIVsuper(info, 1, 0, PROCESS_CONSTS);
|
||||
}
|
||||
|
||||
void recDIV_constt(int info)
|
||||
static void recDIV_constt(int info)
|
||||
{
|
||||
recDIVsuper(info, 1, 0, PROCESS_CONSTT);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(DIV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI);
|
||||
// We handle S reading in the routine itself, since it needs to go into eax.
|
||||
EERECOMPILE_CODERC0(DIV, /*XMMINFO_READS |*/ XMMINFO_READT);
|
||||
|
||||
//// DIVU
|
||||
void recDIVUconst(int upper)
|
||||
static void recDIVUconst(int upper)
|
||||
{
|
||||
u32 quot, rem;
|
||||
if (g_cpuConstRegs[_Rt_].UL[0] != 0)
|
||||
|
@ -449,71 +466,73 @@ void recDIVUconst(int upper)
|
|||
recWritebackConstHILO((u64)quot | ((u64)rem << 32), 0, upper);
|
||||
}
|
||||
|
||||
void recDIVU_const()
|
||||
static void recDIVU_const()
|
||||
{
|
||||
recDIVUconst(0);
|
||||
}
|
||||
|
||||
void recDIVU_(int info)
|
||||
static void recDIVU_(int info)
|
||||
{
|
||||
recDIVsuper(info, 0, 0, 0);
|
||||
recDIVsuper(info, false, false, 0);
|
||||
}
|
||||
|
||||
void recDIVU_consts(int info)
|
||||
static void recDIVU_consts(int info)
|
||||
{
|
||||
recDIVsuper(info, 0, 0, PROCESS_CONSTS);
|
||||
recDIVsuper(info, false, false, PROCESS_CONSTS);
|
||||
}
|
||||
|
||||
void recDIVU_constt(int info)
|
||||
static void recDIVU_constt(int info)
|
||||
{
|
||||
recDIVsuper(info, 0, 0, PROCESS_CONSTT);
|
||||
recDIVsuper(info, false, false, PROCESS_CONSTT);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(DIVU, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI);
|
||||
EERECOMPILE_CODERC0(DIVU, /*XMMINFO_READS |*/ XMMINFO_READT);
|
||||
|
||||
void recDIV1_const()
|
||||
static void recDIV1_const()
|
||||
{
|
||||
recDIVconst(1);
|
||||
}
|
||||
|
||||
void recDIV1_(int info)
|
||||
static void recDIV1_(int info)
|
||||
{
|
||||
recDIVsuper(info, 1, 1, 0);
|
||||
recDIVsuper(info, true, true, 0);
|
||||
}
|
||||
|
||||
void recDIV1_consts(int info)
|
||||
static void recDIV1_consts(int info)
|
||||
{
|
||||
recDIVsuper(info, 1, 1, PROCESS_CONSTS);
|
||||
recDIVsuper(info, true, true, PROCESS_CONSTS);
|
||||
}
|
||||
|
||||
void recDIV1_constt(int info)
|
||||
static void recDIV1_constt(int info)
|
||||
{
|
||||
recDIVsuper(info, 1, 1, PROCESS_CONSTT);
|
||||
recDIVsuper(info, true, true, PROCESS_CONSTT);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(DIV1, XMMINFO_READS | XMMINFO_READT);
|
||||
EERECOMPILE_CODERC0(DIV1, /*XMMINFO_READS |*/ XMMINFO_READT);
|
||||
|
||||
void recDIVU1_const()
|
||||
static void recDIVU1_const()
|
||||
{
|
||||
recDIVUconst(1);
|
||||
}
|
||||
|
||||
void recDIVU1_(int info)
|
||||
static void recDIVU1_(int info)
|
||||
{
|
||||
recDIVsuper(info, 0, 1, 0);
|
||||
recDIVsuper(info, false, true, 0);
|
||||
}
|
||||
|
||||
void recDIVU1_consts(int info)
|
||||
static void recDIVU1_consts(int info)
|
||||
{
|
||||
recDIVsuper(info, 0, 1, PROCESS_CONSTS);
|
||||
recDIVsuper(info, false, true, PROCESS_CONSTS);
|
||||
}
|
||||
|
||||
void recDIVU1_constt(int info)
|
||||
static void recDIVU1_constt(int info)
|
||||
{
|
||||
recDIVsuper(info, 0, 1, PROCESS_CONSTT);
|
||||
recDIVsuper(info, false, true, PROCESS_CONSTT);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(DIVU1, XMMINFO_READS | XMMINFO_READT);
|
||||
EERECOMPILE_CODERC0(DIVU1, /*XMMINFO_READS |*/ XMMINFO_READT);
|
||||
|
||||
// TODO(Stenzek): All of these :(
|
||||
|
||||
static void writeBackMAddToHiLoRd(int hiloID)
|
||||
{
|
||||
|
@ -564,8 +583,10 @@ void recMADD()
|
|||
|
||||
_deleteEEreg(XMMGPR_LO, 1);
|
||||
_deleteEEreg(XMMGPR_HI, 1);
|
||||
_deleteGPRtoXMMreg(_Rs_, 1);
|
||||
_deleteGPRtoXMMreg(_Rt_, 1);
|
||||
_deleteGPRtoX86reg(_Rs_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoX86reg(_Rt_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoXMMreg(_Rs_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH);
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
|
@ -597,8 +618,10 @@ void recMADDU()
|
|||
|
||||
_deleteEEreg(XMMGPR_LO, 1);
|
||||
_deleteEEreg(XMMGPR_HI, 1);
|
||||
_deleteGPRtoXMMreg(_Rs_, 1);
|
||||
_deleteGPRtoXMMreg(_Rt_, 1);
|
||||
_deleteGPRtoX86reg(_Rs_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoX86reg(_Rt_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoXMMreg(_Rs_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH);
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
|
@ -630,8 +653,10 @@ void recMADD1()
|
|||
|
||||
_deleteEEreg(XMMGPR_LO, 1);
|
||||
_deleteEEreg(XMMGPR_HI, 1);
|
||||
_deleteGPRtoXMMreg(_Rs_, 1);
|
||||
_deleteGPRtoXMMreg(_Rt_, 1);
|
||||
_deleteGPRtoX86reg(_Rs_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoX86reg(_Rt_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoXMMreg(_Rs_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH);
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
|
@ -663,8 +688,10 @@ void recMADDU1()
|
|||
|
||||
_deleteEEreg(XMMGPR_LO, 1);
|
||||
_deleteEEreg(XMMGPR_HI, 1);
|
||||
_deleteGPRtoXMMreg(_Rs_, 1);
|
||||
_deleteGPRtoXMMreg(_Rt_, 1);
|
||||
_deleteGPRtoX86reg(_Rs_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoX86reg(_Rt_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoXMMreg(_Rs_, DELETE_REG_FLUSH);
|
||||
_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH);
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
|
@ -688,6 +715,4 @@ void recMADDU1()
|
|||
|
||||
#endif
|
||||
|
||||
} // namespace OpcodeImpl
|
||||
} // namespace Dynarec
|
||||
} // namespace R5900
|
||||
} // namespace R5900::Dynarec::OpcodeImpl
|
||||
|
|
|
@ -22,9 +22,8 @@
|
|||
|
||||
using namespace x86Emitter;
|
||||
|
||||
namespace R5900 {
|
||||
namespace Dynarec {
|
||||
namespace OpcodeImpl {
|
||||
namespace R5900::Dynarec::OpcodeImpl
|
||||
{
|
||||
|
||||
/*********************************************************
|
||||
* Shift arithmetic with constant shift *
|
||||
|
@ -53,412 +52,387 @@ REC_FUNC_DEL(DSRAV, _Rd_);
|
|||
|
||||
#else
|
||||
|
||||
static void recMoveTtoD(int info)
|
||||
{
|
||||
if (info & PROCESS_EE_T)
|
||||
xMOV(xRegister32(EEREC_D), xRegister32(EEREC_T));
|
||||
else
|
||||
xMOV(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
}
|
||||
|
||||
static void recMoveTtoD64(int info)
|
||||
{
|
||||
if (info & PROCESS_EE_T)
|
||||
xMOV(xRegister64(EEREC_D), xRegister64(EEREC_T));
|
||||
else
|
||||
xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
}
|
||||
|
||||
static void recMoveSToRCX(int info)
|
||||
{
|
||||
// load full 64-bits for store->load forwarding, since we always store >=64.
|
||||
if (info & PROCESS_EE_S)
|
||||
xMOV(rcx, xRegister64(EEREC_S));
|
||||
else
|
||||
xMOV(rcx, ptr64[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
}
|
||||
|
||||
//// SLL
|
||||
void recSLL_const()
|
||||
static void recSLL_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] << _Sa_);
|
||||
}
|
||||
|
||||
void recSLLs_(int info, int sa)
|
||||
static void recSLLs_(int info, int sa)
|
||||
{
|
||||
// TODO: Use BMI
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
recMoveTtoD(info);
|
||||
if (sa != 0)
|
||||
{
|
||||
xSHL(eax, sa);
|
||||
xSHL(xRegister32(EEREC_D), sa);
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
|
||||
eeSignExtendTo(_Rd_);
|
||||
}
|
||||
|
||||
void recSLL_(int info)
|
||||
static void recSLL_(int info)
|
||||
{
|
||||
recSLLs_(info, _Sa_);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode2, SLL);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC2, SLL, XMMINFO_WRITED | XMMINFO_READT);
|
||||
|
||||
//// SRL
|
||||
void recSRL_const()
|
||||
static void recSRL_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] >> _Sa_);
|
||||
}
|
||||
|
||||
void recSRLs_(int info, int sa)
|
||||
static void recSRLs_(int info, int sa)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
recMoveTtoD(info);
|
||||
if (sa != 0)
|
||||
xSHR(eax, sa);
|
||||
|
||||
eeSignExtendTo(_Rd_);
|
||||
xSHR(xRegister32(EEREC_D), sa);
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
|
||||
void recSRL_(int info)
|
||||
static void recSRL_(int info)
|
||||
{
|
||||
recSRLs_(info, _Sa_);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode2, SRL);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC2, SRL, XMMINFO_WRITED | XMMINFO_READT);
|
||||
|
||||
//// SRA
|
||||
void recSRA_const()
|
||||
static void recSRA_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].SL[0] >> _Sa_);
|
||||
}
|
||||
|
||||
void recSRAs_(int info, int sa)
|
||||
static void recSRAs_(int info, int sa)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
recMoveTtoD(info);
|
||||
if (sa != 0)
|
||||
xSAR(eax, sa);
|
||||
|
||||
eeSignExtendTo(_Rd_);
|
||||
xSAR(xRegister32(EEREC_D), sa);
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
|
||||
void recSRA_(int info)
|
||||
static void recSRA_(int info)
|
||||
{
|
||||
recSRAs_(info, _Sa_);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode2, SRA);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC2, SRA, XMMINFO_WRITED | XMMINFO_READT);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recDSLL_const()
|
||||
static void recDSLL_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] << _Sa_);
|
||||
}
|
||||
|
||||
void recDSLLs_(int info, int sa)
|
||||
static void recDSLLs_(int info, int sa)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
recMoveTtoD64(info);
|
||||
if (sa != 0)
|
||||
xSHL(rax, sa);
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
xSHL(xRegister64(EEREC_D), sa);
|
||||
}
|
||||
|
||||
void recDSLL_(int info)
|
||||
static void recDSLL_(int info)
|
||||
{
|
||||
recDSLLs_(info, _Sa_);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode2, DSLL);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC2, DSLL, XMMINFO_WRITED | XMMINFO_READT | XMMINFO_64BITOP);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
void recDSRL_const()
|
||||
static void recDSRL_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] >> _Sa_);
|
||||
}
|
||||
|
||||
void recDSRLs_(int info, int sa)
|
||||
static void recDSRLs_(int info, int sa)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
recMoveTtoD64(info);
|
||||
if (sa != 0)
|
||||
xSHR(rax, sa);
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
xSHR(xRegister64(EEREC_D), sa);
|
||||
}
|
||||
|
||||
void recDSRL_(int info)
|
||||
static void recDSRL_(int info)
|
||||
{
|
||||
recDSRLs_(info, _Sa_);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode2, DSRL);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC2, DSRL, XMMINFO_WRITED | XMMINFO_READT | XMMINFO_64BITOP);
|
||||
|
||||
//// DSRA
|
||||
void recDSRA_const()
|
||||
static void recDSRA_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].SD[0] = (u64)(g_cpuConstRegs[_Rt_].SD[0] >> _Sa_);
|
||||
}
|
||||
|
||||
void recDSRAs_(int info, int sa)
|
||||
static void recDSRAs_(int info, int sa)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
recMoveTtoD64(info);
|
||||
if (sa != 0)
|
||||
xSAR(rax, sa);
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
xSAR(xRegister64(EEREC_D), sa);
|
||||
}
|
||||
|
||||
void recDSRA_(int info)
|
||||
static void recDSRA_(int info)
|
||||
{
|
||||
recDSRAs_(info, _Sa_);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode2, DSRA);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC2, DSRA, XMMINFO_WRITED | XMMINFO_READT | XMMINFO_64BITOP);
|
||||
|
||||
///// DSLL32
|
||||
void recDSLL32_const()
|
||||
static void recDSLL32_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] << (_Sa_ + 32));
|
||||
}
|
||||
|
||||
void recDSLL32s_(int info, int sa)
|
||||
static void recDSLL32_(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
xSHL(rax, sa + 32);
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
recDSLLs_(info, _Sa_ + 32);
|
||||
}
|
||||
|
||||
void recDSLL32_(int info)
|
||||
{
|
||||
recDSLL32s_(info, _Sa_);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode2, DSLL32);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC2, DSLL32, XMMINFO_WRITED | XMMINFO_READT | XMMINFO_64BITOP);
|
||||
|
||||
//// DSRL32
|
||||
void recDSRL32_const()
|
||||
static void recDSRL32_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] >> (_Sa_ + 32));
|
||||
}
|
||||
|
||||
void recDSRL32s_(int info, int sa)
|
||||
static void recDSRL32_(int info)
|
||||
{
|
||||
pxAssert(!(info & PROCESS_EE_XMM));
|
||||
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
|
||||
if (sa != 0)
|
||||
xSHR(eax, sa);
|
||||
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
recDSRLs_(info, _Sa_ + 32);
|
||||
}
|
||||
|
||||
void recDSRL32_(int info)
|
||||
{
|
||||
recDSRL32s_(info, _Sa_);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode2, DSRL32);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC2, DSRL32, XMMINFO_WRITED | XMMINFO_READT);
|
||||
|
||||
//// DSRA32
|
||||
void recDSRA32_const()
|
||||
static void recDSRA32_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].SD[0] = (u64)(g_cpuConstRegs[_Rt_].SD[0] >> (_Sa_ + 32));
|
||||
}
|
||||
|
||||
void recDSRA32s_(int info, int sa)
|
||||
static void recDSRA32_(int info)
|
||||
{
|
||||
recDSRAs_(info, sa + 32);
|
||||
recDSRAs_(info, _Sa_ + 32);
|
||||
}
|
||||
|
||||
void recDSRA32_(int info)
|
||||
{
|
||||
recDSRA32s_(info, _Sa_);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODEX(eeRecompileCode2, DSRA32);
|
||||
EERECOMPILE_CODEX(eeRecompileCodeRC2, DSRA32, XMMINFO_WRITED | XMMINFO_READT | XMMINFO_64BITOP);
|
||||
|
||||
/*********************************************************
|
||||
* Shift arithmetic with variant register shift *
|
||||
* Format: OP rd, rt, rs *
|
||||
*********************************************************/
|
||||
|
||||
static void recShiftV_constt(const xImpl_Group2& shift)
|
||||
static void recShiftV_constt(int info, const xImpl_Group2& shift)
|
||||
{
|
||||
xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
|
||||
xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
|
||||
shift(eax, cl);
|
||||
|
||||
eeSignExtendTo(_Rd_);
|
||||
pxAssert(_Rs_ != 0);
|
||||
recMoveSToRCX(info);
|
||||
xMOV(xRegister32(EEREC_D), g_cpuConstRegs[_Rt_].UL[0]);
|
||||
shift(xRegister32(EEREC_D), cl);
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
|
||||
static void recShiftV(const xImpl_Group2& shift)
|
||||
static void recShiftV(int info, const xImpl_Group2& shift)
|
||||
{
|
||||
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
if (_Rs_ != 0)
|
||||
{
|
||||
xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
shift(eax, cl);
|
||||
}
|
||||
eeSignExtendTo(_Rd_);
|
||||
pxAssert(_Rs_ != 0);
|
||||
|
||||
recMoveSToRCX(info);
|
||||
recMoveTtoD(info);
|
||||
shift(xRegister32(EEREC_D), cl);
|
||||
xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
|
||||
}
|
||||
|
||||
static void recDShiftV_constt(const xImpl_Group2& shift)
|
||||
static void recDShiftV_constt(int info, const xImpl_Group2& shift)
|
||||
{
|
||||
xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
|
||||
xMOV64(rax, g_cpuConstRegs[_Rt_].UD[0]);
|
||||
shift(rax, cl);
|
||||
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
pxAssert(_Rs_ != 0);
|
||||
recMoveSToRCX(info);
|
||||
xMOV64(xRegister64(EEREC_D), g_cpuConstRegs[_Rt_].SD[0]);
|
||||
shift(xRegister64(EEREC_D), cl);
|
||||
}
|
||||
|
||||
static void recDShiftV(const xImpl_Group2& shift)
|
||||
static void recDShiftV(int info, const xImpl_Group2& shift)
|
||||
{
|
||||
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
|
||||
if (_Rs_ != 0)
|
||||
{
|
||||
xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
|
||||
shift(rax, cl);
|
||||
}
|
||||
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
|
||||
pxAssert(_Rs_ != 0);
|
||||
recMoveSToRCX(info);
|
||||
recMoveTtoD64(info);
|
||||
shift(xRegister64(EEREC_D), cl);
|
||||
}
|
||||
|
||||
//// SLLV
|
||||
void recSLLV_const()
|
||||
static void recSLLV_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] << (g_cpuConstRegs[_Rs_].UL[0] & 0x1f));
|
||||
}
|
||||
|
||||
void recSLLV_consts(int info)
|
||||
static void recSLLV_consts(int info)
|
||||
{
|
||||
recSLLs_(info, g_cpuConstRegs[_Rs_].UL[0] & 0x1f);
|
||||
}
|
||||
|
||||
void recSLLV_constt(int info)
|
||||
static void recSLLV_constt(int info)
|
||||
{
|
||||
recShiftV_constt(xSHL);
|
||||
recShiftV_constt(info, xSHL);
|
||||
}
|
||||
|
||||
void recSLLV_(int info)
|
||||
static void recSLLV_(int info)
|
||||
{
|
||||
recShiftV(xSHL);
|
||||
recShiftV(info, xSHL);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(SLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(SLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
|
||||
//// SRLV
|
||||
void recSRLV_const()
|
||||
static void recSRLV_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x1f));
|
||||
}
|
||||
|
||||
void recSRLV_consts(int info)
|
||||
static void recSRLV_consts(int info)
|
||||
{
|
||||
recSRLs_(info, g_cpuConstRegs[_Rs_].UL[0] & 0x1f);
|
||||
}
|
||||
|
||||
void recSRLV_constt(int info)
|
||||
static void recSRLV_constt(int info)
|
||||
{
|
||||
recShiftV_constt(xSHR);
|
||||
recShiftV_constt(info, xSHR);
|
||||
}
|
||||
|
||||
void recSRLV_(int info)
|
||||
static void recSRLV_(int info)
|
||||
{
|
||||
recShiftV(xSHR);
|
||||
recShiftV(info, xSHR);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(SRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(SRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
|
||||
//// SRAV
|
||||
void recSRAV_const()
|
||||
static void recSRAV_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].SL[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x1f));
|
||||
}
|
||||
|
||||
void recSRAV_consts(int info)
|
||||
static void recSRAV_consts(int info)
|
||||
{
|
||||
recSRAs_(info, g_cpuConstRegs[_Rs_].UL[0] & 0x1f);
|
||||
}
|
||||
|
||||
void recSRAV_constt(int info)
|
||||
static void recSRAV_constt(int info)
|
||||
{
|
||||
recShiftV_constt(xSAR);
|
||||
recShiftV_constt(info, xSAR);
|
||||
}
|
||||
|
||||
void recSRAV_(int info)
|
||||
static void recSRAV_(int info)
|
||||
{
|
||||
recShiftV(xSAR);
|
||||
recShiftV(info, xSAR);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(SRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(SRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
|
||||
//// DSLLV
|
||||
void recDSLLV_const()
|
||||
static void recDSLLV_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] << (g_cpuConstRegs[_Rs_].UL[0] & 0x3f));
|
||||
}
|
||||
|
||||
void recDSLLV_consts(int info)
|
||||
static void recDSLLV_consts(int info)
|
||||
{
|
||||
int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f;
|
||||
if (sa < 32)
|
||||
recDSLLs_(info, sa);
|
||||
else
|
||||
recDSLL32s_(info, sa - 32);
|
||||
}
|
||||
|
||||
void recDSLLV_constt(int info)
|
||||
static void recDSLLV_constt(int info)
|
||||
{
|
||||
recDShiftV_constt(xSHL);
|
||||
recDShiftV_constt(info, xSHL);
|
||||
}
|
||||
|
||||
void recDSLLV_(int info)
|
||||
static void recDSLLV_(int info)
|
||||
{
|
||||
recDShiftV(xSHL);
|
||||
recDShiftV(info, xSHL);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(DSLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(DSLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);
|
||||
|
||||
//// DSRLV
|
||||
void recDSRLV_const()
|
||||
static void recDSRLV_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x3f));
|
||||
}
|
||||
|
||||
void recDSRLV_consts(int info)
|
||||
static void recDSRLV_consts(int info)
|
||||
{
|
||||
int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f;
|
||||
if (sa < 32)
|
||||
recDSRLs_(info, sa);
|
||||
else
|
||||
recDSRL32s_(info, sa - 32);
|
||||
}
|
||||
|
||||
void recDSRLV_constt(int info)
|
||||
static void recDSRLV_constt(int info)
|
||||
{
|
||||
recDShiftV_constt(xSHR);
|
||||
recDShiftV_constt(info, xSHR);
|
||||
}
|
||||
|
||||
void recDSRLV_(int info)
|
||||
static void recDSRLV_(int info)
|
||||
{
|
||||
recDShiftV(xSHR);
|
||||
recDShiftV(info, xSHR);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(DSRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(DSRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);
|
||||
|
||||
//// DSRAV
|
||||
void recDSRAV_const()
|
||||
static void recDSRAV_const()
|
||||
{
|
||||
g_cpuConstRegs[_Rd_].SD[0] = (s64)(g_cpuConstRegs[_Rt_].SD[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x3f));
|
||||
}
|
||||
|
||||
void recDSRAV_consts(int info)
|
||||
static void recDSRAV_consts(int info)
|
||||
{
|
||||
int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f;
|
||||
if (sa < 32)
|
||||
recDSRAs_(info, sa);
|
||||
else
|
||||
recDSRA32s_(info, sa - 32);
|
||||
}
|
||||
|
||||
void recDSRAV_constt(int info)
|
||||
static void recDSRAV_constt(int info)
|
||||
{
|
||||
recDShiftV_constt(xSAR);
|
||||
recDShiftV_constt(info, xSAR);
|
||||
}
|
||||
|
||||
void recDSRAV_(int info)
|
||||
static void recDSRAV_(int info)
|
||||
{
|
||||
recDShiftV(xSAR);
|
||||
recDShiftV(info, xSAR);
|
||||
}
|
||||
|
||||
EERECOMPILE_CODE0(DSRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
|
||||
EERECOMPILE_CODERC0(DSRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace OpcodeImpl
|
||||
} // namespace Dynarec
|
||||
} // namespace R5900
|
||||
} // namespace R5900::Dynarec::OpcodeImpl
|
||||
|
|
|
@ -47,527 +47,240 @@ void _deleteEEreg(int reg, int flush)
|
|||
_flushConstReg(reg);
|
||||
}
|
||||
GPR_DEL_CONST(reg);
|
||||
_deleteGPRtoXMMreg(reg, flush ? 0 : 2);
|
||||
_deleteGPRtoXMMreg(reg, flush ? DELETE_REG_FREE : DELETE_REG_FLUSH_AND_FREE);
|
||||
_deleteGPRtoX86reg(reg, flush ? DELETE_REG_FREE : DELETE_REG_FLUSH_AND_FREE);
|
||||
}
|
||||
|
||||
void _deleteEEreg128(int reg)
|
||||
{
|
||||
if (!reg)
|
||||
return;
|
||||
|
||||
GPR_DEL_CONST(reg);
|
||||
_deleteGPRtoXMMreg(reg, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
_deleteGPRtoX86reg(reg, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
}
|
||||
|
||||
void _flushEEreg(int reg, bool clear)
|
||||
{
|
||||
if (!reg)
|
||||
return;
|
||||
if (GPR_IS_CONST1(reg))
|
||||
{
|
||||
|
||||
if (GPR_IS_DIRTY_CONST(reg))
|
||||
_flushConstReg(reg);
|
||||
return;
|
||||
}
|
||||
_deleteGPRtoXMMreg(reg, clear ? 2 : 1);
|
||||
if (clear)
|
||||
GPR_DEL_CONST(reg);
|
||||
|
||||
_deleteGPRtoXMMreg(reg, clear ? DELETE_REG_FLUSH_AND_FREE : DELETE_REG_FLUSH);
|
||||
_deleteGPRtoX86reg(reg, clear ? DELETE_REG_FLUSH_AND_FREE : DELETE_REG_FLUSH);
|
||||
}
|
||||
|
||||
int eeProcessHILO(int reg, int mode, int mmx)
|
||||
int _eeTryRenameReg(int to, int from, int fromx86, int other, int xmminfo)
|
||||
{
|
||||
if (_hasFreeXMMreg() || !(g_pCurInstInfo->regs[reg] & EEINST_LASTUSE))
|
||||
{
|
||||
return _allocGPRtoXMMreg(-1, reg, mode);
|
||||
}
|
||||
|
||||
// can't rename when in form Rd = Rs op Rt and Rd == Rs or Rd == Rt
|
||||
if ((xmminfo & XMMINFO_NORENAME) || fromx86 < 0 || to == from || to == other || !EEINST_RENAMETEST(from))
|
||||
return -1;
|
||||
|
||||
RALOG("Renaming %s to %s\n", R3000A::disRNameGPR[from], R3000A::disRNameGPR[to]);
|
||||
|
||||
// flush back when it's been modified
|
||||
if (x86regs[fromx86].mode & MODE_WRITE && EEINST_LIVETEST(from))
|
||||
_writebackX86Reg(fromx86);
|
||||
|
||||
// remove all references to renamed-to register
|
||||
_deleteGPRtoX86reg(to, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
_deleteGPRtoXMMreg(to, DELETE_REG_FLUSH_AND_FREE);
|
||||
GPR_DEL_CONST(to);
|
||||
|
||||
// and do the actual rename, new register has been modified.
|
||||
x86regs[fromx86].reg = to;
|
||||
x86regs[fromx86].mode |= MODE_READ | MODE_WRITE;
|
||||
return fromx86;
|
||||
}
|
||||
|
||||
// Strangely this code is used on NOT-MMX path ...
|
||||
#define PROCESS_EE_SETMODES(mmreg) (/*(mmxregs[mmreg].mode&MODE_WRITE)*/ false ? PROCESS_EE_MODEWRITES : 0)
|
||||
#define PROCESS_EE_SETMODET(mmreg) (/*(mmxregs[mmreg].mode&MODE_WRITE)*/ false ? PROCESS_EE_MODEWRITET : 0)
|
||||
|
||||
// ignores XMMINFO_READS, XMMINFO_READT, and XMMINFO_READD_LO from xmminfo
|
||||
// core of reg caching
|
||||
void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode, int xmminfo)
|
||||
static bool FitsInImmediate(int reg, int fprinfo)
|
||||
{
|
||||
if (fprinfo & XMMINFO_64BITOP)
|
||||
return (s32)g_cpuConstRegs[reg].SD[0] == g_cpuConstRegs[reg].SD[0];
|
||||
else
|
||||
return true; // all 32bit ops fit
|
||||
}
|
||||
|
||||
void eeRecompileCodeRC0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode, int xmminfo)
|
||||
{
|
||||
if (!_Rd_ && (xmminfo & XMMINFO_WRITED))
|
||||
return;
|
||||
|
||||
if (GPR_IS_CONST2(_Rs_, _Rt_))
|
||||
{
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
if (_Rd_ && (xmminfo & XMMINFO_WRITED))
|
||||
{
|
||||
_deleteGPRtoXMMreg(_Rd_, 2);
|
||||
}
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
_deleteGPRtoX86reg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
_deleteGPRtoXMMreg(_Rd_, DELETE_REG_FLUSH_AND_FREE);
|
||||
GPR_SET_CONST(_Rd_);
|
||||
}
|
||||
constcode();
|
||||
return;
|
||||
}
|
||||
|
||||
const int moded = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? MODE_READ : 0);
|
||||
|
||||
// test if should write xmm, mirror to mmx code
|
||||
if (g_pCurInstInfo->info & EEINST_XMM)
|
||||
{
|
||||
int mmreg1, mmreg3, mmtemp;
|
||||
pxAssert(0);
|
||||
// this function should not be used for lo/hi.
|
||||
pxAssert(!(xmminfo & (XMMINFO_READLO | XMMINFO_READHI | XMMINFO_WRITELO | XMMINFO_WRITEHI)));
|
||||
|
||||
if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
|
||||
_addNeededGPRtoXMMreg(XMMGPR_LO);
|
||||
if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
|
||||
_addNeededGPRtoXMMreg(XMMGPR_HI);
|
||||
_addNeededGPRtoXMMreg(_Rs_);
|
||||
_addNeededGPRtoXMMreg(_Rt_);
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_) || GPR_IS_CONST1(_Rt_))
|
||||
{
|
||||
u32 creg = GPR_IS_CONST1(_Rs_) ? _Rs_ : _Rt_;
|
||||
int vreg = creg == _Rs_ ? _Rt_ : _Rs_;
|
||||
|
||||
// if (g_pCurInstInfo->regs[vreg] & EEINST_XMM)
|
||||
// {
|
||||
// mmreg1 = _allocGPRtoXMMreg(-1, vreg, MODE_READ);
|
||||
// _addNeededGPRtoXMMreg(vreg);
|
||||
// }
|
||||
mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, vreg, MODE_READ);
|
||||
|
||||
if (mmreg1 >= 0)
|
||||
{
|
||||
int info = PROCESS_EE_XMM;
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
info |= PROCESS_EE_SETMODET(mmreg1);
|
||||
else
|
||||
info |= PROCESS_EE_SETMODES(mmreg1);
|
||||
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
{
|
||||
|
||||
_addNeededGPRtoXMMreg(_Rd_);
|
||||
mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE);
|
||||
|
||||
if (!(xmminfo & XMMINFO_READD) && mmreg3 < 0 && ((g_pCurInstInfo->regs[vreg] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(vreg)))
|
||||
{
|
||||
_freeXMMreg(mmreg1);
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
info &= ~PROCESS_EE_MODEWRITET;
|
||||
else
|
||||
info &= ~PROCESS_EE_MODEWRITES;
|
||||
xmmregs[mmreg1].inuse = 1;
|
||||
xmmregs[mmreg1].reg = _Rd_;
|
||||
xmmregs[mmreg1].mode = moded;
|
||||
mmreg3 = mmreg1;
|
||||
}
|
||||
else if (mmreg3 < 0)
|
||||
mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded);
|
||||
|
||||
info |= PROCESS_EE_SET_D(mmreg3);
|
||||
}
|
||||
|
||||
if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
|
||||
{
|
||||
mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0);
|
||||
if (mmtemp >= 0)
|
||||
info |= PROCESS_EE_SET_LO(mmtemp);
|
||||
}
|
||||
if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
|
||||
{
|
||||
mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0);
|
||||
if (mmtemp >= 0)
|
||||
info |= PROCESS_EE_SET_HI(mmtemp);
|
||||
}
|
||||
|
||||
if (creg == _Rs_)
|
||||
constscode(info | PROCESS_EE_SET_T(mmreg1));
|
||||
else
|
||||
consttcode(info | PROCESS_EE_SET_S(mmreg1));
|
||||
_clearNeededXMMregs();
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// no const regs
|
||||
mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ);
|
||||
int mmreg2 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ);
|
||||
|
||||
if (mmreg1 >= 0 || mmreg2 >= 0)
|
||||
{
|
||||
int info = PROCESS_EE_XMM;
|
||||
|
||||
// do it all in xmm
|
||||
if (mmreg1 < 0)
|
||||
mmreg1 = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ);
|
||||
if (mmreg2 < 0)
|
||||
mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
|
||||
|
||||
info |= PROCESS_EE_SETMODES(mmreg1) | PROCESS_EE_SETMODET(mmreg2);
|
||||
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
{
|
||||
// check for last used, if so don't alloc a new XMM reg
|
||||
_addNeededGPRtoXMMreg(_Rd_);
|
||||
mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, moded);
|
||||
|
||||
if (mmreg3 < 0)
|
||||
{
|
||||
if (!(xmminfo & XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)))
|
||||
{
|
||||
_freeXMMreg(mmreg2);
|
||||
info &= ~PROCESS_EE_MODEWRITET;
|
||||
xmmregs[mmreg2].inuse = 1;
|
||||
xmmregs[mmreg2].reg = _Rd_;
|
||||
xmmregs[mmreg2].mode = moded;
|
||||
mmreg3 = mmreg2;
|
||||
}
|
||||
else if (!(xmminfo & XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)))
|
||||
{
|
||||
_freeXMMreg(mmreg1);
|
||||
info &= ~PROCESS_EE_MODEWRITES;
|
||||
xmmregs[mmreg1].inuse = 1;
|
||||
xmmregs[mmreg1].reg = _Rd_;
|
||||
xmmregs[mmreg1].mode = moded;
|
||||
mmreg3 = mmreg1;
|
||||
}
|
||||
else
|
||||
mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded);
|
||||
}
|
||||
|
||||
info |= PROCESS_EE_SET_D(mmreg3);
|
||||
}
|
||||
|
||||
if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
|
||||
{
|
||||
mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0);
|
||||
if (mmtemp >= 0)
|
||||
info |= PROCESS_EE_SET_LO(mmtemp);
|
||||
}
|
||||
if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
|
||||
{
|
||||
mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0), 0);
|
||||
if (mmtemp >= 0)
|
||||
info |= PROCESS_EE_SET_HI(mmtemp);
|
||||
}
|
||||
|
||||
noconstcode(info | PROCESS_EE_SET_S(mmreg1) | PROCESS_EE_SET_T(mmreg2));
|
||||
_clearNeededXMMregs();
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
_clearNeededXMMregs();
|
||||
}
|
||||
// we have to put these up here, because the register allocator below will wipe out const flags
|
||||
// for the destination register when/if it switches it to write mode.
|
||||
const bool s_is_const = GPR_IS_CONST1(_Rs_);
|
||||
const bool t_is_const = GPR_IS_CONST1(_Rt_);
|
||||
const bool d_is_const = GPR_IS_CONST1(_Rd_);
|
||||
const bool s_is_used = EEINST_USEDTEST(_Rs_);
|
||||
const bool t_is_used = EEINST_USEDTEST(_Rt_);
|
||||
const bool s_in_xmm = _hasXMMreg(XMMTYPE_GPRREG, _Rs_);
|
||||
const bool t_in_xmm = _hasXMMreg(XMMTYPE_GPRREG, _Rt_);
|
||||
|
||||
// regular x86
|
||||
_deleteGPRtoXMMreg(_Rs_, 1);
|
||||
_deleteGPRtoXMMreg(_Rt_, 1);
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
_deleteGPRtoXMMreg(_Rd_, (xmminfo & XMMINFO_READD) ? 0 : 2);
|
||||
if ((xmminfo & XMMINFO_READS) && !s_is_const)
|
||||
_addNeededGPRtoX86reg(_Rs_);
|
||||
if ((xmminfo & XMMINFO_READT) && !t_is_const)
|
||||
_addNeededGPRtoX86reg(_Rt_);
|
||||
if ((xmminfo & XMMINFO_READD) && !d_is_const)
|
||||
_addNeededGPRtoX86reg(_Rd_);
|
||||
|
||||
// don't delete, fn will take care of them
|
||||
// if (xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO))
|
||||
// {
|
||||
// _deleteGPRtoXMMreg(XMMGPR_LO, (xmminfo & XMMINFO_READLO) ? 1 : 0);
|
||||
// }
|
||||
// if (xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI))
|
||||
// {
|
||||
// _deleteGPRtoXMMreg(XMMGPR_HI, (xmminfo & XMMINFO_READHI) ? 1 : 0);
|
||||
// }
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
// when it doesn't fit in an immediate, we'll flush it to a reg early to save code
|
||||
u32 info = 0;
|
||||
int regs = -1, regt = -1, regd = -1;
|
||||
if (xmminfo & XMMINFO_READS)
|
||||
{
|
||||
constscode(0);
|
||||
regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
if (regs < 0 && (!s_is_const || !FitsInImmediate(_Rs_, xmminfo)) && (s_is_used || s_in_xmm || ((xmminfo & XMMINFO_WRITED) && _Rd_ == _Rs_) || (xmminfo & XMMINFO_FORCEREGS)))
|
||||
{
|
||||
regs = _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
}
|
||||
if (regs >= 0)
|
||||
info |= PROCESS_EE_SET_S(regs);
|
||||
}
|
||||
|
||||
if (xmminfo & XMMINFO_READT)
|
||||
{
|
||||
regt = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
|
||||
if (regt < 0 && (!t_is_const || !FitsInImmediate(_Rt_, xmminfo)) && (t_is_used || t_in_xmm || ((xmminfo & XMMINFO_WRITED) && _Rd_ == _Rt_) || (xmminfo & XMMINFO_FORCEREGT)))
|
||||
{
|
||||
regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
|
||||
}
|
||||
if (regt >= 0)
|
||||
info |= PROCESS_EE_SET_T(regt);
|
||||
}
|
||||
|
||||
if (xmminfo & (XMMINFO_WRITED | XMMINFO_READD))
|
||||
{
|
||||
// _eeTryRenameReg() sets READ | WRITE already, so this is only needed when allocating.
|
||||
const int moded = ((xmminfo & XMMINFO_WRITED) ? MODE_WRITE : 0) | ((xmminfo & XMMINFO_READD) ? MODE_READ : 0);
|
||||
|
||||
// If S is no longer live, swap D for S. Saves the move.
|
||||
int regd = (_Rd_ && xmminfo & XMMINFO_WRITED) ? _eeTryRenameReg(_Rd_, (xmminfo & XMMINFO_READS) ? _Rs_ : 0, regs, (xmminfo & XMMINFO_READT) ? _Rt_ : 0, xmminfo) : 0;
|
||||
if (regd < 0)
|
||||
regd = _allocX86reg(X86TYPE_GPR, _Rd_, moded);
|
||||
|
||||
pxAssert(regd >= 0);
|
||||
info |= PROCESS_EE_SET_D(regd);
|
||||
}
|
||||
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
|
||||
_validateRegs();
|
||||
|
||||
if (s_is_const && regs < 0)
|
||||
{
|
||||
constscode(info /*| PROCESS_CONSTS*/);
|
||||
return;
|
||||
}
|
||||
|
||||
if (GPR_IS_CONST1(_Rt_))
|
||||
if (t_is_const && regt < 0)
|
||||
{
|
||||
consttcode(0);
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
consttcode(info /*| PROCESS_CONSTT*/);
|
||||
return;
|
||||
}
|
||||
|
||||
noconstcode(0);
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
noconstcode(info);
|
||||
}
|
||||
|
||||
// rt = rs op imm16
|
||||
void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
|
||||
void eeRecompileCodeRC1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode, int xmminfo)
|
||||
{
|
||||
pxAssert((xmminfo & (XMMINFO_READS | XMMINFO_WRITET)) == (XMMINFO_READS | XMMINFO_WRITET));
|
||||
|
||||
if (!_Rt_)
|
||||
return;
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
_deleteGPRtoXMMreg(_Rt_, 2);
|
||||
_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH_AND_FREE);
|
||||
_deleteGPRtoX86reg(_Rt_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
GPR_SET_CONST(_Rt_);
|
||||
constcode();
|
||||
return;
|
||||
}
|
||||
|
||||
// test if should write xmm, mirror to mmx code
|
||||
if (g_pCurInstInfo->info & EEINST_XMM)
|
||||
{
|
||||
pxAssert(0);
|
||||
const bool s_is_used = EEINST_USEDTEST(_Rs_);
|
||||
const bool s_in_xmm = _hasXMMreg(XMMTYPE_GPRREG, _Rs_);
|
||||
|
||||
// no const regs
|
||||
const int mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ);
|
||||
u32 info = 0;
|
||||
int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
if (regs < 0 && (s_is_used || s_in_xmm || _Rt_ == _Rs_ || (xmminfo & XMMINFO_FORCEREGS)))
|
||||
regs = _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
|
||||
if (regs >= 0)
|
||||
info |= PROCESS_EE_SET_S(regs);
|
||||
|
||||
if (mmreg1 >= 0)
|
||||
{
|
||||
int info = PROCESS_EE_XMM | PROCESS_EE_SETMODES(mmreg1);
|
||||
// If S is no longer live, swap T for S. Saves the move.
|
||||
int regt = _eeTryRenameReg(_Rt_, _Rs_, regs, 0, xmminfo);
|
||||
if (regt < 0)
|
||||
regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
|
||||
|
||||
// check for last used, if so don't alloc a new XMM reg
|
||||
_addNeededGPRtoXMMreg(_Rt_);
|
||||
int mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE);
|
||||
info |= PROCESS_EE_SET_T(regt);
|
||||
_validateRegs();
|
||||
|
||||
if (mmreg2 < 0)
|
||||
{
|
||||
if ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_))
|
||||
{
|
||||
_freeXMMreg(mmreg1);
|
||||
info &= ~PROCESS_EE_MODEWRITES;
|
||||
xmmregs[mmreg1].inuse = 1;
|
||||
xmmregs[mmreg1].reg = _Rt_;
|
||||
xmmregs[mmreg1].mode = MODE_WRITE | MODE_READ;
|
||||
mmreg2 = mmreg1;
|
||||
}
|
||||
else
|
||||
mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_WRITE);
|
||||
}
|
||||
|
||||
noconstcode(info | PROCESS_EE_SET_S(mmreg1) | PROCESS_EE_SET_T(mmreg2));
|
||||
_clearNeededXMMregs();
|
||||
GPR_DEL_CONST(_Rt_);
|
||||
return;
|
||||
}
|
||||
|
||||
_clearNeededXMMregs();
|
||||
}
|
||||
|
||||
// regular x86
|
||||
_deleteGPRtoXMMreg(_Rs_, 1);
|
||||
_deleteGPRtoXMMreg(_Rt_, 2);
|
||||
|
||||
noconstcode(0);
|
||||
GPR_DEL_CONST(_Rt_);
|
||||
noconstcode(info);
|
||||
}
|
||||
|
||||
// rd = rt op sa
|
||||
void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
|
||||
void eeRecompileCodeRC2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode, int xmminfo)
|
||||
{
|
||||
pxAssert((xmminfo & (XMMINFO_READT | XMMINFO_WRITED)) == (XMMINFO_READT | XMMINFO_WRITED));
|
||||
|
||||
if (!_Rd_)
|
||||
return;
|
||||
|
||||
if (GPR_IS_CONST1(_Rt_))
|
||||
{
|
||||
_deleteGPRtoXMMreg(_Rd_, 2);
|
||||
_deleteGPRtoXMMreg(_Rd_, DELETE_REG_FLUSH_AND_FREE);
|
||||
_deleteGPRtoX86reg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
|
||||
GPR_SET_CONST(_Rd_);
|
||||
constcode();
|
||||
return;
|
||||
}
|
||||
|
||||
// test if should write xmm, mirror to mmx code
|
||||
if (g_pCurInstInfo->info & EEINST_XMM)
|
||||
{
|
||||
pxAssert(0);
|
||||
const bool t_is_used = EEINST_USEDTEST(_Rt_);
|
||||
const bool t_in_xmm = _hasXMMreg(XMMTYPE_GPRREG, _Rt_);
|
||||
|
||||
// no const regs
|
||||
const int mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ);
|
||||
u32 info = 0;
|
||||
int regt = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
|
||||
if (regt < 0 && (t_is_used || t_in_xmm || (_Rd_ == _Rt_) || (xmminfo & XMMINFO_FORCEREGT)))
|
||||
regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
|
||||
if (regt >= 0)
|
||||
info |= PROCESS_EE_SET_T(regt);
|
||||
|
||||
if (mmreg1 >= 0)
|
||||
{
|
||||
int info = PROCESS_EE_XMM | PROCESS_EE_SETMODET(mmreg1);
|
||||
// If T is no longer live, swap D for T. Saves the move.
|
||||
int regd = _eeTryRenameReg(_Rd_, _Rt_, regt, 0, xmminfo);
|
||||
if (regd < 0)
|
||||
regd = _allocX86reg(X86TYPE_GPR, _Rd_, MODE_WRITE);
|
||||
|
||||
// check for last used, if so don't alloc a new XMM reg
|
||||
_addNeededGPRtoXMMreg(_Rd_);
|
||||
int mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE);
|
||||
info |= PROCESS_EE_SET_D(regd);
|
||||
_validateRegs();
|
||||
|
||||
if (mmreg2 < 0)
|
||||
{
|
||||
if ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rt_))
|
||||
{
|
||||
_freeXMMreg(mmreg1);
|
||||
info &= ~PROCESS_EE_MODEWRITET;
|
||||
xmmregs[mmreg1].inuse = 1;
|
||||
xmmregs[mmreg1].reg = _Rd_;
|
||||
xmmregs[mmreg1].mode = MODE_WRITE | MODE_READ;
|
||||
mmreg2 = mmreg1;
|
||||
}
|
||||
else
|
||||
mmreg2 = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
|
||||
}
|
||||
|
||||
noconstcode(info | PROCESS_EE_SET_T(mmreg1) | PROCESS_EE_SET_D(mmreg2));
|
||||
_clearNeededXMMregs();
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
return;
|
||||
}
|
||||
|
||||
_clearNeededXMMregs();
|
||||
}
|
||||
|
||||
// regular x86
|
||||
_deleteGPRtoXMMreg(_Rt_, 1);
|
||||
_deleteGPRtoXMMreg(_Rd_, 2);
|
||||
|
||||
noconstcode(0);
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
}
|
||||
|
||||
// rt op rs
|
||||
// Template for "rt op rs" instructions.
//
// constcode - invoked when both Rs and Rt are known constants.
// multicode - invoked with info == 0 when neither operand is constant.
//
// This template is unfinished: it raises pxFail() on entry, and the cases
// where exactly one operand is constant emit nothing.
void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode)
{
	pxFail("Unfinished code reached.");

	// for now, don't support xmm
	_deleteEEreg(_Rs_, 0);
	_deleteEEreg(_Rt_, 1);

	if (GPR_IS_CONST2(_Rs_, _Rt_))
	{
		constcode();
	}
	else if (!GPR_IS_CONST1(_Rs_) && !GPR_IS_CONST1(_Rt_))
	{
		multicode(0);
	}
	// else: exactly one operand is constant. The dedicated path was never
	// hooked up (it used to read: multicode(PROCESS_EE_CONSTT);).
}
|
||||
|
||||
// Simple Code Templates //
|
||||
|
||||
// rd = rs op rt
|
||||
// Simple template for "rd = rs op rt".
//
// constcode   - both Rs and Rt are constant; Rd is marked constant first.
// constscode  - only Rs is constant (called with info == 0).
// consttcode  - only Rt is constant (called with info == 0).
// noconstcode - neither operand is constant (called with info == 0).
//
// Nothing is emitted when Rd is register 0. On every non-constant path the
// constant flag of Rd is cleared after the handler has run.
void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode)
{
	if (_Rd_ == 0)
		return;

	// for now, don't support xmm
	_deleteGPRtoXMMreg(_Rs_, 1);
	_deleteGPRtoXMMreg(_Rt_, 1);
	_deleteGPRtoXMMreg(_Rd_, 0);

	if (GPR_IS_CONST2(_Rs_, _Rt_))
	{
		// Fully constant: the result is itself a constant.
		GPR_SET_CONST(_Rd_);
		constcode();
		return;
	}

	// Pick the handler matching whichever operand (if any) is constant.
	if (GPR_IS_CONST1(_Rs_))
	{
		constscode(0);
	}
	else if (GPR_IS_CONST1(_Rt_))
	{
		consttcode(0);
	}
	else
	{
		noconstcode(0);
	}

	GPR_DEL_CONST(_Rd_);
}
|
||||
|
||||
// rt = rs op imm16
|
||||
// Simple template for "rt = rs op imm16".
//
// constcode   - Rs is constant; Rt is marked constant before the call.
// noconstcode - Rs is not constant (called with info == 0); the constant
//               flag of Rt is cleared afterwards.
//
// Nothing is emitted when Rt is register 0.
void eeRecompileCodeConst1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
{
	if (_Rt_ == 0)
		return;

	// for now, don't support xmm
	_deleteGPRtoXMMreg(_Rs_, 1);
	_deleteGPRtoXMMreg(_Rt_, 0);

	if (GPR_IS_CONST1(_Rs_))
	{
		GPR_SET_CONST(_Rt_);
		constcode();
	}
	else
	{
		noconstcode(0);
		GPR_DEL_CONST(_Rt_);
	}
}
|
||||
|
||||
// rd = rt op sa
|
||||
// Simple template for "rd = rt op sa".
//
// constcode   - Rt is constant; Rd is marked constant before the call.
// noconstcode - Rt is not constant (called with info == 0); the constant
//               flag of Rd is cleared afterwards.
//
// Nothing is emitted when Rd is register 0.
void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
{
	if (_Rd_ == 0)
		return;

	// for now, don't support xmm
	_deleteGPRtoXMMreg(_Rt_, 1);
	_deleteGPRtoXMMreg(_Rd_, 0);

	if (GPR_IS_CONST1(_Rt_))
	{
		GPR_SET_CONST(_Rd_);
		constcode();
	}
	else
	{
		noconstcode(0);
		GPR_DEL_CONST(_Rd_);
	}
}
|
||||
|
||||
// rd = rt MULT rs (SPECIAL)
|
||||
void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode, int MULT)
|
||||
{
|
||||
pxFail("Unfinished code reached.");
|
||||
|
||||
// for now, don't support xmm
|
||||
if (MULT)
|
||||
{
|
||||
_deleteGPRtoXMMreg(_Rd_, 0);
|
||||
}
|
||||
|
||||
_deleteGPRtoXMMreg(_Rs_, 1);
|
||||
_deleteGPRtoXMMreg(_Rt_, 1);
|
||||
|
||||
if (GPR_IS_CONST2(_Rs_, _Rt_))
|
||||
{
|
||||
if (MULT && _Rd_)
|
||||
GPR_SET_CONST(_Rd_);
|
||||
constcode();
|
||||
return;
|
||||
}
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
//multicode(PROCESS_EE_CONSTS);
|
||||
if (MULT && _Rd_)
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
return;
|
||||
}
|
||||
|
||||
if (GPR_IS_CONST1(_Rt_))
|
||||
{
|
||||
//multicode(PROCESS_EE_CONSTT);
|
||||
if (MULT && _Rd_)
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
return;
|
||||
}
|
||||
|
||||
multicode(0);
|
||||
if (MULT && _Rd_)
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
noconstcode(info);
|
||||
}
|
||||
|
||||
// EE XMM allocation code
|
||||
|
@ -575,40 +288,11 @@ int eeRecompileCodeXMM(int xmminfo)
|
|||
{
|
||||
int info = PROCESS_EE_XMM;
|
||||
|
||||
// flush consts
|
||||
if (xmminfo & XMMINFO_READT)
|
||||
{
|
||||
if (GPR_IS_CONST1(_Rt_) && !(g_cpuFlushedConstReg & (1 << _Rt_)))
|
||||
{
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], g_cpuConstRegs[_Rt_].UL[1]);
|
||||
g_cpuFlushedConstReg |= (1 << _Rt_);
|
||||
}
|
||||
}
|
||||
if (xmminfo & XMMINFO_READS)
|
||||
{
|
||||
if (GPR_IS_CONST1(_Rs_) && !(g_cpuFlushedConstReg & (1 << _Rs_)))
|
||||
{
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]);
|
||||
xMOV(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]);
|
||||
g_cpuFlushedConstReg |= (1 << _Rs_);
|
||||
}
|
||||
}
|
||||
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
{
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
}
|
||||
|
||||
// add needed
|
||||
if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
|
||||
{
|
||||
_addNeededGPRtoXMMreg(XMMGPR_LO);
|
||||
}
|
||||
if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
|
||||
{
|
||||
_addNeededGPRtoXMMreg(XMMGPR_HI);
|
||||
}
|
||||
if (xmminfo & XMMINFO_READS)
|
||||
_addNeededGPRtoXMMreg(_Rs_);
|
||||
if (xmminfo & XMMINFO_READT)
|
||||
|
@ -616,58 +300,59 @@ int eeRecompileCodeXMM(int xmminfo)
|
|||
if (xmminfo & XMMINFO_WRITED)
|
||||
_addNeededGPRtoXMMreg(_Rd_);
|
||||
|
||||
// allocate
|
||||
// TODO: we could do memory operands here if not live. but the MMI implementations aren't hooked up to that at the moment.
|
||||
if (xmminfo & XMMINFO_READS)
|
||||
{
|
||||
int reg = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ);
|
||||
info |= PROCESS_EE_SET_S(reg) | PROCESS_EE_SETMODES(reg);
|
||||
const int reg = _allocGPRtoXMMreg(_Rs_, MODE_READ);
|
||||
info |= PROCESS_EE_SET_S(reg);
|
||||
}
|
||||
if (xmminfo & XMMINFO_READT)
|
||||
{
|
||||
int reg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
|
||||
info |= PROCESS_EE_SET_T(reg) | PROCESS_EE_SETMODET(reg);
|
||||
const int reg = _allocGPRtoXMMreg(_Rt_, MODE_READ);
|
||||
info |= PROCESS_EE_SET_T(reg);
|
||||
}
|
||||
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
{
|
||||
int readd = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? ((xmminfo & XMMINFO_READD_LO) ? (MODE_READ | MODE_READHALF) : MODE_READ) : 0);
|
||||
int readd = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? MODE_READ : 0);
|
||||
|
||||
int regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, readd);
|
||||
|
||||
if (regd < 0)
|
||||
{
|
||||
if (!(xmminfo & XMMINFO_READD) && (xmminfo & XMMINFO_READT) && (_Rt_ == 0 || (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)))
|
||||
if (!(xmminfo & XMMINFO_READD) && (xmminfo & XMMINFO_READT) && EEINST_RENAMETEST(_Rt_))
|
||||
{
|
||||
_freeXMMreg(EEREC_T);
|
||||
xmmregs[EEREC_T].inuse = 1;
|
||||
xmmregs[EEREC_T].reg = _Rd_;
|
||||
xmmregs[EEREC_T].mode = readd;
|
||||
_deleteEEreg128(_Rd_);
|
||||
_reallocateXMMreg(EEREC_T, XMMTYPE_GPRREG, _Rd_, readd, EEINST_LIVETEST(_Rt_));
|
||||
regd = EEREC_T;
|
||||
}
|
||||
else if (!(xmminfo & XMMINFO_READD) && (xmminfo & XMMINFO_READS) && (_Rs_ == 0 || (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)))
|
||||
else if (!(xmminfo & XMMINFO_READD) && (xmminfo & XMMINFO_READS) && EEINST_RENAMETEST(_Rs_))
|
||||
{
|
||||
_freeXMMreg(EEREC_S);
|
||||
xmmregs[EEREC_S].inuse = 1;
|
||||
xmmregs[EEREC_S].reg = _Rd_;
|
||||
xmmregs[EEREC_S].mode = readd;
|
||||
_deleteEEreg128(_Rd_);
|
||||
_reallocateXMMreg(EEREC_S, XMMTYPE_GPRREG, _Rd_, readd, EEINST_LIVETEST(_Rs_));
|
||||
regd = EEREC_S;
|
||||
}
|
||||
else
|
||||
regd = _allocGPRtoXMMreg(-1, _Rd_, readd);
|
||||
{
|
||||
regd = _allocGPRtoXMMreg(_Rd_, readd);
|
||||
}
|
||||
}
|
||||
|
||||
info |= PROCESS_EE_SET_D(regd);
|
||||
}
|
||||
if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
|
||||
{
|
||||
info |= PROCESS_EE_SET_LO(_allocGPRtoXMMreg(-1, XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0)));
|
||||
info |= PROCESS_EE_LO;
|
||||
info |= PROCESS_EE_SET_LO(_allocGPRtoXMMreg(XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0)));
|
||||
}
|
||||
if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
|
||||
{
|
||||
info |= PROCESS_EE_SET_HI(_allocGPRtoXMMreg(-1, XMMGPR_HI, ((xmminfo & XMMINFO_READHI) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITEHI) ? MODE_WRITE : 0)));
|
||||
info |= PROCESS_EE_HI;
|
||||
info |= PROCESS_EE_SET_HI(_allocGPRtoXMMreg(XMMGPR_HI, ((xmminfo & XMMINFO_READHI) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITEHI) ? MODE_WRITE : 0)));
|
||||
}
|
||||
|
||||
if (xmminfo & XMMINFO_WRITED)
|
||||
GPR_DEL_CONST(_Rd_);
|
||||
|
||||
_validateRegs();
|
||||
return info;
|
||||
}
|
||||
|
||||
|
@ -676,9 +361,6 @@ int eeRecompileCodeXMM(int xmminfo)
|
|||
#define _Fs_ _Rd_
|
||||
#define _Fd_ _Sa_
|
||||
|
||||
#define PROCESS_EE_SETMODES_XMM(mmreg) ((xmmregs[mmreg].mode & MODE_WRITE) ? PROCESS_EE_MODEWRITES : 0)
|
||||
#define PROCESS_EE_SETMODET_XMM(mmreg) ((xmmregs[mmreg].mode & MODE_WRITE) ? PROCESS_EE_MODEWRITET : 0)
|
||||
|
||||
// rd = rs op rt
|
||||
void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo)
|
||||
{
|
||||
|
@ -699,7 +381,7 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
|
|||
if (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE)
|
||||
mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ);
|
||||
else
|
||||
mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ);
|
||||
mmregt = _allocFPtoXMMreg(_Ft_, MODE_READ);
|
||||
}
|
||||
|
||||
if (xmminfo & XMMINFO_READS)
|
||||
|
@ -709,26 +391,27 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
|
|||
mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
|
||||
}
|
||||
else
|
||||
mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ);
|
||||
}
|
||||
{
|
||||
mmregs = _allocFPtoXMMreg(_Fs_, MODE_READ);
|
||||
|
||||
if (mmregs >= 0)
|
||||
info |= PROCESS_EE_SETMODES_XMM(mmregs);
|
||||
if (mmregt >= 0)
|
||||
info |= PROCESS_EE_SETMODET_XMM(mmregt);
|
||||
// if we just allocated S and Fs == Ft, share it
|
||||
if ((xmminfo & XMMINFO_READT) && _Fs_ == _Ft_)
|
||||
mmregt = mmregs;
|
||||
}
|
||||
}
|
||||
|
||||
if (xmminfo & XMMINFO_READD)
|
||||
{
|
||||
pxAssert(xmminfo & XMMINFO_WRITED);
|
||||
mmregd = _allocFPtoXMMreg(-1, _Fd_, MODE_READ);
|
||||
mmregd = _allocFPtoXMMreg(_Fd_, MODE_READ);
|
||||
}
|
||||
|
||||
if (xmminfo & XMMINFO_READACC)
|
||||
{
|
||||
if (!(xmminfo & XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE))
|
||||
if (!(xmminfo & XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[XMMFPU_ACC] & EEINST_LASTUSE))
|
||||
mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ);
|
||||
else
|
||||
mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ);
|
||||
mmregacc = _allocFPACCtoXMMreg(MODE_READ);
|
||||
}
|
||||
|
||||
if (xmminfo & XMMINFO_WRITEACC)
|
||||
|
@ -741,34 +424,28 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
|
|||
|
||||
if (mmregacc < 0)
|
||||
{
|
||||
if ((xmminfo & XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)))
|
||||
if ((xmminfo & XMMINFO_READT) && mmregt >= 0 && FPUINST_RENAMETEST(_Ft_))
|
||||
{
|
||||
if (FPUINST_ISLIVE(_Ft_))
|
||||
{
|
||||
_freeXMMreg(mmregt);
|
||||
info &= ~PROCESS_EE_MODEWRITET;
|
||||
}
|
||||
xmmregs[mmregt].inuse = 1;
|
||||
if (EE_WRITE_DEAD_VALUES && xmmregs[mmregt].mode & MODE_WRITE)
|
||||
_writebackXMMreg(mmregt);
|
||||
|
||||
xmmregs[mmregt].reg = 0;
|
||||
xmmregs[mmregt].mode = readacc;
|
||||
xmmregs[mmregt].type = XMMTYPE_FPACC;
|
||||
mmregacc = mmregt;
|
||||
}
|
||||
else if ((xmminfo & XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)))
|
||||
else if ((xmminfo & XMMINFO_READS) && mmregs >= 0 && FPUINST_RENAMETEST(_Fs_))
|
||||
{
|
||||
if (FPUINST_ISLIVE(_Fs_))
|
||||
{
|
||||
_freeXMMreg(mmregs);
|
||||
info &= ~PROCESS_EE_MODEWRITES;
|
||||
}
|
||||
xmmregs[mmregs].inuse = 1;
|
||||
if (EE_WRITE_DEAD_VALUES && xmmregs[mmregs].mode & MODE_WRITE)
|
||||
_writebackXMMreg(mmregs);
|
||||
|
||||
xmmregs[mmregs].reg = 0;
|
||||
xmmregs[mmregs].mode = readacc;
|
||||
xmmregs[mmregs].type = XMMTYPE_FPACC;
|
||||
mmregacc = mmregs;
|
||||
}
|
||||
else
|
||||
mmregacc = _allocFPACCtoXMMreg(-1, readacc);
|
||||
mmregacc = _allocFPACCtoXMMreg(readacc);
|
||||
}
|
||||
|
||||
xmmregs[mmregacc].mode |= MODE_WRITE;
|
||||
|
@ -778,48 +455,43 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
|
|||
// check for last used, if so don't alloc a new XMM reg
|
||||
int readd = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? MODE_READ : 0);
|
||||
if (xmminfo & XMMINFO_READD)
|
||||
mmregd = _allocFPtoXMMreg(-1, _Fd_, readd);
|
||||
mmregd = _allocFPtoXMMreg(_Fd_, readd);
|
||||
else
|
||||
mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd);
|
||||
|
||||
if (mmregd < 0)
|
||||
{
|
||||
if ((xmminfo & XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)))
|
||||
if ((xmminfo & XMMINFO_READT) && mmregt >= 0 && FPUINST_RENAMETEST(_Ft_))
|
||||
{
|
||||
if (FPUINST_ISLIVE(_Ft_))
|
||||
{
|
||||
_freeXMMreg(mmregt);
|
||||
info &= ~PROCESS_EE_MODEWRITET;
|
||||
}
|
||||
xmmregs[mmregt].inuse = 1;
|
||||
if (EE_WRITE_DEAD_VALUES && xmmregs[mmregt].mode & MODE_WRITE)
|
||||
_writebackXMMreg(mmregt);
|
||||
|
||||
xmmregs[mmregt].reg = _Fd_;
|
||||
xmmregs[mmregt].mode = readd;
|
||||
mmregd = mmregt;
|
||||
}
|
||||
else if ((xmminfo & XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)))
|
||||
else if ((xmminfo & XMMINFO_READS) && mmregs >= 0 && FPUINST_RENAMETEST(_Fs_))
|
||||
{
|
||||
if (FPUINST_ISLIVE(_Fs_))
|
||||
{
|
||||
_freeXMMreg(mmregs);
|
||||
info &= ~PROCESS_EE_MODEWRITES;
|
||||
}
|
||||
if (EE_WRITE_DEAD_VALUES && xmmregs[mmregs].mode & MODE_WRITE)
|
||||
_writebackXMMreg(mmregs);
|
||||
|
||||
xmmregs[mmregs].inuse = 1;
|
||||
xmmregs[mmregs].reg = _Fd_;
|
||||
xmmregs[mmregs].mode = readd;
|
||||
mmregd = mmregs;
|
||||
}
|
||||
else if ((xmminfo & XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC)))
|
||||
else if ((xmminfo & XMMINFO_READACC) && mmregacc >= 0 && FPUINST_RENAMETEST(XMMFPU_ACC))
|
||||
{
|
||||
if (FPUINST_ISLIVE(XMMFPU_ACC))
|
||||
_freeXMMreg(mmregacc);
|
||||
xmmregs[mmregacc].inuse = 1;
|
||||
if (EE_WRITE_DEAD_VALUES && xmmregs[mmregacc].mode & MODE_WRITE)
|
||||
_writebackXMMreg(mmregacc);
|
||||
|
||||
xmmregs[mmregacc].reg = _Fd_;
|
||||
xmmregs[mmregacc].mode = readd;
|
||||
xmmregs[mmregacc].type = XMMTYPE_FPREG;
|
||||
mmregd = mmregacc;
|
||||
}
|
||||
else
|
||||
mmregd = _allocFPtoXMMreg(-1, _Fd_, readd);
|
||||
mmregd = _allocFPtoXMMreg(_Fd_, readd);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -841,12 +513,12 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
|
|||
if (xmminfo & XMMINFO_READS)
|
||||
{
|
||||
if (mmregs >= 0)
|
||||
info |= PROCESS_EE_SET_S(mmregs) | PROCESS_EE_S;
|
||||
info |= PROCESS_EE_SET_S(mmregs);
|
||||
}
|
||||
if (xmminfo & XMMINFO_READT)
|
||||
{
|
||||
if (mmregt >= 0)
|
||||
info |= PROCESS_EE_SET_T(mmregt) | PROCESS_EE_T;
|
||||
info |= PROCESS_EE_SET_T(mmregt);
|
||||
}
|
||||
|
||||
// at least one must be in xmm
|
||||
|
@ -856,5 +528,4 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
|
|||
}
|
||||
|
||||
xmmcode(info);
|
||||
_clearNeededXMMregs();
|
||||
}
|
||||
|
|
|
@ -23,11 +23,36 @@
|
|||
#include "iR5900.h"
|
||||
#include "common/Perf.h"
|
||||
|
||||
//#define LOG_STORES
|
||||
|
||||
using namespace vtlb_private;
|
||||
using namespace x86Emitter;
|
||||
|
||||
// we need enough for a 32-bit jump forwards (5 bytes)
|
||||
static constexpr u32 LOADSTORE_PADDING = 5;
|
||||
|
||||
//#define LOG_STORES
|
||||
|
||||
// Returns a bitmask over the x86regs table: bit N is set when
// x86regs[N].inuse is non-zero, for N in [0, iREGCNT_GPR).
static u32 GetAllocatedGPRBitmask()
{
	u32 allocated = 0;
	for (u32 slot = 0; slot < iREGCNT_GPR; ++slot)
		allocated |= (x86regs[slot].inuse ? 1u : 0u) << slot;

	return allocated;
}
|
||||
|
||||
// Returns a bitmask over the xmmregs table: bit N is set when
// xmmregs[N].inuse is non-zero, for N in [0, iREGCNT_XMM).
static u32 GetAllocatedXMMBitmask()
{
	u32 allocated = 0;
	for (u32 slot = 0; slot < iREGCNT_XMM; ++slot)
		allocated |= (xmmregs[slot].inuse ? 1u : 0u) << slot;

	return allocated;
}
|
||||
|
||||
/*
|
||||
// Pseudo-Code For the following Dynarec Implementations -->
|
||||
|
||||
|
@ -112,18 +137,39 @@ namespace vtlb_private
|
|||
// Prepares eax, ecx, and ebx for Direct or Indirect operations.
|
||||
// Returns the writeback pointer for ebx (return address from indirect handling)
|
||||
//
|
||||
static u32* DynGen_PrepRegs()
|
||||
static void DynGen_PrepRegs(int addr_reg, int value_reg, u32 sz, bool xmm)
|
||||
{
|
||||
// Warning dirty ebx (in case someone got the very bad idea to move this code)
|
||||
EE::Profiler.EmitMem();
|
||||
|
||||
_freeX86reg(arg1regd);
|
||||
xMOV(arg1regd, xRegister32(addr_reg));
|
||||
|
||||
if (value_reg >= 0)
|
||||
{
|
||||
if (sz == 128)
|
||||
{
|
||||
pxAssert(xmm);
|
||||
_freeXMMreg(xRegisterSSE::GetArgRegister(1, 0).GetId());
|
||||
xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), xRegisterSSE::GetInstance(value_reg));
|
||||
}
|
||||
else if (xmm)
|
||||
{
|
||||
// 32bit xmms are passed in GPRs
|
||||
pxAssert(sz == 32);
|
||||
_freeX86reg(arg2regd);
|
||||
xMOVD(arg2regd, xRegisterSSE(value_reg));
|
||||
}
|
||||
else
|
||||
{
|
||||
_freeX86reg(arg2regd);
|
||||
xMOV(arg2reg, xRegister64(value_reg));
|
||||
}
|
||||
}
|
||||
|
||||
xMOV(eax, arg1regd);
|
||||
xSHR(eax, VTLB_PAGE_BITS);
|
||||
xMOV(rax, ptrNative[xComplexAddress(rbx, vtlbdata.vmap, rax * wordsize)]);
|
||||
u32* writeback = xLEA_Writeback(rbx);
|
||||
xMOV(rax, ptrNative[xComplexAddress(arg3reg, vtlbdata.vmap, rax * wordsize)]);
|
||||
xADD(arg1reg, rax);
|
||||
|
||||
return writeback;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
@ -169,17 +215,14 @@ namespace vtlb_private
|
|||
// ------------------------------------------------------------------------
|
||||
static void DynGen_DirectWrite(u32 bits)
|
||||
{
|
||||
// TODO: x86Emitter can't use dil
|
||||
switch (bits)
|
||||
{
|
||||
//8 , 16, 32 : data on EDX
|
||||
case 8:
|
||||
xMOV(edx, arg2regd);
|
||||
xMOV(ptr[arg1reg], dl);
|
||||
xMOV(ptr[arg1reg], xRegister8(arg2regd));
|
||||
break;
|
||||
|
||||
case 16:
|
||||
xMOV(ptr[arg1reg], xRegister16(arg2reg));
|
||||
xMOV(ptr[arg1reg], xRegister16(arg2regd));
|
||||
break;
|
||||
|
||||
case 32:
|
||||
|
@ -229,7 +272,9 @@ static u8* GetIndirectDispatcherPtr(int mode, int operandsize, int sign = 0)
|
|||
// Generates a JS instruction that targets the appropriate templated instance of
|
||||
// the vtlb Indirect Dispatcher.
|
||||
//
|
||||
static void DynGen_IndirectDispatch(int mode, int bits, bool sign = false)
|
||||
|
||||
template <typename GenDirectFn>
|
||||
static void DynGen_HandlerTest(const GenDirectFn& gen_direct, int mode, int bits, bool sign = false)
|
||||
{
|
||||
int szidx = 0;
|
||||
switch (bits)
|
||||
|
@ -241,7 +286,12 @@ static void DynGen_IndirectDispatch(int mode, int bits, bool sign = false)
|
|||
case 128: szidx = 4; break;
|
||||
jNO_DEFAULT;
|
||||
}
|
||||
xJS(GetIndirectDispatcherPtr(mode, szidx, sign));
|
||||
xForwardJS8 to_handler;
|
||||
gen_direct();
|
||||
xForwardJump8 done;
|
||||
to_handler.SetTarget();
|
||||
xFastCall(GetIndirectDispatcherPtr(mode, szidx, sign));
|
||||
done.SetTarget();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
@ -250,6 +300,13 @@ static void DynGen_IndirectDispatch(int mode, int bits, bool sign = false)
|
|||
// Out: eax: result (if mode < 64)
|
||||
static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign)
|
||||
{
|
||||
// fixup stack
|
||||
#ifdef _WIN32
|
||||
xSUB(rsp, 32 + 8);
|
||||
#else
|
||||
xSUB(rsp, 8);
|
||||
#endif
|
||||
|
||||
xMOVZX(eax, al);
|
||||
if (wordsize != 8)
|
||||
xSUB(arg1regd, 0x80000000);
|
||||
|
@ -291,7 +348,13 @@ static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign)
|
|||
}
|
||||
}
|
||||
|
||||
xJMP(rbx);
|
||||
#ifdef _WIN32
|
||||
xADD(rsp, 32 + 8);
|
||||
#else
|
||||
xADD(rsp, 8);
|
||||
#endif
|
||||
|
||||
xRET();
|
||||
}
|
||||
|
||||
// One-time initialization procedure. Multiple subsequent calls during the lifespan of the
|
||||
|
@ -342,65 +405,83 @@ static void vtlb_SetWriteback(u32* writeback)
|
|||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Dynarec Load Implementations
|
||||
int vtlb_DynGenReadQuad(u32 bits, int gpr)
|
||||
{
|
||||
pxAssume(bits == 128);
|
||||
|
||||
u32* writeback = DynGen_PrepRegs();
|
||||
|
||||
const int reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, 0) : _allocGPRtoXMMreg(0, gpr, MODE_WRITE); // Handler returns in xmm0
|
||||
DynGen_IndirectDispatch(0, bits);
|
||||
DynGen_DirectRead(bits, false);
|
||||
|
||||
vtlb_SetWriteback(writeback); // return target for indirect's call/ret
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Recompiled input registers:
|
||||
// ecx - source address to read from
|
||||
// Returns read value in eax.
|
||||
void vtlb_DynGenReadNonQuad(u32 bits, bool sign)
|
||||
int vtlb_DynGenReadNonQuad(u32 bits, bool sign, bool xmm, int addr_reg, vtlb_ReadRegAllocCallback dest_reg_alloc)
|
||||
{
|
||||
pxAssume(bits <= 64);
|
||||
|
||||
u32* writeback = DynGen_PrepRegs();
|
||||
|
||||
DynGen_IndirectDispatch(0, bits, sign && bits < 64);
|
||||
DynGen_DirectRead(bits, sign);
|
||||
|
||||
vtlb_SetWriteback(writeback);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
|
||||
// recompiler if the TLB is changed.
|
||||
int vtlb_DynGenReadQuad_Const(u32 bits, u32 addr_const, int gpr)
|
||||
int x86_dest_reg;
|
||||
if (!CHECK_FASTMEM || vtlb_IsFaultingPC(pc))
|
||||
{
|
||||
pxAssert(bits == 128);
|
||||
iFlushCall(FLUSH_FULLVTLB);
|
||||
|
||||
EE::Profiler.EmitConstMem(addr_const);
|
||||
DynGen_PrepRegs(addr_reg, -1, bits, xmm);
|
||||
DynGen_HandlerTest([bits, sign]() { DynGen_DirectRead(bits, sign); }, 0, bits, sign && bits < 64);
|
||||
|
||||
int reg;
|
||||
auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS];
|
||||
if (!vmv.isHandler(addr_const))
|
||||
if (!xmm)
|
||||
{
|
||||
void* ppf = reinterpret_cast<void*>(vmv.assumePtr(addr_const));
|
||||
reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, -1) : _allocGPRtoXMMreg(-1, gpr, MODE_WRITE);
|
||||
xMOVAPS(xRegisterSSE(reg), ptr128[ppf]);
|
||||
x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeX86reg(eax), eax.GetId());
|
||||
xMOV(xRegister64(x86_dest_reg), rax);
|
||||
}
|
||||
else
|
||||
{
|
||||
// has to: translate, find function, call function
|
||||
u32 paddr = vmv.assumeHandlerGetPAddr(addr_const);
|
||||
|
||||
const int szidx = 4;
|
||||
iFlushCall(FLUSH_FULLVTLB);
|
||||
reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, 0) : _allocGPRtoXMMreg(0, gpr, MODE_WRITE); // Handler returns in xmm0
|
||||
xFastCall(vmv.assumeHandlerGetRaw(szidx, 0), paddr, arg2reg);
|
||||
// we shouldn't be loading any FPRs which aren't 32bit..
|
||||
// we use MOVD here despite it being floating-point data, because we're going int->float reinterpret.
|
||||
pxAssert(bits == 32);
|
||||
x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
|
||||
xMOVDZX(xRegisterSSE(x86_dest_reg), eax);
|
||||
}
|
||||
return reg;
|
||||
|
||||
return x86_dest_reg;
|
||||
}
|
||||
|
||||
const u8* codeStart;
|
||||
const xAddressReg x86addr(addr_reg);
|
||||
if (!xmm)
|
||||
{
|
||||
x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeX86reg(eax), eax.GetId());
|
||||
codeStart = x86Ptr;
|
||||
const xRegister64 x86reg(x86_dest_reg);
|
||||
switch (bits)
|
||||
{
|
||||
case 8:
|
||||
sign ? xMOVSX(x86reg, ptr8[RFASTMEMBASE + x86addr]) : xMOVZX(xRegister32(x86reg), ptr8[RFASTMEMBASE + x86addr]);
|
||||
break;
|
||||
case 16:
|
||||
sign ? xMOVSX(x86reg, ptr16[RFASTMEMBASE + x86addr]) : xMOVZX(xRegister32(x86reg), ptr16[RFASTMEMBASE + x86addr]);
|
||||
break;
|
||||
case 32:
|
||||
sign ? xMOVSX(x86reg, ptr32[RFASTMEMBASE + x86addr]) : xMOV(xRegister32(x86reg), ptr32[RFASTMEMBASE + x86addr]);
|
||||
break;
|
||||
case 64:
|
||||
xMOV(x86reg, ptr64[RFASTMEMBASE + x86addr]);
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pxAssert(bits == 32);
|
||||
x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
|
||||
codeStart = x86Ptr;
|
||||
const xRegisterSSE xmmreg(x86_dest_reg);
|
||||
xMOVSSZX(xmmreg, ptr32[RFASTMEMBASE + x86addr]);
|
||||
}
|
||||
|
||||
const u32 padding = LOADSTORE_PADDING - std::min<u32>(static_cast<u32>(x86Ptr - codeStart), 5);
|
||||
for (u32 i = 0; i < padding; i++)
|
||||
xNOP();
|
||||
|
||||
vtlb_AddLoadStoreInfo((uptr)codeStart, static_cast<u32>(x86Ptr - codeStart),
|
||||
pc, GetAllocatedGPRBitmask(), GetAllocatedXMMBitmask(),
|
||||
static_cast<u8>(addr_reg), static_cast<u8>(x86_dest_reg),
|
||||
static_cast<u8>(bits), sign, true, xmm);
|
||||
|
||||
return x86_dest_reg;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
@ -411,43 +492,44 @@ int vtlb_DynGenReadQuad_Const(u32 bits, u32 addr_const, int gpr)
|
|||
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
|
||||
// recompiler if the TLB is changed.
|
||||
//
|
||||
void vtlb_DynGenReadNonQuad_Const(u32 bits, bool sign, u32 addr_const)
|
||||
int vtlb_DynGenReadNonQuad_Const(u32 bits, bool sign, bool xmm, u32 addr_const, vtlb_ReadRegAllocCallback dest_reg_alloc)
|
||||
{
|
||||
EE::Profiler.EmitConstMem(addr_const);
|
||||
|
||||
int x86_dest_reg;
|
||||
auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS];
|
||||
if (!vmv.isHandler(addr_const))
|
||||
{
|
||||
auto ppf = vmv.assumePtr(addr_const);
|
||||
if (!xmm)
|
||||
{
|
||||
x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeX86reg(eax), eax.GetId());
|
||||
switch (bits)
|
||||
{
|
||||
case 8:
|
||||
if (sign)
|
||||
xMOVSX(rax, ptr8[(u8*)ppf]);
|
||||
else
|
||||
xMOVZX(rax, ptr8[(u8*)ppf]);
|
||||
sign ? xMOVSX(xRegister64(x86_dest_reg), ptr8[(u8*)ppf]) : xMOVZX(xRegister32(x86_dest_reg), ptr8[(u8*)ppf]);
|
||||
break;
|
||||
|
||||
case 16:
|
||||
if (sign)
|
||||
xMOVSX(rax, ptr16[(u16*)ppf]);
|
||||
else
|
||||
xMOVZX(rax, ptr16[(u16*)ppf]);
|
||||
sign ? xMOVSX(xRegister64(x86_dest_reg), ptr16[(u16*)ppf]) : xMOVZX(xRegister32(x86_dest_reg), ptr16[(u16*)ppf]);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
if (sign)
|
||||
xMOVSX(rax, ptr32[(u32*)ppf]);
|
||||
else
|
||||
xMOV(eax, ptr32[(u32*)ppf]);
|
||||
sign ? xMOVSX(xRegister64(x86_dest_reg), ptr32[(u32*)ppf]) : xMOV(xRegister32(x86_dest_reg), ptr32[(u32*)ppf]);
|
||||
break;
|
||||
|
||||
case 64:
|
||||
xMOV(rax, ptr64[(u64*)ppf]);
|
||||
xMOV(xRegister64(x86_dest_reg), ptr64[(u64*)ppf]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
|
||||
xMOVSSZX(xRegisterSSE(x86_dest_reg), ptr32[(float*)ppf]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// has to: translate, find function, call function
|
||||
u32 paddr = vmv.assumeHandlerGetPAddr(addr_const);
|
||||
|
@ -464,60 +546,157 @@ void vtlb_DynGenReadNonQuad_Const(u32 bits, bool sign, u32 addr_const)
|
|||
// Shortcut for the INTC_STAT register, which many games like to spin on heavily.
|
||||
if ((bits == 32) && !EmuConfig.Speedhacks.IntcStat && (paddr == INTC_STAT))
|
||||
{
|
||||
xMOV(eax, ptr[&psHu32(INTC_STAT)]);
|
||||
x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeX86reg(eax), eax.GetId());
|
||||
if (!xmm)
|
||||
{
|
||||
if (sign)
|
||||
xMOVSX(xRegister64(x86_dest_reg), ptr32[&psHu32(INTC_STAT)]);
|
||||
else
|
||||
xMOV(xRegister32(x86_dest_reg), ptr32[&psHu32(INTC_STAT)]);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOVDZX(xRegisterSSE(x86_dest_reg), ptr32[&psHu32(INTC_STAT)]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
iFlushCall(FLUSH_FULLVTLB);
|
||||
xFastCall(vmv.assumeHandlerGetRaw(szidx, false), paddr);
|
||||
|
||||
// perform sign extension on the result:
|
||||
if (!xmm)
|
||||
{
|
||||
x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeX86reg(eax), eax.GetId());
|
||||
switch (bits)
|
||||
{
|
||||
// save REX prefix by using 32bit dest for zext
|
||||
case 8:
|
||||
sign ? xMOVSX(xRegister64(x86_dest_reg), al) : xMOVZX(xRegister32(x86_dest_reg), al);
|
||||
break;
|
||||
|
||||
if (bits == 8)
|
||||
{
|
||||
if (sign)
|
||||
xMOVSX(rax, al);
|
||||
case 16:
|
||||
sign ? xMOVSX(xRegister64(x86_dest_reg), ax) : xMOVZX(xRegister32(x86_dest_reg), ax);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
sign ? xMOVSX(xRegister64(x86_dest_reg), eax) : xMOV(xRegister32(x86_dest_reg), eax);
|
||||
break;
|
||||
|
||||
case 64:
|
||||
xMOV(xRegister64(x86_dest_reg), rax);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
xMOVZX(rax, al);
|
||||
}
|
||||
else if (bits == 16)
|
||||
{
|
||||
if (sign)
|
||||
xMOVSX(rax, ax);
|
||||
x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
|
||||
xMOVDZX(xRegisterSSE(x86_dest_reg), eax);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return x86_dest_reg;
|
||||
}
|
||||
|
||||
// Emits a 128-bit guest memory load and returns the id of the XMM register that receives it.
//
// bits            must be 128.
// addr_reg        NOTE(review): not read by this function; the guest address is always taken
//                 from arg1reg/arg1regd -- confirm every caller passes that register.
// dest_reg_alloc  optional callback that picks the destination XMM register. When it is empty,
//                 xmm0 is freed and used instead.
//
// Two code shapes are produced:
//   * slow path (fastmem disabled, or this pc is already known to fault): flush, run the
//     handler test, then copy the result out of xmm0;
//   * fast path: a single MOVAPS relative to RFASTMEMBASE, padded with NOPs and registered
//     through vtlb_AddLoadStoreInfo().
int vtlb_DynGenReadQuad(u32 bits, int addr_reg, vtlb_ReadRegAllocCallback dest_reg_alloc)
{
	pxAssume(bits == 128);

	// Chooses the destination register: caller's allocator when given, otherwise xmm0.
	const auto alloc_dest = [&]() -> int {
		if (dest_reg_alloc)
			return dest_reg_alloc();

		_freeXMMreg(0);
		return 0;
	};

	if (CHECK_FASTMEM && !vtlb_IsFaultingPC(pc))
	{
		const int dest = alloc_dest();
		const u8* load_start = x86Ptr;

		xMOVAPS(xRegisterSSE(dest), ptr128[RFASTMEMBASE + arg1reg]);

		// Pad the emitted load with NOPs; the amount depends on how many bytes were just emitted.
		const u32 emitted = static_cast<u32>(x86Ptr - load_start);
		const u32 padding = LOADSTORE_PADDING - std::min<u32>(emitted, 5);
		for (u32 remaining = padding; remaining != 0; remaining--)
			xNOP();

		vtlb_AddLoadStoreInfo((uptr)load_start, static_cast<u32>(x86Ptr - load_start),
			pc, GetAllocatedGPRBitmask(), GetAllocatedXMMBitmask(),
			static_cast<u8>(arg1reg.GetId()), static_cast<u8>(dest),
			static_cast<u8>(bits), false, true, true);

		return dest;
	}

	// Slow path: everything is flushed before going through the handler test.
	iFlushCall(FLUSH_FULLVTLB);

	DynGen_PrepRegs(arg1regd.GetId(), -1, bits, true);
	DynGen_HandlerTest([bits]() { DynGen_DirectRead(bits, false); }, 0, bits);

	// Handler returns in xmm0
	const int dest = alloc_dest();
	if (dest >= 0)
		xMOVAPS(xRegisterSSE(dest), xmm0);

	return dest;
}
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
|
||||
// recompiler if the TLB is changed.
|
||||
int vtlb_DynGenReadQuad_Const(u32 bits, u32 addr_const, vtlb_ReadRegAllocCallback dest_reg_alloc)
|
||||
{
|
||||
pxAssert(bits == 128);
|
||||
|
||||
EE::Profiler.EmitConstMem(addr_const);
|
||||
|
||||
int reg;
|
||||
auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS];
|
||||
if (!vmv.isHandler(addr_const))
|
||||
{
|
||||
void* ppf = reinterpret_cast<void*>(vmv.assumePtr(addr_const));
|
||||
reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
|
||||
if (reg >= 0)
|
||||
xMOVAPS(xRegisterSSE(reg), ptr128[ppf]);
|
||||
}
|
||||
else
|
||||
xMOVZX(rax, ax);
|
||||
}
|
||||
else if (bits == 32)
|
||||
{
|
||||
if (sign)
|
||||
xCDQE();
|
||||
}
|
||||
}
|
||||
// has to: translate, find function, call function
|
||||
u32 paddr = vmv.assumeHandlerGetPAddr(addr_const);
|
||||
|
||||
const int szidx = 4;
|
||||
iFlushCall(FLUSH_FULLVTLB);
|
||||
xFastCall(vmv.assumeHandlerGetRaw(szidx, 0), paddr);
|
||||
|
||||
reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
|
||||
xMOVAPS(xRegisterSSE(reg), xmm0);
|
||||
}
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Dynarec Store Implementations
|
||||
|
||||
void vtlb_DynGenWrite(u32 sz)
|
||||
void vtlb_DynGenWrite(u32 sz, bool xmm, int addr_reg, int value_reg)
|
||||
{
|
||||
#ifdef LOG_STORES
|
||||
//if (sz != 128)
|
||||
//if (!xmm)
|
||||
{
|
||||
iFlushCall(FLUSH_FULLVTLB);
|
||||
|
||||
xPUSH(xRegister64(addr_reg));
|
||||
xPUSH(xRegister64(value_reg));
|
||||
xPUSH(arg1reg);
|
||||
xPUSH(arg2reg);
|
||||
if (sz == 128)
|
||||
xMOV(arg1regd, xRegister32(addr_reg));
|
||||
if (xmm)
|
||||
{
|
||||
xSUB(rsp, 32 + 32);
|
||||
xMOVAPS(ptr[rsp + 32], xRegisterSSE::GetArgRegister(1, 0));
|
||||
xMOVAPS(ptr[rsp + 32], xRegisterSSE::GetInstance(value_reg));
|
||||
xMOVAPS(ptr[rsp + 48], xRegisterSSE::GetArgRegister(1, 0));
|
||||
xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), xRegisterSSE::GetInstance(value_reg));
|
||||
xFastCall((void*)LogWriteQuad);
|
||||
xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), ptr[rsp + 32]);
|
||||
xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), ptr[rsp + 48]);
|
||||
xMOVAPS(xRegisterSSE::GetInstance(value_reg), ptr[rsp + 32]);
|
||||
xADD(rsp, 32 + 32);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(arg2reg, xRegister64(value_reg));
|
||||
if (sz == 8)
|
||||
xAND(arg2regd, 0xFF);
|
||||
else if (sz == 16)
|
||||
|
@ -530,15 +709,67 @@ void vtlb_DynGenWrite(u32 sz)
|
|||
}
|
||||
xPOP(arg2reg);
|
||||
xPOP(arg1reg);
|
||||
xPOP(xRegister64(value_reg));
|
||||
xPOP(xRegister64(addr_reg));
|
||||
}
|
||||
#endif
|
||||
|
||||
u32* writeback = DynGen_PrepRegs();
|
||||
if (!CHECK_FASTMEM || vtlb_IsFaultingPC(pc))
|
||||
{
|
||||
iFlushCall(FLUSH_FULLVTLB);
|
||||
|
||||
DynGen_IndirectDispatch(1, sz);
|
||||
DynGen_DirectWrite(sz);
|
||||
DynGen_PrepRegs(addr_reg, value_reg, sz, xmm);
|
||||
DynGen_HandlerTest([sz]() { DynGen_DirectWrite(sz); }, 1, sz);
|
||||
return;
|
||||
}
|
||||
|
||||
vtlb_SetWriteback(writeback);
|
||||
const u8* codeStart = x86Ptr;
|
||||
|
||||
const xAddressReg vaddr_reg(addr_reg);
|
||||
if (!xmm)
|
||||
{
|
||||
switch (sz)
|
||||
{
|
||||
case 8:
|
||||
xMOV(ptr8[RFASTMEMBASE + vaddr_reg], xRegister8(xRegister32(value_reg)));
|
||||
break;
|
||||
case 16:
|
||||
xMOV(ptr16[RFASTMEMBASE + vaddr_reg], xRegister16(value_reg));
|
||||
break;
|
||||
case 32:
|
||||
xMOV(ptr32[RFASTMEMBASE + vaddr_reg], xRegister32(value_reg));
|
||||
break;
|
||||
case 64:
|
||||
xMOV(ptr64[RFASTMEMBASE + vaddr_reg], xRegister64(value_reg));
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
pxAssert(sz == 32 || sz == 128);
|
||||
switch (sz)
|
||||
{
|
||||
case 32:
|
||||
xMOVSS(ptr32[RFASTMEMBASE + vaddr_reg], xRegisterSSE(value_reg));
|
||||
break;
|
||||
case 128:
|
||||
xMOVAPS(ptr128[RFASTMEMBASE + vaddr_reg], xRegisterSSE(value_reg));
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
}
|
||||
|
||||
const u32 padding = LOADSTORE_PADDING - std::min<u32>(static_cast<u32>(x86Ptr - codeStart), 5);
|
||||
for (u32 i = 0; i < padding; i++)
|
||||
xNOP();
|
||||
|
||||
vtlb_AddLoadStoreInfo((uptr)codeStart, static_cast<u32>(x86Ptr - codeStart),
|
||||
pc, GetAllocatedGPRBitmask(), GetAllocatedXMMBitmask(),
|
||||
static_cast<u8>(addr_reg), static_cast<u8>(value_reg),
|
||||
static_cast<u8>(sz), false, false, xmm);
|
||||
}
|
||||
|
||||
|
||||
|
@ -546,28 +777,34 @@ void vtlb_DynGenWrite(u32 sz)
|
|||
// Generates code for a store instruction, where the address is a known constant.
|
||||
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
|
||||
// recompiler if the TLB is changed.
|
||||
void vtlb_DynGenWrite_Const(u32 bits, u32 addr_const)
|
||||
void vtlb_DynGenWrite_Const(u32 bits, bool xmm, u32 addr_const, int value_reg)
|
||||
{
|
||||
EE::Profiler.EmitConstMem(addr_const);
|
||||
|
||||
#ifdef LOG_STORES
|
||||
iFlushCall(FLUSH_FULLVTLB);
|
||||
|
||||
//if (bits != 128)
|
||||
//if (!xmm)
|
||||
{
|
||||
xPUSH(xRegister64(value_reg));
|
||||
xPUSH(xRegister64(value_reg));
|
||||
xPUSH(arg1reg);
|
||||
xPUSH(arg2reg);
|
||||
xMOV(arg1reg, addr_const);
|
||||
if (bits == 128)
|
||||
if (xmm)
|
||||
{
|
||||
xSUB(rsp, 32 + 32);
|
||||
xMOVAPS(ptr[rsp + 32], xRegisterSSE::GetArgRegister(1, 0));
|
||||
xMOVAPS(ptr[rsp + 32], xRegisterSSE::GetInstance(value_reg));
|
||||
xMOVAPS(ptr[rsp + 48], xRegisterSSE::GetArgRegister(1, 0));
|
||||
xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), xRegisterSSE::GetInstance(value_reg));
|
||||
xFastCall((void*)LogWriteQuad);
|
||||
xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), ptr[rsp + 32]);
|
||||
xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), ptr[rsp + 48]);
|
||||
xMOVAPS(xRegisterSSE::GetInstance(value_reg), ptr[rsp + 32]);
|
||||
xADD(rsp, 32 + 32);
|
||||
}
|
||||
else
|
||||
{
|
||||
xMOV(arg2reg, xRegister64(value_reg));
|
||||
if (bits == 8)
|
||||
xAND(arg2regd, 0xFF);
|
||||
else if (bits == 16)
|
||||
|
@ -580,37 +817,52 @@ void vtlb_DynGenWrite_Const(u32 bits, u32 addr_const)
|
|||
}
|
||||
xPOP(arg2reg);
|
||||
xPOP(arg1reg);
|
||||
xPOP(xRegister64(value_reg));
|
||||
xPOP(xRegister64(value_reg));
|
||||
}
|
||||
#endif
|
||||
|
||||
auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS];
|
||||
if (!vmv.isHandler(addr_const))
|
||||
{
|
||||
// TODO: x86Emitter can't use dil
|
||||
auto ppf = vmv.assumePtr(addr_const);
|
||||
if (!xmm)
|
||||
{
|
||||
switch (bits)
|
||||
{
|
||||
//8 , 16, 32 : data on arg2
|
||||
case 8:
|
||||
xMOV(edx, arg2regd);
|
||||
xMOV(ptr[(void*)ppf], dl);
|
||||
xMOV(ptr[(void*)ppf], xRegister8(xRegister32(value_reg)));
|
||||
break;
|
||||
|
||||
case 16:
|
||||
xMOV(ptr[(void*)ppf], xRegister16(arg2reg));
|
||||
xMOV(ptr[(void*)ppf], xRegister16(value_reg));
|
||||
break;
|
||||
|
||||
case 32:
|
||||
xMOV(ptr[(void*)ppf], arg2regd);
|
||||
xMOV(ptr[(void*)ppf], xRegister32(value_reg));
|
||||
break;
|
||||
|
||||
case 64:
|
||||
xMOV(ptr64[(void*)ppf], arg2reg);
|
||||
xMOV(ptr64[(void*)ppf], xRegister64(value_reg));
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (bits)
|
||||
{
|
||||
case 32:
|
||||
xMOVSS(ptr[(void*)ppf], xRegisterSSE(value_reg));
|
||||
break;
|
||||
|
||||
case 128:
|
||||
xMOVAPS(ptr128[(void*)ppf], xRegisterSSE::GetArgRegister(1, 0));
|
||||
xMOVAPS(ptr128[(void*)ppf], xRegisterSSE(value_reg));
|
||||
break;
|
||||
|
||||
jNO_DEFAULT
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -621,15 +873,47 @@ void vtlb_DynGenWrite_Const(u32 bits, u32 addr_const)
|
|||
int szidx = 0;
|
||||
switch (bits)
|
||||
{
|
||||
case 8: szidx=0; break;
|
||||
case 16: szidx=1; break;
|
||||
case 32: szidx=2; break;
|
||||
case 64: szidx=3; break;
|
||||
case 128: szidx=4; break;
|
||||
case 8:
|
||||
szidx = 0;
|
||||
break;
|
||||
case 16:
|
||||
szidx = 1;
|
||||
break;
|
||||
case 32:
|
||||
szidx = 2;
|
||||
break;
|
||||
case 64:
|
||||
szidx = 3;
|
||||
break;
|
||||
case 128:
|
||||
szidx = 4;
|
||||
break;
|
||||
}
|
||||
|
||||
iFlushCall(FLUSH_FULLVTLB);
|
||||
xFastCall(vmv.assumeHandlerGetRaw(szidx, true), paddr);
|
||||
|
||||
_freeX86reg(arg1regd);
|
||||
xMOV(arg1regd, paddr);
|
||||
if (bits == 128)
|
||||
{
|
||||
pxAssert(xmm);
|
||||
const xRegisterSSE argreg(xRegisterSSE::GetArgRegister(1, 0));
|
||||
_freeXMMreg(argreg.GetId());
|
||||
xMOVAPS(argreg, xRegisterSSE(value_reg));
|
||||
}
|
||||
else if (xmm)
|
||||
{
|
||||
pxAssert(bits == 32);
|
||||
_freeX86reg(arg2regd);
|
||||
xMOVD(arg2regd, xRegisterSSE(value_reg));
|
||||
}
|
||||
else
|
||||
{
|
||||
_freeX86reg(arg2regd);
|
||||
xMOV(arg2reg, xRegister64(value_reg));
|
||||
}
|
||||
|
||||
xFastCall(vmv.assumeHandlerGetRaw(szidx, true));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -649,3 +933,156 @@ void vtlb_DynV2P()
|
|||
|
||||
xOR(eax, ecx);
|
||||
}
|
||||
|
||||
// Replaces a fastmem load/store at code_address with a jump to a freshly generated thunk that
// performs the same access through DynGen_PrepRegs()/DynGen_HandlerTest().
//
// The thunk:
//   1. reserves a stack frame (plus shadow space on win32) and spills the registers flagged in
//      gpr_bitmask/fpr_bitmask that pass the save predicates below;
//   2. performs the access;
//   3. for loads, moves the result (xmm0 for 128-bit, rax otherwise) into data_register;
//   4. reloads the spilled registers and jumps to code_address + code_size.
// Afterwards the original code is overwritten with a jump to the thunk, and the remainder of
// its code_size bytes is filled with NOPs.
//
// code_address, code_size  location and length of the instruction sequence being patched.
// guest_pc, guest_addr     only used for the log line.
// gpr_bitmask, fpr_bitmask one bit per host GPR/XMM register to be preserved.
// address_register         host register id holding the guest address.
// data_register            host register id receiving (load) or providing (store) the value.
// size_in_bits, is_signed, is_load, is_xmm  describe the access.
//
// The frame size is derived from the same predicate that drives the save/restore loops, so the
// reserved space always matches what is actually spilled. (Previously the counting loop tested
// rbx where the save/restore loops test arg3reg.)
void vtlb_DynBackpatchLoadStore(uptr code_address, u32 code_size, u32 guest_pc, u32 guest_addr,
	u32 gpr_bitmask, u32 fpr_bitmask, u8 address_register, u8 data_register,
	u8 size_in_bits, bool is_signed, bool is_load, bool is_xmm)
{
	static constexpr u32 GPR_SIZE = 8;
	static constexpr u32 XMM_SIZE = 16;

	// on win32, we need to reserve an additional 32 bytes shadow space when calling out to C
#ifdef _WIN32
	static constexpr u32 SHADOW_SIZE = 32;
#else
	static constexpr u32 SHADOW_SIZE = 0;
#endif

	DevCon.WriteLn("Backpatching %s at %p[%u] (pc %08X vaddr %08X): Bitmask %08X %08X Addr %u Data %u Size %u Flags %02X %02X",
		is_load ? "load" : "store", (void*)code_address, code_size, guest_pc, guest_addr, gpr_bitmask, fpr_bitmask,
		address_register, data_register, size_in_bits, is_signed, is_load);

	u8* thunk = recBeginThunk();

	const u32 arg1id = static_cast<u32>(arg1reg.GetId());
	const u32 arg2id = static_cast<u32>(arg2reg.GetId());
	const u32 arg3id = static_cast<u32>(arg3reg.GetId());

	// A GPR is spilled when it is flagged in the bitmask, is an argument or caller-saved
	// register, and is not the destination of a GPR load (which gets overwritten anyway).
	const auto gpr_needs_save = [&](u32 i) -> bool {
		return (gpr_bitmask & (1u << i)) &&
			   (i == arg1id || i == arg2id || i == arg3id || xRegisterBase::IsCallerSaved(i)) &&
			   (!is_load || is_xmm || data_register != i);
	};

	// Same idea for XMM registers; the destination of an XMM load is skipped.
	const auto fpr_needs_save = [&](u32 i) -> bool {
		return (fpr_bitmask & (1u << i)) && xRegisterSSE::IsCallerSaved(i) &&
			   (!is_load || !is_xmm || data_register != i);
	};

	// save regs
	u32 num_gprs = 0;
	u32 num_fprs = 0;
	for (u32 i = 0; i < iREGCNT_GPR; i++)
	{
		if (gpr_needs_save(i))
			num_gprs++;
	}
	for (u32 i = 0; i < iREGCNT_XMM; i++)
	{
		if (fpr_needs_save(i))
			num_fprs++;
	}

	// GPR count is rounded up to an even number so the frame stays a multiple of 16 bytes.
	const u32 stack_size = (((num_gprs + 1) & ~1u) * GPR_SIZE) + (num_fprs * XMM_SIZE) + SHADOW_SIZE;

	if (stack_size > 0)
	{
		xSUB(rsp, stack_size);

		// XMM slots come first (directly above the shadow space), GPR slots follow.
		u32 stack_offset = SHADOW_SIZE;
		for (u32 i = 0; i < iREGCNT_XMM; i++)
		{
			if (fpr_needs_save(i))
			{
				xMOVAPS(ptr128[rsp + stack_offset], xRegisterSSE(i));
				stack_offset += XMM_SIZE;
			}
		}

		for (u32 i = 0; i < iREGCNT_GPR; i++)
		{
			if (gpr_needs_save(i))
			{
				xMOV(ptr64[rsp + stack_offset], xRegister64(i));
				stack_offset += GPR_SIZE;
			}
		}
	}

	if (is_load)
	{
		DynGen_PrepRegs(address_register, -1, size_in_bits, is_xmm);
		DynGen_HandlerTest([size_in_bits, is_signed]() {DynGen_DirectRead(size_in_bits, is_signed); }, 0, size_in_bits, is_signed && size_in_bits <= 32);

		if (size_in_bits == 128)
		{
			// 128-bit results arrive in xmm0
			if (data_register != xmm0.GetId())
				xMOVAPS(xRegisterSSE(data_register), xmm0);
		}
		else
		{
			// narrower results arrive in rax
			if (is_xmm)
			{
				xMOVDZX(xRegisterSSE(data_register), rax);
			}
			else
			{
				if (data_register != eax.GetId())
					xMOV(xRegister64(data_register), rax);
			}
		}
	}
	else
	{
		if (address_register != arg1reg.GetId())
			xMOV(arg1regd, xRegister32(address_register));

		if (size_in_bits == 128)
		{
			const xRegisterSSE argreg(xRegisterSSE::GetArgRegister(1, 0));
			if (data_register != argreg.GetId())
				xMOVAPS(argreg, xRegisterSSE(data_register));
		}
		else
		{
			if (is_xmm)
			{
				xMOVD(arg2reg, xRegisterSSE(data_register));
			}
			else
			{
				if (data_register != arg2reg.GetId())
					xMOV(arg2reg, xRegister64(data_register));
			}
		}

		DynGen_PrepRegs(address_register, data_register, size_in_bits, is_xmm);
		DynGen_HandlerTest([size_in_bits]() { DynGen_DirectWrite(size_in_bits); }, 1, size_in_bits);
	}

	// restore regs (same order and offsets as the save sequence above)
	if (stack_size > 0)
	{
		u32 stack_offset = SHADOW_SIZE;
		for (u32 i = 0; i < iREGCNT_XMM; i++)
		{
			if (fpr_needs_save(i))
			{
				xMOVAPS(xRegisterSSE(i), ptr128[rsp + stack_offset]);
				stack_offset += XMM_SIZE;
			}
		}

		for (u32 i = 0; i < iREGCNT_GPR; i++)
		{
			if (gpr_needs_save(i))
			{
				xMOV(xRegister64(i), ptr64[rsp + stack_offset]);
				stack_offset += GPR_SIZE;
			}
		}

		xADD(rsp, stack_size);
	}

	// resume right after the patched instruction sequence
	xJMP((void*)(code_address + code_size));

	recEndThunk();

	// backpatch to a jump to the slowmem handler
	x86Ptr = (u8*)code_address;
	xJMP(thunk);

	// fill the rest of it with nops, if any
	pxAssertRel(static_cast<u32>((uptr)x86Ptr - code_address) <= code_size, "Overflowed when backpatching");
	for (u32 i = static_cast<u32>((uptr)x86Ptr - code_address); i < code_size; i++)
		xNOP();
}
|
||||
|
|
|
@ -125,6 +125,7 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
|
|||
xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1);
|
||||
}
|
||||
|
||||
if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
|
||||
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
|
||||
|
||||
|
||||
|
@ -251,6 +252,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
|
|||
|
||||
if ((isEbit && isEbit != 3)) // Clear 'is busy' Flags
|
||||
{
|
||||
if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
|
||||
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
|
||||
if (!mVU.index || !THREAD_VU1)
|
||||
{
|
||||
|
@ -259,6 +261,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
|
|||
}
|
||||
else if(isEbit)
|
||||
{
|
||||
if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
|
||||
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -484,7 +484,9 @@ void mVUtestCycles(microVU& mVU, microFlagCycles& mFC)
|
|||
xForwardJGE32 skip;
|
||||
|
||||
mVUsavePipelineState(mVU);
|
||||
if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
|
||||
xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
|
||||
|
||||
mVUendProgram(mVU, &mFC, 0);
|
||||
|
||||
skip.SetTarget();
|
||||
|
@ -801,6 +803,7 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
|
|||
}
|
||||
incPC(2);
|
||||
mVUsetupRange(mVU, xPC, false);
|
||||
if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
|
||||
xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
|
||||
mVUendProgram(mVU, &mFC, 0);
|
||||
normBranchCompile(mVU, xPC);
|
||||
|
|
|
@ -215,6 +215,9 @@ struct microIR
|
|||
// Reg Alloc
|
||||
//------------------------------------------------------------------
|
||||
|
||||
//#define MVURALOG(...) fprintf(stderr, __VA_ARGS__)
|
||||
#define MVURALOG(...)
|
||||
|
||||
struct microMapXMM
|
||||
{
|
||||
int VFreg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg)
|
||||
|
@ -231,6 +234,13 @@ protected:
|
|||
microMapXMM xmmMap[xmmTotal];
|
||||
int counter; // Current allocation count
|
||||
int index; // VU0 or VU1
|
||||
|
||||
// DO NOT REMOVE THIS.
|
||||
// This is here for a reason. MSVC likes to turn global writes into a load+conditional move+store.
|
||||
// That creates a race with the EE thread when we're compiling on the VU thread, even though
|
||||
// regAllocCOP2 is false. By adding another level of indirection, it emits a branch instead.
|
||||
_xmmregs* pxmmregs;
|
||||
|
||||
bool regAllocCOP2; // Local COP2 check
|
||||
|
||||
// Helper functions to get VU regs
|
||||
|
@ -260,11 +270,11 @@ protected:
|
|||
return -1;
|
||||
}
|
||||
|
||||
int findFreeReg()
|
||||
int findFreeReg(int vfreg)
|
||||
{
|
||||
if (regAllocCOP2)
|
||||
{
|
||||
return _freeXMMregsCOP2();
|
||||
return _allocVFtoXMMreg(vfreg, 0);
|
||||
}
|
||||
|
||||
for (int i = 0; i < xmmTotal; i++)
|
||||
|
@ -289,12 +299,38 @@ public:
|
|||
// Fully resets the regalloc by clearing all cached data
|
||||
void reset(bool cop2mode)
|
||||
{
|
||||
// we run this at the of cop2, so don't free fprs
|
||||
regAllocCOP2 = false;
|
||||
|
||||
for (int i = 0; i < xmmTotal; i++)
|
||||
{
|
||||
clearReg(i);
|
||||
}
|
||||
counter = 0;
|
||||
regAllocCOP2 = cop2mode;
|
||||
pxmmregs = cop2mode ? xmmregs : nullptr;
|
||||
|
||||
if (cop2mode)
|
||||
{
|
||||
for (int i = 0; i < xmmTotal; i++)
|
||||
{
|
||||
if (!pxmmregs[i].inuse || pxmmregs[i].type != XMMTYPE_VFREG)
|
||||
continue;
|
||||
|
||||
// we shouldn't have any temp registers in here.. except for PQ, which
|
||||
// isn't allocated here yet.
|
||||
// pxAssertRel(fprregs[i].reg >= 0, "Valid full register preserved");
|
||||
if (pxmmregs[i].reg >= 0)
|
||||
{
|
||||
MVURALOG("Preserving VF reg %d in host reg %d across instruction\n", pxmmregs[i].reg, i);
|
||||
pxAssert(pxmmregs[i].reg != 255);
|
||||
pxmmregs[i].needed = false;
|
||||
xmmMap[i].isNeeded = false;
|
||||
xmmMap[i].VFreg = pxmmregs[i].reg;
|
||||
xmmMap[i].xyzw = ((pxmmregs[i].mode & MODE_WRITE) != 0) ? 0xf : 0x0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int getXmmCount()
|
||||
|
@ -314,6 +350,35 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void flushPartialForCOP2()
|
||||
{
|
||||
for (int i = 0; i < xmmTotal; i++)
|
||||
{
|
||||
microMapXMM& clear = xmmMap[i];
|
||||
|
||||
// toss away anything which is not a full cached register
|
||||
if (pxmmregs[i].inuse && pxmmregs[i].type == XMMTYPE_VFREG)
|
||||
{
|
||||
// Should've been done in clearNeeded()
|
||||
if (clear.xyzw != 0 && clear.xyzw != 0xf)
|
||||
writeBackReg(xRegisterSSE::GetInstance(i), false);
|
||||
|
||||
if (clear.VFreg <= 0)
|
||||
{
|
||||
// temps really shouldn't be here..
|
||||
_freeXMMreg(i);
|
||||
}
|
||||
}
|
||||
|
||||
// needed gets cleared in iCore.
|
||||
clear.VFreg = -1;
|
||||
clear.count = 0;
|
||||
clear.xyzw = 0;
|
||||
clear.isNeeded = 0;
|
||||
clear.isZero = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void TDwritebackAll(bool clearState = false)
|
||||
{
|
||||
for (int i = 0; i < xmmTotal; i++)
|
||||
|
@ -352,6 +417,12 @@ public:
|
|||
void clearReg(int regId)
|
||||
{
|
||||
microMapXMM& clear = xmmMap[regId];
|
||||
if (regAllocCOP2)
|
||||
{
|
||||
pxAssert(pxmmregs[regId].type == XMMTYPE_VFREG);
|
||||
pxmmregs[regId].inuse = false;
|
||||
}
|
||||
|
||||
clear.VFreg = -1;
|
||||
clear.count = 0;
|
||||
clear.xyzw = 0;
|
||||
|
@ -368,6 +439,24 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void clearRegCOP2(int xmmReg)
|
||||
{
|
||||
if (regAllocCOP2)
|
||||
clearReg(xmmReg);
|
||||
}
|
||||
|
||||
void updateCOP2AllocState(int rn)
|
||||
{
|
||||
if (!regAllocCOP2)
|
||||
return;
|
||||
|
||||
const bool dirty = (xmmMap[rn].VFreg > 0 && xmmMap[rn].xyzw != 0);
|
||||
pxAssert(pxmmregs[rn].type == XMMTYPE_VFREG);
|
||||
pxmmregs[rn].reg = xmmMap[rn].VFreg;
|
||||
pxmmregs[rn].mode = dirty ? (MODE_READ | MODE_WRITE) : MODE_READ;
|
||||
pxmmregs[rn].needed = xmmMap[rn].isNeeded;
|
||||
}
|
||||
|
||||
// Writes back modified reg to memory.
|
||||
// If all vectors modified, then keeps the VF reg cached in the xmm register.
|
||||
// If reg was not modified, then keeps the VF reg cached in the xmm register.
|
||||
|
@ -406,6 +495,7 @@ public:
|
|||
mapX.count = counter;
|
||||
mapX.xyzw = 0;
|
||||
mapX.isNeeded = false;
|
||||
updateCOP2AllocState(reg.Id);
|
||||
return;
|
||||
}
|
||||
clearReg(reg);
|
||||
|
@ -453,6 +543,7 @@ public:
|
|||
mapI.xyzw = 0xf;
|
||||
mapI.count = counter;
|
||||
mergeRegs = 2;
|
||||
updateCOP2AllocState(i);
|
||||
}
|
||||
else
|
||||
clearReg(i); // Clears when mergeRegs is 0 or 2
|
||||
|
@ -466,6 +557,12 @@ public:
|
|||
else
|
||||
clearReg(reg); // If Reg was temp or vf0, then invalidate itself
|
||||
}
|
||||
else if (regAllocCOP2 && clear.VFreg < 0)
|
||||
{
|
||||
// free on the EE side
|
||||
pxAssert(pxmmregs[reg.Id].type == XMMTYPE_VFREG);
|
||||
pxmmregs[reg.Id].inuse = false;
|
||||
}
|
||||
}
|
||||
|
||||
// vfLoadReg = VF reg to be loaded to the xmm register
|
||||
|
@ -495,7 +592,7 @@ public:
|
|||
{
|
||||
if (cloneWrite) // Clone Reg so as not to use the same Cached Reg
|
||||
{
|
||||
z = findFreeReg();
|
||||
z = findFreeReg(vfWriteReg);
|
||||
const xmm& xmmZ = xmm::GetInstance(z);
|
||||
writeBackReg(xmmZ);
|
||||
|
||||
|
@ -528,11 +625,13 @@ public:
|
|||
}
|
||||
xmmMap[z].count = counter;
|
||||
xmmMap[z].isNeeded = true;
|
||||
updateCOP2AllocState(z);
|
||||
|
||||
return xmm::GetInstance(z);
|
||||
}
|
||||
}
|
||||
}
|
||||
int x = findFreeReg();
|
||||
int x = findFreeReg((vfWriteReg >= 0) ? vfWriteReg : vfLoadReg);
|
||||
const xmm& xmmX = xmm::GetInstance(x);
|
||||
writeBackReg(xmmX);
|
||||
|
||||
|
@ -565,6 +664,7 @@ public:
|
|||
xmmMap[x].isZero = (vfLoadReg == 0);
|
||||
xmmMap[x].count = counter;
|
||||
xmmMap[x].isNeeded = true;
|
||||
updateCOP2AllocState(x);
|
||||
return xmmX;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -28,6 +28,10 @@ using namespace R5900::Dynarec;
|
|||
#define printCOP2(...) (void)0
|
||||
//#define printCOP2 DevCon.Status
|
||||
|
||||
// For now, we need to free all XMMs. Because we're not saving the nonvolatile registers when
|
||||
// we enter micro mode, they will get overridden otherwise...
|
||||
#define FLUSH_FOR_POSSIBLE_MICRO_EXEC (FLUSH_FREE_XMM | FLUSH_FREE_VU0)
|
||||
|
||||
void setupMacroOp(int mode, const char* opName)
|
||||
{
|
||||
// Set up reg allocation
|
||||
|
@ -96,8 +100,7 @@ void endMacroOp(int mode)
|
|||
xMOVSS(ptr32[&vu0Regs.VI[REG_Q].UL], xmmPQ);
|
||||
}
|
||||
|
||||
microVU0.regAlloc->flushAll();
|
||||
_clearNeededCOP2Regs();
|
||||
microVU0.regAlloc->flushPartialForCOP2();
|
||||
|
||||
if (mode & 0x10)
|
||||
{
|
||||
|
@ -119,6 +122,11 @@ void endMacroOp(int mode)
|
|||
microVU0.regAlloc->reset(false);
|
||||
}
|
||||
|
||||
void mVUFreeCOP2XMMreg(int hostreg)
|
||||
{
|
||||
microVU0.regAlloc->clearRegCOP2(hostreg);
|
||||
}
|
||||
|
||||
#define REC_COP2_mVU0(f, opName, mode) \
|
||||
void recV##f() \
|
||||
{ \
|
||||
|
@ -142,13 +150,9 @@ void endMacroOp(int mode)
|
|||
#define INTERPRETATE_COP2_FUNC(f) \
|
||||
void recV##f() \
|
||||
{ \
|
||||
_freeX86reg(eax); \
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]); \
|
||||
xADD(eax, scaleblockcycles_clear()); \
|
||||
xMOV(ptr32[&cpuRegs.cycle], eax); \
|
||||
_cop2BackupRegs(); \
|
||||
iFlushCall(FLUSH_FOR_POSSIBLE_MICRO_EXEC); \
|
||||
xADD(ptr32[&cpuRegs.cycle], scaleblockcycles_clear()); \
|
||||
recCall(V##f); \
|
||||
_cop2RestoreRegs(); \
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
@ -303,13 +307,15 @@ INTERPRETATE_COP2_FUNC(CALLMSR);
|
|||
// Macro VU - Branches
|
||||
//------------------------------------------------------------------
|
||||
|
||||
void _setupBranchTest(u32*(jmpType)(u32), bool isLikely)
|
||||
static void _setupBranchTest(u32*(jmpType)(u32), bool isLikely)
|
||||
{
|
||||
printCOP2("COP2 Branch");
|
||||
_eeFlushAllUnused();
|
||||
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
|
||||
const bool swap = isLikely ? false : TrySwapDelaySlot(0, 0, 0);
|
||||
_eeFlushAllDirty();
|
||||
//xTEST(ptr32[&vif1Regs.stat._u32], 0x4);
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x100);
|
||||
recDoBranchImm(jmpType(0), isLikely);
|
||||
recDoBranchImm(branchTo, jmpType(0), isLikely, swap);
|
||||
}
|
||||
|
||||
void recBC2F() { _setupBranchTest(JNZ32, false); }
|
||||
|
@ -321,7 +327,7 @@ void recBC2TL() { _setupBranchTest(JZ32, true); }
|
|||
// Macro VU - COP2 Transfer Instructions
|
||||
//------------------------------------------------------------------
|
||||
|
||||
void COP2_Interlock(bool mBitSync)
|
||||
static void COP2_Interlock(bool mBitSync)
|
||||
{
|
||||
if (cpuRegs.code & 1)
|
||||
{
|
||||
|
@ -329,8 +335,9 @@ void COP2_Interlock(bool mBitSync)
|
|||
|
||||
// We can safely skip the _vu0FinishMicro() call, when there's nothing
|
||||
// that can trigger a VU0 program between CFC2/CTC2/COP2 instructions.
|
||||
if ((g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0_MICRO) || mBitSync)
|
||||
if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
|
||||
{
|
||||
iFlushCall(FLUSH_FOR_POSSIBLE_MICRO_EXEC);
|
||||
_freeX86reg(eax);
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
xADD(eax, scaleblockcycles_clear());
|
||||
|
@ -338,10 +345,14 @@ void COP2_Interlock(bool mBitSync)
|
|||
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
_cop2BackupRegs();
|
||||
if (mBitSync)
|
||||
{
|
||||
xSUB(eax, ptr32[&VU0.cycle]);
|
||||
|
||||
// Why do we check this here? Ratchet games, maybe others end up with flickering polygons
|
||||
// when we use lazy COP2 sync, otherwise. The micro resumption getting deferred an extra
|
||||
// EE block is apparently enough to cause issues.
|
||||
if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
|
||||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, 4);
|
||||
xForwardJL32 skip;
|
||||
|
@ -354,18 +365,47 @@ void COP2_Interlock(bool mBitSync)
|
|||
}
|
||||
else
|
||||
xFastCall((void*)_vu0FinishMicro);
|
||||
_cop2RestoreRegs();
|
||||
skipvuidle.SetTarget();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex)
|
||||
static void mVUSyncVU0()
|
||||
{
|
||||
xTEST(eax, (vuIndex) ? 0x200 : 0x002);
|
||||
iFlushCall(FLUSH_FOR_POSSIBLE_MICRO_EXEC);
|
||||
_freeX86reg(eax);
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
xADD(eax, scaleblockcycles_clear());
|
||||
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
|
||||
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
xSUB(eax, ptr32[&VU0.cycle]);
|
||||
if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
|
||||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, 4);
|
||||
xForwardJL32 skip;
|
||||
xLoadFarAddr(arg1reg, CpuVU0);
|
||||
xMOV(arg2reg, s_nBlockInterlocked);
|
||||
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
|
||||
skip.SetTarget();
|
||||
skipvuidle.SetTarget();
|
||||
}
|
||||
|
||||
// Emits a conditional call to _vu0FinishMicro.
// The generated code tests bit 0 of VU0's VPU_STAT register and jumps over
// the call when that bit is clear, so the call only happens when it is set.
static void mVUFinishVU0()
|
||||
{
|
||||
// Flush before emitting the call (flag name suggests a micro program may run inside it).
iFlushCall(FLUSH_FOR_POSSIBLE_MICRO_EXEC);
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
// Forward jump taken when the tested bit is zero; resolved by SetTarget() below.
xForwardJZ32 skipvuidle;
|
||||
xFastCall((void*)_vu0FinishMicro);
|
||||
skipvuidle.SetTarget();
|
||||
}
|
||||
|
||||
// Emits a conditional call to resetFunct, keyed on one bit of a host register.
//   flagreg    - id of the 32-bit host register that is tested.
//   resetFunct - function called when the tested bit is set.
//   vuIndex    - selects the tested bit: 0x200 when non-zero, 0x002 when zero.
static void TEST_FBRST_RESET(int flagreg, FnType_Void* resetFunct, int vuIndex)
|
||||
{
|
||||
xTEST(xRegister32(flagreg), (vuIndex) ? 0x200 : 0x002);
|
||||
// Short forward jump over the call when the bit is clear.
xForwardJZ8 skip;
|
||||
xFastCall((void*)resetFunct);
|
||||
// After the call, eax is loaded with the low 32 bits of GPR[rt].
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
skip.SetTarget();
|
||||
}
|
||||
|
||||
|
@ -380,43 +420,20 @@ static void recCFC2()
|
|||
|
||||
if (!(cpuRegs.code & 1))
|
||||
{
|
||||
_freeX86reg(eax);
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
xADD(eax, scaleblockcycles_clear());
|
||||
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
|
||||
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
xSUB(eax, ptr32[&VU0.cycle]);
|
||||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, 4);
|
||||
xForwardJL32 skip;
|
||||
_cop2BackupRegs();
|
||||
xLoadFarAddr(arg1reg, CpuVU0);
|
||||
xMOV(arg2reg, s_nBlockInterlocked);
|
||||
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
|
||||
_cop2RestoreRegs();
|
||||
skip.SetTarget();
|
||||
skipvuidle.SetTarget();
|
||||
if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
|
||||
mVUSyncVU0();
|
||||
else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
|
||||
mVUFinishVU0();
|
||||
}
|
||||
|
||||
_flushEEreg(_Rt_, true);
|
||||
|
||||
if (_Rd_ == REG_STATUS_FLAG) // Normalize Status Flag
|
||||
xMOV(eax, ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL]);
|
||||
else
|
||||
xMOV(eax, ptr32[&vu0Regs.VI[_Rd_].UL]);
|
||||
const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
|
||||
pxAssert(!GPR_IS_CONST1(_Rt_));
|
||||
|
||||
// FixMe: Should R-Reg have upper 9 bits 0?
|
||||
if (_Rd_ >= 16)
|
||||
xCDQE(); // Sign Extend
|
||||
|
||||
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
|
||||
|
||||
// FixMe: I think this is needed, but not sure how it works
|
||||
// Update Refraction 20/09/2021: This is needed because Const Prop is broken
|
||||
// the Flushed flag isn't being cleared when it's not flushed. TODO I guess
|
||||
_eeOnWriteReg(_Rt_, 0);
|
||||
if (_Rd_ >= REG_STATUS_FLAG)
|
||||
xMOVSX(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]);
|
||||
else
|
||||
xMOV(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]);
|
||||
}
|
||||
|
||||
static void recCTC2()
|
||||
|
@ -430,28 +447,12 @@ static void recCTC2()
|
|||
|
||||
if (!(cpuRegs.code & 1))
|
||||
{
|
||||
_freeX86reg(eax);
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
xADD(eax, scaleblockcycles_clear());
|
||||
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
|
||||
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
xSUB(eax, ptr32[&VU0.cycle]);
|
||||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, 4);
|
||||
xForwardJL32 skip;
|
||||
_cop2BackupRegs();
|
||||
xLoadFarAddr(arg1reg, CpuVU0);
|
||||
xMOV(arg2reg, s_nBlockInterlocked);
|
||||
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
|
||||
_cop2RestoreRegs();
|
||||
skip.SetTarget();
|
||||
skipvuidle.SetTarget();
|
||||
if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
|
||||
mVUSyncVU0();
|
||||
else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
|
||||
mVUFinishVU0();
|
||||
}
|
||||
|
||||
_flushEEreg(_Rt_);
|
||||
|
||||
switch (_Rd_)
|
||||
{
|
||||
case REG_MAC_FLAG:
|
||||
|
@ -459,7 +460,7 @@ static void recCTC2()
|
|||
case REG_VPU_STAT:
|
||||
break; // Read Only Regs
|
||||
case REG_R:
|
||||
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
_eeMoveGPRtoR(eax, _Rt_);
|
||||
xAND(eax, 0x7FFFFF);
|
||||
xOR(eax, 0x3f800000);
|
||||
xMOV(ptr32[&vu0Regs.VI[REG_R].UL], eax);
|
||||
|
@ -468,7 +469,7 @@ static void recCTC2()
|
|||
{
|
||||
if (_Rt_)
|
||||
{
|
||||
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
_eeMoveGPRtoR(eax, _Rt_);
|
||||
xAND(eax, 0xFC0);
|
||||
xAND(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], 0x3F);
|
||||
xOR(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], eax);
|
||||
|
@ -476,42 +477,44 @@ static void recCTC2()
|
|||
else
|
||||
xAND(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], 0x3F);
|
||||
|
||||
_freeXMMreg(xmmT1.Id);
|
||||
const int xmmtemp = _allocTempXMMreg(XMMT_INT);
|
||||
|
||||
//Need to update the sticky flags for microVU
|
||||
mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL);
|
||||
xMOVDZX(xmmT1, eax);
|
||||
xSHUF.PS(xmmT1, xmmT1, 0);
|
||||
xMOVDZX(xRegisterSSE(xmmtemp), eax); // TODO(Stenzek): This can be a broadcast.
|
||||
xSHUF.PS(xRegisterSSE(xmmtemp), xRegisterSSE(xmmtemp), 0);
|
||||
// Make sure the values are everywhere they need to be
|
||||
xMOVAPS(ptr128[&vu0Regs.micro_statusflags], xmmT1);
|
||||
xMOVAPS(ptr128[&vu0Regs.micro_statusflags], xRegisterSSE(xmmtemp));
|
||||
_freeXMMreg(xmmtemp);
|
||||
break;
|
||||
}
|
||||
case REG_CMSAR1: // Execute VU1 Micro SubRoutine
|
||||
_cop2BackupRegs();
|
||||
xMOV(ecx, 1);
|
||||
xFastCall((void*)vu1Finish, ecx);
|
||||
if (_Rt_)
|
||||
{
|
||||
xMOV(ecx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
}
|
||||
else
|
||||
xXOR(ecx, ecx);
|
||||
xFastCall((void*)vu1ExecMicro, ecx);
|
||||
_cop2RestoreRegs();
|
||||
iFlushCall(FLUSH_NONE);
|
||||
xMOV(arg1regd, 1);
|
||||
xFastCall((void*)vu1Finish);
|
||||
_eeMoveGPRtoR(arg1regd, _Rt_);
|
||||
iFlushCall(FLUSH_NONE);
|
||||
xFastCall((void*)vu1ExecMicro);
|
||||
break;
|
||||
case REG_FBRST:
|
||||
{
|
||||
if (!_Rt_)
|
||||
{
|
||||
xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], 0);
|
||||
return;
|
||||
}
|
||||
else
|
||||
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
|
||||
_cop2BackupRegs();
|
||||
TEST_FBRST_RESET(vu0ResetRegs, 0);
|
||||
TEST_FBRST_RESET(vu1ResetRegs, 1);
|
||||
_cop2RestoreRegs();
|
||||
xAND(eax, 0x0C0C);
|
||||
xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], eax);
|
||||
|
||||
const int flagreg = _allocX86reg(X86TYPE_TEMP, 0, MODE_CALLEESAVED);
|
||||
_eeMoveGPRtoR(xRegister32(flagreg), _Rt_);
|
||||
|
||||
iFlushCall(FLUSH_FREE_VU0);
|
||||
TEST_FBRST_RESET(flagreg, vu0ResetRegs, 0);
|
||||
TEST_FBRST_RESET(flagreg, vu1ResetRegs, 1);
|
||||
|
||||
xAND(xRegister32(flagreg), 0x0C0C);
|
||||
xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], xRegister32(flagreg));
|
||||
_freeX86reg(flagreg);
|
||||
}
|
||||
break;
|
||||
case 0:
|
||||
// Ignore writes to vi00.
|
||||
|
@ -521,6 +524,14 @@ static void recCTC2()
|
|||
// sVU's COP2 has a comment that "Donald Duck" needs this too...
|
||||
if (_Rd_ < REG_STATUS_FLAG)
|
||||
{
|
||||
// I isn't invalidated correctly yet, ideally we would move this to the XMM directly.
|
||||
if (_Rd_ == REG_I)
|
||||
{
|
||||
const int xmmreg = _checkXMMreg(XMMTYPE_VFREG, 33, 0);
|
||||
if (xmmreg >= 0)
|
||||
_freeXMMregWithoutWriteback(xmmreg);
|
||||
}
|
||||
|
||||
// Need to expand this out, because we want to write as 16 bits.
|
||||
_eeMoveGPRtoR(eax, _Rt_);
|
||||
xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], ax);
|
||||
|
@ -545,32 +556,36 @@ static void recQMFC2()
|
|||
|
||||
if (!(cpuRegs.code & 1))
|
||||
{
|
||||
_freeX86reg(eax);
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
xADD(eax, scaleblockcycles_clear());
|
||||
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
|
||||
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
xSUB(eax, ptr32[&VU0.cycle]);
|
||||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, 4);
|
||||
xForwardJL32 skip;
|
||||
_cop2BackupRegs();
|
||||
xLoadFarAddr(arg1reg, CpuVU0);
|
||||
xMOV(arg2reg, s_nBlockInterlocked);
|
||||
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
|
||||
_cop2RestoreRegs();
|
||||
skip.SetTarget();
|
||||
skipvuidle.SetTarget();
|
||||
if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
|
||||
mVUSyncVU0();
|
||||
else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
|
||||
mVUFinishVU0();
|
||||
}
|
||||
|
||||
int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_WRITE);
|
||||
// Update Refraction 20/09/2021: This is needed because Const Prop is broken
|
||||
// the Flushed flag isn't being cleared when it's not flushed. TODO I guess
|
||||
_eeOnWriteReg(_Rt_, 0); // This is needed because Const Prop is broken
|
||||
const bool vf_used = COP2INST_USEDTEST(_Rd_);
|
||||
const int ftreg = _allocVFtoXMMreg(_Rd_, MODE_READ);
|
||||
_deleteEEreg128(_Rt_);
|
||||
|
||||
xMOVAPS(xRegisterSSE(rtreg), ptr128[&vu0Regs.VF[_Rd_]]);
|
||||
// const flag should've been cleared, but sanity check..
|
||||
pxAssert(!GPR_IS_CONST1(_Rt_));
|
||||
|
||||
if (vf_used)
|
||||
{
|
||||
// store direct to state if rt is not used
|
||||
const int rtreg = _allocIfUsedGPRtoXMM(_Rt_, MODE_WRITE);
|
||||
if (rtreg >= 0)
|
||||
xMOVAPS(xRegisterSSE(rtreg), xRegisterSSE(ftreg));
|
||||
else
|
||||
xMOVAPS(ptr128[&cpuRegs.GPR.r[_Rt_].UQ], xRegisterSSE(ftreg));
|
||||
|
||||
// don't cache vf00, microvu doesn't like it
|
||||
if (_Rd_ == 0)
|
||||
_freeXMMreg(ftreg);
|
||||
}
|
||||
else
|
||||
{
|
||||
_reallocateXMMreg(ftreg, XMMTYPE_GPRREG, _Rt_, MODE_WRITE, true);
|
||||
}
|
||||
}
|
||||
|
||||
static void recQMTC2()
|
||||
|
@ -583,29 +598,46 @@ static void recQMTC2()
|
|||
|
||||
if (!(cpuRegs.code & 1))
|
||||
{
|
||||
_freeX86reg(eax);
|
||||
xMOV(eax, ptr32[&cpuRegs.cycle]);
|
||||
xADD(eax, scaleblockcycles_clear());
|
||||
xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
|
||||
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
xSUB(eax, ptr32[&VU0.cycle]);
|
||||
xSUB(eax, ptr32[&VU0.nextBlockCycles]);
|
||||
xCMP(eax, 4);
|
||||
xForwardJL32 skip;
|
||||
_cop2BackupRegs();
|
||||
xLoadFarAddr(arg1reg, CpuVU0);
|
||||
xMOV(arg2reg, s_nBlockInterlocked);
|
||||
xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
|
||||
_cop2RestoreRegs();
|
||||
skip.SetTarget();
|
||||
skipvuidle.SetTarget();
|
||||
if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
|
||||
mVUSyncVU0();
|
||||
else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
|
||||
mVUFinishVU0();
|
||||
}
|
||||
|
||||
int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
|
||||
if (_Rt_)
|
||||
{
|
||||
// if we have to flush to memory anyway (has a constant or is x86), force load.
|
||||
const bool vf_used = COP2INST_USEDTEST(_Rd_);
|
||||
const bool can_rename = EEINST_RENAMETEST(_Rt_);
|
||||
const int rtreg = (GPR_IS_DIRTY_CONST(_Rt_) || _hasX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE)) ?
|
||||
_allocGPRtoXMMreg(_Rt_, MODE_READ) :
|
||||
_checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
|
||||
|
||||
xMOVAPS(ptr128[&vu0Regs.VF[_Rd_]], xRegisterSSE(rtreg));
|
||||
// NOTE: can't transfer xmm15 to VF, it's reserved for PQ.
|
||||
int vfreg = _checkXMMreg(XMMTYPE_VFREG, _Rd_, MODE_WRITE);
|
||||
if (can_rename && rtreg >= 0 && rtreg != xmmPQ.GetId())
|
||||
{
|
||||
// rt is no longer needed, so transfer to VF.
|
||||
if (vfreg >= 0)
|
||||
_freeXMMregWithoutWriteback(vfreg);
|
||||
_reallocateXMMreg(rtreg, XMMTYPE_VFREG, _Rd_, MODE_WRITE, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
// copy to VF.
|
||||
if (vfreg < 0)
|
||||
vfreg = _allocVFtoXMMreg(_Rd_, MODE_WRITE);
|
||||
if (rtreg >= 0)
|
||||
xMOVAPS(xRegisterSSE(vfreg), xRegisterSSE(rtreg));
|
||||
else
|
||||
xMOVAPS(xRegisterSSE(vfreg), ptr128[&cpuRegs.GPR.r[_Rt_].UQ]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const int vfreg = _allocVFtoXMMreg(_Rd_, MODE_WRITE);
|
||||
xPXOR(xRegisterSSE(vfreg), xRegisterSSE(vfreg));
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------
|
||||
|
@ -670,21 +702,101 @@ namespace R5900 {
|
|||
namespace Dynarec {
|
||||
namespace OpcodeImpl {
|
||||
// Dispatches to the recCOP2t table entry selected by _Rs_.
void recCOP2() { recCOP2t[_Rs_](); }
|
||||
|
||||
#if defined(LOADSTORE_RECOMPILE) && defined(CP2_RECOMPILE)
|
||||
|
||||
/*********************************************************
|
||||
* Load and store for COP2 (VU0 unit) *
|
||||
* Format: OP rt, offset(base) *
|
||||
*********************************************************/
|
||||
|
||||
// Recompiles LQC2: emits a 128-bit vtlb read from (GPR[rs] + imm) with the
// low four address bits cleared. When rt is non-zero, the read is given a
// callback that allocates VF[rt] for writing; when rt is zero, the register
// returned by the read is freed again.
void recLQC2()
|
||||
{
|
||||
// Emit the VU0 sync sequence, or else the finish sequence, if the
// instruction info flags request one. Sync is checked first.
if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
|
||||
mVUSyncVU0();
|
||||
else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
|
||||
mVUFinishVU0();
|
||||
|
||||
// Stays null when rt is zero, so no VF register is allocated for the result.
vtlb_ReadRegAllocCallback alloc_cb = nullptr;
|
||||
if (_Rt_)
|
||||
{
|
||||
// init regalloc after flush
|
||||
alloc_cb = []() { return _allocVFtoXMMreg(_Rt_, MODE_WRITE); };
|
||||
}
|
||||
|
||||
int xmmreg;
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
// Base register is a known constant: compute the aligned address at recompile time.
const u32 addr = (g_cpuConstRegs[_Rs_].UL[0] + _Imm_) & ~0xFu;
|
||||
xmmreg = vtlb_DynGenReadQuad_Const(128, addr, alloc_cb);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Otherwise emit code that builds the aligned address in arg1regd at run time.
_eeMoveGPRtoR(arg1regd, _Rs_);
|
||||
if (_Imm_ != 0)
|
||||
xADD(arg1regd, _Imm_);
|
||||
xAND(arg1regd, ~0xF);
|
||||
|
||||
xmmreg = vtlb_DynGenReadQuad(128, arg1regd.GetId(), alloc_cb);
|
||||
}
|
||||
|
||||
// toss away if loading to vf00
|
||||
if (!_Rt_)
|
||||
_freeXMMreg(xmmreg);
|
||||
|
||||
EE::Profiler.EmitOp(eeOpcode::LQC2);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
// Recompiles SQC2: emits a 128-bit vtlb write to (GPR[rs] + imm) with the
// low four address bits cleared. The source is VF[rt]; for rt == 0 the value
// of VF[0] is first loaded into a temporary XMM register.
void recSQC2()
|
||||
{
|
||||
// Emit the VU0 sync sequence, or else the finish sequence, if the
// instruction info flags request one. Sync is checked first.
if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
|
||||
mVUSyncVU0();
|
||||
else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
|
||||
mVUFinishVU0();
|
||||
|
||||
// vf00 has to be special cased here, because of the microvu temps...
|
||||
const int ftreg = _Rt_ ? _allocVFtoXMMreg(_Rt_, MODE_READ) : _allocTempXMMreg(XMMT_FPS);
|
||||
// The temporary is filled from VF[0] in the register state rather than a cached VF register.
if (!_Rt_)
|
||||
xMOVAPS(xRegisterSSE(ftreg), ptr128[&vu0Regs.VF[0].F]);
|
||||
|
||||
if (GPR_IS_CONST1(_Rs_))
|
||||
{
|
||||
// Base register is a known constant: compute the aligned address at recompile time.
const u32 addr = (g_cpuConstRegs[_Rs_].UL[0] + _Imm_) & ~0xFu;
|
||||
vtlb_DynGenWrite_Const(128, true, addr, ftreg);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Otherwise emit code that builds the aligned address in arg1regd at run time.
_eeMoveGPRtoR(arg1regd, _Rs_);
|
||||
if (_Imm_ != 0)
|
||||
xADD(arg1regd, _Imm_);
|
||||
xAND(arg1regd, ~0xF);
|
||||
|
||||
vtlb_DynGenWrite(128, true, arg1regd.GetId(), ftreg);
|
||||
}
|
||||
|
||||
// Release the temporary that was allocated for the vf00 case.
if (!_Rt_)
|
||||
_freeXMMreg(ftreg);
|
||||
|
||||
EE::Profiler.EmitOp(eeOpcode::SQC2);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
REC_FUNC(LQC2);
|
||||
REC_FUNC(SQC2);
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace OpcodeImpl
|
||||
} // namespace Dynarec
|
||||
} // namespace R5900
|
||||
// Dispatches to the recCOP2_BC2t table entry selected by _Rt_.
void recCOP2_BC2() { recCOP2_BC2t[_Rt_](); }
|
||||
void recCOP2_SPEC1()
|
||||
{
|
||||
if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0_MICRO)
|
||||
{
|
||||
xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
|
||||
xForwardJZ32 skipvuidle;
|
||||
_cop2BackupRegs();
|
||||
xFastCall((void*)_vu0FinishMicro);
|
||||
_cop2RestoreRegs();
|
||||
skipvuidle.SetTarget();
|
||||
}
|
||||
if (g_pCurInstInfo->info & (EEINST_COP2_SYNC_VU0 | EEINST_COP2_FINISH_VU0))
|
||||
mVUFinishVU0();
|
||||
|
||||
recCOP2SPECIAL1t[_Funct_]();
|
||||
|
||||
|
|
Loading…
Reference in New Issue