EE Rec/IOP Rec: Rewrite large portions

- Add fastmem (see the sketch below)
- Add delay slot swapping
- Add COP2 sync elision
- Add block analysis and use analysis
- Add GPR register caching and renaming
Connor McLaughlin 2022-10-29 13:39:19 +10:00 committed by refractionpcsx2
parent 56501e0811
commit 1ccddb92d4
52 changed files with 9111 additions and 5575 deletions
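
The headline change is fastmem: rather than routing every guest load/store through a vtlb lookup, the EE recompiler now emits a direct host memory access relative to a reserved 4 GB mirror of the guest address space, and repairs the emitted code on demand when an access faults. A minimal conceptual sketch (illustration only, not code from this commit; fastmem_base stands in for vtlbdata.fastmem_base):

#include <cstdint>

uint8_t* fastmem_base; // base of the reserved 4 GB mirror, set up at allocation time

// What the recompiler conceptually emits for a 32-bit guest load:
uint32_t FastmemRead32(uint32_t vaddr)
{
    // one address computation plus one load; no vtlb lookup, no register flushing
    return *reinterpret_cast<uint32_t*>(fastmem_base + vaddr);
}

// Pages that cannot be direct-mapped (handlers, unmapped TLB entries) are left
// unbacked, so the access faults; the fault handler then backpatches the emitted
// code to call the slow vtlb path instead (see vtlb_BackpatchLoadStore below).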


@@ -145,6 +145,41 @@ namespace HostSys
extern void UnmapSharedMemory(void* baseaddr, size_t size);
}
class SharedMemoryMappingArea
{
public:
static std::unique_ptr<SharedMemoryMappingArea> Create(size_t size);
~SharedMemoryMappingArea();
__fi size_t GetSize() const { return m_size; }
__fi size_t GetNumPages() const { return m_num_pages; }
__fi u8* BasePointer() const { return m_base_ptr; }
__fi u8* OffsetPointer(size_t offset) const { return m_base_ptr + offset; }
__fi u8* PagePointer(size_t page) const { return m_base_ptr + __pagesize * page; }
u8* Map(void* file_handle, size_t file_offset, void* map_base, size_t map_size, const PageProtectionMode& mode);
bool Unmap(void* map_base, size_t map_size);
private:
SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages);
u8* m_base_ptr;
size_t m_size;
size_t m_num_pages;
size_t m_num_mappings = 0;
#ifdef _WIN32
using PlaceholderMap = std::map<size_t, size_t>;
PlaceholderMap::iterator FindPlaceholder(size_t page);
PlaceholderMap m_placeholder_ranges;
#endif
};
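
A hypothetical usage sketch for the interface above (shmem_handle and the page numbers are assumptions for illustration, not part of this diff):

// reserve a contiguous run of address space, initially inaccessible
std::unique_ptr<SharedMemoryMappingArea> area = SharedMemoryMappingArea::Create(16 * __pagesize);

// shmem_handle: platform shared-memory handle (fd on Linux, file mapping HANDLE
// on Windows), assumed to have been created elsewhere
u8* view = area->Map(shmem_handle, /*file_offset=*/0,
    area->PagePointer(4), __pagesize, PageProtectionMode().Read().Write());

// later: return that page to the reserved, inaccessible state
if (view)
    area->Unmap(view, __pagesize);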
// Safe version of Munmap -- NULLs the pointer variable immediately after free'ing it.
#define SafeSysMunmap(ptr, size) \
((void)(HostSys::Munmap(ptr, size), (ptr) = 0))


@@ -23,6 +23,7 @@
#include "fmt/core.h"
#include "common/Align.h"
#include "common/PageFaultSource.h"
#include "common/Assertions.h"
#include "common/Console.h"
@@ -34,12 +35,26 @@
#define MAP_ANONYMOUS MAP_ANON
#endif
#include <cerrno>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#ifndef __APPLE__
#include <ucontext.h>
#endif
extern void SignalExit(int sig);
static const uptr m_pagemask = getpagesize() - 1;
static struct sigaction s_old_sigsegv_action;
#if defined(__APPLE__)
static struct sigaction s_old_sigbus_action;
#endif
// Linux implementation of SIGSEGV handler. Bind it using sigaction().
-static void SysPageFaultSignalFilter(int signal, siginfo_t* siginfo, void*)
+static void SysPageFaultSignalFilter(int signal, siginfo_t* siginfo, void* ctx)
{
// [TODO] : Add a thread ID filter to the Linux Signal handler here.
// Rationale: On windows, the __try/__except model allows per-thread specific behavior
@@ -57,13 +72,20 @@ static void SysPageFaultSignalFilter(int signal, siginfo_t* siginfo, void*)
// Note: Use of stdio functions isn't safe here. Avoid console logs,
// assertions, file logs, or just about anything else useful.
#if defined(__APPLE__) && defined(__x86_64__)
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext->__ss.__rip);
#elif defined(__x86_64__)
void* const exception_pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_RIP]);
#else
void* const exception_pc = nullptr;
#endif
// Note: This signal can be accessed by the EE or MTVU thread
// Source_PageFault is a global variable with its own state information
// so for now we lock this exception code unless someone can fix this better...
std::unique_lock lock(PageFault_Mutex);
-Source_PageFault->Dispatch(PageFaultInfo((uptr)siginfo->si_addr & ~m_pagemask));
+Source_PageFault->Dispatch(PageFaultInfo((uptr)exception_pc, (uptr)siginfo->si_addr & ~m_pagemask));
// resumes execution right where we left off (re-executes instruction that
// caused the SIGSEGV).
@@ -89,11 +111,11 @@ void _platform_InstallSignalHandler()
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = SysPageFaultSignalFilter;
-#ifdef __APPLE__
+#if defined(__APPLE__)
// MacOS uses SIGBUS for memory permission violations
-sigaction(SIGBUS, &sa, NULL);
+sigaction(SIGBUS, &sa, &s_old_sigbus_action);
#else
-sigaction(SIGSEGV, &sa, NULL);
+sigaction(SIGSEGV, &sa, &s_old_sigsegv_action);
#endif
}
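
For reference, a self-contained Linux/x86-64 demo of the technique this hunk adds, recovering the faulting instruction pointer from the ucontext (assumes glibc; compile with -D_GNU_SOURCE for REG_RIP):

#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <ucontext.h>

static void handler(int, siginfo_t* si, void* ctx)
{
    // same extraction as SysPageFaultSignalFilter above
    void* pc = reinterpret_cast<void*>(static_cast<ucontext_t*>(ctx)->uc_mcontext.gregs[REG_RIP]);
    std::printf("fault on %p while executing %p\n", si->si_addr, pc); // demo only; not async-signal-safe
    std::_Exit(1);
}

int main()
{
    struct sigaction sa = {};
    sa.sa_flags = SA_SIGINFO;
    sa.sa_sigaction = handler;
    sigaction(SIGSEGV, &sa, nullptr);
    *reinterpret_cast<volatile int*>(0x42) = 0; // deliberate bad write
}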
@@ -210,4 +232,56 @@ void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
pxFailRel("Failed to unmap shared memory");
}
SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages)
: m_base_ptr(base_ptr)
, m_size(size)
, m_num_pages(num_pages)
{
}
SharedMemoryMappingArea::~SharedMemoryMappingArea()
{
pxAssertRel(m_num_mappings == 0, "No mappings left");
if (munmap(m_base_ptr, m_size) != 0)
pxFailRel("Failed to release shared memory area");
}
std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size)
{
pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned");
void* alloc = mmap(nullptr, size, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (alloc == MAP_FAILED)
return nullptr;
return std::unique_ptr<SharedMemoryMappingArea>(new SharedMemoryMappingArea(static_cast<u8*>(alloc), size, size / __pagesize));
}
u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* map_base, size_t map_size, const PageProtectionMode& mode)
{
pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
const uint lnxmode = LinuxProt(mode);
void* const ptr = mmap(map_base, map_size, lnxmode, MAP_SHARED | MAP_FIXED,
static_cast<int>(reinterpret_cast<intptr_t>(file_handle)), static_cast<off_t>(file_offset));
if (ptr == MAP_FAILED)
return nullptr;
m_num_mappings++;
return static_cast<u8*>(ptr);
}
bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size)
{
pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
if (mmap(map_base, map_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) == MAP_FAILED)
return false;
m_num_mappings--;
return true;
}
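
The POSIX implementation leans on two mmap() properties: a PROT_NONE anonymous mapping reserves address space without committing memory, and MAP_FIXED atomically replaces whatever occupies the target range. A standalone sketch of that reserve/replace/re-reserve cycle (Linux-only; memfd_create needs glibc 2.27+ and -D_GNU_SOURCE):

#include <cassert>
#include <sys/mman.h>
#include <unistd.h>

int main()
{
    const size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
    const int fd = memfd_create("backing", 0); // anonymous shared-memory file
    assert(fd >= 0);
    ftruncate(fd, static_cast<off_t>(page));

    // 1. reserve 16 pages of address space; commits no memory, faults on any access
    void* base = mmap(nullptr, 16 * page, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    assert(base != MAP_FAILED);

    // 2. MAP_FIXED atomically replaces page 4 of the reservation with a file view
    void* view = mmap(static_cast<char*>(base) + 4 * page, page,
        PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0);
    assert(view != MAP_FAILED);
    static_cast<char*>(view)[0] = 42; // backed by the file now

    // 3. "unmap" by re-reserving PROT_NONE over the view, exactly as Unmap() above,
    //    so the hole cannot be claimed by an unrelated allocation
    mmap(view, page, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    return 0;
}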
#endif


@@ -34,10 +34,12 @@
struct PageFaultInfo
{
uptr pc;
uptr addr;
-PageFaultInfo(uptr address)
+PageFaultInfo(uptr pc_, uptr address)
{
pc = pc_;
addr = address;
}
};


@@ -24,14 +24,8 @@
#define NOMINMAX
#endif
-// Qt build requires Windows 10+, WX Windows 8.1+.
+// We require Windows 10+.
#ifndef _WIN32_WINNT
#ifdef PCSX2_CORE
#define _WIN32_WINNT 0x0A00 // Windows 10
#else
#define _WIN32_WINNT 0x0603 // Windows 8.1
#endif
#endif
#include <windows.h>
#include <VersionHelpers.h>


@@ -24,16 +24,24 @@
#include "common/AlignedMalloc.h"
#include "fmt/core.h"
#include "fmt/format.h"
static long DoSysPageFaultExceptionFilter(EXCEPTION_POINTERS* eps)
{
if (eps->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION)
return EXCEPTION_CONTINUE_SEARCH;
#if defined(_M_AMD64)
void* const exception_pc = reinterpret_cast<void*>(eps->ContextRecord->Rip);
#else
void* const exception_pc = nullptr;
#endif
// Note: This exception can be accessed by the EE or MTVU thread
// Source_PageFault is a global variable with its own state information
// so for now we lock this exception code unless someone can fix this better...
std::unique_lock lock(PageFault_Mutex);
-Source_PageFault->Dispatch(PageFaultInfo((uptr)eps->ExceptionRecord->ExceptionInformation[1]));
+Source_PageFault->Dispatch(PageFaultInfo((uptr)exception_pc, (uptr)eps->ExceptionRecord->ExceptionInformation[1]));
return Source_PageFault->WasHandled() ? EXCEPTION_CONTINUE_EXECUTION : EXCEPTION_CONTINUE_SEARCH;
}
@@ -148,4 +156,185 @@ void HostSys::UnmapSharedMemory(void* baseaddr, size_t size)
pxFail("Failed to unmap shared memory");
}
SharedMemoryMappingArea::SharedMemoryMappingArea(u8* base_ptr, size_t size, size_t num_pages)
: m_base_ptr(base_ptr)
, m_size(size)
, m_num_pages(num_pages)
{
m_placeholder_ranges.emplace(0, size);
}
SharedMemoryMappingArea::~SharedMemoryMappingArea()
{
pxAssertRel(m_num_mappings == 0, "No mappings left");
// hopefully this will be okay, and we don't need to coalesce all the placeholders...
if (!VirtualFreeEx(GetCurrentProcess(), m_base_ptr, 0, MEM_RELEASE))
pxFailRel("Failed to release shared memory area");
}
SharedMemoryMappingArea::PlaceholderMap::iterator SharedMemoryMappingArea::FindPlaceholder(size_t offset)
{
if (m_placeholder_ranges.empty())
return m_placeholder_ranges.end();
// this will give us an iterator equal or after page
auto it = m_placeholder_ranges.lower_bound(offset);
if (it == m_placeholder_ranges.end())
{
// check the last page
it = (++m_placeholder_ranges.rbegin()).base();
}
// it's the one we found?
if (offset >= it->first && offset < it->second)
return it;
// otherwise try the one before
if (it == m_placeholder_ranges.begin())
return m_placeholder_ranges.end();
--it;
if (offset >= it->first && offset < it->second)
return it;
else
return m_placeholder_ranges.end();
}
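
To make the lookup above concrete: the map stores each free (placeholder) run as {start offset -> end offset}, and FindPlaceholder() must handle offsets that land before, inside, or after a run. A small self-contained rehearsal of the same logic:

#include <cstdio>
#include <map>

int main()
{
    // placeholders [0x0000,0x4000) and [0x8000,0xA000); [0x4000,0x8000) is mapped
    std::map<size_t, size_t> ranges{{0x0000, 0x4000}, {0x8000, 0xA000}};
    for (const size_t off : {(size_t)0x1000, (size_t)0x5000, (size_t)0x9000})
    {
        auto it = ranges.lower_bound(off);       // first range starting at or after off
        if (it == ranges.end())
            it = (++ranges.rbegin()).base();     // ran off the end: check the last range
        if (!(off >= it->first && off < it->second) && it != ranges.begin())
            --it;                                // otherwise try the one before
        const bool inside = off >= it->first && off < it->second;
        std::printf("offset 0x%zX -> %s\n", off, inside ? "placeholder" : "mapped");
    }
    // prints: placeholder, mapped, placeholder
}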
std::unique_ptr<SharedMemoryMappingArea> SharedMemoryMappingArea::Create(size_t size)
{
pxAssertRel(Common::IsAlignedPow2(size, __pagesize), "Size is page aligned");
void* alloc = VirtualAlloc2(GetCurrentProcess(), nullptr, size, MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, nullptr, 0);
if (!alloc)
return nullptr;
return std::unique_ptr<SharedMemoryMappingArea>(new SharedMemoryMappingArea(static_cast<u8*>(alloc), size, size / __pagesize));
}
u8* SharedMemoryMappingArea::Map(void* file_handle, size_t file_offset, void* map_base, size_t map_size, const PageProtectionMode& mode)
{
pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
const size_t map_offset = static_cast<u8*>(map_base) - m_base_ptr;
pxAssert(Common::IsAlignedPow2(map_offset, __pagesize));
pxAssert(Common::IsAlignedPow2(map_size, __pagesize));
// should be a placeholder. unless there's some other mapping we didn't free.
PlaceholderMap::iterator phit = FindPlaceholder(map_offset);
pxAssertMsg(phit != m_placeholder_ranges.end(), "Page we're mapping is a placeholder");
pxAssertMsg(map_offset >= phit->first && map_offset < phit->second, "Page is in returned placeholder range");
pxAssertMsg((map_offset + map_size) <= phit->second, "Page range is in returned placeholder range");
// do we need to split to the left? (i.e. is there a placeholder before this range)
const size_t old_ph_end = phit->second;
if (map_offset != phit->first)
{
phit->second = map_offset;
// split it (i.e. left..start and start..end are now separated)
if (!VirtualFreeEx(GetCurrentProcess(), OffsetPointer(phit->first),
(map_offset - phit->first), MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER))
{
pxFailRel("Failed to left split placeholder for map");
}
}
else
{
// start of the placeholder is getting used, we'll split it right below if there's anything left over
m_placeholder_ranges.erase(phit);
}
// do we need to split to the right? (i.e. is there a placeholder after this range)
if ((map_offset + map_size) != old_ph_end)
{
// split out end..ph_end
m_placeholder_ranges.emplace(map_offset + map_size, old_ph_end);
if (!VirtualFreeEx(GetCurrentProcess(), OffsetPointer(map_offset), map_size,
MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER))
{
pxFailRel("Failed to right split placeholder for map");
}
}
// actually do the mapping, replacing the placeholder on the range
if (!MapViewOfFile3(static_cast<HANDLE>(file_handle), GetCurrentProcess(),
map_base, file_offset, map_size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0))
{
Console.Error("(SharedMemoryMappingArea) MapViewOfFile3() failed: %u", GetLastError());
return nullptr;
}
const DWORD prot = ConvertToWinApi(mode);
if (prot != PAGE_READWRITE)
{
DWORD old_prot;
if (!VirtualProtect(map_base, map_size, prot, &old_prot))
pxFail("Failed to protect memory mapping");
}
m_num_mappings++;
return static_cast<u8*>(map_base);
}
bool SharedMemoryMappingArea::Unmap(void* map_base, size_t map_size)
{
pxAssert(static_cast<u8*>(map_base) >= m_base_ptr && static_cast<u8*>(map_base) < (m_base_ptr + m_size));
const size_t map_offset = static_cast<u8*>(map_base) - m_base_ptr;
pxAssert(Common::IsAlignedPow2(map_offset, __pagesize));
pxAssert(Common::IsAlignedPow2(map_size, __pagesize));
const size_t page = map_offset / __pagesize;
// unmap the specified range
if (!UnmapViewOfFile2(GetCurrentProcess(), map_base, MEM_PRESERVE_PLACEHOLDER))
{
Console.Error("(SharedMemoryMappingArea) UnmapViewOfFile2() failed: %u", GetLastError());
return false;
}
// can we coalesce to the left?
PlaceholderMap::iterator left_it = (map_offset > 0) ? FindPlaceholder(map_offset - 1) : m_placeholder_ranges.end();
if (left_it != m_placeholder_ranges.end())
{
// the left placeholder should end at our start
pxAssert(map_offset == left_it->second);
left_it->second = map_offset + map_size;
// combine placeholders before and the range we're unmapping, i.e. to the left
if (!VirtualFreeEx(GetCurrentProcess(), OffsetPointer(left_it->first),
left_it->second - left_it->first, MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS))
{
pxFail("Failed to coalesce placeholders left for unmap");
}
}
else
{
// this is a new placeholder
left_it = m_placeholder_ranges.emplace(map_offset, map_offset + map_size).first;
}
// can we coalesce to the right?
PlaceholderMap::iterator right_it = ((map_offset + map_size) < m_size) ? FindPlaceholder(map_offset + map_size) : m_placeholder_ranges.end();
if (right_it != m_placeholder_ranges.end())
{
// should start at our end
pxAssert(right_it->first == (map_offset + map_size));
left_it->second = right_it->second;
m_placeholder_ranges.erase(right_it);
// combine our placeholder and the next, i.e. to the right
if (!VirtualFreeEx(GetCurrentProcess(), OffsetPointer(left_it->first),
left_it->second - left_it->first, MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS))
{
pxFail("Failed to coalescae placeholders right for unmap");
}
}
m_num_mappings--;
return true;
}
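
The same split/coalesce bookkeeping as pure data-structure manipulation, with the WinAPI calls reduced to comments (a simulation, not the commit's code):

#include <cstdio>
#include <map>

using RangeMap = std::map<size_t, size_t>; // placeholder runs: start -> end

static void dump(const RangeMap& m, const char* when)
{
    std::printf("%s:", when);
    for (const auto& [start, end] : m)
        std::printf(" [0x%zX,0x%zX)", start, end);
    std::printf("\n");
}

int main()
{
    RangeMap ph{{0x0, 0x10000}};  // the whole area begins as one placeholder
    // Map() over [0x4000,0x6000): split off left and right remainders
    ph[0x0] = 0x4000;             // left split  (VirtualFreeEx + MEM_PRESERVE_PLACEHOLDER)
    ph.emplace(0x6000, 0x10000);  // right split (second VirtualFreeEx)
    dump(ph, "after map");        // [0x0,0x4000) [0x6000,0x10000)
    // Unmap() of [0x4000,0x6000): grow the left neighbour, then absorb the right
    auto left = ph.find(0x0);
    left->second = 0x6000;        // left placeholder now covers the freed range
    auto right = ph.find(0x6000);
    left->second = right->second; // coalesce (VirtualFreeEx + MEM_COALESCE_PLACEHOLDERS)
    ph.erase(right);
    dump(ph, "after unmap");      // back to [0x0,0x10000)
}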
#endif


@@ -35,6 +35,7 @@ AdvancedSystemSettingsWidget::AdvancedSystemSettingsWidget(SettingsDialog* dialo
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeCache, "EmuCore/CPU/Recompiler", "EnableEECache", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeINTCSpinDetection, "EmuCore/Speedhacks", "IntcStat", true);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeWaitLoopDetection, "EmuCore/Speedhacks", "WaitLoop", true);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeFastmem, "EmuCore/CPU/Recompiler", "EnableFastmem", true);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0Recompiler, "EmuCore/CPU/Recompiler", "EnableVU0", true);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1Recompiler, "EmuCore/CPU/Recompiler", "EnableVU1", true);
@@ -60,6 +61,9 @@ AdvancedSystemSettingsWidget::AdvancedSystemSettingsWidget(SettingsDialog* dialo
dialog->registerWidgetHelp(m_ui.eeINTCSpinDetection, tr("INTC Spin Detection"), tr("Checked"),
tr("Huge speedup for some games, with almost no compatibility side effects."));
dialog->registerWidgetHelp(m_ui.eeFastmem, tr("Enable Fast Memory Access"), tr("Checked"),
tr("Uses backpatching to avoid register flushing on every memory access."));
dialog->registerWidgetHelp(m_ui.vu0Recompiler, tr("Enable VU0 Recompiler"), tr("Checked"),
tr("Enables VU0 Recompiler."));


@@ -32,13 +32,6 @@
<string>EmotionEngine (MIPS-IV)</string>
</property>
<layout class="QGridLayout" name="gridLayout_4">
<item row="0" column="0">
<widget class="QCheckBox" name="eeRecompiler">
<property name="text">
<string>Enable Recompiler</string>
</property>
</widget>
</item>
<item row="2" column="0"> <item row="2" column="0">
<widget class="QCheckBox" name="eeWaitLoopDetection"> <widget class="QCheckBox" name="eeWaitLoopDetection">
<property name="text"> <property name="text">
@ -46,6 +39,20 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="1">
<widget class="QCheckBox" name="eeINTCSpinDetection">
<property name="text">
<string>INTC Spin Detection</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="eeRecompiler">
<property name="text">
<string>Enable Recompiler</string>
</property>
</widget>
</item>
<item row="0" column="1"> <item row="0" column="1">
<widget class="QCheckBox" name="eeCache"> <widget class="QCheckBox" name="eeCache">
<property name="text"> <property name="text">
@ -53,10 +60,10 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="1"> <item row="3" column="0">
<widget class="QCheckBox" name="eeINTCSpinDetection"> <widget class="QCheckBox" name="eeFastmem">
<property name="text"> <property name="text">
<string>INTC Spin Detection</string> <string>Enable Fast Memory Access</string>
</property> </property>
</widget> </widget>
</item> </item>


@@ -45,6 +45,7 @@ GameFixSettingsWidget::GameFixSettingsWidget(SettingsDialog* dialog, QWidget* pa
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.VIF1StallHack, "EmuCore/Gamefixes", "VIF1StallHack", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.VuAddSubHack, "EmuCore/Gamefixes", "VuAddSubHack", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.IbitHack, "EmuCore/Gamefixes", "IbitHack", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.FullVU0SyncHack, "EmuCore/Gamefixes", "FullVU0SyncHack", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.VUSyncHack, "EmuCore/Gamefixes", "VUSyncHack", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.VUOverflowHack, "EmuCore/Gamefixes", "VUOverflowHack", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.XgKickHack, "EmuCore/Gamefixes", "XgKickHack", false);


@@ -113,6 +113,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="FullVU0SyncHack">
<property name="text">
<string>Full VU0 Synchronization (Correct But Slower)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="IbitHack">
<property name="text">


@@ -1698,12 +1698,9 @@ if(WIN32)
strmiids.lib
opengl32.lib
comsuppw.lib
)
if(PCSX2_CORE)
target_link_libraries(PCSX2_FLAGS INTERFACE
OneCore.lib
)
-else()
+if(NOT PCSX2_CORE)
target_link_libraries(PCSX2_FLAGS INTERFACE
pthreads4w
)


@@ -46,6 +46,7 @@ enum GamefixId
Fix_VUOverflow,
Fix_XGKick,
Fix_BlitInternalFPS,
Fix_FullVU0Sync,
GamefixId_COUNT
};
@@ -382,6 +383,8 @@ struct Pcsx2Config
bool
EnableEECache : 1;
bool
EnableFastmem : 1;
BITFIELD_END
RecompilerOptions();
@@ -845,7 +848,8 @@
VUSyncHack : 1, // Makes microVU run behind the EE to avoid VU register reading/writing sync issues. Useful for M-Bit games
VUOverflowHack : 1, // Tries to simulate overflow flag checks (not really possible on x86 without soft floats)
XgKickHack : 1, // Erementar Gerad, adds more delay to VU XGkick instructions. Corrects the color of some graphics, but breaks Tri-ace games and others.
-BlitInternalFPSHack : 1; // Disables privileged register write-based FPS detection.
+BlitInternalFPSHack : 1, // Disables privileged register write-based FPS detection.
FullVU0SyncHack : 1; // Forces tight VU0 sync on every COP2 instruction.
BITFIELD_END
GamefixOptions();
@@ -1146,6 +1150,7 @@ namespace EmuFolders
#define CHECK_EEREC (EmuConfig.Cpu.Recompiler.EnableEE)
#define CHECK_CACHE (EmuConfig.Cpu.Recompiler.EnableEECache)
#define CHECK_IOPREC (EmuConfig.Cpu.Recompiler.EnableIOP)
#define CHECK_FASTMEM (EmuConfig.Cpu.Recompiler.EnableEE && EmuConfig.Cpu.Recompiler.EnableFastmem)
//------------ SPECIAL GAME FIXES!!! ---------------
#define CHECK_VUADDSUBHACK (EmuConfig.Gamefixes.VuAddSubHack) // Special Fix for Tri-ace games, they use an encryption algorithm that requires VU addi opcode to be bit-accurate.
@@ -1161,6 +1166,7 @@
#define CHECK_VIF1STALLHACK (EmuConfig.Gamefixes.VIF1StallHack) // Like above, processes FIFO data before the stall is allowed (to make sure data goes over).
#define CHECK_GIFFIFOHACK (EmuConfig.Gamefixes.GIFFIFOHack) // Enabled the GIF FIFO (more correct but slower)
#define CHECK_VUOVERFLOWHACK (EmuConfig.Gamefixes.VUOverflowHack) // Special Fix for Superman Returns, they check for overflows on PS2 floats which we can't do without soft floats.
#define CHECK_FULLVU0SYNCHACK (EmuConfig.Gamefixes.FullVU0SyncHack)
//------------ Advanced Options!!! ---------------
#define CHECK_VU_OVERFLOW (EmuConfig.Cpu.Recompiler.vuOverflow)


@@ -298,8 +298,8 @@ void iDumpBlock( int startpc, u8 * ptr )
// write the instruction info
-std::fprintf(eff, "\n\nlive0 - %x, live2 - %x, lastuse - %x\nxmm - %x, used - %x\n",
-EEINST_LIVE0, EEINST_LIVE2, EEINST_LASTUSE, EEINST_XMM, EEINST_USED
+std::fprintf(eff, "\n\nlive0 - %x, lastuse - %x\nxmm - %x, used - %x\n",
+EEINST_LIVE, EEINST_LASTUSE, EEINST_XMM, EEINST_USED
);
memzero(used);


@@ -3801,6 +3801,8 @@ void FullscreenUI::DrawAdvancedSettingsPage()
"EmuCore/Speedhacks", "IntcStat", true);
DrawToggleSetting(bsi, "Enable Wait Loop Detection", "Moderate speedup for some games, with no known side effects.",
"EmuCore/Speedhacks", "WaitLoop", true);
DrawToggleSetting(bsi, "Enable Fast Memory Access", "Uses backpatching to avoid register flushing on every memory access.",
"EmuCore/CPU/Recompiler", "EnableFastmem", true);
DrawToggleSetting(bsi, "Enable VU0 Recompiler (Micro Mode)", DrawToggleSetting(bsi, "Enable VU0 Recompiler (Micro Mode)",
"New Vector Unit recompiler with much improved compatibility. Recommended.", "EmuCore/CPU/Recompiler", "EnableVU0", true); "New Vector Unit recompiler with much improved compatibility. Recommended.", "EmuCore/CPU/Recompiler", "EnableVU0", true);
DrawToggleSetting(bsi, "Enable VU1 Recompiler", "New Vector Unit recompiler with much improved compatibility. Recommended.", DrawToggleSetting(bsi, "Enable VU1 Recompiler", "New Vector Unit recompiler with much improved compatibility. Recommended.",
@ -3857,6 +3859,8 @@ void FullscreenUI::DrawGameFixesSettingsPage()
"EmuCore/Gamefixes", "VuAddSubHack", false); "EmuCore/Gamefixes", "VuAddSubHack", false);
DrawToggleSetting(bsi, "VU I bit Hack avoid constant recompilation in some games", DrawToggleSetting(bsi, "VU I bit Hack avoid constant recompilation in some games",
"Scarface The World Is Yours, Crash Tag Team Racing.", "EmuCore/Gamefixes", "IbitHack", false); "Scarface The World Is Yours, Crash Tag Team Racing.", "EmuCore/Gamefixes", "IbitHack", false);
DrawToggleSetting(
bsi, "Full VU0 Synchronization", "Forces tight VU0 sync on every COP2 instruction.", "EmuCore/Gamefixes", "FullVU0SyncHack", false);
DrawToggleSetting(bsi, "VU Sync (Run behind)", "To avoid sync problems when reading or writing VU registers.", "EmuCore/Gamefixes", DrawToggleSetting(bsi, "VU Sync (Run behind)", "To avoid sync problems when reading or writing VU registers.", "EmuCore/Gamefixes",
"VUSyncHack", false); "VUSyncHack", false);
DrawToggleSetting( DrawToggleSetting(


@@ -404,6 +404,10 @@ void CommonHost::UpdateLogging(SettingsInterface& si)
DevConWriterEnabled = any_logging_sinks && (IsDevBuild || si.GetBoolValue("Logging", "EnableVerbose", false));
SysConsole.eeConsole.Enabled = any_logging_sinks && si.GetBoolValue("Logging", "EnableEEConsole", false);
SysConsole.iopConsole.Enabled = any_logging_sinks && si.GetBoolValue("Logging", "EnableIOPConsole", false);
SysTrace.IOP.R3000A.Enabled = true;
SysTrace.IOP.COP2.Enabled = true;
SysTrace.IOP.Memory.Enabled = true;
SysTrace.SIF.Enabled = true;
// Input Recording Logs
SysConsole.recordingConsole.Enabled = any_logging_sinks && si.GetBoolValue("Logging", "EnableInputRecordingLogs", true);


@@ -963,6 +963,7 @@ void mmap_MarkCountedRamPage( u32 paddr )
m_PageProtectInfo[rampage].Mode = ProtMode_Write;
HostSys::MemProtect( &eeMem->Main[rampage<<__pageshift], __pagesize, PageAccess_ReadOnly() );
vtlb_UpdateFastmemProtection(rampage << __pageshift, __pagesize, PageAccess_ReadOnly());
}
// offset - offset of address relative to psM.
@@ -980,6 +981,7 @@ static __fi void mmap_ClearCpuBlock( uint offset )
"Attempted to clear a block that is already under manual protection." );
HostSys::MemProtect( &eeMem->Main[rampage<<__pageshift], __pagesize, PageAccess_ReadWrite() );
vtlb_UpdateFastmemProtection(rampage << __pageshift, __pagesize, PageAccess_ReadWrite());
m_PageProtectInfo[rampage].Mode = ProtMode_Manual;
Cpu->Clear( m_PageProtectInfo[rampage].ReverseRamMap, __pagesize );
}
@@ -988,12 +990,37 @@ void mmap_PageFaultHandler::OnPageFaultEvent( const PageFaultInfo& info, bool& h
{
pxAssert( eeMem );
u32 vaddr;
if (CHECK_FASTMEM && vtlb_GetGuestAddress(info.addr, &vaddr))
{
// this was inside the fastmem area. check if it's a code page
// fprintf(stderr, "Fault on fastmem %p vaddr %08X\n", info.addr, vaddr);
uptr ptr = (uptr)PSM(vaddr);
uptr offset = (ptr - (uptr)eeMem->Main);
if (ptr && m_PageProtectInfo[offset >> __pageshift].Mode == ProtMode_Write)
{
// fprintf(stderr, "Not backpatching code write at %08X\n", vaddr);
mmap_ClearCpuBlock(offset);
handled = true;
}
else
{
// fprintf(stderr, "Trying backpatching vaddr %08X\n", vaddr);
if (vtlb_BackpatchLoadStore(info.pc, info.addr))
handled = true;
}
}
else
{
// get bad virtual address
uptr offset = info.addr - (uptr)eeMem->Main;
-if( offset >= Ps2MemSize::MainRam ) return;
+if (offset >= Ps2MemSize::MainRam)
+return;
mmap_ClearCpuBlock(offset);
handled = true;
}
}
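
In condensed form, the decision tree this handler now implements (a paraphrase using the names from the diff above, not additional code in the commit):

// Schematic restatement of OnPageFaultEvent:
bool HandleFault(const PageFaultInfo& info)
{
    u32 vaddr;
    if (CHECK_FASTMEM && vtlb_GetGuestAddress(info.addr, &vaddr))
    {
        // fault inside the fastmem mirror: either a write to a protected code page...
        const uptr ptr = (uptr)PSM(vaddr);
        const uptr code_offset = ptr - (uptr)eeMem->Main;
        if (ptr && m_PageProtectInfo[code_offset >> __pageshift].Mode == ProtMode_Write)
        {
            mmap_ClearCpuBlock(code_offset); // invalidate blocks and lift the protection
            return true;
        }
        // ...or a genuine slow-path access: rewrite the emitted load/store
        return vtlb_BackpatchLoadStore(info.pc, info.addr);
    }
    // legacy path: direct write into write-protected main memory
    const uptr offset = info.addr - (uptr)eeMem->Main;
    if (offset >= Ps2MemSize::MainRam)
        return false;
    mmap_ClearCpuBlock(offset);
    return true;
}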
// Clears all block tracking statuses, manual protection flags, and write protection.
@@ -1005,4 +1032,5 @@ void mmap_ResetBlockTracking()
//DbgCon.WriteLn( "vtlb/mmap: Block Tracking reset..." );
memzero( m_PageProtectInfo );
if (eeMem) HostSys::MemProtect( eeMem->Main, Ps2MemSize::MainRam, PageAccess_ReadWrite() );
vtlb_UpdateFastmemProtection(0, Ps2MemSize::MainRam, PageAccess_ReadWrite());
}


@@ -155,6 +155,7 @@ Pcsx2Config::RecompilerOptions::RecompilerOptions()
EnableIOP = true;
EnableVU0 = true;
EnableVU1 = true;
EnableFastmem = true;
// vu and fpu clamping default to standard overflow.
vuOverflow = true;
@@ -211,6 +212,7 @@ void Pcsx2Config::RecompilerOptions::LoadSave(SettingsWrapper& wrap)
SettingsWrapBitBool(EnableEECache);
SettingsWrapBitBool(EnableVU0);
SettingsWrapBitBool(EnableVU1);
SettingsWrapBitBool(EnableFastmem);
SettingsWrapBitBool(vuOverflow);
SettingsWrapBitBool(vuExtraOverflow);
@@ -864,7 +866,8 @@ static const char* const tbl_GamefixNames[] =
"VUSync",
"VUOverflow",
"XGKick",
-"BlitInternalFPS"
+"BlitInternalFPS",
"FullVU0Sync",
};
const char* EnumToString(GamefixId id)
@@ -907,6 +910,7 @@ void Pcsx2Config::GamefixOptions::Set(GamefixId id, bool enabled)
case Fix_VUSync: VUSyncHack = enabled; break;
case Fix_VUOverflow: VUOverflowHack = enabled; break;
case Fix_BlitInternalFPS: BlitInternalFPSHack = enabled; break;
case Fix_FullVU0Sync: FullVU0SyncHack = enabled; break;
jNO_DEFAULT;
}
}
@@ -934,6 +938,7 @@ bool Pcsx2Config::GamefixOptions::Get(GamefixId id) const
case Fix_VUSync: return VUSyncHack;
case Fix_VUOverflow: return VUOverflowHack;
case Fix_BlitInternalFPS: return BlitInternalFPSHack;
case Fix_FullVU0Sync: return FullVU0SyncHack;
jNO_DEFAULT;
}
return false; // unreachable, but we still need to suppress warnings >_<
@@ -961,6 +966,7 @@ void Pcsx2Config::GamefixOptions::LoadSave(SettingsWrapper& wrap)
SettingsWrapBitBool(VUSyncHack);
SettingsWrapBitBool(VUOverflowHack);
SettingsWrapBitBool(BlitInternalFPSHack);
SettingsWrapBitBool(FullVU0SyncHack);
}


@@ -46,12 +46,6 @@ namespace Exception
public:
explicit CancelInstruction() { }
};
class FailedToAllocateRegister
{
public:
explicit FailedToAllocateRegister() { }
};
}
// --------------------------------------------------------------------------------------


@@ -109,12 +109,18 @@ void RecompiledCodeReserve::Reset()
void RecompiledCodeReserve::AllowModification()
{
// Apple Silicon enforces write protection in hardware.
#if !defined(__APPLE__) || !defined(_M_ARM64)
HostSys::MemProtect(m_baseptr, m_size, PageAccess_Any());
#endif
}
void RecompiledCodeReserve::ForbidModification()
{
// Apple Silicon enforces write protection in hardware.
#if !defined(__APPLE__) || !defined(_M_ARM64)
HostSys::MemProtect(m_baseptr, m_size, PageProtectionMode().Read().Execute());
#endif
}
// Sets the abbreviated name used by the profiler. Name should be under 10 characters long.


@@ -113,6 +113,10 @@ public:
VirtualMemoryBumpAllocator& BumpAllocator() { return m_bumpAllocator; }
const eeMemoryReserve& EEMemory() const { return m_ee; }
const iopMemoryReserve& IOPMemory() const { return m_iop; }
const vuMemoryReserve& VUMemory() const { return m_vu; }
bool Allocate();
void Reset();
void Release();


@@ -1475,6 +1475,7 @@ void VMManager::Execute()
// We need to switch the cpus out, and reset the new ones if so.
s_cpu_provider_pack->ApplyConfig();
SysClearExecutionCache();
vtlb_ResetFastmem();
}
// Execute until we're asked to stop.
@@ -1553,6 +1554,9 @@ void VMManager::CheckForCPUConfigChanges(const Pcsx2Config& old_config)
SysClearExecutionCache();
memBindConditionalHandlers();
if (EmuConfig.Cpu.Recompiler.EnableFastmem != old_config.Cpu.Recompiler.EnableFastmem)
vtlb_ResetFastmem();
// did we toggle recompilers?
if (EmuConfig.Cpu.CpusChanged(old_config.Cpu))
{


@@ -71,7 +71,7 @@
<Link>
<LargeAddressAware>Yes</LargeAddressAware>
<AdditionalDependencies>comctl32.lib;ws2_32.lib;shlwapi.lib;winmm.lib;rpcrt4.lib;iphlpapi.lib;dsound.lib;%(AdditionalDependencies)</AdditionalDependencies>
-<AdditionalDependencies>dxguid.lib;dinput8.lib;hid.lib;PowrProf.lib;d3dcompiler.lib;d3d11.lib;dxgi.lib;strmiids.lib;opengl32.lib;comsuppw.lib;%(AdditionalDependencies)</AdditionalDependencies>
+<AdditionalDependencies>dxguid.lib;dinput8.lib;hid.lib;PowrProf.lib;d3dcompiler.lib;d3d11.lib;dxgi.lib;strmiids.lib;opengl32.lib;comsuppw.lib;OneCore.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup>


@@ -42,6 +42,13 @@
#include "fmt/core.h"
#include <map>
#include <unordered_set>
#include <unordered_map>
#define FASTMEM_LOG(...)
//#define FASTMEM_LOG(...) Console.WriteLn(__VA_ARGS__)
using namespace R5900;
using namespace vtlb_private;
@@ -60,6 +67,36 @@ static vtlbHandler UnmappedVirtHandler1;
static vtlbHandler UnmappedPhyHandler0;
static vtlbHandler UnmappedPhyHandler1;
struct FastmemVirtualMapping
{
u32 offset;
u32 size;
};
struct LoadstoreBackpatchInfo
{
u32 guest_pc;
u32 gpr_bitmask;
u32 fpr_bitmask;
u8 code_size;
u8 address_register;
u8 data_register;
u8 size_in_bits;
bool is_signed;
bool is_load;
bool is_fpr;
};
static constexpr size_t FASTMEM_AREA_SIZE = 0x100000000ULL;
static constexpr u32 FASTMEM_PAGE_COUNT = FASTMEM_AREA_SIZE / VTLB_PAGE_SIZE;
static constexpr u32 NO_FASTMEM_MAPPING = 0xFFFFFFFFu;
static std::unique_ptr<SharedMemoryMappingArea> s_fastmem_area;
static std::vector<u32> s_fastmem_virtual_mapping; // maps vaddr -> mainmem offset
static std::unordered_multimap<u32, u32> s_fastmem_physical_mapping; // maps mainmem offset -> vaddr
static std::unordered_map<uptr, LoadstoreBackpatchInfo> s_fastmem_backpatch_info;
static std::unordered_set<u32> s_fastmem_faulting_pcs;
vtlb_private::VTLBPhysical vtlb_private::VTLBPhysical::fromPointer(sptr ptr) { vtlb_private::VTLBPhysical vtlb_private::VTLBPhysical::fromPointer(sptr ptr) {
pxAssertMsg(ptr >= 0, "Address too high");
return VTLBPhysical(ptr);
@@ -659,6 +696,341 @@ __fi u32 vtlb_V2P(u32 vaddr)
return paddr;
}
static constexpr bool vtlb_MismatchedHostPageSize()
{
return (__pagesize != VTLB_PAGE_SIZE);
}
static bool vtlb_IsHostAligned(u32 paddr)
{
if constexpr (!vtlb_MismatchedHostPageSize())
return true;
return ((paddr & __pagemask) == 0);
}
static u32 vtlb_HostPage(u32 page)
{
if constexpr (!vtlb_MismatchedHostPageSize())
return page;
return page >> (__pageshift - VTLB_PAGE_BITS);
}
static u32 vtlb_HostAlignOffset(u32 offset)
{
if constexpr (!vtlb_MismatchedHostPageSize())
return offset;
return offset & ~__pagemask;
}
static bool vtlb_IsHostCoalesced(u32 page)
{
if constexpr (__pagesize == VTLB_PAGE_SIZE)
{
return true;
}
else
{
static constexpr u32 shift = __pageshift - VTLB_PAGE_BITS;
static constexpr u32 count = (1u << shift);
static constexpr u32 mask = count - 1;
const u32 base = page & ~mask;
const u32 base_offset = s_fastmem_virtual_mapping[base];
if ((base_offset & __pagemask) != 0)
return false;
for (u32 i = 0, expected_offset = base_offset; i < count; i++, expected_offset += VTLB_PAGE_SIZE)
{
if (s_fastmem_virtual_mapping[base + i] != expected_offset)
return false;
}
return true;
}
}
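
These helpers exist because the host page size can be larger than the 4 KB VTLB page (16 KB on Apple Silicon, for instance), in which case one host page spans several guest pages and can only be mapped when all of them line up. A quick standalone check of the arithmetic, assuming __pageshift = 14 and VTLB_PAGE_BITS = 12:

#include <cstdio>

int main()
{
    constexpr unsigned pageshift = 14, vtlb_bits = 12;
    constexpr unsigned shift = pageshift - vtlb_bits; // 2: four 4 KB guest pages per host page
    for (unsigned page : {0u, 3u, 4u, 9u})
        std::printf("guest page %u -> host page %u\n", page, page >> shift);
    // prints 0->0, 3->0, 4->1, 9->2; a host page is only mappable ("coalesced")
    // when all four guest pages inside it carry consecutive, 16 KB-aligned
    // mainmem offsets, which is what vtlb_IsHostCoalesced() verifies
}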
static bool vtlb_GetMainMemoryOffsetFromPtr(uptr ptr, u32* mainmem_offset, u32* mainmem_size, PageProtectionMode* prot)
{
const uptr page_end = ptr + VTLB_PAGE_SIZE;
SysMainMemory& vmmem = GetVmMemory();
// EE memory and ROMs.
if (ptr >= (uptr)eeMem->Main && page_end <= (uptr)eeMem->ZeroRead)
{
const u32 eemem_offset = static_cast<u32>(ptr - (uptr)eeMem->Main);
const bool writeable = ((eemem_offset < Ps2MemSize::MainRam) ? (mmap_GetRamPageInfo(eemem_offset) != ProtMode_Write) : true);
*mainmem_offset = (eemem_offset + HostMemoryMap::EEmemOffset);
*mainmem_size = (offsetof(EEVM_MemoryAllocMess, ZeroRead) - eemem_offset);
*prot = PageProtectionMode().Read().Write(writeable);
return true;
}
// IOP memory.
if (ptr >= (uptr)iopMem->Main && page_end <= (uptr)iopMem->P)
{
const u32 iopmem_offset = static_cast<u32>(ptr - (uptr)iopMem->Main);
*mainmem_offset = iopmem_offset + HostMemoryMap::IOPmemOffset;
*mainmem_size = (offsetof(IopVM_MemoryAllocMess, P) - iopmem_offset);
*prot = PageProtectionMode().Read().Write();
return true;
}
// VU memory - this includes both data and code for VU0/VU1.
// Practically speaking, this is only data, because the code goes through a handler.
if (ptr >= (uptr)vmmem.VUMemory().GetPtr() && page_end <= (uptr)vmmem.VUMemory().GetPtrEnd())
{
const u32 vumem_offset = static_cast<u32>(ptr - (uptr)vmmem.VUMemory().GetPtr());
*mainmem_offset = vumem_offset + HostMemoryMap::VUmemOffset;
*mainmem_size = vmmem.VUMemory().GetSize() - vumem_offset;
*prot = PageProtectionMode().Read().Write();
return true;
}
// We end up with some unknown mappings here; currently the IOP memory, instead of being physically mapped
// as 2MB, ends up being mapped as 8MB. But this shouldn't be virtual mapped anyway, so fallback to slowmem
// in such cases.
return false;
}
static bool vtlb_GetMainMemoryOffset(u32 paddr, u32* mainmem_offset, u32* mainmem_size, PageProtectionMode* prot)
{
if (paddr >= VTLB_PMAP_SZ)
return false;
// Handlers aren't in our shared memory, obviously.
const VTLBPhysical& vm = vtlbdata.pmap[paddr >> VTLB_PAGE_BITS];
if (vm.isHandler())
return false;
return vtlb_GetMainMemoryOffsetFromPtr(vm.raw(), mainmem_offset, mainmem_size, prot);
}
static void vtlb_CreateFastmemMapping(u32 vaddr, u32 mainmem_offset, const PageProtectionMode& mode)
{
FASTMEM_LOG("Create fastmem mapping @ vaddr %08X mainmem %08X", vaddr, mainmem_offset);
const u32 page = vaddr / VTLB_PAGE_SIZE;
if (s_fastmem_virtual_mapping[page] == mainmem_offset)
{
// current mapping is fine
return;
}
if (s_fastmem_virtual_mapping[page] != NO_FASTMEM_MAPPING)
{
// current mapping needs to be removed
const bool was_coalesced = vtlb_IsHostCoalesced(page);
s_fastmem_virtual_mapping[page] = NO_FASTMEM_MAPPING;
if (was_coalesced && !s_fastmem_area->Unmap(s_fastmem_area->PagePointer(vtlb_HostPage(page)), __pagesize))
Console.Error("Failed to unmap vaddr %08X", vaddr);
// remove reverse mapping
auto range = s_fastmem_physical_mapping.equal_range(mainmem_offset);
for (auto it = range.first; it != range.second; )
{
auto this_it = it++;
if (this_it->second == vaddr)
s_fastmem_physical_mapping.erase(this_it);
}
}
s_fastmem_virtual_mapping[page] = mainmem_offset;
if (vtlb_IsHostCoalesced(page))
{
const u32 host_page = vtlb_HostPage(page);
const u32 host_offset = vtlb_HostAlignOffset(mainmem_offset);
if (!s_fastmem_area->Map(GetVmMemory().MainMemory()->GetFileHandle(), host_offset,
s_fastmem_area->PagePointer(host_page), __pagesize, mode))
{
Console.Error("Failed to map vaddr %08X to mainmem offset %08X", vtlb_HostAlignOffset(vaddr), host_offset);
s_fastmem_virtual_mapping[page] = NO_FASTMEM_MAPPING;
return;
}
}
s_fastmem_physical_mapping.emplace(mainmem_offset, vaddr);
}
static void vtlb_RemoveFastmemMapping(u32 vaddr)
{
const u32 page = vaddr / VTLB_PAGE_SIZE;
if (s_fastmem_virtual_mapping[page] == NO_FASTMEM_MAPPING)
return;
const u32 mainmem_offset = s_fastmem_virtual_mapping[page];
const bool was_coalesced = vtlb_IsHostCoalesced(page);
FASTMEM_LOG("Remove fastmem mapping @ vaddr %08X mainmem %08X", vaddr, mainmem_offset);
s_fastmem_virtual_mapping[page] = NO_FASTMEM_MAPPING;
if (was_coalesced && !s_fastmem_area->Unmap(s_fastmem_area->PagePointer(vtlb_HostPage(page)), __pagesize))
Console.Error("Failed to unmap vaddr %08X", vtlb_HostAlignOffset(vaddr));
// remove from reverse map
auto range = s_fastmem_physical_mapping.equal_range(mainmem_offset);
for (auto it = range.first; it != range.second;)
{
auto this_it = it++;
if (this_it->second == vaddr)
s_fastmem_physical_mapping.erase(this_it);
}
}
static void vtlb_RemoveFastmemMappings(u32 vaddr, u32 size)
{
pxAssert((vaddr & VTLB_PAGE_MASK) == 0);
pxAssert(size > 0 && (size & VTLB_PAGE_MASK) == 0);
const u32 num_pages = size / VTLB_PAGE_SIZE;
for (u32 i = 0; i < num_pages; i++, vaddr += VTLB_PAGE_SIZE)
vtlb_RemoveFastmemMapping(vaddr);
}
static void vtlb_RemoveFastmemMappings()
{
if (s_fastmem_virtual_mapping.empty())
{
// not initialized yet
return;
}
for (u32 page = 0; page < FASTMEM_PAGE_COUNT; page++)
{
if (s_fastmem_virtual_mapping[page] == NO_FASTMEM_MAPPING)
continue;
s_fastmem_virtual_mapping[page] = NO_FASTMEM_MAPPING;
if (!vtlb_IsHostAligned(page << VTLB_PAGE_BITS))
continue;
if (!s_fastmem_area->Unmap(s_fastmem_area->PagePointer(vtlb_HostPage(page)), __pagesize))
Console.Error("Failed to unmap vaddr %08X", page * __pagesize);
}
s_fastmem_physical_mapping.clear();
}
bool vtlb_ResolveFastmemMapping(uptr* addr)
{
uptr uaddr = *addr;
uptr fastmem_start = (uptr)vtlbdata.fastmem_base;
uptr fastmem_end = fastmem_start + 0xFFFFFFFFu;
if (uaddr < fastmem_start || uaddr > fastmem_end)
return false;
const u32 vaddr = static_cast<u32>(uaddr - fastmem_start);
FASTMEM_LOG("Trying to resolve %p (vaddr %08X)", (void*)uaddr, vaddr);
const u32 vpage = vaddr / VTLB_PAGE_SIZE;
if (s_fastmem_virtual_mapping[vpage] == NO_FASTMEM_MAPPING)
{
FASTMEM_LOG("%08X is not virtual mapped", vaddr);
return false;
}
const u32 mainmem_offset = s_fastmem_virtual_mapping[vpage] + (vaddr & VTLB_PAGE_MASK);
FASTMEM_LOG("Resolved %p (vaddr %08X) to mainmem offset %08X", uaddr, vaddr, mainmem_offset);
*addr = ((uptr)GetVmMemory().MainMemory()->GetBase()) + mainmem_offset;
return true;
}
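
The address arithmetic above, replayed with made-up numbers (the base and offsets are hypothetical, chosen only to show the decomposition):

#include <cstdint>
#include <cstdio>

int main()
{
    const uintptr_t fastmem_base = 0x200000000ULL; // hypothetical vtlbdata.fastmem_base
    const uintptr_t fault_addr = 0x20001A2C4ULL;   // hypothetical faulting host address
    const uint32_t vaddr = (uint32_t)(fault_addr - fastmem_base); // 0x0001A2C4
    const uint32_t page = vaddr / 4096;                           // VTLB page 0x1A
    const uint32_t page_offset = vaddr & 0xFFFu;                  // 0x2C4
    const uint32_t mapped = 0x00400000; // pretend s_fastmem_virtual_mapping[page]
    std::printf("vaddr %08X (page %u) -> mainmem offset %08X\n",
        vaddr, page, mapped + page_offset);
}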
bool vtlb_GetGuestAddress(uptr host_addr, u32* guest_addr)
{
uptr fastmem_start = (uptr)vtlbdata.fastmem_base;
uptr fastmem_end = fastmem_start + 0xFFFFFFFFu;
if (host_addr < fastmem_start || host_addr > fastmem_end)
return false;
*guest_addr = static_cast<u32>(host_addr - fastmem_start);
return true;
}
void vtlb_UpdateFastmemProtection(u32 paddr, u32 size, const PageProtectionMode& prot)
{
if (!CHECK_FASTMEM)
return;
pxAssert((paddr & VTLB_PAGE_MASK) == 0);
pxAssert(size > 0 && (size & VTLB_PAGE_MASK) == 0);
u32 mainmem_start, mainmem_size;
PageProtectionMode old_prot;
if (!vtlb_GetMainMemoryOffset(paddr, &mainmem_start, &mainmem_size, &old_prot))
return;
FASTMEM_LOG("UpdateFastmemProtection %08X mmoffset %08X %08X", paddr, mainmem_start, size);
u32 current_mainmem = mainmem_start;
const u32 num_pages = std::min(size, mainmem_size) / VTLB_PAGE_SIZE;
for (u32 i = 0; i < num_pages; i++, current_mainmem += VTLB_PAGE_SIZE)
{
// update virtual mappings aliased to this physical page
auto range = s_fastmem_physical_mapping.equal_range(current_mainmem);
for (auto it = range.first; it != range.second; ++it)
{
FASTMEM_LOG(" valias %08X (size %u)", it->second, VTLB_PAGE_SIZE);
if (vtlb_IsHostAligned(it->second))
HostSys::MemProtect(s_fastmem_area->OffsetPointer(it->second), __pagesize, prot);
}
}
}
void vtlb_ClearLoadStoreInfo()
{
s_fastmem_backpatch_info.clear();
s_fastmem_faulting_pcs.clear();
}
void vtlb_AddLoadStoreInfo(uptr code_address, u32 code_size, u32 guest_pc, u32 gpr_bitmask, u32 fpr_bitmask, u8 address_register, u8 data_register, u8 size_in_bits, bool is_signed, bool is_load, bool is_fpr)
{
pxAssert(code_size < std::numeric_limits<u8>::max());
auto iter = s_fastmem_backpatch_info.find(code_address);
if (iter != s_fastmem_backpatch_info.end())
s_fastmem_backpatch_info.erase(iter);
LoadstoreBackpatchInfo info{guest_pc, gpr_bitmask, fpr_bitmask, static_cast<u8>(code_size), address_register, data_register, size_in_bits, is_signed, is_load, is_fpr};
s_fastmem_backpatch_info.emplace(code_address, info);
}
bool vtlb_BackpatchLoadStore(uptr code_address, uptr fault_address)
{
uptr fastmem_start = (uptr)vtlbdata.fastmem_base;
uptr fastmem_end = fastmem_start + 0xFFFFFFFFu;
if (fault_address < fastmem_start || fault_address > fastmem_end)
return false;
auto iter = s_fastmem_backpatch_info.find(code_address);
if (iter == s_fastmem_backpatch_info.end())
return false;
const LoadstoreBackpatchInfo& info = iter->second;
const u32 guest_addr = static_cast<u32>(fault_address - fastmem_start);
vtlb_DynBackpatchLoadStore(code_address, info.code_size, info.guest_pc, guest_addr,
info.gpr_bitmask, info.fpr_bitmask, info.address_register, info.data_register,
info.size_in_bits, info.is_signed, info.is_load, info.is_fpr);
// queue block for recompilation later
Cpu->Clear(info.guest_pc, 1);
// and store the pc in the faulting list, so that we don't emit another fastmem loadstore
s_fastmem_faulting_pcs.insert(info.guest_pc);
s_fastmem_backpatch_info.erase(iter);
return true;
}
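
The two functions above form a record/replay pair: at code-emission time the recompiler registers enough metadata to regenerate any fastmem access, and at fault time that record drives the rewrite. A sketch of the emit-side half (the actual x86 emission is elided; register numbers and bitmask handling are simplified, and RecordFastmemLoad32 is a hypothetical caller, not a function in this file):

// assumed context: called by the recompiler right after emitting a fastmem load
void RecordFastmemLoad32(uptr host_code_ptr, u32 code_size, u32 guest_pc, int addr_reg, int dest_reg)
{
    // gpr/fpr bitmasks describe which host registers are live and must be
    // preserved by the patched-in slow call; 0 here for simplicity
    vtlb_AddLoadStoreInfo(host_code_ptr, code_size, guest_pc,
        /*gpr_bitmask=*/0, /*fpr_bitmask=*/0,
        static_cast<u8>(addr_reg), static_cast<u8>(dest_reg),
        /*size_in_bits=*/32, /*is_signed=*/false, /*is_load=*/true, /*is_fpr=*/false);
}
// On a fault, vtlb_BackpatchLoadStore(pc, addr) looks this record up, rewrites the
// code_size bytes at host_code_ptr with a slow-path call, and remembers guest_pc in
// s_fastmem_faulting_pcs so the next recompile of that instruction skips fastmem
// entirely (checked via vtlb_IsFaultingPC).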
bool vtlb_IsFaultingPC(u32 guest_pc)
{
return (s_fastmem_faulting_pcs.find(guest_pc) != s_fastmem_faulting_pcs.end());
}
//virtual mappings
//TODO: Add invalid paddr checks
void vtlb_VMap(u32 vaddr,u32 paddr,u32 size)
@@ -667,6 +1039,23 @@ void vtlb_VMap(u32 vaddr,u32 paddr,u32 size)
verify(0==(paddr&VTLB_PAGE_MASK));
verify(0==(size&VTLB_PAGE_MASK) && size>0);
if (CHECK_FASTMEM)
{
const u32 num_pages = size / VTLB_PAGE_SIZE;
u32 current_vaddr = vaddr;
u32 current_paddr = paddr;
for (u32 i = 0; i < num_pages; i++, current_vaddr += VTLB_PAGE_SIZE, current_paddr += VTLB_PAGE_SIZE)
{
u32 hoffset, hsize;
PageProtectionMode mode;
if (vtlb_GetMainMemoryOffset(current_paddr, &hoffset, &hsize, &mode))
vtlb_CreateFastmemMapping(current_vaddr, hoffset, mode);
else
vtlb_RemoveFastmemMapping(current_vaddr);
}
}
while (size > 0)
{
VTLBVirtual vmv;
@@ -696,6 +1085,22 @@ void vtlb_VMapBuffer(u32 vaddr,void* buffer,u32 size)
verify(0==(vaddr&VTLB_PAGE_MASK));
verify(0==(size&VTLB_PAGE_MASK) && size>0);
if (CHECK_FASTMEM)
{
if (buffer == eeMem->Scratch && size == Ps2MemSize::Scratch)
{
u32 fm_vaddr = vaddr;
u32 fm_hostoffset = HostMemoryMap::EEmemOffset + offsetof(EEVM_MemoryAllocMess, Scratch);
PageProtectionMode mode = PageProtectionMode().Read().Write();
for (u32 i = 0; i < (Ps2MemSize::Scratch / VTLB_PAGE_SIZE); i++, fm_vaddr += VTLB_PAGE_SIZE, fm_hostoffset += VTLB_PAGE_SIZE)
vtlb_CreateFastmemMapping(fm_vaddr, fm_hostoffset, mode);
}
else
{
vtlb_RemoveFastmemMappings(vaddr, size);
}
}
uptr bu8 = (uptr)buffer;
while (size > 0)
{
@@ -711,6 +1116,8 @@ void vtlb_VMapUnmap(u32 vaddr,u32 size)
verify(0==(vaddr&VTLB_PAGE_MASK));
verify(0==(size&VTLB_PAGE_MASK) && size>0);
vtlb_RemoveFastmemMappings(vaddr, size);
while (size > 0)
{
@@ -775,11 +1182,45 @@ void vtlb_Init()
// This function should probably be part of the COP0 rather than here in VTLB.
void vtlb_Reset()
{
vtlb_RemoveFastmemMappings();
for(int i=0; i<48; i++) UnmapTLB(i);
}
void vtlb_Shutdown()
{
vtlb_RemoveFastmemMappings();
s_fastmem_backpatch_info.clear();
s_fastmem_faulting_pcs.clear();
}
void vtlb_ResetFastmem()
{
DevCon.WriteLn("Resetting fastmem mappings...");
vtlb_RemoveFastmemMappings();
s_fastmem_backpatch_info.clear();
s_fastmem_faulting_pcs.clear();
if (!CHECK_FASTMEM || !CHECK_EEREC || !vtlbdata.vmap)
return;
// we need to go through and look at the vtlb pointers, to remap the host area
for (size_t i = 0; i < VTLB_VMAP_ITEMS; i++)
{
const VTLBVirtual& vm = vtlbdata.vmap[i];
const u32 vaddr = static_cast<u32>(i) << VTLB_PAGE_BITS;
if (vm.isHandler(vaddr))
{
// Handlers should be unmapped.
continue;
}
// Check if it's a physical mapping to our main memory area.
u32 mainmem_offset, mainmem_size;
PageProtectionMode prot;
if (vtlb_GetMainMemoryOffsetFromPtr(vm.assumePtr(vaddr), &mainmem_offset, &mainmem_size, &prot))
vtlb_CreateFastmemMapping(vaddr, mainmem_offset, prot);
}
} }
static constexpr size_t VMAP_SIZE = sizeof(VTLBVirtual) * VTLB_VMAP_ITEMS;
@@ -804,6 +1245,19 @@ void vtlb_Core_Alloc()
HostSys::MemProtect(vmap, VMAP_SIZE, PageProtectionMode().Read().Write());
vtlbdata.vmap = vmap;
}
if (!vtlbdata.fastmem_base)
{
pxAssert(!s_fastmem_area);
s_fastmem_area = SharedMemoryMappingArea::Create(FASTMEM_AREA_SIZE);
if (!s_fastmem_area)
pxFailRel("Failed to allocate fastmem area");
s_fastmem_virtual_mapping.resize(FASTMEM_PAGE_COUNT, NO_FASTMEM_MAPPING);
vtlbdata.fastmem_base = (uptr)s_fastmem_area->BasePointer();
Console.WriteLn(Color_StrongGreen, "Fastmem area: %p - %p",
vtlbdata.fastmem_base, vtlbdata.fastmem_base + (FASTMEM_AREA_SIZE - 1));
}
}
static constexpr size_t PPMAP_SIZE = sizeof(*vtlbdata.ppmap) * VTLB_VMAP_ITEMS;
@@ -840,6 +1294,14 @@ void vtlb_Core_Free()
HostSys::MemProtect(vtlbdata.ppmap, PPMAP_SIZE, PageProtectionMode());
vtlbdata.ppmap = nullptr;
}
vtlb_RemoveFastmemMappings();
vtlb_ClearLoadStoreInfo();
vtlbdata.fastmem_base = 0;
decltype(s_fastmem_physical_mapping)().swap(s_fastmem_physical_mapping);
decltype(s_fastmem_virtual_mapping)().swap(s_fastmem_virtual_mapping);
s_fastmem_area.reset();
}
static std::string GetHostVmErrorMsg()


@@ -57,6 +57,7 @@ extern void vtlb_Alloc_Ppmap();
extern void vtlb_Init();
extern void vtlb_Shutdown();
extern void vtlb_Reset();
extern void vtlb_ResetFastmem();
extern vtlbHandler vtlb_NewHandler();
@@ -82,6 +83,15 @@ extern void vtlb_DynV2P();
extern void vtlb_VMap(u32 vaddr,u32 paddr,u32 sz);
extern void vtlb_VMapBuffer(u32 vaddr,void* buffer,u32 sz);
extern void vtlb_VMapUnmap(u32 vaddr,u32 sz);
extern bool vtlb_ResolveFastmemMapping(uptr* addr);
extern bool vtlb_GetGuestAddress(uptr host_addr, u32* guest_addr);
extern void vtlb_UpdateFastmemProtection(u32 paddr, u32 size, const PageProtectionMode& prot);
extern bool vtlb_BackpatchLoadStore(uptr code_address, uptr fault_address);
extern void vtlb_ClearLoadStoreInfo();
extern void vtlb_AddLoadStoreInfo(uptr code_address, u32 code_size, u32 guest_pc, u32 gpr_bitmask, u32 fpr_bitmask, u8 address_register, u8 data_register, u8 size_in_bits, bool is_signed, bool is_load, bool is_fpr);
extern void vtlb_DynBackpatchLoadStore(uptr code_address, u32 code_size, u32 guest_pc, u32 guest_addr, u32 gpr_bitmask, u32 fpr_bitmask, u8 address_register, u8 data_register, u8 size_in_bits, bool is_signed, bool is_load, bool is_fpr);
extern bool vtlb_IsFaultingPC(u32 guest_pc);
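// --- illustrative sketch (not part of the commit) ---
// Taken together, these declarations describe the fault-driven slow path:
// vtlb_AddLoadStoreInfo() records enough metadata about each emitted fastmem access
// that, when one of them faults, the host signal/exception handler can rewrite it in
// place. A hedged sketch of how a handler might drive the API; the 4 GB bounds check
// and the function name here are assumptions, only the vtlb_* calls come from this header.

// 'code_address' is the faulting instruction, 'fault_address' the accessed host
// address; both come from the signal/exception context.
static bool TryBackpatchFastmemFault(uptr code_address, uptr fault_address)
{
    // Assumed check: only addresses inside the fastmem reservation qualify.
    const uptr base = vtlb_private::vtlbdata.fastmem_base;
    if (fault_address < base || fault_address >= base + 0x100000000ull)
        return false; // not a fastmem access; let other handlers look at it

    // Rewrites the recorded load/store at code_address into a slow-path call.
    return vtlb_BackpatchLoadStore(code_address, fault_address);
}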
//Memory functions
@@ -101,13 +111,14 @@ extern DataType vtlb_ramRead(u32 mem);
template <typename DataType>
extern bool vtlb_ramWrite(u32 mem, const DataType& value);
-extern void vtlb_DynGenWrite(u32 sz);
-extern void vtlb_DynGenReadNonQuad(u32 bits, bool sign);
-extern int vtlb_DynGenReadQuad(u32 sz, int gpr);
-extern void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const );
-extern int vtlb_DynGenReadQuad_Const( u32 bits, u32 addr_const, int gpr );
-extern void vtlb_DynGenReadNonQuad_Const( u32 bits, bool sign, u32 addr_const );
+using vtlb_ReadRegAllocCallback = int(*)();
+extern int vtlb_DynGenReadNonQuad(u32 bits, bool sign, bool xmm, int addr_reg, vtlb_ReadRegAllocCallback dest_reg_alloc = nullptr);
+extern int vtlb_DynGenReadNonQuad_Const(u32 bits, bool sign, bool xmm, u32 addr_const, vtlb_ReadRegAllocCallback dest_reg_alloc = nullptr);
+extern int vtlb_DynGenReadQuad(u32 bits, int addr_reg, vtlb_ReadRegAllocCallback dest_reg_alloc = nullptr);
+extern int vtlb_DynGenReadQuad_Const(u32 bits, u32 addr_const, vtlb_ReadRegAllocCallback dest_reg_alloc = nullptr);
+extern void vtlb_DynGenWrite(u32 sz, bool xmm, int addr_reg, int value_reg);
+extern void vtlb_DynGenWrite_Const(u32 bits, bool xmm, u32 addr_const, int value_reg);
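// --- illustrative sketch (not part of the commit) ---
// The new vtlb_ReadRegAllocCallback parameter lets the generator defer allocating the
// destination register until it knows the load will actually produce one, which avoids
// wasting a host register on the slow path. A hedged usage sketch; recLW_Sketch and the
// callback body are hypothetical, while _allocX86reg/X86TYPE_GPR/MODE_WRITE come from iCore.

static int AllocRtForLoad()
{
    return _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
}

static void recLW_Sketch(int addr_reg)
{
    // dest is whichever host GPR the callback handed back to the generator.
    const int dest = vtlb_DynGenReadNonQuad(32, /*sign=*/true, /*xmm=*/false,
                                            addr_reg, AllocRtForLoad);
    (void)dest; // the loaded, sign-extended value is now cached in a host register
}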
// --------------------------------------------------------------------------------------
// VtlbMemoryReserve
@@ -125,7 +136,7 @@ public:
// --------------------------------------------------------------------------------------
// eeMemoryReserve
// --------------------------------------------------------------------------------------
-class eeMemoryReserve : private VtlbMemoryReserve
+class eeMemoryReserve : public VtlbMemoryReserve
{
typedef VtlbMemoryReserve _parent;
@@ -142,7 +153,7 @@ public:
// --------------------------------------------------------------------------------------
// iopMemoryReserve
// --------------------------------------------------------------------------------------
-class iopMemoryReserve : private VtlbMemoryReserve
+class iopMemoryReserve : public VtlbMemoryReserve
{
typedef VtlbMemoryReserve _parent;
@@ -159,7 +170,7 @@ public:
// --------------------------------------------------------------------------------------
// vuMemoryReserve
// --------------------------------------------------------------------------------------
-class vuMemoryReserve : private VtlbMemoryReserve
+class vuMemoryReserve : public VtlbMemoryReserve
{
typedef VtlbMemoryReserve _parent;
@@ -253,10 +264,13 @@ namespace vtlb_private
u32* ppmap; //4MB (allocated by vtlb_init) // PS2 virtual to PS2 physical
uptr fastmem_base;
MapData()
{
vmap = NULL;
ppmap = NULL;
fastmem_base = 0;
}
};
@@ -44,7 +44,7 @@ namespace COP0 {
// this should be a conditional Jump -- JZ or JNZ normally.
static void _setupBranchTest()
{
-_eeFlushAllUnused();
+_eeFlushAllDirty();
// COP0 branch conditionals are based on the following equation:
// (((psHu16(DMAC_STAT) | ~psHu16(DMAC_PCR)) & 0x3ff) == 0x3ff)
@@ -64,26 +64,32 @@ static void _setupBranchTest()
void recBC0F()
{
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
const bool swap = TrySwapDelaySlot(0, 0, 0);
_setupBranchTest();
-recDoBranchImm(JE32(0));
+recDoBranchImm(branchTo, JE32(0), false, swap);
}
void recBC0T()
{
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
const bool swap = TrySwapDelaySlot(0, 0, 0);
_setupBranchTest();
-recDoBranchImm(JNE32(0));
+recDoBranchImm(branchTo, JNE32(0), false, swap);
}
void recBC0FL()
{
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
_setupBranchTest();
-recDoBranchImm_Likely(JE32(0));
+recDoBranchImm(branchTo, JE32(0), true, false);
}
void recBC0TL()
{
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
_setupBranchTest();
-recDoBranchImm_Likely(JNE32(0));
+recDoBranchImm(branchTo, JNE32(0), true, false);
}
void recTLBR() { recCall(Interp::TLBR); }
@@ -118,7 +124,7 @@ void recDI()
// Jak X, Namco 50th anniversary, Spongebob the Movie, Spongebob Battle for Bikini Bottom,
// The Incredibles, The Incredibles rize of the underminer, Soukou kihei armodyne, Garfield Saving Arlene, Tales of Fandom Vol. 2.
if (!g_recompilingDelaySlot)
-recompileNextInstruction(0); // DI execution is delayed by one instruction
+recompileNextInstruction(false, false); // DI execution is delayed by one instruction
xMOV(eax, ptr[&cpuRegs.CP0.n.Status]);
xTEST(eax, 0x20006); // EXL | ERL | EDI
@@ -152,13 +158,12 @@ void recMFC0()
x86SetJ8(skipInc);
xADD(ptr[&cpuRegs.CP0.n.Count], eax);
xMOV(ptr[&cpuRegs.lastCOP0Cycle], ecx);
-xMOV(eax, ptr[&cpuRegs.CP0.r[_Rd_]]);
if (!_Rt_)
return;
-_deleteEEreg(_Rt_, 0);
-eeSignExtendTo(_Rt_);
+const int regt = _Rt_ ? _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE) : -1;
+xMOVSX(xRegister64(regt), ptr32[&cpuRegs.CP0.r[_Rd_]]);
return;
}
@@ -169,22 +174,25 @@ void recMFC0()
{
if (0 == (_Imm_ & 1)) // MFPS, register value ignored
{
-xMOV(eax, ptr[&cpuRegs.PERF.n.pccr]);
+const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
+xMOVSX(xRegister64(regt), ptr32[&cpuRegs.PERF.n.pccr]);
}
else if (0 == (_Imm_ & 2)) // MFPC 0, only LSB of register matters
{
iFlushCall(FLUSH_INTERPRETER);
xFastCall((void*)COP0_UpdatePCCR);
-xMOV(eax, ptr[&cpuRegs.PERF.n.pcr0]);
+const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
+xMOVSX(xRegister64(regt), ptr32[&cpuRegs.PERF.n.pcr0]);
}
else // MFPC 1
{
iFlushCall(FLUSH_INTERPRETER);
xFastCall((void*)COP0_UpdatePCCR);
-xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]);
+const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
+xMOVSX(xRegister64(regt), ptr32[&cpuRegs.PERF.n.pcr1]);
}
-_deleteEEreg(_Rt_, 0);
-eeSignExtendTo(_Rt_);
return;
}
@@ -193,10 +201,9 @@ void recMFC0()
COP0_LOG("MFC0 Breakpoint debug Registers code = %x\n", cpuRegs.code & 0x3FF);
return;
}
-_eeOnWriteReg(_Rt_, 1);
-_deleteEEreg(_Rt_, 0);
-xMOV(eax, ptr[&cpuRegs.CP0.r[_Rd_]]);
-eeSignExtendTo(_Rt_);
+const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
+xMOVSX(xRegister64(regt), ptr32[&cpuRegs.CP0.r[_Rd_]]);
}
void recMTC0()
@@ -260,15 +267,15 @@ void recMTC0()
switch (_Rd_)
{
case 12:
_eeMoveGPRtoR(arg1reg, _Rt_);
iFlushCall(FLUSH_INTERPRETER);
-_eeMoveGPRtoR(ecx, _Rt_);
-xFastCall((void*)WriteCP0Status, ecx);
+xFastCall((void*)WriteCP0Status);
break;
case 16:
_eeMoveGPRtoR(arg1reg, _Rt_);
iFlushCall(FLUSH_INTERPRETER);
-_eeMoveGPRtoR(ecx, _Rt_);
-xFastCall((void*)WriteCP0Config, ecx);
+xFastCall((void*)WriteCP0Config);
break;
case 9:
(File diff suppressed because it is too large.)
@@ -22,86 +22,72 @@
// Namespace Note : iCore32 contains all of the Register Allocation logic, in addition to a handful
// of utility functions for emitting frequent code.
//#define RALOG(...) fprintf(stderr, __VA_ARGS__)
#define RALOG(...)
////////////////////////////////////////////////////////////////////////////////
// Shared Register allocation flags (apply to X86, XMM, MMX, etc).
#define MODE_READ 1
#define MODE_WRITE 2
-#define MODE_READHALF 4 // read only low 64 bits
-#define MODE_VUXY 8 // vector only has xy valid (real zw are in mem), not the same as MODE_READHALF
-#define MODE_VUZ 0x10 // z only doesn't work for now
-#define MODE_VUXYZ (MODE_VUZ | MODE_VUXY) // vector only has xyz valid (real w is in memory)
-#define MODE_NOFLUSH 0x20 // can't flush reg to mem
-#define MODE_NOFRAME 0x40 // when allocating x86regs, don't use ebp reg
-#define MODE_8BITREG 0x80 // when allocating x86regs, use only eax, ecx, edx, and ebx
+#define MODE_CALLEESAVED 0x20 // can't flush reg to mem
#define PROCESS_EE_XMM 0x02
-// currently only used in FPU
#define PROCESS_EE_S 0x04 // S is valid, otherwise take from mem
#define PROCESS_EE_T 0x08 // T is valid, otherwise take from mem
+#define PROCESS_EE_D 0x10 // D is valid, otherwise take from mem
-// not used in VU recs
-#define PROCESS_EE_MODEWRITES 0x10 // if s is a reg, set if not in cpuRegs
-#define PROCESS_EE_MODEWRITET 0x20 // if t is a reg, set if not in cpuRegs
#define PROCESS_EE_LO 0x40 // lo reg is valid
#define PROCESS_EE_HI 0x80 // hi reg is valid
#define PROCESS_EE_ACC 0x40 // acc reg is valid
-// used in VU recs
-#define PROCESS_VU_UPDATEFLAGS 0x10
-#define PROCESS_VU_COP2 0x80 // simple cop2
#define EEREC_S (((info) >> 8) & 0xf)
#define EEREC_T (((info) >> 12) & 0xf)
#define EEREC_D (((info) >> 16) & 0xf)
#define EEREC_LO (((info) >> 20) & 0xf)
#define EEREC_HI (((info) >> 24) & 0xf)
#define EEREC_ACC (((info) >> 20) & 0xf)
-#define EEREC_TEMP (((info) >> 24) & 0xf)
-#define VUREC_FMAC ((info)&0x80000000)
-#define PROCESS_EE_SET_S(reg) ((reg) << 8)
-#define PROCESS_EE_SET_T(reg) ((reg) << 12)
-#define PROCESS_EE_SET_D(reg) ((reg) << 16)
-#define PROCESS_EE_SET_LO(reg) ((reg) << 20)
-#define PROCESS_EE_SET_HI(reg) ((reg) << 24)
-#define PROCESS_EE_SET_ACC(reg) ((reg) << 20)
-#define PROCESS_VU_SET_ACC(reg) PROCESS_EE_SET_ACC(reg)
-#define PROCESS_VU_SET_TEMP(reg) ((reg) << 24)
-#define PROCESS_VU_SET_FMAC() 0x80000000
+#define PROCESS_EE_SET_S(reg) (((reg) << 8) | PROCESS_EE_S)
+#define PROCESS_EE_SET_T(reg) (((reg) << 12) | PROCESS_EE_T)
+#define PROCESS_EE_SET_D(reg) (((reg) << 16) | PROCESS_EE_D)
+#define PROCESS_EE_SET_LO(reg) (((reg) << 20) | PROCESS_EE_LO)
+#define PROCESS_EE_SET_HI(reg) (((reg) << 24) | PROCESS_EE_HI)
+#define PROCESS_EE_SET_ACC(reg) (((reg) << 20) | PROCESS_EE_ACC)
// special info not related to above flags
#define PROCESS_CONSTS 1
#define PROCESS_CONSTT 2
// XMM caching helpers
#define XMMINFO_READLO 0x001
#define XMMINFO_READHI 0x002
#define XMMINFO_WRITELO 0x004
#define XMMINFO_WRITEHI 0x008
#define XMMINFO_WRITED 0x010
#define XMMINFO_READD 0x020
#define XMMINFO_READS 0x040
#define XMMINFO_READT 0x080
#define XMMINFO_READACC 0x200
#define XMMINFO_WRITEACC 0x400
#define XMMINFO_WRITET 0x800
#define XMMINFO_64BITOP 0x1000
#define XMMINFO_FORCEREGS 0x2000
#define XMMINFO_FORCEREGT 0x4000
#define XMMINFO_NORENAME 0x8000 // disables renaming of Rs to Rt in Rt = Rs op imm
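// --- illustrative sketch (not part of the commit) ---
// The PROCESS_EE_SET_* change above is worth calling out: the setters now OR in the
// matching validity bit, so a caller can no longer encode a register index without
// also marking it valid. A small round-trip illustration using only the macros above.

static int RoundTripS()
{
    const int info = PROCESS_EE_SET_S(5);        // reg 5 in bits 8..11, plus PROCESS_EE_S
    return (info & PROCESS_EE_S) ? EEREC_S : -1; // decodes back to 5 here
}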
////////////////////////////////////////////////////////////////////////////////
// X86 (32-bit) Register Allocation Tools
#define X86TYPE_TEMP 0
#define X86TYPE_GPR 1
-#define X86TYPE_VI 2
-#define X86TYPE_MEMOFFSET 3
-#define X86TYPE_VIMEMOFFSET 4
-#define X86TYPE_VUQREAD 5
-#define X86TYPE_VUPREAD 6
-#define X86TYPE_VUQWRITE 7
-#define X86TYPE_VUPWRITE 8
-#define X86TYPE_PSX 9
-#define X86TYPE_PCWRITEBACK 10
-#define X86TYPE_PSX_PCWRITEBACK 12
-#define X86TYPE_VITEMP 13
-#define X86TYPE_FNARG 14 // function parameter, max is 4
-#define X86TYPE_VU1 0x80
-//#define X86_ISVI(type) ((type&~X86TYPE_VU1) == X86TYPE_VI)
-static __fi int X86_ISVI(int type)
-{
-return ((type & ~X86TYPE_VU1) == X86TYPE_VI);
-}
+#define X86TYPE_FPRC 2
+#define X86TYPE_VIREG 3
+#define X86TYPE_PCWRITEBACK 4
+#define X86TYPE_PSX 5
+#define X86TYPE_PSX_PCWRITEBACK 6
struct _x86regs
{
@@ -116,79 +102,83 @@ struct _x86regs
extern _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR];
-uptr _x86GetAddr(int type, int reg);
+bool _isAllocatableX86reg(int x86reg);
void _initX86regs();
int _getFreeX86reg(int mode);
-int _allocX86reg(x86Emitter::xRegister32 x86reg, int type, int reg, int mode);
-void _deleteX86reg(int type, int reg, int flush);
+int _allocX86reg(int type, int reg, int mode);
int _checkX86reg(int type, int reg, int mode);
bool _hasX86reg(int type, int reg, int required_mode = 0);
void _addNeededX86reg(int type, int reg);
void _clearNeededX86regs();
void _freeX86reg(const x86Emitter::xRegister32& x86reg);
void _freeX86reg(int x86reg);
void _freeX86regWithoutWriteback(int x86reg);
void _freeX86regs();
-void _flushCachedRegs();
+void _flushX86regs();
void _flushConstRegs();
void _flushConstReg(int reg);
void _validateRegs();
void _writebackX86Reg(int x86reg);
////////////////////////////////////////////////////////////////////////////////
// XMM (128-bit) Register Allocation Tools
-#define XMM_CONV_VU(VU) (VU == &VU1)
#define XMMTYPE_TEMP 0 // has to be 0
-#define XMMTYPE_VFREG 1
-#define XMMTYPE_ACC 2
-#define XMMTYPE_FPREG 3
-#define XMMTYPE_FPACC 4
-#define XMMTYPE_GPRREG 5
+#define XMMTYPE_GPRREG X86TYPE_GPR
+#define XMMTYPE_FPREG 6
+#define XMMTYPE_FPACC 7
+#define XMMTYPE_VFREG 8
// lo and hi regs
#define XMMGPR_LO 33
#define XMMGPR_HI 32
#define XMMFPU_ACC 32
enum : int
{
DELETE_REG_FREE = 0,
DELETE_REG_FLUSH = 1,
DELETE_REG_FLUSH_AND_FREE = 2,
DELETE_REG_FREE_NO_WRITEBACK = 3
};
struct _xmmregs
{
u8 inuse;
-u8 reg;
+s8 reg;
u8 type;
u8 mode;
u8 needed;
-u8 VU; // 0 = VU0, 1 = VU1
u16 counter;
};
void _cop2BackupRegs();
void _cop2RestoreRegs();
void _initXMMregs();
-int _getFreeXMMreg();
-int _allocTempXMMreg(XMMSSEType type, int xmmreg);
-int _allocFPtoXMMreg(int xmmreg, int fpreg, int mode);
-int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode);
-int _allocFPACCtoXMMreg(int xmmreg, int mode);
+int _getFreeXMMreg(u32 maxreg = iREGCNT_XMM);
+int _allocTempXMMreg(XMMSSEType type);
+int _allocFPtoXMMreg(int fpreg, int mode);
+int _allocGPRtoXMMreg(int gprreg, int mode);
+int _allocFPACCtoXMMreg(int mode);
void _reallocateXMMreg(int xmmreg, int newtype, int newreg, int newmode, bool writeback = true);
int _checkXMMreg(int type, int reg, int mode);
bool _hasXMMreg(int type, int reg, int required_mode = 0);
void _addNeededFPtoXMMreg(int fpreg);
void _addNeededFPACCtoXMMreg();
void _addNeededGPRtoX86reg(int gprreg);
void _addNeededPSXtoX86reg(int gprreg);
void _addNeededGPRtoXMMreg(int gprreg);
void _clearNeededXMMregs();
-//void _deleteACCtoXMMreg(int vu, int flush);
+void _deleteGPRtoX86reg(int reg, int flush);
+void _deletePSXtoX86reg(int reg, int flush);
void _deleteGPRtoXMMreg(int reg, int flush);
void _deleteFPtoXMMreg(int reg, int flush);
-void _freeXMMreg(u32 xmmreg);
-void _clearNeededCOP2Regs();
-u16 _freeXMMregsCOP2();
-//void _moveXMMreg(int xmmreg); // instead of freeing, moves it to a diff location
+void _freeXMMreg(int xmmreg);
+void _freeXMMregWithoutWriteback(int xmmreg);
+void _writebackXMMreg(int xmmreg);
+int _allocVFtoXMMreg(int vfreg, int mode);
void mVUFreeCOP2XMMreg(int hostreg);
void _flushCOP2regs();
void _flushXMMreg(int xmmreg);
void _flushXMMregs();
-u8 _hasFreeXMMreg();
-void _freeXMMregs();
-int _getNumXMMwrite();
-void _signExtendSFtoM(uptr mem);
-// returns new index of reg, lower 32 bits already in mmx
-// shift is used when the data is in the top bits of the mmx reg to begin with
-// a negative shift is for sign extension
-int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy); // returns true if reg destroyed
//////////////////////
// Instruction Info //
@@ -205,54 +195,99 @@ int _signExtendXMMtoM(uptr to, x86SSERegType from, int candestroy); // returns t
// 3/ EEINST_LIVE* is cleared when register is written. And set again when register is read.
// My guess: the purpose is to detect the usage hole in the flow
-#define EEINST_LIVE0 1 // if var is ever used (read or write)
-#define EEINST_LIVE2 4 // if cur var's next 64 bits are needed
+#define EEINST_LIVE 1 // if var is ever used (read or write)
#define EEINST_LASTUSE 8 // if var isn't written/read anymore
-//#define EEINST_MMX 0x10 // removed
#define EEINST_XMM 0x20 // var will be used in xmm ops
#define EEINST_USED 0x40
-#define EEINSTINFO_COP1 1
-#define EEINSTINFO_COP2 2
#define EEINST_COP2_DENORMALIZE_STATUS_FLAG 0x100
#define EEINST_COP2_NORMALIZE_STATUS_FLAG 0x200
#define EEINST_COP2_STATUS_FLAG 0x400
#define EEINST_COP2_MAC_FLAG 0x800
#define EEINST_COP2_CLIP_FLAG 0x1000
-#define EEINST_COP2_FINISH_VU0_MICRO 0x2000
+#define EEINST_COP2_SYNC_VU0 0x2000
#define EEINST_COP2_FINISH_VU0 0x4000
#define EEINST_COP2_FLUSH_VU0_REGISTERS 0x8000
struct EEINST
{
u16 info; // extra info, if 1 inst is COP1, 2 inst is COP2. Also uses EEINST_XMM
u8 regs[34]; // includes HI/LO (HI=32, LO=33)
u8 fpuregs[33]; // ACC=32
u8 vfregs[33]; // ACC=32
u8 viregs[16];
// uses XMMTYPE_ flags; if type == XMMTYPE_TEMP, not used
u8 writeType[3], writeReg[3]; // reg written in this inst, 0 if no reg
u8 readType[4], readReg[4];
-// valid if info & EEINSTINFO_COP2
-int cycle; // cycle of inst (at offset from block)
-_VURegsNum vuregs;
};
extern EEINST* g_pCurInstInfo; // info for the cur instruction
extern void _recClearInst(EEINST* pinst);
// returns the number of insts + 1 until written (0 if not written)
-extern u32 _recIsRegWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg);
+extern u32 _recIsRegReadOrWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg);
-// returns the number of insts + 1 until used (0 if not used)
-//extern u32 _recIsRegUsed(EEINST* pinst, int size, u8 xmmtype, u8 reg);
extern void _recFillRegister(EEINST& pinst, int type, int reg, int write);
-static __fi bool EEINST_ISLIVE64(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0)); }
-static __fi bool EEINST_ISLIVEXMM(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0 | EEINST_LIVE2)); }
-static __fi bool EEINST_ISLIVE2(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & EEINST_LIVE2); }
-static __fi bool FPUINST_ISLIVE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0); }
+// If unset, values which are not live will not be written back to memory.
+// Tends to break stuff at the moment.
+#define EE_WRITE_DEAD_VALUES 1
+/// Returns true if the register is used later in the block, and this isn't the last instruction to use it.
/// In other words, the register is worth keeping in a host register/caching it.
static __fi bool EEINST_USEDTEST(u32 reg)
{
return (g_pCurInstInfo->regs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
}
/// Returns true if the register is used later in the block as an XMM/128-bit value.
static __fi bool EEINST_XMMUSEDTEST(u32 reg)
{
return (g_pCurInstInfo->regs[reg] & (EEINST_USED | EEINST_XMM | EEINST_LASTUSE)) == (EEINST_USED | EEINST_XMM);
}
/// Returns true if the specified VF register is used later in the block.
static __fi bool COP2INST_USEDTEST(u32 reg)
{
return (g_pCurInstInfo->vfregs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
}
/// Returns true if the value should be computed/written back.
/// Basically, this means it's either used before it's overwritten, or not overwritten by the end of the block.
static __fi bool EEINST_LIVETEST(u32 reg)
{
return EE_WRITE_DEAD_VALUES || ((g_pCurInstInfo->regs[reg] & EEINST_LIVE) != 0);
}
/// Returns true if the register can be renamed into another.
static __fi bool EEINST_RENAMETEST(u32 reg)
{
return (reg == 0 || !EEINST_USEDTEST(reg) || !EEINST_LIVETEST(reg));
}
static __fi bool FPUINST_ISLIVE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE); }
static __fi bool FPUINST_LASTUSE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE); }
/// Returns true if the register is used later in the block, and this isn't the last instruction to use it.
/// In other words, the register is worth keeping in a host register/caching it.
static __fi bool FPUINST_USEDTEST(u32 reg)
{
return (g_pCurInstInfo->fpuregs[reg] & (EEINST_USED | EEINST_LASTUSE)) == EEINST_USED;
}
/// Returns true if the value should be computed/written back.
static __fi bool FPUINST_LIVETEST(u32 reg)
{
return EE_WRITE_DEAD_VALUES || FPUINST_ISLIVE(reg);
}
/// Returns true if the register can be renamed into another.
static __fi bool FPUINST_RENAMETEST(u32 reg)
{
return (!FPUINST_USEDTEST(reg) || !FPUINST_LIVETEST(reg));
}
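// --- illustrative sketch (not part of the commit) ---
// In practice these predicates gate both ends of an allocation: USEDTEST decides
// whether caching a guest register in a host register will pay off, and LIVETEST
// decides whether a computed result must be written back at all. A hedged sketch of
// the pattern as a recompiler op might use it; recSketchALU is hypothetical, the
// predicates and alloc calls are from this header.

static void recSketchALU()
{
    // Rt is read again later in the block, so it is worth a host register.
    if (EEINST_USEDTEST(_Rt_))
        _allocX86reg(X86TYPE_GPR, _Rt_, MODE_READ);

    // Rd is overwritten before anyone reads it: skip computing/writing it
    // (only allowed once EE_WRITE_DEAD_VALUES can be turned off).
    if (!EEINST_LIVETEST(_Rd_))
        return;

    // ... emit the actual ALU operation here ...
}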
extern _xmmregs xmmregs[iREGCNT_XMM], s_saveXMMregs[iREGCNT_XMM];
extern thread_local u8* j8Ptr[32]; // deprecated item. use local u8* vars instead.
@@ -261,47 +296,32 @@ extern thread_local u32* j32Ptr[32]; // depreciated item. use local u32* vars i
extern u16 g_x86AllocCounter;
extern u16 g_xmmAllocCounter;
-// allocates only if later insts use XMM, otherwise checks
-int _allocCheckGPRtoXMM(EEINST* pinst, int gprreg, int mode);
-int _allocCheckFPUtoXMM(EEINST* pinst, int fpureg, int mode);
// allocates only if later insts use this register
-int _allocCheckGPRtoX86(EEINST* pinst, int gprreg, int mode);
+int _allocIfUsedGPRtoX86(int gprreg, int mode);
int _allocIfUsedGPRtoXMM(int gprreg, int mode);
int _allocIfUsedFPUtoXMM(int fpureg, int mode);
//////////////////////////////////////////////////////////////////////////
// iFlushCall / _psxFlushCall Parameters
-// Flushing vs. Freeing, as understood by Air (I could be wrong still....)
-// "Freeing" registers means that the contents of the registers are flushed to memory.
-// This is good for any sort of C code function that plans to modify the actual
-// registers. When the Recs resume, they'll reload the registers with values saved
-// as needed. (similar to a "FreezeXMMRegs")
-// "Flushing" means that in addition to the standard free (which is actually a flush)
-// the register allocations are additionally wiped. This should only be necessary if
-// the code being called is going to modify register allocations -- ie, be doing
-// some kind of recompiling of its own.
-#define FLUSH_CACHED_REGS 0x001
+#define FLUSH_NONE 0x000 // frees caller saved registers
+#define FLUSH_CONSTANT_REGS 0x001
#define FLUSH_FLUSH_XMM 0x002
#define FLUSH_FREE_XMM 0x004 // both flushes and frees
-#define FLUSH_FLUSH_ALLX86 0x020 // flush x86
-#define FLUSH_FREE_TEMPX86 0x040 // flush and free temporary x86 regs
-#define FLUSH_FREE_ALLX86 0x080 // free all x86 regs
+#define FLUSH_ALL_X86 0x020 // flush x86
+#define FLUSH_FREE_TEMP_X86 0x040 // flush and free temporary x86 regs
+#define FLUSH_FREE_NONTEMP_X86 0x080 // free all x86 regs, except temporary
#define FLUSH_FREE_VU0 0x100 // free all vu0 related regs
#define FLUSH_PC 0x200 // program counter
-#define FLUSH_CAUSE 0x000 // disabled for now: cause register, only the branch delay bit
+//#define FLUSH_CAUSE 0x000 // disabled for now: cause register, only the branch delay bit
#define FLUSH_CODE 0x800 // opcode for interpreter
#define FLUSH_EVERYTHING 0x1ff
//#define FLUSH_EXCEPTION 0x1ff // will probably do this totally differently actually
#define FLUSH_INTERPRETER 0xfff
-#define FLUSH_FULLVTLB FLUSH_NOCONST
+#define FLUSH_FULLVTLB 0x000
// no freeing, used when callee won't destroy xmm regs
-#define FLUSH_NODESTROY (FLUSH_CACHED_REGS | FLUSH_FLUSH_XMM | FLUSH_FLUSH_ALLX86)
+#define FLUSH_NODESTROY (FLUSH_CONSTANT_REGS | FLUSH_FLUSH_XMM | FLUSH_ALL_X86)
-// used when regs aren't going to be changed be callee
-#define FLUSH_NOCONST (FLUSH_FREE_XMM | FLUSH_FREE_TEMPX86)
#endif
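// --- illustrative sketch (not part of the commit) ---
// The renamed flush masks keep the same call pattern as before: pick the weakest mask
// that matches what the callee can clobber. A fragment in the style of the call sites
// elsewhere in this commit (WriteCP0Status is one such helper; the pairing below is an
// illustration, not a specific call site from the diff).

// Full spill before calling interpreter-side C++ that may touch any guest state
// (constants, XMMs, x86 regs, PC and opcode are all flushed):
iFlushCall(FLUSH_INTERPRETER);
xFastCall((void*)WriteCP0Status);

// Cheaper variant when the callee preserves host registers: write dirty values
// back to memory but keep the allocations alive for the code that follows.
iFlushCall(FLUSH_NODESTROY);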
@@ -126,23 +126,18 @@ void recCFC1(void)
return;
EE::Profiler.EmitOp(eeOpcode::CFC1);
-_eeOnWriteReg(_Rt_, 1);
-if (_Fs_ >= 16)
-xMOV(eax, ptr[&fpuRegs.fprc[31]]);
-else
-xMOV(eax, ptr[&fpuRegs.fprc[0]]);
-_deleteEEreg(_Rt_, 0);
+const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
if (_Fs_ >= 16)
{
-xAND(eax, 0x0083c078); //remove always-zero bits
-xOR(eax, 0x01000001); //set always-one bits
+xMOV(xRegister32(regt), ptr32[&fpuRegs.fprc[31]]);
+xAND(xRegister32(regt), 0x0083c078); //remove always-zero bits
+xOR(xRegister32(regt), 0x01000001); //set always-one bits
+xMOVSX(xRegister64(regt), xRegister32(regt));
+}
+else
+{
+xMOVSX(xRegister64(regt), ptr32[&fpuRegs.fprc[0]]);
}
-xCDQ();
-xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
-xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
}
void recCTC1()
@@ -163,7 +158,10 @@ void recCTC1()
{
xMOVSS(ptr[&fpuRegs.fprc[_Fs_]], xRegisterSSE(mmreg));
}
else if ((mmreg = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ)) >= 0)
{
xMOV(ptr32[&fpuRegs.fprc[_Fs_]], xRegister32(mmreg));
}
else
{
_deleteGPRtoXMMreg(_Rt_, 1);
@@ -184,36 +182,42 @@ void recMFC1()
{
if (!_Rt_)
return;
EE::Profiler.EmitOp(eeOpcode::MFC1);
-_eeOnWriteReg(_Rt_, 1);
-const int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
+const int xmmregt = _allocIfUsedGPRtoXMM(_Rt_, MODE_READ | MODE_WRITE);
+const int regs = _allocIfUsedFPUtoXMM(_Fs_, MODE_READ);
+if (regs >= 0 && xmmregt >= 0)
+{
+// if we're in xmm, we shouldn't be const
+pxAssert(!GPR_IS_CONST1(_Rt_));
+// both in xmm, sign extend and insert lower bits
+const int temp = _allocTempXMMreg(XMMT_FPS);
+xMOVAPS(xRegisterSSE(temp), xRegisterSSE(regs));
+xPSRA.D(xRegisterSSE(temp), 31);
+xMOVSS(xRegisterSSE(xmmregt), xRegisterSSE(regs));
+xINSERTPS(xRegisterSSE(xmmregt), xRegisterSSE(temp), _MM_MK_INSERTPS_NDX(0, 1, 0));
+_freeXMMreg(temp);
+return;
+}
+// storing to a gpr..
+const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
+// shouldn't be const after we're writing.
+pxAssert(!GPR_IS_CONST1(_Rt_));
if (regs >= 0)
{
-_deleteGPRtoXMMreg(_Rt_, 2);
-_signExtendXMMtoM((uptr)&cpuRegs.GPR.r[_Rt_].UL[0], regs, 0);
+// xmm -> gpr
+xMOVD(xRegister32(regt), xRegisterSSE(regs));
+xMOVSX(xRegister64(regt), xRegister32(regt));
}
else
{
-const int regt = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
-if (regt >= 0)
-{
-if (xmmregs[regt].mode & MODE_WRITE)
-{
-xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rt_].UL[2]], xRegisterSSE(regt));
-}
-xmmregs[regt].inuse = 0;
-}
-_deleteEEreg(_Rt_, 0);
-xMOV(eax, ptr[&fpuRegs.fpr[_Fs_].UL]);
-xCDQ();
-xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
-xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
+// mem -> gpr
+xMOVSX(xRegister64(regt), ptr32[&fpuRegs.fpr[_Fs_].UL]);
}
}
@@ -228,44 +232,60 @@ void recMTC1()
EE::Profiler.EmitOp(eeOpcode::MTC1);
if (GPR_IS_CONST1(_Rt_))
{
-_deleteFPtoXMMreg(_Fs_, 0);
-xMOV(ptr32[&fpuRegs.fpr[_Fs_].UL], g_cpuConstRegs[_Rt_].UL[0]);
+const int xmmreg = _allocIfUsedFPUtoXMM(_Fs_, MODE_WRITE);
+if (xmmreg >= 0)
+{
+// common case: mtc1 zero, fnn
+if (g_cpuConstRegs[_Rt_].UL[0] == 0)
+{
+xPXOR(xRegisterSSE(xmmreg), xRegisterSSE(xmmreg));
+}
+else
+{
+// may as well flush the constant register, since we're needing it in a gpr anyway
+const int x86reg = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
+xMOVDZX(xRegisterSSE(xmmreg), xRegister32(x86reg));
+}
+}
+else
+{
+pxAssert(!_hasXMMreg(XMMTYPE_FPREG, _Fs_));
+xMOV(ptr32[&fpuRegs.fpr[_Fs_].UL], g_cpuConstRegs[_Rt_].UL[0]);
+}
}
else
{
-int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
-if (mmreg >= 0)
+const int xmmgpr = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
+if (xmmgpr >= 0)
{
if (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE)
{
// transfer the reg directly
-_deleteGPRtoXMMreg(_Rt_, 2);
-_deleteFPtoXMMreg(_Fs_, 2);
-_allocFPtoXMMreg(mmreg, _Fs_, MODE_WRITE);
+_deleteFPtoXMMreg(_Fs_, DELETE_REG_FREE_NO_WRITEBACK);
+_reallocateXMMreg(xmmgpr, XMMTYPE_FPREG, _Fs_, MODE_WRITE);
}
else
{
-int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE);
-if (mmreg2 >= 0)
-xMOVSS(xRegisterSSE(mmreg2), xRegisterSSE(mmreg));
+const int xmmreg2 = _allocIfUsedFPUtoXMM(_Fs_, MODE_WRITE);
+if (xmmreg2 >= 0)
+xMOVSS(xRegisterSSE(xmmreg2), xRegisterSSE(xmmgpr));
else
-xMOVSS(ptr[&fpuRegs.fpr[_Fs_].UL], xRegisterSSE(mmreg));
+xMOVSS(ptr[&fpuRegs.fpr[_Fs_].UL], xRegisterSSE(xmmgpr));
}
}
else
{
-int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE);
+// may as well cache it..
+const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
+const int mmreg2 = _allocIfUsedFPUtoXMM(_Fs_, MODE_WRITE);
if (mmreg2 >= 0)
{
-xMOVSSZX(xRegisterSSE(mmreg2), ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+xMOVDZX(xRegisterSSE(mmreg2), xRegister32(regt));
}
else
{
-xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
-xMOV(ptr[&fpuRegs.fpr[_Fs_].UL], eax);
+xMOV(ptr32[&fpuRegs.fpr[_Fs_].UL], xRegister32(regt));
}
}
}
@@ -311,31 +331,39 @@ REC_FPUFUNC(RSQRT_S);
// Clamp Functions (Converts NaN's and Infinities to Normal Numbers)
//------------------------------------------------------------------
-alignas(16) static u64 FPU_FLOAT_TEMP[2];
+static int fpuCopyToTempForClamp(int fpureg, int xmmreg)
{
if (FPUINST_USEDTEST(fpureg))
{
const int tempreg = _allocTempXMMreg(XMMT_FPS);
xMOVSS(xRegisterSSE(tempreg), xRegisterSSE(xmmreg));
return tempreg;
}
// flush back the original value, before we mess with it below
if (FPUINST_LIVETEST(fpureg))
_flushXMMreg(xmmreg);
// turn it into a temp, so in case the liveness was incorrect, we don't reuse it after clamp
_reallocateXMMreg(xmmreg, XMMTYPE_TEMP, 0, 0, true);
return xmmreg;
}
static void fpuFreeIfTemp(int xmmreg)
{
if (xmmregs[xmmreg].inuse && xmmregs[xmmreg].type == XMMTYPE_TEMP)
_freeXMMreg(xmmreg);
}
__fi void fpuFloat3(int regd) // +NaN -> +fMax, -NaN -> -fMax, +Inf -> +fMax, -Inf -> -fMax
{
-int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
-if (t1reg >= 0)
-{
+const int t1reg = _allocTempXMMreg(XMMT_FPS);
xMOVSS(xRegisterSSE(t1reg), xRegisterSSE(regd));
xAND.PS(xRegisterSSE(t1reg), ptr[&s_neg[0]]);
xMIN.SS(xRegisterSSE(regd), ptr[&g_maxvals[0]]);
xMAX.SS(xRegisterSSE(regd), ptr[&g_minvals[0]]);
xOR.PS(xRegisterSSE(regd), xRegisterSSE(t1reg));
_freeXMMreg(t1reg);
-}
-else
-{
-Console.Error("fpuFloat2() allocation error");
-t1reg = (regd == 0) ? 1 : 0; // get a temp reg thats not regd
-xMOVAPS(ptr[&FPU_FLOAT_TEMP[0]], xRegisterSSE(t1reg)); // backup data in t1reg to a temp address
-xMOVSS(xRegisterSSE(t1reg), xRegisterSSE(regd));
-xAND.PS(xRegisterSSE(t1reg), ptr[&s_neg[0]]);
-xMIN.SS(xRegisterSSE(regd), ptr[&g_maxvals[0]]);
-xMAX.SS(xRegisterSSE(regd), ptr[&g_minvals[0]]);
-xOR.PS(xRegisterSSE(regd), xRegisterSSE(t1reg));
-xMOVAPS(xRegisterSSE(t1reg), ptr[&FPU_FLOAT_TEMP[0]]); // restore t1reg data
-}
}
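// --- illustrative sketch (not part of the commit) ---
// As a scalar reference for what the emitted sequence above computes: the sign bit is
// saved via the s_neg mask, the magnitude is clamped into the finite range by the
// MIN/MAX against g_maxvals/g_minvals, and the sign is OR'd back. A standalone model,
// assuming C++20 std::bit_cast.

#include <bit>
#include <cstdint>

static float ClampToPS2Range(float v)
{
    const std::uint32_t bits = std::bit_cast<std::uint32_t>(v);
    const std::uint32_t sign = bits & 0x80000000u;
    std::uint32_t mag = bits & 0x7fffffffu;
    if (mag > 0x7f7fffffu) // exponent all ones: NaN or infinity
        mag = 0x7f7fffffu; // fMax, the largest finite single
    return std::bit_cast<float>(sign | mag); // +/-NaN and +/-Inf collapse to +/-fMax
}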
__fi void fpuFloat(int regd) // +/-NaN -> +fMax, +Inf -> +fMax, -Inf -> -fMax
@@ -396,34 +424,31 @@ FPURECOMPILE_CONSTCODE(ABS_S, XMMINFO_WRITED | XMMINFO_READS);
//------------------------------------------------------------------
void FPU_ADD_SUB(int regd, int regt, int issub)
{
-int tempecx = _allocX86reg(ecx, X86TYPE_TEMP, 0, 0); //receives regd
-int temp2 = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); //receives regt
-int xmmtemp = _allocTempXMMreg(XMMT_FPS, -1); //temporary for anding with regd/regt
-xMOVD(xRegister32(tempecx), xRegisterSSE(regd));
-xMOVD(xRegister32(temp2), xRegisterSSE(regt));
+const int xmmtemp = _allocTempXMMreg(XMMT_FPS); //temporary for anding with regd/regt
+xMOVD(ecx, xRegisterSSE(regd)); // ecx receives regd
+xMOVD(eax, xRegisterSSE(regt)); // eax receives regt
//mask the exponents
-xSHR(xRegister32(tempecx), 23);
-xSHR(xRegister32(temp2), 23);
-xAND(xRegister32(tempecx), 0xff);
-xAND(xRegister32(temp2), 0xff);
-xSUB(xRegister32(tempecx), xRegister32(temp2)); //tempecx = exponent difference
-xCMP(xRegister32(tempecx), 25);
+xSHR(ecx, 23);
+xSHR(eax, 23);
+xAND(ecx, 0xff);
+xAND(eax, 0xff);
+xSUB(ecx, eax); //tempecx = exponent difference
+xCMP(ecx, 25);
j8Ptr[0] = JGE8(0);
-xCMP(xRegister32(tempecx), 0);
+xCMP(ecx, 0);
j8Ptr[1] = JG8(0);
j8Ptr[2] = JE8(0);
-xCMP(xRegister32(tempecx), -25);
+xCMP(ecx, -25);
j8Ptr[3] = JLE8(0);
//diff = -24 .. -1 , expd < expt
-xNEG(xRegister32(tempecx));
-xDEC(xRegister32(tempecx));
-xMOV(xRegister32(temp2), 0xffffffff);
-xSHL(xRegister32(temp2), cl); //temp2 = 0xffffffff << tempecx
-xMOVDZX(xRegisterSSE(xmmtemp), xRegister32(temp2));
+xNEG(ecx);
+xDEC(ecx);
+xMOV(eax, 0xffffffff);
+xSHL(eax, cl); //temp2 = 0xffffffff << tempecx
+xMOVDZX(xRegisterSSE(xmmtemp), eax);
xAND.PS(xRegisterSSE(regd), xRegisterSSE(xmmtemp));
if (issub)
xSUB.SS(xRegisterSSE(regd), xRegisterSSE(regt));
@@ -443,10 +468,10 @@ void FPU_ADD_SUB(int regd, int regt, int issub)
x86SetJ8(j8Ptr[1]);
//diff = 1 .. 24, expt < expd
-xDEC(xRegister32(tempecx));
-xMOV(xRegister32(temp2), 0xffffffff);
-xSHL(xRegister32(temp2), cl); //temp2 = 0xffffffff << tempecx
-xMOVDZX(xRegisterSSE(xmmtemp), xRegister32(temp2));
+xDEC(ecx);
+xMOV(eax, 0xffffffff);
+xSHL(eax, cl); //temp2 = 0xffffffff << tempecx
+xMOVDZX(xRegisterSSE(xmmtemp), eax);
xAND.PS(xRegisterSSE(xmmtemp), xRegisterSSE(regt));
if (issub)
xSUB.SS(xRegisterSSE(regd), xRegisterSSE(xmmtemp));
@@ -476,8 +501,6 @@ void FPU_ADD_SUB(int regd, int regt, int issub)
x86SetJ8(j8Ptr[7]);
_freeXMMreg(xmmtemp);
-_freeX86reg(temp2);
-_freeX86reg(tempecx);
}
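// --- illustrative sketch (not part of the commit) ---
// What this helper emulates, in scalar terms: the PS2 FPU aligns mantissas with a
// narrower window than IEEE, so when the exponents differ by 1..24 the operand with
// the smaller exponent loses its low (diff - 1) mantissa bits before the add. A
// standalone model of just that in-range case, assuming C++20 std::bit_cast; the
// diff == 0, >= 25 and <= -25 paths emitted above are omitted.

#include <bit>
#include <cstdint>

static float Ps2AddSub(float d, float t, bool issub)
{
    const int expd = (std::bit_cast<std::uint32_t>(d) >> 23) & 0xff;
    const int expt = (std::bit_cast<std::uint32_t>(t) >> 23) & 0xff;
    const int diff = expd - expt;

    if (diff >= 1 && diff <= 24)        // t is smaller: drop its low bits
        t = std::bit_cast<float>(std::bit_cast<std::uint32_t>(t) & (0xffffffffu << (diff - 1)));
    else if (diff <= -1 && diff >= -24) // d is smaller: drop its low bits
        d = std::bit_cast<float>(std::bit_cast<std::uint32_t>(d) & (0xffffffffu << (-diff - 1)));

    return issub ? (d - t) : (d + t);
}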
void FPU_ADD(int regd, int regt)
@@ -550,7 +573,7 @@ static void (*recComOpXMM_to_XMM_REV[])(x86SSERegType, x86SSERegType) = { //reve
int recCommutativeOp(int info, int regd, int op)
{
-int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
+int t0reg = _allocTempXMMreg(XMMT_FPS);
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
{
@@ -667,7 +690,7 @@ FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC | XMMINFO_READS | XMMINFO_READT)
static void _setupBranchTest()
{
-_eeFlushAllUnused();
+_eeFlushAllDirty();
// COP1 branch conditionals are based on the following equation:
// (fpuRegs.fprc[31] & 0x00800000)
@@ -680,29 +703,35 @@ static void _setupBranchTest()
void recBC1F()
{
EE::Profiler.EmitOp(eeOpcode::BC1F);
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
const bool swap = TrySwapDelaySlot(0, 0, 0);
_setupBranchTest();
-recDoBranchImm(JNZ32(0));
+recDoBranchImm(branchTo, JNZ32(0), false, swap);
}
void recBC1T()
{
EE::Profiler.EmitOp(eeOpcode::BC1T);
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
const bool swap = TrySwapDelaySlot(0, 0, 0);
_setupBranchTest();
-recDoBranchImm(JZ32(0));
+recDoBranchImm(branchTo, JZ32(0), false, swap);
}
void recBC1FL()
{
EE::Profiler.EmitOp(eeOpcode::BC1FL);
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
_setupBranchTest();
-recDoBranchImm_Likely(JNZ32(0));
+recDoBranchImm(branchTo, JNZ32(0), true, false);
}
void recBC1TL()
{
EE::Profiler.EmitOp(eeOpcode::BC1TL);
const u32 branchTo = ((s32)_Imm_ * 4) + pc;
_setupBranchTest();
-recDoBranchImm_Likely(JZ32(0));
+recDoBranchImm(branchTo, JZ32(0), true, false);
}
//------------------------------------------------------------------
@@ -713,49 +742,62 @@ void recBC1TL()
void recC_EQ_xmm(int info)
{
EE::Profiler.EmitOp(eeOpcode::CEQ_F);
-int tempReg;
-int t0reg;
//Console.WriteLn("recC_EQ_xmm()");
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
{
case PROCESS_EE_S:
-fpuFloat3(EEREC_S);
-t0reg = _allocTempXMMreg(XMMT_FPS, -1);
-if (t0reg >= 0)
{
+const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
+fpuFloat3(regs);
+const int t0reg = _allocTempXMMreg(XMMT_FPS);
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]);
fpuFloat3(t0reg);
-xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(t0reg));
+xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
+fpuFreeIfTemp(regs);
}
-else
-xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]);
break;
case PROCESS_EE_T:
-fpuFloat3(EEREC_T);
-t0reg = _allocTempXMMreg(XMMT_FPS, -1);
-if (t0reg >= 0)
{
+const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
+fpuFloat3(regt);
+const int t0reg = _allocTempXMMreg(XMMT_FPS);
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]);
fpuFloat3(t0reg);
-xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
+xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(regt));
_freeXMMreg(t0reg);
+fpuFreeIfTemp(regt);
}
-else
-xUCOMI.SS(xRegisterSSE(EEREC_T), ptr[&fpuRegs.fpr[_Fs_]]);
break;
case (PROCESS_EE_S | PROCESS_EE_T):
-fpuFloat3(EEREC_S);
-fpuFloat3(EEREC_T);
-xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_T));
+{
+const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
+fpuFloat3(regs);
+const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
+fpuFloat3(regt);
+xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(regt));
+fpuFreeIfTemp(regs);
+fpuFreeIfTemp(regt);
+}
break;
default:
Console.WriteLn(Color_Magenta, "recC_EQ_xmm: Default");
-tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
-xMOV(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Fs_]]);
-xCMP(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Ft_]]);
+xMOV(eax, ptr[&fpuRegs.fpr[_Fs_]]);
+xCMP(eax, ptr[&fpuRegs.fpr[_Ft_]]);
j8Ptr[0] = JZ8(0);
xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC);
@@ -763,9 +805,6 @@ void recC_EQ_xmm(int info)
x86SetJ8(j8Ptr[0]);
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC);
x86SetJ8(j8Ptr[1]);
-if (tempReg >= 0)
-_freeX86reg(tempReg);
return;
}
@@ -790,59 +829,62 @@ void recC_F()
void recC_LE_xmm(int info)
{
EE::Profiler.EmitOp(eeOpcode::CLE_F);
-int tempReg; //tempX86reg
-int t0reg; //tempXMMreg
//Console.WriteLn("recC_LE_xmm()");
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
{
case PROCESS_EE_S:
-fpuFloat3(EEREC_S);
-t0reg = _allocTempXMMreg(XMMT_FPS, -1);
-if (t0reg >= 0)
{
+const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
+fpuFloat3(regs);
+const int t0reg = _allocTempXMMreg(XMMT_FPS);
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]);
fpuFloat3(t0reg);
-xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(t0reg));
+xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
+fpuFreeIfTemp(regs);
}
-else
-xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]);
break;
case PROCESS_EE_T:
-fpuFloat3(EEREC_T);
-t0reg = _allocTempXMMreg(XMMT_FPS, -1);
-if (t0reg >= 0)
{
+const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
+fpuFloat3(regt);
+const int t0reg = _allocTempXMMreg(XMMT_FPS);
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]);
fpuFloat3(t0reg);
-xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
-_freeXMMreg(t0reg);
-}
-else
-{
-xUCOMI.SS(xRegisterSSE(EEREC_T), ptr[&fpuRegs.fpr[_Fs_]]);
-j8Ptr[0] = JAE8(0);
-xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC);
-j8Ptr[1] = JMP8(0);
-x86SetJ8(j8Ptr[0]);
-xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC);
-x86SetJ8(j8Ptr[1]);
-return;
+xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(regt));
+_freeXMMreg(t0reg);
+fpuFreeIfTemp(regt);
}
break;
case (PROCESS_EE_S | PROCESS_EE_T):
-fpuFloat3(EEREC_S);
-fpuFloat3(EEREC_T);
-xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_T));
+{
+const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
+fpuFloat3(regs);
+const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
+fpuFloat3(regt);
+xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(regt));
+fpuFreeIfTemp(regs);
+fpuFreeIfTemp(regt);
+}
break;
default: // Untested and incorrect, but this case is never reached AFAIK (cottonvibes)
Console.WriteLn(Color_Magenta, "recC_LE_xmm: Default");
-tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
-xMOV(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Fs_]]);
-xCMP(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Ft_]]);
+xMOV(eax, ptr[&fpuRegs.fpr[_Fs_]]);
+xCMP(eax, ptr[&fpuRegs.fpr[_Ft_]]);
j8Ptr[0] = JLE8(0);
xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC);
@@ -850,9 +892,6 @@ void recC_LE_xmm(int info)
x86SetJ8(j8Ptr[0]);
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC);
x86SetJ8(j8Ptr[1]);
-if (tempReg >= 0)
-_freeX86reg(tempReg);
return;
}
@@ -870,61 +909,62 @@ FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS | XMMINFO_READT);
void recC_LT_xmm(int info)
{
EE::Profiler.EmitOp(eeOpcode::CLT_F);
-int tempReg;
-int t0reg;
//Console.WriteLn("recC_LT_xmm()");
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
{
case PROCESS_EE_S:
-fpuFloat3(EEREC_S);
-t0reg = _allocTempXMMreg(XMMT_FPS, -1);
-if (t0reg >= 0)
{
+const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
+fpuFloat3(regs);
+const int t0reg = _allocTempXMMreg(XMMT_FPS);
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]);
fpuFloat3(t0reg);
-xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(t0reg));
+xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(t0reg));
_freeXMMreg(t0reg);
+fpuFreeIfTemp(regs);
}
-else
-xUCOMI.SS(xRegisterSSE(EEREC_S), ptr[&fpuRegs.fpr[_Ft_]]);
break;
case PROCESS_EE_T:
-fpuFloat3(EEREC_T);
-t0reg = _allocTempXMMreg(XMMT_FPS, -1);
-if (t0reg >= 0)
{
+const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
+fpuFloat3(regt);
+const int t0reg = _allocTempXMMreg(XMMT_FPS);
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]);
fpuFloat3(t0reg);
-xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
-_freeXMMreg(t0reg);
-}
-else
-{
-xUCOMI.SS(xRegisterSSE(EEREC_T), ptr[&fpuRegs.fpr[_Fs_]]);
-j8Ptr[0] = JA8(0);
-xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC);
-j8Ptr[1] = JMP8(0);
-x86SetJ8(j8Ptr[0]);
-xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC);
-x86SetJ8(j8Ptr[1]);
-return;
+xUCOMI.SS(xRegisterSSE(t0reg), xRegisterSSE(regt));
+_freeXMMreg(t0reg);
+fpuFreeIfTemp(regt);
}
break;
case (PROCESS_EE_S | PROCESS_EE_T):
-// Clamp NaNs
-// Note: This fixes a crash in Rule of Rose.
-fpuFloat3(EEREC_S);
-fpuFloat3(EEREC_T);
-xUCOMI.SS(xRegisterSSE(EEREC_S), xRegisterSSE(EEREC_T));
+{
+const int regs = fpuCopyToTempForClamp(_Fs_, EEREC_S);
+fpuFloat3(regs);
+const int regt = fpuCopyToTempForClamp(_Ft_, EEREC_T);
+fpuFloat3(regt);
+xUCOMI.SS(xRegisterSSE(regs), xRegisterSSE(regt));
+fpuFreeIfTemp(regs);
+fpuFreeIfTemp(regt);
+}
break;
default:
Console.WriteLn(Color_Magenta, "recC_LT_xmm: Default");
-tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
-xMOV(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Fs_]]);
-xCMP(xRegister32(tempReg), ptr[&fpuRegs.fpr[_Ft_]]);
+xMOV(eax, ptr[&fpuRegs.fpr[_Fs_]]);
+xCMP(eax, ptr[&fpuRegs.fpr[_Ft_]]);
j8Ptr[0] = JL8(0);
xAND(ptr32[&fpuRegs.fprc[31]], ~FPUflagC);
@@ -932,9 +972,6 @@ void recC_LT_xmm(int info)
x86SetJ8(j8Ptr[0]);
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagC);
x86SetJ8(j8Ptr[1]);
-if (tempReg >= 0)
-_freeX86reg(tempReg);
return;
}
@@ -957,13 +994,19 @@ FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS | XMMINFO_READT);
void recCVT_S_xmm(int info)
{
EE::Profiler.EmitOp(eeOpcode::CVTS_F);
-if (!(info & PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info & PROCESS_EE_MODEWRITES)))
+if (info & PROCESS_EE_D)
{
+if (info & PROCESS_EE_S)
+xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
+else
xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]);
}
else
{
-xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
+const int temp = _allocTempXMMreg(XMMT_FPS);
+xCVTSI2SS(xRegisterSSE(temp), ptr32[&fpuRegs.fpr[_Fs_]]);
+xMOVSS(ptr32[&fpuRegs.fpr[_Fd_]], xRegisterSSE(temp));
+_freeXMMreg(temp);
}
}
@@ -998,7 +1041,7 @@ void recCVT_W()
}
//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
-_deleteFPtoXMMreg(_Fd_, 2);
+_deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK);
xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x80000000 if negative
@@ -1018,23 +1061,22 @@ void recDIVhelper1(int regd, int regt) // Sets flags
{
u8 *pjmp1, *pjmp2;
u32 *ajmp32, *bjmp32;
-int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
-int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
+const int t1reg = _allocTempXMMreg(XMMT_FPS);
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
/*--- Check for divide by zero ---*/
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regt));
-xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg));
-xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set)
+xMOVMSKPS(eax, xRegisterSSE(t1reg));
+xAND(eax, 1); //Check sign (if regt == zero, sign will be set)
ajmp32 = JZ32(0); //Skip if not set
/*--- Check for 0/0 ---*/
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd));
-xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg));
-xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set)
+xMOVMSKPS(eax, xRegisterSSE(t1reg));
+xAND(eax, 1); //Check sign (if regd == zero, sign will be set)
pjmp1 = JZ8(0); //Skip if not set
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 )
pjmp2 = JMP8(0);
@@ -1059,7 +1101,6 @@ void recDIVhelper1(int regd, int regt) // Sets flags
x86SetJ32(bjmp32);
_freeXMMreg(t1reg);
-_freeX86reg(tempReg);
}
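// --- illustrative sketch (not part of the commit) ---
// For reference, the flag behaviour this helper reproduces, as a scalar model.
// The x/0 path falls inside the elided hunk above, so the D/SD flags used there
// are an assumption based on the 0/0 path that is visible.

static u32 Ps2DivFlags(float numerator, float denominator)
{
    if (denominator == 0.0f)
    {
        return (numerator == 0.0f) ? (FPUflagI | FPUflagSI)  // 0/0: invalid + sticky
                                   : (FPUflagD | FPUflagSD); // x/0: divide-by-zero (assumed)
    }
    return 0; // ordinary divide: I and D stay cleared
}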
void recDIVhelper2(int regd, int regt) // Doesn't set flags
@@ -1075,7 +1116,7 @@ void recDIV_S_xmm(int info)
{
EE::Profiler.EmitOp(eeOpcode::DIV_F);
bool roundmodeFlag = false;
-int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
+int t0reg = _allocTempXMMreg(XMMT_FPS);
//Console.WriteLn("DIV"); //Console.WriteLn("DIV");
if (CHECK_FPUNEGDIVHACK) if (CHECK_FPUNEGDIVHACK)
@ -1181,7 +1222,7 @@ FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT);
//------------------------------------------------------------------ //------------------------------------------------------------------
void recMADDtemp(int info, int regd) void recMADDtemp(int info, int regd)
{ {
const int t0reg = _allocTempXMMreg(XMMT_FPS, -1); const int t0reg = _allocTempXMMreg(XMMT_FPS);
switch (info & (PROCESS_EE_S | PROCESS_EE_T)) switch (info & (PROCESS_EE_S | PROCESS_EE_T))
{ {
@ -1203,7 +1244,7 @@ void recMADDtemp(int info, int regd)
FPU_ADD(regd, t0reg);
}
}
-else if (regd == EEREC_ACC)
+else if ((info & PROCESS_EE_ACC) && regd == EEREC_ACC)
{
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Ft_]]);
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(EEREC_S); fpuFloat2(t0reg); }
@@ -1306,7 +1347,7 @@ void recMADDtemp(int info, int regd)
FPU_ADD(regd, t0reg);
}
}
-else if (regd == EEREC_ACC)
+else if ((info & PROCESS_EE_ACC) && regd == EEREC_ACC)
{
xMOVSS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(EEREC_T); }
@@ -1335,7 +1376,7 @@ void recMADDtemp(int info, int regd)
default:
if (regd == EEREC_ACC)
{
-const int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
+const int t1reg = _allocTempXMMreg(XMMT_FPS);
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]);
xMOVSSZX(xRegisterSSE(t1reg), ptr[&fpuRegs.fpr[_Ft_]]);
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(t1reg); }
@@ -1433,7 +1474,7 @@ FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED | XMMINFO_READS);
//------------------------------------------------------------------
void recMSUBtemp(int info, int regd)
{
-int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
+int t0reg = _allocTempXMMreg(XMMT_FPS);
switch (info & (PROCESS_EE_S | PROCESS_EE_T))
{
@@ -1559,7 +1600,7 @@ void recMSUBtemp(int info, int regd)
default: default:
if (regd == EEREC_ACC) if (regd == EEREC_ACC)
{ {
const int t1reg = _allocTempXMMreg(XMMT_FPS, -1); const int t1reg = _allocTempXMMreg(XMMT_FPS);
xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]); xMOVSSZX(xRegisterSSE(t0reg), ptr[&fpuRegs.fpr[_Fs_]]);
xMOVSSZX(xRegisterSSE(t1reg), ptr[&fpuRegs.fpr[_Ft_]]); xMOVSSZX(xRegisterSSE(t1reg), ptr[&fpuRegs.fpr[_Ft_]]);
if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(t1reg); } if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat2(t0reg); fpuFloat2(t1reg); }
@ -1663,7 +1704,7 @@ void recSUBhelper(int regd, int regt)
void recSUBop(int info, int regd) void recSUBop(int info, int regd)
{ {
int t0reg = _allocTempXMMreg(XMMT_FPS, -1); int t0reg = _allocTempXMMreg(XMMT_FPS);
//xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO|FPUflagU)); // Clear O and U flags //xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagO|FPUflagU)); // Clear O and U flags
@ -1761,19 +1802,15 @@ void recSQRT_S_xmm(int info)
if (CHECK_FPU_EXTRA_FLAGS) if (CHECK_FPU_EXTRA_FLAGS)
{ {
int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
/*--- Check for negative SQRT ---*/ /*--- Check for negative SQRT ---*/
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(EEREC_D)); xMOVMSKPS(eax, xRegisterSSE(EEREC_D));
xAND(xRegister32(tempReg), 1); //Check sign xAND(eax, 1); //Check sign
u8* pjmp = JZ8(0); //Skip if none are u8* pjmp = JZ8(0); //Skip if none are
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags
xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_pos[0]]); // Make EEREC_D Positive xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_pos[0]]); // Make EEREC_D Positive
x86SetJ8(pjmp); x86SetJ8(pjmp);
_freeX86reg(tempReg);
} }
else else
xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_pos[0]]); // Make EEREC_D Positive xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_pos[0]]); // Make EEREC_D Positive
@ -1800,14 +1837,13 @@ void recRSQRThelper1(int regd, int t0reg) // Performs the RSQRT function when re
u8 *pjmp1, *pjmp2; u8 *pjmp1, *pjmp2;
u32 *pjmp32; u32 *pjmp32;
u8 *qjmp1, *qjmp2; u8 *qjmp1, *qjmp2;
int t1reg = _allocTempXMMreg(XMMT_FPS, -1); int t1reg = _allocTempXMMreg(XMMT_FPS);
int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
/*--- (first) Check for negative SQRT ---*/ /*--- (first) Check for negative SQRT ---*/
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t0reg)); xMOVMSKPS(eax, xRegisterSSE(t0reg));
xAND(xRegister32(tempReg), 1); //Check sign xAND(eax, 1); //Check sign
pjmp2 = JZ8(0); //Skip if not set pjmp2 = JZ8(0); //Skip if not set
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags
xAND.PS(xRegisterSSE(t0reg), ptr[&s_pos[0]]); // Make t0reg Positive xAND.PS(xRegisterSSE(t0reg), ptr[&s_pos[0]]); // Make t0reg Positive
@ -1816,14 +1852,14 @@ void recRSQRThelper1(int regd, int t0reg) // Performs the RSQRT function when re
/*--- Check for zero ---*/ /*--- Check for zero ---*/
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(t0reg)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(t0reg));
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); xMOVMSKPS(eax, xRegisterSSE(t1reg));
xAND(xRegister32(tempReg), 1); //Check sign (if t0reg == zero, sign will be set) xAND(eax, 1); //Check sign (if t0reg == zero, sign will be set)
pjmp1 = JZ8(0); //Skip if not set pjmp1 = JZ8(0); //Skip if not set
/*--- Check for 0/0 ---*/ /*--- Check for 0/0 ---*/
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd));
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); xMOVMSKPS(eax, xRegisterSSE(t1reg));
xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set) xAND(eax, 1); //Check sign (if regd == zero, sign will be set)
qjmp1 = JZ8(0); //Skip if not set qjmp1 = JZ8(0); //Skip if not set
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 ) xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 )
qjmp2 = JMP8(0); qjmp2 = JMP8(0);
@ -1850,7 +1886,6 @@ void recRSQRThelper1(int regd, int t0reg) // Performs the RSQRT function when re
x86SetJ32(pjmp32); x86SetJ32(pjmp32);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
_freeX86reg(tempReg);
} }
void recRSQRThelper2(int regd, int t0reg) // Performs the RSQRT function when regd <- Fs and t0reg <- Ft (Doesn't set flags) void recRSQRThelper2(int regd, int t0reg) // Performs the RSQRT function when regd <- Fs and t0reg <- Ft (Doesn't set flags)
@ -1872,7 +1907,7 @@ void recRSQRT_S_xmm(int info)
// iFPUd (Full mode) sets roundmode to nearest for rSQRT. // iFPUd (Full mode) sets roundmode to nearest for rSQRT.
// Should this do the same, or should Full mode leave roundmode alone? --air // Should this do the same, or should Full mode leave roundmode alone? --air
int t0reg = _allocTempXMMreg(XMMT_FPS, -1); int t0reg = _allocTempXMMreg(XMMT_FPS);
//Console.WriteLn("FPU: RSQRT"); //Console.WriteLn("FPU: RSQRT");
switch (info & (PROCESS_EE_S | PROCESS_EE_T)) switch (info & (PROCESS_EE_S | PROCESS_EE_T))


@ -288,7 +288,7 @@ void SetMaxValue(int regd)
#define ALLOC_S(sreg) \ #define ALLOC_S(sreg) \
do { \ do { \
(sreg) = _allocTempXMMreg(XMMT_FPS, -1); \ (sreg) = _allocTempXMMreg(XMMT_FPS); \
GET_S(sreg); \ GET_S(sreg); \
} while (0) } while (0)
@ -302,7 +302,7 @@ void SetMaxValue(int regd)
#define ALLOC_T(treg) \ #define ALLOC_T(treg) \
do { \ do { \
(treg) = _allocTempXMMreg(XMMT_FPS, -1); \ (treg) = _allocTempXMMreg(XMMT_FPS); \
GET_T(treg); \ GET_T(treg); \
} while (0) } while (0)
@ -316,7 +316,7 @@ void SetMaxValue(int regd)
#define ALLOC_ACC(areg) \ #define ALLOC_ACC(areg) \
do { \ do { \
(areg) = _allocTempXMMreg(XMMT_FPS, -1); \ (areg) = _allocTempXMMreg(XMMT_FPS); \
GET_ACC(areg); \ GET_ACC(areg); \
} while (0) } while (0)
@ -355,34 +355,31 @@ FPURECOMPILE_CONSTCODE(ABS_S, XMMINFO_WRITED | XMMINFO_READS);
//------------------------------------------------------------------ //------------------------------------------------------------------
void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they are floats void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they are floats
{ {
int tempecx = _allocX86reg(ecx, X86TYPE_TEMP, 0, 0); //receives regd const int xmmtemp = _allocTempXMMreg(XMMT_FPS); //temporary for anding with regd/regt
int temp2 = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); //receives regt xMOVD(ecx, xRegisterSSE(tempd)); //receives regd
int xmmtemp = _allocTempXMMreg(XMMT_FPS, -1); //temporary for anding with regd/regt xMOVD(eax, xRegisterSSE(tempt)); //receives regt
xMOVD(xRegister32(tempecx), xRegisterSSE(tempd));
xMOVD(xRegister32(temp2), xRegisterSSE(tempt));
//mask the exponents //mask the exponents
xSHR(xRegister32(tempecx), 23); xSHR(ecx, 23);
xSHR(xRegister32(temp2), 23); xSHR(eax, 23);
xAND(xRegister32(tempecx), 0xff); xAND(ecx, 0xff);
xAND(xRegister32(temp2), 0xff); xAND(eax, 0xff);
xSUB(xRegister32(tempecx), xRegister32(temp2)); //tempecx = exponent difference xSUB(ecx, eax); //tempecx = exponent difference
xCMP(xRegister32(tempecx), 25); xCMP(ecx, 25);
j8Ptr[0] = JGE8(0); j8Ptr[0] = JGE8(0);
xCMP(xRegister32(tempecx), 0); xCMP(ecx, 0);
j8Ptr[1] = JG8(0); j8Ptr[1] = JG8(0);
j8Ptr[2] = JE8(0); j8Ptr[2] = JE8(0);
xCMP(xRegister32(tempecx), -25); xCMP(ecx, -25);
j8Ptr[3] = JLE8(0); j8Ptr[3] = JLE8(0);
//diff = -24 .. -1 , expd < expt //diff = -24 .. -1 , expd < expt
xNEG(xRegister32(tempecx)); xNEG(ecx);
xDEC(xRegister32(tempecx)); xDEC(ecx);
xMOV(xRegister32(temp2), 0xffffffff); xMOV(eax, 0xffffffff);
xSHL(xRegister32(temp2), cl); //temp2 = 0xffffffff << tempecx xSHL(eax, cl); //temp2 = 0xffffffff << tempecx
xMOVDZX(xRegisterSSE(xmmtemp), xRegister32(temp2)); xMOVDZX(xRegisterSSE(xmmtemp), eax);
xAND.PS(xRegisterSSE(tempd), xRegisterSSE(xmmtemp)); xAND.PS(xRegisterSSE(tempd), xRegisterSSE(xmmtemp));
j8Ptr[4] = JMP8(0); j8Ptr[4] = JMP8(0);
@ -393,10 +390,10 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a
x86SetJ8(j8Ptr[1]); x86SetJ8(j8Ptr[1]);
//diff = 1 .. 24, expt < expd //diff = 1 .. 24, expt < expd
xDEC(xRegister32(tempecx)); xDEC(ecx);
xMOV(xRegister32(temp2), 0xffffffff); xMOV(eax, 0xffffffff);
xSHL(xRegister32(temp2), cl); //temp2 = 0xffffffff << tempecx xSHL(eax, cl); //temp2 = 0xffffffff << tempecx
xMOVDZX(xRegisterSSE(xmmtemp), xRegister32(temp2)); xMOVDZX(xRegisterSSE(xmmtemp), eax);
xAND.PS(xRegisterSSE(tempt), xRegisterSSE(xmmtemp)); xAND.PS(xRegisterSSE(tempt), xRegisterSSE(xmmtemp));
j8Ptr[6] = JMP8(0); j8Ptr[6] = JMP8(0);
@ -412,8 +409,6 @@ void FPU_ADD_SUB(int tempd, int tempt) //tempd and tempt are overwritten, they a
x86SetJ8(j8Ptr[6]); x86SetJ8(j8Ptr[6]);
_freeXMMreg(xmmtemp); _freeXMMreg(xmmtemp);
_freeX86reg(temp2);
_freeX86reg(tempecx);
} }
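Note: FPU_ADD_SUB reproduces the PS2 FPU's lack of guard/round/sticky bits by masking off mantissa bits of the smaller-exponent operand before the add/subtract. A plain-C++ sketch of the masking (the |diff| >= 25 branches are elided from this hunk; reducing the smaller operand to its sign there is an assumption):

#include <cstdint>
#include <cstring>

static uint32_t bits(float f) { uint32_t u; std::memcpy(&u, &f, sizeof(u)); return u; }
static float flt(uint32_t u)  { float f; std::memcpy(&f, &u, sizeof(f)); return f; }

void ps2AlignAddSub(float& d, float& t)
{
    const int diff = int((bits(d) >> 23) & 0xff) - int((bits(t) >> 23) & 0xff);
    if (diff >= 25)
        t = flt(bits(t) & 0x80000000u);                  // assumed: t keeps only its sign
    else if (diff > 0)
        t = flt(bits(t) & (0xffffffffu << (diff - 1)));  // drop bits t cannot contribute
    else if (diff < -24)
        d = flt(bits(d) & 0x80000000u);                  // assumed: d keeps only its sign
    else if (diff < 0)
        d = flt(bits(d) & (0xffffffffu << (-diff - 1))); // mirror case, expd < expt
    // diff == 0: both operands are used in full
}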
void FPU_MUL(int info, int regd, int sreg, int treg, bool acc) void FPU_MUL(int info, int regd, int sreg, int treg, bool acc)
@ -554,10 +549,21 @@ FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS | XMMINFO_READT);
void recCVT_S_xmm(int info) void recCVT_S_xmm(int info)
{ {
EE::Profiler.EmitOp(eeOpcode::CVTS_F); EE::Profiler.EmitOp(eeOpcode::CVTS_F);
if (!(info & PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info & PROCESS_EE_MODEWRITES)))
xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]); if (info & PROCESS_EE_D)
else {
if (info & PROCESS_EE_S)
xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xCVTDQ2PS(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else
xCVTSI2SS(xRegisterSSE(EEREC_D), ptr32[&fpuRegs.fpr[_Fs_]]);
}
else
{
const int temp = _allocTempXMMreg(XMMT_FPS);
xCVTSI2SS(xRegisterSSE(temp), ptr32[&fpuRegs.fpr[_Fs_]]);
xMOVSS(ptr32[&fpuRegs.fpr[_Fd_]], xRegisterSSE(temp));
_freeXMMreg(temp);
}
} }
FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS); FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED | XMMINFO_READS);
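Note: both paths above compute the same CVT.S result; the new code merely skips the temporary when the destination already lives in an XMM register. What CVT.S computes, as a sketch:

#include <cstdint>
#include <cstring>

// CVT.S: reinterpret the source FPR's raw bits as a signed 32-bit integer
// and convert it to single-precision float (CVTSI2SS / CVTDQ2PS lane 0).
float cvt_s(uint32_t fprBits)
{
    int32_t i;
    std::memcpy(&i, &fprBits, sizeof(i));
    return static_cast<float>(i);
}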
@ -581,7 +587,7 @@ void recCVT_W() //called from iFPU.cpp's recCVT_W
} }
//kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_] //kill register allocation for dst because we write directly to fpuRegs.fpr[_Fd_]
_deleteFPtoXMMreg(_Fd_, 2); _deleteFPtoXMMreg(_Fd_, DELETE_REG_FREE_NO_WRITEBACK);
xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x80000000 if negative xADD(edx, 0x7FFFFFFF); // 0x7FFFFFFF if positive, 0x80000000 if negative
@ -601,23 +607,22 @@ void recDIVhelper1(int regd, int regt) // Sets flags
{ {
u8 *pjmp1, *pjmp2; u8 *pjmp1, *pjmp2;
u32 *ajmp32, *bjmp32; u32 *ajmp32, *bjmp32;
int t1reg = _allocTempXMMreg(XMMT_FPS, -1); const int t1reg = _allocTempXMMreg(XMMT_FPS);
int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
//--- Check for divide by zero --- //--- Check for divide by zero ---
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regt)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regt));
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); xMOVMSKPS(eax, xRegisterSSE(t1reg));
xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set) xAND(eax, 1); //Check sign (if regt == zero, sign will be set)
ajmp32 = JZ32(0); //Skip if not set ajmp32 = JZ32(0); //Skip if not set
//--- Check for 0/0 --- //--- Check for 0/0 ---
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd));
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); xMOVMSKPS(eax, xRegisterSSE(t1reg));
xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set) xAND(eax, 1); //Check sign (if regd == zero, sign will be set)
pjmp1 = JZ8(0); //Skip if not set pjmp1 = JZ8(0); //Skip if not set
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 ) xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 )
pjmp2 = JMP8(0); pjmp2 = JMP8(0);
@ -642,7 +647,6 @@ void recDIVhelper1(int regd, int regt) // Sets flags
x86SetJ32(bjmp32); x86SetJ32(bjmp32);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
_freeX86reg(tempReg);
} }
void recDIVhelper2(int regd, int regt) // Doesn't set flags void recDIVhelper2(int regd, int regt) // Doesn't set flags
@ -951,8 +955,7 @@ void recSQRT_S_xmm(int info)
{ {
EE::Profiler.EmitOp(eeOpcode::SQRT_F); EE::Profiler.EmitOp(eeOpcode::SQRT_F);
int roundmodeFlag = 0; int roundmodeFlag = 0;
const int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0); const int t1reg = _allocTempXMMreg(XMMT_FPS);
const int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
//Console.WriteLn("FPU: SQRT"); //Console.WriteLn("FPU: SQRT");
if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest) if (g_sseMXCSR.GetRoundMode() != SSEround_Nearest)
@ -972,8 +975,8 @@ void recSQRT_S_xmm(int info)
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
//--- Check for negative SQRT --- (sqrt(-0) = 0, unlike what the docs say) //--- Check for negative SQRT --- (sqrt(-0) = 0, unlike what the docs say)
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(EEREC_D)); xMOVMSKPS(eax, xRegisterSSE(EEREC_D));
xAND(xRegister32(tempReg), 1); //Check sign xAND(eax, 1); //Check sign
u8* pjmp = JZ8(0); //Skip if none are u8* pjmp = JZ8(0); //Skip if none are
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags
xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_const.pos[0]]); // Make EEREC_D Positive xAND.PS(xRegisterSSE(EEREC_D), ptr[&s_const.pos[0]]); // Make EEREC_D Positive
@ -994,7 +997,6 @@ void recSQRT_S_xmm(int info)
if (roundmodeFlag == 1) if (roundmodeFlag == 1)
xLDMXCSR(g_sseMXCSR); xLDMXCSR(g_sseMXCSR);
_freeX86reg(tempReg);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
} }
@ -1010,14 +1012,13 @@ void recRSQRThelper1(int regd, int regt) // Performs the RSQRT function when reg
u8 *pjmp1, *pjmp2; u8 *pjmp1, *pjmp2;
u8 *qjmp1, *qjmp2; u8 *qjmp1, *qjmp2;
u32* pjmp32; u32* pjmp32;
int t1reg = _allocTempXMMreg(XMMT_FPS, -1); int t1reg = _allocTempXMMreg(XMMT_FPS);
int tempReg = _allocX86reg(xEmptyReg, X86TYPE_TEMP, 0, 0);
xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags xAND(ptr32[&fpuRegs.fprc[31]], ~(FPUflagI | FPUflagD)); // Clear I and D flags
//--- (first) Check for negative SQRT --- //--- (first) Check for negative SQRT ---
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(regt)); xMOVMSKPS(eax, xRegisterSSE(regt));
xAND(xRegister32(tempReg), 1); //Check sign xAND(eax, 1); //Check sign
pjmp2 = JZ8(0); //Skip if not set pjmp2 = JZ8(0); //Skip if not set
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags
xAND.PS(xRegisterSSE(regt), ptr[&s_const.pos[0]]); // Make regt Positive xAND.PS(xRegisterSSE(regt), ptr[&s_const.pos[0]]); // Make regt Positive
@ -1026,15 +1027,15 @@ void recRSQRThelper1(int regd, int regt) // Performs the RSQRT function when reg
//--- Check for zero --- //--- Check for zero ---
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regt)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regt));
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); xMOVMSKPS(eax, xRegisterSSE(t1reg));
xAND(xRegister32(tempReg), 1); //Check sign (if regt == zero, sign will be set) xAND(eax, 1); //Check sign (if regt == zero, sign will be set)
pjmp1 = JZ8(0); //Skip if not set pjmp1 = JZ8(0); //Skip if not set
//--- Check for 0/0 --- //--- Check for 0/0 ---
xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg)); xXOR.PS(xRegisterSSE(t1reg), xRegisterSSE(t1reg));
xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd)); xCMPEQ.SS(xRegisterSSE(t1reg), xRegisterSSE(regd));
xMOVMSKPS(xRegister32(tempReg), xRegisterSSE(t1reg)); xMOVMSKPS(eax, xRegisterSSE(t1reg));
xAND(xRegister32(tempReg), 1); //Check sign (if regd == zero, sign will be set) xAND(eax, 1); //Check sign (if regd == zero, sign will be set)
qjmp1 = JZ8(0); //Skip if not set qjmp1 = JZ8(0); //Skip if not set
xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 ) xOR(ptr32[&fpuRegs.fprc[31]], FPUflagI | FPUflagSI); // Set I and SI flags ( 0/0 )
qjmp2 = JMP8(0); qjmp2 = JMP8(0);
@ -1055,7 +1056,6 @@ void recRSQRThelper1(int regd, int regt) // Performs the RSQRT function when reg
x86SetJ32(pjmp32); x86SetJ32(pjmp32);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
_freeX86reg(tempReg);
} }
void recRSQRThelper2(int regd, int regt) // Performs the RSQRT function when regd <- Fs and regt <- Ft (Doesn't set flags) void recRSQRThelper2(int regd, int regt) // Performs the RSQRT function when regd <- Fs and regt <- Ft (Doesn't set flags)


@ -56,11 +56,14 @@ REC_FUNC_DEL(PSLLW, _Rd_);
void recPLZCW() void recPLZCW()
{ {
int regs = -1; int x86regs = -1;
int xmmregs = -1;
if (!_Rd_) if (!_Rd_)
return; return;
// TODO(Stenzek): Don't flush to memory at the end here. Careful of Rs == Rd.
EE::Profiler.EmitOp(eeOpcode::PLZCW); EE::Profiler.EmitOp(eeOpcode::PLZCW);
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
@ -78,16 +81,20 @@ void recPLZCW()
_eeOnWriteReg(_Rd_, 0); _eeOnWriteReg(_Rd_, 0);
if ((regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0) if ((xmmregs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0)
{ {
xMOVD(eax, xRegisterSSE(regs)); xMOVD(eax, xRegisterSSE(xmmregs));
}
else if ((x86regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ)) >= 0)
{
xMOV(eax, xRegister32(x86regs));
} }
else else
{ {
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
} }
_deleteEEreg(_Rd_, 0); _deleteEEreg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
// Count the number of leading bits (MSB) that match the sign bit, excluding the sign // Count the number of leading bits (MSB) that match the sign bit, excluding the sign
// bit itself. // bit itself.
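Note: a reference implementation of that per-word count (sketch, not the emitted code):

#include <cstdint>

uint32_t plzcw_word(uint32_t v)
{
    // XOR with a sign-extended copy turns "bits equal to the sign bit" into
    // leading zeros; the sign bit itself always cancels out.
    const uint32_t x = v ^ static_cast<uint32_t>(static_cast<int32_t>(v) >> 31);
    uint32_t n = 0; // scan from bit 30 down, stopping at the first mismatch
    for (uint32_t mask = 0x40000000u; mask != 0 && (x & mask) == 0; mask >>= 1)
        ++n;
    return n; // 0..31; 31 for v == 0 or v == 0xFFFFFFFF
}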
@ -115,11 +122,14 @@ void recPLZCW()
// second word // second word
if (regs >= 0) if (xmmregs >= 0)
{ {
xPSHUF.D(xRegisterSSE(regs & 0xf), xRegisterSSE(regs & 0xf), 0xe1); xPEXTR.D(eax, xRegisterSSE(xmmregs), 1);
xMOVD(eax, xRegisterSSE(regs & 0xf)); }
xPSHUF.D(xRegisterSSE(regs & 0xf), xRegisterSSE(regs & 0xf), 0xe1); else if (x86regs >= 0)
{
xMOV(rax, xRegister64(x86regs));
xSHR(rax, 32);
} }
else else
{ {
@ -158,7 +168,7 @@ void recPMFHL()
{ {
case 0x00: // LW case 0x00: // LW
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT);
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0x88);
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO), 0x88); xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO), 0x88);
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -167,7 +177,7 @@ void recPMFHL()
break; break;
case 0x01: // UW case 0x01: // UW
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT);
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xdd); xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xdd);
xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO), 0xdd); xPSHUF.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO), 0xdd);
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -182,7 +192,7 @@ void recPMFHL()
break; break;
case 0x03: // LH case 0x03: // LH
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT);
xPSHUF.LW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0x88); xPSHUF.LW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0x88);
xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO), 0x88); xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_LO), 0x88);
xPSHUF.HW(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x88); xPSHUF.HW(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x88);
@ -452,7 +462,7 @@ void recPPACW()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
if (EEREC_D == EEREC_T) if (EEREC_D == EEREC_T)
{ {
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S), 0x88); xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S), 0x88);
@ -492,7 +502,7 @@ void recPPACH()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xPSHUF.LW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S), 0x88); xPSHUF.LW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S), 0x88);
xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88); xPSHUF.LW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T), 0x88);
xPSHUF.HW(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x88); xPSHUF.HW(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x88);
@ -518,28 +528,19 @@ void recPPACB()
int info = eeRecompileCodeXMM((_Rs_ != 0 ? XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED); int info = eeRecompileCodeXMM((_Rs_ != 0 ? XMMINFO_READS : 0) | XMMINFO_READT | XMMINFO_WRITED);
if (_Rs_ == 0) if (_Rs_ == 0)
{ {
if (_hasFreeXMMreg()) const int t0reg = _allocTempXMMreg(XMMT_INT);
{
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
xPSLL.W(xRegisterSSE(EEREC_D), 8); xPSLL.W(xRegisterSSE(EEREC_D), 8);
xPXOR(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPXOR(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSRL.W(xRegisterSSE(EEREC_D), 8); xPSRL.W(xRegisterSSE(EEREC_D), 8);
xPACK.USWB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPACK.USWB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
} }
else else
{ {
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); const int t0reg = _allocTempXMMreg(XMMT_INT);
xPSLL.W(xRegisterSSE(EEREC_D), 8);
xPSRL.W(xRegisterSSE(EEREC_D), 8);
xPACK.USWB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_D));
xPSRL.DQ(xRegisterSSE(EEREC_D), 8);
}
}
else
{
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -563,8 +564,8 @@ void recPEXT5()
EE::Profiler.EmitOp(eeOpcode::PEXT5); EE::Profiler.EmitOp(eeOpcode::PEXT5);
int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); // for bit 5..9 xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); // for bit 5..9
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); // for bit 15 xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); // for bit 15
@ -602,8 +603,8 @@ void recPPAC5()
EE::Profiler.EmitOp(eeOpcode::PPAC5); EE::Profiler.EmitOp(eeOpcode::PPAC5);
int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); // for bit 10..14 xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); // for bit 10..14
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); // for bit 15 xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(EEREC_T)); // for bit 15
@ -671,7 +672,7 @@ void recPCGTB()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPCMP.GTB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPCMP.GTB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -696,7 +697,7 @@ void recPCGTH()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPCMP.GTW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPCMP.GTW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -722,7 +723,7 @@ void recPCGTW()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPCMP.GTD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPCMP.GTD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -783,9 +784,9 @@ void recPADDSW()
EE::Profiler.EmitOp(eeOpcode::PADDSW); EE::Profiler.EmitOp(eeOpcode::PADDSW);
int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT);
int t2reg = _allocTempXMMreg(XMMT_INT, -1); int t2reg = _allocTempXMMreg(XMMT_INT);
// The idea is: // The idea is:
// s = x + y; (wraps around) // s = x + y; (wraps around)
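Note: the hunk cuts the comment short; the sequence implements a saturating 32-bit add per lane, which SSE2 has no single instruction for. Scalar sketch of one lane:

#include <cstdint>

int32_t paddsw_lane(int32_t x, int32_t y)
{
    // s = x + y, wrapping (unsigned add avoids signed-overflow UB).
    const int32_t s = static_cast<int32_t>(static_cast<uint32_t>(x) + static_cast<uint32_t>(y));
    // Overflow iff x and y share a sign and s disagrees with it.
    if (((x ^ y) >= 0) && ((x ^ s) < 0))
        return (x < 0) ? INT32_MIN : INT32_MAX; // clamp toward the operands' shared sign
    return s;
}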
@ -843,7 +844,7 @@ void recPSUBSB()
xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if (EEREC_D == EEREC_T) else if (EEREC_D == EEREC_T)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPSUB.SB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -870,7 +871,7 @@ void recPSUBSH()
xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if (EEREC_D == EEREC_T) else if (EEREC_D == EEREC_T)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPSUB.SW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -894,9 +895,9 @@ void recPSUBSW()
EE::Profiler.EmitOp(eeOpcode::PSUBSW); EE::Profiler.EmitOp(eeOpcode::PSUBSW);
int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT);
int t2reg = _allocTempXMMreg(XMMT_INT, -1); int t2reg = _allocTempXMMreg(XMMT_INT);
// The idea is: // The idea is:
// s = x - y; (wraps around) // s = x - y; (wraps around)
@ -1050,7 +1051,7 @@ void recPSUBB()
xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if (EEREC_D == EEREC_T) else if (EEREC_D == EEREC_T)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPSUB.B(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -1077,7 +1078,7 @@ void recPSUBH()
xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if (EEREC_D == EEREC_T) else if (EEREC_D == EEREC_T)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPSUB.W(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -1104,7 +1105,7 @@ void recPSUBW()
xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if (EEREC_D == EEREC_T) else if (EEREC_D == EEREC_T)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPSUB.D(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -1138,7 +1139,7 @@ void recPEXTLW()
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else if (EEREC_D == EEREC_S) else if (EEREC_D == EEREC_S)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPUNPCK.LDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -1172,7 +1173,7 @@ void recPEXTLB()
xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else if (EEREC_D == EEREC_S) else if (EEREC_D == EEREC_S)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPUNPCK.LBW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -1206,7 +1207,7 @@ void recPEXTLH()
xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else if (EEREC_D == EEREC_S) else if (EEREC_D == EEREC_S)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPUNPCK.LWD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -1264,7 +1265,7 @@ void recPABSW() //needs clamping
EE::Profiler.EmitOp(eeOpcode::PABSW); EE::Profiler.EmitOp(eeOpcode::PABSW);
int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.D(xRegisterSSE(t0reg), 31); xPSLL.D(xRegisterSSE(t0reg), 31);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffffffff if equal to 0x80000000 xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffffffff if equal to 0x80000000
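Note: the PCMP.EQD pair above builds a mask of lanes equal to 0x80000000, whose absolute value does not fit in 32 bits; the fix-up that consumes the mask is elided from this hunk. Per-lane sketch:

#include <cstdint>

int32_t pabsw_lane(int32_t v)
{
    if (v == INT32_MIN)           // |0x80000000| overflows, so clamp instead
        return INT32_MAX;
    return (v < 0) ? -v : v;
}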
@ -1284,7 +1285,7 @@ void recPABSH()
EE::Profiler.EmitOp(eeOpcode::PABSH); EE::Profiler.EmitOp(eeOpcode::PABSH);
int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED); int info = eeRecompileCodeXMM(XMMINFO_READT | XMMINFO_WRITED);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.W(xRegisterSSE(t0reg), 15); xPSLL.W(xRegisterSSE(t0reg), 15);
xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffff if equal to 0x8000 xPCMP.EQW(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); //0xffff if equal to 0x8000
@ -1337,7 +1338,7 @@ void recPADSBH()
} }
else else
{ {
const int t0reg = _allocTempXMMreg(XMMT_INT, -1); const int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
@ -1387,8 +1388,8 @@ void recPADDUW()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT);
xPCMP.EQB(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPCMP.EQB(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.D(xRegisterSSE(t0reg), 31); // 0x80000000 xPSLL.D(xRegisterSSE(t0reg), 31); // 0x80000000
@ -1432,7 +1433,7 @@ void recPSUBUB()
xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if (EEREC_D == EEREC_T) else if (EEREC_D == EEREC_T)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPSUB.USB(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -1459,7 +1460,7 @@ void recPSUBUH()
xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
else if (EEREC_D == EEREC_T) else if (EEREC_D == EEREC_T)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPSUB.USW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -1482,8 +1483,8 @@ void recPSUBUW()
EE::Profiler.EmitOp(eeOpcode::PSUBUW); EE::Profiler.EmitOp(eeOpcode::PSUBUW);
int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT);
xPCMP.EQB(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPCMP.EQB(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.D(xRegisterSSE(t0reg), 31); // 0x80000000 xPSLL.D(xRegisterSSE(t0reg), 31); // 0x80000000
@ -1545,7 +1546,7 @@ void recPEXTUH()
xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else if (EEREC_D == EEREC_S) else if (EEREC_D == EEREC_S)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPUNPCK.HWD(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -1614,7 +1615,7 @@ void recPEXTUB()
xPUNPCK.HBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPUNPCK.HBW(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else if (EEREC_D == EEREC_S) else if (EEREC_D == EEREC_S)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
xPUNPCK.HBW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPUNPCK.HBW(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -1649,7 +1650,7 @@ void recPEXTUW()
xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
else if (EEREC_D == EEREC_S) else if (EEREC_D == EEREC_S)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPUNPCK.HDQ(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
@ -1910,8 +1911,8 @@ void recPSLLVW()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT);
// shamt is 5-bit // shamt is 5-bit
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
@ -1967,8 +1968,8 @@ void recPSRLVW()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT);
// shamt is 5-bit // shamt is 5-bit
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
@ -2134,7 +2135,7 @@ void recPHMADH()
EE::Profiler.EmitOp(eeOpcode::PHMADH); EE::Profiler.EmitOp(eeOpcode::PHMADH);
int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI); int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
xPSRL.D(xRegisterSSE(t0reg), 16); xPSRL.D(xRegisterSSE(t0reg), 16);
@ -2181,8 +2182,8 @@ void recPMSUBH()
EE::Profiler.EmitOp(eeOpcode::PMSUBH); EE::Profiler.EmitOp(eeOpcode::PMSUBH);
int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_READLO | XMMINFO_READHI | XMMINFO_WRITELO | XMMINFO_WRITEHI); int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_READLO | XMMINFO_READHI | XMMINFO_WRITELO | XMMINFO_WRITEHI);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT);
if (!_Rd_) if (!_Rd_)
{ {
@ -2247,7 +2248,7 @@ void recPHMSBH()
EE::Profiler.EmitOp(eeOpcode::PHMSBH); EE::Profiler.EmitOp(eeOpcode::PHMSBH);
int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI); int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xPCMP.EQD(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO)); xPCMP.EQD(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_LO));
xPSRL.D(xRegisterSSE(EEREC_LO), 16); xPSRL.D(xRegisterSSE(EEREC_LO), 16);
@ -2316,7 +2317,7 @@ void recPINTH()
int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
if (EEREC_D == EEREC_S) if (EEREC_D == EEREC_S)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVHL.PS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVHL.PS(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
if (EEREC_D != EEREC_T) if (EEREC_D != EEREC_T)
xMOVQZX(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xMOVQZX(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));
@ -2360,7 +2361,7 @@ void recPMULTH()
EE::Profiler.EmitOp(eeOpcode::PMULTH); EE::Profiler.EmitOp(eeOpcode::PMULTH);
int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_WRITELO | XMMINFO_WRITEHI); int info = eeRecompileCodeXMM(XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_WRITELO | XMMINFO_WRITEHI);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(EEREC_S));
xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(EEREC_S));
@ -2506,8 +2507,8 @@ void recPMADDH()
EE::Profiler.EmitOp(eeOpcode::PMADDH); EE::Profiler.EmitOp(eeOpcode::PMADDH);
int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_READLO | XMMINFO_READHI | XMMINFO_WRITELO | XMMINFO_WRITEHI); int info = eeRecompileCodeXMM((_Rd_ ? XMMINFO_WRITED : 0) | XMMINFO_READS | XMMINFO_READT | XMMINFO_READLO | XMMINFO_READHI | XMMINFO_WRITELO | XMMINFO_WRITEHI);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT);
if (!_Rd_) if (!_Rd_)
{ {
@ -2616,8 +2617,8 @@ void recPSRAVW()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT);
// shamt is 5-bit // shamt is 5-bit
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
@ -2699,7 +2700,7 @@ void recPINTEH()
else if (EEREC_D == EEREC_T) else if (EEREC_D == EEREC_T)
{ {
pxAssert(EEREC_D != EEREC_S); pxAssert(EEREC_D != EEREC_S);
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT);
xPSLL.D(xRegisterSSE(EEREC_D), 16); xPSLL.D(xRegisterSSE(EEREC_D), 16);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_S));
xPSRL.D(xRegisterSSE(EEREC_D), 16); xPSRL.D(xRegisterSSE(EEREC_D), 16);
@ -2708,7 +2709,7 @@ void recPINTEH()
} }
else else
{ {
t0reg = _allocTempXMMreg(XMMT_INT, -1); t0reg = _allocTempXMMreg(XMMT_INT);
xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S)); xMOVDQA(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_S));
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T)); xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(EEREC_T));
xPSLL.D(xRegisterSSE(t0reg), 16); xPSLL.D(xRegisterSSE(t0reg), 16);
@ -2767,7 +2768,7 @@ void recPMULTUW()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xd8); xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xd8);
xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg)); xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg));
xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg)); xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg));
@ -2833,7 +2834,7 @@ void recPMADDUW()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xd8); xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(EEREC_HI), 0xd8);
xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg)); xMOVDQA(xRegisterSSE(EEREC_LO), xRegisterSSE(t0reg));
xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg)); xMOVDQA(xRegisterSSE(EEREC_HI), xRegisterSSE(t0reg));
@ -2902,7 +2903,7 @@ void recPNOR()
{ {
if (EEREC_D == EEREC_T) if (EEREC_D == EEREC_T)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
@ -2919,7 +2920,7 @@ void recPNOR()
{ {
if (EEREC_D == EEREC_S) if (EEREC_D == EEREC_S)
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg)); xPXOR(xRegisterSSE(EEREC_D), xRegisterSSE(t0reg));
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
@ -2932,7 +2933,7 @@ void recPNOR()
} }
else else
{ {
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT);
if (EEREC_D == EEREC_S) if (EEREC_D == EEREC_S)
xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T)); xPOR(xRegisterSSE(EEREC_D), xRegisterSSE(EEREC_T));


@ -104,6 +104,7 @@ static EEINST* s_psaveInstInfo = NULL;
u32 s_psxBlockCycles = 0; // cycles of current block recompiling u32 s_psxBlockCycles = 0; // cycles of current block recompiling
static u32 s_savenBlockCycles = 0; static u32 s_savenBlockCycles = 0;
static bool s_recompilingDelaySlot = false;
static void iPsxBranchTest(u32 newpc, u32 cpuBranch); static void iPsxBranchTest(u32 newpc, u32 cpuBranch);
void psxRecompileNextInstruction(int delayslot); void psxRecompileNextInstruction(int delayslot);
@ -119,7 +120,58 @@ static u32 psxdump = 0;
#define PSXREC_CLEARM(mem) \ #define PSXREC_CLEARM(mem) \
(((mem) < g_psxMaxRecMem && (psxRecLUT[(mem) >> 16] + (mem))) ? \ (((mem) < g_psxMaxRecMem && (psxRecLUT[(mem) >> 16] + (mem))) ? \
psxRecClearMem(mem) : 4) psxRecClearMem(mem) : \
4)
#ifdef DUMP_BLOCKS
static ZydisFormatterFunc s_old_print_address;
static ZyanStatus ZydisFormatterPrintAddressAbsolute(const ZydisFormatter* formatter,
ZydisFormatterBuffer* buffer, ZydisFormatterContext* context)
{
ZyanU64 address;
ZYAN_CHECK(ZydisCalcAbsoluteAddress(context->instruction, context->operand,
context->runtime_address, &address));
char buf[128];
u32 len = 0;
#define A(x) ((u64)(x))
if (address >= A(iopMem->Main) && address < A(iopMem->P))
{
len = snprintf(buf, sizeof(buf), "iopMem+0x%08X", static_cast<u32>(address - A(iopMem->Main)));
}
else if (address >= A(&psxRegs.GPR) && address < A(&psxRegs.CP0))
{
len = snprintf(buf, sizeof(buf), "psxRegs.GPR.%s", R3000A::disRNameGPR[static_cast<u32>(address - A(&psxRegs)) / 4u]);
}
else if (address == A(&psxRegs.pc))
{
len = snprintf(buf, sizeof(buf), "psxRegs.pc");
}
else if (address == A(&psxRegs.cycle))
{
len = snprintf(buf, sizeof(buf), "psxRegs.cycle");
}
else if (address == A(&g_nextEventCycle))
{
len = snprintf(buf, sizeof(buf), "g_nextEventCycle");
}
#undef A
if (len > 0)
{
ZYAN_CHECK(ZydisFormatterBufferAppend(buffer, ZYDIS_TOKEN_SYMBOL));
ZyanString* string;
ZYAN_CHECK(ZydisFormatterBufferGetString(buffer, &string));
return ZyanStringAppendFormat(string, "&%s", buf);
}
return s_old_print_address(formatter, buffer, context);
}
#endif
// ===================================================================================================== // =====================================================================================================
// Dynamically Compiled Dispatchers - R3000A style // Dynamically Compiled Dispatchers - R3000A style
@ -197,9 +249,9 @@ static DynGenFunc* _DynGen_EnterRecompiledCode()
{ // Properly scope the frame prologue/epilogue { // Properly scope the frame prologue/epilogue
#ifdef ENABLE_VTUNE #ifdef ENABLE_VTUNE
xScopedStackFrame frame(true); xScopedStackFrame frame(true, true);
#else #else
xScopedStackFrame frame(IsDevBuild); xScopedStackFrame frame(false, true);
#endif #endif
xJMP((void*)iopDispatcherReg); xJMP((void*)iopDispatcherReg);
@ -266,7 +318,7 @@ static void iIopDumpBlock(int startpc, u8* ptr)
} }
// write the instruction info // write the instruction info
std::fprintf(f, "\n\nlive0 - %x, lastuse - %x used - %x\n", EEINST_LIVE0, EEINST_LASTUSE, EEINST_USED); std::fprintf(f, "\n\nlive0 - %x, lastuse - %x used - %x\n", EEINST_LIVE, EEINST_LASTUSE, EEINST_USED);
memzero(used); memzero(used);
numused = 0; numused = 0;
@ -325,85 +377,14 @@ static void iIopDumpBlock(int startpc, u8* ptr)
} }
int status = std::system(fmt::format("objdump -D -b binary -mi386 -M intel --no-show-raw-insn {} >> {}; rm {}", int status = std::system(fmt::format("objdump -D -b binary -mi386 -M intel --no-show-raw-insn {} >> {}; rm {}",
"mydump1", filename.c_str(), "mydump1").c_str()); "mydump1", filename.c_str(), "mydump1")
.c_str());
if (!WIFEXITED(status)) if (!WIFEXITED(status))
Console.Error("IOP dump didn't terminate normally"); Console.Error("IOP dump didn't terminate normally");
#endif #endif
} }
u8 _psxLoadWritesRs(u32 tempcode)
{
switch (tempcode >> 26)
{
case 32: case 33: case 34: case 35: case 36: case 37: case 38:
return ((tempcode >> 21) & 0x1f) == ((tempcode >> 16) & 0x1f); // rs==rt
}
return 0;
}
u8 _psxIsLoadStore(u32 tempcode)
{
switch (tempcode >> 26)
{
case 32: case 33: case 34: case 35: case 36: case 37: case 38:
// 4 byte stores
case 40: case 41: case 42: case 43: case 46:
return 1;
}
return 0;
}
void _psxFlushAllUnused()
{
int i;
for (i = 0; i < 34; ++i)
{
if (psxpc < s_nEndBlock)
{
if ((g_pCurInstInfo[1].regs[i] & EEINST_USED))
continue;
}
else if ((g_pCurInstInfo[0].regs[i] & EEINST_USED))
{
continue;
}
if (i < 32 && PSX_IS_CONST1(i))
{
_psxFlushConstReg(i);
}
else
{
_deleteX86reg(X86TYPE_PSX, i, 1);
}
}
}
int _psxFlushUnusedConstReg()
{
int i;
for (i = 1; i < 32; ++i)
{
if ((g_psxHasConstReg & (1 << i)) && !(g_psxFlushedConstReg & (1 << i)) &&
!_recIsRegWritten(g_pCurInstInfo + 1, (s_nEndBlock - psxpc) / 4, XMMTYPE_GPRREG, i))
{
// check if will be written in the future
xMOV(ptr32[&psxRegs.GPR.r[i]], g_psxConstRegs[i]);
g_psxFlushedConstReg |= 1 << i;
return 1;
}
}
return 0;
}
void _psxFlushCachedRegs()
{
_psxFlushConstRegs();
}
void _psxFlushConstReg(int reg) void _psxFlushConstReg(int reg)
{ {
if (PSX_IS_CONST1(reg) && !(g_psxFlushedConstReg & (1 << reg))) if (PSX_IS_CONST1(reg) && !(g_psxFlushedConstReg & (1 << reg)))
@ -415,6 +396,8 @@ void _psxFlushConstReg(int reg)
void _psxFlushConstRegs() void _psxFlushConstRegs()
{ {
// TODO: Combine flushes
int i; int i;
// flush constants // flush constants
@ -442,66 +425,88 @@ void _psxDeleteReg(int reg, int flush)
if (!reg) if (!reg)
return; return;
if (flush && PSX_IS_CONST1(reg)) if (flush && PSX_IS_CONST1(reg))
{
_psxFlushConstReg(reg); _psxFlushConstReg(reg);
return;
}
PSX_DEL_CONST(reg); PSX_DEL_CONST(reg);
_deleteX86reg(X86TYPE_PSX, reg, flush ? 0 : 2); _deletePSXtoX86reg(reg, flush ? DELETE_REG_FREE : DELETE_REG_FREE_NO_WRITEBACK);
} }
void _psxMoveGPRtoR(const xRegister32& to, int fromgpr) void _psxMoveGPRtoR(const xRegister32& to, int fromgpr)
{ {
if (PSX_IS_CONST1(fromgpr)) if (PSX_IS_CONST1(fromgpr))
{
xMOV(to, g_psxConstRegs[fromgpr]); xMOV(to, g_psxConstRegs[fromgpr]);
}
else else
{ {
// check x86 const int reg = EEINST_USEDTEST(fromgpr) ? _allocX86reg(X86TYPE_PSX, fromgpr, MODE_READ) : _checkX86reg(X86TYPE_PSX, fromgpr, MODE_READ);
if (reg >= 0)
xMOV(to, xRegister32(reg));
else
xMOV(to, ptr[&psxRegs.GPR.r[fromgpr]]); xMOV(to, ptr[&psxRegs.GPR.r[fromgpr]]);
} }
} }
#if 0
void _psxMoveGPRtoM(uptr to, int fromgpr) void _psxMoveGPRtoM(uptr to, int fromgpr)
{ {
if( PSX_IS_CONST1(fromgpr) ) if (PSX_IS_CONST1(fromgpr))
xMOV(ptr32[(u32*)(to)], g_psxConstRegs[fromgpr] ); {
else { xMOV(ptr32[(u32*)(to)], g_psxConstRegs[fromgpr]);
// check x86 }
xMOV(eax, ptr[&psxRegs.GPR.r[ fromgpr ] ]); else
xMOV(ptr[(void*)(to)], eax); {
const int reg = EEINST_USEDTEST(fromgpr) ? _allocX86reg(X86TYPE_PSX, fromgpr, MODE_READ) : _checkX86reg(X86TYPE_PSX, fromgpr, MODE_READ);
if (reg >= 0)
{
xMOV(ptr32[(u32*)(to)], xRegister32(reg));
}
else
{
xMOV(eax, ptr[&psxRegs.GPR.r[fromgpr]]);
xMOV(ptr32[(u32*)(to)], eax);
}
} }
} }
#endif
#if 0
void _psxMoveGPRtoRm(x86IntRegType to, int fromgpr)
{
if( PSX_IS_CONST1(fromgpr) )
xMOV(ptr32[xAddressReg(to)], g_psxConstRegs[fromgpr] );
else {
// check x86
xMOV(eax, ptr[&psxRegs.GPR.r[ fromgpr ] ]);
xMOV(ptr[xAddressReg(to)], eax);
}
}
#endif
void _psxFlushCall(int flushtype) void _psxFlushCall(int flushtype)
{ {
// x86-32 ABI : These registers are not preserved across calls: // Free registers that are not saved across function calls (x86-32 ABI):
_freeX86reg(eax); for (u32 i = 0; i < iREGCNT_GPR; i++)
_freeX86reg(ecx); {
_freeX86reg(edx); if (!x86regs[i].inuse)
continue;
if (xRegisterBase::IsCallerSaved(i) ||
((flushtype & FLUSH_FREE_NONTEMP_X86) && x86regs[i].type != X86TYPE_TEMP) ||
((flushtype & FLUSH_FREE_TEMP_X86) && x86regs[i].type == X86TYPE_TEMP))
{
_freeX86reg(i);
}
}
if (flushtype & FLUSH_ALL_X86)
_flushX86regs();
if (flushtype & FLUSH_CONSTANT_REGS)
_psxFlushConstRegs();
if ((flushtype & FLUSH_PC) /*&& !g_cpuFlushedPC*/) if ((flushtype & FLUSH_PC) /*&& !g_cpuFlushedPC*/)
{ {
xMOV(ptr32[&psxRegs.pc], psxpc); xMOV(ptr32[&psxRegs.pc], psxpc);
//g_cpuFlushedPC = true; //g_cpuFlushedPC = true;
} }
}
if (flushtype & FLUSH_CACHED_REGS) void _psxFlushAllDirty()
_psxFlushConstRegs(); {
// TODO: Combine flushes
for (u32 i = 0; i < 32; ++i)
{
if (PSX_IS_CONST1(i))
_psxFlushConstReg(i);
}
_flushX86regs();
} }
void psxSaveBranchState() void psxSaveBranchState()
@ -538,41 +543,235 @@ void _psxOnWriteReg(int reg)
PSX_DEL_CONST(reg); PSX_DEL_CONST(reg);
} }
bool psxTrySwapDelaySlot(u32 rs, u32 rt, u32 rd)
{
#if 1
if (s_recompilingDelaySlot)
return false;
const u32 opcode_encoded = iopMemRead32(psxpc);
if (opcode_encoded == 0)
{
psxRecompileNextInstruction(true, true);
return true;
}
const u32 opcode_rs = ((opcode_encoded >> 21) & 0x1F);
const u32 opcode_rt = ((opcode_encoded >> 16) & 0x1F);
const u32 opcode_rd = ((opcode_encoded >> 11) & 0x1F);
switch (opcode_encoded >> 26)
{
case 8: // ADDI
case 9: // ADDIU
case 10: // SLTI
case 11: // SLTIU
case 12: // ANDI
case 13: // ORI
case 14: // XORI
case 15: // LUI
case 32: // LB
case 33: // LH
case 34: // LWL
case 35: // LW
case 36: // LBU
case 37: // LHU
case 38: // LWR
case 39: // LWU
case 40: // SB
case 41: // SH
case 42: // SWL
case 43: // SW
case 46: // SWR
{
if ((rs != 0 && rs == opcode_rt) || (rt != 0 && rt == opcode_rt) || (rd != 0 && (rd == opcode_rs || rd == opcode_rt)))
goto is_unsafe;
}
break;
case 50: // LWC2
case 58: // SWC2
break;
case 0: // SPECIAL
{
switch (opcode_encoded & 0x3F)
{
case 0: // SLL
case 2: // SRL
case 3: // SRA
case 4: // SLLV
case 6: // SRLV
case 7: // SRAV
case 32: // ADD
case 33: // ADDU
case 34: // SUB
case 35: // SUBU
case 36: // AND
case 37: // OR
case 38: // XOR
case 39: // NOR
case 42: // SLT
case 43: // SLTU
{
if ((rs != 0 && rs == opcode_rd) || (rt != 0 && rt == opcode_rd) || (rd != 0 && (rd == opcode_rs || rd == opcode_rt)))
goto is_unsafe;
}
break;
case 15: // SYNC
case 24: // MULT
case 25: // MULTU
case 26: // DIV
case 27: // DIVU
break;
default:
goto is_unsafe;
}
}
break;
case 16: // COP0
case 17: // COP1
case 18: // COP2
case 19: // COP3
{
switch ((opcode_encoded >> 21) & 0x1F)
{
case 0: // MFC0
case 2: // CFC0
{
if ((rs != 0 && rs == opcode_rt) || (rt != 0 && rt == opcode_rt) || (rd != 0 && rd == opcode_rt))
goto is_unsafe;
}
break;
case 4: // MTC0
case 6: // CTC0
break;
default:
{
// swap when it's GTE
if ((opcode_encoded >> 26) != 18)
goto is_unsafe;
}
break;
}
break;
}
break;
default:
goto is_unsafe;
}
RALOG("Swapping delay slot %08X %s\n", psxpc, disR3000AF(iopMemRead32(psxpc), psxpc));
psxRecompileNextInstruction(true, true);
return true;
is_unsafe:
RALOG("NOT SWAPPING delay slot %08X %s\n", psxpc, disR3000AF(iopMemRead32(psxpc), psxpc));
return false;
#else
return false;
#endif
}
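The register checks above reduce to one hazard rule: the slot instruction must not write a register the branch reads, and must not read or write the register the branch writes. As a standalone predicate (sketch; the real code additionally restricts the slot to a whitelist of opcode classes, rejecting anything with control flow or unknown side effects):

#include <cstdint>

// rs/rt: registers the branch reads; rd: register it writes (0 = none).
// slotSrc1/slotSrc2: registers the slot instruction reads; slotDst: register it writes.
bool canSwapDelaySlot(uint32_t rs, uint32_t rt, uint32_t rd,
                      uint32_t slotSrc1, uint32_t slotSrc2, uint32_t slotDst)
{
    if (slotDst != 0 && (slotDst == rs || slotDst == rt))
        return false; // slot would clobber a branch input
    if (rd != 0 && (rd == slotSrc1 || rd == slotSrc2 || rd == slotDst))
        return false; // slot depends on, or fights over, the branch's output
    return true;
}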
int psxTryRenameReg(int to, int from, int fromx86, int other, int xmminfo)
{
	// can't rename when in form Rd = Rs op Rt and Rd == Rs or Rd == Rt
	if ((xmminfo & XMMINFO_NORENAME) || fromx86 < 0 || to == from || to == other || !EEINST_RENAMETEST(from))
		return -1;

	RALOG("Renaming %s to %s\n", R3000A::disRNameGPR[from], R3000A::disRNameGPR[to]);

	// flush back when it's been modified
	if (x86regs[fromx86].mode & MODE_WRITE && EEINST_LIVETEST(from))
		_writebackX86Reg(fromx86);

	// remove all references to renamed-to register
	_deletePSXtoX86reg(to, DELETE_REG_FREE_NO_WRITEBACK);
	PSX_DEL_CONST(to);

	// and do the actual rename, new register has been modified.
	x86regs[fromx86].reg = to;
	x86regs[fromx86].mode |= MODE_READ | MODE_WRITE;
	return fromx86;
}
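Concretely: for an op like `addiu rt, rs, imm` where the use analysis says rs dies here, the host register already caching rs is relabelled as rt and no move is emitted. An illustrative before/after (host register choice arbitrary):

// Illustrative only. Suppose host ecx caches guest $a0 (rs) and $a0 is dead
// after this op. Compiling "addiu $v0, $a0, 4":
//
//   without renaming:  mov eax, ecx    ; copy rs into rt's host register
//                      add eax, 4
//   with renaming:     add ecx, 4      ; ecx is simply relabelled $a0 -> $v0
//
// The rename itself is pure bookkeeping in the allocator, as above:
//   x86regs[fromx86].reg = to;
//   x86regs[fromx86].mode |= MODE_READ | MODE_WRITE;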
// rd = rs op rt
void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode, int xmminfo)
{
	if (!_Rd_)
		return;

	if (PSX_IS_CONST2(_Rs_, _Rt_))
	{
		_deletePSXtoX86reg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
		PSX_SET_CONST(_Rd_);
		constcode();
		return;
	}

	// we have to put these up here, because the register allocator below will wipe out const flags
	// for the destination register when/if it switches it to write mode.
	const bool s_is_const = PSX_IS_CONST1(_Rs_);
	const bool t_is_const = PSX_IS_CONST1(_Rt_);
	const bool d_is_const = PSX_IS_CONST1(_Rd_);
	const bool s_is_used = EEINST_USEDTEST(_Rs_);
	const bool t_is_used = EEINST_USEDTEST(_Rt_);

	if (!s_is_const)
		_addNeededGPRtoX86reg(_Rs_);
	if (!t_is_const)
		_addNeededGPRtoX86reg(_Rt_);
	if (!d_is_const)
		_addNeededGPRtoX86reg(_Rd_);

	u32 info = 0;
	int regs = _checkX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
	if (regs < 0 && ((!s_is_const && s_is_used) || _Rs_ == _Rd_))
		regs = _allocX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
	if (regs >= 0)
		info |= PROCESS_EE_SET_S(regs);

	int regt = _checkX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
	if (regt < 0 && ((!t_is_const && t_is_used) || _Rt_ == _Rd_))
		regt = _allocX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
	if (regt >= 0)
		info |= PROCESS_EE_SET_T(regt);

	// If S is no longer live, swap D for S. Saves the move.
	int regd = psxTryRenameReg(_Rd_, _Rs_, regs, _Rt_, xmminfo);
	if (regd < 0)
	{
		// TODO: If not live, write direct to memory.
		regd = _allocX86reg(X86TYPE_PSX, _Rd_, MODE_WRITE);
	}
	if (regd >= 0)
		info |= PROCESS_EE_SET_D(regd);

	_validateRegs();

	if (s_is_const && regs < 0)
	{
		// This *must* go inside the if, because of when _Rs_ = _Rd_
		PSX_DEL_CONST(_Rd_);
		constscode(info /*| PROCESS_CONSTS*/);
		return;
	}

	if (t_is_const && regt < 0)
	{
		PSX_DEL_CONST(_Rd_);
		consttcode(info /*| PROCESS_CONSTT*/);
		return;
	}

	PSX_DEL_CONST(_Rd_);
	noconstcode(info);
}
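The same dispatch shape recurs in every Const* helper: fold at compile time when all sources are known, otherwise pick the variant that can still exploit one known operand. A condensed, hypothetical model of the idea (not the emitter API):

#include <cstdint>
#include <optional>

// Hypothetical model of the constant-propagation dispatch, for illustration.
struct ConstTracker
{
	std::optional<uint32_t> value[32]; // known value per guest register, if any
};

static void CompileAddu(ConstTracker& c, int rd, int rs, int rt)
{
	if (c.value[rs] && c.value[rt])
	{
		// constcode(): folded outright, no host code is emitted.
		c.value[rd] = *c.value[rs] + *c.value[rt];
		return;
	}

	// consts/constt/noconst paths would emit x86 here; either way the
	// destination no longer holds a compile-time constant.
	c.value[rd].reset();
}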
static void psxRecompileIrxImport()

@@ -619,7 +818,7 @@ static void psxRecompileIrxImport()
}
// rt = rs op imm16
void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode, int xmminfo)
{
	if (!_Rt_)
	{
@@ -629,75 +828,157 @@ void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode)
		return;
	}

	if (PSX_IS_CONST1(_Rs_))
	{
		_deletePSXtoX86reg(_Rt_, DELETE_REG_FREE_NO_WRITEBACK);
		PSX_SET_CONST(_Rt_);
		constcode();
		return;
	}

	_addNeededPSXtoX86reg(_Rs_);
	_addNeededPSXtoX86reg(_Rt_);

	u32 info = 0;
	const bool s_is_used = EEINST_USEDTEST(_Rs_);
	const int regs = s_is_used ? _allocX86reg(X86TYPE_PSX, _Rs_, MODE_READ) : _checkX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
	if (regs >= 0)
		info |= PROCESS_EE_SET_S(regs);

	int regt = psxTryRenameReg(_Rt_, _Rs_, regs, 0, xmminfo);
	if (regt < 0)
	{
		regt = _allocX86reg(X86TYPE_PSX, _Rt_, MODE_WRITE);
	}
	if (regt >= 0)
		info |= PROCESS_EE_SET_T(regt);

	_validateRegs();

	PSX_DEL_CONST(_Rt_);
	noconstcode(info);
}
// rd = rt op sa
void psxRecompileCodeConst2(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode, int xmminfo)
{
	if (!_Rd_)
		return;

	if (PSX_IS_CONST1(_Rt_))
	{
		_deletePSXtoX86reg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
		PSX_SET_CONST(_Rd_);
		constcode();
		return;
	}

	_addNeededPSXtoX86reg(_Rt_);
	_addNeededPSXtoX86reg(_Rd_);

	u32 info = 0;
	const bool s_is_used = EEINST_USEDTEST(_Rt_);
	const int regt = s_is_used ? _allocX86reg(X86TYPE_PSX, _Rt_, MODE_READ) : _checkX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
	if (regt >= 0)
		info |= PROCESS_EE_SET_T(regt);

	int regd = psxTryRenameReg(_Rd_, _Rt_, regt, 0, xmminfo);
	if (regd < 0)
	{
		regd = _allocX86reg(X86TYPE_PSX, _Rd_, MODE_WRITE);
	}
	if (regd >= 0)
		info |= PROCESS_EE_SET_D(regd);

	_validateRegs();

	PSX_DEL_CONST(_Rd_);
	noconstcode(info);
}
// rd = rt MULT rs (SPECIAL)
void psxRecompileCodeConst3(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode, int LOHI)
{
	if (PSX_IS_CONST2(_Rs_, _Rt_))
	{
		if (LOHI)
		{
			_deletePSXtoX86reg(PSX_LO, DELETE_REG_FREE_NO_WRITEBACK);
			_deletePSXtoX86reg(PSX_HI, DELETE_REG_FREE_NO_WRITEBACK);
		}

		constcode();
		return;
	}

	// we have to put these up here, because the register allocator below will wipe out const flags
	// for the destination register when/if it switches it to write mode.
	const bool s_is_const = PSX_IS_CONST1(_Rs_);
	const bool t_is_const = PSX_IS_CONST1(_Rt_);
	const bool s_is_used = EEINST_USEDTEST(_Rs_);
	const bool t_is_used = EEINST_USEDTEST(_Rt_);

	if (!s_is_const)
		_addNeededGPRtoX86reg(_Rs_);
	if (!t_is_const)
		_addNeededGPRtoX86reg(_Rt_);
	if (LOHI)
	{
		if (EEINST_LIVETEST(PSX_LO))
			_addNeededPSXtoX86reg(PSX_LO);
		if (EEINST_LIVETEST(PSX_HI))
			_addNeededPSXtoX86reg(PSX_HI);
	}

	u32 info = 0;
	int regs = _checkX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
	if (regs < 0 && !s_is_const && s_is_used)
		regs = _allocX86reg(X86TYPE_PSX, _Rs_, MODE_READ);
	if (regs >= 0)
		info |= PROCESS_EE_SET_S(regs);

	// need at least one in a register
	int regt = _checkX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
	if (regs < 0 || (regt < 0 && !t_is_const && t_is_used))
		regt = _allocX86reg(X86TYPE_PSX, _Rt_, MODE_READ);
	if (regt >= 0)
		info |= PROCESS_EE_SET_T(regt);

	if (LOHI)
	{
		// going to destroy lo/hi, so invalidate if we're writing it back to state
		const bool lo_is_used = EEINST_USEDTEST(PSX_LO);
		const int reglo = lo_is_used ? _allocX86reg(X86TYPE_PSX, PSX_LO, MODE_WRITE) : -1;
		if (reglo >= 0)
			info |= PROCESS_EE_SET_LO(reglo) | PROCESS_EE_LO;
		else
			_deletePSXtoX86reg(PSX_LO, DELETE_REG_FREE_NO_WRITEBACK);

		const bool hi_is_live = EEINST_USEDTEST(PSX_HI);
		const int reghi = hi_is_live ? _allocX86reg(X86TYPE_PSX, PSX_HI, MODE_WRITE) : -1;
		if (reghi >= 0)
			info |= PROCESS_EE_SET_HI(reghi) | PROCESS_EE_HI;
		else
			_deletePSXtoX86reg(PSX_HI, DELETE_REG_FREE_NO_WRITEBACK);
	}

	_validateRegs();

	if (s_is_const && regs < 0)
	{
		// This *must* go inside the if, because of when _Rs_ = _Rd_
		constscode(info /*| PROCESS_CONSTS*/);
		return;
	}

	if (t_is_const && regt < 0)
	{
		consttcode(info /*| PROCESS_CONSTT*/);
		return;
	}

	noconstcode(info);
}
static u8* m_recBlockAlloc = NULL;

@@ -730,10 +1011,14 @@ static void recAlloc()
	}

	u8* curpos = m_recBlockAlloc;
	recRAM = (BASEBLOCK*)curpos;
	curpos += (Ps2MemSize::IopRam / 4) * sizeof(BASEBLOCK);
	recROM = (BASEBLOCK*)curpos;
	curpos += (Ps2MemSize::Rom / 4) * sizeof(BASEBLOCK);
	recROM1 = (BASEBLOCK*)curpos;
	curpos += (Ps2MemSize::Rom1 / 4) * sizeof(BASEBLOCK);
	recROM2 = (BASEBLOCK*)curpos;
	curpos += (Ps2MemSize::Rom2 / 4) * sizeof(BASEBLOCK);

	if (!s_pInstCache)
@@ -929,35 +1214,39 @@ void psxSetBranchReg(u32 reg)

	if (reg != 0xffffffff)
	{
		const bool swap = psxTrySwapDelaySlot(reg, 0, 0);

		int wbreg = -1;
		if (!swap)
		{
			wbreg = _allocX86reg(X86TYPE_PCWRITEBACK, 0, MODE_WRITE | MODE_CALLEESAVED);
			_psxMoveGPRtoR(xRegister32(wbreg), reg);

			psxRecompileNextInstruction(true, false);

			if (x86regs[wbreg].inuse && x86regs[wbreg].type == X86TYPE_PCWRITEBACK)
			{
				xMOV(ptr32[&psxRegs.pc], xRegister32(wbreg));
				x86regs[wbreg].inuse = 0;
			}
			else
			{
				xMOV(eax, ptr32[&psxRegs.pcWriteback]);
				xMOV(ptr32[&psxRegs.pc], eax);
			}
		}
		else
		{
			if (PSX_IS_DIRTY_CONST(reg) || _hasX86reg(X86TYPE_PSX, reg, 0))
			{
				const int x86reg = _allocX86reg(X86TYPE_PSX, reg, MODE_READ);
				xMOV(ptr32[&psxRegs.pc], xRegister32(x86reg));
			}
			else
			{
				_psxMoveGPRtoM((uptr)&psxRegs.pc, reg);
			}
		}
	}

	_psxFlushCall(FLUSH_EVERYTHING);
@@ -1239,17 +1528,47 @@ static void psxEncodeMemcheck()
	bool store = (opcode.flags & IS_STORE) != 0;
	switch (opcode.flags & MEMTYPE_MASK)
	{
		case MEMTYPE_BYTE:
			psxRecMemcheck(op, 8, store);
			break;
		case MEMTYPE_HALF:
			psxRecMemcheck(op, 16, store);
			break;
		case MEMTYPE_WORD:
			psxRecMemcheck(op, 32, store);
			break;
		case MEMTYPE_DWORD:
			psxRecMemcheck(op, 64, store);
			break;
	}
}
void psxRecompileNextInstruction(bool delayslot, bool swapped_delayslot)
{
#ifdef DUMP_BLOCKS
	const bool dump_block = true;
	const u8* instStart = x86Ptr;
	ZydisDecoder disas_decoder;
	ZydisFormatter disas_formatter;
	ZydisDecodedInstruction disas_instruction;
	if (dump_block)
	{
		fprintf(stderr, "Compiling %s%s\n", delayslot ? "delay slot " : "", disR3000AF(iopMemRead32(psxpc), psxpc));

		if (!delayslot)
		{
			ZydisDecoderInit(&disas_decoder, ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_ADDRESS_WIDTH_64);
			ZydisFormatterInit(&disas_formatter, ZYDIS_FORMATTER_STYLE_INTEL);
			s_old_print_address = (ZydisFormatterFunc)&ZydisFormatterPrintAddressAbsolute;
			ZydisFormatterSetHook(&disas_formatter, ZYDIS_FORMATTER_FUNC_PRINT_ADDRESS_ABS, (const void**)&s_old_print_address);
		}
	}
#endif

	const int old_code = psxRegs.code;
	EEINST* old_inst_info = g_pCurInstInfo;
	s_recompilingDelaySlot = delayslot;

	// add breakpoint
	if (!delayslot)
@@ -1257,11 +1576,9 @@ void psxRecompileNextInstruction(int delayslot)
		psxEncodeBreakpoint();
		psxEncodeMemcheck();
	}
	else
	{
		_clearNeededX86regs();
	}

	psxRegs.code = iopMemRead32(psxpc);
@@ -1274,7 +1591,31 @@
	rpsxBSC[psxRegs.code >> 26]();
	s_psxBlockCycles += g_iopCyclePenalty;

	if (!swapped_delayslot)
		_clearNeededX86regs();

	if (swapped_delayslot)
	{
		psxRegs.code = old_code;
		g_pCurInstInfo = old_inst_info;
	}

#ifdef DUMP_BLOCKS
	if (dump_block && !delayslot)
	{
		const u8* instPtr = instStart;
		ZyanUSize instLength = static_cast<ZyanUSize>(x86Ptr - instStart);
		while (ZYAN_SUCCESS(ZydisDecoderDecodeBuffer(&disas_decoder, instPtr, instLength, &disas_instruction)))
		{
			char buffer[256];
			if (ZYAN_SUCCESS(ZydisFormatterFormatInstruction(&disas_formatter, &disas_instruction, buffer, sizeof(buffer), (ZyanU64)instPtr)))
				std::fprintf(stderr, "  %016" PRIX64 " %s\n", (u64)instPtr, buffer);

			instPtr += disas_instruction.length;
			instLength -= disas_instruction.length;
		}
	}
#endif
}
static void PreBlockCheck(u32 blockpc)

@@ -1370,8 +1711,7 @@ static void iopRecRecompile(const u32 startpc)
	s_pCurBlock = PSX_GETBLOCK(startpc);

	pxAssert(s_pCurBlock->GetFnptr() == (uptr)iopJITCompile || s_pCurBlock->GetFnptr() == (uptr)iopJITCompileInBlock);

	s_pCurBlockEx = recBlocks.Get(HWADDR(startpc));

@@ -1408,9 +1748,7 @@
	while (1)
	{
		BASEBLOCK* pblock = PSX_GETBLOCK(i);
		if (i != startpc && pblock->GetFnptr() != (uptr)iopJITCompile && pblock->GetFnptr() != (uptr)iopJITCompileInBlock)
		{
			// branch = 3
			willbranch3 = 1;

@@ -1449,7 +1787,10 @@
			goto StartRecomp;

		// branches
		case 4:
		case 5:
		case 6:
		case 7:
			s_branchTo = _Imm_ * 4 + i + 4;
			if (s_branchTo > startpc && s_branchTo < i)
				s_nEndBlock = s_branchTo;

@@ -1525,7 +1866,7 @@ StartRecomp:
	g_pCurInstInfo = s_pInstCache;
	while (!psxbranch && psxpc < s_nEndBlock)
	{
		psxRecompileNextInstruction(false, false);
	}

	if (IsDebugBuild && (psxdump & 1))
@@ -34,25 +34,17 @@ static const int psxInstCycles_Load = 0;
extern uptr psxRecLUT[];

void _psxFlushConstReg(int reg);
void _psxFlushConstRegs();

void _psxDeleteReg(int reg, int flush);
void _psxFlushCall(int flushtype);
void _psxFlushAllDirty();

void _psxOnWriteReg(int reg);

void _psxMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr);
void _psxMoveGPRtoM(uptr to, int fromgpr);

extern u32 psxpc; // recompiler pc
extern int psxbranch; // set for branch

@@ -63,13 +55,14 @@ void psxLoadBranchState();
extern void psxSetBranchReg(u32 reg);
extern void psxSetBranchImm(u32 imm);
extern void psxRecompileNextInstruction(bool delayslot, bool swapped_delayslot);

////////////////////////////////////////////////////////////////////
// IOP Constant Propagation Defines, Vars, and API - From here down!

#define PSX_IS_CONST1(reg) ((reg) < 32 && (g_psxHasConstReg & (1 << (reg))))
#define PSX_IS_CONST2(reg1, reg2) ((g_psxHasConstReg & (1 << (reg1))) && (g_psxHasConstReg & (1 << (reg2))))
#define PSX_IS_DIRTY_CONST(reg) ((reg) < 32 && (g_psxHasConstReg & (1 << (reg))) && (!(g_psxFlushedConstReg & (1 << (reg)))))
#define PSX_SET_CONST(reg) \
	{ \
		if ((reg) < 32) \

@@ -91,28 +84,31 @@ extern u32 g_psxHasConstReg, g_psxFlushedConstReg;
typedef void (*R3000AFNPTR)();
typedef void (*R3000AFNPTR_INFO)(int info);

bool psxTrySwapDelaySlot(u32 rs, u32 rt, u32 rd);
int psxTryRenameReg(int to, int from, int fromx86, int other, int xmminfo);

//
// non mmx/xmm version, slower
//

// rd = rs op rt
#define PSXRECOMPILE_CONSTCODE0(fn, info) \
	void rpsx##fn(void) \
	{ \
		psxRecompileCodeConst0(rpsx##fn##_const, rpsx##fn##_consts, rpsx##fn##_constt, rpsx##fn##_, info); \
	}

// rt = rs op imm16
#define PSXRECOMPILE_CONSTCODE1(fn, info) \
	void rpsx##fn(void) \
	{ \
		psxRecompileCodeConst1(rpsx##fn##_const, rpsx##fn##_, info); \
	}

// rd = rt op sa
#define PSXRECOMPILE_CONSTCODE2(fn, info) \
	void rpsx##fn(void) \
	{ \
		psxRecompileCodeConst2(rpsx##fn##_const, rpsx##fn##_, info); \
	}

// [lo,hi] = rt op rs
@@ -130,11 +126,11 @@ typedef void (*R3000AFNPTR_INFO)(int info);
	}

// rd = rs op rt
void psxRecompileCodeConst0(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode, int xmminfo);
// rt = rs op imm16
void psxRecompileCodeConst1(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode, int xmminfo);
// rd = rt op sa
void psxRecompileCodeConst2(R3000AFNPTR constcode, R3000AFNPTR_INFO noconstcode, int xmminfo);
// [lo,hi] = rt op rs
void psxRecompileCodeConst3(R3000AFNPTR constcode, R3000AFNPTR_INFO constscode, R3000AFNPTR_INFO consttcode, R3000AFNPTR_INFO noconstcode, int LOHI);
File diff suppressed because it is too large
@@ -21,6 +21,9 @@
#include "iCore.h"
#include "R5900_Profiler.h"

// Register containing a pointer to our fastmem (4GB) area
#define RFASTMEMBASE x86Emitter::rbp
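For context on what the base register buys: guest addresses index directly off RFASTMEMBASE, so the hot path of a load is a single indexed mov with no software address translation. A hedged C++ model of the idea (names hypothetical; accesses to unmapped guest pages are caught by the host fault handler and backpatched):

#include <cstdint>
#include <cstring>

// Hypothetical model of a fastmem read; fastmem_base stands in for the value
// pinned in rbp (RFASTMEMBASE).
static uint32_t FastmemRead32(const uint8_t* fastmem_base, uint32_t guest_addr)
{
	uint32_t value;
	std::memcpy(&value, fastmem_base + guest_addr, sizeof(value)); // ~ mov eax, [rbp + rcx]
	return value;
}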
extern u32 maxrecmem;
extern u32 pc; // recompiler pc
extern int g_branch; // set for branch

@@ -61,11 +64,16 @@ extern bool s_nBlockInterlocked; // Current block has VU0 interlocking
extern bool g_recompilingDelaySlot;

// Used for generating backpatch thunks for fastmem.
u8* recBeginThunk();
u8* recEndThunk();

// used when processing branches
bool TrySwapDelaySlot(u32 rs, u32 rt, u32 rd);
void SaveBranchState();
void LoadBranchState();
void recompileNextInstruction(bool delayslot, bool swapped_delay_slot);
void SetBranchReg(u32 reg);
void SetBranchImm(u32 imm);

@@ -78,8 +86,7 @@ namespace R5900
{
	namespace Dynarec
	{
		extern void recDoBranchImm(u32 branchTo, u32* jmpSkip, bool isLikely = false, bool swappedDelaySlot = false);
	} // namespace Dynarec
} // namespace R5900
@@ -88,6 +95,7 @@ namespace R5900
#define GPR_IS_CONST1(reg) (EE_CONST_PROP && (reg) < 32 && (g_cpuHasConstReg & (1 << (reg))))
#define GPR_IS_CONST2(reg1, reg2) (EE_CONST_PROP && (g_cpuHasConstReg & (1 << (reg1))) && (g_cpuHasConstReg & (1 << (reg2))))
#define GPR_IS_DIRTY_CONST(reg) (EE_CONST_PROP && (reg) < 32 && (g_cpuHasConstReg & (1 << (reg))) && (!(g_cpuFlushedConstReg & (1 << (reg)))))
#define GPR_SET_CONST(reg) \
	{ \
		if ((reg) < 32) \

@@ -106,29 +114,23 @@ namespace R5900
alignas(16) extern GPR_reg64 g_cpuConstRegs[32];
extern u32 g_cpuHasConstReg, g_cpuFlushedConstReg;

// finds where the GPR is stored and moves lower 32 bits to EAX
void _eeMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr, bool allow_preload = true);
void _eeMoveGPRtoR(const x86Emitter::xRegister64& to, int fromgpr, bool allow_preload = true);
void _eeMoveGPRtoM(uptr to, int fromgpr); // 32-bit only

void _eeFlushAllDirty();

void _eeOnWriteReg(int reg, int signext);

// totally deletes from const, xmm, and mmx entries
// if flush is 1, also flushes to memory
// if 0, only flushes if not an xmm reg (used when overwriting lower 64bits of reg)
void _deleteEEreg(int reg, int flush);
void _deleteEEreg128(int reg);
void _flushEEreg(int reg, bool clear = false);

int _eeTryRenameReg(int to, int from, int fromx86, int other, int xmminfo);
//////////////////////////////////////
// Templates for code recompilation //

@@ -141,14 +143,27 @@ typedef void (*R5900FNPTR_INFO)(int info);
	void rec##fn(void) \
	{ \
		EE::Profiler.EmitOp(eeOpcode::fn); \
		eeRecompileCode0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, (xmminfo)); \
	}

#define EERECOMPILE_CODERC0(fn, xmminfo) \
	void rec##fn(void) \
	{ \
		EE::Profiler.EmitOp(eeOpcode::fn); \
		eeRecompileCodeRC0(rec##fn##_const, rec##fn##_consts, rec##fn##_constt, rec##fn##_, (xmminfo)); \
	}

#define EERECOMPILE_CODEX(codename, fn, xmminfo) \
	void rec##fn(void) \
	{ \
		EE::Profiler.EmitOp(eeOpcode::fn); \
		codename(rec##fn##_const, rec##fn##_, (xmminfo)); \
	}

#define EERECOMPILE_CODEI(codename, fn, xmminfo) \
	void rec##fn(void) \
	{ \
		EE::Profiler.EmitOp(eeOpcode::fn); \
		codename(rec##fn##_const, rec##fn##_, (xmminfo)); \
	}

//
@@ -156,66 +171,11 @@ typedef void (*R5900FNPTR_INFO)(int info);
//

// rd = rs op rt
void eeRecompileCodeRC0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode, int xmminfo);
// rt = rs op imm16
void eeRecompileCodeRC1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode, int xmminfo);
// rd = rt op sa
void eeRecompileCodeRC2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode, int xmminfo);

#define FPURECOMPILE_CONSTCODE(fn, xmminfo) \
	void rec##fn(void) \
File diff suppressed because it is too large
@@ -72,3 +72,5 @@ namespace R5900
		void Run(u32 start, u32 end, EEINST* inst_cache) override;
	};
} // namespace R5900

void recBackpropBSC(u32 code, EEINST* prev, EEINST* pinst);
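recBackpropBSC drives the block's backwards use analysis: scanning instructions last-to-first, a write kills a register's previous value and a read marks it used. A minimal transfer function for a plain three-register ALU op, with hypothetical types (the real EEINST tracks considerably more state):

#include <cstdint>

// Hypothetical, simplified liveness state: one bit per guest GPR.
struct UseInfo
{
	uint32_t used_mask;
};

// pinst is the state after the instruction; prev receives the state before it.
static void BackpropAluOp(uint32_t rd, uint32_t rs, uint32_t rt, const UseInfo* pinst, UseInfo* prev)
{
	prev->used_mask = pinst->used_mask;
	prev->used_mask &= ~(1u << rd);             // rd is overwritten here: its older value is dead
	prev->used_mask |= (1u << rs) | (1u << rt); // sources must be live on entry
}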
@@ -31,17 +31,18 @@ namespace Dynarec {
// Parameters:
//   jmpSkip - This parameter is the result of the appropriate J32 instruction
//   (usually JZ32 or JNZ32).
void recDoBranchImm(u32 branchTo, u32* jmpSkip, bool isLikely, bool swappedDelaySlot)
{
	// First up is the Branch Taken Path : Save the recompiler's state, compile the
	// DelaySlot, and issue a BranchTest insertion. The state is reloaded below for
	// the "did not branch" path (maintains consts, register allocations, and other optimizations).
	if (!swappedDelaySlot)
	{
		SaveBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(branchTo);

	// Jump target when the branch is *not* taken, skips the branchtest code
@@ -50,18 +51,17 @@ void recDoBranchImm(u32* jmpSkip, bool isLikely)
	// if it's a likely branch then we'll need to skip the delay slot here, since
	// MIPS cancels the delay slot instruction when branches aren't taken.
	if (!swappedDelaySlot)
	{
		LoadBranchState();

		if (!isLikely)
		{
			pc -= 4; // instruction rewinder for delay slot, if non-likely.
			recompileNextInstruction(true, false);
		}
	}

	SetBranchImm(pc); // start a new recompiled block.
}
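The two SetBranchImm sites mirror the MIPS semantics the recompiler must preserve. A compact behavioural model of them (illustrative, not PCSX2 code):

#include <cstdint>

// Behavioural model: what the taken/not-taken code paths above must compute.
// pc is the address of the branch instruction itself.
static uint32_t NextPcAfterBranch(bool taken, bool likely, uint32_t pc, uint32_t target,
                                  void (*delay_slot)())
{
	if (taken || !likely)
		delay_slot(); // non-likely: runs on both paths; likely: taken path only
	return taken ? target : pc + 8; // pc + 8 skips the branch and its delay slot
}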
namespace OpcodeImpl {

@@ -95,6 +95,7 @@ void recMFSA()
	if (!_Rd_)
		return;

	// TODO(Stenzek): Make these less rubbish
	mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE);
	if (mmreg >= 0)
	{
@@ -102,10 +103,9 @@
	}
	else
	{
		xMOV(rax, ptr32[&cpuRegs.sa]);
		_deleteEEreg(_Rd_, 0);
		xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
	}
}

@@ -124,6 +124,10 @@
	{
		xMOVSS(ptr[&cpuRegs.sa], xRegisterSSE(mmreg));
	}
	else if ((mmreg = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ)) >= 0)
	{
		xMOV(ptr[&cpuRegs.sa], xRegister32(mmreg));
	}
	else
	{
		xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
@@ -21,6 +21,7 @@
#include "VU.h"
#include "common/emitter/x86emitter.h"
#include "R3000A.h"
#include "x86/iR3000A.h"

using namespace x86Emitter;

@@ -29,7 +30,7 @@ using namespace x86Emitter;
extern u32 g_psxConstRegs[32];

// X86 caching
static uint g_x86checknext;

// use special x86 register allocation for ia32

@@ -40,92 +41,19 @@ void _initX86regs()
	g_x86checknext = 0;
}
int _getFreeX86reg(int mode)
{
	int tempi = -1;
	u32 bestcount = 0x10000;

	for (uint i = 0; i < iREGCNT_GPR; i++)
	{
		const int reg = (g_x86checknext + i) % iREGCNT_GPR;
		if (x86regs[reg].inuse || !_isAllocatableX86reg(reg))
			continue;

		if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(reg))
			continue;

		if (x86regs[reg].inuse == 0)
		{
@@ -134,20 +62,26 @@ int _getFreeX86reg(int mode)
		}
	}

	for (uint i = 0; i < iREGCNT_GPR; i++)
	{
		if (!_isAllocatableX86reg(i))
			continue;

		if ((mode & MODE_CALLEESAVED) && xRegister32::IsCallerSaved(i))
			continue;

		// should have checked inuse in the previous loop.
		pxAssert(x86regs[i].inuse);

		if (x86regs[i].needed)
			continue;

		if (x86regs[i].type != X86TYPE_TEMP)
		{
			if (x86regs[i].counter < bestcount)
			{
				tempi = static_cast<int>(i);
				bestcount = x86regs[i].counter;
			}
			continue;
@@ -163,22 +97,15 @@
		return tempi;
	}

	pxFailRel("x86 register allocation error");
	return -1;
}
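The policy here is two-pass: take any completely free allocatable register first, otherwise evict the least recently allocated register that is neither needed by the current instruction nor a temporary. A condensed model of the eviction choice (hypothetical names, illustration only):

#include <cstdint>

struct HostReg
{
	bool inuse, needed;
	uint32_t counter; // allocation timestamp, bumped on every allocation
};

// Simplified eviction: pick the oldest in-use register that is not pinned.
static int PickEviction(const HostReg* regs, int count)
{
	int best = -1;
	uint32_t best_counter = UINT32_MAX;
	for (int i = 0; i < count; i++)
	{
		if (!regs[i].inuse || regs[i].needed)
			continue;
		if (regs[i].counter < best_counter)
		{
			best = i;
			best_counter = regs[i].counter;
		}
	}
	return best; // -1 means everything is pinned (allocation failure)
}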
void _flushConstReg(int reg)
{
	if (GPR_IS_CONST1(reg) && !(g_cpuFlushedConstReg & (1 << reg)))
	{
		xWriteImm64ToMem(&cpuRegs.GPR.r[reg].UD[0], rax, g_cpuConstRegs[reg].SD[0]);
		g_cpuFlushedConstReg |= (1 << reg);
		if (reg == 0)
			DevCon.Warning("Flushing r0!");
@@ -187,243 +114,367 @@ void _flushConstReg(int reg)
void _flushConstRegs()
{
	int zero_reg_count = 0;
	int minusone_reg_count = 0;
	for (u32 i = 0; i < 32; i++)
	{
		if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1u << i))
			continue;

		if (g_cpuConstRegs[i].SD[0] == 0)
			zero_reg_count++;
		else if (g_cpuConstRegs[i].SD[0] == -1)
			minusone_reg_count++;
	}

	// if we have more than one of zero/minus-one, precompute
	bool rax_is_zero = false;
	if (zero_reg_count > 1)
	{
		xXOR(eax, eax);
		for (u32 i = 0; i < 32; i++)
		{
			if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1u << i))
				continue;

			if (g_cpuConstRegs[i].SD[0] == 0)
			{
				xMOV(ptr64[&cpuRegs.GPR.r[i].UD[0]], rax);
				g_cpuFlushedConstReg |= 1u << i;
			}
		}
		rax_is_zero = true;
	}
	if (minusone_reg_count > 1)
	{
		if (!rax_is_zero)
			xMOV(rax, -1);
		else
			xNOT(rax);

		for (u32 i = 0; i < 32; i++)
		{
			if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1u << i))
				continue;

			if (g_cpuConstRegs[i].SD[0] == -1)
			{
				xMOV(ptr64[&cpuRegs.GPR.r[i].UD[0]], rax);
				g_cpuFlushedConstReg |= 1u << i;
			}
		}
	}

	// and whatever's left over..
	for (u32 i = 0; i < 32; i++)
	{
		if (!GPR_IS_CONST1(i) || g_cpuFlushedConstReg & (1u << i))
			continue;

		xWriteImm64ToMem(&cpuRegs.GPR.r[i].UD[0], rax, g_cpuConstRegs[i].UD[0]);
		g_cpuFlushedConstReg |= 1u << i;
	}
}
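The batching exists because a 64-bit immediate store is long to encode; materialising a shared value in rax once turns each further flush into a plain 8-byte register store. Illustrative emission for three guest registers all known to be zero (register numbers arbitrary):

// xor eax, eax happens once; the three stores then share rax.
xXOR(eax, eax);
xMOV(ptr64[&cpuRegs.GPR.r[2].UD[0]], rax);
xMOV(ptr64[&cpuRegs.GPR.r[4].UD[0]], rax);
xMOV(ptr64[&cpuRegs.GPR.r[5].UD[0]], rax);
// versus one movabs-style imm64 sequence per register on the fallback path.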
static const char* GetModeString(int mode)
{
	return ((mode & MODE_READ)) ? ((mode & MODE_WRITE) ? "readwrite" : "read") : "write";
}

void _validateRegs()
{
#ifdef PCSX2_DEVBUILD
	// check that no two registers are in write mode in both fprs and gprs
	for (s8 guestreg = 0; guestreg < 32; guestreg++)
	{
		u32 gprreg = 0, gprmode = 0;
		u32 fprreg = 0, fprmode = 0;
		for (u32 hostreg = 0; hostreg < iREGCNT_GPR; hostreg++)
		{
			if (x86regs[hostreg].inuse && x86regs[hostreg].type == X86TYPE_GPR && x86regs[hostreg].reg == guestreg)
			{
				pxAssertMsg(gprreg == 0 && gprmode == 0, "register is not already allocated in a GPR");
				gprreg = hostreg;
				gprmode = x86regs[hostreg].mode;
			}
		}
		for (u32 hostreg = 0; hostreg < iREGCNT_XMM; hostreg++)
		{
			if (xmmregs[hostreg].inuse && xmmregs[hostreg].type == XMMTYPE_GPRREG && xmmregs[hostreg].reg == guestreg)
			{
				pxAssertMsg(fprreg == 0 && fprmode == 0, "register is not already allocated in a XMM");
				fprreg = hostreg;
				fprmode = xmmregs[hostreg].mode;
			}
		}

		if ((gprmode | fprmode) & MODE_WRITE)
			pxAssertMsg((gprmode & MODE_WRITE) != (fprmode & MODE_WRITE), "only one of gpr or fps is in write state");

		if (gprmode & MODE_WRITE)
			pxAssertMsg(fprmode == 0, "when writing to the gpr, fpr is invalid");
		if (fprmode & MODE_WRITE)
			pxAssertMsg(gprmode == 0, "when writing to the fpr, gpr is invalid");
	}
#endif
}
int _allocX86reg(int type, int reg, int mode)
{
	if (type == X86TYPE_GPR || type == X86TYPE_PSX)
	{
		pxAssertDev(reg >= 0 && reg < 34, "Register index out of bounds.");
	}

	int hostXMMreg = (type == X86TYPE_GPR) ? _checkXMMreg(XMMTYPE_GPRREG, reg, 0) : -1;
	if (type != X86TYPE_TEMP)
	{
		for (int i = 0; i < static_cast<int>(iREGCNT_GPR); i++)
		{
			if (!x86regs[i].inuse || x86regs[i].type != type || x86regs[i].reg != reg)
				continue;

			pxAssert(type != X86TYPE_GPR || !GPR_IS_CONST1(reg) || (GPR_IS_CONST1(reg) && g_cpuFlushedConstReg & (1u << reg)));

			// can't go from write to read
			pxAssert(!((x86regs[i].mode & (MODE_READ | MODE_WRITE)) == MODE_WRITE && (mode & (MODE_READ | MODE_WRITE)) == MODE_READ));
			// if (type != X86TYPE_TEMP && !(x86regs[i].mode & MODE_READ) && (mode & MODE_READ))

			if (type == X86TYPE_GPR)
			{
				RALOG("Changing host reg %d for guest reg %d from %s to %s mode\n", i, reg, GetModeString(x86regs[i].mode), GetModeString(x86regs[i].mode | mode));

				if (mode & MODE_WRITE)
				{
					if (GPR_IS_CONST1(reg))
					{
						RALOG("Clearing constant value for guest reg %d on change to write mode\n", reg);
						GPR_DEL_CONST(reg);
					}

					if (hostXMMreg >= 0)
					{
						// ensure upper bits get written
						RALOG("Invalidating host XMM reg %d for guest reg %d due to GPR write transition\n", hostXMMreg, reg);
						pxAssert(!(xmmregs[hostXMMreg].mode & MODE_WRITE));
						_freeXMMreg(hostXMMreg);
					}
				}
			}
			else if (type == X86TYPE_PSX)
			{
				RALOG("Changing host reg %d for guest PSX reg %d from %s to %s mode\n", i, reg, GetModeString(x86regs[i].mode), GetModeString(x86regs[i].mode | mode));

				if (mode & MODE_WRITE)
				{
					if (PSX_IS_CONST1(reg))
					{
						RALOG("Clearing constant value for guest PSX reg %d on change to write mode\n", reg);
						PSX_DEL_CONST(reg);
					}
				}
			}
			else if (type == X86TYPE_VIREG)
			{
				// keep VI temporaries separate
				if (reg < 0)
					continue;
			}

			x86regs[i].counter = g_x86AllocCounter++;
			x86regs[i].mode |= mode & ~MODE_CALLEESAVED;
			x86regs[i].needed = true;
			return i;
		}
	}

	const int regnum = _getFreeX86reg(mode);
	xRegister64 new_reg(regnum);
	x86regs[regnum].type = type;
	x86regs[regnum].reg = reg;
	x86regs[regnum].mode = mode & ~MODE_CALLEESAVED;
	x86regs[regnum].counter = g_x86AllocCounter++;
	x86regs[regnum].needed = true;
	x86regs[regnum].inuse = true;

	if (type == X86TYPE_GPR)
	{
		RALOG("Allocating host reg %d to guest reg %d in %s mode\n", regnum, reg, GetModeString(mode));
	}

	if (mode & MODE_READ)
	{
		switch (type)
		{
			case X86TYPE_GPR:
			{
				if (reg == 0)
				{
					xXOR(xRegister32(new_reg), xRegister32(new_reg)); // 32-bit is smaller and zexts anyway
				}
				else
				{
					if (hostXMMreg >= 0)
					{
						// is in a XMM. we don't need to free the XMM since we're not writing, and it's still valid
						RALOG("Copying %d from XMM %d to GPR %d on read\n", reg, hostXMMreg, regnum);
						xMOVD(new_reg, xRegisterSSE(hostXMMreg)); // actually MOVQ

						// if the XMM was dirty, just get rid of it, we don't want to try to sync the values up...
						if (xmmregs[hostXMMreg].mode & MODE_WRITE)
						{
							RALOG("Freeing dirty XMM %d for GPR %d\n", hostXMMreg, reg);
							_freeXMMreg(hostXMMreg);
						}
					}
					else if (GPR_IS_CONST1(reg))
					{
						xMOV64(new_reg, g_cpuConstRegs[reg].SD[0]);
						g_cpuFlushedConstReg |= (1u << reg);
						x86regs[regnum].mode |= MODE_WRITE; // reg is dirty

						RALOG("Writing constant value %lld from guest reg %d to host reg %d\n", g_cpuConstRegs[reg].SD[0], reg, regnum);
					}
					else
					{
						// not loaded
						RALOG("Loading guest reg %d to GPR %d\n", reg, regnum);
						xMOV(new_reg, ptr64[&cpuRegs.GPR.r[reg].UD[0]]);
					}
				}
			}
			break;

			case X86TYPE_FPRC:
				RALOG("Loading guest reg FPCR %d to GPR %d\n", reg, regnum);
				xMOV(xRegister32(regnum), ptr32[&fpuRegs.fprc[reg]]);
				break;

			case X86TYPE_PSX:
			{
				const xRegister32 new_reg32(regnum);
				if (reg == 0)
				{
					xXOR(new_reg32, new_reg32);
				}
				else
				{
					if (PSX_IS_CONST1(reg))
					{
						xMOV(new_reg32, g_psxConstRegs[reg]);
						g_psxFlushedConstReg |= (1u << reg);
						x86regs[regnum].mode |= MODE_WRITE; // reg is dirty

						RALOG("Writing constant value %d from guest PSX reg %d to host reg %d\n", g_psxConstRegs[reg], reg, regnum);
					}
					else
					{
						RALOG("Loading guest PSX reg %d to GPR %d\n", reg, regnum);
						xMOV(new_reg32, ptr32[&psxRegs.GPR.r[reg]]);
					}
				}
			}
			break;

			default:
				abort();
				break;
		}
	}

	if (type == X86TYPE_GPR && (mode & MODE_WRITE))
	{
		if (reg < 32 && GPR_IS_CONST1(reg))
		{
			RALOG("Clearing constant value for guest reg %d on write allocation\n", reg);
			GPR_DEL_CONST(reg);
		}

		if (hostXMMreg >= 0)
		{
			// writing, so kill the xmm allocation. gotta ensure the upper bits gets stored first.
			RALOG("Invalidating %d from XMM %d because of GPR %d write\n", reg, hostXMMreg, regnum);
			_freeXMMreg(hostXMMreg);
		}
	}
	else if (type == X86TYPE_PSX && (mode & MODE_WRITE))
	{
		if (reg < 32 && PSX_IS_CONST1(reg))
		{
			RALOG("Clearing constant value for guest PSX reg %d on write allocation\n", reg);
			PSX_DEL_CONST(reg);
		}
	}

	// Console.WriteLn("Allocating reg %d", regnum);
	return regnum;
}
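Typical call pattern from an instruction recompiler, for orientation (an illustrative fragment, not a specific site in this commit): read allocations for the sources, a write allocation for the destination, then straight-line emission against the returned host registers.

// Illustrative: compiling "addu rd, rs, rt" against this allocator.
const int xs = _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
const int xt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
const int xd = _allocX86reg(X86TYPE_GPR, _Rd_, MODE_WRITE); // clears rd's const flag

xMOV(xRegister32(xd), xRegister32(xs));
xADD(xRegister32(xd), xRegister32(xt));
xMOVSX(xRegister64(xd), xRegister32(xd)); // MIPS writes the sign-extended 32-bit result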
void _writebackX86Reg(int x86reg)
{
	switch (x86regs[x86reg].type)
	{
		case X86TYPE_GPR:
			RALOG("Writing back GPR reg %d for guest reg %d P2\n", x86reg, x86regs[x86reg].reg);
			xMOV(ptr64[&cpuRegs.GPR.r[x86regs[x86reg].reg].UD[0]], xRegister64(x86reg));
			break;

		case X86TYPE_FPRC:
			RALOG("Writing back GPR reg %d for guest reg FPCR %d P2\n", x86reg, x86regs[x86reg].reg);
			xMOV(ptr32[&fpuRegs.fprc[x86regs[x86reg].reg]], xRegister32(x86reg));
			break;

		case X86TYPE_VIREG:
			RALOG("Writing back VI reg %d for guest reg %d P2\n", x86reg, x86regs[x86reg].reg);
			xMOV(ptr16[&VU0.VI[x86regs[x86reg].reg].UL], xRegister16(x86reg));
			break;

		case X86TYPE_PCWRITEBACK:
			RALOG("Writing back PC writeback in host reg %d\n", x86reg);
			xMOV(ptr32[&cpuRegs.pcWriteback], xRegister32(x86reg));
			break;

		case X86TYPE_PSX:
			RALOG("Writing back PSX GPR reg %d for guest reg %d P2\n", x86reg, x86regs[x86reg].reg);
			xMOV(ptr32[&psxRegs.GPR.r[x86regs[x86reg].reg]], xRegister32(x86reg));
			break;

		case X86TYPE_PSX_PCWRITEBACK:
			RALOG("Writing back PSX PC writeback in host reg %d\n", x86reg);
			xMOV(ptr32[&psxRegs.pcWriteback], xRegister32(x86reg));
			break;

		default:
			abort();
			break;
	}
}
int _checkX86reg(int type, int reg, int mode)
{
	for (uint i = 0; i < iREGCNT_GPR; i++)
	{
		if (x86regs[i].inuse && x86regs[i].reg == reg && x86regs[i].type == type)
		{
			// shouldn't have dirty constants...
			pxAssert((type != X86TYPE_GPR || !GPR_IS_DIRTY_CONST(reg)) &&
					 (type != X86TYPE_PSX || !PSX_IS_DIRTY_CONST(reg)));

			if ((type == X86TYPE_GPR || type == X86TYPE_PSX) && !(x86regs[i].mode & MODE_READ) && (mode & MODE_READ))
				pxFailRel("Somehow ended up with an allocated x86 without mode");

			// ensure constants get deleted once we alloc as write
			if (mode & MODE_WRITE)
			{
				if (type == X86TYPE_GPR)
				{
					// go through the alloc path instead, because we might need to invalidate an xmm.
					return _allocX86reg(X86TYPE_GPR, reg, mode);
				}
				else if (type == X86TYPE_PSX)
				{
					pxAssert(!PSX_IS_DIRTY_CONST(reg));
					PSX_DEL_CONST(reg);
				}
			}

			x86regs[i].mode |= mode;
@@ -438,9 +489,7 @@ int _checkX86reg(int type, int reg, int mode)
void _addNeededX86reg(int type, int reg)
{
	for (uint i = 0; i < iREGCNT_GPR; i++)
	{
		if (!x86regs[i].inuse || x86regs[i].reg != reg || x86regs[i].type != type)
			continue;
@@ -452,9 +501,7 @@ void _addNeededX86reg(int type, int reg)

void _clearNeededX86regs()
{
	for (uint i = 0; i < iREGCNT_GPR; i++)
	{
		if (x86regs[i].needed)
		{
@@ -465,44 +512,6 @@ void _clearNeededX86regs()
	}
}
void _freeX86reg(const x86Emitter::xRegister32& x86reg)
{
	_freeX86reg(x86reg.GetId());
@@ -514,17 +523,33 @@ void _freeX86reg(int x86reg)

	if (x86regs[x86reg].inuse && (x86regs[x86reg].mode & MODE_WRITE))
	{
		_writebackX86Reg(x86reg);
		x86regs[x86reg].mode &= ~MODE_WRITE;
	}

	_freeX86regWithoutWriteback(x86reg);
}

void _freeX86regWithoutWriteback(int x86reg)
{
	pxAssert(x86reg >= 0 && x86reg < (int)iREGCNT_GPR);

	x86regs[x86reg].inuse = 0;

	if (x86regs[x86reg].type == X86TYPE_VIREG)
	{
		RALOG("Freeing VI reg %d in host GPR %d\n", x86regs[x86reg].reg, x86reg);
		//mVUFreeCOP2GPR(x86reg);
		abort();
	}
	else if (x86regs[x86reg].inuse && x86regs[x86reg].type == X86TYPE_GPR)
	{
		RALOG("Freeing X86 register %d (was guest %d)...\n", x86reg, x86regs[x86reg].reg);
	}
	else if (x86regs[x86reg].inuse)
	{
		RALOG("Freeing X86 register %d...\n", x86reg);
	}
}
void _freeX86regs()
@@ -533,12 +558,18 @@ void _freeX86regs()
		_freeX86reg(i);
}

void _flushX86regs()
{
	for (u32 i = 0; i < iREGCNT_GPR; ++i)
	{
		if (x86regs[i].inuse && x86regs[i].mode & MODE_WRITE)
		{
			// shouldn't be const, because if we got to write mode, we should've flushed then
			pxAssert(x86regs[i].type != X86TYPE_GPR || !GPR_IS_DIRTY_CONST(x86regs[i].reg));

			RALOG("Flushing x86 reg %u in _eeFlushAllDirty()\n", i);
			_writebackX86Reg(i);
			x86regs[i].mode = (x86regs[i].mode & ~MODE_WRITE) | MODE_READ;
		}
	}
}
File diff suppressed because it is too large
@@ -22,10 +22,8 @@
using namespace x86Emitter;

namespace R5900::Dynarec::OpcodeImpl
{
/*********************************************************
* Register arithmetic *
* Format: OP rd, rs, rt *
@@ -54,50 +52,109 @@ REC_FUNC_DEL(SLTU, _Rd_);

#else
static void recMoveStoD(int info)
{
	if (info & PROCESS_EE_S)
		xMOV(xRegister32(EEREC_D), xRegister32(EEREC_S));
	else
		xMOV(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
}

static void recMoveStoD64(int info)
{
	if (info & PROCESS_EE_S)
		xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
	else
		xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
}

static void recMoveTtoD(int info)
{
	if (info & PROCESS_EE_T)
		xMOV(xRegister32(EEREC_D), xRegister32(EEREC_T));
	else
		xMOV(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
}

static void recMoveTtoD64(int info)
{
	if (info & PROCESS_EE_T)
		xMOV(xRegister64(EEREC_D), xRegister64(EEREC_T));
	else
		xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
}

//// ADD
static void recADD_const()
{
	g_cpuConstRegs[_Rd_].SD[0] = s64(s32(g_cpuConstRegs[_Rs_].UL[0] + g_cpuConstRegs[_Rt_].UL[0]));
}

// s is constant
static void recADD_consts(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	const s32 cval = g_cpuConstRegs[_Rs_].SL[0];
	recMoveTtoD(info);
	if (cval != 0)
		xADD(xRegister32(EEREC_D), cval);
	xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
}

// t is constant
static void recADD_constt(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	const s32 cval = g_cpuConstRegs[_Rt_].SL[0];
	recMoveStoD(info);
	if (cval != 0)
		xADD(xRegister32(EEREC_D), cval);
	xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
}

// nothing is constant
static void recADD_(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	if ((info & PROCESS_EE_S) && (info & PROCESS_EE_T))
	{
		if (EEREC_D == EEREC_S)
		{
			xADD(xRegister32(EEREC_D), xRegister32(EEREC_T));
		}
		else if (EEREC_D == EEREC_T)
		{
			xADD(xRegister32(EEREC_D), xRegister32(EEREC_S));
		}
		else
		{
			xMOV(xRegister32(EEREC_D), xRegister32(EEREC_S));
			xADD(xRegister32(EEREC_D), xRegister32(EEREC_T));
		}
	}
	else if (info & PROCESS_EE_S)
	{
		xMOV(xRegister32(EEREC_D), xRegister32(EEREC_S));
		xADD(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UD[0]]);
	}
	else if (info & PROCESS_EE_T)
	{
		xMOV(xRegister32(EEREC_D), xRegister32(EEREC_T));
		xADD(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rs_].UD[0]]);
	}
	else
	{
		xMOV(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rs_].UD[0]]);
		xADD(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UD[0]]);
	}

	xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
}

EERECOMPILE_CODERC0(ADD, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT);
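All of these ADD variants end with the same xMOVSX, because on the EE a 32-bit add architecturally produces a sign-extended 64-bit register value. A behavioural model of that contract:

#include <cstdint>

// Models the architectural result of MIPS ADD/ADDU on the EE's 64-bit GPRs;
// the trailing xMOVSX in the recompiled code reproduces exactly this.
static int64_t MipsAdd32(int64_t rs, int64_t rt)
{
	const uint32_t sum = static_cast<uint32_t>(rs) + static_cast<uint32_t>(rt);
	return static_cast<int64_t>(static_cast<int32_t>(sum)); // sign-extend the 32-bit sum
}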
//// ADDU
void recADDU(void)

@@ -111,77 +168,67 @@ void recDADD_const(void)
	g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] + g_cpuConstRegs[_Rt_].UD[0];
}

// s is constant
static void recDADD_consts(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	const s64 cval = g_cpuConstRegs[_Rs_].SD[0];
	recMoveTtoD64(info);
	if (cval != 0)
		xImm64Op(xADD, xRegister64(EEREC_D), rax, cval);
}

// t is constant
static void recDADD_constt(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	const s64 cval = g_cpuConstRegs[_Rt_].SD[0];
	recMoveStoD64(info);
	if (cval != 0)
		xImm64Op(xADD, xRegister64(EEREC_D), rax, cval);
}

// nothing is constant
static void recDADD_(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	if ((info & PROCESS_EE_S) && (info & PROCESS_EE_T))
	{
		if (EEREC_D == EEREC_S)
		{
			xADD(xRegister64(EEREC_D), xRegister64(EEREC_T));
		}
		else if (EEREC_D == EEREC_T)
		{
			xADD(xRegister64(EEREC_D), xRegister64(EEREC_S));
		}
		else
		{
			xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
			xADD(xRegister64(EEREC_D), xRegister64(EEREC_T));
		}
	}
	else if (info & PROCESS_EE_S)
	{
		xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
		xADD(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
	}
	else if (info & PROCESS_EE_T)
	{
		xMOV(xRegister64(EEREC_D), xRegister64(EEREC_T));
		xADD(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
	}
	else
	{
		xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
		xADD(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
	}
}

EERECOMPILE_CODERC0(DADD, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT | XMMINFO_64BITOP);
//// DADDU
void recDADDU(void)

@@ -191,50 +238,92 @@ void recDADDU(void)
//// SUB

static void recSUB_const()
{
	g_cpuConstRegs[_Rd_].SD[0] = s64(s32(g_cpuConstRegs[_Rs_].UL[0] - g_cpuConstRegs[_Rt_].UL[0]));
}

static void recSUB_consts(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	const s32 sval = g_cpuConstRegs[_Rs_].SL[0];
	xMOV(eax, sval);
	if (info & PROCESS_EE_T)
		xSUB(eax, xRegister32(EEREC_T));
	else
		xSUB(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]);
	xMOVSX(xRegister64(EEREC_D), eax);
}

static void recSUB_constt(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	const s32 tval = g_cpuConstRegs[_Rt_].SL[0];
	recMoveStoD(info);
	if (tval != 0)
		xSUB(xRegister32(EEREC_D), tval);
	xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
}

static void recSUB_(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	if (_Rs_ == _Rt_)
	{
		xXOR(xRegister32(EEREC_D), xRegister32(EEREC_D));
		return;
	}

	// a bit messier here because it's not commutative..
	if ((info & PROCESS_EE_S) && (info & PROCESS_EE_T))
	{
		if (EEREC_D == EEREC_S)
		{
			xSUB(xRegister32(EEREC_D), xRegister32(EEREC_T));
			xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
		}
		else if (EEREC_D == EEREC_T)
		{
			// D might equal T
			xMOV(eax, xRegister32(EEREC_S));
			xSUB(eax, xRegister32(EEREC_T));
			xMOVSX(xRegister64(EEREC_D), eax);
		}
		else
		{
			xMOV(xRegister32(EEREC_D), xRegister32(EEREC_S));
			xSUB(xRegister32(EEREC_D), xRegister32(EEREC_T));
			xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
		}
	}
	else if (info & PROCESS_EE_S)
	{
		xMOV(xRegister32(EEREC_D), xRegister32(EEREC_S));
		xSUB(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
		xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
	}
	else if (info & PROCESS_EE_T)
	{
		// D might equal T
		xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
		xSUB(eax, xRegister32(EEREC_T));
		xMOVSX(xRegister64(EEREC_D), eax);
	}
	else
	{
		xMOV(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
		xSUB(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
		xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
	}
}

EERECOMPILE_CODERC0(SUB, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
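// Aside (illustrative, not part of this commit): why the "D might equal T"
// paths above stage the result in eax. Subtraction is not commutative, so with
// the destination aliasing the subtrahend, the naive two-step lowering
// "d = s; d -= t" reads t after overwriting it:
static void model_sub_alias_bug(s32& d, const s32& s, const s32& t)
{
	d = s;  // if &d == &t, the old value of t is lost here...
	d -= t; // ...so this computes s - s, not s - t
}
// Staging in a scratch first (as the generated code does with eax) avoids it:
static void model_sub_alias_ok(s32& d, const s32& s, const s32& t)
{
	s32 tmp = s;
	tmp -= t; // t is still intact even if &d == &t
	d = tmp;
}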
//// SUBU
void recSUBU(void)

@@ -243,74 +332,79 @@ void recSUBU(void)
}

//// DSUB

static void recDSUB_const()
{
	g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] - g_cpuConstRegs[_Rt_].UD[0];
}

static void recDSUB_consts(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	// gross, because if d == t, we can't destroy t
	const s64 sval = g_cpuConstRegs[_Rs_].SD[0];
	const xRegister64 regd((info & PROCESS_EE_T && EEREC_D == EEREC_T) ? rax.GetId() : EEREC_D);
	xMOV64(regd, sval);

	if (info & PROCESS_EE_T)
		xSUB(regd, xRegister64(EEREC_T));
	else
		xSUB(regd, ptr64[&cpuRegs.GPR.r[_Rt_].SD[0]]);

	// emitter will eliminate redundant moves.
	xMOV(xRegister64(EEREC_D), regd);
}

static void recDSUB_constt(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	const s64 tval = g_cpuConstRegs[_Rt_].SD[0];
	recMoveStoD64(info);
	if (tval != 0)
		xImm64Op(xSUB, xRegister64(EEREC_D), rax, tval);
}

static void recDSUB_(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	if (_Rs_ == _Rt_)
	{
		xXOR(xRegister32(EEREC_D), xRegister32(EEREC_D));
		return;
	}

	// a bit messier here because it's not commutative..
	if ((info & PROCESS_EE_S) && (info & PROCESS_EE_T))
	{
		// D might equal T
		const xRegister64 regd(EEREC_D == EEREC_T ? rax.GetId() : EEREC_D);
		xMOV(regd, xRegister64(EEREC_S));
		xSUB(regd, xRegister64(EEREC_T));
		xMOV(xRegister64(EEREC_D), regd);
	}
	else if (info & PROCESS_EE_S)
	{
		xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
		xSUB(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
	}
	else if (info & PROCESS_EE_T)
	{
		// D might equal T
		const xRegister64 regd(EEREC_D == EEREC_T ? rax.GetId() : EEREC_D);
		xMOV(regd, ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
		xSUB(regd, xRegister64(EEREC_T));
		xMOV(xRegister64(EEREC_D), regd);
	}
	else
	{
		xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
		xSUB(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
	}
}

EERECOMPILE_CODERC0(DSUB, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);
//// DSUBU
void recDSUBU(void)

@@ -320,24 +414,24 @@ void recDSUBU(void)
namespace
{
	enum class LogicalOp
	{
		AND,
		OR,
		XOR,
		NOR
	};
} // namespace

static void recLogicalOp_constv(LogicalOp op, int info, int creg, u32 vreg, int regv)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	xImpl_G1Logic bad{};
	const xImpl_G1Logic& xOP = op == LogicalOp::AND ? xAND : op == LogicalOp::OR ? xOR :
		op == LogicalOp::XOR ? xXOR :
		op == LogicalOp::NOR ? xOR :
		bad;
	s64 fixedInput, fixedOutput, identityInput;
	bool hasFixed = true;
	switch (op)

@@ -369,29 +463,18 @@ static void recLogicalOp_constv(LogicalOp op, int info, int creg, u32 vreg)
	if (hasFixed && cval.SD[0] == fixedInput)
	{
		xMOV64(xRegister64(EEREC_D), fixedOutput);
	}
	else
	{
		if (regv >= 0)
			xMOV(xRegister64(EEREC_D), xRegister64(regv));
		else
			xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[vreg].UD[0]]);

		if (cval.SD[0] != identityInput)
			xImm64Op(xOP, xRegister64(EEREC_D), rax, cval.UD[0]);

		if (op == LogicalOp::NOR)
			xNOT(xRegister64(EEREC_D));
	}
}
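// Aside (sketch, not part of this commit): a plain-C++ model of the operators
// handled above, useful for seeing where the fixed/identity constants come
// from. For AND, 0 is a fixed input (output is 0 regardless of the other
// operand) and ~0 the identity; for OR the roles swap; XOR has identity 0 and
// no fixed input; NOR's final xNOT handles the inversion:
static constexpr s64 model_logical(LogicalOp op, s64 a, s64 b)
{
	switch (op)
	{
		case LogicalOp::AND: return a & b; // model_logical(AND, x, 0) == 0 for any x
		case LogicalOp::OR:  return a | b;
		case LogicalOp::XOR: return a ^ b;
		default:             return ~(a | b); // NOR
	}
}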
@@ -400,208 +483,234 @@ static void recLogicalOp(LogicalOp op, int info)
	pxAssert(!(info & PROCESS_EE_XMM));

	xImpl_G1Logic bad{};
	const xImpl_G1Logic& xOP = op == LogicalOp::AND ? xAND : op == LogicalOp::OR ? xOR :
		op == LogicalOp::XOR ? xXOR :
		op == LogicalOp::NOR ? xOR :
		bad;
	pxAssert(&xOP != &bad);

	// swap because it's commutative and Rd might be Rt
	u32 rs = _Rs_, rt = _Rt_;
	int regs = (info & PROCESS_EE_S) ? EEREC_S : -1, regt = (info & PROCESS_EE_T) ? EEREC_T : -1;
	if (_Rd_ == _Rt_)
	{
		std::swap(rs, rt);
		std::swap(regs, regt);
	}

	if (op == LogicalOp::XOR && rs == rt)
	{
		xXOR(xRegister32(EEREC_D), xRegister32(EEREC_D));
	}
	else
	{
		if (regs >= 0)
			xMOV(xRegister64(EEREC_D), xRegister64(regs));
		else
			xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[rs].UD[0]]);

		if (regt >= 0)
			xOP(xRegister64(EEREC_D), xRegister64(regt));
		else
			xOP(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[rt].UD[0]]);

		if (op == LogicalOp::NOR)
			xNOT(xRegister64(EEREC_D));
	}
}
//// AND

static void recAND_const()
{
	g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] & g_cpuConstRegs[_Rt_].UD[0];
}

static void recAND_consts(int info)
{
	recLogicalOp_constv(LogicalOp::AND, info, _Rs_, _Rt_, (info & PROCESS_EE_T) ? EEREC_T : -1);
}

static void recAND_constt(int info)
{
	recLogicalOp_constv(LogicalOp::AND, info, _Rt_, _Rs_, (info & PROCESS_EE_S) ? EEREC_S : -1);
}

static void recAND_(int info)
{
	recLogicalOp(LogicalOp::AND, info);
}

EERECOMPILE_CODERC0(AND, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);

//// OR

static void recOR_const()
{
	g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] | g_cpuConstRegs[_Rt_].UD[0];
}

static void recOR_consts(int info)
{
	recLogicalOp_constv(LogicalOp::OR, info, _Rs_, _Rt_, (info & PROCESS_EE_T) ? EEREC_T : -1);
}

static void recOR_constt(int info)
{
	recLogicalOp_constv(LogicalOp::OR, info, _Rt_, _Rs_, (info & PROCESS_EE_S) ? EEREC_S : -1);
}

static void recOR_(int info)
{
	recLogicalOp(LogicalOp::OR, info);
}

EERECOMPILE_CODERC0(OR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);

//// XOR

static void recXOR_const()
{
	g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] ^ g_cpuConstRegs[_Rt_].UD[0];
}

static void recXOR_consts(int info)
{
	recLogicalOp_constv(LogicalOp::XOR, info, _Rs_, _Rt_, (info & PROCESS_EE_T) ? EEREC_T : -1);
}

static void recXOR_constt(int info)
{
	recLogicalOp_constv(LogicalOp::XOR, info, _Rt_, _Rs_, (info & PROCESS_EE_S) ? EEREC_S : -1);
}

static void recXOR_(int info)
{
	recLogicalOp(LogicalOp::XOR, info);
}

EERECOMPILE_CODERC0(XOR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);

//// NOR

static void recNOR_const()
{
	g_cpuConstRegs[_Rd_].UD[0] = ~(g_cpuConstRegs[_Rs_].UD[0] | g_cpuConstRegs[_Rt_].UD[0]);
}

static void recNOR_consts(int info)
{
	recLogicalOp_constv(LogicalOp::NOR, info, _Rs_, _Rt_, (info & PROCESS_EE_T) ? EEREC_T : -1);
}

static void recNOR_constt(int info)
{
	recLogicalOp_constv(LogicalOp::NOR, info, _Rt_, _Rs_, (info & PROCESS_EE_S) ? EEREC_S : -1);
}

static void recNOR_(int info)
{
	recLogicalOp(LogicalOp::NOR, info);
}

EERECOMPILE_CODERC0(NOR, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);
//// SLT - test with silent hill, lemans

static void recSLT_const()
{
	g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].SD[0] < g_cpuConstRegs[_Rt_].SD[0];
}

static void recSLTs_const(int info, int sign, int st)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	const s64 cval = g_cpuConstRegs[st ? _Rt_ : _Rs_].SD[0];

	const xImpl_Set& SET = st ? (sign ? xSETL : xSETB) : (sign ? xSETG : xSETA);

	// If Rd == Rs or Rt, we can't xor it before it's used.
	// So, allocate a temporary register first, and then reallocate it to Rd.
	const xRegister32 dreg((_Rd_ == (st ? _Rs_ : _Rt_)) ? _allocX86reg(X86TYPE_TEMP, 0, 0) : EEREC_D);
	const int regs = st ? ((info & PROCESS_EE_S) ? EEREC_S : -1) : ((info & PROCESS_EE_T) ? EEREC_T : -1);
	xXOR(dreg, dreg);

	if (regs >= 0)
		xImm64Op(xCMP, xRegister64(regs), rcx, cval);
	else
		xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[st ? _Rs_ : _Rt_].UD[0]], rcx, cval);
	SET(xRegister8(dreg));

	if (dreg.GetId() != EEREC_D)
	{
		std::swap(x86regs[dreg.GetId()], x86regs[EEREC_D]);
		_freeX86reg(EEREC_D);
	}
}

static void recSLTs_(int info, int sign)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	const xImpl_Set& SET = sign ? xSETL : xSETB;

	// need to keep Rs/Rt around.
	const xRegister32 dreg((_Rd_ == _Rt_ || _Rd_ == _Rs_) ? _allocX86reg(X86TYPE_TEMP, 0, 0) : EEREC_D);

	// force Rs into a register, may as well cache it since we're loading anyway.
	const int regs = (info & PROCESS_EE_S) ? EEREC_S : _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);

	xXOR(dreg, dreg);

	if (info & PROCESS_EE_T)
		xCMP(xRegister64(regs), xRegister64(EEREC_T));
	else
		xCMP(xRegister64(regs), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
	SET(xRegister8(dreg));

	if (dreg.GetId() != EEREC_D)
	{
		std::swap(x86regs[dreg.GetId()], x86regs[EEREC_D]);
		_freeX86reg(EEREC_D);
	}
}

static void recSLT_consts(int info)
{
	recSLTs_const(info, 1, 0);
}

static void recSLT_constt(int info)
{
	recSLTs_const(info, 1, 1);
}

static void recSLT_(int info)
{
	recSLTs_(info, 1);
}

EERECOMPILE_CODERC0(SLT, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_NORENAME);

// SLTU - test with silent hill, lemans

static void recSLTU_const()
{
	g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] < g_cpuConstRegs[_Rt_].UD[0];
}

static void recSLTU_consts(int info)
{
	recSLTs_const(info, 0, 0);
}

static void recSLTU_constt(int info)
{
	recSLTs_const(info, 0, 1);
}

static void recSLTU_(int info)
{
	recSLTs_(info, 0);
}

EERECOMPILE_CODERC0(SLTU, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_NORENAME);
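// Aside (model, not part of this commit): both SLT flavours produce a 0/1
// value in the full 64-bit GPR, which is why the emitted code zeroes the
// destination with XOR before the CMP and then writes only the low byte via
// SETcc (SETL/SETG for signed, SETB/SETA for unsigned):
static u64 model_slt(s64 rs, s64 rt)  { return rs < rt ? 1 : 0; } // signed
static u64 model_sltu(u64 rs, u64 rt) { return rs < rt ? 1 : 0; } // unsigned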
#endif

} // namespace R5900::Dynarec::OpcodeImpl

View File

@@ -22,10 +22,8 @@
using namespace x86Emitter;

namespace R5900::Dynarec::OpcodeImpl
{

/*********************************************************
* Arithmetic with immediate operand                      *
* Format:  OP rt, rs, immediate                          *

@@ -48,34 +46,37 @@ REC_FUNC_DEL(SLTIU, _Rt_);

#else
static void recMoveStoT(int info)
{
if (info & PROCESS_EE_S)
xMOV(xRegister32(EEREC_T), xRegister32(EEREC_S));
else
xMOV(xRegister32(EEREC_T), ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
}
static void recMoveStoT64(int info)
{
if (info & PROCESS_EE_S)
xMOV(xRegister64(EEREC_T), xRegister64(EEREC_S));
else
xMOV(xRegister64(EEREC_T), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
}
//// ADDI

static void recADDI_const(void)
{
	g_cpuConstRegs[_Rt_].SD[0] = s64(s32(g_cpuConstRegs[_Rs_].UL[0] + u32(s32(_Imm_))));
}

static void recADDI_(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	recMoveStoT(info);
	xADD(xRegister32(EEREC_T), _Imm_);
	xMOVSX(xRegister64(EEREC_T), xRegister32(EEREC_T));
}

EERECOMPILE_CODEX(eeRecompileCodeRC1, ADDI, XMMINFO_WRITET | XMMINFO_READS);
////////////////////////////////////////////////////
void recADDIU()

@@ -84,33 +85,19 @@ void recADDIU()
}

////////////////////////////////////////////////////
static void recDADDI_const()
{
	g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] + u64(s64(_Imm_));
}

static void recDADDI_(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	recMoveStoT64(info);
	xADD(xRegister64(EEREC_T), _Imm_);
}

EERECOMPILE_CODEX(eeRecompileCodeRC1, DADDI, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP);
//// DADDIU
void recDADDIU()

@@ -119,133 +106,137 @@ void recDADDIU()
}

//// SLTIU
static void recSLTIU_const()
{
	g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] < (u64)(_Imm_);
}

static void recSLTIU_(int info)
{
	pxAssert(!(info & PROCESS_EE_XMM));

	// TODO(Stenzek): this can be made to suck less by turning Rs into a temp and reallocating Rt.
	const xRegister32 dreg((_Rt_ == _Rs_) ? _allocX86reg(X86TYPE_TEMP, 0, 0) : EEREC_T);
	xXOR(dreg, dreg);

	if (info & PROCESS_EE_S)
		xCMP(xRegister64(EEREC_S), _Imm_);
	else
		xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], _Imm_);
	xSETB(xRegister8(dreg));

	if (dreg.GetId() != EEREC_T)
	{
		std::swap(x86regs[dreg.GetId()], x86regs[EEREC_T]);
		_freeX86reg(EEREC_T);
	}
}

EERECOMPILE_CODEX(eeRecompileCodeRC1, SLTIU, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP | XMMINFO_NORENAME);
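// Aside (model, not part of this commit): SLTIU's quirk — the 16-bit
// immediate is sign-extended first and only then compared as unsigned, so a
// small negative immediate becomes a huge unsigned bound:
static u64 model_sltiu(u64 rs, s16 imm)
{
	return rs < u64(s64(imm)) ? 1 : 0; // e.g. imm = -1 compares against 2^64 - 1
}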
//// SLTI

static void recSLTI_const()
{
	g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].SD[0] < (s64)(_Imm_);
}

static void recSLTI_(int info)
{
	const xRegister32 dreg((_Rt_ == _Rs_) ? _allocX86reg(X86TYPE_TEMP, 0, 0) : EEREC_T);
	xXOR(dreg, dreg);

	if (info & PROCESS_EE_S)
		xCMP(xRegister64(EEREC_S), _Imm_);
	else
		xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], _Imm_);
	xSETL(xRegister8(dreg));

	if (dreg.GetId() != EEREC_T)
	{
		std::swap(x86regs[dreg.GetId()], x86regs[EEREC_T]);
		_freeX86reg(EEREC_T);
	}
}

EERECOMPILE_CODEX(eeRecompileCodeRC1, SLTI, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP | XMMINFO_NORENAME);
//// ANDI

static void recANDI_const()
{
	g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] & (u64)_ImmU_; // Zero-extended Immediate
}

namespace
{
	enum class LogicalOp
	{
		AND,
		OR,
		XOR
	};
} // namespace

static void recLogicalOpI(int info, LogicalOp op)
{
	xImpl_G1Logic bad{};
	const xImpl_G1Logic& xOP = op == LogicalOp::AND ? xAND : op == LogicalOp::OR ? xOR :
		op == LogicalOp::XOR ? xXOR :
		bad;
	pxAssert(&xOP != &bad);

	if (_ImmU_ != 0)
	{
		recMoveStoT64(info);
		xOP(xRegister64(EEREC_T), _ImmU_);
	}
	else
	{
		if (op == LogicalOp::AND)
		{
			xXOR(xRegister32(EEREC_T), xRegister32(EEREC_T));
		}
		else
		{
			recMoveStoT64(info);
		}
	}
}

static void recANDI_(int info)
{
	recLogicalOpI(info, LogicalOp::AND);
}

EERECOMPILE_CODEX(eeRecompileCodeRC1, ANDI, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP);
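// Aside (model, not part of this commit): unlike the arithmetic immediates,
// the logical immediates are zero-extended (_ImmU_), so ANDI can never set
// any upper bits — which is what makes the _ImmU_ == 0 fast path above a
// plain register clear:
static u64 model_andi(u64 rs, u16 imm)
{
	return rs & u64(imm); // imm zero-extended; the upper 48 bits are always cleared
}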
////////////////////////////////////////////////////
static void recORI_const()
{
	g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] | (u64)_ImmU_; // Zero-extended Immediate
}

static void recORI_(int info)
{
	recLogicalOpI(info, LogicalOp::OR);
}

EERECOMPILE_CODEX(eeRecompileCodeRC1, ORI, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP);

////////////////////////////////////////////////////
static void recXORI_const()
{
	g_cpuConstRegs[_Rt_].UD[0] = g_cpuConstRegs[_Rs_].UD[0] ^ (u64)_ImmU_; // Zero-extended Immediate
}

static void recXORI_(int info)
{
	recLogicalOpI(info, LogicalOp::XOR);
}

EERECOMPILE_CODEX(eeRecompileCodeRC1, XORI, XMMINFO_WRITET | XMMINFO_READS | XMMINFO_64BITOP);
#endif

} // namespace R5900::Dynarec::OpcodeImpl

View File

@@ -24,10 +24,8 @@
using namespace x86Emitter;

namespace R5900::Dynarec::OpcodeImpl
{

/*********************************************************
* Register branch logic                                  *
* Format:  OP rs, rt, offset                             *

@@ -55,135 +53,62 @@ REC_SYS_DEL(BGEZALL, 31);

#else
static void recSetBranchEQ(int bne, int process)
{
	// TODO(Stenzek): This is suboptimal if the registers are in XMMs.
	// If the constant register is already in a host register, we don't need the immediate...

	if (process & PROCESS_CONSTS)
	{
		_eeFlushAllDirty();

		_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH_AND_FREE);
		const int regt = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
		if (regt >= 0)
			xImm64Op(xCMP, xRegister64(regt), rax, g_cpuConstRegs[_Rs_].UD[0]);
		else
			xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax, g_cpuConstRegs[_Rs_].UD[0]);
	}
	else if (process & PROCESS_CONSTT)
	{
		_eeFlushAllDirty();

		_deleteGPRtoXMMreg(_Rs_, DELETE_REG_FLUSH_AND_FREE);
		const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
		if (regs >= 0)
			xImm64Op(xCMP, xRegister64(regs), rax, g_cpuConstRegs[_Rt_].UD[0]);
		else
			xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], rax, g_cpuConstRegs[_Rt_].UD[0]);
	}
	else
	{
		// force S into register, since we need to load it, may as well cache.
		_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH_AND_FREE);
		const int regs = _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
		const int regt = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
		_eeFlushAllDirty();

		if (regt >= 0)
			xCMP(xRegister64(regs), xRegister64(regt));
		else
			xCMP(xRegister64(regs), ptr64[&cpuRegs.GPR.r[_Rt_]]);
	}

	if (bne)
		j32Ptr[0] = JE32(0);
	else
		j32Ptr[0] = JNE32(0);
}
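// Aside (hypothetical model, not part of this commit): the
// j32Ptr[0] = JE32(0) / x86SetJ32(j32Ptr[0]) pairing used throughout this
// file is classic forward-jump patching — emit the branch with a zero
// displacement, remember where the displacement field lives, and patch it
// once the fall-through block has been emitted. A toy version over a plain
// byte buffer (helper names are invented for illustration):
#include <cstring>
#include <vector>
static size_t model_emit_je32(std::vector<u8>& buf)
{
	const u8 op[6] = {0x0F, 0x84, 0, 0, 0, 0}; // je rel32 with displacement 0
	buf.insert(buf.end(), op, op + 6);
	return buf.size() - 4; // offset of the displacement field
}
static void model_patch_to_here(std::vector<u8>& buf, size_t disp)
{
	const u32 rel = u32(buf.size() - (disp + 4)); // rel32 is measured from insn end
	std::memcpy(&buf[disp], &rel, sizeof(rel));
}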
static void recSetBranchL(int ltz)
{
	const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
	const int regsxmm = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
	_eeFlushAllDirty();

	if (regsxmm >= 0)
	{
		xMOVMSKPS(eax, xRegisterSSE(regsxmm));
		xTEST(al, 2);

		if (ltz)

@@ -194,17 +119,19 @@ void recSetBranchL(int ltz)
		return;
	}

	if (regs >= 0)
		xCMP(xRegister64(regs), 0);
	else
		xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], 0);

	if (ltz)
		j32Ptr[0] = JGE32(0);
	else
		j32Ptr[0] = JL32(0);
}
//// BEQ

static void recBEQ_const()
{
	u32 branchTo;

@@ -213,48 +140,62 @@ void recBEQ_const()
	else
		branchTo = pc + 4;

	recompileNextInstruction(true, false);
	SetBranchImm(branchTo);
}
static void recBEQ_process(int process)
{
	u32 branchTo = ((s32)_Imm_ * 4) + pc;

	if (_Rs_ == _Rt_)
	{
		recompileNextInstruction(true, false);
		SetBranchImm(branchTo);
	}
	else
	{
		const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0);
		recSetBranchEQ(0, process);

		if (!swap)
		{
			SaveBranchState();
			recompileNextInstruction(true, false);
		}

		SetBranchImm(branchTo);

		x86SetJ32(j32Ptr[0]);

		if (!swap)
		{
			// recopy the next inst
			pc -= 4;
			LoadBranchState();
			recompileNextInstruction(true, false);
		}

		SetBranchImm(pc);
	}
}

void recBEQ()
{
	// prefer using the host register over an immediate, it'll be smaller code.
	if (GPR_IS_CONST2(_Rs_, _Rt_))
		recBEQ_const();
	else if (GPR_IS_CONST1(_Rs_) && _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ) < 0)
		recBEQ_process(PROCESS_CONSTS);
	else if (GPR_IS_CONST1(_Rt_) && _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ) < 0)
		recBEQ_process(PROCESS_CONSTT);
	else
		recBEQ_process(0);
}
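// Aside (hypothetical sketch, not the actual TrySwapDelaySlot implementation):
// compiling the delay slot before the branch compare avoids the
// save/reload-branch-state dance above, but is only legal when the delay-slot
// instruction writes none of the registers the comparison still has to read.
// A bitmask test captures the core of that interference check:
static bool model_can_swap_delay_slot(u32 slot_writes_mask, u32 branch_reads_mask)
{
	// one bit per GPR; any overlap means the slot would clobber a compare input
	return (slot_writes_mask & branch_reads_mask) == 0;
}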
//// BNE

static void recBNE_const()
{
	u32 branchTo;

@@ -263,51 +204,65 @@ void recBNE_const()
	else
		branchTo = pc + 4;

	recompileNextInstruction(true, false);
	SetBranchImm(branchTo);
}
static void recBNE_process(int process)
{
	u32 branchTo = ((s32)_Imm_ * 4) + pc;

	if (_Rs_ == _Rt_)
	{
		recompileNextInstruction(true, false);
		SetBranchImm(pc);
		return;
	}

	const bool swap = TrySwapDelaySlot(_Rs_, _Rt_, 0);
	recSetBranchEQ(1, process);

	if (!swap)
	{
		SaveBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);

	if (!swap)
	{
		// recopy the next inst
		pc -= 4;
		LoadBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(pc);
}

void recBNE()
{
	if (GPR_IS_CONST2(_Rs_, _Rt_))
		recBNE_const();
	else if (GPR_IS_CONST1(_Rs_) && _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ) < 0)
		recBNE_process(PROCESS_CONSTS);
	else if (GPR_IS_CONST1(_Rt_) && _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ) < 0)
		recBNE_process(PROCESS_CONSTT);
	else
		recBNE_process(0);
}
//// BEQL

static void recBEQL_const()
{
	if (g_cpuConstRegs[_Rs_].SD[0] == g_cpuConstRegs[_Rt_].SD[0])
	{
		u32 branchTo = ((s32)_Imm_ * 4) + pc;
		recompileNextInstruction(true, false);
		SetBranchImm(branchTo);
	}
	else

@@ -316,35 +271,40 @@ void recBEQL_const()
	}
}
static void recBEQL_process(int process)
{
	u32 branchTo = ((s32)_Imm_ * 4) + pc;
	recSetBranchEQ(0, process);

	SaveBranchState();
	recompileNextInstruction(true, false);
	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);

	LoadBranchState();
	SetBranchImm(pc);
}

void recBEQL()
{
	if (GPR_IS_CONST2(_Rs_, _Rt_))
		recBEQL_const();
	else if (GPR_IS_CONST1(_Rs_) && _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ) < 0)
		recBEQL_process(PROCESS_CONSTS);
	else if (GPR_IS_CONST1(_Rt_) && _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ) < 0)
		recBEQL_process(PROCESS_CONSTT);
	else
		recBEQL_process(0);
}
//// BNEL

static void recBNEL_const()
{
	if (g_cpuConstRegs[_Rs_].SD[0] != g_cpuConstRegs[_Rt_].SD[0])
	{
		u32 branchTo = ((s32)_Imm_ * 4) + pc;
		recompileNextInstruction(true, false);
		SetBranchImm(branchTo);
	}
	else

@@ -353,29 +313,34 @@ void recBNEL_const()
	}
}
static void recBNEL_process(int process)
{
	u32 branchTo = ((s32)_Imm_ * 4) + pc;
	recSetBranchEQ(0, process);

	SaveBranchState();
	SetBranchImm(pc + 4);

	x86SetJ32(j32Ptr[0]);

	// recopy the next inst
	LoadBranchState();
	recompileNextInstruction(true, false);
	SetBranchImm(branchTo);
}

void recBNEL()
{
	if (GPR_IS_CONST2(_Rs_, _Rt_))
		recBNEL_const();
	else if (GPR_IS_CONST1(_Rs_) && _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ) < 0)
		recBNEL_process(PROCESS_CONSTS);
	else if (GPR_IS_CONST1(_Rt_) && _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ) < 0)
		recBNEL_process(PROCESS_CONSTT);
	else
		recBNEL_process(0);
}
/*********************************************************
* Register branch logic                                  *

@@ -402,36 +367,43 @@ void recBLTZAL()
	u32 branchTo = ((s32)_Imm_ * 4) + pc;

	_eeOnWriteReg(31, 0);
	_eeFlushAllDirty();
	_deleteEEreg(31, 0);

	xMOV64(rax, pc + 4);
	xMOV(ptr64[&cpuRegs.GPR.n.ra.UD[0]], rax);

	if (GPR_IS_CONST1(_Rs_))
	{
		if (!(g_cpuConstRegs[_Rs_].SD[0] < 0))
			branchTo = pc + 4;

		recompileNextInstruction(true, false);
		SetBranchImm(branchTo);
		return;
	}

	const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
	recSetBranchL(1);

	if (!swap)
	{
		SaveBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);

	if (!swap)
	{
		// recopy the next inst
		pc -= 4;
		LoadBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(pc);
}
@@ -444,36 +416,43 @@ void recBGEZAL()
	u32 branchTo = ((s32)_Imm_ * 4) + pc;

	_eeOnWriteReg(31, 0);
	_eeFlushAllDirty();
	_deleteEEreg(31, 0);

	xMOV64(rax, pc + 4);
	xMOV(ptr64[&cpuRegs.GPR.n.ra.UD[0]], rax);

	if (GPR_IS_CONST1(_Rs_))
	{
		if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0))
			branchTo = pc + 4;

		recompileNextInstruction(true, false);
		SetBranchImm(branchTo);
		return;
	}

	const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
	recSetBranchL(0);

	if (!swap)
	{
		SaveBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);

	if (!swap)
	{
		// recopy the next inst
		pc -= 4;
		LoadBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(pc);
}
@@ -486,11 +465,11 @@ void recBLTZALL()
	u32 branchTo = ((s32)_Imm_ * 4) + pc;

	_eeOnWriteReg(31, 0);
	_eeFlushAllDirty();
	_deleteEEreg(31, 0);

	xMOV64(rax, pc + 4);
	xMOV(ptr64[&cpuRegs.GPR.n.ra.UD[0]], rax);

	if (GPR_IS_CONST1(_Rs_))
	{

@@ -498,7 +477,7 @@ void recBLTZALL()
			SetBranchImm(pc + 4);
		else
		{
			recompileNextInstruction(true, false);
			SetBranchImm(branchTo);
		}
		return;

@@ -507,7 +486,7 @@ void recBLTZALL()
	recSetBranchL(1);

	SaveBranchState();
	recompileNextInstruction(true, false);
	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);
@@ -524,11 +503,11 @@ void recBGEZALL()
	u32 branchTo = ((s32)_Imm_ * 4) + pc;

	_eeOnWriteReg(31, 0);
	_eeFlushAllDirty();
	_deleteEEreg(31, 0);

	xMOV64(rax, pc + 4);
	xMOV(ptr64[&cpuRegs.GPR.n.ra.UD[0]], rax);

	if (GPR_IS_CONST1(_Rs_))
	{

@@ -536,7 +515,7 @@ void recBGEZALL()
			SetBranchImm(pc + 4);
		else
		{
			recompileNextInstruction(true, false);
			SetBranchImm(branchTo);
		}
		return;

@@ -545,7 +524,7 @@ void recBGEZALL()
	recSetBranchL(0);

	SaveBranchState();
	recompileNextInstruction(true, false);
	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);
@@ -562,43 +541,44 @@ void recBLEZ()
	u32 branchTo = ((s32)_Imm_ * 4) + pc;

	if (GPR_IS_CONST1(_Rs_))
	{
		if (!(g_cpuConstRegs[_Rs_].SD[0] <= 0))
			branchTo = pc + 4;

		recompileNextInstruction(true, false);
		SetBranchImm(branchTo);
		return;
	}

	const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
	const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
	_eeFlushAllDirty();

	if (regs >= 0)
		xCMP(xRegister64(regs), 0);
	else
		xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], 0);

	j32Ptr[0] = JG32(0);

	if (!swap)
	{
		SaveBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);

	if (!swap)
	{
		// recopy the next inst
		pc -= 4;
		LoadBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(pc);
}
@@ -610,43 +590,44 @@ void recBGTZ()
	u32 branchTo = ((s32)_Imm_ * 4) + pc;

	if (GPR_IS_CONST1(_Rs_))
	{
		if (!(g_cpuConstRegs[_Rs_].SD[0] > 0))
			branchTo = pc + 4;

		recompileNextInstruction(true, false);
		SetBranchImm(branchTo);
		return;
	}

	const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
	const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
	_eeFlushAllDirty();

	if (regs >= 0)
		xCMP(xRegister64(regs), 0);
	else
		xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], 0);

	j32Ptr[0] = JLE32(0);

	if (!swap)
	{
		SaveBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);

	if (!swap)
	{
		// recopy the next inst
		pc -= 4;
		LoadBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(pc);
}
@@ -658,31 +639,37 @@ void recBLTZ()
	u32 branchTo = ((s32)_Imm_ * 4) + pc;

	if (GPR_IS_CONST1(_Rs_))
	{
		if (!(g_cpuConstRegs[_Rs_].SD[0] < 0))
			branchTo = pc + 4;

		recompileNextInstruction(true, false);
		SetBranchImm(branchTo);
		return;
	}

	const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
	_eeFlushAllDirty();
	recSetBranchL(1);

	if (!swap)
	{
		SaveBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);

	if (!swap)
	{
		// recopy the next inst
		pc -= 4;
		LoadBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(pc);
}
@@ -694,31 +681,38 @@ void recBGEZ()
	u32 branchTo = ((s32)_Imm_ * 4) + pc;

	if (GPR_IS_CONST1(_Rs_))
	{
		if (!(g_cpuConstRegs[_Rs_].SD[0] >= 0))
			branchTo = pc + 4;

		recompileNextInstruction(true, false);
		SetBranchImm(branchTo);
		return;
	}

	const bool swap = TrySwapDelaySlot(_Rs_, 0, 0);
	_eeFlushAllDirty();
	recSetBranchL(0);

	if (!swap)
	{
		SaveBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);

	if (!swap)
	{
		// recopy the next inst
		pc -= 4;
		LoadBranchState();
		recompileNextInstruction(true, false);
	}

	SetBranchImm(pc);
}
@@ -728,9 +722,7 @@ void recBLTZL()
{
	EE::Profiler.EmitOp(eeOpcode::BLTZL);

	const u32 branchTo = ((s32)_Imm_ * 4) + pc;

	if (GPR_IS_CONST1(_Rs_))
	{

@@ -738,16 +730,17 @@ void recBLTZL()
			SetBranchImm(pc + 4);
		else
		{
			recompileNextInstruction(true, false);
			SetBranchImm(branchTo);
		}
		return;
	}

	_eeFlushAllDirty();
	recSetBranchL(1);

	SaveBranchState();
	recompileNextInstruction(true, false);
	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);
@@ -762,9 +755,7 @@ void recBGEZL()
{
	EE::Profiler.EmitOp(eeOpcode::BGEZL);

	const u32 branchTo = ((s32)_Imm_ * 4) + pc;

	if (GPR_IS_CONST1(_Rs_))
	{

@@ -772,16 +763,17 @@ void recBGEZL()
			SetBranchImm(pc + 4);
		else
		{
			recompileNextInstruction(true, false);
			SetBranchImm(branchTo);
		}
		return;
	}

	_eeFlushAllDirty();
	recSetBranchL(0);

	SaveBranchState();
	recompileNextInstruction(true, false);
	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);
@@ -802,9 +794,7 @@ void recBLEZL()
{
	EE::Profiler.EmitOp(eeOpcode::BLEZL);

	const u32 branchTo = ((s32)_Imm_ * 4) + pc;

	if (GPR_IS_CONST1(_Rs_))
	{

@@ -812,32 +802,27 @@ void recBLEZL()
			SetBranchImm(pc + 4);
		else
		{
			recompileNextInstruction(true, false);
			SetBranchImm(branchTo);
		}
		return;
	}

	const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
	_eeFlushAllDirty();

	if (regs >= 0)
		xCMP(xRegister64(regs), 0);
	else
		xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], 0);

	j32Ptr[0] = JG32(0);

	SaveBranchState();
	recompileNextInstruction(true, false);
	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);

	LoadBranchState();
	SetBranchImm(pc);
@@ -848,9 +833,7 @@ void recBGTZL()
{
	EE::Profiler.EmitOp(eeOpcode::BGTZL);

	const u32 branchTo = ((s32)_Imm_ * 4) + pc;

	if (GPR_IS_CONST1(_Rs_))
	{

@@ -859,31 +842,27 @@ void recBGTZL()
		else
		{
			_clearNeededXMMregs();
			recompileNextInstruction(true, false);
			SetBranchImm(branchTo);
		}
		return;
	}

	const int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
	_eeFlushAllDirty();

	if (regs >= 0)
		xCMP(xRegister64(regs), 0);
	else
		xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], 0);

	j32Ptr[0] = JLE32(0);

	SaveBranchState();
	recompileNextInstruction(true, false);
	SetBranchImm(branchTo);

	x86SetJ32(j32Ptr[0]);

	LoadBranchState();
	SetBranchImm(pc);
@@ -891,6 +870,4 @@ void recBGTZL()
#endif

} // namespace R5900::Dynarec::OpcodeImpl

View File

@@ -14,8 +14,6 @@
 */

#include "PrecompiledHeader.h"
#include "Common.h"

@@ -24,9 +22,8 @@
using namespace x86Emitter;

namespace R5900::Dynarec::OpcodeImpl
{
/*********************************************************
* Jump to target                                         *

@@ -50,7 +47,7 @@ void recJ()
	// SET_FPUSTATE;
	u32 newpc = (_InstrucTarget_ << 2) + (pc & 0xf0000000);
	recompileNextInstruction(true, false);
	if (EmuConfig.Gamefixes.GoemonTlbHack)
		SetBranchImm(vtlb_V2P(newpc));
	else

@@ -76,7 +73,7 @@ void recJAL()
		xMOV(ptr32[&cpuRegs.GPR.r[31].UL[1]], 0);
	}

	recompileNextInstruction(true, false);
	if (EmuConfig.Gamefixes.GoemonTlbHack)
		SetBranchImm(vtlb_V2P(newpc));
	else

@@ -101,34 +98,40 @@ void recJALR()
{
	EE::Profiler.EmitOp(eeOpcode::JALR);

	const u32 newpc = pc + 4;
	const bool swap = (EmuConfig.Gamefixes.GoemonTlbHack || _Rd_ == _Rs_) ? false : TrySwapDelaySlot(_Rs_, 0, _Rd_);

	int wbreg = -1;
	if (!swap)
	{
		wbreg = _allocX86reg(X86TYPE_PCWRITEBACK, 0, MODE_WRITE | MODE_CALLEESAVED);
		_eeMoveGPRtoR(xRegister32(wbreg), _Rs_);

		if (EmuConfig.Gamefixes.GoemonTlbHack)
		{
			xMOV(ecx, xRegister32(wbreg));
			vtlb_DynV2P();
			xMOV(xRegister32(wbreg), eax);
		}
	}

	// uncomment when there are NO instructions that need to call interpreter
	//	int mmreg;
	//	if (GPR_IS_CONST1(_Rs_))
	//		xMOV(ptr32[&cpuRegs.pc], g_cpuConstRegs[_Rs_].UL[0]);
	//	else
	//	{
	//		int mmreg;
	//
	//		if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0)
	//		{
	//			xMOVSS(ptr[&cpuRegs.pc], xRegisterSSE(mmreg));
	//		}
	//		else {
	//			xMOV(eax, ptr[(void*)((int)&cpuRegs.GPR.r[_Rs_].UL[0])]);
	//			xMOV(ptr[&cpuRegs.pc], eax);
	//		}
	//	}

	if (_Rd_)
	{

@@ -136,36 +139,46 @@ void recJALR()
		if (EE_CONST_PROP)
		{
			GPR_SET_CONST(_Rd_);
			g_cpuConstRegs[_Rd_].UD[0] = newpc;
		}
		else
		{
			xWriteImm64ToMem(&cpuRegs.GPR.r[_Rd_].UD[0], rax, newpc);
		}
	}

	if (!swap)
	{
		recompileNextInstruction(true, false);

		// the next instruction may have flushed the register.. so reload it if so.
		if (x86regs[wbreg].inuse && x86regs[wbreg].type == X86TYPE_PCWRITEBACK)
		{
			xMOV(ptr[&cpuRegs.pc], xRegister32(wbreg));
			x86regs[wbreg].inuse = 0;
		}
		else
		{
			xMOV(eax, ptr[&cpuRegs.pcWriteback]);
			xMOV(ptr[&cpuRegs.pc], eax);
		}
	}
	else
	{
		if (GPR_IS_DIRTY_CONST(_Rs_) || _hasX86reg(X86TYPE_GPR, _Rs_, 0))
		{
			const int x86reg = _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
			xMOV(ptr32[&cpuRegs.pc], xRegister32(x86reg));
		}
		else
		{
			_eeMoveGPRtoM((uptr)&cpuRegs.pc, _Rs_);
		}
	}

	SetBranchReg(0xffffffff);
}
#endif #endif
} // namespace OpcodeImpl } // namespace R5900::Dynarec::OpcodeImpl
} // namespace Dynarec
} // namespace R5900
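The recJALR rewrite above hinges on delay-slot swapping: when TrySwapDelaySlot succeeds, the slot instruction is emitted before the branch, so rs can be read at branch time without parking it in a PC-writeback register. A minimal sketch of the ordering constraint in plain C++, against a toy CPU model (all names here are illustrative, not the emulator's API):

#include <cstdint>
#include <cstdio>

struct ToyCpu
{
	uint64_t gpr[32];
	uint64_t pc;
};

// JALR samples rs and writes the link register; the delay slot then executes
// exactly once before control transfers. A slot that does not write rs (or
// rd) can therefore be emitted ahead of the branch without changing results.
void toy_jalr(ToyCpu& cpu, int rd, int rs, void (*delay_slot)(ToyCpu&))
{
	const uint64_t target = cpu.gpr[rs]; // rs sampled here
	if (rd != 0)
		cpu.gpr[rd] = cpu.pc + 8;        // link address
	delay_slot(cpu);                     // still runs once, before the jump
	cpu.pc = target;
}

int main()
{
	ToyCpu cpu{};
	cpu.pc = 0x1000;
	cpu.gpr[9] = 0x2000;
	toy_jalr(cpu, 31, 9, [](ToyCpu& c) { c.gpr[4] = 42; });
	std::printf("pc=%llx ra=%llx a0=%llu\n", (unsigned long long)cpu.pc,
		(unsigned long long)cpu.gpr[31], (unsigned long long)cpu.gpr[4]);
	return 0;
}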

File diff suppressed because it is too large

View File

@@ -22,9 +22,8 @@
 using namespace x86Emitter;

-namespace R5900 {
-namespace Dynarec {
-namespace OpcodeImpl {
+namespace R5900::Dynarec::OpcodeImpl
+{

 /*********************************************************
 * Shift arithmetic with constant shift                   *
@@ -56,11 +55,6 @@ static void xCopy64(u64* dst, u64* src)
 	xMOV(ptr64[dst], rax);
 }

-static void xCMPToZero64(u64* mem)
-{
-	xCMP(ptr64[mem], 0);
-}
-
 /*********************************************************
 * Load higher 16 bits of the first word in GPR with imm  *
 * Format:  OP rt, immediate                              *
@@ -69,22 +63,13 @@ static void xCMPToZero64(u64* mem)
 //// LUI
 void recLUI()
 {
-	int mmreg;
 	if (!_Rt_)
 		return;

-	_eeOnWriteReg(_Rt_, 1);
-
-	if ((mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE)) >= 0)
-	{
-		if (xmmregs[mmreg].mode & MODE_WRITE)
-		{
-			xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rt_].UL[2]], xRegisterSSE(mmreg));
-		}
-		xmmregs[mmreg].inuse = 0;
-	}
-
-	_deleteEEreg(_Rt_, 0);
+	// need to flush the upper 64 bits for xmm
+	GPR_DEL_CONST(_Rt_);
+	_deleteGPRtoX86reg(_Rt_, DELETE_REG_FREE_NO_WRITEBACK);
+	_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH_AND_FREE);

 	if (EE_CONST_PROP)
 	{
@@ -93,363 +78,300 @@ void recLUI()
 	}
 	else
 	{
-		xMOV(eax, (s32)(cpuRegs.code << 16));
-		eeSignExtendTo(_Rt_);
+		const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
+		xMOV64(xRegister64(regt), (s64)(s32)(cpuRegs.code << 16));
 	}

 	EE::Profiler.EmitOp(eeOpcode::LUI);
 }
 ////////////////////////////////////////////////////
-void recMFHILO(int hi)
+static void recMFHILO(bool hi, bool upper)
 {
-	int reghi, regd, xmmhilo;
-
 	if (!_Rd_)
 		return;

-	xmmhilo = hi ? XMMGPR_HI : XMMGPR_LO;
-	reghi = _checkXMMreg(XMMTYPE_GPRREG, xmmhilo, MODE_READ);
-
+	// kill any constants on rd, lower 64 bits get written regardless of upper
 	_eeOnWriteReg(_Rd_, 0);

-	regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE);
-
-	if (reghi >= 0)
-	{
-		if (regd >= 0)
-		{
-			pxAssert(regd != reghi);
-
-			xmmregs[regd].inuse = 0;
-
-			xMOVQ(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], xRegisterSSE(reghi));
-
-			if (xmmregs[regd].mode & MODE_WRITE)
-			{
-				xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rd_].UL[2]], xRegisterSSE(regd));
-			}
-		}
-		else
-		{
-			_deleteEEreg(_Rd_, 0);
-			xMOVQ(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(reghi));
-		}
-	}
-	else
-	{
-		if (regd >= 0)
-		{
-			if (EEINST_ISLIVE2(_Rd_))
-				xMOVL.PS(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]);
-			else
-				xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]);
-		}
-		else
-		{
-			_deleteEEreg(_Rd_, 0);
-			xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], hi ? &cpuRegs.HI.UD[0] : &cpuRegs.LO.UD[0]);
-		}
-	}
-}
+	const int reg = hi ? XMMGPR_HI : XMMGPR_LO;
+	const int xmmd = EEINST_XMMUSEDTEST(_Rd_) ? _allocGPRtoXMMreg(_Rd_, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE);
+	const int xmmhilo = EEINST_XMMUSEDTEST(reg) ? _allocGPRtoXMMreg(reg, MODE_READ) : _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ);
+	if (xmmd >= 0)
+	{
+		if (xmmhilo >= 0)
+		{
+			if (upper)
+				xMOVHL.PS(xRegisterSSE(xmmd), xRegisterSSE(xmmhilo));
+			else
+				xMOVSD(xRegisterSSE(xmmd), xRegisterSSE(xmmhilo));
+		}
+		else
+		{
+			const int gprhilo = upper ? -1 : _allocIfUsedGPRtoX86(reg, MODE_READ);
+			if (gprhilo >= 0)
+				xPINSR.Q(xRegisterSSE(xmmd), xRegister64(gprhilo), 0);
+			else
+				xPINSR.Q(xRegisterSSE(xmmd), ptr64[hi ? &cpuRegs.HI.UD[static_cast<u8>(upper)] : &cpuRegs.LO.UD[static_cast<u8>(upper)]], 0);
+		}
+	}
+	else
+	{
+		// try rename {hi,lo} -> rd
+		const int gprreg = upper ? -1 : _checkX86reg(X86TYPE_GPR, reg, MODE_READ);
+		if (gprreg >= 0 && _eeTryRenameReg(_Rd_, reg, gprreg, -1, 0) >= 0)
+			return;
+
+		const int gprd = _allocIfUsedGPRtoX86(_Rd_, MODE_WRITE);
+		if (gprd >= 0 && xmmhilo >= 0)
+		{
+			pxAssert(gprreg < 0);
+			if (upper)
+				xPEXTR.Q(xRegister64(gprd), xRegisterSSE(xmmhilo), 1);
+			else
+				xMOVD(xRegister64(gprd), xRegisterSSE(xmmhilo));
+		}
+		else if (gprd < 0 && xmmhilo >= 0)
+		{
+			pxAssert(gprreg < 0);
+			if (upper)
+				xPEXTR.Q(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(xmmhilo), 1);
+			else
+				xMOVQ(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(xmmhilo));
+		}
+		else if (gprd >= 0)
+		{
+			if (gprreg >= 0)
+				xMOV(xRegister64(gprd), xRegister64(gprreg));
+			else
+				xMOV(xRegister64(gprd), ptr64[hi ? &cpuRegs.HI.UD[static_cast<u8>(upper)] : &cpuRegs.LO.UD[static_cast<u8>(upper)]]);
+		}
+		else if (gprreg >= 0)
+		{
+			xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegister64(gprreg));
+		}
+		else
+		{
+			xMOV(rax, ptr64[hi ? &cpuRegs.HI.UD[static_cast<u8>(upper)] : &cpuRegs.LO.UD[static_cast<u8>(upper)]]);
+			xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
+		}
+	}
+}
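The "try rename" path above avoids a copy entirely when MFHI/MFLO is the last use of the HI/LO host register: the allocator simply relabels that host register as Rd. A rough sketch of the bookkeeping, under a hypothetical mapping table rather than PCSX2's actual allocator structures:

#include <array>
#include <cstdio>

struct HostRegMap
{
	std::array<int, 16> guest{}; // guest reg held by each host reg, -1 = free

	int find(int g) const
	{
		for (int i = 0; i < 16; i++)
			if (guest[i] == g)
				return i;
		return -1;
	}

	// Relabel the host register holding 'from' so it now holds 'to'. Any
	// stale mapping for 'to' must be gone first, mirroring the
	// _deleteGPRtoX86reg(to, ...) calls in _eeTryRenameReg.
	bool rename(int from, int to)
	{
		const int host = find(from);
		if (host < 0 || find(to) >= 0)
			return false;
		guest[host] = to;
		return true;
	}
};

int main()
{
	HostRegMap m;
	m.guest.fill(-1);
	m.guest[3] = 32; // pretend host reg 3 holds guest LO
	std::printf("renamed=%d, rd now in host %d\n", m.rename(32, 4), m.find(4));
	return 0;
}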
-void recMTHILO(int hi)
+static void recMTHILO(bool hi, bool upper)
 {
-	int reghi, regs, xmmhilo;
-	uptr addrhilo;
-
-	xmmhilo = hi ? XMMGPR_HI : XMMGPR_LO;
-	addrhilo = hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0];
-
-	regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
-	reghi = _checkXMMreg(XMMTYPE_GPRREG, xmmhilo, MODE_READ | MODE_WRITE);
-
-	if (reghi >= 0)
-	{
-		if (regs >= 0)
-		{
-			pxAssert(reghi != regs);
-
-			_deleteGPRtoXMMreg(_Rs_, 0);
-			xPUNPCK.HQDQ(xRegisterSSE(reghi), xRegisterSSE(reghi));
-			xPUNPCK.LQDQ(xRegisterSSE(regs), xRegisterSSE(reghi));
-
-			// swap regs
-			xmmregs[regs] = xmmregs[reghi];
-			xmmregs[reghi].inuse = 0;
-			xmmregs[regs].mode |= MODE_WRITE;
-		}
-		else
-		{
-			_flushConstReg(_Rs_);
-			xMOVL.PS(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
-			xmmregs[reghi].mode |= MODE_WRITE;
-		}
-	}
-	else
-	{
-		if (regs >= 0)
-		{
-			xMOVQ(ptr[(void*)(addrhilo)], xRegisterSSE(regs));
-		}
-		else
-		{
-			if (GPR_IS_CONST1(_Rs_))
-			{
-				xWriteImm64ToMem((u64*)addrhilo, rax, g_cpuConstRegs[_Rs_].UD[0]);
-			}
-			else
-			{
-				_eeMoveGPRtoR(ecx, _Rs_);
-				_flushEEreg(_Rs_);
-				xCopy64((u64*)addrhilo, &cpuRegs.GPR.r[_Rs_].UD[0]);
-			}
-		}
-	}
-}
+static void recMTHILO(bool hi, bool upper)
+{
+	const int reg = hi ? XMMGPR_HI : XMMGPR_LO;
+	_eeOnWriteReg(reg, 0);
+
+	const int xmms = EEINST_XMMUSEDTEST(_Rs_) ? _allocGPRtoXMMreg(_Rs_, MODE_READ) : _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
+	const int xmmhilo = EEINST_XMMUSEDTEST(reg) ? _allocGPRtoXMMreg(reg, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ | MODE_WRITE);
+	if (xmms >= 0)
+	{
+		if (xmmhilo >= 0)
+		{
+			if (upper)
+				xMOVLH.PS(xRegisterSSE(xmmhilo), xRegisterSSE(xmms));
+			else
+				xMOVSD(xRegisterSSE(xmmhilo), xRegisterSSE(xmms));
+		}
+		else
+		{
+			const int gprhilo = upper ? -1 : _allocIfUsedGPRtoX86(reg, MODE_WRITE);
+			if (gprhilo >= 0)
+				xMOVD(xRegister64(gprhilo), xRegisterSSE(xmms)); // actually movq
+			else
+				xMOVQ(ptr64[hi ? &cpuRegs.HI.UD[static_cast<u8>(upper)] : &cpuRegs.LO.UD[static_cast<u8>(upper)]], xRegisterSSE(xmms));
+		}
+	}
+	else
+	{
+		// try rename rs -> {hi,lo}
+		const int gprs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
+		if (gprs >= 0 && !upper && _eeTryRenameReg(reg, _Rs_, gprs, -1, 0) >= 0)
+			return;
+
+		if (xmmhilo >= 0)
+		{
+			if (gprs >= 0)
+			{
+				xPINSR.Q(xRegisterSSE(xmmhilo), xRegister64(gprs), static_cast<u8>(upper));
+			}
+			else if (GPR_IS_CONST1(_Rs_))
+			{
+				_eeMoveGPRtoR(rax, _Rs_);
+				xPINSR.Q(xRegisterSSE(xmmhilo), rax, static_cast<u8>(upper));
+			}
+			else
+			{
+				xPINSR.Q(xRegisterSSE(xmmhilo), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], static_cast<u8>(upper));
+			}
+		}
+		else
+		{
+			const int gprreg = upper ? -1 : _allocIfUsedGPRtoX86(reg, MODE_WRITE);
+			if (gprreg >= 0)
+				_eeMoveGPRtoR(xRegister64(gprreg), _Rs_);
+			else
+				_eeMoveGPRtoM((uptr)(hi ? &cpuRegs.HI.UD[static_cast<u8>(upper)] : &cpuRegs.LO.UD[static_cast<u8>(upper)]), _Rs_);
+		}
+	}
+}
 void recMFHI()
 {
-	recMFHILO(1);
+	recMFHILO(true, false);
 	EE::Profiler.EmitOp(eeOpcode::MFHI);
 }

 void recMFLO()
 {
-	recMFHILO(0);
+	recMFHILO(false, false);
 	EE::Profiler.EmitOp(eeOpcode::MFLO);
 }

 void recMTHI()
 {
-	recMTHILO(1);
+	recMTHILO(true, false);
 	EE::Profiler.EmitOp(eeOpcode::MTHI);
 }

 void recMTLO()
 {
-	recMTHILO(0);
+	recMTHILO(false, false);
 	EE::Profiler.EmitOp(eeOpcode::MTLO);
 }
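Folding the old *1 variants into recMFHILO/recMTHILO works because the EE's HI and LO are 128-bit: the pipeline-0 result lives in the low 64 bits and the pipeline-1 result in the high 64 bits, so `upper` is just a lane index (hence the PINSR.Q/PEXTR.Q calls with lane = upper). A sketch of the layout, assuming a simplified union in place of the emulator's GPR type:

#include <cstdint>
#include <cstdio>

union HiLo128
{
	uint64_t UD[2]; // UD[0] = MULT/DIV result, UD[1] = MULT1/DIV1 result
	uint32_t UL[4];
};

int main()
{
	HiLo128 lo{};
	lo.UD[0] = 0x1111222233334444ull; // written by MULT
	lo.UD[1] = 0xaaaabbbbccccddddull; // written by MULT1
	const bool upper = true;          // MFLO1 selects the upper lane
	std::printf("MFLO1 -> %016llx\n", (unsigned long long)lo.UD[upper]);
	return 0;
}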
-////////////////////////////////////////////////////
-void recMFHILO1(int hi)
-{
-	int reghi, regd, xmmhilo;
-
-	if (!_Rd_)
-		return;
-
-	xmmhilo = hi ? XMMGPR_HI : XMMGPR_LO;
-	reghi = _checkXMMreg(XMMTYPE_GPRREG, xmmhilo, MODE_READ);
-
-	_eeOnWriteReg(_Rd_, 0);
-
-	regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_READ | MODE_WRITE);
-
-	if (reghi >= 0)
-	{
-		if (regd >= 0)
-		{
-			xMOVHL.PS(xRegisterSSE(regd), xRegisterSSE(reghi));
-			xmmregs[regd].mode |= MODE_WRITE;
-		}
-		else
-		{
-			_deleteEEreg(_Rd_, 0);
-			xMOVH.PS(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(reghi));
-		}
-	}
-	else
-	{
-		if (regd >= 0)
-		{
-			if (EEINST_ISLIVE2(_Rd_))
-			{
-				xPUNPCK.HQDQ(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0])]);
-				xPSHUF.D(xRegisterSSE(regd), xRegisterSSE(regd), 0x4e);
-			}
-			else
-			{
-				xMOVQZX(xRegisterSSE(regd), ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UD[1] : (uptr)&cpuRegs.LO.UD[1])]);
-			}
-			xmmregs[regd].mode |= MODE_WRITE;
-		}
-		else
-		{
-			_deleteEEreg(_Rd_, 0);
-			xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], hi ? &cpuRegs.HI.UD[1] : &cpuRegs.LO.UD[1]);
-		}
-	}
-}
-
-void recMTHILO1(int hi)
-{
-	int reghi, regs, xmmhilo;
-	uptr addrhilo;
-
-	xmmhilo = hi ? XMMGPR_HI : XMMGPR_LO;
-	addrhilo = hi ? (uptr)&cpuRegs.HI.UD[0] : (uptr)&cpuRegs.LO.UD[0];
-
-	regs = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ);
-	reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, xmmhilo, MODE_WRITE | MODE_READ);
-
-	if (reghi >= 0)
-	{
-		if (regs >= 0)
-		{
-			xPUNPCK.LQDQ(xRegisterSSE(reghi), xRegisterSSE(regs));
-		}
-		else
-		{
-			_flushEEreg(_Rs_);
-			xPUNPCK.LQDQ(xRegisterSSE(reghi), ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
-		}
-	}
-	else
-	{
-		if (regs >= 0)
-		{
-			xMOVQ(ptr[(void*)(addrhilo + 8)], xRegisterSSE(regs));
-		}
-		else
-		{
-			if (GPR_IS_CONST1(_Rs_))
-			{
-				xWriteImm64ToMem((u64*)(addrhilo + 8), rax, g_cpuConstRegs[_Rs_].UD[0]);
-			}
-			else
-			{
-				_flushEEreg(_Rs_);
-				xCopy64((u64*)(addrhilo + 8), &cpuRegs.GPR.r[_Rs_].UD[0]);
-			}
-		}
-	}
-}
-
 void recMFHI1()
 {
-	recMFHILO1(1);
+	recMFHILO(true, true);
 	EE::Profiler.EmitOp(eeOpcode::MFHI1);
 }

 void recMFLO1()
 {
-	recMFHILO1(0);
+	recMFHILO(false, true);
 	EE::Profiler.EmitOp(eeOpcode::MFLO1);
 }

 void recMTHI1()
 {
-	recMTHILO1(1);
+	recMTHILO(true, true);
 	EE::Profiler.EmitOp(eeOpcode::MTHI1);
 }

 void recMTLO1()
 {
-	recMTHILO1(0);
+	recMTHILO(false, true);
 	EE::Profiler.EmitOp(eeOpcode::MTLO1);
 }
 //// MOVZ
-void recMOVZtemp_const()
+// if (rt == 0) then rd <- rs
+static void recMOVZtemp_const()
 {
 	g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0];
 }

-void recMOVZtemp_consts(int info)
+static void recMOVZtemp_consts(int info)
 {
-	xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
-	j8Ptr[0] = JNZ8(0);
-
-	xWriteImm64ToMem(&cpuRegs.GPR.r[_Rd_].UD[0], rax, g_cpuConstRegs[_Rs_].UD[0]);
-
-	x86SetJ8(j8Ptr[0]);
+	// we need the constant anyway, so just force it into a register
+	const int regs = (info & PROCESS_EE_S) ? EEREC_S : _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
+
+	if (info & PROCESS_EE_T)
+		xTEST(xRegister64(EEREC_T), xRegister64(EEREC_T));
+	else
+		xCMP(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], 0);
+
+	xCMOVE(xRegister64(EEREC_D), xRegister64(regs));
 }

-void recMOVZtemp_constt(int info)
+static void recMOVZtemp_constt(int info)
 {
-	xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
+	if (info & PROCESS_EE_S)
+		xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
+	else
+		xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
 }

-void recMOVZtemp_(int info)
+static void recMOVZtemp_(int info)
 {
-	xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
-	j8Ptr[0] = JNZ8(0);
-
-	xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
-
-	x86SetJ8(j8Ptr[0]);
+	if (info & PROCESS_EE_T)
+		xTEST(xRegister64(EEREC_T), xRegister64(EEREC_T));
+	else
+		xCMP(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], 0);
+
+	if (info & PROCESS_EE_S)
+		xCMOVE(xRegister64(EEREC_D), xRegister64(EEREC_S));
+	else
+		xCMOVE(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
 }

-EERECOMPILE_CODE0(MOVZtemp, XMMINFO_READS | XMMINFO_READD | XMMINFO_READD | XMMINFO_WRITED);
+// Specify READD here, because we might not write to it, and want to preserve the value.
+static EERECOMPILE_CODERC0(MOVZtemp, XMMINFO_READS | XMMINFO_READT | XMMINFO_READD | XMMINFO_WRITED | XMMINFO_NORENAME);

 void recMOVZ()
 {
 	if (_Rs_ == _Rd_)
 		return;

-	if (GPR_IS_CONST1(_Rt_))
-	{
-		if (g_cpuConstRegs[_Rt_].UD[0] != 0)
-			return;
-	}
-	else
-		_deleteEEreg(_Rd_, 1);
+	if (GPR_IS_CONST1(_Rt_) && g_cpuConstRegs[_Rt_].UD[0] != 0)
+		return;

 	recMOVZtemp();
 }
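The new MOVZ codegen is a branchless select: TEST/CMP sets ZF and CMOVE copies rs into rd only when rt is zero, replacing the old jump-over-a-store sequence. What it computes, in C++:

#include <cstdint>
#include <cstdio>

// rd keeps its old value unless rt == 0; compilers lower this ternary to cmov.
static uint64_t movz(uint64_t rd, uint64_t rs, uint64_t rt)
{
	return (rt == 0) ? rs : rd;
}

int main()
{
	std::printf("%llu %llu\n",
		(unsigned long long)movz(7, 9, 0),  // rt == 0 -> takes rs (9)
		(unsigned long long)movz(7, 9, 1)); // rt != 0 -> keeps rd (7)
	return 0;
}

This is also why the template requests XMMINFO_READD: when the condition fails, the destination must keep its prior value, so Rd has to be loaded even though the op nominally writes it.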
 //// MOVN
-void recMOVNtemp_const()
+static void recMOVNtemp_const()
 {
 	g_cpuConstRegs[_Rd_].UD[0] = g_cpuConstRegs[_Rs_].UD[0];
 }

-void recMOVNtemp_consts(int info)
+static void recMOVNtemp_consts(int info)
 {
-	xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
-	j8Ptr[0] = JZ8(0);
-
-	xWriteImm64ToMem(&cpuRegs.GPR.r[_Rd_].UD[0], rax, g_cpuConstRegs[_Rs_].UD[0]);
-
-	x86SetJ8(j8Ptr[0]);
+	// we need the constant anyway, so just force it into a register
+	const int regs = (info & PROCESS_EE_S) ? EEREC_S : _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
+
+	if (info & PROCESS_EE_T)
+		xTEST(xRegister64(EEREC_T), xRegister64(EEREC_T));
+	else
+		xCMP(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], 0);
+
+	xCMOVNE(xRegister64(EEREC_D), xRegister64(regs));
 }

-void recMOVNtemp_constt(int info)
+static void recMOVNtemp_constt(int info)
 {
-	xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
+	if (info & PROCESS_EE_S)
+		xMOV(xRegister64(EEREC_D), xRegister64(EEREC_S));
+	else
+		xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
 }

-void recMOVNtemp_(int info)
+static void recMOVNtemp_(int info)
 {
-	xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
-	j8Ptr[0] = JZ8(0);
-
-	xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
-
-	x86SetJ8(j8Ptr[0]);
+	if (info & PROCESS_EE_T)
+		xTEST(xRegister64(EEREC_T), xRegister64(EEREC_T));
+	else
+		xCMP(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], 0);
+
+	if (info & PROCESS_EE_S)
+		xCMOVNE(xRegister64(EEREC_D), xRegister64(EEREC_S));
+	else
+		xCMOVNE(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
 }

-EERECOMPILE_CODE0(MOVNtemp, XMMINFO_READS | XMMINFO_READD | XMMINFO_READD | XMMINFO_WRITED);
+static EERECOMPILE_CODERC0(MOVNtemp, XMMINFO_READS | XMMINFO_READT | XMMINFO_READD | XMMINFO_WRITED | XMMINFO_NORENAME);

 void recMOVN()
 {
 	if (_Rs_ == _Rd_)
 		return;

-	if (GPR_IS_CONST1(_Rt_))
-	{
-		if (g_cpuConstRegs[_Rt_].UD[0] == 0)
-			return;
-	}
-	else
-		_deleteEEreg(_Rd_, 1);
+	if (GPR_IS_CONST1(_Rt_) && g_cpuConstRegs[_Rt_].UD[0] == 0)
+		return;

 	recMOVNtemp();
 }

 #endif

-} // namespace OpcodeImpl
-} // namespace Dynarec
-} // namespace R5900
+} // namespace R5900::Dynarec::OpcodeImpl

View File

@@ -24,9 +24,8 @@
 using namespace x86Emitter;

 namespace Interp = R5900::Interpreter::OpcodeImpl;

-namespace R5900 {
-namespace Dynarec {
-namespace OpcodeImpl {
+namespace R5900::Dynarec::OpcodeImpl
+{

 /*********************************************************
 * Register mult/div & Register trap logic                *
@@ -51,283 +50,293 @@ REC_FUNC_DEL(MADDU1, _Rd_);
 #else
-// if upper is 1, write in upper 64 bits of LO/HI
-void recWritebackHILO(int info, int writed, int upper)
-{
-	int savedlo = 0;
-	uptr loaddr = (uptr)&cpuRegs.LO.UL[upper ? 2 : 0];
-	const uptr hiaddr = (uptr)&cpuRegs.HI.UL[upper ? 2 : 0];
-	const u8 testlive = upper ? EEINST_LIVE2 : EEINST_LIVE0;
-
-	if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
-		xMOVSX(rcx, edx);
-
-	if (g_pCurInstInfo->regs[XMMGPR_LO] & testlive)
-	{
-		int reglo = 0;
-		if ((reglo = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_LO, MODE_READ)) >= 0)
-		{
-			if (xmmregs[reglo].mode & MODE_WRITE)
-			{
-				if (upper)
-					xMOVQ(ptr[(void*)(loaddr - 8)], xRegisterSSE(reglo));
-				else
-					xMOVH.PS(ptr[(void*)(loaddr + 8)], xRegisterSSE(reglo));
-			}
-
-			xmmregs[reglo].inuse = 0;
-			reglo = -1;
-		}
-
-		_signExtendToMem((void*)loaddr);
-		savedlo = 1;
-	}
-
-	if (writed && _Rd_)
-	{
-		_eeOnWriteReg(_Rd_, 1);
-
-		int regd = -1;
-		if (g_pCurInstInfo->regs[_Rd_] & EEINST_XMM)
-		{
-			if (savedlo)
-			{
-				regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE | MODE_READ);
-				if (regd >= 0)
-				{
-					xMOVL.PS(xRegisterSSE(regd), ptr[(void*)(loaddr)]);
-				}
-			}
-		}
-
-		if (regd < 0)
-		{
-			_deleteEEreg(_Rd_, 0);
-
-			if (!savedlo)
-				xCDQE();
-			xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
-		}
-	}
-
-	if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
-	{
-		int reghi = 0;
-		if ((reghi = _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_HI, MODE_READ)) >= 0)
-		{
-			if (xmmregs[reghi].mode & MODE_WRITE)
-			{
-				if (upper)
-					xMOVQ(ptr[(void*)(hiaddr - 8)], xRegisterSSE(reghi));
-				else
-					xMOVH.PS(ptr[(void*)(hiaddr + 8)], xRegisterSSE(reghi));
-			}
-
-			xmmregs[reghi].inuse = 0;
-			reghi = -1;
-		}
-
-		xMOV(ptr[(void*)(hiaddr)], rcx);
-	}
-}
+// if upper is true, write in upper 64 bits of LO/HI
+static void recWritebackHILO(int info, bool writed, bool upper)
+{
+	// writeback low 32 bits, sign extended to 64 bits
+	bool eax_sign_extended = false;
+
+	// case 1: LO is already in an XMM - use the xmm
+	// case 2: LO is used as an XMM later in the block - use or allocate the XMM
+	// case 3: LO is used as a GPR later in the block - use XMM if upper, otherwise use GPR, so it can be renamed
+	// case 4: LO is already in a GPR - write to the GPR, or write to memory if upper
+	// case 5: LO is not used - writeback to memory
+
+	if (EEINST_LIVETEST(XMMGPR_LO))
+	{
+		const bool loused = EEINST_USEDTEST(XMMGPR_LO);
+		const bool lousedxmm = loused && (upper || EEINST_XMMUSEDTEST(XMMGPR_LO));
+		const int xmmlo = lousedxmm ? _allocGPRtoXMMreg(XMMGPR_LO, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_LO, MODE_WRITE);
+		if (xmmlo >= 0)
+		{
+			// we use CDQE over MOVSX because it's shorter.
+			xCDQE();
+			xPINSR.Q(xRegisterSSE(xmmlo), rax, static_cast<u8>(upper));
+		}
+		else
+		{
+			const int gprlo = upper ? -1 : (loused ? _allocX86reg(X86TYPE_GPR, XMMGPR_LO, MODE_WRITE) : _checkX86reg(X86TYPE_GPR, XMMGPR_LO, MODE_WRITE));
+			if (gprlo >= 0)
+			{
+				xMOVSX(xRegister64(gprlo), eax);
+			}
+			else
+			{
+				xCDQE();
+				eax_sign_extended = true;
+				xMOV(ptr64[&cpuRegs.LO.UD[upper]], rax);
+			}
+		}
+	}
+
+	if (EEINST_LIVETEST(XMMGPR_HI))
+	{
+		const bool hiused = EEINST_USEDTEST(XMMGPR_HI);
+		const bool hiusedxmm = hiused && (upper || EEINST_XMMUSEDTEST(XMMGPR_HI));
+		const int xmmhi = hiusedxmm ? _allocGPRtoXMMreg(XMMGPR_HI, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_HI, MODE_WRITE);
+		if (xmmhi >= 0)
+		{
+			xMOVSX(rdx, edx);
+			xPINSR.Q(xRegisterSSE(xmmhi), rdx, static_cast<u8>(upper));
+		}
+		else
+		{
+			const int gprhi = upper ? -1 : (hiused ? _allocX86reg(X86TYPE_GPR, XMMGPR_HI, MODE_WRITE) : _checkX86reg(X86TYPE_GPR, XMMGPR_HI, MODE_WRITE));
+			if (gprhi >= 0)
+			{
+				xMOVSX(xRegister64(gprhi), edx);
+			}
+			else
+			{
+				xMOVSX(rdx, edx);
+				xMOV(ptr64[&cpuRegs.HI.UD[upper]], rdx);
+			}
+		}
+	}
+
+	// writeback lo to Rd if present
+	if (writed && _Rd_ && EEINST_LIVETEST(_Rd_))
+	{
+		// TODO: This can be made optimal by keeping it in an xmm.
+		// But currently the templates aren't hooked up for that - we'd need a "allow xmm" flag.
+		if (info & PROCESS_EE_D)
+		{
+			if (eax_sign_extended)
+				xMOV(xRegister64(EEREC_D), rax);
+			else
+				xMOVSX(xRegister64(EEREC_D), eax);
+		}
+		else
+		{
+			if (!eax_sign_extended)
+				xCDQE();
+			xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
+		}
+	}
+}
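The contract this helper implements: after a 32x32 multiply the host leaves the product in EDX:EAX, and the EE stores each 32-bit half sign-extended to 64 bits (the CDQE/MOVSX pairs above). A plain C++ restatement of that writeback, independent of the register-allocation cases:

#include <cstdint>
#include <cstdio>

static void mult_writeback(int32_t rs, int32_t rt, int64_t& lo, int64_t& hi)
{
	const int64_t prod = (int64_t)rs * (int64_t)rt;
	lo = (int64_t)(int32_t)(prod & 0xffffffff); // what xCDQE produces from eax
	hi = (int64_t)(int32_t)(prod >> 32);        // what xMOVSX(rdx, edx) produces
}

int main()
{
	int64_t lo, hi;
	mult_writeback(-2, 0x40000000, lo, hi);
	std::printf("lo=%016llx hi=%016llx\n",
		(unsigned long long)lo, (unsigned long long)hi);
	return 0;
}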
-void recWritebackConstHILO(u64 res, int writed, int upper)
-{
-	uptr loaddr = (uptr)&cpuRegs.LO.UL[upper ? 2 : 0];
-	uptr hiaddr = (uptr)&cpuRegs.HI.UL[upper ? 2 : 0];
-	u8 testlive = upper ? EEINST_LIVE2 : EEINST_LIVE0;
-
-	if (g_pCurInstInfo->regs[XMMGPR_LO] & testlive)
-	{
-		int reglo = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_LO, MODE_WRITE | MODE_READ);
-
-		if (reglo >= 0)
-		{
-			u32* mem_ptr = recGetImm64(res & 0x80000000 ? -1 : 0, (u32)res);
-			if (upper)
-				xMOVH.PS(xRegisterSSE(reglo), ptr[mem_ptr]);
-			else
-				xMOVL.PS(xRegisterSSE(reglo), ptr[mem_ptr]);
-		}
-		else
-		{
-			xWriteImm64ToMem((u64*)loaddr, rax, (s64)(s32)(res & 0xffffffff));
-		}
-	}
-
-	if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
-	{
-		int reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_HI, MODE_WRITE | MODE_READ);
-
-		if (reghi >= 0)
-		{
-			u32* mem_ptr = recGetImm64((res >> 63) ? -1 : 0, res >> 32);
-			if (upper)
-				xMOVH.PS(xRegisterSSE(reghi), ptr[mem_ptr]);
-			else
-				xMOVL.PS(xRegisterSSE(reghi), ptr[mem_ptr]);
-		}
-		else
-		{
-			_deleteEEreg(XMMGPR_HI, 0);
-			xWriteImm64ToMem((u64*)hiaddr, rax, (s64)res >> 32);
-		}
-	}
-
-	if (!writed || !_Rd_)
-		return;
-
-	g_cpuConstRegs[_Rd_].SD[0] = (s32)(res & 0xffffffffULL); //that is the difference
-}
+static void recWritebackConstHILO(u64 res, bool writed, int upper)
+{
+	// It's not often that MULT/DIV are entirely constant. So while the MOV64s here are not optimal
+	// by any means, it's not something that's going to be hit often enough to worry about a cache.
+	// Except for apparently when it's getting set to all-zeros, but that'll be fine with immediates.
+
+	const s64 loval = static_cast<s64>(static_cast<s32>(static_cast<u32>(res)));
+	const s64 hival = static_cast<s64>(static_cast<s32>(static_cast<u32>(res >> 32)));
+
+	if (EEINST_LIVETEST(XMMGPR_LO))
+	{
+		const bool lolive = EEINST_USEDTEST(XMMGPR_LO);
+		const bool lolivexmm = lolive && (upper || EEINST_XMMUSEDTEST(XMMGPR_LO));
+		const int xmmlo = lolivexmm ? _allocGPRtoXMMreg(XMMGPR_LO, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_LO, MODE_WRITE);
+		if (xmmlo >= 0)
+		{
+			xMOV64(rax, loval);
+			xPINSR.Q(xRegisterSSE(xmmlo), rax, static_cast<u8>(upper));
+		}
+		else
+		{
+			const int gprlo = upper ? -1 : (lolive ? _allocX86reg(X86TYPE_GPR, XMMGPR_LO, MODE_WRITE) : _checkX86reg(X86TYPE_GPR, XMMGPR_LO, MODE_WRITE));
+			if (gprlo >= 0)
+				xImm64Op(xMOV, xRegister64(gprlo), rax, loval);
+			else
+				xImm64Op(xMOV, ptr64[&cpuRegs.LO.UD[upper]], rax, loval);
+		}
+	}
+
+	if (EEINST_LIVETEST(XMMGPR_HI))
+	{
+		const bool hilive = EEINST_USEDTEST(XMMGPR_HI);
+		const bool hilivexmm = hilive && (upper || EEINST_XMMUSEDTEST(XMMGPR_HI));
+		const int xmmhi = hilivexmm ? _allocGPRtoXMMreg(XMMGPR_HI, MODE_READ | MODE_WRITE) : _checkXMMreg(XMMTYPE_GPRREG, XMMGPR_HI, MODE_WRITE);
+		if (xmmhi >= 0)
+		{
+			xMOV64(rax, hival);
+			xPINSR.Q(xRegisterSSE(xmmhi), rax, static_cast<u8>(upper));
+		}
+		else
+		{
+			const int gprhi = upper ? -1 : (hilive ? _allocX86reg(X86TYPE_GPR, XMMGPR_HI, MODE_WRITE) : _checkX86reg(X86TYPE_GPR, XMMGPR_HI, MODE_WRITE));
+			if (gprhi >= 0)
+				xImm64Op(xMOV, xRegister64(gprhi), rax, hival);
+			else
+				xImm64Op(xMOV, ptr64[&cpuRegs.HI.UD[upper]], rax, hival);
+		}
+	}
+
+	// writeback lo to Rd if present
+	if (writed && _Rd_ && EEINST_LIVETEST(_Rd_))
+	{
+		_eeOnWriteReg(_Rd_, 0);
+
+		const int regd = _checkX86reg(X86TYPE_GPR, _Rd_, MODE_WRITE);
+		if (regd >= 0)
+			xImm64Op(xMOV, xRegister64(regd), rax, loval);
+		else
+			xImm64Op(xMOV, ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax, loval);
+	}
+}
 //// MULT
-void recMULT_const()
+static void recMULT_const()
 {
 	s64 res = (s64)g_cpuConstRegs[_Rs_].SL[0] * (s64)g_cpuConstRegs[_Rt_].SL[0];

 	recWritebackConstHILO(res, 1, 0);
 }

-void recMULTUsuper(int info, int upper, int process);
-void recMULTsuper(int info, int upper, int process)
+static void recMULTsuper(int info, bool sign, bool upper, int process)
 {
+	// TODO(Stenzek): Use MULX where available.
 	if (process & PROCESS_CONSTS)
 	{
 		xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
-		xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+		if (info & PROCESS_EE_T)
+			sign ? xMUL(xRegister32(EEREC_T)) : xUMUL(xRegister32(EEREC_T));
+		else
+			sign ? xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]) : xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
 	}
 	else if (process & PROCESS_CONSTT)
 	{
 		xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
-		xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
+		if (info & PROCESS_EE_S)
+			sign ? xMUL(xRegister32(EEREC_S)) : xUMUL(xRegister32(EEREC_S));
+		else
+			sign ? xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]) : xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
 	}
 	else
 	{
-		xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
-		xMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+		// S is more likely to be in a register than T (so put T in eax).
+		if (info & PROCESS_EE_T)
+			xMOV(eax, xRegister32(EEREC_T));
+		else
+			xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+
+		if (info & PROCESS_EE_S)
+			sign ? xMUL(xRegister32(EEREC_S)) : xUMUL(xRegister32(EEREC_S));
+		else
+			sign ? xMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]) : xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
 	}

 	recWritebackHILO(info, 1, upper);
 }

-void recMULT_(int info)
+static void recMULT_(int info)
 {
-	recMULTsuper(info, 0, 0);
+	recMULTsuper(info, true, false, 0);
 }

-void recMULT_consts(int info)
+static void recMULT_consts(int info)
 {
-	recMULTsuper(info, 0, PROCESS_CONSTS);
+	recMULTsuper(info, true, false, PROCESS_CONSTS);
 }

-void recMULT_constt(int info)
+static void recMULT_constt(int info)
 {
-	recMULTsuper(info, 0, PROCESS_CONSTT);
+	recMULTsuper(info, true, false, PROCESS_CONSTT);
 }

-// don't set XMMINFO_WRITED|XMMINFO_WRITELO|XMMINFO_WRITEHI
-EERECOMPILE_CODE0(MULT, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
+// lo/hi allocation are taken care of in recWritebackHILO().
+EERECOMPILE_CODERC0(MULT, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
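A single recMULTsuper with a sign flag suffices because signed and unsigned MULT differ only in how the 32-bit product widens (the xMUL/xUMUL split above, i.e. one-operand signed vs unsigned multiply). Demonstrated in C++:

#include <cstdint>
#include <cstdio>

static uint64_t mul32(uint32_t a, uint32_t b, bool sign)
{
	if (sign)
		return (uint64_t)((int64_t)(int32_t)a * (int64_t)(int32_t)b); // MULT
	return (uint64_t)a * (uint64_t)b;                                 // MULTU
}

int main()
{
	// 0xffffffff is -1 when treated as signed, so the two forms diverge:
	std::printf("%016llx %016llx\n",
		(unsigned long long)mul32(0xffffffffu, 2, true),   // fffffffffffffffe
		(unsigned long long)mul32(0xffffffffu, 2, false)); // 00000001fffffffe
	return 0;
}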
 //// MULTU
-void recMULTU_const()
+static void recMULTU_const()
 {
-	u64 res = (u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0];
+	const u64 res = (u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0];

 	recWritebackConstHILO(res, 1, 0);
 }

-void recMULTUsuper(int info, int upper, int process)
-{
-	if (process & PROCESS_CONSTS)
-	{
-		xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
-		xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
-	}
-	else if (process & PROCESS_CONSTT)
-	{
-		xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
-		xUMUL(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
-	}
-	else
-	{
-		xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
-		xUMUL(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
-	}
-
-	recWritebackHILO(info, 1, upper);
-}
-
-void recMULTU_(int info)
+static void recMULTU_(int info)
 {
-	recMULTUsuper(info, 0, 0);
+	recMULTsuper(info, false, false, 0);
 }

-void recMULTU_consts(int info)
+static void recMULTU_consts(int info)
 {
-	recMULTUsuper(info, 0, PROCESS_CONSTS);
+	recMULTsuper(info, false, false, PROCESS_CONSTS);
 }

-void recMULTU_constt(int info)
+static void recMULTU_constt(int info)
 {
-	recMULTUsuper(info, 0, PROCESS_CONSTT);
+	recMULTsuper(info, false, false, PROCESS_CONSTT);
 }

 // don't specify XMMINFO_WRITELO or XMMINFO_WRITEHI, that is taken care of
-EERECOMPILE_CODE0(MULTU, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
+EERECOMPILE_CODERC0(MULTU, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));

 ////////////////////////////////////////////////////
-void recMULT1_const()
+static void recMULT1_const()
 {
 	s64 res = (s64)g_cpuConstRegs[_Rs_].SL[0] * (s64)g_cpuConstRegs[_Rt_].SL[0];

 	recWritebackConstHILO((u64)res, 1, 1);
 }

-void recMULT1_(int info)
+static void recMULT1_(int info)
 {
-	recMULTsuper(info, 1, 0);
+	recMULTsuper(info, true, true, 0);
 }

-void recMULT1_consts(int info)
+static void recMULT1_consts(int info)
 {
-	recMULTsuper(info, 1, PROCESS_CONSTS);
+	recMULTsuper(info, true, true, PROCESS_CONSTS);
 }

-void recMULT1_constt(int info)
+static void recMULT1_constt(int info)
 {
-	recMULTsuper(info, 1, PROCESS_CONSTT);
+	recMULTsuper(info, true, true, PROCESS_CONSTT);
 }

-EERECOMPILE_CODE0(MULT1, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
+EERECOMPILE_CODERC0(MULT1, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));

 ////////////////////////////////////////////////////
-void recMULTU1_const()
+static void recMULTU1_const()
 {
 	u64 res = (u64)g_cpuConstRegs[_Rs_].UL[0] * (u64)g_cpuConstRegs[_Rt_].UL[0];

 	recWritebackConstHILO(res, 1, 1);
 }

-void recMULTU1_(int info)
+static void recMULTU1_(int info)
 {
-	recMULTUsuper(info, 1, 0);
+	recMULTsuper(info, false, true, 0);
 }

-void recMULTU1_consts(int info)
+static void recMULTU1_consts(int info)
 {
-	recMULTUsuper(info, 1, PROCESS_CONSTS);
+	recMULTsuper(info, false, true, PROCESS_CONSTS);
 }

-void recMULTU1_constt(int info)
+static void recMULTU1_constt(int info)
 {
-	recMULTUsuper(info, 1, PROCESS_CONSTT);
+	recMULTsuper(info, false, true, PROCESS_CONSTT);
 }

-EERECOMPILE_CODE0(MULTU1, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
+EERECOMPILE_CODERC0(MULTU1, XMMINFO_READS | XMMINFO_READT | (_Rd_ ? XMMINFO_WRITED : 0));
 //// DIV
-void recDIVconst(int upper)
+static void recDIVconst(int upper)
 {
 	s32 quot, rem;
 	if (g_cpuConstRegs[_Rs_].UL[0] == 0x80000000 && g_cpuConstRegs[_Rt_].SL[0] == -1)
@@ -348,29 +357,36 @@ void recDIVconst(int upper)
 	recWritebackConstHILO((u64)quot | ((u64)rem << 32), 0, upper);
 }

-void recDIV_const()
+static void recDIV_const()
 {
 	recDIVconst(0);
 }

-void recDIVsuper(int info, int sign, int upper, int process)
+static void recDIVsuper(int info, bool sign, bool upper, int process)
 {
-	if (process & PROCESS_CONSTT)
-		xMOV(ecx, g_cpuConstRegs[_Rt_].UL[0]);
-	else
-		xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+	const xRegister32 divisor((info & PROCESS_EE_T) ? EEREC_T : ecx.GetId());
+	if (!(info & PROCESS_EE_T))
+	{
+		if (process & PROCESS_CONSTT)
+			xMOV(divisor, g_cpuConstRegs[_Rt_].UL[0]);
+		else
+			xMOV(divisor, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+	}
+
+	// can't use edx, it's part of the dividend
+	pxAssert(divisor.GetId() != edx.GetId());

 	if (process & PROCESS_CONSTS)
 		xMOV(eax, g_cpuConstRegs[_Rs_].UL[0]);
 	else
-		xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
+		_eeMoveGPRtoR(rax, _Rs_);

 	u8* end1;
 	if (sign) //test for overflow (x86 will just throw an exception)
 	{
 		xCMP(eax, 0x80000000);
 		u8* cont1 = JNE8(0);
-		xCMP(ecx, 0xffffffff);
+		xCMP(divisor, 0xffffffff);
 		u8* cont2 = JNE8(0);
 		//overflow case:
 		xXOR(edx, edx); //EAX remains 0x80000000
@@ -380,7 +396,7 @@ void recDIVsuper(int info, int sign, int upper, int process)
 		x86SetJ8(cont2);
 	}

-	xCMP(ecx, 0);
+	xCMP(divisor, 0);
 	u8* cont3 = JNE8(0);
 	//divide by zero
 	xMOV(edx, eax);
@@ -398,12 +414,12 @@ void recDIVsuper(int info, int sign, int upper, int process)
 	if (sign)
 	{
 		xCDQ();
-		xDIV(ecx);
+		xDIV(divisor);
 	}
 	else
 	{
 		xXOR(edx, edx);
-		xUDIV(ecx);
+		xUDIV(divisor);
 	}

 	if (sign)
@@ -411,28 +427,29 @@ void recDIVsuper(int info, int sign, int upper, int process)
 	x86SetJ8(end2);

 	// need to execute regardless of bad divide
-	recWritebackHILO(info, 0, upper);
+	recWritebackHILO(info, false, upper);
 }

-void recDIV_(int info)
+static void recDIV_(int info)
 {
 	recDIVsuper(info, 1, 0, 0);
 }

-void recDIV_consts(int info)
+static void recDIV_consts(int info)
 {
 	recDIVsuper(info, 1, 0, PROCESS_CONSTS);
 }

-void recDIV_constt(int info)
+static void recDIV_constt(int info)
 {
 	recDIVsuper(info, 1, 0, PROCESS_CONSTT);
 }

-EERECOMPILE_CODE0(DIV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI);
+// We handle S reading in the routine itself, since it needs to go into eax.
+EERECOMPILE_CODERC0(DIV, /*XMMINFO_READS |*/ XMMINFO_READT);
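The two guard branches recDIVsuper emits exist because x86 division faults on exactly the inputs the EE defines results for. The architectural outcomes being reproduced, as commonly documented for the R5900 (treat the exact constants below as an assumption of this sketch, not a statement of the emitted code):

#include <cstdint>
#include <cstdio>

static void ee_div(int32_t rs, int32_t rt, int32_t& lo, int32_t& hi)
{
	if (rt == 0)
	{
		lo = (rs >= 0) ? -1 : 1; // quotient fixed by dividend sign
		hi = rs;                 // remainder = dividend
	}
	else if ((uint32_t)rs == 0x80000000u && rt == -1)
	{
		lo = (int32_t)0x80000000u; // overflow: quotient saturates to INT_MIN
		hi = 0;
	}
	else
	{
		lo = rs / rt;
		hi = rs % rt;
	}
}

int main()
{
	int32_t lo, hi;
	ee_div((int32_t)0x80000000u, -1, lo, hi); // would raise #DE as x86 IDIV
	std::printf("lo=%d hi=%d\n", lo, hi);
	return 0;
}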
 //// DIVU
-void recDIVUconst(int upper)
+static void recDIVUconst(int upper)
 {
 	u32 quot, rem;
 	if (g_cpuConstRegs[_Rt_].UL[0] != 0)
@@ -449,71 +466,73 @@ void recDIVUconst(int upper)
 	recWritebackConstHILO((u64)quot | ((u64)rem << 32), 0, upper);
 }

-void recDIVU_const()
+static void recDIVU_const()
 {
 	recDIVUconst(0);
 }

-void recDIVU_(int info)
+static void recDIVU_(int info)
 {
-	recDIVsuper(info, 0, 0, 0);
+	recDIVsuper(info, false, false, 0);
 }

-void recDIVU_consts(int info)
+static void recDIVU_consts(int info)
 {
-	recDIVsuper(info, 0, 0, PROCESS_CONSTS);
+	recDIVsuper(info, false, false, PROCESS_CONSTS);
 }

-void recDIVU_constt(int info)
+static void recDIVU_constt(int info)
 {
-	recDIVsuper(info, 0, 0, PROCESS_CONSTT);
+	recDIVsuper(info, false, false, PROCESS_CONSTT);
 }

-EERECOMPILE_CODE0(DIVU, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITELO | XMMINFO_WRITEHI);
+EERECOMPILE_CODERC0(DIVU, /*XMMINFO_READS |*/ XMMINFO_READT);

-void recDIV1_const()
+static void recDIV1_const()
 {
 	recDIVconst(1);
 }

-void recDIV1_(int info)
+static void recDIV1_(int info)
 {
-	recDIVsuper(info, 1, 1, 0);
+	recDIVsuper(info, true, true, 0);
 }

-void recDIV1_consts(int info)
+static void recDIV1_consts(int info)
 {
-	recDIVsuper(info, 1, 1, PROCESS_CONSTS);
+	recDIVsuper(info, true, true, PROCESS_CONSTS);
 }

-void recDIV1_constt(int info)
+static void recDIV1_constt(int info)
 {
-	recDIVsuper(info, 1, 1, PROCESS_CONSTT);
+	recDIVsuper(info, true, true, PROCESS_CONSTT);
 }

-EERECOMPILE_CODE0(DIV1, XMMINFO_READS | XMMINFO_READT);
+EERECOMPILE_CODERC0(DIV1, /*XMMINFO_READS |*/ XMMINFO_READT);

-void recDIVU1_const()
+static void recDIVU1_const()
 {
 	recDIVUconst(1);
 }

-void recDIVU1_(int info)
+static void recDIVU1_(int info)
 {
-	recDIVsuper(info, 0, 1, 0);
+	recDIVsuper(info, false, true, 0);
 }

-void recDIVU1_consts(int info)
+static void recDIVU1_consts(int info)
 {
-	recDIVsuper(info, 0, 1, PROCESS_CONSTS);
+	recDIVsuper(info, false, true, PROCESS_CONSTS);
 }

-void recDIVU1_constt(int info)
+static void recDIVU1_constt(int info)
 {
-	recDIVsuper(info, 0, 1, PROCESS_CONSTT);
+	recDIVsuper(info, false, true, PROCESS_CONSTT);
 }

-EERECOMPILE_CODE0(DIVU1, XMMINFO_READS | XMMINFO_READT);
+EERECOMPILE_CODERC0(DIVU1, /*XMMINFO_READS |*/ XMMINFO_READT);
+// TODO(Stenzek): All of these :(

 static void writeBackMAddToHiLoRd(int hiloID)
 {
@@ -564,8 +583,10 @@ void recMADD()
 	_deleteEEreg(XMMGPR_LO, 1);
 	_deleteEEreg(XMMGPR_HI, 1);
-	_deleteGPRtoXMMreg(_Rs_, 1);
-	_deleteGPRtoXMMreg(_Rt_, 1);
+	_deleteGPRtoX86reg(_Rs_, DELETE_REG_FLUSH);
+	_deleteGPRtoX86reg(_Rt_, DELETE_REG_FLUSH);
+	_deleteGPRtoXMMreg(_Rs_, DELETE_REG_FLUSH);
+	_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH);

 	if (GPR_IS_CONST1(_Rs_))
 	{
@@ -597,8 +618,10 @@ void recMADDU()
 	_deleteEEreg(XMMGPR_LO, 1);
 	_deleteEEreg(XMMGPR_HI, 1);
-	_deleteGPRtoXMMreg(_Rs_, 1);
-	_deleteGPRtoXMMreg(_Rt_, 1);
+	_deleteGPRtoX86reg(_Rs_, DELETE_REG_FLUSH);
+	_deleteGPRtoX86reg(_Rt_, DELETE_REG_FLUSH);
+	_deleteGPRtoXMMreg(_Rs_, DELETE_REG_FLUSH);
+	_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH);

 	if (GPR_IS_CONST1(_Rs_))
 	{
@@ -630,8 +653,10 @@ void recMADD1()
 	_deleteEEreg(XMMGPR_LO, 1);
 	_deleteEEreg(XMMGPR_HI, 1);
-	_deleteGPRtoXMMreg(_Rs_, 1);
-	_deleteGPRtoXMMreg(_Rt_, 1);
+	_deleteGPRtoX86reg(_Rs_, DELETE_REG_FLUSH);
+	_deleteGPRtoX86reg(_Rt_, DELETE_REG_FLUSH);
+	_deleteGPRtoXMMreg(_Rs_, DELETE_REG_FLUSH);
+	_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH);

 	if (GPR_IS_CONST1(_Rs_))
 	{
@@ -663,8 +688,10 @@ void recMADDU1()
 	_deleteEEreg(XMMGPR_LO, 1);
 	_deleteEEreg(XMMGPR_HI, 1);
-	_deleteGPRtoXMMreg(_Rs_, 1);
-	_deleteGPRtoXMMreg(_Rt_, 1);
+	_deleteGPRtoX86reg(_Rs_, DELETE_REG_FLUSH);
+	_deleteGPRtoX86reg(_Rt_, DELETE_REG_FLUSH);
+	_deleteGPRtoXMMreg(_Rs_, DELETE_REG_FLUSH);
+	_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH);

 	if (GPR_IS_CONST1(_Rs_))
 	{
@@ -688,6 +715,4 @@ void recMADDU1()
 #endif

-} // namespace OpcodeImpl
-} // namespace Dynarec
-} // namespace R5900
+} // namespace R5900::Dynarec::OpcodeImpl

View File

@@ -22,9 +22,8 @@
 using namespace x86Emitter;

-namespace R5900 {
-namespace Dynarec {
-namespace OpcodeImpl {
+namespace R5900::Dynarec::OpcodeImpl
+{

 /*********************************************************
 * Shift arithmetic with constant shift                   *
@@ -53,412 +52,387 @@ REC_FUNC_DEL(DSRAV, _Rd_);
 #else
+static void recMoveTtoD(int info)
+{
+	if (info & PROCESS_EE_T)
+		xMOV(xRegister32(EEREC_D), xRegister32(EEREC_T));
+	else
+		xMOV(xRegister32(EEREC_D), ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+}
+
+static void recMoveTtoD64(int info)
+{
+	if (info & PROCESS_EE_T)
+		xMOV(xRegister64(EEREC_D), xRegister64(EEREC_T));
+	else
+		xMOV(xRegister64(EEREC_D), ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
+}
+
+static void recMoveSToRCX(int info)
+{
+	// load full 64-bits for store->load forwarding, since we always store >=64.
+	if (info & PROCESS_EE_S)
+		xMOV(rcx, xRegister64(EEREC_S));
+	else
+		xMOV(rcx, ptr64[&cpuRegs.GPR.r[_Rs_].UL[0]]);
+}
 //// SLL
-void recSLL_const()
+static void recSLL_const()
 {
 	g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] << _Sa_);
 }

-void recSLLs_(int info, int sa)
+static void recSLLs_(int info, int sa)
 {
+	// TODO: Use BMI
 	pxAssert(!(info & PROCESS_EE_XMM));

-	xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+	recMoveTtoD(info);
 	if (sa != 0)
-	{
-		xSHL(eax, sa);
-	}
-
-	eeSignExtendTo(_Rd_);
+		xSHL(xRegister32(EEREC_D), sa);
+	xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
 }

-void recSLL_(int info)
+static void recSLL_(int info)
 {
 	recSLLs_(info, _Sa_);
 }

-EERECOMPILE_CODEX(eeRecompileCode2, SLL);
+EERECOMPILE_CODEX(eeRecompileCodeRC2, SLL, XMMINFO_WRITED | XMMINFO_READT);

 //// SRL
-void recSRL_const()
+static void recSRL_const()
 {
 	g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] >> _Sa_);
 }

-void recSRLs_(int info, int sa)
+static void recSRLs_(int info, int sa)
 {
 	pxAssert(!(info & PROCESS_EE_XMM));

-	xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+	recMoveTtoD(info);
 	if (sa != 0)
-		xSHR(eax, sa);
-
-	eeSignExtendTo(_Rd_);
+		xSHR(xRegister32(EEREC_D), sa);
+	xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
 }

-void recSRL_(int info)
+static void recSRL_(int info)
 {
 	recSRLs_(info, _Sa_);
 }

-EERECOMPILE_CODEX(eeRecompileCode2, SRL);
+EERECOMPILE_CODEX(eeRecompileCodeRC2, SRL, XMMINFO_WRITED | XMMINFO_READT);

 //// SRA
-void recSRA_const()
+static void recSRA_const()
 {
 	g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].SL[0] >> _Sa_);
 }

-void recSRAs_(int info, int sa)
+static void recSRAs_(int info, int sa)
 {
 	pxAssert(!(info & PROCESS_EE_XMM));

-	xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+	recMoveTtoD(info);
 	if (sa != 0)
-		xSAR(eax, sa);
-
-	eeSignExtendTo(_Rd_);
+		xSAR(xRegister32(EEREC_D), sa);
+	xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
 }

-void recSRA_(int info)
+static void recSRA_(int info)
 {
 	recSRAs_(info, _Sa_);
 }

-EERECOMPILE_CODEX(eeRecompileCode2, SRA);
+EERECOMPILE_CODEX(eeRecompileCodeRC2, SRA, XMMINFO_WRITED | XMMINFO_READT);
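All of the 32-bit shifts above end with xMOVSX because EE 32-bit results are architecturally sign-extended to 64 bits, even for the logical SRL: the shift happens in 32 bits first, then bit 31 of the result is replicated. In C++ terms:

#include <cstdint>
#include <cstdio>

static int64_t ee_srl(uint32_t rt, unsigned sa)
{
	const uint32_t shifted = rt >> sa; // 32-bit logical shift
	return (int64_t)(int32_t)shifted;  // then sign-extend the 32-bit result
}

int main()
{
	// With sa == 0 even a "logical" shift yields a negative 64-bit value.
	std::printf("%lld\n", (long long)ee_srl(0x80000000u, 0)); // -2147483648
	return 0;
}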
 ////////////////////////////////////////////////////
-void recDSLL_const()
+static void recDSLL_const()
 {
 	g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] << _Sa_);
 }

-void recDSLLs_(int info, int sa)
+static void recDSLLs_(int info, int sa)
 {
 	pxAssert(!(info & PROCESS_EE_XMM));

-	xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
+	recMoveTtoD64(info);
 	if (sa != 0)
-		xSHL(rax, sa);
-	xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
+		xSHL(xRegister64(EEREC_D), sa);
 }

-void recDSLL_(int info)
+static void recDSLL_(int info)
 {
 	recDSLLs_(info, _Sa_);
 }

-EERECOMPILE_CODEX(eeRecompileCode2, DSLL);
+EERECOMPILE_CODEX(eeRecompileCodeRC2, DSLL, XMMINFO_WRITED | XMMINFO_READT | XMMINFO_64BITOP);

 ////////////////////////////////////////////////////
-void recDSRL_const()
+static void recDSRL_const()
 {
 	g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] >> _Sa_);
 }

-void recDSRLs_(int info, int sa)
+static void recDSRLs_(int info, int sa)
 {
 	pxAssert(!(info & PROCESS_EE_XMM));

-	xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
+	recMoveTtoD64(info);
 	if (sa != 0)
-		xSHR(rax, sa);
-	xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
+		xSHR(xRegister64(EEREC_D), sa);
 }

-void recDSRL_(int info)
+static void recDSRL_(int info)
 {
 	recDSRLs_(info, _Sa_);
 }

-EERECOMPILE_CODEX(eeRecompileCode2, DSRL);
+EERECOMPILE_CODEX(eeRecompileCodeRC2, DSRL, XMMINFO_WRITED | XMMINFO_READT | XMMINFO_64BITOP);

 //// DSRA
-void recDSRA_const()
+static void recDSRA_const()
 {
 	g_cpuConstRegs[_Rd_].SD[0] = (u64)(g_cpuConstRegs[_Rt_].SD[0] >> _Sa_);
 }

-void recDSRAs_(int info, int sa)
+static void recDSRAs_(int info, int sa)
 {
 	pxAssert(!(info & PROCESS_EE_XMM));

-	xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
+	recMoveTtoD64(info);
 	if (sa != 0)
-		xSAR(rax, sa);
-	xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
+		xSAR(xRegister64(EEREC_D), sa);
 }

-void recDSRA_(int info)
+static void recDSRA_(int info)
 {
 	recDSRAs_(info, _Sa_);
 }

-EERECOMPILE_CODEX(eeRecompileCode2, DSRA);
+EERECOMPILE_CODEX(eeRecompileCodeRC2, DSRA, XMMINFO_WRITED | XMMINFO_READT | XMMINFO_64BITOP);

 ///// DSLL32
-void recDSLL32_const()
+static void recDSLL32_const()
 {
 	g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] << (_Sa_ + 32));
 }

-void recDSLL32s_(int info, int sa)
-{
-	pxAssert(!(info & PROCESS_EE_XMM));
-
-	xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
-	xSHL(rax, sa + 32);
-	xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
-}
-
-void recDSLL32_(int info)
+static void recDSLL32_(int info)
 {
-	recDSLL32s_(info, _Sa_);
+	recDSLLs_(info, _Sa_ + 32);
 }

-EERECOMPILE_CODEX(eeRecompileCode2, DSLL32);
+EERECOMPILE_CODEX(eeRecompileCodeRC2, DSLL32, XMMINFO_WRITED | XMMINFO_READT | XMMINFO_64BITOP);

 //// DSRL32
-void recDSRL32_const()
+static void recDSRL32_const()
 {
 	g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] >> (_Sa_ + 32));
 }

-void recDSRL32s_(int info, int sa)
-{
-	pxAssert(!(info & PROCESS_EE_XMM));
-
-	xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
-	if (sa != 0)
-		xSHR(eax, sa);
-	xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
-}
-
-void recDSRL32_(int info)
+static void recDSRL32_(int info)
 {
-	recDSRL32s_(info, _Sa_);
+	recDSRLs_(info, _Sa_ + 32);
 }

-EERECOMPILE_CODEX(eeRecompileCode2, DSRL32);
+EERECOMPILE_CODEX(eeRecompileCodeRC2, DSRL32, XMMINFO_WRITED | XMMINFO_READT);

 //// DSRA32
-void recDSRA32_const()
+static void recDSRA32_const()
 {
 	g_cpuConstRegs[_Rd_].SD[0] = (u64)(g_cpuConstRegs[_Rt_].SD[0] >> (_Sa_ + 32));
 }

-void recDSRA32s_(int info, int sa)
-{
-	recDSRAs_(info, sa + 32);
-}
-
-void recDSRA32_(int info)
+static void recDSRA32_(int info)
 {
-	recDSRA32s_(info, _Sa_);
+	recDSRAs_(info, _Sa_ + 32);
 }

-EERECOMPILE_CODEX(eeRecompileCode2, DSRA32);
+EERECOMPILE_CODEX(eeRecompileCodeRC2, DSRA32, XMMINFO_WRITED | XMMINFO_READT | XMMINFO_64BITOP);
 /*********************************************************
 * Shift arithmetic with variant register shift           *
 * Format:  OP rd, rt, rs                                 *
 *********************************************************/

-static void recShiftV_constt(const xImpl_Group2& shift)
+static void recShiftV_constt(int info, const xImpl_Group2& shift)
 {
-	xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
+	pxAssert(_Rs_ != 0);
+	recMoveSToRCX(info);

-	xMOV(eax, g_cpuConstRegs[_Rt_].UL[0]);
-	shift(eax, cl);
-
-	eeSignExtendTo(_Rd_);
+	xMOV(xRegister32(EEREC_D), g_cpuConstRegs[_Rt_].UL[0]);
+	shift(xRegister32(EEREC_D), cl);
+	xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
 }

-static void recShiftV(const xImpl_Group2& shift)
+static void recShiftV(int info, const xImpl_Group2& shift)
 {
-	xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
-	if (_Rs_ != 0)
-	{
-		xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
-		shift(eax, cl);
-	}
-
-	eeSignExtendTo(_Rd_);
+	pxAssert(_Rs_ != 0);
+	recMoveSToRCX(info);
+	recMoveTtoD(info);
+	shift(xRegister32(EEREC_D), cl);
+	xMOVSX(xRegister64(EEREC_D), xRegister32(EEREC_D));
 }

-static void recDShiftV_constt(const xImpl_Group2& shift)
+static void recDShiftV_constt(int info, const xImpl_Group2& shift)
 {
-	xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
+	pxAssert(_Rs_ != 0);
+	recMoveSToRCX(info);

-	xMOV64(rax, g_cpuConstRegs[_Rt_].UD[0]);
-	shift(rax, cl);
-	xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
+	xMOV64(xRegister64(EEREC_D), g_cpuConstRegs[_Rt_].SD[0]);
+	shift(xRegister64(EEREC_D), cl);
 }

-static void recDShiftV(const xImpl_Group2& shift)
+static void recDShiftV(int info, const xImpl_Group2& shift)
 {
-	xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
-	if (_Rs_ != 0)
-	{
-		xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
-		shift(rax, cl);
-	}
-	xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
+	pxAssert(_Rs_ != 0);
+	recMoveSToRCX(info);
+	recMoveTtoD64(info);
+	shift(xRegister64(EEREC_D), cl);
 }
 //// SLLV
-void recSLLV_const()
+static void recSLLV_const()
 {
 	g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] << (g_cpuConstRegs[_Rs_].UL[0] & 0x1f));
 }

-void recSLLV_consts(int info)
+static void recSLLV_consts(int info)
 {
 	recSLLs_(info, g_cpuConstRegs[_Rs_].UL[0] & 0x1f);
 }

-void recSLLV_constt(int info)
+static void recSLLV_constt(int info)
 {
-	recShiftV_constt(xSHL);
+	recShiftV_constt(info, xSHL);
 }

-void recSLLV_(int info)
+static void recSLLV_(int info)
 {
-	recShiftV(xSHL);
+	recShiftV(info, xSHL);
 }

-EERECOMPILE_CODE0(SLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
+EERECOMPILE_CODERC0(SLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);

 //// SRLV
-void recSRLV_const()
+static void recSRLV_const()
 {
 	g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].UL[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x1f));
 }

-void recSRLV_consts(int info)
+static void recSRLV_consts(int info)
 {
 	recSRLs_(info, g_cpuConstRegs[_Rs_].UL[0] & 0x1f);
 }

-void recSRLV_constt(int info)
+static void recSRLV_constt(int info)
 {
-	recShiftV_constt(xSHR);
+	recShiftV_constt(info, xSHR);
 }

-void recSRLV_(int info)
+static void recSRLV_(int info)
 {
-	recShiftV(xSHR);
+	recShiftV(info, xSHR);
 }

-EERECOMPILE_CODE0(SRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
+EERECOMPILE_CODERC0(SRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);

 //// SRAV
-void recSRAV_const()
+static void recSRAV_const()
 {
 	g_cpuConstRegs[_Rd_].SD[0] = (s32)(g_cpuConstRegs[_Rt_].SL[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x1f));
 }

-void recSRAV_consts(int info)
+static void recSRAV_consts(int info)
 {
 	recSRAs_(info, g_cpuConstRegs[_Rs_].UL[0] & 0x1f);
 }

-void recSRAV_constt(int info)
+static void recSRAV_constt(int info)
 {
-	recShiftV_constt(xSAR);
+	recShiftV_constt(info, xSAR);
 }

-void recSRAV_(int info)
+static void recSRAV_(int info)
 {
-	recShiftV(xSAR);
+	recShiftV(info, xSAR);
 }

-EERECOMPILE_CODE0(SRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
+EERECOMPILE_CODERC0(SRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
 //// DSLLV
-void recDSLLV_const()
+static void recDSLLV_const()
 {
 	g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] << (g_cpuConstRegs[_Rs_].UL[0] & 0x3f));
 }

-void recDSLLV_consts(int info)
+static void recDSLLV_consts(int info)
 {
 	int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f;
-	if (sa < 32)
-		recDSLLs_(info, sa);
-	else
-		recDSLL32s_(info, sa - 32);
+	recDSLLs_(info, sa);
 }

-void recDSLLV_constt(int info)
+static void recDSLLV_constt(int info)
 {
-	recDShiftV_constt(xSHL);
+	recDShiftV_constt(info, xSHL);
 }

-void recDSLLV_(int info)
+static void recDSLLV_(int info)
 {
-	recDShiftV(xSHL);
+	recDShiftV(info, xSHL);
 }

-EERECOMPILE_CODE0(DSLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
+EERECOMPILE_CODERC0(DSLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);

 //// DSRLV
-void recDSRLV_const()
+static void recDSRLV_const()
 {
 	g_cpuConstRegs[_Rd_].UD[0] = (u64)(g_cpuConstRegs[_Rt_].UD[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x3f));
 }

-void recDSRLV_consts(int info)
+static void recDSRLV_consts(int info)
 {
 	int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f;
-	if (sa < 32)
-		recDSRLs_(info, sa);
-	else
-		recDSRL32s_(info, sa - 32);
+	recDSRLs_(info, sa);
 }

-void recDSRLV_constt(int info)
+static void recDSRLV_constt(int info)
 {
-	recDShiftV_constt(xSHR);
+	recDShiftV_constt(info, xSHR);
 }

-void recDSRLV_(int info)
+static void recDSRLV_(int info)
 {
-	recDShiftV(xSHR);
+	recDShiftV(info, xSHR);
 }

-EERECOMPILE_CODE0(DSRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
+EERECOMPILE_CODERC0(DSRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);

 //// DSRAV
-void recDSRAV_const()
+static void recDSRAV_const()
 {
 	g_cpuConstRegs[_Rd_].SD[0] = (s64)(g_cpuConstRegs[_Rt_].SD[0] >> (g_cpuConstRegs[_Rs_].UL[0] & 0x3f));
 }

-void recDSRAV_consts(int info)
+static void recDSRAV_consts(int info)
 {
 	int sa = g_cpuConstRegs[_Rs_].UL[0] & 0x3f;
-	if (sa < 32)
-		recDSRAs_(info, sa);
-	else
-		recDSRA32s_(info, sa - 32);
+	recDSRAs_(info, sa);
 }

-void recDSRAV_constt(int info)
+static void recDSRAV_constt(int info)
 {
-	recDShiftV_constt(xSAR);
+	recDShiftV_constt(info, xSAR);
 }

-void recDSRAV_(int info)
+static void recDSRAV_(int info)
 {
-	recDShiftV(xSAR);
+	recDShiftV(info, xSAR);
 }

-EERECOMPILE_CODE0(DSRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
+EERECOMPILE_CODERC0(DSRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED | XMMINFO_64BITOP);
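The dropped sa < 32 splits in the _consts paths are a direct win from 64-bit host registers: the old 32-bit codegen had to route large shift amounts through the DSLL32-style helpers, while a single 64-bit shift now covers the whole 0..63 range. The folded semantics:

#include <cstdint>
#include <cstdio>

static uint64_t dsllv(uint64_t rt, uint32_t rs)
{
	return rt << (rs & 0x3f); // one 64-bit shift, amount masked to 6 bits
}

int main()
{
	std::printf("%016llx\n", (unsigned long long)dsllv(1, 36)); // bit 36 set
	return 0;
}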
 #endif

-} // namespace OpcodeImpl
-} // namespace Dynarec
-} // namespace R5900
+} // namespace R5900::Dynarec::OpcodeImpl

View File

@@ -47,527 +47,240 @@ void _deleteEEreg(int reg, int flush)
 		_flushConstReg(reg);
 	}
 	GPR_DEL_CONST(reg);
-	_deleteGPRtoXMMreg(reg, flush ? 0 : 2);
+	_deleteGPRtoXMMreg(reg, flush ? DELETE_REG_FREE : DELETE_REG_FLUSH_AND_FREE);
+	_deleteGPRtoX86reg(reg, flush ? DELETE_REG_FREE : DELETE_REG_FLUSH_AND_FREE);
+}
+
+void _deleteEEreg128(int reg)
+{
+	if (!reg)
+		return;
+
+	GPR_DEL_CONST(reg);
+	_deleteGPRtoXMMreg(reg, DELETE_REG_FREE_NO_WRITEBACK);
+	_deleteGPRtoX86reg(reg, DELETE_REG_FREE_NO_WRITEBACK);
 }
 void _flushEEreg(int reg, bool clear)
 {
 	if (!reg)
 		return;

-	if (GPR_IS_CONST1(reg))
-	{
+	if (GPR_IS_DIRTY_CONST(reg))
 		_flushConstReg(reg);
-		return;
-	}
-
-	_deleteGPRtoXMMreg(reg, clear ? 2 : 1);
+	if (clear)
+		GPR_DEL_CONST(reg);
+
+	_deleteGPRtoXMMreg(reg, clear ? DELETE_REG_FLUSH_AND_FREE : DELETE_REG_FLUSH);
+	_deleteGPRtoX86reg(reg, clear ? DELETE_REG_FLUSH_AND_FREE : DELETE_REG_FLUSH);
 }
-int eeProcessHILO(int reg, int mode, int mmx)
+int _eeTryRenameReg(int to, int from, int fromx86, int other, int xmminfo)
 {
-	if (_hasFreeXMMreg() || !(g_pCurInstInfo->regs[reg] & EEINST_LASTUSE))
-	{
-		return _allocGPRtoXMMreg(-1, reg, mode);
-	}
-
-	return -1;
+	// can't rename when in form Rd = Rs op Rt and Rd == Rs or Rd == Rt
+	if ((xmminfo & XMMINFO_NORENAME) || fromx86 < 0 || to == from || to == other || !EEINST_RENAMETEST(from))
+		return -1;
+
+	RALOG("Renaming %s to %s\n", R3000A::disRNameGPR[from], R3000A::disRNameGPR[to]);
+
+	// flush back when it's been modified
+	if (x86regs[fromx86].mode & MODE_WRITE && EEINST_LIVETEST(from))
+		_writebackX86Reg(fromx86);
+
+	// remove all references to renamed-to register
+	_deleteGPRtoX86reg(to, DELETE_REG_FREE_NO_WRITEBACK);
+	_deleteGPRtoXMMreg(to, DELETE_REG_FLUSH_AND_FREE);
+	GPR_DEL_CONST(to);
+
+	// and do the actual rename, new register has been modified.
+	x86regs[fromx86].reg = to;
+	x86regs[fromx86].mode |= MODE_READ | MODE_WRITE;
+	return fromx86;
 }
-// Strangely this code is used on NOT-MMX path ...
-#define PROCESS_EE_SETMODES(mmreg) (/*(mmxregs[mmreg].mode&MODE_WRITE)*/ false ? PROCESS_EE_MODEWRITES : 0)
-#define PROCESS_EE_SETMODET(mmreg) (/*(mmxregs[mmreg].mode&MODE_WRITE)*/ false ? PROCESS_EE_MODEWRITET : 0)
-
+static bool FitsInImmediate(int reg, int fprinfo)
+{
+	if (fprinfo & XMMINFO_64BITOP)
+		return (s32)g_cpuConstRegs[reg].SD[0] == g_cpuConstRegs[reg].SD[0];
+	else
+		return true; // all 32bit ops fit
+}
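FitsInImmediate guards the constant-operand paths: x86-64 ALU immediates are 32 bits sign-extended to 64, so a 64-bit constant may be embedded only if that round trip is lossless. The test it performs, stated in portable C++:

#include <cstdint>
#include <cstdio>

static bool fits_in_imm32(int64_t v, bool is_64bit_op)
{
	if (!is_64bit_op)
		return true;                 // all 32-bit ops fit
	return (int64_t)(int32_t)v == v; // sign-extension round trip must be exact
}

int main()
{
	std::printf("%d %d\n",
		fits_in_imm32(-1, true),             // 1: fits as an immediate
		fits_in_imm32(0x100000000ll, true)); // 0: needs xMOV64 + register
	return 0;
}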
void eeRecompileCodeRC0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode, int xmminfo)
{
	if (!_Rd_ && (xmminfo & XMMINFO_WRITED))
		return;

	if (GPR_IS_CONST2(_Rs_, _Rt_))
	{
		if (_Rd_ && (xmminfo & XMMINFO_WRITED))
		{
			_deleteGPRtoX86reg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
			_deleteGPRtoXMMreg(_Rd_, DELETE_REG_FLUSH_AND_FREE);
			GPR_SET_CONST(_Rd_);
		}

		constcode();
		return;
	}

	// this function should not be used for lo/hi.
	pxAssert(!(xmminfo & (XMMINFO_READLO | XMMINFO_READHI | XMMINFO_WRITELO | XMMINFO_WRITEHI)));

	// we have to put these up here, because the register allocator below will wipe out const flags
	// for the destination register when/if it switches it to write mode.
	const bool s_is_const = GPR_IS_CONST1(_Rs_);
	const bool t_is_const = GPR_IS_CONST1(_Rt_);
	const bool d_is_const = GPR_IS_CONST1(_Rd_);
	const bool s_is_used = EEINST_USEDTEST(_Rs_);
	const bool t_is_used = EEINST_USEDTEST(_Rt_);
	const bool s_in_xmm = _hasXMMreg(XMMTYPE_GPRREG, _Rs_);
	const bool t_in_xmm = _hasXMMreg(XMMTYPE_GPRREG, _Rt_);

	// regular x86
	if ((xmminfo & XMMINFO_READS) && !s_is_const)
		_addNeededGPRtoX86reg(_Rs_);
	if ((xmminfo & XMMINFO_READT) && !t_is_const)
		_addNeededGPRtoX86reg(_Rt_);
	if ((xmminfo & XMMINFO_READD) && !d_is_const)
		_addNeededGPRtoX86reg(_Rd_);

	// when it doesn't fit in an immediate, we'll flush it to a reg early to save code
	u32 info = 0;
	int regs = -1, regt = -1, regd = -1;
	if (xmminfo & XMMINFO_READS)
	{
		regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
		if (regs < 0 && (!s_is_const || !FitsInImmediate(_Rs_, xmminfo)) && (s_is_used || s_in_xmm || ((xmminfo & XMMINFO_WRITED) && _Rd_ == _Rs_) || (xmminfo & XMMINFO_FORCEREGS)))
		{
			regs = _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
		}
		if (regs >= 0)
			info |= PROCESS_EE_SET_S(regs);
	}

	if (xmminfo & XMMINFO_READT)
	{
		regt = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
		if (regt < 0 && (!t_is_const || !FitsInImmediate(_Rt_, xmminfo)) && (t_is_used || t_in_xmm || ((xmminfo & XMMINFO_WRITED) && _Rd_ == _Rt_) || (xmminfo & XMMINFO_FORCEREGT)))
		{
			regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
		}
		if (regt >= 0)
			info |= PROCESS_EE_SET_T(regt);
	}

	if (xmminfo & (XMMINFO_WRITED | XMMINFO_READD))
	{
		// _eeTryRenameReg() sets READ | WRITE already, so this is only needed when allocating.
		const int moded = ((xmminfo & XMMINFO_WRITED) ? MODE_WRITE : 0) | ((xmminfo & XMMINFO_READD) ? MODE_READ : 0);

		// If S is no longer live, swap D for S. Saves the move.
		regd = (_Rd_ && xmminfo & XMMINFO_WRITED) ? _eeTryRenameReg(_Rd_, (xmminfo & XMMINFO_READS) ? _Rs_ : 0, regs, (xmminfo & XMMINFO_READT) ? _Rt_ : 0, xmminfo) : 0;
		if (regd < 0)
			regd = _allocX86reg(X86TYPE_GPR, _Rd_, moded);

		pxAssert(regd >= 0);
		info |= PROCESS_EE_SET_D(regd);
	}

	if (xmminfo & XMMINFO_WRITED)
		GPR_DEL_CONST(_Rd_);

	_validateRegs();

	if (s_is_const && regs < 0)
	{
		constscode(info /*| PROCESS_CONSTS*/);
		return;
	}

	if (t_is_const && regt < 0)
	{
		consttcode(info /*| PROCESS_CONSTT*/);
		return;
	}

	noconstcode(info);
}
// rt = rs op imm16
void eeRecompileCodeRC1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode, int xmminfo)
{
	pxAssert((xmminfo & (XMMINFO_READS | XMMINFO_WRITET)) == (XMMINFO_READS | XMMINFO_WRITET));

	if (!_Rt_)
		return;

	if (GPR_IS_CONST1(_Rs_))
	{
		_deleteGPRtoXMMreg(_Rt_, DELETE_REG_FLUSH_AND_FREE);
		_deleteGPRtoX86reg(_Rt_, DELETE_REG_FREE_NO_WRITEBACK);
		GPR_SET_CONST(_Rt_);
		constcode();
		return;
	}

	const bool s_is_used = EEINST_USEDTEST(_Rs_);
	const bool s_in_xmm = _hasXMMreg(XMMTYPE_GPRREG, _Rs_);

	u32 info = 0;
	int regs = _checkX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
	if (regs < 0 && (s_is_used || s_in_xmm || _Rt_ == _Rs_ || (xmminfo & XMMINFO_FORCEREGS)))
		regs = _allocX86reg(X86TYPE_GPR, _Rs_, MODE_READ);
	if (regs >= 0)
		info |= PROCESS_EE_SET_S(regs);

	// If S is no longer live, swap D for S. Saves the move.
	int regt = _eeTryRenameReg(_Rt_, _Rs_, regs, 0, xmminfo);
	if (regt < 0)
		regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);

	info |= PROCESS_EE_SET_T(regt);
	_validateRegs();

	GPR_DEL_CONST(_Rt_);
	noconstcode(info);
}
// rd = rt op sa
void eeRecompileCodeRC2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode, int xmminfo)
{
	pxAssert((xmminfo & (XMMINFO_READT | XMMINFO_WRITED)) == (XMMINFO_READT | XMMINFO_WRITED));

	if (!_Rd_)
		return;

	if (GPR_IS_CONST1(_Rt_))
	{
		_deleteGPRtoXMMreg(_Rd_, DELETE_REG_FLUSH_AND_FREE);
		_deleteGPRtoX86reg(_Rd_, DELETE_REG_FREE_NO_WRITEBACK);
		GPR_SET_CONST(_Rd_);
		constcode();
		return;
	}

	const bool t_is_used = EEINST_USEDTEST(_Rt_);
	const bool t_in_xmm = _hasXMMreg(XMMTYPE_GPRREG, _Rt_);

	u32 info = 0;
	int regt = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
	if (regt < 0 && (t_is_used || t_in_xmm || (_Rd_ == _Rt_) || (xmminfo & XMMINFO_FORCEREGT)))
		regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_READ);
	if (regt >= 0)
		info |= PROCESS_EE_SET_T(regt);

	// If T is no longer live, swap D for T. Saves the move.
	int regd = _eeTryRenameReg(_Rd_, _Rt_, regt, 0, xmminfo);
	if (regd < 0)
		regd = _allocX86reg(X86TYPE_GPR, _Rd_, MODE_WRITE);

	info |= PROCESS_EE_SET_D(regd);
	_validateRegs();

	GPR_DEL_CONST(_Rd_);
	noconstcode(info);
}
// rt op rs
void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode)
{
pxFail("Unfinished code reached.");
// for now, don't support xmm
_deleteEEreg(_Rs_, 0);
_deleteEEreg(_Rt_, 1);
if (GPR_IS_CONST2(_Rs_, _Rt_))
{
constcode();
return;
}
if (GPR_IS_CONST1(_Rs_))
{
//multicode(PROCESS_EE_CONSTT);
return;
}
if (GPR_IS_CONST1(_Rt_))
{
//multicode(PROCESS_EE_CONSTT);
return;
}
multicode(0);
}
// Simple Code Templates //
// rd = rs op rt
void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode)
{
if (!_Rd_)
return;
// for now, don't support xmm
_deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 1);
_deleteGPRtoXMMreg(_Rd_, 0);
if (GPR_IS_CONST2(_Rs_, _Rt_))
{
GPR_SET_CONST(_Rd_);
constcode();
return;
}
if (GPR_IS_CONST1(_Rs_))
{
constscode(0);
GPR_DEL_CONST(_Rd_);
return;
}
if (GPR_IS_CONST1(_Rt_))
{
consttcode(0);
GPR_DEL_CONST(_Rd_);
return;
}
noconstcode(0);
GPR_DEL_CONST(_Rd_);
}
// rt = rs op imm16
void eeRecompileCodeConst1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
{
if (!_Rt_)
return;
// for now, don't support xmm
_deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 0);
if (GPR_IS_CONST1(_Rs_))
{
GPR_SET_CONST(_Rt_);
constcode();
return;
}
noconstcode(0);
GPR_DEL_CONST(_Rt_);
}
// rd = rt op sa
void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode)
{
if (!_Rd_)
return;
// for now, don't support xmm
_deleteGPRtoXMMreg(_Rt_, 1);
_deleteGPRtoXMMreg(_Rd_, 0);
if (GPR_IS_CONST1(_Rt_))
{
GPR_SET_CONST(_Rd_);
constcode();
return;
}
noconstcode(0);
GPR_DEL_CONST(_Rd_);
}
// rd = rt MULT rs (SPECIAL)
void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode, int MULT)
{
pxFail("Unfinished code reached.");
// for now, don't support xmm
if (MULT)
{
_deleteGPRtoXMMreg(_Rd_, 0);
}
_deleteGPRtoXMMreg(_Rs_, 1);
_deleteGPRtoXMMreg(_Rt_, 1);
if (GPR_IS_CONST2(_Rs_, _Rt_))
{
if (MULT && _Rd_)
GPR_SET_CONST(_Rd_);
constcode();
return;
}
if (GPR_IS_CONST1(_Rs_))
{
//multicode(PROCESS_EE_CONSTS);
if (MULT && _Rd_)
GPR_DEL_CONST(_Rd_);
return;
}
if (GPR_IS_CONST1(_Rt_))
{
//multicode(PROCESS_EE_CONSTT);
if (MULT && _Rd_)
GPR_DEL_CONST(_Rd_);
return;
}
multicode(0);
if (MULT && _Rd_)
		GPR_DEL_CONST(_Rd_);
}
// EE XMM allocation code
@@ -575,40 +288,11 @@ int eeRecompileCodeXMM(int xmminfo)
{
	int info = PROCESS_EE_XMM;

	// add needed
	if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
		_addNeededGPRtoXMMreg(XMMGPR_LO);
	if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
		_addNeededGPRtoXMMreg(XMMGPR_HI);
	if (xmminfo & XMMINFO_READS)
		_addNeededGPRtoXMMreg(_Rs_);
	if (xmminfo & XMMINFO_READT)
@@ -616,58 +300,59 @@ int eeRecompileCodeXMM(int xmminfo)
	if (xmminfo & XMMINFO_WRITED)
		_addNeededGPRtoXMMreg(_Rd_);

	// TODO: we could do memory operands here if not live. but the MMI implementations aren't hooked up to that at the moment.
	if (xmminfo & XMMINFO_READS)
	{
		const int reg = _allocGPRtoXMMreg(_Rs_, MODE_READ);
		info |= PROCESS_EE_SET_S(reg);
	}
	if (xmminfo & XMMINFO_READT)
	{
		const int reg = _allocGPRtoXMMreg(_Rt_, MODE_READ);
		info |= PROCESS_EE_SET_T(reg);
	}

	if (xmminfo & XMMINFO_WRITED)
	{
		int readd = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? MODE_READ : 0);

		int regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, readd);

		if (regd < 0)
		{
			if (!(xmminfo & XMMINFO_READD) && (xmminfo & XMMINFO_READT) && EEINST_RENAMETEST(_Rt_))
			{
				_deleteEEreg128(_Rd_);
				_reallocateXMMreg(EEREC_T, XMMTYPE_GPRREG, _Rd_, readd, EEINST_LIVETEST(_Rt_));
				regd = EEREC_T;
			}
			else if (!(xmminfo & XMMINFO_READD) && (xmminfo & XMMINFO_READS) && EEINST_RENAMETEST(_Rs_))
			{
				_deleteEEreg128(_Rd_);
				_reallocateXMMreg(EEREC_S, XMMTYPE_GPRREG, _Rd_, readd, EEINST_LIVETEST(_Rs_));
				regd = EEREC_S;
			}
			else
			{
				regd = _allocGPRtoXMMreg(_Rd_, readd);
			}
		}

		info |= PROCESS_EE_SET_D(regd);
	}

	if (xmminfo & (XMMINFO_READLO | XMMINFO_WRITELO))
	{
		info |= PROCESS_EE_SET_LO(_allocGPRtoXMMreg(XMMGPR_LO, ((xmminfo & XMMINFO_READLO) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITELO) ? MODE_WRITE : 0)));
	}
	if (xmminfo & (XMMINFO_READHI | XMMINFO_WRITEHI))
	{
		info |= PROCESS_EE_SET_HI(_allocGPRtoXMMreg(XMMGPR_HI, ((xmminfo & XMMINFO_READHI) ? MODE_READ : 0) | ((xmminfo & XMMINFO_WRITEHI) ? MODE_WRITE : 0)));
	}

	if (xmminfo & XMMINFO_WRITED)
		GPR_DEL_CONST(_Rd_);

	_validateRegs();

	return info;
}
@@ -676,9 +361,6 @@ int eeRecompileCodeXMM(int xmminfo)
#define _Fs_ _Rd_
#define _Fd_ _Sa_
#define PROCESS_EE_SETMODES_XMM(mmreg) ((xmmregs[mmreg].mode & MODE_WRITE) ? PROCESS_EE_MODEWRITES : 0)
#define PROCESS_EE_SETMODET_XMM(mmreg) ((xmmregs[mmreg].mode & MODE_WRITE) ? PROCESS_EE_MODEWRITET : 0)
// rd = rs op rt
void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo)
{
@@ -699,7 +381,7 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
		if (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE)
			mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ);
		else
			mmregt = _allocFPtoXMMreg(_Ft_, MODE_READ);
	}

	if (xmminfo & XMMINFO_READS)
@@ -709,26 +391,27 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
	{
		if (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE)
			mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
		else
		{
			mmregs = _allocFPtoXMMreg(_Fs_, MODE_READ);

			// if we just allocated S and Fs == Ft, share it
			if ((xmminfo & XMMINFO_READT) && _Fs_ == _Ft_)
				mmregt = mmregs;
		}
	}

	if (xmminfo & XMMINFO_READD)
	{
		pxAssert(xmminfo & XMMINFO_WRITED);
		mmregd = _allocFPtoXMMreg(_Fd_, MODE_READ);
	}

	if (xmminfo & XMMINFO_READACC)
	{
		if (!(xmminfo & XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[XMMFPU_ACC] & EEINST_LASTUSE))
			mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ);
		else
			mmregacc = _allocFPACCtoXMMreg(MODE_READ);
	}

	if (xmminfo & XMMINFO_WRITEACC)
@@ -741,34 +424,28 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
		if (mmregacc < 0)
		{
			if ((xmminfo & XMMINFO_READT) && mmregt >= 0 && FPUINST_RENAMETEST(_Ft_))
			{
				if (EE_WRITE_DEAD_VALUES && xmmregs[mmregt].mode & MODE_WRITE)
					_writebackXMMreg(mmregt);

				xmmregs[mmregt].reg = 0;
				xmmregs[mmregt].mode = readacc;
				xmmregs[mmregt].type = XMMTYPE_FPACC;
				mmregacc = mmregt;
			}
			else if ((xmminfo & XMMINFO_READS) && mmregs >= 0 && FPUINST_RENAMETEST(_Fs_))
			{
				if (EE_WRITE_DEAD_VALUES && xmmregs[mmregs].mode & MODE_WRITE)
					_writebackXMMreg(mmregs);

				xmmregs[mmregs].reg = 0;
				xmmregs[mmregs].mode = readacc;
				xmmregs[mmregs].type = XMMTYPE_FPACC;
				mmregacc = mmregs;
			}
			else
				mmregacc = _allocFPACCtoXMMreg(readacc);
		}

		xmmregs[mmregacc].mode |= MODE_WRITE;
@@ -778,48 +455,43 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
		// check for last used, if so don't alloc a new XMM reg
		int readd = MODE_WRITE | ((xmminfo & XMMINFO_READD) ? MODE_READ : 0);

		if (xmminfo & XMMINFO_READD)
			mmregd = _allocFPtoXMMreg(_Fd_, readd);
		else
			mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd);

		if (mmregd < 0)
		{
			if ((xmminfo & XMMINFO_READT) && mmregt >= 0 && FPUINST_RENAMETEST(_Ft_))
			{
				if (EE_WRITE_DEAD_VALUES && xmmregs[mmregt].mode & MODE_WRITE)
					_writebackXMMreg(mmregt);

				xmmregs[mmregt].reg = _Fd_;
				xmmregs[mmregt].mode = readd;
				mmregd = mmregt;
			}
			else if ((xmminfo & XMMINFO_READS) && mmregs >= 0 && FPUINST_RENAMETEST(_Fs_))
			{
				if (EE_WRITE_DEAD_VALUES && xmmregs[mmregs].mode & MODE_WRITE)
					_writebackXMMreg(mmregs);

				xmmregs[mmregs].inuse = 1;
				xmmregs[mmregs].reg = _Fd_;
				xmmregs[mmregs].mode = readd;
				mmregd = mmregs;
			}
			else if ((xmminfo & XMMINFO_READACC) && mmregacc >= 0 && FPUINST_RENAMETEST(XMMFPU_ACC))
			{
				if (EE_WRITE_DEAD_VALUES && xmmregs[mmregacc].mode & MODE_WRITE)
					_writebackXMMreg(mmregacc);

				xmmregs[mmregacc].reg = _Fd_;
				xmmregs[mmregacc].mode = readd;
				xmmregs[mmregacc].type = XMMTYPE_FPREG;
				mmregd = mmregacc;
			}
			else
				mmregd = _allocFPtoXMMreg(_Fd_, readd);
		}
	}
@@ -841,12 +513,12 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
	if (xmminfo & XMMINFO_READS)
	{
		if (mmregs >= 0)
			info |= PROCESS_EE_SET_S(mmregs);
	}
	if (xmminfo & XMMINFO_READT)
	{
		if (mmregt >= 0)
			info |= PROCESS_EE_SET_T(mmregt);
	}

	// at least one must be in xmm
@@ -856,5 +528,4 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo
	}

	xmmcode(info);
}

@@ -23,11 +23,36 @@
#include "iR5900.h"
#include "common/Perf.h"

using namespace vtlb_private;
using namespace x86Emitter;

// we need enough for a 32-bit jump forwards (5 bytes)
static constexpr u32 LOADSTORE_PADDING = 5;

//#define LOG_STORES
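// Sketch of the padding rule implied by LOADSTORE_PADDING (assumed semantics,
// matching the padding loops later in this file): every fastmem access must
// span at least 5 bytes so it can later be overwritten in place by a rel32 JMP.
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main()
{
	constexpr uint32_t LOADSTORE_PADDING = 5;
	for (uint32_t emitted : {3u, 5u, 8u})
	{
		const uint32_t pad = LOADSTORE_PADDING - std::min(emitted, 5u);
		std::printf("emitted %u bytes -> %u NOP(s), patchable size %u\n", emitted, pad, emitted + pad);
	}
	return 0;
}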
static u32 GetAllocatedGPRBitmask()
{
u32 mask = 0;
for (u32 i = 0; i < iREGCNT_GPR; i++)
{
if (x86regs[i].inuse)
mask |= (1u << i);
}
return mask;
}
static u32 GetAllocatedXMMBitmask()
{
u32 mask = 0;
for (u32 i = 0; i < iREGCNT_XMM; i++)
{
if (xmmregs[i].inuse)
mask |= (1u << i);
}
return mask;
}
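// Rough model of how these bitmasks are consumed (the real consumer is the
// backpatch thunk generator near the end of this file): each set bit marks a
// host register that is live across the access and must be preserved.
#include <cstdint>
#include <cstdio>

int main()
{
	const uint32_t gpr_bitmask = (1u << 0) | (1u << 2) | (1u << 8); // e.g. rax, rdx, r8 in use
	for (uint32_t i = 0; i < 16; i++)
	{
		if (gpr_bitmask & (1u << i))
			std::printf("host GPR %u is live across this access\n", i);
	}
	return 0;
}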
/*
	// Pseudo-Code For the following Dynarec Implementations -->
@@ -112,18 +137,39 @@ namespace vtlb_private
	// Prepares eax, ecx, and, ebx for Direct or Indirect operations.
	// Returns the writeback pointer for ebx (return address from indirect handling)
	//
	static void DynGen_PrepRegs(int addr_reg, int value_reg, u32 sz, bool xmm)
	{
		EE::Profiler.EmitMem();

		_freeX86reg(arg1regd);
		xMOV(arg1regd, xRegister32(addr_reg));

		if (value_reg >= 0)
		{
			if (sz == 128)
			{
				pxAssert(xmm);
				_freeXMMreg(xRegisterSSE::GetArgRegister(1, 0).GetId());
				xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), xRegisterSSE::GetInstance(value_reg));
			}
			else if (xmm)
			{
				// 32bit xmms are passed in GPRs
				pxAssert(sz == 32);
				_freeX86reg(arg2regd);
				xMOVD(arg2regd, xRegisterSSE(value_reg));
			}
			else
			{
				_freeX86reg(arg2regd);
				xMOV(arg2reg, xRegister64(value_reg));
			}
		}

		xMOV(eax, arg1regd);
		xSHR(eax, VTLB_PAGE_BITS);
		xMOV(rax, ptrNative[xComplexAddress(arg3reg, vtlbdata.vmap, rax * wordsize)]);
		xADD(arg1reg, rax);
	}
	// ------------------------------------------------------------------------
@@ -169,17 +215,14 @@ namespace vtlb_private
	// ------------------------------------------------------------------------
	static void DynGen_DirectWrite(u32 bits)
	{
		switch (bits)
		{
			case 8:
				xMOV(ptr[arg1reg], xRegister8(arg2regd));
				break;

			case 16:
				xMOV(ptr[arg1reg], xRegister16(arg2regd));
				break;

			case 32:
@@ -229,7 +272,9 @@ static u8* GetIndirectDispatcherPtr(int mode, int operandsize, int sign = 0)
// Generates a JS instruction that targets the appropriate templated instance of
// the vtlb Indirect Dispatcher.
//
template <typename GenDirectFn>
static void DynGen_HandlerTest(const GenDirectFn& gen_direct, int mode, int bits, bool sign = false)
{
	int szidx = 0;
	switch (bits)
@@ -241,7 +286,12 @@ static void DynGen_IndirectDispatch(int mode, int bits, bool sign = false)
		case 128: szidx = 4; break;
			jNO_DEFAULT;
	}

	xForwardJS8 to_handler;
	gen_direct();
	xForwardJump8 done;
	to_handler.SetTarget();
	xFastCall(GetIndirectDispatcherPtr(mode, szidx, sign));
	done.SetTarget();
}
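// Plain C++ model of the layout DynGen_HandlerTest emits (illustrative only):
// a forward branch skips the inline direct access into the handler call, and
// both paths rejoin at 'done'.
#include <cstdio>
#include <functional>

static int access(bool is_handler, const std::function<int()>& direct, const std::function<int()>& handler)
{
	if (is_handler)       // xForwardJS8 to_handler
		return handler(); // xFastCall(dispatcher), then done.SetTarget()
	return direct();      // inline fast path, jumping over the handler call
}

int main()
{
	std::printf("direct=%d handler=%d\n",
		access(false, [] { return 1; }, [] { return 2; }),
		access(true, [] { return 1; }, [] { return 2; }));
	return 0;
}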
// ------------------------------------------------------------------------
@@ -250,6 +300,13 @@ static void DynGen_IndirectDispatch(int mode, int bits, bool sign = false)
// Out: eax: result (if mode < 64)
static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign)
{
	// fixup stack
#ifdef _WIN32
	xSUB(rsp, 32 + 8);
#else
	xSUB(rsp, 8);
#endif

	xMOVZX(eax, al);
	if (wordsize != 8)
		xSUB(arg1regd, 0x80000000);
@@ -291,7 +348,13 @@ static void DynGen_IndirectTlbDispatcher(int mode, int bits, bool sign)
		}
	}

#ifdef _WIN32
	xADD(rsp, 32 + 8);
#else
	xADD(rsp, 8);
#endif

	xRET();
}
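// Assumed reasoning behind the fixup amounts above: the dispatcher is entered
// via CALL, leaving rsp 8 bytes off 16-byte alignment, and Win64 additionally
// requires 32 bytes of shadow space before calling into C++. A quick check:
#include <cstdio>

int main()
{
	const unsigned win64_fixup = 32 + 8, sysv_fixup = 8;
	std::printf("aligned after fixup: win64=%d sysv=%d\n",
		(8 + win64_fixup) % 16 == 0, (8 + sysv_fixup) % 16 == 0); // both print 1
	return 0;
}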
// One-time initialization procedure. Multiple subsequent calls during the lifespan of the
@@ -342,65 +405,83 @@ static void vtlb_SetWriteback(u32* writeback)
//////////////////////////////////////////////////////////////////////////////////////////
// Dynarec Load Implementations
int vtlb_DynGenReadQuad(u32 bits, int gpr)
{
pxAssume(bits == 128);
u32* writeback = DynGen_PrepRegs();
const int reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, 0) : _allocGPRtoXMMreg(0, gpr, MODE_WRITE); // Handler returns in xmm0
DynGen_IndirectDispatch(0, bits);
DynGen_DirectRead(bits, false);
vtlb_SetWriteback(writeback); // return target for indirect's call/ret
return reg;
}
// ------------------------------------------------------------------------
// Recompiled input registers:
//   ecx - source address to read from
// Returns read value in eax.
int vtlb_DynGenReadNonQuad(u32 bits, bool sign, bool xmm, int addr_reg, vtlb_ReadRegAllocCallback dest_reg_alloc)
{
	pxAssume(bits <= 64);

	int x86_dest_reg;
	if (!CHECK_FASTMEM || vtlb_IsFaultingPC(pc))
	{
		iFlushCall(FLUSH_FULLVTLB);

		DynGen_PrepRegs(addr_reg, -1, bits, xmm);
		DynGen_HandlerTest([bits, sign]() { DynGen_DirectRead(bits, sign); }, 0, bits, sign && bits < 64);

		if (!xmm)
		{
			x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeX86reg(eax), eax.GetId());
			xMOV(xRegister64(x86_dest_reg), rax);
		}
		else
		{
			// we shouldn't be loading any FPRs which aren't 32bit..
			// we use MOVD here despite it being floating-point data, because we're going int->float reinterpret.
			pxAssert(bits == 32);
			x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
			xMOVDZX(xRegisterSSE(x86_dest_reg), eax);
		}

		return x86_dest_reg;
	}

	const u8* codeStart;
	const xAddressReg x86addr(addr_reg);
	if (!xmm)
	{
		x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeX86reg(eax), eax.GetId());
		codeStart = x86Ptr;
		const xRegister64 x86reg(x86_dest_reg);
		switch (bits)
		{
			case 8:
				sign ? xMOVSX(x86reg, ptr8[RFASTMEMBASE + x86addr]) : xMOVZX(xRegister32(x86reg), ptr8[RFASTMEMBASE + x86addr]);
				break;
			case 16:
				sign ? xMOVSX(x86reg, ptr16[RFASTMEMBASE + x86addr]) : xMOVZX(xRegister32(x86reg), ptr16[RFASTMEMBASE + x86addr]);
				break;
			case 32:
				sign ? xMOVSX(x86reg, ptr32[RFASTMEMBASE + x86addr]) : xMOV(xRegister32(x86reg), ptr32[RFASTMEMBASE + x86addr]);
				break;
			case 64:
				xMOV(x86reg, ptr64[RFASTMEMBASE + x86addr]);
				break;

				jNO_DEFAULT
		}
	}
	else
	{
		pxAssert(bits == 32);
		x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
		codeStart = x86Ptr;
		const xRegisterSSE xmmreg(x86_dest_reg);
		xMOVSSZX(xmmreg, ptr32[RFASTMEMBASE + x86addr]);
	}

	const u32 padding = LOADSTORE_PADDING - std::min<u32>(static_cast<u32>(x86Ptr - codeStart), 5);
	for (u32 i = 0; i < padding; i++)
		xNOP();

	vtlb_AddLoadStoreInfo((uptr)codeStart, static_cast<u32>(x86Ptr - codeStart),
		pc, GetAllocatedGPRBitmask(), GetAllocatedXMMBitmask(),
		static_cast<u8>(addr_reg), static_cast<u8>(x86_dest_reg),
		static_cast<u8>(bits), sign, true, xmm);

	return x86_dest_reg;
}
// ------------------------------------------------------------------------
@@ -411,43 +492,44 @@ int vtlb_DynGenReadQuad_Const(u32 bits, u32 addr_const, int gpr)
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
// recompiler if the TLB is changed.
//
int vtlb_DynGenReadNonQuad_Const(u32 bits, bool sign, bool xmm, u32 addr_const, vtlb_ReadRegAllocCallback dest_reg_alloc)
{
	EE::Profiler.EmitConstMem(addr_const);

	int x86_dest_reg;
	auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS];
	if (!vmv.isHandler(addr_const))
	{
		auto ppf = vmv.assumePtr(addr_const);
		if (!xmm)
		{
			x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeX86reg(eax), eax.GetId());
			switch (bits)
			{
				case 8:
					sign ? xMOVSX(xRegister64(x86_dest_reg), ptr8[(u8*)ppf]) : xMOVZX(xRegister32(x86_dest_reg), ptr8[(u8*)ppf]);
					break;

				case 16:
					sign ? xMOVSX(xRegister64(x86_dest_reg), ptr16[(u16*)ppf]) : xMOVZX(xRegister32(x86_dest_reg), ptr16[(u16*)ppf]);
					break;

				case 32:
					sign ? xMOVSX(xRegister64(x86_dest_reg), ptr32[(u32*)ppf]) : xMOV(xRegister32(x86_dest_reg), ptr32[(u32*)ppf]);
					break;

				case 64:
					xMOV(xRegister64(x86_dest_reg), ptr64[(u64*)ppf]);
					break;
			}
		}
		else
		{
			x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
			xMOVSSZX(xRegisterSSE(x86_dest_reg), ptr32[(float*)ppf]);
		}
	}
	else
	{
		// has to: translate, find function, call function
		u32 paddr = vmv.assumeHandlerGetPAddr(addr_const);
@@ -464,60 +546,157 @@ void vtlb_DynGenReadNonQuad_Const(u32 bits, bool sign, u32 addr_const)
		// Shortcut for the INTC_STAT register, which many games like to spin on heavily.
		if ((bits == 32) && !EmuConfig.Speedhacks.IntcStat && (paddr == INTC_STAT))
		{
			x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeX86reg(eax), eax.GetId());
			if (!xmm)
			{
				if (sign)
					xMOVSX(xRegister64(x86_dest_reg), ptr32[&psHu32(INTC_STAT)]);
				else
					xMOV(xRegister32(x86_dest_reg), ptr32[&psHu32(INTC_STAT)]);
			}
			else
			{
				xMOVDZX(xRegisterSSE(x86_dest_reg), ptr32[&psHu32(INTC_STAT)]);
			}
		}
		else
		{
			iFlushCall(FLUSH_FULLVTLB);
			xFastCall(vmv.assumeHandlerGetRaw(szidx, false), paddr);

			if (!xmm)
			{
				x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeX86reg(eax), eax.GetId());
				switch (bits)
				{
					// save REX prefix by using 32bit dest for zext
					case 8:
						sign ? xMOVSX(xRegister64(x86_dest_reg), al) : xMOVZX(xRegister32(x86_dest_reg), al);
						break;

					case 16:
						sign ? xMOVSX(xRegister64(x86_dest_reg), ax) : xMOVZX(xRegister32(x86_dest_reg), ax);
						break;

					case 32:
						sign ? xMOVSX(xRegister64(x86_dest_reg), eax) : xMOV(xRegister32(x86_dest_reg), eax);
						break;

					case 64:
						xMOV(xRegister64(x86_dest_reg), rax);
						break;
				}
			}
			else
			{
				x86_dest_reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
				xMOVDZX(xRegisterSSE(x86_dest_reg), eax);
			}
		}
	}

	return x86_dest_reg;
}
int vtlb_DynGenReadQuad(u32 bits, int addr_reg, vtlb_ReadRegAllocCallback dest_reg_alloc)
{
pxAssume(bits == 128);
if (!CHECK_FASTMEM || vtlb_IsFaultingPC(pc))
{
iFlushCall(FLUSH_FULLVTLB);
DynGen_PrepRegs(arg1regd.GetId(), -1, bits, true);
DynGen_HandlerTest([bits]() {DynGen_DirectRead(bits, false); }, 0, bits);
const int reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0); // Handler returns in xmm0
if (reg >= 0)
xMOVAPS(xRegisterSSE(reg), xmm0);
return reg;
}
const int reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0); // Handler returns in xmm0
const u8* codeStart = x86Ptr;
xMOVAPS(xRegisterSSE(reg), ptr128[RFASTMEMBASE + arg1reg]);
const u32 padding = LOADSTORE_PADDING - std::min<u32>(static_cast<u32>(x86Ptr - codeStart), 5);
for (u32 i = 0; i < padding; i++)
xNOP();
vtlb_AddLoadStoreInfo((uptr)codeStart, static_cast<u32>(x86Ptr - codeStart),
pc, GetAllocatedGPRBitmask(), GetAllocatedXMMBitmask(),
static_cast<u8>(arg1reg.GetId()), static_cast<u8>(reg),
static_cast<u8>(bits), false, true, true);
return reg;
}
// ------------------------------------------------------------------------
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
// recompiler if the TLB is changed.
int vtlb_DynGenReadQuad_Const(u32 bits, u32 addr_const, vtlb_ReadRegAllocCallback dest_reg_alloc)
{
pxAssert(bits == 128);
EE::Profiler.EmitConstMem(addr_const);
int reg;
auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS];
if (!vmv.isHandler(addr_const))
{
void* ppf = reinterpret_cast<void*>(vmv.assumePtr(addr_const));
reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
if (reg >= 0)
xMOVAPS(xRegisterSSE(reg), ptr128[ppf]);
}
	else
	{
		// has to: translate, find function, call function
		u32 paddr = vmv.assumeHandlerGetPAddr(addr_const);

		const int szidx = 4;
		iFlushCall(FLUSH_FULLVTLB);
		xFastCall(vmv.assumeHandlerGetRaw(szidx, 0), paddr);

		reg = dest_reg_alloc ? dest_reg_alloc() : (_freeXMMreg(0), 0);
		xMOVAPS(xRegisterSSE(reg), xmm0);
	}

	return reg;
}
//////////////////////////////////////////////////////////////////////////////////////////
// Dynarec Store Implementations

void vtlb_DynGenWrite(u32 sz, bool xmm, int addr_reg, int value_reg)
{
#ifdef LOG_STORES
	//if (!xmm)
	{
		iFlushCall(FLUSH_FULLVTLB);
		xPUSH(xRegister64(addr_reg));
		xPUSH(xRegister64(value_reg));
		xPUSH(arg1reg);
		xPUSH(arg2reg);
		xMOV(arg1regd, xRegister32(addr_reg));
		if (xmm)
		{
			xSUB(rsp, 32 + 32);
			xMOVAPS(ptr[rsp + 32], xRegisterSSE::GetInstance(value_reg));
			xMOVAPS(ptr[rsp + 48], xRegisterSSE::GetArgRegister(1, 0));
			xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), xRegisterSSE::GetInstance(value_reg));
			xFastCall((void*)LogWriteQuad);
			xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), ptr[rsp + 48]);
			xMOVAPS(xRegisterSSE::GetInstance(value_reg), ptr[rsp + 32]);
			xADD(rsp, 32 + 32);
		}
		else
		{
			xMOV(arg2reg, xRegister64(value_reg));
			if (sz == 8)
				xAND(arg2regd, 0xFF);
			else if (sz == 16)
@@ -530,15 +709,67 @@ void vtlb_DynGenWrite(u32 sz)
		}

		xPOP(arg2reg);
		xPOP(arg1reg);
		xPOP(xRegister64(value_reg));
		xPOP(xRegister64(addr_reg));
	}
#endif

	if (!CHECK_FASTMEM || vtlb_IsFaultingPC(pc))
	{
		iFlushCall(FLUSH_FULLVTLB);

		DynGen_PrepRegs(addr_reg, value_reg, sz, xmm);
		DynGen_HandlerTest([sz]() { DynGen_DirectWrite(sz); }, 1, sz);
		return;
	}

	const u8* codeStart = x86Ptr;

	const xAddressReg vaddr_reg(addr_reg);
	if (!xmm)
	{
		switch (sz)
		{
			case 8:
				xMOV(ptr8[RFASTMEMBASE + vaddr_reg], xRegister8(xRegister32(value_reg)));
				break;
			case 16:
				xMOV(ptr16[RFASTMEMBASE + vaddr_reg], xRegister16(value_reg));
				break;
			case 32:
				xMOV(ptr32[RFASTMEMBASE + vaddr_reg], xRegister32(value_reg));
				break;
			case 64:
				xMOV(ptr64[RFASTMEMBASE + vaddr_reg], xRegister64(value_reg));
				break;

				jNO_DEFAULT
		}
	}
	else
	{
		pxAssert(sz == 32 || sz == 128);
		switch (sz)
		{
			case 32:
				xMOVSS(ptr32[RFASTMEMBASE + vaddr_reg], xRegisterSSE(value_reg));
				break;
			case 128:
				xMOVAPS(ptr128[RFASTMEMBASE + vaddr_reg], xRegisterSSE(value_reg));
				break;

				jNO_DEFAULT
		}
	}

	const u32 padding = LOADSTORE_PADDING - std::min<u32>(static_cast<u32>(x86Ptr - codeStart), 5);
	for (u32 i = 0; i < padding; i++)
		xNOP();

	vtlb_AddLoadStoreInfo((uptr)codeStart, static_cast<u32>(x86Ptr - codeStart),
		pc, GetAllocatedGPRBitmask(), GetAllocatedXMMBitmask(),
		static_cast<u8>(addr_reg), static_cast<u8>(value_reg),
		static_cast<u8>(sz), false, false, xmm);
}
@@ -546,28 +777,34 @@ void vtlb_DynGenWrite(u32 sz)
// Generates code for a store instruction, where the address is a known constant.
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
// recompiler if the TLB is changed.
void vtlb_DynGenWrite_Const(u32 bits, bool xmm, u32 addr_const, int value_reg)
{
	EE::Profiler.EmitConstMem(addr_const);

#ifdef LOG_STORES
	iFlushCall(FLUSH_FULLVTLB);
	//if (!xmm)
	{
		xPUSH(xRegister64(value_reg));
		xPUSH(xRegister64(value_reg));
		xPUSH(arg1reg);
		xPUSH(arg2reg);
		xMOV(arg1reg, addr_const);
		if (xmm)
		{
			xSUB(rsp, 32 + 32);
			xMOVAPS(ptr[rsp + 32], xRegisterSSE::GetInstance(value_reg));
			xMOVAPS(ptr[rsp + 48], xRegisterSSE::GetArgRegister(1, 0));
			xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), xRegisterSSE::GetInstance(value_reg));
			xFastCall((void*)LogWriteQuad);
			xMOVAPS(xRegisterSSE::GetArgRegister(1, 0), ptr[rsp + 48]);
			xMOVAPS(xRegisterSSE::GetInstance(value_reg), ptr[rsp + 32]);
			xADD(rsp, 32 + 32);
		}
		else
		{
			xMOV(arg2reg, xRegister64(value_reg));
			if (bits == 8)
				xAND(arg2regd, 0xFF);
			else if (bits == 16)
@@ -580,37 +817,52 @@ void vtlb_DynGenWrite_Const(u32 bits, u32 addr_const)
		}

		xPOP(arg2reg);
		xPOP(arg1reg);
		xPOP(xRegister64(value_reg));
		xPOP(xRegister64(value_reg));
	}
#endif

	auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS];
	if (!vmv.isHandler(addr_const))
	{
		auto ppf = vmv.assumePtr(addr_const);
		if (!xmm)
		{
			switch (bits)
			{
				case 8:
					xMOV(ptr[(void*)ppf], xRegister8(xRegister32(value_reg)));
					break;

				case 16:
					xMOV(ptr[(void*)ppf], xRegister16(value_reg));
					break;

				case 32:
					xMOV(ptr[(void*)ppf], xRegister32(value_reg));
					break;

				case 64:
					xMOV(ptr64[(void*)ppf], xRegister64(value_reg));
					break;

					jNO_DEFAULT
			}
		}
		else
		{
			switch (bits)
			{
				case 32:
					xMOVSS(ptr[(void*)ppf], xRegisterSSE(value_reg));
					break;

				case 128:
					xMOVAPS(ptr128[(void*)ppf], xRegisterSSE(value_reg));
					break;

					jNO_DEFAULT
			}
		}
	}
	else
@@ -621,15 +873,47 @@ void vtlb_DynGenWrite_Const(u32 bits, u32 addr_const)
		int szidx = 0;
		switch (bits)
		{
			case 8:
				szidx = 0;
				break;
			case 16:
				szidx = 1;
				break;
			case 32:
				szidx = 2;
				break;
			case 64:
				szidx = 3;
				break;
			case 128:
				szidx = 4;
				break;
		}

		iFlushCall(FLUSH_FULLVTLB);

		_freeX86reg(arg1regd);
		xMOV(arg1regd, paddr);
		if (bits == 128)
		{
			pxAssert(xmm);
			const xRegisterSSE argreg(xRegisterSSE::GetArgRegister(1, 0));
			_freeXMMreg(argreg.GetId());
			xMOVAPS(argreg, xRegisterSSE(value_reg));
		}
		else if (xmm)
		{
			pxAssert(bits == 32);
			_freeX86reg(arg2regd);
			xMOVD(arg2regd, xRegisterSSE(value_reg));
		}
		else
		{
			_freeX86reg(arg2regd);
			xMOV(arg2reg, xRegister64(value_reg));
		}

		xFastCall(vmv.assumeHandlerGetRaw(szidx, true));
	}
}
@@ -649,3 +933,156 @@ void vtlb_DynV2P()
	xOR(eax, ecx);
}
void vtlb_DynBackpatchLoadStore(uptr code_address, u32 code_size, u32 guest_pc, u32 guest_addr,
u32 gpr_bitmask, u32 fpr_bitmask, u8 address_register, u8 data_register,
u8 size_in_bits, bool is_signed, bool is_load, bool is_xmm)
{
static constexpr u32 GPR_SIZE = 8;
static constexpr u32 XMM_SIZE = 16;
// on win32, we need to reserve an additional 32 bytes shadow space when calling out to C
#ifdef _WIN32
static constexpr u32 SHADOW_SIZE = 32;
#else
static constexpr u32 SHADOW_SIZE = 0;
#endif
DevCon.WriteLn("Backpatching %s at %p[%u] (pc %08X vaddr %08X): Bitmask %08X %08X Addr %u Data %u Size %u Flags %02X %02X",
is_load ? "load" : "store", (void*)code_address, code_size, guest_pc, guest_addr, gpr_bitmask, fpr_bitmask,
address_register, data_register, size_in_bits, is_signed, is_load);
u8* thunk = recBeginThunk();
// save regs
u32 num_gprs = 0;
u32 num_fprs = 0;
for (u32 i = 0; i < iREGCNT_GPR; i++)
{
if ((gpr_bitmask & (1u << i)) && (i == rbx.GetId() || i == arg1reg.GetId() || i == arg2reg.GetId() || xRegisterBase::IsCallerSaved(i)) && (!is_load || is_xmm || data_register != i))
num_gprs++;
}
for (u32 i = 0; i < iREGCNT_XMM; i++)
{
if (fpr_bitmask & (1u << i) && xRegisterSSE::IsCallerSaved(i) && (!is_load || !is_xmm || data_register != i))
num_fprs++;
}
const u32 stack_size = (((num_gprs + 1) & ~1u) * GPR_SIZE) + (num_fprs * XMM_SIZE) + SHADOW_SIZE;
const u32 arg1id = static_cast<u32>(arg1reg.GetId());
const u32 arg2id = static_cast<u32>(arg2reg.GetId());
const u32 arg3id = static_cast<u32>(arg3reg.GetId());
if (stack_size > 0)
{
xSUB(rsp, stack_size);
u32 stack_offset = SHADOW_SIZE;
for (u32 i = 0; i < iREGCNT_XMM; i++)
{
if (fpr_bitmask & (1u << i) && xRegisterSSE::IsCallerSaved(i) && (!is_load || !is_xmm || data_register != i))
{
xMOVAPS(ptr128[rsp + stack_offset], xRegisterSSE(i));
stack_offset += XMM_SIZE;
}
}
for (u32 i = 0; i < iREGCNT_GPR; i++)
{
if ((gpr_bitmask & (1u << i)) && (i == arg1id || i == arg2id || i == arg3id || xRegisterBase::IsCallerSaved(i)) && (!is_load || is_xmm || data_register != i))
{
xMOV(ptr64[rsp + stack_offset], xRegister64(i));
stack_offset += GPR_SIZE;
}
}
}
if (is_load)
{
DynGen_PrepRegs(address_register, -1, size_in_bits, is_xmm);
DynGen_HandlerTest([size_in_bits, is_signed]() {DynGen_DirectRead(size_in_bits, is_signed); }, 0, size_in_bits, is_signed && size_in_bits <= 32);
if (size_in_bits == 128)
{
if (data_register != xmm0.GetId())
xMOVAPS(xRegisterSSE(data_register), xmm0);
}
else
{
if (is_xmm)
{
xMOVDZX(xRegisterSSE(data_register), rax);
}
else
{
if (data_register != eax.GetId())
xMOV(xRegister64(data_register), rax);
}
}
}
else
{
if (address_register != arg1reg.GetId())
xMOV(arg1regd, xRegister32(address_register));
if (size_in_bits == 128)
{
const xRegisterSSE argreg(xRegisterSSE::GetArgRegister(1, 0));
if (data_register != argreg.GetId())
xMOVAPS(argreg, xRegisterSSE(data_register));
}
else
{
if (is_xmm)
{
xMOVD(arg2reg, xRegisterSSE(data_register));
}
else
{
if (data_register != arg2reg.GetId())
xMOV(arg2reg, xRegister64(data_register));
}
}
DynGen_PrepRegs(address_register, data_register, size_in_bits, is_xmm);
DynGen_HandlerTest([size_in_bits]() { DynGen_DirectWrite(size_in_bits); }, 1, size_in_bits);
}
// restore regs
if (stack_size > 0)
{
u32 stack_offset = SHADOW_SIZE;
for (u32 i = 0; i < iREGCNT_XMM; i++)
{
if (fpr_bitmask & (1u << i) && xRegisterSSE::IsCallerSaved(i) && (!is_load || !is_xmm || data_register != i))
{
xMOVAPS(xRegisterSSE(i), ptr128[rsp + stack_offset]);
stack_offset += XMM_SIZE;
}
}
for (u32 i = 0; i < iREGCNT_GPR; i++)
{
if ((gpr_bitmask & (1u << i)) && (i == arg1id || i == arg2id || i == arg3id || xRegisterBase::IsCallerSaved(i)) && (!is_load || is_xmm || data_register != i))
{
xMOV(xRegister64(i), ptr64[rsp + stack_offset]);
stack_offset += GPR_SIZE;
}
}
xADD(rsp, stack_size);
}
xJMP((void*)(code_address + code_size));
recEndThunk();
// backpatch to a jump to the slowmem handler
x86Ptr = (u8*)code_address;
xJMP(thunk);
// fill the rest of it with nops, if any
pxAssertRel(static_cast<u32>((uptr)x86Ptr - code_address) <= code_size, "Overflowed when backpatching");
for (u32 i = static_cast<u32>((uptr)x86Ptr - code_address); i < code_size; i++)
xNOP();
}
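// Toy model of the backpatch step performed above (illustrative, not the real
// emitter): the faulting fastmem sequence is overwritten in place with a
// 5-byte rel32 JMP to the recovery thunk, and leftover bytes become NOPs.
#include <cstdio>
#include <cstring>

int main()
{
	unsigned char code[8] = {0x48, 0x8B, 0x04, 0x07, 0x90, 0x90, 0x90, 0x90}; // mov rax, [rdi+rax]; nops
	const int rel32 = 0x1234; // hypothetical displacement to the thunk
	code[0] = 0xE9;           // JMP rel32
	std::memcpy(&code[1], &rel32, 4);
	for (int i = 5; i < 8; i++)
		code[i] = 0x90; // NOP fill over the remainder
	for (unsigned char b : code)
		std::printf("%02X ", b);
	std::printf("\n");
	return 0;
}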


@@ -125,6 +125,7 @@ void mVUDTendProgram(mV, microFlagCycles* mFC, int isEbit)
		xMOVAPS(ptr128[&mVU.regs().micro_statusflags], xmmT1);
	}

	if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
		xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);

@@ -251,6 +252,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
	if ((isEbit && isEbit != 3)) // Clear 'is busy' Flags
	{
		if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
			xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
		if (!mVU.index || !THREAD_VU1)
		{
@@ -259,6 +261,7 @@ void mVUendProgram(mV, microFlagCycles* mFC, int isEbit)
	}
	else if (isEbit)
	{
		if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
			xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
	}


@@ -484,7 +484,9 @@ void mVUtestCycles(microVU& mVU, microFlagCycles& mFC)
		xForwardJGE32 skip;
		mVUsavePipelineState(mVU);
		if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
			xMOV(ptr32[&mVU.regs().nextBlockCycles], mVUcycles);
		mVUendProgram(mVU, &mFC, 0);
		skip.SetTarget();
@@ -801,6 +803,7 @@ void* mVUcompile(microVU& mVU, u32 startPC, uptr pState)
			}
			incPC(2);
			mVUsetupRange(mVU, xPC, false);
			if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
				xMOV(ptr32[&mVU.regs().nextBlockCycles], 0);
			mVUendProgram(mVU, &mFC, 0);
			normBranchCompile(mVU, xPC);


@@ -215,6 +215,9 @@ struct microIR
// Reg Alloc
//------------------------------------------------------------------

//#define MVURALOG(...) fprintf(stderr, __VA_ARGS__)
#define MVURALOG(...)

struct microMapXMM
{
	int VFreg; // VF Reg Number Stored (-1 = Temp; 0 = vf0 and will not be written back; 32 = ACC; 33 = I reg)
@@ -231,6 +234,13 @@ protected:
	microMapXMM xmmMap[xmmTotal];
	int counter; // Current allocation count
	int index; // VU0 or VU1

	// DO NOT REMOVE THIS.
	// This is here for a reason. MSVC likes to turn global writes into a load+conditional move+store.
	// That creates a race with the EE thread when we're compiling on the VU thread, even though
	// regAllocCOP2 is false. By adding another level of indirection, it emits a branch instead.
	_xmmregs* pxmmregs;

	bool regAllocCOP2; // Local COP2 check

	// Helper functions to get VU regs
@@ -260,11 +270,11 @@ protected:
		return -1;
	}

	int findFreeReg(int vfreg)
	{
		if (regAllocCOP2)
		{
			return _allocVFtoXMMreg(vfreg, 0);
		}

		for (int i = 0; i < xmmTotal; i++)
@@ -289,12 +299,38 @@ public:
	// Fully resets the regalloc by clearing all cached data
	void reset(bool cop2mode)
	{
		// we run this at the end of cop2, so don't free fprs
		regAllocCOP2 = false;

		for (int i = 0; i < xmmTotal; i++)
		{
			clearReg(i);
		}
		counter = 0;
		regAllocCOP2 = cop2mode;
		pxmmregs = cop2mode ? xmmregs : nullptr;

		if (cop2mode)
		{
			for (int i = 0; i < xmmTotal; i++)
			{
				if (!pxmmregs[i].inuse || pxmmregs[i].type != XMMTYPE_VFREG)
					continue;

				// we shouldn't have any temp registers in here.. except for PQ, which
				// isn't allocated here yet.
				// pxAssertRel(fprregs[i].reg >= 0, "Valid full register preserved");
				if (pxmmregs[i].reg >= 0)
				{
					MVURALOG("Preserving VF reg %d in host reg %d across instruction\n", pxmmregs[i].reg, i);
					pxAssert(pxmmregs[i].reg != 255);
					pxmmregs[i].needed = false;
					xmmMap[i].isNeeded = false;
					xmmMap[i].VFreg = pxmmregs[i].reg;
					xmmMap[i].xyzw = ((pxmmregs[i].mode & MODE_WRITE) != 0) ? 0xf : 0x0;
				}
			}
		}
	}
	int getXmmCount()
@@ -314,6 +350,35 @@ public:
		}
	}
void flushPartialForCOP2()
{
for (int i = 0; i < xmmTotal; i++)
{
microMapXMM& clear = xmmMap[i];
// toss away anything which is not a full cached register
if (pxmmregs[i].inuse && pxmmregs[i].type == XMMTYPE_VFREG)
{
// Should've been done in clearNeeded()
if (clear.xyzw != 0 && clear.xyzw != 0xf)
writeBackReg(xRegisterSSE::GetInstance(i), false);
if (clear.VFreg <= 0)
{
// temps really shouldn't be here..
_freeXMMreg(i);
}
}
// needed gets cleared in iCore.
clear.VFreg = -1;
clear.count = 0;
clear.xyzw = 0;
clear.isNeeded = 0;
clear.isZero = 0;
}
}
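The xyzw bookkeeping that drives flushPartialForCOP2 is a per-lane dirty mask over the four VF vector fields: 0x0 means clean, 0xf means every lane was written (a single 128-bit store suffices), and anything in between is a partial write that has to be merged into memory before the register can change hands. A sketch of that policy, with invented helper names:

void writeBackPolicy(int hostXmm, int vfReg, u8 xyzw)
{
	if (xyzw == 0x0)
		return;                                 // untouched: nothing to store
	if (xyzw == 0xf)
		storeFull(hostXmm, vfReg);              // one 128-bit MOVAPS covers it
	else
		storeMaskedLanes(hostXmm, vfReg, xyzw); // blend only the dirty lanes
}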
	void TDwritebackAll(bool clearState = false)
	{
		for (int i = 0; i < xmmTotal; i++)
@@ -352,6 +417,12 @@ public:
	void clearReg(int regId)
	{
		microMapXMM& clear = xmmMap[regId];
+		if (regAllocCOP2)
+		{
+			pxAssert(pxmmregs[regId].type == XMMTYPE_VFREG);
+			pxmmregs[regId].inuse = false;
+		}
		clear.VFreg = -1;
		clear.count = 0;
		clear.xyzw = 0;
@@ -368,6 +439,24 @@ public:
		}
	}
+	void clearRegCOP2(int xmmReg)
+	{
+		if (regAllocCOP2)
+			clearReg(xmmReg);
+	}
+	void updateCOP2AllocState(int rn)
+	{
+		if (!regAllocCOP2)
+			return;
+		const bool dirty = (xmmMap[rn].VFreg > 0 && xmmMap[rn].xyzw != 0);
+		pxAssert(pxmmregs[rn].type == XMMTYPE_VFREG);
+		pxmmregs[rn].reg = xmmMap[rn].VFreg;
+		pxmmregs[rn].mode = dirty ? (MODE_READ | MODE_WRITE) : MODE_READ;
+		pxmmregs[rn].needed = xmmMap[rn].isNeeded;
+	}
	// Writes back modified reg to memory.
	// If all vectors modified, then keeps the VF reg cached in the xmm register.
	// If reg was not modified, then keeps the VF reg cached in the xmm register.
@@ -406,6 +495,7 @@ public:
			mapX.count = counter;
			mapX.xyzw = 0;
			mapX.isNeeded = false;
+			updateCOP2AllocState(reg.Id);
			return;
		}
		clearReg(reg);
@@ -453,6 +543,7 @@ public:
					mapI.xyzw = 0xf;
					mapI.count = counter;
					mergeRegs = 2;
+					updateCOP2AllocState(i);
				}
				else
					clearReg(i); // Clears when mergeRegs is 0 or 2
@@ -466,6 +557,12 @@ public:
			else
				clearReg(reg); // If Reg was temp or vf0, then invalidate itself
		}
+		else if (regAllocCOP2 && clear.VFreg < 0)
+		{
+			// free on the EE side
+			pxAssert(pxmmregs[reg.Id].type == XMMTYPE_VFREG);
+			pxmmregs[reg.Id].inuse = false;
+		}
	}
	// vfLoadReg = VF reg to be loaded to the xmm register
@@ -495,7 +592,7 @@ public:
	{
		if (cloneWrite) // Clone Reg so as not to use the same Cached Reg
		{
-			z = findFreeReg();
+			z = findFreeReg(vfWriteReg);
			const xmm& xmmZ = xmm::GetInstance(z);
			writeBackReg(xmmZ);
@@ -528,11 +625,13 @@ public:
			}
			xmmMap[z].count = counter;
			xmmMap[z].isNeeded = true;
+			updateCOP2AllocState(z);
			return xmm::GetInstance(z);
		}
	}
}
-		int x = findFreeReg();
+		int x = findFreeReg((vfWriteReg >= 0) ? vfWriteReg : vfLoadReg);
		const xmm& xmmX = xmm::GetInstance(x);
		writeBackReg(xmmX);
@@ -565,6 +664,7 @@ public:
		xmmMap[x].isZero = (vfLoadReg == 0);
		xmmMap[x].count = counter;
		xmmMap[x].isNeeded = true;
+		updateCOP2AllocState(x);
		return xmmX;
	}
};

View File

@@ -28,6 +28,10 @@ using namespace R5900::Dynarec;
#define printCOP2(...) (void)0
//#define printCOP2 DevCon.Status
+// For now, we need to free all XMMs. Because we're not saving the nonvolatile registers when
+// we enter micro mode, they will get overridden otherwise...
+#define FLUSH_FOR_POSSIBLE_MICRO_EXEC (FLUSH_FREE_XMM | FLUSH_FREE_VU0)
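The hazard that macro guards against, sketched with hypothetical helper names: macro-mode COP2 keeps guest values live in host XMM registers, but a microVU-compiled block treats every XMM as scratch, including the ones the host ABI declares callee-saved, so anything cached across a possible micro execution would be silently clobbered.

// Illustrative only -- not the real emitter interface.
void compileAroundPossibleMicroExec()
{
	// xmm6..xmm15 are callee-saved in the Win64 ABI, but JIT-compiled VU0
	// code neither saves nor restores them. Writing cached guest registers
	// back to memory first makes the clobber harmless.
	flushAllCachedXmm();  // emit writebacks for every dirty host XMM
	emitCallToVU0Block(); // may run arbitrary microVU code
	// past this point, no XMM may be assumed to still hold a guest value
}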
void setupMacroOp(int mode, const char* opName)
{
	// Set up reg allocation
@@ -96,8 +100,7 @@ void endMacroOp(int mode)
		xMOVSS(ptr32[&vu0Regs.VI[REG_Q].UL], xmmPQ);
	}
-	microVU0.regAlloc->flushAll();
-	_clearNeededCOP2Regs();
+	microVU0.regAlloc->flushPartialForCOP2();
	if (mode & 0x10)
	{
@@ -119,6 +122,11 @@ void endMacroOp(int mode)
	microVU0.regAlloc->reset(false);
}
+void mVUFreeCOP2XMMreg(int hostreg)
+{
+	microVU0.regAlloc->clearRegCOP2(hostreg);
+}
#define REC_COP2_mVU0(f, opName, mode) \
	void recV##f() \
	{ \
@@ -142,13 +150,9 @@ void endMacroOp(int mode)
#define INTERPRETATE_COP2_FUNC(f) \
	void recV##f() \
	{ \
-		_freeX86reg(eax); \
-		xMOV(eax, ptr32[&cpuRegs.cycle]); \
-		xADD(eax, scaleblockcycles_clear()); \
-		xMOV(ptr32[&cpuRegs.cycle], eax); \
-		_cop2BackupRegs(); \
+		iFlushCall(FLUSH_FOR_POSSIBLE_MICRO_EXEC); \
+		xADD(ptr32[&cpuRegs.cycle], scaleblockcycles_clear()); \
		recCall(V##f); \
-		_cop2RestoreRegs(); \
	}
//------------------------------------------------------------------
@@ -303,13 +307,15 @@ INTERPRETATE_COP2_FUNC(CALLMSR);
// Macro VU - Branches
//------------------------------------------------------------------
-void _setupBranchTest(u32*(jmpType)(u32), bool isLikely)
+static void _setupBranchTest(u32*(jmpType)(u32), bool isLikely)
{
	printCOP2("COP2 Branch");
-	_eeFlushAllUnused();
+	const u32 branchTo = ((s32)_Imm_ * 4) + pc;
+	const bool swap = isLikely ? false : TrySwapDelaySlot(0, 0, 0);
+	_eeFlushAllDirty();
	//xTEST(ptr32[&vif1Regs.stat._u32], 0x4);
	xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x100);
-	recDoBranchImm(jmpType(0), isLikely);
+	recDoBranchImm(branchTo, jmpType(0), isLikely, swap);
}
void recBC2F() { _setupBranchTest(JNZ32, false); }
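TrySwapDelaySlot above is the commit's delay-slot swapping applied to COP2 branches: when the instruction in the delay slot is not itself a branch and writes nothing the branch condition reads, it can be compiled ahead of the compare-and-jump, so neither the taken nor the fall-through path needs its own copy of the slot. Schematically (invented helpers, heavily simplified):

bool trySwapDelaySlot(const Instr& branch, const Instr& slot)
{
	// Likely branches are excluded by the caller (isLikely ? false : ...):
	// their slot executes only on the taken path, so hoisting it would
	// change observable behaviour.
	if (slot.isBranch() || slot.writesAnyOf(branch.reads()))
		return false; // swap would be observable
	compile(slot);    // emit the slot first, exactly once
	return true;      // caller emits the test next and skips the slot
}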
@@ -321,7 +327,7 @@ void recBC2TL() { _setupBranchTest(JZ32, true); }
// Macro VU - COP2 Transfer Instructions
//------------------------------------------------------------------
-void COP2_Interlock(bool mBitSync)
+static void COP2_Interlock(bool mBitSync)
{
	if (cpuRegs.code & 1)
	{
@@ -329,8 +335,9 @@ void COP2_Interlock(bool mBitSync)
		// We can safely skip the _vu0FinishMicro() call, when there's nothing
		// that can trigger a VU0 program between CFC2/CTC2/COP2 instructions.
-		if ((g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0_MICRO) || mBitSync)
+		if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
		{
+			iFlushCall(FLUSH_FOR_POSSIBLE_MICRO_EXEC);
			_freeX86reg(eax);
			xMOV(eax, ptr32[&cpuRegs.cycle]);
			xADD(eax, scaleblockcycles_clear());
@@ -338,10 +345,14 @@ void COP2_Interlock(bool mBitSync)
			xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
			xForwardJZ32 skipvuidle;
-			_cop2BackupRegs();
			if (mBitSync)
			{
				xSUB(eax, ptr32[&VU0.cycle]);
+				// Why do we check this here? Ratchet games (and maybe others) end up with
+				// flickering polygons when we use lazy COP2 sync otherwise. The micro resumption
+				// getting deferred an extra EE block is apparently enough to cause issues.
+				if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
					xSUB(eax, ptr32[&VU0.nextBlockCycles]);
				xCMP(eax, 4);
				xForwardJL32 skip;
@@ -354,18 +365,47 @@ void COP2_Interlock(bool mBitSync)
			}
			else
				xFastCall((void*)_vu0FinishMicro);
-			_cop2RestoreRegs();
			skipvuidle.SetTarget();
		}
	}
}
-void TEST_FBRST_RESET(FnType_Void* resetFunct, int vuIndex)
+static void mVUSyncVU0()
{
-	xTEST(eax, (vuIndex) ? 0x200 : 0x002);
+	iFlushCall(FLUSH_FOR_POSSIBLE_MICRO_EXEC);
+	_freeX86reg(eax);
+	xMOV(eax, ptr32[&cpuRegs.cycle]);
+	xADD(eax, scaleblockcycles_clear());
+	xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
+	xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
+	xForwardJZ32 skipvuidle;
+	xSUB(eax, ptr32[&VU0.cycle]);
+	if (EmuConfig.Gamefixes.VUSyncHack || EmuConfig.Gamefixes.FullVU0SyncHack)
+		xSUB(eax, ptr32[&VU0.nextBlockCycles]);
+	xCMP(eax, 4);
+	xForwardJL32 skip;
+	xLoadFarAddr(arg1reg, CpuVU0);
+	xMOV(arg2reg, s_nBlockInterlocked);
+	xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
+	skip.SetTarget();
+	skipvuidle.SetTarget();
+}
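In C terms, the code mVUSyncVU0 emits boils down to the following check (a paraphrase of the assembly above, with syncHackEnabled standing in for the two gamefix flags):

void syncVU0AtInstruction(u32 blockCycles)
{
	cpuRegs.cycle += blockCycles;                  // charge this EE block first
	if (!(VU0.VI[REG_VPU_STAT].UL & 0x1))
		return;                                    // VU0 idle: nothing to sync
	s32 behind = (s32)(cpuRegs.cycle - VU0.cycle); // how far VU0 lags the EE
	if (syncHackEnabled)
		behind -= VU0.nextBlockCycles;             // credit the block VU0 runs next
	if (behind >= 4)                               // catch up only in >=4-cycle steps
		BaseVUmicroCPU::ExecuteBlockJIT(CpuVU0, s_nBlockInterlocked);
}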
+static void mVUFinishVU0()
+{
+	iFlushCall(FLUSH_FOR_POSSIBLE_MICRO_EXEC);
+	xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
+	xForwardJZ32 skipvuidle;
+	xFastCall((void*)_vu0FinishMicro);
+	skipvuidle.SetTarget();
+}
+static void TEST_FBRST_RESET(int flagreg, FnType_Void* resetFunct, int vuIndex)
+{
+	xTEST(xRegister32(flagreg), (vuIndex) ? 0x200 : 0x002);
	xForwardJZ8 skip;
	xFastCall((void*)resetFunct);
-	xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
	skip.SetTarget();
}
@@ -380,43 +420,20 @@ static void recCFC2()
	if (!(cpuRegs.code & 1))
	{
-		_freeX86reg(eax);
-		xMOV(eax, ptr32[&cpuRegs.cycle]);
-		xADD(eax, scaleblockcycles_clear());
-		xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
-		xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-		xForwardJZ32 skipvuidle;
-		xSUB(eax, ptr32[&VU0.cycle]);
-		xSUB(eax, ptr32[&VU0.nextBlockCycles]);
-		xCMP(eax, 4);
-		xForwardJL32 skip;
-		_cop2BackupRegs();
-		xLoadFarAddr(arg1reg, CpuVU0);
-		xMOV(arg2reg, s_nBlockInterlocked);
-		xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
-		_cop2RestoreRegs();
-		skip.SetTarget();
-		skipvuidle.SetTarget();
+		if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
+			mVUSyncVU0();
+		else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
+			mVUFinishVU0();
	}
-	_flushEEreg(_Rt_, true);
-	if (_Rd_ == REG_STATUS_FLAG) // Normalize Status Flag
-		xMOV(eax, ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL]);
-	else
-		xMOV(eax, ptr32[&vu0Regs.VI[_Rd_].UL]);
+	const int regt = _allocX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE);
+	pxAssert(!GPR_IS_CONST1(_Rt_));
	// FixMe: Should R-Reg have upper 9 bits 0?
-	if (_Rd_ >= 16)
-		xCDQE(); // Sign Extend
-	xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
-	// FixMe: I think this is needed, but not sure how it works
-	// Update Refraction 20/09/2021: This is needed because Const Prop is broken
-	// the Flushed flag isn't being cleared when it's not flushed. TODO I guess
-	_eeOnWriteReg(_Rt_, 0);
+	if (_Rd_ >= REG_STATUS_FLAG)
+		xMOVSX(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]);
+	else
+		xMOV(xRegister64(regt), ptr32[&vu0Regs.VI[_Rd_].UL]);
}
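The _Rd_ >= REG_STATUS_FLAG split above picks the extension semantics when a VU0 VI register is read into a 64-bit EE GPR: control registers are sign-extended (MOVSX from the 32-bit load), ordinary integer VIs are zero-extended (a 32-bit MOV clears the upper half). In plain C (illustrative only):

u64 cfc2_result(u32 raw, bool control_reg)
{
	return control_reg ? (u64)(s64)(s32)raw // replicate bit 31 upward
	                   : (u64)raw;          // upper 32 bits forced to zero
}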
static void recCTC2()
@@ -430,28 +447,12 @@ static void recCTC2()
	if (!(cpuRegs.code & 1))
	{
-		_freeX86reg(eax);
-		xMOV(eax, ptr32[&cpuRegs.cycle]);
-		xADD(eax, scaleblockcycles_clear());
-		xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
-		xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-		xForwardJZ32 skipvuidle;
-		xSUB(eax, ptr32[&VU0.cycle]);
-		xSUB(eax, ptr32[&VU0.nextBlockCycles]);
-		xCMP(eax, 4);
-		xForwardJL32 skip;
-		_cop2BackupRegs();
-		xLoadFarAddr(arg1reg, CpuVU0);
-		xMOV(arg2reg, s_nBlockInterlocked);
-		xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
-		_cop2RestoreRegs();
-		skip.SetTarget();
-		skipvuidle.SetTarget();
+		if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
+			mVUSyncVU0();
+		else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
+			mVUFinishVU0();
	}
-	_flushEEreg(_Rt_);
	switch (_Rd_)
	{
		case REG_MAC_FLAG:
@@ -459,7 +460,7 @@ static void recCTC2()
		case REG_VPU_STAT:
			break; // Read Only Regs
		case REG_R:
-			xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+			_eeMoveGPRtoR(eax, _Rt_);
			xAND(eax, 0x7FFFFF);
			xOR(eax, 0x3f800000);
			xMOV(ptr32[&vu0Regs.VI[REG_R].UL], eax);
@@ -468,7 +469,7 @@ static void recCTC2()
		{
			if (_Rt_)
			{
-				xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
+				_eeMoveGPRtoR(eax, _Rt_);
				xAND(eax, 0xFC0);
				xAND(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], 0x3F);
				xOR(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], eax);
@@ -476,42 +477,44 @@ static void recCTC2()
			else
				xAND(ptr32[&vu0Regs.VI[REG_STATUS_FLAG].UL], 0x3F);
-			_freeXMMreg(xmmT1.Id);
+			const int xmmtemp = _allocTempXMMreg(XMMT_INT);
			//Need to update the sticky flags for microVU
			mVUallocSFLAGd(&vu0Regs.VI[REG_STATUS_FLAG].UL);
-			xMOVDZX(xmmT1, eax);
-			xSHUF.PS(xmmT1, xmmT1, 0);
+			xMOVDZX(xRegisterSSE(xmmtemp), eax); // TODO(Stenzek): This can be a broadcast.
+			xSHUF.PS(xRegisterSSE(xmmtemp), xRegisterSSE(xmmtemp), 0);
			// Make sure the values are everywhere they need to be
-			xMOVAPS(ptr128[&vu0Regs.micro_statusflags], xmmT1);
+			xMOVAPS(ptr128[&vu0Regs.micro_statusflags], xRegisterSSE(xmmtemp));
+			_freeXMMreg(xmmtemp);
			break;
		}
		case REG_CMSAR1: // Execute VU1 Micro SubRoutine
-			_cop2BackupRegs();
-			xMOV(ecx, 1);
-			xFastCall((void*)vu1Finish, ecx);
-			if (_Rt_)
-			{
-				xMOV(ecx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
-			}
-			else
-				xXOR(ecx, ecx);
-			xFastCall((void*)vu1ExecMicro, ecx);
-			_cop2RestoreRegs();
+			iFlushCall(FLUSH_NONE);
+			xMOV(arg1regd, 1);
+			xFastCall((void*)vu1Finish);
+			_eeMoveGPRtoR(arg1regd, _Rt_);
+			iFlushCall(FLUSH_NONE);
+			xFastCall((void*)vu1ExecMicro);
			break;
		case REG_FBRST:
+		{
			if (!_Rt_)
			{
				xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], 0);
				return;
			}
-			else
-				xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
-			_cop2BackupRegs();
-			TEST_FBRST_RESET(vu0ResetRegs, 0);
-			TEST_FBRST_RESET(vu1ResetRegs, 1);
-			_cop2RestoreRegs();
-			xAND(eax, 0x0C0C);
-			xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], eax);
+			const int flagreg = _allocX86reg(X86TYPE_TEMP, 0, MODE_CALLEESAVED);
+			_eeMoveGPRtoR(xRegister32(flagreg), _Rt_);
+			iFlushCall(FLUSH_FREE_VU0);
+			TEST_FBRST_RESET(flagreg, vu0ResetRegs, 0);
+			TEST_FBRST_RESET(flagreg, vu1ResetRegs, 1);
+			xAND(xRegister32(flagreg), 0x0C0C);
+			xMOV(ptr32[&vu0Regs.VI[REG_FBRST].UL], xRegister32(flagreg));
+			_freeX86reg(flagreg);
+		}
			break;
		case 0:
			// Ignore writes to vi00.
@@ -521,6 +524,14 @@ static void recCTC2()
			// sVU's COP2 has a comment that "Donald Duck" needs this too...
			if (_Rd_ < REG_STATUS_FLAG)
			{
+				// I isn't invalidated correctly yet, ideally we would move this to the XMM directly.
+				if (_Rd_ == REG_I)
+				{
+					const int xmmreg = _checkXMMreg(XMMTYPE_VFREG, 33, 0);
+					if (xmmreg >= 0)
+						_freeXMMregWithoutWriteback(xmmreg);
+				}
				// Need to expand this out, because we want to write as 16 bits.
				_eeMoveGPRtoR(eax, _Rt_);
				xMOV(ptr16[&vu0Regs.VI[_Rd_].US[0]], ax);
@@ -545,32 +556,36 @@ static void recQMFC2()
	if (!(cpuRegs.code & 1))
	{
-		_freeX86reg(eax);
-		xMOV(eax, ptr32[&cpuRegs.cycle]);
-		xADD(eax, scaleblockcycles_clear());
-		xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
-		xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-		xForwardJZ32 skipvuidle;
-		xSUB(eax, ptr32[&VU0.cycle]);
-		xSUB(eax, ptr32[&VU0.nextBlockCycles]);
-		xCMP(eax, 4);
-		xForwardJL32 skip;
-		_cop2BackupRegs();
-		xLoadFarAddr(arg1reg, CpuVU0);
-		xMOV(arg2reg, s_nBlockInterlocked);
-		xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
-		_cop2RestoreRegs();
-		skip.SetTarget();
-		skipvuidle.SetTarget();
+		if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
+			mVUSyncVU0();
+		else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
+			mVUFinishVU0();
	}
-	int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_WRITE);
-	// Update Refraction 20/09/2021: This is needed because Const Prop is broken
-	// the Flushed flag isn't being cleared when it's not flushed. TODO I guess
-	_eeOnWriteReg(_Rt_, 0);
-	xMOVAPS(xRegisterSSE(rtreg), ptr128[&vu0Regs.VF[_Rd_]]);
+	const bool vf_used = COP2INST_USEDTEST(_Rd_);
+	const int ftreg = _allocVFtoXMMreg(_Rd_, MODE_READ);
+	_deleteEEreg128(_Rt_);
+	// This is needed because Const Prop is broken
+	// const flag should've been cleared, but sanity check..
+	pxAssert(!GPR_IS_CONST1(_Rt_));
+	if (vf_used)
+	{
+		// store direct to state if rt is not used
+		const int rtreg = _allocIfUsedGPRtoXMM(_Rt_, MODE_WRITE);
+		if (rtreg >= 0)
+			xMOVAPS(xRegisterSSE(rtreg), xRegisterSSE(ftreg));
+		else
+			xMOVAPS(ptr128[&cpuRegs.GPR.r[_Rt_].UQ], xRegisterSSE(ftreg));
+		// don't cache vf00, microvu doesn't like it
+		if (_Rd_ == 0)
+			_freeXMMreg(ftreg);
+	}
+	else
+	{
+		_reallocateXMMreg(ftreg, XMMTYPE_GPRREG, _Rt_, MODE_WRITE, true);
+	}
}
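vf_used gates the commit's register renaming for QMFC2: if use analysis says VF(_Rd_) is read again later in the block, the quad is copied so both mappings stay cached; if not, the host XMM holding it is simply relabeled as the GPR and the copy disappears. In outline (invented helper names, not the real interfaces):

void qmfc2(int rd, int rt)
{
	const int ftreg = allocVF(rd, MODE_READ);        // VF(rd) lives in a host XMM
	if (vfUsedAgainInBlock(rd))
		copyXmm(allocGPR128(rt, MODE_WRITE), ftreg); // keep both values cached
	else
		relabelXmm(ftreg, GPR128(rt));               // zero-copy: the XMM changes owner
}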
static void recQMTC2()
@@ -583,29 +598,46 @@ static void recQMTC2()
	if (!(cpuRegs.code & 1))
	{
-		_freeX86reg(eax);
-		xMOV(eax, ptr32[&cpuRegs.cycle]);
-		xADD(eax, scaleblockcycles_clear());
-		xMOV(ptr32[&cpuRegs.cycle], eax); // update cycles
-		xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-		xForwardJZ32 skipvuidle;
-		xSUB(eax, ptr32[&VU0.cycle]);
-		xSUB(eax, ptr32[&VU0.nextBlockCycles]);
-		xCMP(eax, 4);
-		xForwardJL32 skip;
-		_cop2BackupRegs();
-		xLoadFarAddr(arg1reg, CpuVU0);
-		xMOV(arg2reg, s_nBlockInterlocked);
-		xFastCall((void*)BaseVUmicroCPU::ExecuteBlockJIT, arg1reg, arg2reg);
-		_cop2RestoreRegs();
-		skip.SetTarget();
-		skipvuidle.SetTarget();
+		if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
+			mVUSyncVU0();
+		else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
+			mVUFinishVU0();
	}
-	int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
-	xMOVAPS(ptr128[&vu0Regs.VF[_Rd_]], xRegisterSSE(rtreg));
+	if (_Rt_)
+	{
+		// if we have to flush to memory anyway (has a constant or is x86), force load.
+		const bool vf_used = COP2INST_USEDTEST(_Rd_);
+		const bool can_rename = EEINST_RENAMETEST(_Rt_);
+		const int rtreg = (GPR_IS_DIRTY_CONST(_Rt_) || _hasX86reg(X86TYPE_GPR, _Rt_, MODE_WRITE)) ?
+							  _allocGPRtoXMMreg(_Rt_, MODE_READ) :
+							  _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ);
+		// NOTE: can't transfer xmm15 to VF, it's reserved for PQ.
+		int vfreg = _checkXMMreg(XMMTYPE_VFREG, _Rd_, MODE_WRITE);
+		if (can_rename && rtreg >= 0 && rtreg != xmmPQ.GetId())
+		{
+			// rt is no longer needed, so transfer to VF.
+			if (vfreg >= 0)
+				_freeXMMregWithoutWriteback(vfreg);
+			_reallocateXMMreg(rtreg, XMMTYPE_VFREG, _Rd_, MODE_WRITE, true);
+		}
+		else
+		{
+			// copy to VF.
+			if (vfreg < 0)
+				vfreg = _allocVFtoXMMreg(_Rd_, MODE_WRITE);
+			if (rtreg >= 0)
+				xMOVAPS(xRegisterSSE(vfreg), xRegisterSSE(rtreg));
+			else
+				xMOVAPS(xRegisterSSE(vfreg), ptr128[&cpuRegs.GPR.r[_Rt_].UQ]);
		}
+	}
+	else
+	{
+		const int vfreg = _allocVFtoXMMreg(_Rd_, MODE_WRITE);
+		xPXOR(xRegisterSSE(vfreg), xRegisterSSE(vfreg));
+	}
}
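QMTC2 applies the same rename in the opposite direction, with the two extra guards visible above: the GPR value must be dead afterwards (EEINST_RENAMETEST), and the source must not be the PQ-reserved register. Roughly (invented helpers):

void qmtc2(int rt, int rd)
{
	const int rtreg = findCachedGPR128(rt);  // -1 if rt lives only in memory
	if (rtreg >= 0 && rtreg != xmmPQ_id && gprDeadAfter(rt))
		relabelXmm(rtreg, VF(rd));           // ownership transfer, no copy
	else
		copyToVF(rd, rtreg);                 // plain MOVAPS, or a load when rtreg < 0
}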
//------------------------------------------------------------------
@@ -669,22 +701,102 @@ void (*recCOP2SPECIAL2t[128])() = {
namespace R5900 {
namespace Dynarec {
namespace OpcodeImpl {
	void recCOP2() { recCOP2t[_Rs_](); }
+#if defined(LOADSTORE_RECOMPILE) && defined(CP2_RECOMPILE)
+	/*********************************************************
+	* Load and store for COP2 (VU0 unit)                     *
+	* Format:  OP rt, offset(base)                           *
+	*********************************************************/
+	void recLQC2()
+	{
+		if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
+			mVUSyncVU0();
+		else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
+			mVUFinishVU0();
+		vtlb_ReadRegAllocCallback alloc_cb = nullptr;
+		if (_Rt_)
+		{
+			// init regalloc after flush
+			alloc_cb = []() { return _allocVFtoXMMreg(_Rt_, MODE_WRITE); };
+		}
+		int xmmreg;
+		if (GPR_IS_CONST1(_Rs_))
+		{
+			const u32 addr = (g_cpuConstRegs[_Rs_].UL[0] + _Imm_) & ~0xFu;
+			xmmreg = vtlb_DynGenReadQuad_Const(128, addr, alloc_cb);
+		}
+		else
+		{
+			_eeMoveGPRtoR(arg1regd, _Rs_);
+			if (_Imm_ != 0)
+				xADD(arg1regd, _Imm_);
+			xAND(arg1regd, ~0xF);
+			xmmreg = vtlb_DynGenReadQuad(128, arg1regd.GetId(), alloc_cb);
+		}
+		// toss away if loading to vf00
+		if (!_Rt_)
+			_freeXMMreg(xmmreg);
+		EE::Profiler.EmitOp(eeOpcode::LQC2);
+	}
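The alloc_cb passed to the vtlb quad-read emitters is what lets the loaded quad land directly in the VF host register: the emitter invokes the callback only after it has done whatever flushing the memory access requires, so the destination is allocated at the last safe point. Roughly how that contract behaves (simplified, invented emitter internals):

using vtlb_ReadRegAllocCallback = int (*)();

int emitQuadRead(int addrReg, vtlb_ReadRegAllocCallback alloc_cb)
{
	// ...emit address translation / fastmem preamble, spilling if required...
	const int dst = alloc_cb ? alloc_cb()        // caller allocates the real target now
	                         : allocScratchXmm();
	// emit: movaps xmm<dst>, [membase + addrReg] (slow path backpatched on fault)
	return dst;
}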
+	////////////////////////////////////////////////////
+	void recSQC2()
+	{
+		if (g_pCurInstInfo->info & EEINST_COP2_SYNC_VU0)
+			mVUSyncVU0();
+		else if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0)
+			mVUFinishVU0();
+		// vf00 has to be special cased here, because of the microvu temps...
+		const int ftreg = _Rt_ ? _allocVFtoXMMreg(_Rt_, MODE_READ) : _allocTempXMMreg(XMMT_FPS);
+		if (!_Rt_)
+			xMOVAPS(xRegisterSSE(ftreg), ptr128[&vu0Regs.VF[0].F]);
+		if (GPR_IS_CONST1(_Rs_))
+		{
+			const u32 addr = (g_cpuConstRegs[_Rs_].UL[0] + _Imm_) & ~0xFu;
+			vtlb_DynGenWrite_Const(128, true, addr, ftreg);
+		}
+		else
+		{
+			_eeMoveGPRtoR(arg1regd, _Rs_);
+			if (_Imm_ != 0)
+				xADD(arg1regd, _Imm_);
+			xAND(arg1regd, ~0xF);
+			vtlb_DynGenWrite(128, true, arg1regd.GetId(), ftreg);
+		}
+		if (!_Rt_)
+			_freeXMMreg(ftreg);
+		EE::Profiler.EmitOp(eeOpcode::SQC2);
+	}
+#else
+	REC_FUNC(LQC2);
+	REC_FUNC(SQC2);
+#endif
} // namespace OpcodeImpl
} // namespace Dynarec
} // namespace R5900
void recCOP2_BC2() { recCOP2_BC2t[_Rt_](); }
void recCOP2_SPEC1()
{
-	if (g_pCurInstInfo->info & EEINST_COP2_FINISH_VU0_MICRO)
-	{
-		xTEST(ptr32[&VU0.VI[REG_VPU_STAT].UL], 0x1);
-		xForwardJZ32 skipvuidle;
-		_cop2BackupRegs();
-		xFastCall((void*)_vu0FinishMicro);
-		_cop2RestoreRegs();
-		skipvuidle.SetTarget();
-	}
+	if (g_pCurInstInfo->info & (EEINST_COP2_SYNC_VU0 | EEINST_COP2_FINISH_VU0))
+		mVUFinishVU0();
	recCOP2SPECIAL1t[_Funct_]();