vtlb: Switch read64 and read128 handlers to return in sse regs

This commit is contained in:
TellowKrinkle 2021-09-20 01:10:59 -05:00 committed by refractionpcsx2
parent 7563f54e83
commit e9518f78c7
17 changed files with 324 additions and 181 deletions

View File

@ -151,6 +151,7 @@ static const int __pagesize = PCSX2_PAGESIZE;
#ifndef __fastcall #ifndef __fastcall
#define __fastcall __attribute__((fastcall)) #define __fastcall __attribute__((fastcall))
#endif #endif
#define __vectorcall __fastcall
#define _inline __inline__ __attribute__((unused)) #define _inline __inline__ __attribute__((unused))
#ifdef NDEBUG #ifdef NDEBUG
#define __forceinline __attribute__((always_inline, unused)) #define __forceinline __attribute__((always_inline, unused))

View File

@ -215,6 +215,7 @@ set(pcsx2Headers
SaveState.h SaveState.h
Sifcmd.h Sifcmd.h
Sif.h Sif.h
SingleRegisterTypes.h
Sio.h Sio.h
sio_internal.h sio_internal.h
SPR.h SPR.h

View File

@ -212,17 +212,26 @@ static int getFreeCache(u32 mem, int* way)
return setIdx; return setIdx;
} }
template <bool Write, int Bytes>
void* prepareCacheAccess(u32 mem, int* way, int* idx)
{
*way = 0;
*idx = getFreeCache(mem, way);
CacheLine line = cache.lineAt(*idx, *way);
if (Write)
line.tag.setDirty();
u32 aligned = mem & ~(Bytes - 1);
return &line.data.bytes[aligned & 0x3f];
}
template <typename Int> template <typename Int>
void writeCache(u32 mem, Int value) void writeCache(u32 mem, Int value)
{ {
int way = 0; int way, idx;
const int idx = getFreeCache(mem, &way); void* addr = prepareCacheAccess<true, sizeof(Int)>(mem, &way, &idx);
CACHE_LOG("writeCache%d %8.8x adding to %d, way %d, value %llx", 8 * sizeof(value), mem, idx, way, value); CACHE_LOG("writeCache%d %8.8x adding to %d, way %d, value %llx", 8 * sizeof(value), mem, idx, way, value);
CacheLine line = cache.lineAt(idx, way); *reinterpret_cast<Int*>(addr) = value;
line.tag.setDirty(); // Set dirty bit for writes;
u32 aligned = mem & ~(sizeof(value) - 1);
*reinterpret_cast<Int*>(&line.data.bytes[aligned & 0x3f]) = value;
} }
void writeCache8(u32 mem, u8 value) void writeCache8(u32 mem, u8 value)
@ -247,25 +256,20 @@ void writeCache64(u32 mem, const u64 value)
void writeCache128(u32 mem, const mem128_t* value) void writeCache128(u32 mem, const mem128_t* value)
{ {
int way = 0; int way, idx;
const int idx = getFreeCache(mem, &way); void* addr = prepareCacheAccess<true, sizeof(mem128_t)>(mem, &way, &idx);
CACHE_LOG("writeCache128 %8.8x adding to %d, way %x, lo %x, hi %x", mem, idx, way, value->lo, value->hi); CACHE_LOG("writeCache128 %8.8x adding to %d, way %x, lo %llx, hi %llx", mem, idx, way, value->lo, value->hi);
CacheLine line = cache.lineAt(idx, way); *reinterpret_cast<mem128_t*>(addr) = *value;
line.tag.setDirty(); // Set dirty bit for writes;
u32 aligned = mem & ~0xF;
*reinterpret_cast<mem128_t*>(&line.data.bytes[aligned & 0x3f]) = *value;
} }
template <typename Int> template <typename Int>
Int readCache(u32 mem) Int readCache(u32 mem)
{ {
int way = 0; int way, idx;
const int idx = getFreeCache(mem, &way); void* addr = prepareCacheAccess<false, sizeof(Int)>(mem, &way, &idx);
CacheLine line = cache.lineAt(idx, way); Int value = *reinterpret_cast<Int*>(addr);
u32 aligned = mem & ~(sizeof(Int) - 1);
Int value = *reinterpret_cast<Int*>(&line.data.bytes[aligned & 0x3f]);
CACHE_LOG("readCache%d %8.8x from %d, way %d, value %llx", 8 * sizeof(value), mem, idx, way, value); CACHE_LOG("readCache%d %8.8x from %d, way %d, value %llx", 8 * sizeof(value), mem, idx, way, value);
return value; return value;
} }
@ -286,9 +290,23 @@ u32 readCache32(u32 mem)
return readCache<u32>(mem); return readCache<u32>(mem);
} }
u64 readCache64(u32 mem) RETURNS_R64 readCache64(u32 mem)
{ {
return readCache<u64>(mem); int way, idx;
void* addr = prepareCacheAccess<false, sizeof(u64)>(mem, &way, &idx);
r64 value = r64_load(addr);
CACHE_LOG("readCache64 %8.8x from %d, way %d, value %llx", mem, idx, way, *(u64*)&value);
return value;
}
RETURNS_R128 readCache128(u32 mem)
{
int way, idx;
void* addr = prepareCacheAccess<false, sizeof(mem128_t)>(mem, &way, &idx);
r128 value = r128_load(addr);
u64* vptr = reinterpret_cast<u64*>(&value);
CACHE_LOG("readCache128 %8.8x from %d, way %d, lo %llx, hi %llx", mem, idx, way, vptr[0], vptr[1]);
return value;
} }
template <typename Op> template <typename Op>

View File

@ -17,6 +17,7 @@
#define __CACHE_H__ #define __CACHE_H__
#include "Common.h" #include "Common.h"
#include "SingleRegisterTypes.h"
void resetCache(); void resetCache();
void writeCache8(u32 mem, u8 value); void writeCache8(u32 mem, u8 value);
@ -27,6 +28,7 @@ void writeCache128(u32 mem, const mem128_t* value);
u8 readCache8(u32 mem); u8 readCache8(u32 mem);
u16 readCache16(u32 mem); u16 readCache16(u32 mem);
u32 readCache32(u32 mem); u32 readCache32(u32 mem);
u64 readCache64(u32 mem); RETURNS_R64 readCache64(u32 mem);
RETURNS_R128 readCache128(u32 mem);
#endif /* __CACHE_H__ */ #endif /* __CACHE_H__ */

View File

@ -33,7 +33,7 @@ static __fi void IntCHackCheck()
if( diff > 0 ) cpuRegs.cycle = g_nextEventCycle; if( diff > 0 ) cpuRegs.cycle = g_nextEventCycle;
} }
template< uint page > void __fastcall _hwRead128(u32 mem, mem128_t* result ); template< uint page > RETURNS_R128 _hwRead128(u32 mem);
template< uint page, bool intcstathack > template< uint page, bool intcstathack >
mem32_t __fastcall _hwRead32(u32 mem) mem32_t __fastcall _hwRead32(u32 mem)
@ -71,9 +71,8 @@ mem32_t __fastcall _hwRead32(u32 mem)
DevCon.WriteLn( Color_Cyan, "Reading 32-bit FIFO data" ); DevCon.WriteLn( Color_Cyan, "Reading 32-bit FIFO data" );
u128 out128; r128 out128 = _hwRead128<page>(mem & ~0x0f);
_hwRead128<page>(mem & ~0x0f, &out128); return reinterpret_cast<u32*>(&out128)[(mem >> 2) & 0x3];
return out128._u32[(mem >> 2) & 0x3];
} }
break; break;
@ -270,15 +269,14 @@ mem16_t __fastcall hwRead16_page_0F_INTC_HACK(u32 mem)
} }
template< uint page > template< uint page >
static void _hwRead64(u32 mem, mem64_t* result ) static RETURNS_R64 _hwRead64(u32 mem)
{ {
pxAssume( (mem & 0x07) == 0 ); pxAssume( (mem & 0x07) == 0 );
switch (page) switch (page)
{ {
case 0x02: case 0x02:
*result = ipuRead64(mem); return ipuRead64(mem);
return;
case 0x04: case 0x04:
case 0x05: case 0x05:
@ -295,11 +293,9 @@ static void _hwRead64(u32 mem, mem64_t* result )
uint wordpart = (mem >> 3) & 0x1; uint wordpart = (mem >> 3) & 0x1;
DevCon.WriteLn( Color_Cyan, "Reading 64-bit FIFO data (%s 64 bits discarded)", wordpart ? "upper" : "lower" ); DevCon.WriteLn( Color_Cyan, "Reading 64-bit FIFO data (%s 64 bits discarded)", wordpart ? "upper" : "lower" );
u128 out128; r128 full = _hwRead128<page>(mem & ~0x0f);
_hwRead128<page>(mem & ~0x0f, &out128); return r64_load(reinterpret_cast<u64*>(&full) + wordpart);
*result = out128._u64[wordpart];
} }
return;
case 0x0F: case 0x0F:
if ((mem & 0xffffff00) == 0x1000f300) if ((mem & 0xffffff00) == 0x1000f300)
{ {
@ -308,30 +304,33 @@ static void _hwRead64(u32 mem, mem64_t* result )
{ {
ReadFifoSingleWord(); ReadFifoSingleWord();
*result = psHu32(0x1000f3E0); u32 lo = psHu32(0x1000f3E0);
ReadFifoSingleWord(); ReadFifoSingleWord();
*result |= (u64)psHu32(0x1000f3E0) << 32; u32 hi = psHu32(0x1000f3E0);
return r64_from_u32x2(lo, hi);
} }
} }
return;
default: break; default: break;
} }
*result = _hwRead32<page,false>( mem ); return r64_from_u32(_hwRead32<page, false>(mem));
} }
template< uint page > template< uint page >
void __fastcall hwRead64(u32 mem, mem64_t* result ) RETURNS_R64 hwRead64(u32 mem)
{ {
_hwRead64<page>( mem, result ); r64 res = _hwRead64<page>(mem);
eeHwTraceLog( mem, *result, true ); eeHwTraceLog(mem, *(u64*)&res, true);
return res;
} }
template< uint page > template< uint page >
void __fastcall _hwRead128(u32 mem, mem128_t* result ) RETURNS_R128 _hwRead128(u32 mem)
{ {
pxAssume( (mem & 0x0f) == 0 ); pxAssume( (mem & 0x0f) == 0 );
alignas(16) mem128_t result;
// FIFOs are the only "legal" 128 bit registers, so we Handle them first. // FIFOs are the only "legal" 128 bit registers, so we Handle them first.
// All other registers fall back on the 64-bit handler (and from there // All other registers fall back on the 64-bit handler (and from there
// all non-IPU reads fall back to the 32-bit handler). // all non-IPU reads fall back to the 32-bit handler).
@ -339,15 +338,15 @@ void __fastcall _hwRead128(u32 mem, mem128_t* result )
switch (page) switch (page)
{ {
case 0x05: case 0x05:
ReadFIFO_VIF1( result ); ReadFIFO_VIF1(&result);
break; break;
case 0x07: case 0x07:
if (mem & 0x10) if (mem & 0x10)
ZeroQWC( result ); // IPUin is write-only return r128_zero(); // IPUin is write-only
else else
ReadFIFO_IPUout( result ); ReadFIFO_IPUout(&result);
break; break;
case 0x04: case 0x04:
case 0x06: case 0x06:
@ -355,13 +354,12 @@ void __fastcall _hwRead128(u32 mem, mem128_t* result )
// [Ps2Confirm] Reads from these FIFOs (and IPUin) do one of the following: // [Ps2Confirm] Reads from these FIFOs (and IPUin) do one of the following:
// return zero, leave contents of the dest register unchanged, or in some // return zero, leave contents of the dest register unchanged, or in some
// indeterminate state. The actual behavior probably isn't important. // indeterminate state. The actual behavior probably isn't important.
ZeroQWC( result ); return r128_zero();
break;
case 0x0F: case 0x0F:
// todo: psx mode: this is new // todo: psx mode: this is new
if (((mem & 0x1FFFFFFF) >= EEMemoryMap::SBUS_PS1_Start) && ((mem & 0x1FFFFFFF) < EEMemoryMap::SBUS_PS1_End)) { if (((mem & 0x1FFFFFFF) >= EEMemoryMap::SBUS_PS1_Start) && ((mem & 0x1FFFFFFF) < EEMemoryMap::SBUS_PS1_End)) {
PGIFrQword((mem & 0x1FFFFFFF), result); PGIFrQword((mem & 0x1FFFFFFF), &result);
return; break;
} }
// WARNING: this code is never executed anymore due to previous condition. // WARNING: this code is never executed anymore due to previous condition.
@ -373,37 +371,38 @@ void __fastcall _hwRead128(u32 mem, mem128_t* result )
{ {
ReadFifoSingleWord(); ReadFifoSingleWord();
result->lo = psHu32(0x1000f3E0); u32 part0 = psHu32(0x1000f3E0);
ReadFifoSingleWord(); ReadFifoSingleWord();
result->lo |= (u64)psHu32(0x1000f3E0) << 32; u32 part1 = psHu32(0x1000f3E0);
ReadFifoSingleWord(); ReadFifoSingleWord();
result->hi = psHu32(0x1000f3E0); u32 part2 = psHu32(0x1000f3E0);
ReadFifoSingleWord(); ReadFifoSingleWord();
result->hi |= (u64)psHu32(0x1000f3E0) << 32; u32 part3 = psHu32(0x1000f3E0);
return r128_from_u32x4(part0, part1, part2, part3);
} }
} }
break; break;
default: default:
_hwRead64<page>( mem, &result->lo ); return r128_from_r64_clean(_hwRead64<page>(mem));
result->hi = 0;
break;
} }
return r128_load(&result);
} }
template< uint page > template< uint page >
void __fastcall hwRead128(u32 mem, mem128_t* result ) RETURNS_R128 hwRead128(u32 mem)
{ {
_hwRead128<page>( mem, result ); r128 res = _hwRead128<page>(mem);
eeHwTraceLog( mem, *result, true ); eeHwTraceLog(mem, *(mem128_t*)&res, true);
return res;
} }
#define InstantizeHwRead(pageidx) \ #define InstantizeHwRead(pageidx) \
template mem8_t __fastcall hwRead8<pageidx>(u32 mem); \ template mem8_t __fastcall hwRead8<pageidx>(u32 mem); \
template mem16_t __fastcall hwRead16<pageidx>(u32 mem); \ template mem16_t __fastcall hwRead16<pageidx>(u32 mem); \
template mem32_t __fastcall hwRead32<pageidx>(u32 mem); \ template mem32_t __fastcall hwRead32<pageidx>(u32 mem); \
template void __fastcall hwRead64<pageidx>(u32 mem, mem64_t* result ); \ template RETURNS_R64 hwRead64<pageidx>(u32 mem); \
template void __fastcall hwRead128<pageidx>(u32 mem, mem128_t* result ); \ template RETURNS_R128 hwRead128<pageidx>(u32 mem); \
template mem32_t __fastcall _hwRead32<pageidx, false>(u32 mem); template mem32_t __fastcall _hwRead32<pageidx, false>(u32 mem);
InstantizeHwRead(0x00); InstantizeHwRead(0x08); InstantizeHwRead(0x00); InstantizeHwRead(0x08);

View File

@ -241,7 +241,7 @@ __fi u32 ipuRead32(u32 mem)
return psHu32(IPU_CMD + mem); return psHu32(IPU_CMD + mem);
} }
__fi u64 ipuRead64(u32 mem) __fi RETURNS_R64 ipuRead64(u32 mem)
{ {
// Note: It's assumed that mem's input value is always in the 0x10002000 page // Note: It's assumed that mem's input value is always in the 0x10002000 page
// of memory (if not, it's probably bad code). // of memory (if not, it's probably bad code).
@ -263,7 +263,7 @@ __fi u64 ipuRead64(u32 mem)
if (ipuRegs.cmd.DATA & 0xffffff) if (ipuRegs.cmd.DATA & 0xffffff)
IPU_LOG("read64: IPU_CMD=BUSY=%x, DATA=%08X", ipuRegs.cmd.BUSY ? 1 : 0, ipuRegs.cmd.DATA); IPU_LOG("read64: IPU_CMD=BUSY=%x, DATA=%08X", ipuRegs.cmd.BUSY ? 1 : 0, ipuRegs.cmd.DATA);
return ipuRegs.cmd._u64; return r64_load(&ipuRegs.cmd._u64);
} }
ipucase(IPU_CTRL): ipucase(IPU_CTRL):
@ -282,7 +282,7 @@ __fi u64 ipuRead64(u32 mem)
IPU_LOG("read64: Unknown=%x", mem); IPU_LOG("read64: Unknown=%x", mem);
break; break;
} }
return psHu64(IPU_CMD + mem); return r64_load(&psHu64(IPU_CMD + mem));
} }
void ipuSoftReset() void ipuSoftReset()

View File

@ -289,7 +289,7 @@ extern int coded_block_pattern;
extern void ipuReset(); extern void ipuReset();
extern u32 ipuRead32(u32 mem); extern u32 ipuRead32(u32 mem);
extern u64 ipuRead64(u32 mem); extern RETURNS_R64 ipuRead64(u32 mem);
extern bool ipuWrite32(u32 mem,u32 value); extern bool ipuWrite32(u32 mem,u32 value);
extern bool ipuWrite64(u32 mem,u64 value); extern bool ipuWrite64(u32 mem,u64 value);

View File

@ -220,13 +220,13 @@ static mem32_t __fastcall nullRead32(u32 mem) {
MEM_LOG("Read uninstalled memory at address %08x", mem); MEM_LOG("Read uninstalled memory at address %08x", mem);
return 0; return 0;
} }
static void __fastcall nullRead64(u32 mem, mem64_t *out) { static RETURNS_R64 nullRead64(u32 mem) {
MEM_LOG("Read uninstalled memory at address %08x", mem); MEM_LOG("Read uninstalled memory at address %08x", mem);
*out = 0; return r64_zero();
} }
static void __fastcall nullRead128(u32 mem, mem128_t *out) { static RETURNS_R128 nullRead128(u32 mem) {
MEM_LOG("Read uninstalled memory at address %08x", mem); MEM_LOG("Read uninstalled memory at address %08x", mem);
ZeroQWC(out); return r128_zero();
} }
static void __fastcall nullWrite8(u32 mem, mem8_t value) static void __fastcall nullWrite8(u32 mem, mem8_t value)
{ {
@ -324,12 +324,12 @@ static mem32_t __fastcall _ext_memRead32(u32 mem)
} }
template<int p> template<int p>
static void __fastcall _ext_memRead64(u32 mem, mem64_t *out) static RETURNS_R64 _ext_memRead64(u32 mem)
{ {
switch (p) switch (p)
{ {
case 6: // gsm case 6: // gsm
*out = gsRead64(mem); return; return r64_from_u64(gsRead64(mem));
default: break; default: break;
} }
@ -338,15 +338,14 @@ static void __fastcall _ext_memRead64(u32 mem, mem64_t *out)
} }
template<int p> template<int p>
static void __fastcall _ext_memRead128(u32 mem, mem128_t *out) static RETURNS_R128 _ext_memRead128(u32 mem)
{ {
switch (p) switch (p)
{ {
//case 1: // hwm //case 1: // hwm
// hwRead128(mem & ~0xa0000000, out); return; // return hwRead128(mem & ~0xa0000000);
case 6: // gsm case 6: // gsm
CopyQWC(out,PS2GS_BASE(mem)); return r128_load(PS2GS_BASE(mem));
return;
default: break; default: break;
} }
@ -475,19 +474,19 @@ template<int vunum> static mem32_t __fc vuMicroRead32(u32 addr) {
if (vunum && THREAD_VU1) vu1Thread.WaitVU(); if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return *(u32*)&vu->Micro[addr]; return *(u32*)&vu->Micro[addr];
} }
template<int vunum> static void __fc vuMicroRead64(u32 addr,mem64_t* data) { template<int vunum> static RETURNS_R64 vuMicroRead64(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0; VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU(); if (vunum && THREAD_VU1) vu1Thread.WaitVU();
*data=*(u64*)&vu->Micro[addr]; return r64_load(&vu->Micro[addr]);
} }
template<int vunum> static void __fc vuMicroRead128(u32 addr,mem128_t* data) { template<int vunum> static RETURNS_R128 vuMicroRead128(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0; VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU(); if (vunum && THREAD_VU1) vu1Thread.WaitVU();
CopyQWC(data,&vu->Micro[addr]); return r128_load(&vu->Micro[addr]);
} }
// Profiled VU writes: Happen very infrequently, with exception of BIOS initialization (at most twice per // Profiled VU writes: Happen very infrequently, with exception of BIOS initialization (at most twice per
@ -578,17 +577,17 @@ template<int vunum> static mem32_t __fc vuDataRead32(u32 addr) {
if (vunum && THREAD_VU1) vu1Thread.WaitVU(); if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return *(u32*)&vu->Mem[addr]; return *(u32*)&vu->Mem[addr];
} }
template<int vunum> static void __fc vuDataRead64(u32 addr, mem64_t* data) { template<int vunum> static RETURNS_R64 vuDataRead64(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0; VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU(); if (vunum && THREAD_VU1) vu1Thread.WaitVU();
*data=*(u64*)&vu->Mem[addr]; return r64_load(&vu->Mem[addr]);
} }
template<int vunum> static void __fc vuDataRead128(u32 addr, mem128_t* data) { template<int vunum> static RETURNS_R128 vuDataRead128(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0; VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff; addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU(); if (vunum && THREAD_VU1) vu1Thread.WaitVU();
CopyQWC(data,&vu->Mem[addr]); return r128_load(&vu->Mem[addr]);
} }
// VU Data Memory Writes... // VU Data Memory Writes...

View File

@ -136,11 +136,11 @@ extern void mmap_ResetBlockTracking();
#define memWrite16 vtlb_memWrite<mem16_t> #define memWrite16 vtlb_memWrite<mem16_t>
#define memWrite32 vtlb_memWrite<mem32_t> #define memWrite32 vtlb_memWrite<mem32_t>
static __fi void memRead64(u32 mem, mem64_t* out) { vtlb_memRead64(mem, out); } static __fi void memRead64(u32 mem, mem64_t* out) { _mm_storel_epi64((__m128i*)out, vtlb_memRead64(mem)); }
static __fi void memRead64(u32 mem, mem64_t& out) { vtlb_memRead64(mem, &out); } static __fi void memRead64(u32 mem, mem64_t& out) { memRead64(mem, &out); }
static __fi void memRead128(u32 mem, mem128_t* out) { vtlb_memRead128(mem, out); } static __fi void memRead128(u32 mem, mem128_t* out) { _mm_store_si128((__m128i*)out, vtlb_memRead128(mem)); }
static __fi void memRead128(u32 mem, mem128_t& out) { vtlb_memRead128(mem, &out); } static __fi void memRead128(u32 mem, mem128_t& out) { memRead128(mem, &out); }
static __fi void memWrite64(u32 mem, const mem64_t* val) { vtlb_memWrite64(mem, val); } static __fi void memWrite64(u32 mem, const mem64_t* val) { vtlb_memWrite64(mem, val); }
static __fi void memWrite64(u32 mem, const mem64_t& val) { vtlb_memWrite64(mem, &val); } static __fi void memWrite64(u32 mem, const mem64_t& val) { vtlb_memWrite64(mem, &val); }

107
pcsx2/SingleRegisterTypes.h Normal file
View File

@ -0,0 +1,107 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// --------------------------------------------------------------------------------------
// r64 / r128 - Types that are guaranteed to fit in one register
// --------------------------------------------------------------------------------------
// Note: Recompilers rely on some of these types and the registers they allocate to,
// so be careful if you want to change them
#pragma once
#include <immintrin.h>
// Can't stick them in structs because it breaks calling convention things, yay
using r64 = __m128i;
using r128 = __m128i;
// Calling convention setting, yay
#define RETURNS_R64 r64 __vectorcall
#define RETURNS_R128 r128 __vectorcall
#define TAKES_R64 __vectorcall
#define TAKES_R128 __vectorcall
// And since we can't stick them in structs, we get lots of static methods, yay!
__forceinline static r64 r64_load(const void* ptr)
{
return _mm_loadl_epi64(reinterpret_cast<const r64*>(ptr));
}
__forceinline static r64 r64_zero()
{
return _mm_setzero_si128();
}
__forceinline static r64 r64_from_u32(u32 val)
{
return _mm_cvtsi32_si128(val);
}
__forceinline static r64 r64_from_u32x2(u32 lo, u32 hi)
{
return _mm_unpacklo_epi32(_mm_cvtsi32_si128(lo), _mm_cvtsi32_si128(hi));
}
__forceinline static r64 r64_from_u64(u64 val)
{
#ifdef _M_X86_64
return _mm_cvtsi64_si128(val);
#else
return r64_from_u32x2(val, val >> 64);
#endif
}
__forceinline static r128 r128_load(const void* ptr)
{
return _mm_load_si128(reinterpret_cast<const r128*>(ptr));
}
__forceinline static r128 r128_zero()
{
return _mm_setzero_si128();
}
/// Expects that r64 came from r64-handling code, and not from a recompiler or something
__forceinline static r128 r128_from_r64_clean(r64 val)
{
return val;
}
__forceinline static r128 r128_from_u32x4(u32 lo0, u32 lo1, u32 hi0, u32 hi1)
{
return _mm_setr_epi32(lo0, lo1, hi0, hi1);
}
template <typename u>
struct rhelper;
template <>
struct rhelper<u64>
{
using r = r64;
__forceinline static r load(void* ptr) { return r64_load(ptr); }
__forceinline static r zero() { return r64_zero(); }
};
template <>
struct rhelper<u128>
{
using r = r128;
__forceinline static r load(void* ptr) { return r128_load(ptr); }
__forceinline static r zero() { return r128_zero(); }
};
template <typename u>
using u_to_r = typename rhelper<u>::r;

View File

@ -952,6 +952,7 @@
<ClInclude Include="Dump.h" /> <ClInclude Include="Dump.h" />
<ClInclude Include="IopCommon.h" /> <ClInclude Include="IopCommon.h" />
<ClInclude Include="SaveState.h" /> <ClInclude Include="SaveState.h" />
<ClInclude Include="SingleRegisterTypes.h" />
<ClInclude Include="System.h" /> <ClInclude Include="System.h" />
<ClInclude Include="System\SysThreads.h" /> <ClInclude Include="System\SysThreads.h" />
<ClInclude Include="System\RecTypes.h" /> <ClInclude Include="System\RecTypes.h" />

View File

@ -1687,6 +1687,9 @@
<ClInclude Include="SaveState.h"> <ClInclude Include="SaveState.h">
<Filter>System\Include</Filter> <Filter>System\Include</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="SingleRegisterTypes.h">
<Filter>System\Include</Filter>
</ClInclude>
<ClInclude Include="System.h"> <ClInclude Include="System.h">
<Filter>System\Include</Filter> <Filter>System\Include</Filter>
</ClInclude> </ClInclude>

View File

@ -41,8 +41,8 @@
template< uint page > extern mem8_t __fastcall hwRead8 (u32 mem); template< uint page > extern mem8_t __fastcall hwRead8 (u32 mem);
template< uint page > extern mem16_t __fastcall hwRead16 (u32 mem); template< uint page > extern mem16_t __fastcall hwRead16 (u32 mem);
template< uint page > extern mem32_t __fastcall hwRead32 (u32 mem); template< uint page > extern mem32_t __fastcall hwRead32 (u32 mem);
template< uint page > extern void __fastcall hwRead64 (u32 mem, mem64_t* out ); template< uint page > extern RETURNS_R64 hwRead64 (u32 mem);
template< uint page > extern void __fastcall hwRead128(u32 mem, mem128_t* out); template< uint page > extern RETURNS_R128 hwRead128(u32 mem);
// Internal hwRead32 which does not log reads, used by hwWrite8/16 to perform // Internal hwRead32 which does not log reads, used by hwWrite8/16 to perform
// read-modify-write operations. // read-modify-write operations.

View File

@ -164,7 +164,7 @@ DataType __fastcall vtlb_memRead(u32 addr)
return 0; // technically unreachable, but suppresses warnings. return 0; // technically unreachable, but suppresses warnings.
} }
void __fastcall vtlb_memRead64(u32 mem, mem64_t *out) RETURNS_R64 vtlb_memRead64(u32 mem)
{ {
auto vmv = vtlbdata.vmap[mem>>VTLB_PAGE_BITS]; auto vmv = vtlbdata.vmap[mem>>VTLB_PAGE_BITS];
@ -173,22 +173,22 @@ void __fastcall vtlb_memRead64(u32 mem, mem64_t *out)
if (!CHECK_EEREC) { if (!CHECK_EEREC) {
if(CHECK_CACHE && CheckCache(mem)) if(CHECK_CACHE && CheckCache(mem))
{ {
*out = readCache64(mem); return readCache64(mem);
return;
} }
} }
*out = *(mem64_t*)vmv.assumePtr(mem); return r64_load(reinterpret_cast<const void*>(vmv.assumePtr(mem)));
} }
else else
{ {
//has to: translate, find function, call function //has to: translate, find function, call function
u32 paddr = vmv.assumeHandlerGetPAddr(mem); u32 paddr = vmv.assumeHandlerGetPAddr(mem);
//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr); //Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
vmv.assumeHandler<64, false>()(paddr, out); return vmv.assumeHandler<64, false>()(paddr);
} }
} }
void __fastcall vtlb_memRead128(u32 mem, mem128_t *out)
RETURNS_R128 vtlb_memRead128(u32 mem)
{ {
auto vmv = vtlbdata.vmap[mem>>VTLB_PAGE_BITS]; auto vmv = vtlbdata.vmap[mem>>VTLB_PAGE_BITS];
@ -198,20 +198,18 @@ void __fastcall vtlb_memRead128(u32 mem, mem128_t *out)
{ {
if(CHECK_CACHE && CheckCache(mem)) if(CHECK_CACHE && CheckCache(mem))
{ {
out->lo = readCache64(mem); return readCache128(mem);
out->hi = readCache64(mem+8);
return;
} }
} }
CopyQWC(out,(void*)vmv.assumePtr(mem)); return r128_load(reinterpret_cast<const void*>(vmv.assumePtr(mem)));
} }
else else
{ {
//has to: translate, find function, call function //has to: translate, find function, call function
u32 paddr = vmv.assumeHandlerGetPAddr(mem); u32 paddr = vmv.assumeHandlerGetPAddr(mem);
//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr); //Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
vmv.assumeHandler<128, false>()(paddr, out); return vmv.assumeHandler<128, false>()(paddr);
} }
} }
@ -436,28 +434,28 @@ static __ri void vtlb_BusError(u32 addr,u32 mode)
} }
template<typename OperandType, u32 saddr> template<typename OperandType, u32 saddr>
OperandType __fastcall vtlbUnmappedVReadSm(u32 addr) { vtlb_Miss(addr|saddr,0); return 0; } OperandType __fastcall vtlbUnmappedVReadSm(u32 addr) { vtlb_Miss(addr|saddr,0); return 0; }
template<typename OperandType, u32 saddr> template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedVReadLg(u32 addr,OperandType* data) { vtlb_Miss(addr|saddr,0); } u_to_r<OperandType> __vectorcall vtlbUnmappedVReadLg(u32 addr) { vtlb_Miss(addr|saddr,0); return rhelper<OperandType>::zero(); }
template<typename OperandType, u32 saddr> template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedVWriteSm(u32 addr,OperandType data) { vtlb_Miss(addr|saddr,1); } void __fastcall vtlbUnmappedVWriteSm(u32 addr,OperandType data) { vtlb_Miss(addr|saddr,1); }
template<typename OperandType, u32 saddr> template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedVWriteLg(u32 addr,const OperandType* data) { vtlb_Miss(addr|saddr,1); } void __fastcall vtlbUnmappedVWriteLg(u32 addr,const OperandType* data) { vtlb_Miss(addr|saddr,1); }
template<typename OperandType, u32 saddr> template<typename OperandType, u32 saddr>
OperandType __fastcall vtlbUnmappedPReadSm(u32 addr) { vtlb_BusError(addr|saddr,0); return 0; } OperandType __fastcall vtlbUnmappedPReadSm(u32 addr) { vtlb_BusError(addr|saddr,0); return 0; }
template<typename OperandType, u32 saddr> template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedPReadLg(u32 addr,OperandType* data) { vtlb_BusError(addr|saddr,0); } u_to_r<OperandType> __vectorcall vtlbUnmappedPReadLg(u32 addr) { vtlb_BusError(addr|saddr,0); return rhelper<OperandType>::zero(); }
template<typename OperandType, u32 saddr> template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedPWriteSm(u32 addr,OperandType data) { vtlb_BusError(addr|saddr,1); } void __fastcall vtlbUnmappedPWriteSm(u32 addr,OperandType data) { vtlb_BusError(addr|saddr,1); }
template<typename OperandType, u32 saddr> template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedPWriteLg(u32 addr,const OperandType* data) { vtlb_BusError(addr|saddr,1); } void __fastcall vtlbUnmappedPWriteLg(u32 addr,const OperandType* data) { vtlb_BusError(addr|saddr,1); }
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------
// VTLB mapping errors // VTLB mapping errors
@ -484,14 +482,16 @@ static mem32_t __fastcall vtlbDefaultPhyRead32(u32 addr)
return 0; return 0;
} }
static void __fastcall vtlbDefaultPhyRead64(u32 addr, mem64_t* dest) static __m128i __vectorcall vtlbDefaultPhyRead64(u32 addr)
{ {
pxFailDev(pxsFmt("(VTLB) Attempted read64 from unmapped physical address @ 0x%08X.", addr)); pxFailDev(pxsFmt("(VTLB) Attempted read64 from unmapped physical address @ 0x%08X.", addr));
return r64_zero();
} }
static void __fastcall vtlbDefaultPhyRead128(u32 addr, mem128_t* dest) static __m128i __vectorcall vtlbDefaultPhyRead128(u32 addr)
{ {
pxFailDev(pxsFmt("(VTLB) Attempted read128 from unmapped physical address @ 0x%08X.", addr)); pxFailDev(pxsFmt("(VTLB) Attempted read128 from unmapped physical address @ 0x%08X.", addr));
return r128_zero();
} }
static void __fastcall vtlbDefaultPhyWrite8(u32 addr, mem8_t data) static void __fastcall vtlbDefaultPhyWrite8(u32 addr, mem8_t data)

View File

@ -16,6 +16,7 @@
#pragma once #pragma once
#include "MemoryTypes.h" #include "MemoryTypes.h"
#include "SingleRegisterTypes.h"
#include "common/PageFaultSource.h" #include "common/PageFaultSource.h"
@ -25,8 +26,8 @@ static const uptr VTLB_AllocUpperBounds = _1gb * 2;
typedef mem8_t __fastcall vtlbMemR8FP(u32 addr); typedef mem8_t __fastcall vtlbMemR8FP(u32 addr);
typedef mem16_t __fastcall vtlbMemR16FP(u32 addr); typedef mem16_t __fastcall vtlbMemR16FP(u32 addr);
typedef mem32_t __fastcall vtlbMemR32FP(u32 addr); typedef mem32_t __fastcall vtlbMemR32FP(u32 addr);
typedef void __fastcall vtlbMemR64FP(u32 addr,mem64_t* data); typedef RETURNS_R64 vtlbMemR64FP(u32 addr);
typedef void __fastcall vtlbMemR128FP(u32 addr,mem128_t* data); typedef RETURNS_R128 vtlbMemR128FP(u32 addr);
// Specialized function pointers for each write type // Specialized function pointers for each write type
typedef void __fastcall vtlbMemW8FP(u32 addr,mem8_t data); typedef void __fastcall vtlbMemW8FP(u32 addr,mem8_t data);
@ -87,8 +88,8 @@ extern void vtlb_VMapUnmap(u32 vaddr,u32 sz);
template< typename DataType > template< typename DataType >
extern DataType __fastcall vtlb_memRead(u32 mem); extern DataType __fastcall vtlb_memRead(u32 mem);
extern void __fastcall vtlb_memRead64(u32 mem, mem64_t *out); extern RETURNS_R64 vtlb_memRead64(u32 mem);
extern void __fastcall vtlb_memRead128(u32 mem, mem128_t *out); extern RETURNS_R128 vtlb_memRead128(u32 mem);
template< typename DataType > template< typename DataType >
extern void __fastcall vtlb_memWrite(u32 mem, DataType value); extern void __fastcall vtlb_memWrite(u32 mem, DataType value);
@ -97,10 +98,10 @@ extern void __fastcall vtlb_memWrite128(u32 mem, const mem128_t* value);
extern void vtlb_DynGenWrite(u32 sz); extern void vtlb_DynGenWrite(u32 sz);
extern void vtlb_DynGenRead32(u32 bits, bool sign); extern void vtlb_DynGenRead32(u32 bits, bool sign);
extern void vtlb_DynGenRead64(u32 sz); extern int vtlb_DynGenRead64(u32 sz, int gpr);
extern void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const ); extern void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const );
extern void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const ); extern int vtlb_DynGenRead64_Const( u32 bits, u32 addr_const, int gpr );
extern void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const ); extern void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const );
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------

View File

@ -107,10 +107,8 @@ void recLoad64(u32 bits, bool sign)
// Load arg2 with the destination. // Load arg2 with the destination.
// 64/128 bit modes load the result directly into the cpuRegs.GPR struct. // 64/128 bit modes load the result directly into the cpuRegs.GPR struct.
if (_Rt_) int gprreg = ((bits == 128) && _Rt_) ? _Rt_ : -1;
xLEA(arg2reg, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); int reg;
else
xLEA(arg2reg, ptr[&dummyValue[0]]);
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
{ {
@ -121,7 +119,7 @@ void recLoad64(u32 bits, bool sign)
_eeOnLoadWrite(_Rt_); _eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 0); _deleteEEreg(_Rt_, 0);
vtlb_DynGenRead64_Const(bits, srcadr); reg = vtlb_DynGenRead64_Const(bits, srcadr, gprreg);
} }
else else
{ {
@ -134,9 +132,17 @@ void recLoad64(u32 bits, bool sign)
_eeOnLoadWrite(_Rt_); _eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 0); _deleteEEreg(_Rt_, 0);
iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(bits); iFlushCall(FLUSH_FULLVTLB);
reg = vtlb_DynGenRead64(bits, gprreg);
}
if (gprreg == -1)
{
if (_Rt_)
xMOVQ(ptr64[&cpuRegs.GPR.r[_Rt_].UL[0]], xRegisterSSE(reg));
_freeXMMreg(reg);
} }
} }
@ -458,14 +464,14 @@ void recLDL()
return; return;
#ifdef LOADSTORE_RECOMPILE #ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]); int t2reg;
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
{ {
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07; srcadr &= ~0x07;
vtlb_DynGenRead64_Const(64, srcadr); t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
} }
else else
{ {
@ -478,13 +484,12 @@ void recLDL()
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(64); t2reg = vtlb_DynGenRead64(64, -1);
} }
int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ | MODE_WRITE); int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ | MODE_WRITE);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1);
int t2reg = _allocTempXMMreg(XMMT_INT, -1);
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
{ {
@ -502,7 +507,7 @@ void recLDL()
} }
xCMP(eax, 8); xCMP(eax, 8);
xForwardJE32 skip; xForwardJE8 skip;
//Calculate the shift from top bit to lowest //Calculate the shift from top bit to lowest
xMOV(edx, 64); xMOV(edx, 64);
xSHL(eax, 3); xSHL(eax, 3);
@ -512,18 +517,13 @@ void recLDL()
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(rtreg)); xPAND(xRegisterSSE(t0reg), xRegisterSSE(rtreg));
xMOVDQA(xRegisterSSE(t2reg), xRegisterSSE(t0reg));
xMOVDZX(xRegisterSSE(t1reg), edx); xMOVDZX(xRegisterSSE(t1reg), edx);
xMOVQZX(xRegisterSSE(t0reg), ptr64[&dummyValue[0]]); xPSLL.Q(xRegisterSSE(t2reg), xRegisterSSE(t1reg));
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); xPOR(xRegisterSSE(t2reg), xRegisterSSE(t0reg));
xPOR(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t0reg));
xForwardJump32 full;
skip.SetTarget();
xMOVL.PS(xRegisterSSE(rtreg), ptr128[&dummyValue[0]]); skip.SetTarget();
full.SetTarget(); xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t2reg));
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
@ -546,14 +546,14 @@ void recLDR()
return; return;
#ifdef LOADSTORE_RECOMPILE #ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]); int t2reg;
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
{ {
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07; srcadr &= ~0x07;
vtlb_DynGenRead64_Const(64, srcadr); t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
} }
else else
{ {
@ -566,13 +566,12 @@ void recLDR()
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(64); t2reg = vtlb_DynGenRead64(64, -1);
} }
int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ | MODE_WRITE); int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ | MODE_WRITE);
int t0reg = _allocTempXMMreg(XMMT_INT, -1); int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1); int t1reg = _allocTempXMMreg(XMMT_INT, -1);
int t2reg = _allocTempXMMreg(XMMT_INT, -1);
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
{ {
@ -599,18 +598,13 @@ void recLDR()
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(rtreg)); xPAND(xRegisterSSE(t0reg), xRegisterSSE(rtreg));
xMOVQZX(xRegisterSSE(t2reg), xRegisterSSE(t0reg));
xMOVDZX(xRegisterSSE(t1reg), eax); //shift*8 xMOVDZX(xRegisterSSE(t1reg), eax); //shift*8
xMOVQZX(xRegisterSSE(t0reg), ptr64[&dummyValue[0]]); xPSRL.Q(xRegisterSSE(t2reg), xRegisterSSE(t1reg));
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); xPOR(xRegisterSSE(t2reg), xRegisterSSE(t0reg));
xPOR(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t0reg));
xForwardJump32 full;
skip.SetTarget();
xMOVL.PS(xRegisterSSE(rtreg), ptr128[&dummyValue[0]]); skip.SetTarget();
full.SetTarget(); xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t2reg));
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
@ -631,14 +625,13 @@ void recLDR()
void recSDL() void recSDL()
{ {
#ifdef LOADSTORE_RECOMPILE #ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]); int t2reg;
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
{ {
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07; srcadr &= ~0x07;
vtlb_DynGenRead64_Const(64, srcadr); t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
} }
else else
{ {
@ -651,7 +644,7 @@ void recSDL()
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(64); t2reg = vtlb_DynGenRead64(64, -1);
} }
_flushEEreg(_Rt_); // flush register to mem _flushEEreg(_Rt_); // flush register to mem
@ -675,7 +668,7 @@ void recSDL()
} }
xCMP(eax, 8); xCMP(eax, 8);
xForwardJE32 skip; xForwardJE8 skip;
//Calculate the shift from top bit to lowest //Calculate the shift from top bit to lowest
xMOV(edx, 64); xMOV(edx, 64);
xSHL(eax, 3); xSHL(eax, 3);
@ -684,8 +677,7 @@ void recSDL()
xMOVDZX(xRegisterSSE(t1reg), eax); xMOVDZX(xRegisterSSE(t1reg), eax);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xMOVQZX(xRegisterSSE(t1reg), ptr64[&dummyValue[0]]); // This line is super slow, but using MOVDQA/MOVAPS is even slower! xPAND(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
// Shift over reg value (shift, PSLL.Q multiplies by 8) // Shift over reg value (shift, PSLL.Q multiplies by 8)
xMOVDZX(xRegisterSSE(t1reg), edx); xMOVDZX(xRegisterSSE(t1reg), edx);
@ -698,6 +690,7 @@ void recSDL()
_deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rt_, 3);
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
_freeXMMreg(t2reg);
xLEA(arg2reg, ptr128[&dummyValue[0]]); xLEA(arg2reg, ptr128[&dummyValue[0]]);
@ -733,14 +726,13 @@ void recSDL()
void recSDR() void recSDR()
{ {
#ifdef LOADSTORE_RECOMPILE #ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]); int t2reg;
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
{ {
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07; srcadr &= ~0x07;
vtlb_DynGenRead64_Const(64, srcadr); t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
} }
else else
{ {
@ -753,7 +745,7 @@ void recSDR()
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(64); t2reg = vtlb_DynGenRead64(64, -1);
} }
_flushEEreg(_Rt_); // flush register to mem _flushEEreg(_Rt_); // flush register to mem
@ -776,7 +768,7 @@ void recSDR()
} }
xCMP(eax, 0); xCMP(eax, 0);
xForwardJE32 skip; xForwardJE8 skip;
//Calculate the shift from top bit to lowest //Calculate the shift from top bit to lowest
xMOV(edx, 64); xMOV(edx, 64);
xSHL(eax, 3); xSHL(eax, 3);
@ -785,8 +777,7 @@ void recSDR()
xMOVDZX(xRegisterSSE(t1reg), edx); xMOVDZX(xRegisterSSE(t1reg), edx);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg)); xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xMOVQZX(xRegisterSSE(t1reg), ptr64[&dummyValue[0]]); // This line is super slow, but using MOVDQA/MOVAPS is even slower! xPAND(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
// Shift over reg value (shift, PSLL.Q multiplies by 8) // Shift over reg value (shift, PSLL.Q multiplies by 8)
xMOVDZX(xRegisterSSE(t1reg), eax); xMOVDZX(xRegisterSSE(t1reg), eax);
@ -799,6 +790,7 @@ void recSDR()
_deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rt_, 3);
_freeXMMreg(t0reg); _freeXMMreg(t0reg);
_freeXMMreg(t1reg); _freeXMMreg(t1reg);
_freeXMMreg(t2reg);
xLEA(arg2reg, ptr128[&dummyValue[0]]); xLEA(arg2reg, ptr128[&dummyValue[0]]);
@ -931,16 +923,13 @@ void recLQC2()
skip.SetTarget(); skip.SetTarget();
skipvuidle.SetTarget(); skipvuidle.SetTarget();
if (_Rt_) int gpr;
xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]);
else
xLEA(arg2reg, ptr[&dummyValue[0]]);
if (GPR_IS_CONST1(_Rs_)) if (GPR_IS_CONST1(_Rs_))
{ {
int addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_; int addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
vtlb_DynGenRead64_Const(128, addr); gpr = vtlb_DynGenRead64_Const(128, addr, -1);
} }
else else
{ {
@ -950,9 +939,14 @@ void recLQC2()
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(128); gpr = vtlb_DynGenRead64(128, -1);
} }
if (_Rt_)
xMOVAPS(ptr128[&VU0.VF[_Ft_].UD[0]], xRegisterSSE(gpr));
_freeXMMreg(gpr);
EE::Profiler.EmitOp(eeOpcode::LQC2); EE::Profiler.EmitOp(eeOpcode::LQC2);
} }

View File

@ -172,6 +172,8 @@ namespace vtlb_private
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
static void DynGen_DirectRead(u32 bits, bool sign) static void DynGen_DirectRead(u32 bits, bool sign)
{ {
pxAssert(bits == 8 || bits == 16 || bits == 32);
switch (bits) switch (bits)
{ {
case 8: case 8:
@ -192,15 +194,24 @@ namespace vtlb_private
xMOV(eax, ptr[arg1reg]); xMOV(eax, ptr[arg1reg]);
break; break;
jNO_DEFAULT
}
}
static void DynGen_DirectRead64(u32 bits)
{
pxAssert(bits == 64 || bits == 128);
switch (bits) {
case 64: case 64:
iMOV64_Smart(ptr[arg2reg], ptr[arg1reg]); xMOVQZX(xmm0, ptr64[arg1reg]);
break; break;
case 128: case 128:
iMOV128_SSE(ptr[arg2reg], ptr[arg1reg]); xMOVAPS(xmm0, ptr128[arg1reg]);
break; break;
jNO_DEFAULT jNO_DEFAULT
} }
} }
@ -375,16 +386,18 @@ static void vtlb_SetWriteback(u32* writeback)
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// Dynarec Load Implementations // Dynarec Load Implementations
void vtlb_DynGenRead64(u32 bits) int vtlb_DynGenRead64(u32 bits, int gpr)
{ {
pxAssume(bits == 64 || bits == 128); pxAssume(bits == 64 || bits == 128);
u32* writeback = DynGen_PrepRegs(); u32* writeback = DynGen_PrepRegs();
int reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, 0) : _allocGPRtoXMMreg(0, gpr, MODE_WRITE); // Handler returns in xmm0
DynGen_IndirectDispatch(0, bits); DynGen_IndirectDispatch(0, bits);
DynGen_DirectRead(bits, false); DynGen_DirectRead64(bits);
vtlb_SetWriteback(writeback); // return target for indirect's call/ret vtlb_SetWriteback(writeback); // return target for indirect's call/ret
return reg;
} }
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
@ -406,25 +419,27 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the // TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
// recompiler if the TLB is changed. // recompiler if the TLB is changed.
void vtlb_DynGenRead64_Const(u32 bits, u32 addr_const) int vtlb_DynGenRead64_Const(u32 bits, u32 addr_const, int gpr)
{ {
EE::Profiler.EmitConstMem(addr_const); EE::Profiler.EmitConstMem(addr_const);
int reg;
auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS]; auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS];
if (!vmv.isHandler(addr_const)) if (!vmv.isHandler(addr_const))
{ {
auto ppf = vmv.assumePtr(addr_const); void* ppf = reinterpret_cast<void*>(vmv.assumePtr(addr_const));
reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, -1) : _allocGPRtoXMMreg(-1, gpr, MODE_WRITE);
switch (bits) switch (bits)
{ {
case 64: case 64:
iMOV64_Smart(ptr[arg2reg], ptr[(void*)ppf]); xMOVQZX(xRegisterSSE(reg), ptr64[ppf]);
break; break;
case 128: case 128:
iMOV128_SSE(ptr[arg2reg], ptr[(void*)ppf]); xMOVAPS(xRegisterSSE(reg), ptr128[ppf]);
break; break;
jNO_DEFAULT jNO_DEFAULT
} }
} }
else else
@ -440,8 +455,10 @@ void vtlb_DynGenRead64_Const(u32 bits, u32 addr_const)
} }
iFlushCall(FLUSH_FULLVTLB); iFlushCall(FLUSH_FULLVTLB);
reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, 0) : _allocGPRtoXMMreg(0, gpr, MODE_WRITE); // Handler returns in xmm0
xFastCall(vmv.assumeHandlerGetRaw(szidx, 0), paddr, arg2reg); xFastCall(vmv.assumeHandlerGetRaw(szidx, 0), paddr, arg2reg);
} }
return reg;
} }
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------