mirror of https://github.com/PCSX2/pcsx2.git

vtlb: Switch read64 and read128 handlers to return in sse regs
commit e9518f78c7, parent 7563f54e83
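In broad terms, the commit replaces the out-parameter style of the 64/128-bit read handlers with handlers that return the value directly in an SSE register via __vectorcall. A minimal sketch of the before/after shape, using a hypothetical handler name (someRead64 is illustrative only; the real handlers are the hwRead/vtlb/cache functions changed below):

// Before: the caller passes a destination pointer and the handler writes through it.
static void __fastcall someRead64(u32 addr, mem64_t* out)
{
    *out = 0; // value read from the device
}

// After: the handler returns the value in xmm0 (r64 is an __m128i) via __vectorcall.
static RETURNS_R64 someRead64(u32 addr)
{
    return r64_from_u64(0); // value read from the device
}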
@@ -151,6 +151,7 @@ static const int __pagesize = PCSX2_PAGESIZE;
#ifndef __fastcall
#define __fastcall __attribute__((fastcall))
#endif
#define __vectorcall __fastcall
#define _inline __inline__ __attribute__((unused))
#ifdef NDEBUG
#define __forceinline __attribute__((always_inline, unused))

@@ -215,6 +215,7 @@ set(pcsx2Headers
SaveState.h
Sifcmd.h
Sif.h
SingleRegisterTypes.h
Sio.h
sio_internal.h
SPR.h

@@ -212,17 +212,26 @@ static int getFreeCache(u32 mem, int* way)
return setIdx;
}

template <bool Write, int Bytes>
void* prepareCacheAccess(u32 mem, int* way, int* idx)
{
*way = 0;
*idx = getFreeCache(mem, way);
CacheLine line = cache.lineAt(*idx, *way);
if (Write)
line.tag.setDirty();
u32 aligned = mem & ~(Bytes - 1);
return &line.data.bytes[aligned & 0x3f];
}

template <typename Int>
void writeCache(u32 mem, Int value)
{
int way = 0;
const int idx = getFreeCache(mem, &way);
int way, idx;
void* addr = prepareCacheAccess<true, sizeof(Int)>(mem, &way, &idx);

CACHE_LOG("writeCache%d %8.8x adding to %d, way %d, value %llx", 8 * sizeof(value), mem, idx, way, value);
CacheLine line = cache.lineAt(idx, way);
line.tag.setDirty(); // Set dirty bit for writes;
u32 aligned = mem & ~(sizeof(value) - 1);
*reinterpret_cast<Int*>(&line.data.bytes[aligned & 0x3f]) = value;
*reinterpret_cast<Int*>(addr) = value;
}

void writeCache8(u32 mem, u8 value)
@@ -247,25 +256,20 @@ void writeCache64(u32 mem, const u64 value)

void writeCache128(u32 mem, const mem128_t* value)
{
int way = 0;
const int idx = getFreeCache(mem, &way);
int way, idx;
void* addr = prepareCacheAccess<true, sizeof(mem128_t)>(mem, &way, &idx);

CACHE_LOG("writeCache128 %8.8x adding to %d, way %x, lo %x, hi %x", mem, idx, way, value->lo, value->hi);
CacheLine line = cache.lineAt(idx, way);
line.tag.setDirty(); // Set dirty bit for writes;
u32 aligned = mem & ~0xF;
*reinterpret_cast<mem128_t*>(&line.data.bytes[aligned & 0x3f]) = *value;
CACHE_LOG("writeCache128 %8.8x adding to %d, way %x, lo %llx, hi %llx", mem, idx, way, value->lo, value->hi);
*reinterpret_cast<mem128_t*>(addr) = *value;
}

template <typename Int>
Int readCache(u32 mem)
{
int way = 0;
const int idx = getFreeCache(mem, &way);
int way, idx;
void* addr = prepareCacheAccess<false, sizeof(Int)>(mem, &way, &idx);

CacheLine line = cache.lineAt(idx, way);
u32 aligned = mem & ~(sizeof(Int) - 1);
Int value = *reinterpret_cast<Int*>(&line.data.bytes[aligned & 0x3f]);
Int value = *reinterpret_cast<Int*>(addr);
CACHE_LOG("readCache%d %8.8x from %d, way %d, value %llx", 8 * sizeof(value), mem, idx, way, value);
return value;
}
@@ -286,9 +290,23 @@ u32 readCache32(u32 mem)
return readCache<u32>(mem);
}

u64 readCache64(u32 mem)
RETURNS_R64 readCache64(u32 mem)
{
return readCache<u64>(mem);
int way, idx;
void* addr = prepareCacheAccess<false, sizeof(u64)>(mem, &way, &idx);
r64 value = r64_load(addr);
CACHE_LOG("readCache64 %8.8x from %d, way %d, value %llx", mem, idx, way, *(u64*)&value);
return value;
}

RETURNS_R128 readCache128(u32 mem)
{
int way, idx;
void* addr = prepareCacheAccess<false, sizeof(mem128_t)>(mem, &way, &idx);
r128 value = r128_load(addr);
u64* vptr = reinterpret_cast<u64*>(&value);
CACHE_LOG("readCache128 %8.8x from %d, way %d, lo %llx, hi %llx", mem, idx, way, vptr[0], vptr[1]);
return value;
}

template <typename Op>

@@ -17,6 +17,7 @@
#define __CACHE_H__

#include "Common.h"
#include "SingleRegisterTypes.h"

void resetCache();
void writeCache8(u32 mem, u8 value);
@@ -27,6 +28,7 @@ void writeCache128(u32 mem, const mem128_t* value);
u8 readCache8(u32 mem);
u16 readCache16(u32 mem);
u32 readCache32(u32 mem);
u64 readCache64(u32 mem);
RETURNS_R64 readCache64(u32 mem);
RETURNS_R128 readCache128(u32 mem);

#endif /* __CACHE_H__ */

@@ -33,7 +33,7 @@ static __fi void IntCHackCheck()
if( diff > 0 ) cpuRegs.cycle = g_nextEventCycle;
}

template< uint page > void __fastcall _hwRead128(u32 mem, mem128_t* result );
template< uint page > RETURNS_R128 _hwRead128(u32 mem);

template< uint page, bool intcstathack >
mem32_t __fastcall _hwRead32(u32 mem)
@@ -71,9 +71,8 @@ mem32_t __fastcall _hwRead32(u32 mem)

DevCon.WriteLn( Color_Cyan, "Reading 32-bit FIFO data" );

u128 out128;
_hwRead128<page>(mem & ~0x0f, &out128);
return out128._u32[(mem >> 2) & 0x3];
r128 out128 = _hwRead128<page>(mem & ~0x0f);
return reinterpret_cast<u32*>(&out128)[(mem >> 2) & 0x3];
}
break;

@@ -270,15 +269,14 @@ mem16_t __fastcall hwRead16_page_0F_INTC_HACK(u32 mem)
}

template< uint page >
static void _hwRead64(u32 mem, mem64_t* result )
static RETURNS_R64 _hwRead64(u32 mem)
{
pxAssume( (mem & 0x07) == 0 );

switch (page)
{
case 0x02:
*result = ipuRead64(mem);
return;
return ipuRead64(mem);

case 0x04:
case 0x05:
@@ -295,11 +293,9 @@ static void _hwRead64(u32 mem, mem64_t* result )
uint wordpart = (mem >> 3) & 0x1;
DevCon.WriteLn( Color_Cyan, "Reading 64-bit FIFO data (%s 64 bits discarded)", wordpart ? "upper" : "lower" );

u128 out128;
_hwRead128<page>(mem & ~0x0f, &out128);
*result = out128._u64[wordpart];
r128 full = _hwRead128<page>(mem & ~0x0f);
return r64_load(reinterpret_cast<u64*>(&full) + wordpart);
}
return;
case 0x0F:
if ((mem & 0xffffff00) == 0x1000f300)
{
@@ -308,30 +304,33 @@ static void _hwRead64(u32 mem, mem64_t* result )
{

ReadFifoSingleWord();
*result = psHu32(0x1000f3E0);
u32 lo = psHu32(0x1000f3E0);
ReadFifoSingleWord();
*result |= (u64)psHu32(0x1000f3E0) << 32;
u32 hi = psHu32(0x1000f3E0);
return r64_from_u32x2(lo, hi);
}
}
return;
default: break;
}

*result = _hwRead32<page,false>( mem );
return r64_from_u32(_hwRead32<page, false>(mem));
}

template< uint page >
void __fastcall hwRead64(u32 mem, mem64_t* result )
RETURNS_R64 hwRead64(u32 mem)
{
_hwRead64<page>( mem, result );
eeHwTraceLog( mem, *result, true );
r64 res = _hwRead64<page>(mem);
eeHwTraceLog(mem, *(u64*)&res, true);
return res;
}

template< uint page >
void __fastcall _hwRead128(u32 mem, mem128_t* result )
RETURNS_R128 _hwRead128(u32 mem)
{
pxAssume( (mem & 0x0f) == 0 );

alignas(16) mem128_t result;

// FIFOs are the only "legal" 128 bit registers, so we Handle them first.
// All other registers fall back on the 64-bit handler (and from there
// all non-IPU reads fall back to the 32-bit handler).
@@ -339,15 +338,15 @@ void __fastcall _hwRead128(u32 mem, mem128_t* result )
switch (page)
{
case 0x05:
ReadFIFO_VIF1( result );
break;
ReadFIFO_VIF1(&result);
break;

case 0x07:
if (mem & 0x10)
ZeroQWC( result ); // IPUin is write-only
return r128_zero(); // IPUin is write-only
else
ReadFIFO_IPUout( result );
break;
ReadFIFO_IPUout(&result);
break;

case 0x04:
case 0x06:
@@ -355,13 +354,12 @@ void __fastcall _hwRead128(u32 mem, mem128_t* result )
// [Ps2Confirm] Reads from these FIFOs (and IPUin) do one of the following:
// return zero, leave contents of the dest register unchanged, or in some
// indeterminate state. The actual behavior probably isn't important.
ZeroQWC( result );
break;
return r128_zero();
case 0x0F:
// todo: psx mode: this is new
if (((mem & 0x1FFFFFFF) >= EEMemoryMap::SBUS_PS1_Start) && ((mem & 0x1FFFFFFF) < EEMemoryMap::SBUS_PS1_End)) {
PGIFrQword((mem & 0x1FFFFFFF), result);
return;
PGIFrQword((mem & 0x1FFFFFFF), &result);
break;
}

// WARNING: this code is never executed anymore due to previous condition.
@@ -373,37 +371,38 @@ void __fastcall _hwRead128(u32 mem, mem128_t* result )
{

ReadFifoSingleWord();
result->lo = psHu32(0x1000f3E0);
u32 part0 = psHu32(0x1000f3E0);
ReadFifoSingleWord();
result->lo |= (u64)psHu32(0x1000f3E0) << 32;
u32 part1 = psHu32(0x1000f3E0);
ReadFifoSingleWord();
result->hi = psHu32(0x1000f3E0);
u32 part2 = psHu32(0x1000f3E0);
ReadFifoSingleWord();
result->hi |= (u64)psHu32(0x1000f3E0) << 32;
u32 part3 = psHu32(0x1000f3E0);
return r128_from_u32x4(part0, part1, part2, part3);
}
}
break;


default:
_hwRead64<page>( mem, &result->lo );
result->hi = 0;
break;
return r128_from_r64_clean(_hwRead64<page>(mem));
}
return r128_load(&result);
}

template< uint page >
void __fastcall hwRead128(u32 mem, mem128_t* result )
RETURNS_R128 hwRead128(u32 mem)
{
_hwRead128<page>( mem, result );
eeHwTraceLog( mem, *result, true );
r128 res = _hwRead128<page>(mem);
eeHwTraceLog(mem, *(mem128_t*)&res, true);
return res;
}

#define InstantizeHwRead(pageidx) \
template mem8_t __fastcall hwRead8<pageidx>(u32 mem); \
template mem16_t __fastcall hwRead16<pageidx>(u32 mem); \
template mem32_t __fastcall hwRead32<pageidx>(u32 mem); \
template void __fastcall hwRead64<pageidx>(u32 mem, mem64_t* result ); \
template void __fastcall hwRead128<pageidx>(u32 mem, mem128_t* result ); \
template RETURNS_R64 hwRead64<pageidx>(u32 mem); \
template RETURNS_R128 hwRead128<pageidx>(u32 mem); \
template mem32_t __fastcall _hwRead32<pageidx, false>(u32 mem);

InstantizeHwRead(0x00); InstantizeHwRead(0x08);

@@ -241,7 +241,7 @@ __fi u32 ipuRead32(u32 mem)
return psHu32(IPU_CMD + mem);
}

__fi u64 ipuRead64(u32 mem)
__fi RETURNS_R64 ipuRead64(u32 mem)
{
// Note: It's assumed that mem's input value is always in the 0x10002000 page
// of memory (if not, it's probably bad code).
@@ -263,7 +263,7 @@ __fi u64 ipuRead64(u32 mem)

if (ipuRegs.cmd.DATA & 0xffffff)
IPU_LOG("read64: IPU_CMD=BUSY=%x, DATA=%08X", ipuRegs.cmd.BUSY ? 1 : 0, ipuRegs.cmd.DATA);
return ipuRegs.cmd._u64;
return r64_load(&ipuRegs.cmd._u64);
}

ipucase(IPU_CTRL):
@@ -282,7 +282,7 @@ __fi u64 ipuRead64(u32 mem)
IPU_LOG("read64: Unknown=%x", mem);
break;
}
return psHu64(IPU_CMD + mem);
return r64_load(&psHu64(IPU_CMD + mem));
}

void ipuSoftReset()

@@ -289,7 +289,7 @@ extern int coded_block_pattern;
extern void ipuReset();

extern u32 ipuRead32(u32 mem);
extern u64 ipuRead64(u32 mem);
extern RETURNS_R64 ipuRead64(u32 mem);
extern bool ipuWrite32(u32 mem,u32 value);
extern bool ipuWrite64(u32 mem,u64 value);

@@ -220,13 +220,13 @@ static mem32_t __fastcall nullRead32(u32 mem) {
MEM_LOG("Read uninstalled memory at address %08x", mem);
return 0;
}
static void __fastcall nullRead64(u32 mem, mem64_t *out) {
static RETURNS_R64 nullRead64(u32 mem) {
MEM_LOG("Read uninstalled memory at address %08x", mem);
*out = 0;
return r64_zero();
}
static void __fastcall nullRead128(u32 mem, mem128_t *out) {
static RETURNS_R128 nullRead128(u32 mem) {
MEM_LOG("Read uninstalled memory at address %08x", mem);
ZeroQWC(out);
return r128_zero();
}
static void __fastcall nullWrite8(u32 mem, mem8_t value)
{
@@ -324,12 +324,12 @@ static mem32_t __fastcall _ext_memRead32(u32 mem)
}

template<int p>
static void __fastcall _ext_memRead64(u32 mem, mem64_t *out)
static RETURNS_R64 _ext_memRead64(u32 mem)
{
switch (p)
{
case 6: // gsm
*out = gsRead64(mem); return;
return r64_from_u64(gsRead64(mem));
default: break;
}

@@ -338,15 +338,14 @@ static void __fastcall _ext_memRead64(u32 mem, mem64_t *out)
}

template<int p>
static void __fastcall _ext_memRead128(u32 mem, mem128_t *out)
static RETURNS_R128 _ext_memRead128(u32 mem)
{
switch (p)
{
//case 1: // hwm
// hwRead128(mem & ~0xa0000000, out); return;
// return hwRead128(mem & ~0xa0000000);
case 6: // gsm
CopyQWC(out,PS2GS_BASE(mem));
return;
return r128_load(PS2GS_BASE(mem));
default: break;
}

@@ -475,19 +474,19 @@ template<int vunum> static mem32_t __fc vuMicroRead32(u32 addr) {
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return *(u32*)&vu->Micro[addr];
}
template<int vunum> static void __fc vuMicroRead64(u32 addr,mem64_t* data) {
template<int vunum> static RETURNS_R64 vuMicroRead64(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;

if (vunum && THREAD_VU1) vu1Thread.WaitVU();
*data=*(u64*)&vu->Micro[addr];
return r64_load(&vu->Micro[addr]);
}
template<int vunum> static void __fc vuMicroRead128(u32 addr,mem128_t* data) {
template<int vunum> static RETURNS_R128 vuMicroRead128(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU();

CopyQWC(data,&vu->Micro[addr]);
return r128_load(&vu->Micro[addr]);
}

// Profiled VU writes: Happen very infrequently, with exception of BIOS initialization (at most twice per
@@ -578,17 +577,17 @@ template<int vunum> static mem32_t __fc vuDataRead32(u32 addr) {
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return *(u32*)&vu->Mem[addr];
}
template<int vunum> static void __fc vuDataRead64(u32 addr, mem64_t* data) {
template<int vunum> static RETURNS_R64 vuDataRead64(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
*data=*(u64*)&vu->Mem[addr];
return r64_load(&vu->Mem[addr]);
}
template<int vunum> static void __fc vuDataRead128(u32 addr, mem128_t* data) {
template<int vunum> static RETURNS_R128 vuDataRead128(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
CopyQWC(data,&vu->Mem[addr]);
return r128_load(&vu->Mem[addr]);
}

// VU Data Memory Writes...

@@ -136,11 +136,11 @@ extern void mmap_ResetBlockTracking();
#define memWrite16 vtlb_memWrite<mem16_t>
#define memWrite32 vtlb_memWrite<mem32_t>

static __fi void memRead64(u32 mem, mem64_t* out) { vtlb_memRead64(mem, out); }
static __fi void memRead64(u32 mem, mem64_t& out) { vtlb_memRead64(mem, &out); }
static __fi void memRead64(u32 mem, mem64_t* out) { _mm_storel_epi64((__m128i*)out, vtlb_memRead64(mem)); }
static __fi void memRead64(u32 mem, mem64_t& out) { memRead64(mem, &out); }

static __fi void memRead128(u32 mem, mem128_t* out) { vtlb_memRead128(mem, out); }
static __fi void memRead128(u32 mem, mem128_t& out) { vtlb_memRead128(mem, &out); }
static __fi void memRead128(u32 mem, mem128_t* out) { _mm_store_si128((__m128i*)out, vtlb_memRead128(mem)); }
static __fi void memRead128(u32 mem, mem128_t& out) { memRead128(mem, &out); }

static __fi void memWrite64(u32 mem, const mem64_t* val) { vtlb_memWrite64(mem, val); }
static __fi void memWrite64(u32 mem, const mem64_t& val) { vtlb_memWrite64(mem, &val); }

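Callers that want the loaded value in an ordinary scalar rather than stored back to memory could also move the SSE return straight into an integer register; a hypothetical helper, not part of this commit, relying on the x86-64-only _mm_cvtsi128_si64 intrinsic:

// Hypothetical sketch: extract the low 64 bits of the r64 return into a u64.
static __fi u64 memRead64_value(u32 mem)
{
    return static_cast<u64>(_mm_cvtsi128_si64(vtlb_memRead64(mem)));
}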
@@ -0,0 +1,107 @@
/* PCSX2 - PS2 Emulator for PCs
 * Copyright (C) 2002-2021 PCSX2 Dev Team
 *
 * PCSX2 is free software: you can redistribute it and/or modify it under the terms
 * of the GNU Lesser General Public License as published by the Free Software Found-
 * ation, either version 3 of the License, or (at your option) any later version.
 *
 * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with PCSX2.
 * If not, see <http://www.gnu.org/licenses/>.
 */

// --------------------------------------------------------------------------------------
// r64 / r128 - Types that are guaranteed to fit in one register
// --------------------------------------------------------------------------------------
// Note: Recompilers rely on some of these types and the registers they allocate to,
// so be careful if you want to change them

#pragma once

#include <immintrin.h>
// Can't stick them in structs because it breaks calling convention things, yay
using r64 = __m128i;
using r128 = __m128i;

// Calling convention setting, yay
#define RETURNS_R64 r64 __vectorcall
#define RETURNS_R128 r128 __vectorcall
#define TAKES_R64 __vectorcall
#define TAKES_R128 __vectorcall

// And since we can't stick them in structs, we get lots of static methods, yay!

__forceinline static r64 r64_load(const void* ptr)
{
return _mm_loadl_epi64(reinterpret_cast<const r64*>(ptr));
}

__forceinline static r64 r64_zero()
{
return _mm_setzero_si128();
}

__forceinline static r64 r64_from_u32(u32 val)
{
return _mm_cvtsi32_si128(val);
}

__forceinline static r64 r64_from_u32x2(u32 lo, u32 hi)
{
return _mm_unpacklo_epi32(_mm_cvtsi32_si128(lo), _mm_cvtsi32_si128(hi));
}

__forceinline static r64 r64_from_u64(u64 val)
{
#ifdef _M_X86_64
return _mm_cvtsi64_si128(val);
#else
return r64_from_u32x2(val, val >> 32);
#endif
}

__forceinline static r128 r128_load(const void* ptr)
{
return _mm_load_si128(reinterpret_cast<const r128*>(ptr));
}

__forceinline static r128 r128_zero()
{
return _mm_setzero_si128();
}

/// Expects that r64 came from r64-handling code, and not from a recompiler or something
__forceinline static r128 r128_from_r64_clean(r64 val)
{
return val;
}

__forceinline static r128 r128_from_u32x4(u32 lo0, u32 lo1, u32 hi0, u32 hi1)
{
return _mm_setr_epi32(lo0, lo1, hi0, hi1);
}

template <typename u>
struct rhelper;

template <>
struct rhelper<u64>
{
using r = r64;
__forceinline static r load(void* ptr) { return r64_load(ptr); }
__forceinline static r zero() { return r64_zero(); }
};

template <>
struct rhelper<u128>
{
using r = r128;
__forceinline static r load(void* ptr) { return r128_load(ptr); }
__forceinline static r zero() { return r128_zero(); }
};

template <typename u>
using u_to_r = typename rhelper<u>::r;
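As a quick illustration of how these helpers compose (a hypothetical snippet, not part of the new header):

// Build an r64 from two 32-bit halves, widen it to an r128 (the upper 64 bits stay zero),
// and store the result to ordinary 16-byte-aligned memory.
alignas(16) u64 buf[2];
r64 lo = r64_from_u32x2(0x11223344u, 0x55667788u);
r128 q = r128_from_r64_clean(lo);
_mm_store_si128(reinterpret_cast<__m128i*>(buf), q);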
@@ -952,6 +952,7 @@
<ClInclude Include="Dump.h" />
<ClInclude Include="IopCommon.h" />
<ClInclude Include="SaveState.h" />
<ClInclude Include="SingleRegisterTypes.h" />
<ClInclude Include="System.h" />
<ClInclude Include="System\SysThreads.h" />
<ClInclude Include="System\RecTypes.h" />

@@ -1687,6 +1687,9 @@
<ClInclude Include="SaveState.h">
<Filter>System\Include</Filter>
</ClInclude>
<ClInclude Include="SingleRegisterTypes.h">
<Filter>System\Include</Filter>
</ClInclude>
<ClInclude Include="System.h">
<Filter>System\Include</Filter>
</ClInclude>

@@ -41,8 +41,8 @@
template< uint page > extern mem8_t __fastcall hwRead8 (u32 mem);
template< uint page > extern mem16_t __fastcall hwRead16 (u32 mem);
template< uint page > extern mem32_t __fastcall hwRead32 (u32 mem);
template< uint page > extern void __fastcall hwRead64 (u32 mem, mem64_t* out );
template< uint page > extern void __fastcall hwRead128(u32 mem, mem128_t* out);
template< uint page > extern RETURNS_R64 hwRead64 (u32 mem);
template< uint page > extern RETURNS_R128 hwRead128(u32 mem);

// Internal hwRead32 which does not log reads, used by hwWrite8/16 to perform
// read-modify-write operations.

@@ -164,7 +164,7 @@ DataType __fastcall vtlb_memRead(u32 addr)
return 0; // technically unreachable, but suppresses warnings.
}

void __fastcall vtlb_memRead64(u32 mem, mem64_t *out)
RETURNS_R64 vtlb_memRead64(u32 mem)
{
auto vmv = vtlbdata.vmap[mem>>VTLB_PAGE_BITS];

@@ -173,22 +173,22 @@ void __fastcall vtlb_memRead64(u32 mem, mem64_t *out)
if (!CHECK_EEREC) {
if(CHECK_CACHE && CheckCache(mem))
{
*out = readCache64(mem);
return;
return readCache64(mem);
}
}

*out = *(mem64_t*)vmv.assumePtr(mem);
return r64_load(reinterpret_cast<const void*>(vmv.assumePtr(mem)));
}
else
{
//has to: translate, find function, call function
u32 paddr = vmv.assumeHandlerGetPAddr(mem);
//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
vmv.assumeHandler<64, false>()(paddr, out);
return vmv.assumeHandler<64, false>()(paddr);
}
}
void __fastcall vtlb_memRead128(u32 mem, mem128_t *out)

RETURNS_R128 vtlb_memRead128(u32 mem)
{
auto vmv = vtlbdata.vmap[mem>>VTLB_PAGE_BITS];

@@ -198,20 +198,18 @@ void __fastcall vtlb_memRead128(u32 mem, mem128_t *out)
{
if(CHECK_CACHE && CheckCache(mem))
{
out->lo = readCache64(mem);
out->hi = readCache64(mem+8);
return;
return readCache128(mem);
}
}

CopyQWC(out,(void*)vmv.assumePtr(mem));
return r128_load(reinterpret_cast<const void*>(vmv.assumePtr(mem)));
}
else
{
//has to: translate, find function, call function
u32 paddr = vmv.assumeHandlerGetPAddr(mem);
//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
vmv.assumeHandler<128, false>()(paddr, out);
return vmv.assumeHandler<128, false>()(paddr);
}
}

@@ -436,28 +434,28 @@ static __ri void vtlb_BusError(u32 addr,u32 mode)
}

template<typename OperandType, u32 saddr>
OperandType __fastcall vtlbUnmappedVReadSm(u32 addr) { vtlb_Miss(addr|saddr,0); return 0; }
OperandType __fastcall vtlbUnmappedVReadSm(u32 addr) { vtlb_Miss(addr|saddr,0); return 0; }

template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedVReadLg(u32 addr,OperandType* data) { vtlb_Miss(addr|saddr,0); }
u_to_r<OperandType> __vectorcall vtlbUnmappedVReadLg(u32 addr) { vtlb_Miss(addr|saddr,0); return rhelper<OperandType>::zero(); }

template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedVWriteSm(u32 addr,OperandType data) { vtlb_Miss(addr|saddr,1); }
void __fastcall vtlbUnmappedVWriteSm(u32 addr,OperandType data) { vtlb_Miss(addr|saddr,1); }

template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedVWriteLg(u32 addr,const OperandType* data) { vtlb_Miss(addr|saddr,1); }
void __fastcall vtlbUnmappedVWriteLg(u32 addr,const OperandType* data) { vtlb_Miss(addr|saddr,1); }

template<typename OperandType, u32 saddr>
OperandType __fastcall vtlbUnmappedPReadSm(u32 addr) { vtlb_BusError(addr|saddr,0); return 0; }
OperandType __fastcall vtlbUnmappedPReadSm(u32 addr) { vtlb_BusError(addr|saddr,0); return 0; }

template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedPReadLg(u32 addr,OperandType* data) { vtlb_BusError(addr|saddr,0); }
u_to_r<OperandType> __vectorcall vtlbUnmappedPReadLg(u32 addr) { vtlb_BusError(addr|saddr,0); return rhelper<OperandType>::zero(); }

template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedPWriteSm(u32 addr,OperandType data) { vtlb_BusError(addr|saddr,1); }
void __fastcall vtlbUnmappedPWriteSm(u32 addr,OperandType data) { vtlb_BusError(addr|saddr,1); }

template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedPWriteLg(u32 addr,const OperandType* data) { vtlb_BusError(addr|saddr,1); }
void __fastcall vtlbUnmappedPWriteLg(u32 addr,const OperandType* data) { vtlb_BusError(addr|saddr,1); }

// --------------------------------------------------------------------------------------
// VTLB mapping errors
@@ -484,14 +482,16 @@ static mem32_t __fastcall vtlbDefaultPhyRead32(u32 addr)
return 0;
}

static void __fastcall vtlbDefaultPhyRead64(u32 addr, mem64_t* dest)
static __m128i __vectorcall vtlbDefaultPhyRead64(u32 addr)
{
pxFailDev(pxsFmt("(VTLB) Attempted read64 from unmapped physical address @ 0x%08X.", addr));
return r64_zero();
}

static void __fastcall vtlbDefaultPhyRead128(u32 addr, mem128_t* dest)
static __m128i __vectorcall vtlbDefaultPhyRead128(u32 addr)
{
pxFailDev(pxsFmt("(VTLB) Attempted read128 from unmapped physical address @ 0x%08X.", addr));
return r128_zero();
}

static void __fastcall vtlbDefaultPhyWrite8(u32 addr, mem8_t data)
{

pcsx2/vtlb.h
@@ -16,6 +16,7 @@
#pragma once

#include "MemoryTypes.h"
#include "SingleRegisterTypes.h"

#include "common/PageFaultSource.h"

@@ -25,8 +26,8 @@ static const uptr VTLB_AllocUpperBounds = _1gb * 2;
typedef mem8_t __fastcall vtlbMemR8FP(u32 addr);
typedef mem16_t __fastcall vtlbMemR16FP(u32 addr);
typedef mem32_t __fastcall vtlbMemR32FP(u32 addr);
typedef void __fastcall vtlbMemR64FP(u32 addr,mem64_t* data);
typedef void __fastcall vtlbMemR128FP(u32 addr,mem128_t* data);
typedef RETURNS_R64 vtlbMemR64FP(u32 addr);
typedef RETURNS_R128 vtlbMemR128FP(u32 addr);

// Specialized function pointers for each write type
typedef void __fastcall vtlbMemW8FP(u32 addr,mem8_t data);
@@ -87,8 +88,8 @@ extern void vtlb_VMapUnmap(u32 vaddr,u32 sz);

template< typename DataType >
extern DataType __fastcall vtlb_memRead(u32 mem);
extern void __fastcall vtlb_memRead64(u32 mem, mem64_t *out);
extern void __fastcall vtlb_memRead128(u32 mem, mem128_t *out);
extern RETURNS_R64 vtlb_memRead64(u32 mem);
extern RETURNS_R128 vtlb_memRead128(u32 mem);

template< typename DataType >
extern void __fastcall vtlb_memWrite(u32 mem, DataType value);
@@ -97,10 +98,10 @@ extern void __fastcall vtlb_memWrite128(u32 mem, const mem128_t* value);

extern void vtlb_DynGenWrite(u32 sz);
extern void vtlb_DynGenRead32(u32 bits, bool sign);
extern void vtlb_DynGenRead64(u32 sz);
extern int vtlb_DynGenRead64(u32 sz, int gpr);

extern void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const );
extern void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const );
extern int vtlb_DynGenRead64_Const( u32 bits, u32 addr_const, int gpr );
extern void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const );

// --------------------------------------------------------------------------------------

@@ -107,10 +107,8 @@ void recLoad64(u32 bits, bool sign)
// Load arg2 with the destination.
// 64/128 bit modes load the result directly into the cpuRegs.GPR struct.

if (_Rt_)
xLEA(arg2reg, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
else
xLEA(arg2reg, ptr[&dummyValue[0]]);
int gprreg = ((bits == 128) && _Rt_) ? _Rt_ : -1;
int reg;

if (GPR_IS_CONST1(_Rs_))
{
@@ -121,7 +119,7 @@ void recLoad64(u32 bits, bool sign)
_eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 0);

vtlb_DynGenRead64_Const(bits, srcadr);
reg = vtlb_DynGenRead64_Const(bits, srcadr, gprreg);
}
else
{
@@ -134,9 +132,17 @@ void recLoad64(u32 bits, bool sign)

_eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 0);
iFlushCall(FLUSH_FULLVTLB);

vtlb_DynGenRead64(bits);
iFlushCall(FLUSH_FULLVTLB);
reg = vtlb_DynGenRead64(bits, gprreg);
}

if (gprreg == -1)
{
if (_Rt_)
xMOVQ(ptr64[&cpuRegs.GPR.r[_Rt_].UL[0]], xRegisterSSE(reg));

_freeXMMreg(reg);
}
}

@@ -458,14 +464,14 @@ void recLDL()
return;

#ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]);
int t2reg;

if (GPR_IS_CONST1(_Rs_))
{
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07;

vtlb_DynGenRead64_Const(64, srcadr);
t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
}
else
{
@@ -478,13 +484,12 @@ void recLDL()

iFlushCall(FLUSH_FULLVTLB);

vtlb_DynGenRead64(64);
t2reg = vtlb_DynGenRead64(64, -1);
}

int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ | MODE_WRITE);
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
int t2reg = _allocTempXMMreg(XMMT_INT, -1);

if (GPR_IS_CONST1(_Rs_))
{
@@ -502,7 +507,7 @@ void recLDL()
}

xCMP(eax, 8);
xForwardJE32 skip;
xForwardJE8 skip;
//Calculate the shift from top bit to lowest
xMOV(edx, 64);
xSHL(eax, 3);
@@ -512,18 +517,13 @@ void recLDL()
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(rtreg));
xMOVDQA(xRegisterSSE(t2reg), xRegisterSSE(t0reg));

xMOVDZX(xRegisterSSE(t1reg), edx);
xMOVQZX(xRegisterSSE(t0reg), ptr64[&dummyValue[0]]);
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPOR(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t0reg));
xForwardJump32 full;
skip.SetTarget();
xPSLL.Q(xRegisterSSE(t2reg), xRegisterSSE(t1reg));
xPOR(xRegisterSSE(t2reg), xRegisterSSE(t0reg));

xMOVL.PS(xRegisterSSE(rtreg), ptr128[&dummyValue[0]]);
full.SetTarget();
skip.SetTarget();
xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t2reg));

_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
@@ -546,14 +546,14 @@ void recLDR()
return;

#ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]);
int t2reg;

if (GPR_IS_CONST1(_Rs_))
{
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07;

vtlb_DynGenRead64_Const(64, srcadr);
t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
}
else
{
@@ -566,13 +566,12 @@ void recLDR()

iFlushCall(FLUSH_FULLVTLB);

vtlb_DynGenRead64(64);
t2reg = vtlb_DynGenRead64(64, -1);
}

int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ | MODE_WRITE);
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
int t2reg = _allocTempXMMreg(XMMT_INT, -1);

if (GPR_IS_CONST1(_Rs_))
{
@@ -599,18 +598,13 @@ void recLDR()
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(rtreg));
xMOVQZX(xRegisterSSE(t2reg), xRegisterSSE(t0reg));

xMOVDZX(xRegisterSSE(t1reg), eax); //shift*8
xMOVQZX(xRegisterSSE(t0reg), ptr64[&dummyValue[0]]);
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPOR(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t0reg));
xForwardJump32 full;
skip.SetTarget();
xPSRL.Q(xRegisterSSE(t2reg), xRegisterSSE(t1reg));
xPOR(xRegisterSSE(t2reg), xRegisterSSE(t0reg));

xMOVL.PS(xRegisterSSE(rtreg), ptr128[&dummyValue[0]]);
full.SetTarget();
skip.SetTarget();
xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t2reg));

_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
@@ -631,14 +625,13 @@ void recLDR()
void recSDL()
{
#ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]);

int t2reg;
if (GPR_IS_CONST1(_Rs_))
{
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07;

vtlb_DynGenRead64_Const(64, srcadr);
t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
}
else
{
@@ -651,7 +644,7 @@ void recSDL()

iFlushCall(FLUSH_FULLVTLB);

vtlb_DynGenRead64(64);
t2reg = vtlb_DynGenRead64(64, -1);
}
_flushEEreg(_Rt_); // flush register to mem

@@ -675,7 +668,7 @@ void recSDL()
}

xCMP(eax, 8);
xForwardJE32 skip;
xForwardJE8 skip;
//Calculate the shift from top bit to lowest
xMOV(edx, 64);
xSHL(eax, 3);
@@ -684,8 +677,7 @@ void recSDL()
xMOVDZX(xRegisterSSE(t1reg), eax);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xMOVQZX(xRegisterSSE(t1reg), ptr64[&dummyValue[0]]); // This line is super slow, but using MOVDQA/MOVAPS is even slower!
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t2reg));

// Shift over reg value (shift, PSLL.Q multiplies by 8)
xMOVDZX(xRegisterSSE(t1reg), edx);
@@ -698,6 +690,7 @@ void recSDL()
_deleteGPRtoXMMreg(_Rt_, 3);
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
_freeXMMreg(t2reg);

xLEA(arg2reg, ptr128[&dummyValue[0]]);

@@ -733,14 +726,13 @@ void recSDL()
void recSDR()
{
#ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]);

int t2reg;
if (GPR_IS_CONST1(_Rs_))
{
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07;

vtlb_DynGenRead64_Const(64, srcadr);
t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
}
else
{
@@ -753,7 +745,7 @@ void recSDR()

iFlushCall(FLUSH_FULLVTLB);

vtlb_DynGenRead64(64);
t2reg = vtlb_DynGenRead64(64, -1);
}
_flushEEreg(_Rt_); // flush register to mem

@@ -776,7 +768,7 @@ void recSDR()
}

xCMP(eax, 0);
xForwardJE32 skip;
xForwardJE8 skip;
//Calculate the shift from top bit to lowest
xMOV(edx, 64);
xSHL(eax, 3);
@@ -785,8 +777,7 @@ void recSDR()
xMOVDZX(xRegisterSSE(t1reg), edx);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xMOVQZX(xRegisterSSE(t1reg), ptr64[&dummyValue[0]]); // This line is super slow, but using MOVDQA/MOVAPS is even slower!
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t2reg));

// Shift over reg value (shift, PSLL.Q multiplies by 8)
xMOVDZX(xRegisterSSE(t1reg), eax);
@@ -799,6 +790,7 @@ void recSDR()
_deleteGPRtoXMMreg(_Rt_, 3);
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
_freeXMMreg(t2reg);

xLEA(arg2reg, ptr128[&dummyValue[0]]);

@@ -931,16 +923,13 @@ void recLQC2()
skip.SetTarget();
skipvuidle.SetTarget();

if (_Rt_)
xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]);
else
xLEA(arg2reg, ptr[&dummyValue[0]]);
int gpr;

if (GPR_IS_CONST1(_Rs_))
{
int addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;

vtlb_DynGenRead64_Const(128, addr);
gpr = vtlb_DynGenRead64_Const(128, addr, -1);
}
else
{
@@ -950,9 +939,14 @@ void recLQC2()

iFlushCall(FLUSH_FULLVTLB);

vtlb_DynGenRead64(128);
gpr = vtlb_DynGenRead64(128, -1);
}

if (_Rt_)
xMOVAPS(ptr128[&VU0.VF[_Ft_].UD[0]], xRegisterSSE(gpr));

_freeXMMreg(gpr);

EE::Profiler.EmitOp(eeOpcode::LQC2);
}

@@ -172,6 +172,8 @@ namespace vtlb_private
// ------------------------------------------------------------------------
static void DynGen_DirectRead(u32 bits, bool sign)
{
pxAssert(bits == 8 || bits == 16 || bits == 32);

switch (bits)
{
case 8:
@@ -192,15 +194,24 @@
xMOV(eax, ptr[arg1reg]);
break;

jNO_DEFAULT
}
}

static void DynGen_DirectRead64(u32 bits)
{
pxAssert(bits == 64 || bits == 128);

switch (bits) {
case 64:
iMOV64_Smart(ptr[arg2reg], ptr[arg1reg]);
xMOVQZX(xmm0, ptr64[arg1reg]);
break;

case 128:
iMOV128_SSE(ptr[arg2reg], ptr[arg1reg]);
xMOVAPS(xmm0, ptr128[arg1reg]);
break;

jNO_DEFAULT
jNO_DEFAULT
}
}

@@ -375,16 +386,18 @@ static void vtlb_SetWriteback(u32* writeback)

//////////////////////////////////////////////////////////////////////////////////////////
// Dynarec Load Implementations
void vtlb_DynGenRead64(u32 bits)
int vtlb_DynGenRead64(u32 bits, int gpr)
{
pxAssume(bits == 64 || bits == 128);

u32* writeback = DynGen_PrepRegs();

int reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, 0) : _allocGPRtoXMMreg(0, gpr, MODE_WRITE); // Handler returns in xmm0
DynGen_IndirectDispatch(0, bits);
DynGen_DirectRead(bits, false);
DynGen_DirectRead64(bits);

vtlb_SetWriteback(writeback); // return target for indirect's call/ret
return reg;
}

// ------------------------------------------------------------------------
@@ -406,25 +419,27 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
// ------------------------------------------------------------------------
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
// recompiler if the TLB is changed.
void vtlb_DynGenRead64_Const(u32 bits, u32 addr_const)
int vtlb_DynGenRead64_Const(u32 bits, u32 addr_const, int gpr)
{
EE::Profiler.EmitConstMem(addr_const);

int reg;
auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS];
if (!vmv.isHandler(addr_const))
{
auto ppf = vmv.assumePtr(addr_const);
void* ppf = reinterpret_cast<void*>(vmv.assumePtr(addr_const));
reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, -1) : _allocGPRtoXMMreg(-1, gpr, MODE_WRITE);
switch (bits)
{
case 64:
iMOV64_Smart(ptr[arg2reg], ptr[(void*)ppf]);
xMOVQZX(xRegisterSSE(reg), ptr64[ppf]);
break;

case 128:
iMOV128_SSE(ptr[arg2reg], ptr[(void*)ppf]);
xMOVAPS(xRegisterSSE(reg), ptr128[ppf]);
break;

jNO_DEFAULT
jNO_DEFAULT
}
}
else
@@ -440,8 +455,10 @@ void vtlb_DynGenRead64_Const(u32 bits, u32 addr_const)
}

iFlushCall(FLUSH_FULLVTLB);
reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, 0) : _allocGPRtoXMMreg(0, gpr, MODE_WRITE); // Handler returns in xmm0
xFastCall(vmv.assumeHandlerGetRaw(szidx, 0), paddr, arg2reg);
}
return reg;
}

// ------------------------------------------------------------------------