vtlb: Switch read64 and read128 handlers to return in sse regs

This commit is contained in:
TellowKrinkle 2021-09-20 01:10:59 -05:00 committed by refractionpcsx2
parent 7563f54e83
commit e9518f78c7
17 changed files with 324 additions and 181 deletions

View File

@ -151,6 +151,7 @@ static const int __pagesize = PCSX2_PAGESIZE;
#ifndef __fastcall
#define __fastcall __attribute__((fastcall))
#endif
#define __vectorcall __fastcall
#define _inline __inline__ __attribute__((unused))
#ifdef NDEBUG
#define __forceinline __attribute__((always_inline, unused))

View File

@ -215,6 +215,7 @@ set(pcsx2Headers
SaveState.h
Sifcmd.h
Sif.h
SingleRegisterTypes.h
Sio.h
sio_internal.h
SPR.h

View File

@ -212,17 +212,26 @@ static int getFreeCache(u32 mem, int* way)
return setIdx;
}
template <bool Write, int Bytes>
void* prepareCacheAccess(u32 mem, int* way, int* idx)
{
*way = 0;
*idx = getFreeCache(mem, way);
CacheLine line = cache.lineAt(*idx, *way);
if (Write)
line.tag.setDirty();
u32 aligned = mem & ~(Bytes - 1);
return &line.data.bytes[aligned & 0x3f];
}
template <typename Int>
void writeCache(u32 mem, Int value)
{
int way = 0;
const int idx = getFreeCache(mem, &way);
int way, idx;
void* addr = prepareCacheAccess<true, sizeof(Int)>(mem, &way, &idx);
CACHE_LOG("writeCache%d %8.8x adding to %d, way %d, value %llx", 8 * sizeof(value), mem, idx, way, value);
CacheLine line = cache.lineAt(idx, way);
line.tag.setDirty(); // Set dirty bit for writes;
u32 aligned = mem & ~(sizeof(value) - 1);
*reinterpret_cast<Int*>(&line.data.bytes[aligned & 0x3f]) = value;
*reinterpret_cast<Int*>(addr) = value;
}
void writeCache8(u32 mem, u8 value)
@ -247,25 +256,20 @@ void writeCache64(u32 mem, const u64 value)
void writeCache128(u32 mem, const mem128_t* value)
{
int way = 0;
const int idx = getFreeCache(mem, &way);
int way, idx;
void* addr = prepareCacheAccess<true, sizeof(mem128_t)>(mem, &way, &idx);
CACHE_LOG("writeCache128 %8.8x adding to %d, way %x, lo %x, hi %x", mem, idx, way, value->lo, value->hi);
CacheLine line = cache.lineAt(idx, way);
line.tag.setDirty(); // Set dirty bit for writes;
u32 aligned = mem & ~0xF;
*reinterpret_cast<mem128_t*>(&line.data.bytes[aligned & 0x3f]) = *value;
CACHE_LOG("writeCache128 %8.8x adding to %d, way %x, lo %llx, hi %llx", mem, idx, way, value->lo, value->hi);
*reinterpret_cast<mem128_t*>(addr) = *value;
}
template <typename Int>
Int readCache(u32 mem)
{
int way = 0;
const int idx = getFreeCache(mem, &way);
int way, idx;
void* addr = prepareCacheAccess<false, sizeof(Int)>(mem, &way, &idx);
CacheLine line = cache.lineAt(idx, way);
u32 aligned = mem & ~(sizeof(Int) - 1);
Int value = *reinterpret_cast<Int*>(&line.data.bytes[aligned & 0x3f]);
Int value = *reinterpret_cast<Int*>(addr);
CACHE_LOG("readCache%d %8.8x from %d, way %d, value %llx", 8 * sizeof(value), mem, idx, way, value);
return value;
}
@ -286,9 +290,23 @@ u32 readCache32(u32 mem)
return readCache<u32>(mem);
}
u64 readCache64(u32 mem)
RETURNS_R64 readCache64(u32 mem)
{
return readCache<u64>(mem);
int way, idx;
void* addr = prepareCacheAccess<false, sizeof(u64)>(mem, &way, &idx);
r64 value = r64_load(addr);
CACHE_LOG("readCache64 %8.8x from %d, way %d, value %llx", mem, idx, way, *(u64*)&value);
return value;
}
RETURNS_R128 readCache128(u32 mem)
{
int way, idx;
void* addr = prepareCacheAccess<false, sizeof(mem128_t)>(mem, &way, &idx);
r128 value = r128_load(addr);
u64* vptr = reinterpret_cast<u64*>(&value);
CACHE_LOG("readCache128 %8.8x from %d, way %d, lo %llx, hi %llx", mem, idx, way, vptr[0], vptr[1]);
return value;
}
template <typename Op>

View File

@ -17,6 +17,7 @@
#define __CACHE_H__
#include "Common.h"
#include "SingleRegisterTypes.h"
void resetCache();
void writeCache8(u32 mem, u8 value);
@ -27,6 +28,7 @@ void writeCache128(u32 mem, const mem128_t* value);
u8 readCache8(u32 mem);
u16 readCache16(u32 mem);
u32 readCache32(u32 mem);
u64 readCache64(u32 mem);
RETURNS_R64 readCache64(u32 mem);
RETURNS_R128 readCache128(u32 mem);
#endif /* __CACHE_H__ */

View File

@ -33,7 +33,7 @@ static __fi void IntCHackCheck()
if( diff > 0 ) cpuRegs.cycle = g_nextEventCycle;
}
template< uint page > void __fastcall _hwRead128(u32 mem, mem128_t* result );
template< uint page > RETURNS_R128 _hwRead128(u32 mem);
template< uint page, bool intcstathack >
mem32_t __fastcall _hwRead32(u32 mem)
@ -71,9 +71,8 @@ mem32_t __fastcall _hwRead32(u32 mem)
DevCon.WriteLn( Color_Cyan, "Reading 32-bit FIFO data" );
u128 out128;
_hwRead128<page>(mem & ~0x0f, &out128);
return out128._u32[(mem >> 2) & 0x3];
r128 out128 = _hwRead128<page>(mem & ~0x0f);
return reinterpret_cast<u32*>(&out128)[(mem >> 2) & 0x3];
}
break;
@ -270,15 +269,14 @@ mem16_t __fastcall hwRead16_page_0F_INTC_HACK(u32 mem)
}
template< uint page >
static void _hwRead64(u32 mem, mem64_t* result )
static RETURNS_R64 _hwRead64(u32 mem)
{
pxAssume( (mem & 0x07) == 0 );
switch (page)
{
case 0x02:
*result = ipuRead64(mem);
return;
return ipuRead64(mem);
case 0x04:
case 0x05:
@ -295,11 +293,9 @@ static void _hwRead64(u32 mem, mem64_t* result )
uint wordpart = (mem >> 3) & 0x1;
DevCon.WriteLn( Color_Cyan, "Reading 64-bit FIFO data (%s 64 bits discarded)", wordpart ? "upper" : "lower" );
u128 out128;
_hwRead128<page>(mem & ~0x0f, &out128);
*result = out128._u64[wordpart];
r128 full = _hwRead128<page>(mem & ~0x0f);
return r64_load(reinterpret_cast<u64*>(&full) + wordpart);
}
return;
case 0x0F:
if ((mem & 0xffffff00) == 0x1000f300)
{
@ -308,30 +304,33 @@ static void _hwRead64(u32 mem, mem64_t* result )
{
ReadFifoSingleWord();
*result = psHu32(0x1000f3E0);
u32 lo = psHu32(0x1000f3E0);
ReadFifoSingleWord();
*result |= (u64)psHu32(0x1000f3E0) << 32;
u32 hi = psHu32(0x1000f3E0);
return r64_from_u32x2(lo, hi);
}
}
return;
default: break;
}
*result = _hwRead32<page,false>( mem );
return r64_from_u32(_hwRead32<page, false>(mem));
}
template< uint page >
void __fastcall hwRead64(u32 mem, mem64_t* result )
RETURNS_R64 hwRead64(u32 mem)
{
_hwRead64<page>( mem, result );
eeHwTraceLog( mem, *result, true );
r64 res = _hwRead64<page>(mem);
eeHwTraceLog(mem, *(u64*)&res, true);
return res;
}
template< uint page >
void __fastcall _hwRead128(u32 mem, mem128_t* result )
RETURNS_R128 _hwRead128(u32 mem)
{
pxAssume( (mem & 0x0f) == 0 );
alignas(16) mem128_t result;
// FIFOs are the only "legal" 128 bit registers, so we Handle them first.
// All other registers fall back on the 64-bit handler (and from there
// all non-IPU reads fall back to the 32-bit handler).
@ -339,15 +338,15 @@ void __fastcall _hwRead128(u32 mem, mem128_t* result )
switch (page)
{
case 0x05:
ReadFIFO_VIF1( result );
break;
ReadFIFO_VIF1(&result);
break;
case 0x07:
if (mem & 0x10)
ZeroQWC( result ); // IPUin is write-only
return r128_zero(); // IPUin is write-only
else
ReadFIFO_IPUout( result );
break;
ReadFIFO_IPUout(&result);
break;
case 0x04:
case 0x06:
@ -355,13 +354,12 @@ void __fastcall _hwRead128(u32 mem, mem128_t* result )
// [Ps2Confirm] Reads from these FIFOs (and IPUin) do one of the following:
// return zero, leave contents of the dest register unchanged, or in some
// indeterminate state. The actual behavior probably isn't important.
ZeroQWC( result );
break;
return r128_zero();
case 0x0F:
// todo: psx mode: this is new
if (((mem & 0x1FFFFFFF) >= EEMemoryMap::SBUS_PS1_Start) && ((mem & 0x1FFFFFFF) < EEMemoryMap::SBUS_PS1_End)) {
PGIFrQword((mem & 0x1FFFFFFF), result);
return;
PGIFrQword((mem & 0x1FFFFFFF), &result);
break;
}
// WARNING: this code is never executed anymore due to previous condition.
@ -373,37 +371,38 @@ void __fastcall _hwRead128(u32 mem, mem128_t* result )
{
ReadFifoSingleWord();
result->lo = psHu32(0x1000f3E0);
u32 part0 = psHu32(0x1000f3E0);
ReadFifoSingleWord();
result->lo |= (u64)psHu32(0x1000f3E0) << 32;
u32 part1 = psHu32(0x1000f3E0);
ReadFifoSingleWord();
result->hi = psHu32(0x1000f3E0);
u32 part2 = psHu32(0x1000f3E0);
ReadFifoSingleWord();
result->hi |= (u64)psHu32(0x1000f3E0) << 32;
u32 part3 = psHu32(0x1000f3E0);
return r128_from_u32x4(part0, part1, part2, part3);
}
}
break;
default:
_hwRead64<page>( mem, &result->lo );
result->hi = 0;
break;
return r128_from_r64_clean(_hwRead64<page>(mem));
}
return r128_load(&result);
}
template< uint page >
void __fastcall hwRead128(u32 mem, mem128_t* result )
RETURNS_R128 hwRead128(u32 mem)
{
_hwRead128<page>( mem, result );
eeHwTraceLog( mem, *result, true );
r128 res = _hwRead128<page>(mem);
eeHwTraceLog(mem, *(mem128_t*)&res, true);
return res;
}
#define InstantizeHwRead(pageidx) \
template mem8_t __fastcall hwRead8<pageidx>(u32 mem); \
template mem16_t __fastcall hwRead16<pageidx>(u32 mem); \
template mem32_t __fastcall hwRead32<pageidx>(u32 mem); \
template void __fastcall hwRead64<pageidx>(u32 mem, mem64_t* result ); \
template void __fastcall hwRead128<pageidx>(u32 mem, mem128_t* result ); \
template RETURNS_R64 hwRead64<pageidx>(u32 mem); \
template RETURNS_R128 hwRead128<pageidx>(u32 mem); \
template mem32_t __fastcall _hwRead32<pageidx, false>(u32 mem);
InstantizeHwRead(0x00); InstantizeHwRead(0x08);

View File

@ -241,7 +241,7 @@ __fi u32 ipuRead32(u32 mem)
return psHu32(IPU_CMD + mem);
}
__fi u64 ipuRead64(u32 mem)
__fi RETURNS_R64 ipuRead64(u32 mem)
{
// Note: It's assumed that mem's input value is always in the 0x10002000 page
// of memory (if not, it's probably bad code).
@ -263,7 +263,7 @@ __fi u64 ipuRead64(u32 mem)
if (ipuRegs.cmd.DATA & 0xffffff)
IPU_LOG("read64: IPU_CMD=BUSY=%x, DATA=%08X", ipuRegs.cmd.BUSY ? 1 : 0, ipuRegs.cmd.DATA);
return ipuRegs.cmd._u64;
return r64_load(&ipuRegs.cmd._u64);
}
ipucase(IPU_CTRL):
@ -282,7 +282,7 @@ __fi u64 ipuRead64(u32 mem)
IPU_LOG("read64: Unknown=%x", mem);
break;
}
return psHu64(IPU_CMD + mem);
return r64_load(&psHu64(IPU_CMD + mem));
}
void ipuSoftReset()

View File

@ -289,7 +289,7 @@ extern int coded_block_pattern;
extern void ipuReset();
extern u32 ipuRead32(u32 mem);
extern u64 ipuRead64(u32 mem);
extern RETURNS_R64 ipuRead64(u32 mem);
extern bool ipuWrite32(u32 mem,u32 value);
extern bool ipuWrite64(u32 mem,u64 value);

View File

@ -220,13 +220,13 @@ static mem32_t __fastcall nullRead32(u32 mem) {
MEM_LOG("Read uninstalled memory at address %08x", mem);
return 0;
}
static void __fastcall nullRead64(u32 mem, mem64_t *out) {
static RETURNS_R64 nullRead64(u32 mem) {
MEM_LOG("Read uninstalled memory at address %08x", mem);
*out = 0;
return r64_zero();
}
static void __fastcall nullRead128(u32 mem, mem128_t *out) {
static RETURNS_R128 nullRead128(u32 mem) {
MEM_LOG("Read uninstalled memory at address %08x", mem);
ZeroQWC(out);
return r128_zero();
}
static void __fastcall nullWrite8(u32 mem, mem8_t value)
{
@ -324,12 +324,12 @@ static mem32_t __fastcall _ext_memRead32(u32 mem)
}
template<int p>
static void __fastcall _ext_memRead64(u32 mem, mem64_t *out)
static RETURNS_R64 _ext_memRead64(u32 mem)
{
switch (p)
{
case 6: // gsm
*out = gsRead64(mem); return;
return r64_from_u64(gsRead64(mem));
default: break;
}
@ -338,15 +338,14 @@ static void __fastcall _ext_memRead64(u32 mem, mem64_t *out)
}
template<int p>
static void __fastcall _ext_memRead128(u32 mem, mem128_t *out)
static RETURNS_R128 _ext_memRead128(u32 mem)
{
switch (p)
{
//case 1: // hwm
// hwRead128(mem & ~0xa0000000, out); return;
// return hwRead128(mem & ~0xa0000000);
case 6: // gsm
CopyQWC(out,PS2GS_BASE(mem));
return;
return r128_load(PS2GS_BASE(mem));
default: break;
}
@ -475,19 +474,19 @@ template<int vunum> static mem32_t __fc vuMicroRead32(u32 addr) {
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return *(u32*)&vu->Micro[addr];
}
template<int vunum> static void __fc vuMicroRead64(u32 addr,mem64_t* data) {
template<int vunum> static RETURNS_R64 vuMicroRead64(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
*data=*(u64*)&vu->Micro[addr];
return r64_load(&vu->Micro[addr]);
}
template<int vunum> static void __fc vuMicroRead128(u32 addr,mem128_t* data) {
template<int vunum> static RETURNS_R128 vuMicroRead128(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
CopyQWC(data,&vu->Micro[addr]);
return r128_load(&vu->Micro[addr]);
}
// Profiled VU writes: Happen very infrequently, with exception of BIOS initialization (at most twice per
@ -578,17 +577,17 @@ template<int vunum> static mem32_t __fc vuDataRead32(u32 addr) {
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
return *(u32*)&vu->Mem[addr];
}
template<int vunum> static void __fc vuDataRead64(u32 addr, mem64_t* data) {
template<int vunum> static RETURNS_R64 vuDataRead64(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
*data=*(u64*)&vu->Mem[addr];
return r64_load(&vu->Mem[addr]);
}
template<int vunum> static void __fc vuDataRead128(u32 addr, mem128_t* data) {
template<int vunum> static RETURNS_R128 vuDataRead128(u32 addr) {
VURegs* vu = vunum ? &VU1 : &VU0;
addr &= vunum ? 0x3fff: 0xfff;
if (vunum && THREAD_VU1) vu1Thread.WaitVU();
CopyQWC(data,&vu->Mem[addr]);
return r128_load(&vu->Mem[addr]);
}
// VU Data Memory Writes...

View File

@ -136,11 +136,11 @@ extern void mmap_ResetBlockTracking();
#define memWrite16 vtlb_memWrite<mem16_t>
#define memWrite32 vtlb_memWrite<mem32_t>
static __fi void memRead64(u32 mem, mem64_t* out) { vtlb_memRead64(mem, out); }
static __fi void memRead64(u32 mem, mem64_t& out) { vtlb_memRead64(mem, &out); }
static __fi void memRead64(u32 mem, mem64_t* out) { _mm_storel_epi64((__m128i*)out, vtlb_memRead64(mem)); }
static __fi void memRead64(u32 mem, mem64_t& out) { memRead64(mem, &out); }
static __fi void memRead128(u32 mem, mem128_t* out) { vtlb_memRead128(mem, out); }
static __fi void memRead128(u32 mem, mem128_t& out) { vtlb_memRead128(mem, &out); }
static __fi void memRead128(u32 mem, mem128_t* out) { _mm_store_si128((__m128i*)out, vtlb_memRead128(mem)); }
static __fi void memRead128(u32 mem, mem128_t& out) { memRead128(mem, &out); }
static __fi void memWrite64(u32 mem, const mem64_t* val) { vtlb_memWrite64(mem, val); }
static __fi void memWrite64(u32 mem, const mem64_t& val) { vtlb_memWrite64(mem, &val); }

107
pcsx2/SingleRegisterTypes.h Normal file
View File

@ -0,0 +1,107 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// --------------------------------------------------------------------------------------
// r64 / r128 - Types that are guaranteed to fit in one register
// --------------------------------------------------------------------------------------
// Note: Recompilers rely on some of these types and the registers they allocate to,
// so be careful if you want to change them
#pragma once
#include <immintrin.h>
// Can't stick them in structs because it breaks calling convention things, yay
using r64 = __m128i;
using r128 = __m128i;
// Calling convention setting, yay
#define RETURNS_R64 r64 __vectorcall
#define RETURNS_R128 r128 __vectorcall
#define TAKES_R64 __vectorcall
#define TAKES_R128 __vectorcall
// And since we can't stick them in structs, we get lots of static methods, yay!
__forceinline static r64 r64_load(const void* ptr)
{
return _mm_loadl_epi64(reinterpret_cast<const r64*>(ptr));
}
__forceinline static r64 r64_zero()
{
return _mm_setzero_si128();
}
__forceinline static r64 r64_from_u32(u32 val)
{
return _mm_cvtsi32_si128(val);
}
__forceinline static r64 r64_from_u32x2(u32 lo, u32 hi)
{
return _mm_unpacklo_epi32(_mm_cvtsi32_si128(lo), _mm_cvtsi32_si128(hi));
}
__forceinline static r64 r64_from_u64(u64 val)
{
#ifdef _M_X86_64
return _mm_cvtsi64_si128(val);
#else
return r64_from_u32x2(val, val >> 64);
#endif
}
__forceinline static r128 r128_load(const void* ptr)
{
return _mm_load_si128(reinterpret_cast<const r128*>(ptr));
}
__forceinline static r128 r128_zero()
{
return _mm_setzero_si128();
}
/// Expects that r64 came from r64-handling code, and not from a recompiler or something
__forceinline static r128 r128_from_r64_clean(r64 val)
{
return val;
}
__forceinline static r128 r128_from_u32x4(u32 lo0, u32 lo1, u32 hi0, u32 hi1)
{
return _mm_setr_epi32(lo0, lo1, hi0, hi1);
}
template <typename u>
struct rhelper;
template <>
struct rhelper<u64>
{
using r = r64;
__forceinline static r load(void* ptr) { return r64_load(ptr); }
__forceinline static r zero() { return r64_zero(); }
};
template <>
struct rhelper<u128>
{
using r = r128;
__forceinline static r load(void* ptr) { return r128_load(ptr); }
__forceinline static r zero() { return r128_zero(); }
};
template <typename u>
using u_to_r = typename rhelper<u>::r;

View File

@ -952,6 +952,7 @@
<ClInclude Include="Dump.h" />
<ClInclude Include="IopCommon.h" />
<ClInclude Include="SaveState.h" />
<ClInclude Include="SingleRegisterTypes.h" />
<ClInclude Include="System.h" />
<ClInclude Include="System\SysThreads.h" />
<ClInclude Include="System\RecTypes.h" />

View File

@ -1687,6 +1687,9 @@
<ClInclude Include="SaveState.h">
<Filter>System\Include</Filter>
</ClInclude>
<ClInclude Include="SingleRegisterTypes.h">
<Filter>System\Include</Filter>
</ClInclude>
<ClInclude Include="System.h">
<Filter>System\Include</Filter>
</ClInclude>

View File

@ -41,8 +41,8 @@
template< uint page > extern mem8_t __fastcall hwRead8 (u32 mem);
template< uint page > extern mem16_t __fastcall hwRead16 (u32 mem);
template< uint page > extern mem32_t __fastcall hwRead32 (u32 mem);
template< uint page > extern void __fastcall hwRead64 (u32 mem, mem64_t* out );
template< uint page > extern void __fastcall hwRead128(u32 mem, mem128_t* out);
template< uint page > extern RETURNS_R64 hwRead64 (u32 mem);
template< uint page > extern RETURNS_R128 hwRead128(u32 mem);
// Internal hwRead32 which does not log reads, used by hwWrite8/16 to perform
// read-modify-write operations.

View File

@ -164,7 +164,7 @@ DataType __fastcall vtlb_memRead(u32 addr)
return 0; // technically unreachable, but suppresses warnings.
}
void __fastcall vtlb_memRead64(u32 mem, mem64_t *out)
RETURNS_R64 vtlb_memRead64(u32 mem)
{
auto vmv = vtlbdata.vmap[mem>>VTLB_PAGE_BITS];
@ -173,22 +173,22 @@ void __fastcall vtlb_memRead64(u32 mem, mem64_t *out)
if (!CHECK_EEREC) {
if(CHECK_CACHE && CheckCache(mem))
{
*out = readCache64(mem);
return;
return readCache64(mem);
}
}
*out = *(mem64_t*)vmv.assumePtr(mem);
return r64_load(reinterpret_cast<const void*>(vmv.assumePtr(mem)));
}
else
{
//has to: translate, find function, call function
u32 paddr = vmv.assumeHandlerGetPAddr(mem);
//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
vmv.assumeHandler<64, false>()(paddr, out);
return vmv.assumeHandler<64, false>()(paddr);
}
}
void __fastcall vtlb_memRead128(u32 mem, mem128_t *out)
RETURNS_R128 vtlb_memRead128(u32 mem)
{
auto vmv = vtlbdata.vmap[mem>>VTLB_PAGE_BITS];
@ -198,20 +198,18 @@ void __fastcall vtlb_memRead128(u32 mem, mem128_t *out)
{
if(CHECK_CACHE && CheckCache(mem))
{
out->lo = readCache64(mem);
out->hi = readCache64(mem+8);
return;
return readCache128(mem);
}
}
CopyQWC(out,(void*)vmv.assumePtr(mem));
return r128_load(reinterpret_cast<const void*>(vmv.assumePtr(mem)));
}
else
{
//has to: translate, find function, call function
u32 paddr = vmv.assumeHandlerGetPAddr(mem);
//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
vmv.assumeHandler<128, false>()(paddr, out);
return vmv.assumeHandler<128, false>()(paddr);
}
}
@ -436,28 +434,28 @@ static __ri void vtlb_BusError(u32 addr,u32 mode)
}
template<typename OperandType, u32 saddr>
OperandType __fastcall vtlbUnmappedVReadSm(u32 addr) { vtlb_Miss(addr|saddr,0); return 0; }
OperandType __fastcall vtlbUnmappedVReadSm(u32 addr) { vtlb_Miss(addr|saddr,0); return 0; }
template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedVReadLg(u32 addr,OperandType* data) { vtlb_Miss(addr|saddr,0); }
u_to_r<OperandType> __vectorcall vtlbUnmappedVReadLg(u32 addr) { vtlb_Miss(addr|saddr,0); return rhelper<OperandType>::zero(); }
template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedVWriteSm(u32 addr,OperandType data) { vtlb_Miss(addr|saddr,1); }
void __fastcall vtlbUnmappedVWriteSm(u32 addr,OperandType data) { vtlb_Miss(addr|saddr,1); }
template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedVWriteLg(u32 addr,const OperandType* data) { vtlb_Miss(addr|saddr,1); }
void __fastcall vtlbUnmappedVWriteLg(u32 addr,const OperandType* data) { vtlb_Miss(addr|saddr,1); }
template<typename OperandType, u32 saddr>
OperandType __fastcall vtlbUnmappedPReadSm(u32 addr) { vtlb_BusError(addr|saddr,0); return 0; }
OperandType __fastcall vtlbUnmappedPReadSm(u32 addr) { vtlb_BusError(addr|saddr,0); return 0; }
template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedPReadLg(u32 addr,OperandType* data) { vtlb_BusError(addr|saddr,0); }
u_to_r<OperandType> __vectorcall vtlbUnmappedPReadLg(u32 addr) { vtlb_BusError(addr|saddr,0); return rhelper<OperandType>::zero(); }
template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedPWriteSm(u32 addr,OperandType data) { vtlb_BusError(addr|saddr,1); }
void __fastcall vtlbUnmappedPWriteSm(u32 addr,OperandType data) { vtlb_BusError(addr|saddr,1); }
template<typename OperandType, u32 saddr>
void __fastcall vtlbUnmappedPWriteLg(u32 addr,const OperandType* data) { vtlb_BusError(addr|saddr,1); }
void __fastcall vtlbUnmappedPWriteLg(u32 addr,const OperandType* data) { vtlb_BusError(addr|saddr,1); }
// --------------------------------------------------------------------------------------
// VTLB mapping errors
@ -484,14 +482,16 @@ static mem32_t __fastcall vtlbDefaultPhyRead32(u32 addr)
return 0;
}
static void __fastcall vtlbDefaultPhyRead64(u32 addr, mem64_t* dest)
static __m128i __vectorcall vtlbDefaultPhyRead64(u32 addr)
{
pxFailDev(pxsFmt("(VTLB) Attempted read64 from unmapped physical address @ 0x%08X.", addr));
return r64_zero();
}
static void __fastcall vtlbDefaultPhyRead128(u32 addr, mem128_t* dest)
static __m128i __vectorcall vtlbDefaultPhyRead128(u32 addr)
{
pxFailDev(pxsFmt("(VTLB) Attempted read128 from unmapped physical address @ 0x%08X.", addr));
return r128_zero();
}
static void __fastcall vtlbDefaultPhyWrite8(u32 addr, mem8_t data)

View File

@ -16,6 +16,7 @@
#pragma once
#include "MemoryTypes.h"
#include "SingleRegisterTypes.h"
#include "common/PageFaultSource.h"
@ -25,8 +26,8 @@ static const uptr VTLB_AllocUpperBounds = _1gb * 2;
typedef mem8_t __fastcall vtlbMemR8FP(u32 addr);
typedef mem16_t __fastcall vtlbMemR16FP(u32 addr);
typedef mem32_t __fastcall vtlbMemR32FP(u32 addr);
typedef void __fastcall vtlbMemR64FP(u32 addr,mem64_t* data);
typedef void __fastcall vtlbMemR128FP(u32 addr,mem128_t* data);
typedef RETURNS_R64 vtlbMemR64FP(u32 addr);
typedef RETURNS_R128 vtlbMemR128FP(u32 addr);
// Specialized function pointers for each write type
typedef void __fastcall vtlbMemW8FP(u32 addr,mem8_t data);
@ -87,8 +88,8 @@ extern void vtlb_VMapUnmap(u32 vaddr,u32 sz);
template< typename DataType >
extern DataType __fastcall vtlb_memRead(u32 mem);
extern void __fastcall vtlb_memRead64(u32 mem, mem64_t *out);
extern void __fastcall vtlb_memRead128(u32 mem, mem128_t *out);
extern RETURNS_R64 vtlb_memRead64(u32 mem);
extern RETURNS_R128 vtlb_memRead128(u32 mem);
template< typename DataType >
extern void __fastcall vtlb_memWrite(u32 mem, DataType value);
@ -97,10 +98,10 @@ extern void __fastcall vtlb_memWrite128(u32 mem, const mem128_t* value);
extern void vtlb_DynGenWrite(u32 sz);
extern void vtlb_DynGenRead32(u32 bits, bool sign);
extern void vtlb_DynGenRead64(u32 sz);
extern int vtlb_DynGenRead64(u32 sz, int gpr);
extern void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const );
extern void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const );
extern int vtlb_DynGenRead64_Const( u32 bits, u32 addr_const, int gpr );
extern void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const );
// --------------------------------------------------------------------------------------

View File

@ -107,10 +107,8 @@ void recLoad64(u32 bits, bool sign)
// Load arg2 with the destination.
// 64/128 bit modes load the result directly into the cpuRegs.GPR struct.
if (_Rt_)
xLEA(arg2reg, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
else
xLEA(arg2reg, ptr[&dummyValue[0]]);
int gprreg = ((bits == 128) && _Rt_) ? _Rt_ : -1;
int reg;
if (GPR_IS_CONST1(_Rs_))
{
@ -121,7 +119,7 @@ void recLoad64(u32 bits, bool sign)
_eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 0);
vtlb_DynGenRead64_Const(bits, srcadr);
reg = vtlb_DynGenRead64_Const(bits, srcadr, gprreg);
}
else
{
@ -134,9 +132,17 @@ void recLoad64(u32 bits, bool sign)
_eeOnLoadWrite(_Rt_);
_deleteEEreg(_Rt_, 0);
iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(bits);
iFlushCall(FLUSH_FULLVTLB);
reg = vtlb_DynGenRead64(bits, gprreg);
}
if (gprreg == -1)
{
if (_Rt_)
xMOVQ(ptr64[&cpuRegs.GPR.r[_Rt_].UL[0]], xRegisterSSE(reg));
_freeXMMreg(reg);
}
}
@ -458,14 +464,14 @@ void recLDL()
return;
#ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]);
int t2reg;
if (GPR_IS_CONST1(_Rs_))
{
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07;
vtlb_DynGenRead64_Const(64, srcadr);
t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
}
else
{
@ -478,13 +484,12 @@ void recLDL()
iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(64);
t2reg = vtlb_DynGenRead64(64, -1);
}
int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ | MODE_WRITE);
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
int t2reg = _allocTempXMMreg(XMMT_INT, -1);
if (GPR_IS_CONST1(_Rs_))
{
@ -502,7 +507,7 @@ void recLDL()
}
xCMP(eax, 8);
xForwardJE32 skip;
xForwardJE8 skip;
//Calculate the shift from top bit to lowest
xMOV(edx, 64);
xSHL(eax, 3);
@ -512,18 +517,13 @@ void recLDL()
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(rtreg));
xMOVDQA(xRegisterSSE(t2reg), xRegisterSSE(t0reg));
xMOVDZX(xRegisterSSE(t1reg), edx);
xMOVQZX(xRegisterSSE(t0reg), ptr64[&dummyValue[0]]);
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPOR(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t0reg));
xForwardJump32 full;
skip.SetTarget();
xPSLL.Q(xRegisterSSE(t2reg), xRegisterSSE(t1reg));
xPOR(xRegisterSSE(t2reg), xRegisterSSE(t0reg));
xMOVL.PS(xRegisterSSE(rtreg), ptr128[&dummyValue[0]]);
full.SetTarget();
skip.SetTarget();
xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t2reg));
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
@ -546,14 +546,14 @@ void recLDR()
return;
#ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]);
int t2reg;
if (GPR_IS_CONST1(_Rs_))
{
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07;
vtlb_DynGenRead64_Const(64, srcadr);
t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
}
else
{
@ -566,13 +566,12 @@ void recLDR()
iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(64);
t2reg = vtlb_DynGenRead64(64, -1);
}
int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ | MODE_WRITE);
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
int t2reg = _allocTempXMMreg(XMMT_INT, -1);
if (GPR_IS_CONST1(_Rs_))
{
@ -599,18 +598,13 @@ void recLDR()
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(rtreg));
xMOVQZX(xRegisterSSE(t2reg), xRegisterSSE(t0reg));
xMOVDZX(xRegisterSSE(t1reg), eax); //shift*8
xMOVQZX(xRegisterSSE(t0reg), ptr64[&dummyValue[0]]);
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPOR(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t0reg));
xForwardJump32 full;
skip.SetTarget();
xPSRL.Q(xRegisterSSE(t2reg), xRegisterSSE(t1reg));
xPOR(xRegisterSSE(t2reg), xRegisterSSE(t0reg));
xMOVL.PS(xRegisterSSE(rtreg), ptr128[&dummyValue[0]]);
full.SetTarget();
skip.SetTarget();
xMOVSD(xRegisterSSE(rtreg), xRegisterSSE(t2reg));
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
@ -631,14 +625,13 @@ void recLDR()
void recSDL()
{
#ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]);
int t2reg;
if (GPR_IS_CONST1(_Rs_))
{
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07;
vtlb_DynGenRead64_Const(64, srcadr);
t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
}
else
{
@ -651,7 +644,7 @@ void recSDL()
iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(64);
t2reg = vtlb_DynGenRead64(64, -1);
}
_flushEEreg(_Rt_); // flush register to mem
@ -675,7 +668,7 @@ void recSDL()
}
xCMP(eax, 8);
xForwardJE32 skip;
xForwardJE8 skip;
//Calculate the shift from top bit to lowest
xMOV(edx, 64);
xSHL(eax, 3);
@ -684,8 +677,7 @@ void recSDL()
xMOVDZX(xRegisterSSE(t1reg), eax);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xMOVQZX(xRegisterSSE(t1reg), ptr64[&dummyValue[0]]); // This line is super slow, but using MOVDQA/MOVAPS is even slower!
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
// Shift over reg value (shift, PSLL.Q multiplies by 8)
xMOVDZX(xRegisterSSE(t1reg), edx);
@ -698,6 +690,7 @@ void recSDL()
_deleteGPRtoXMMreg(_Rt_, 3);
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
_freeXMMreg(t2reg);
xLEA(arg2reg, ptr128[&dummyValue[0]]);
@ -733,14 +726,13 @@ void recSDL()
void recSDR()
{
#ifdef LOADSTORE_RECOMPILE
xLEA(arg2reg, ptr128[&dummyValue[0]]);
int t2reg;
if (GPR_IS_CONST1(_Rs_))
{
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
srcadr &= ~0x07;
vtlb_DynGenRead64_Const(64, srcadr);
t2reg = vtlb_DynGenRead64_Const(64, srcadr, -1);
}
else
{
@ -753,7 +745,7 @@ void recSDR()
iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(64);
t2reg = vtlb_DynGenRead64(64, -1);
}
_flushEEreg(_Rt_); // flush register to mem
@ -776,7 +768,7 @@ void recSDR()
}
xCMP(eax, 0);
xForwardJE32 skip;
xForwardJE8 skip;
//Calculate the shift from top bit to lowest
xMOV(edx, 64);
xSHL(eax, 3);
@ -785,8 +777,7 @@ void recSDR()
xMOVDZX(xRegisterSSE(t1reg), edx);
xPCMP.EQD(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xMOVQZX(xRegisterSSE(t1reg), ptr64[&dummyValue[0]]); // This line is super slow, but using MOVDQA/MOVAPS is even slower!
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
// Shift over reg value (shift, PSLL.Q multiplies by 8)
xMOVDZX(xRegisterSSE(t1reg), eax);
@ -799,6 +790,7 @@ void recSDR()
_deleteGPRtoXMMreg(_Rt_, 3);
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
_freeXMMreg(t2reg);
xLEA(arg2reg, ptr128[&dummyValue[0]]);
@ -931,16 +923,13 @@ void recLQC2()
skip.SetTarget();
skipvuidle.SetTarget();
if (_Rt_)
xLEA(arg2reg, ptr[&VU0.VF[_Ft_].UD[0]]);
else
xLEA(arg2reg, ptr[&dummyValue[0]]);
int gpr;
if (GPR_IS_CONST1(_Rs_))
{
int addr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
vtlb_DynGenRead64_Const(128, addr);
gpr = vtlb_DynGenRead64_Const(128, addr, -1);
}
else
{
@ -950,9 +939,14 @@ void recLQC2()
iFlushCall(FLUSH_FULLVTLB);
vtlb_DynGenRead64(128);
gpr = vtlb_DynGenRead64(128, -1);
}
if (_Rt_)
xMOVAPS(ptr128[&VU0.VF[_Ft_].UD[0]], xRegisterSSE(gpr));
_freeXMMreg(gpr);
EE::Profiler.EmitOp(eeOpcode::LQC2);
}

View File

@ -172,6 +172,8 @@ namespace vtlb_private
// ------------------------------------------------------------------------
static void DynGen_DirectRead(u32 bits, bool sign)
{
pxAssert(bits == 8 || bits == 16 || bits == 32);
switch (bits)
{
case 8:
@ -192,15 +194,24 @@ namespace vtlb_private
xMOV(eax, ptr[arg1reg]);
break;
jNO_DEFAULT
}
}
static void DynGen_DirectRead64(u32 bits)
{
pxAssert(bits == 64 || bits == 128);
switch (bits) {
case 64:
iMOV64_Smart(ptr[arg2reg], ptr[arg1reg]);
xMOVQZX(xmm0, ptr64[arg1reg]);
break;
case 128:
iMOV128_SSE(ptr[arg2reg], ptr[arg1reg]);
xMOVAPS(xmm0, ptr128[arg1reg]);
break;
jNO_DEFAULT
jNO_DEFAULT
}
}
@ -375,16 +386,18 @@ static void vtlb_SetWriteback(u32* writeback)
//////////////////////////////////////////////////////////////////////////////////////////
// Dynarec Load Implementations
void vtlb_DynGenRead64(u32 bits)
int vtlb_DynGenRead64(u32 bits, int gpr)
{
pxAssume(bits == 64 || bits == 128);
u32* writeback = DynGen_PrepRegs();
int reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, 0) : _allocGPRtoXMMreg(0, gpr, MODE_WRITE); // Handler returns in xmm0
DynGen_IndirectDispatch(0, bits);
DynGen_DirectRead(bits, false);
DynGen_DirectRead64(bits);
vtlb_SetWriteback(writeback); // return target for indirect's call/ret
return reg;
}
// ------------------------------------------------------------------------
@ -406,25 +419,27 @@ void vtlb_DynGenRead32(u32 bits, bool sign)
// ------------------------------------------------------------------------
// TLB lookup is performed in const, with the assumption that the COP0/TLB will clear the
// recompiler if the TLB is changed.
void vtlb_DynGenRead64_Const(u32 bits, u32 addr_const)
int vtlb_DynGenRead64_Const(u32 bits, u32 addr_const, int gpr)
{
EE::Profiler.EmitConstMem(addr_const);
int reg;
auto vmv = vtlbdata.vmap[addr_const >> VTLB_PAGE_BITS];
if (!vmv.isHandler(addr_const))
{
auto ppf = vmv.assumePtr(addr_const);
void* ppf = reinterpret_cast<void*>(vmv.assumePtr(addr_const));
reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, -1) : _allocGPRtoXMMreg(-1, gpr, MODE_WRITE);
switch (bits)
{
case 64:
iMOV64_Smart(ptr[arg2reg], ptr[(void*)ppf]);
xMOVQZX(xRegisterSSE(reg), ptr64[ppf]);
break;
case 128:
iMOV128_SSE(ptr[arg2reg], ptr[(void*)ppf]);
xMOVAPS(xRegisterSSE(reg), ptr128[ppf]);
break;
jNO_DEFAULT
jNO_DEFAULT
}
}
else
@ -440,8 +455,10 @@ void vtlb_DynGenRead64_Const(u32 bits, u32 addr_const)
}
iFlushCall(FLUSH_FULLVTLB);
reg = gpr == -1 ? _allocTempXMMreg(XMMT_INT, 0) : _allocGPRtoXMMreg(0, gpr, MODE_WRITE); // Handler returns in xmm0
xFastCall(vmv.assumeHandlerGetRaw(szidx, 0), paddr, arg2reg);
}
return reg;
}
// ------------------------------------------------------------------------