PCSX2/EEcore:

* Now using SSE for all hardware register reads and writes (mainly MFIFO stuff) [don't expect a speedup, really -- it's more of a code simplification in this case].
 * [refactoring] Changed the EE Memory (vtlb) to use the u128 type instead of u64 for the 128-bit loads/stores (see mem128_t typedef)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@3626 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2010-08-09 15:42:13 +00:00
parent c425ece009
commit 20adde44a6
14 changed files with 170 additions and 134 deletions

View File

@ -102,9 +102,9 @@ typedef s32 sptr;
#endif
#endif
//////////////////////////////////////////////////////////////////////////////////////////
// A rough-and-ready cross platform 128-bit datatype, Non-SSE style.
//
// --------------------------------------------------------------------------------------
// u128 / s128 - A rough-and-ready cross platform 128-bit datatype, Non-SSE style.
// --------------------------------------------------------------------------------------
// Note: These structs don't provide any additional constructors because C++ doesn't allow
// the use of datatypes with constructors in unions (and since unions aren't the primary
// uses of these types, that means we can't have constructors). Embedded functions for
@ -129,6 +129,20 @@ struct u128
u128 retval = { src, 0 };
return retval;
}
// Narrowing conversions: each one casts `lo` down to the requested unsigned
// width; `hi` is not consulted.
operator u32() const { return static_cast<u32>(lo); }
operator u16() const { return static_cast<u16>(lo); }
operator u8() const { return static_cast<u8>(lo); }
// Equality: both halves (lo and hi) must match.
bool operator==( const u128& right ) const
{
	if (lo != right.lo) return false;
	return hi == right.hi;
}
// Inequality: true when EITHER half (lo or hi) differs, i.e. the exact
// negation of operator==.  Requiring both halves to differ (&&) would report
// values that differ in only one half as "not unequal".
bool operator!=( const u128& right ) const
{
	return (lo != right.lo) || (hi != right.hi);
}
};
struct s128
@ -149,6 +163,20 @@ struct s128
s128 retval = { src, (src < 0) ? -1 : 0 };
return retval;
}
// Narrowing conversions: `lo` is first cast to the signed type of the target
// width, and that value is then implicitly converted to the unsigned return
// type; `hi` is not consulted.
operator u32() const { return static_cast<s32>(lo); }
operator u16() const { return static_cast<s16>(lo); }
operator u8() const { return static_cast<s8>(lo); }
// Equality: both halves (lo and hi) must match.
bool operator==( const s128& right ) const
{
	if (lo != right.lo) return false;
	return hi == right.hi;
}
// Inequality: true when EITHER half (lo or hi) differs, i.e. the exact
// negation of operator==.  Requiring both halves to differ (&&) would report
// values that differ in only one half as "not unequal".
bool operator!=( const s128& right ) const
{
	return (lo != right.lo) || (hi != right.hi);
}
};
#else

View File

@ -43,17 +43,16 @@
//////////////////////////////////////////////////////////////////////////
// ReadFIFO Pages
void __fastcall ReadFIFO_page_4(u32 mem, u64 *out)
void __fastcall ReadFIFO_page_4(u32 mem, mem128_t* out)
{
pxAssert( (mem >= VIF0_FIFO) && (mem < VIF1_FIFO) );
VIF_LOG("ReadFIFO/VIF0 0x%08X", mem);
out[0] = psHu64(VIF0_FIFO);
out[1] = psHu64(VIF0_FIFO + 8);
CopyQWC( out, &psHu128(VIF0_FIFO) );
}
void __fastcall ReadFIFO_page_5(u32 mem, u64 *out)
void __fastcall ReadFIFO_page_5(u32 mem, mem128_t* out)
{
pxAssert( (mem >= VIF1_FIFO) && (mem < GIF_FIFO) );
@ -80,18 +79,16 @@ void __fastcall ReadFIFO_page_5(u32 mem, u64 *out)
}
}
out[0] = psHu64(VIF1_FIFO);
out[1] = psHu64(VIF1_FIFO + 8);
CopyQWC( out, &psHu128(VIF1_FIFO) );
}
void __fastcall ReadFIFO_page_6(u32 mem, u64 *out)
void __fastcall ReadFIFO_page_6(u32 mem, mem128_t* out)
{
pxAssert( (mem >= GIF_FIFO) && (mem < IPUout_FIFO) );
DevCon.Warning( "ReadFIFO/GIF, addr=0x%x", mem );
out[0] = psHu64(GIF_FIFO);
out[1] = psHu64(GIF_FIFO + 8);
CopyQWC( out, &psHu128(GIF_FIFO) );
}
// ReadFIFO_page_7 is contained in IPU_Fifo.cpp
@ -105,8 +102,7 @@ void __fastcall WriteFIFO_page_4(u32 mem, const mem128_t *value)
VIF_LOG("WriteFIFO/VIF0, addr=0x%08X", mem);
psHu64(VIF0_FIFO) = value[0];
psHu64(VIF0_FIFO + 8) = value[1];
CopyQWC(&psHu128(VIF0_FIFO), value);
vif0ch->qwc += 1;
if(vif0.irqoffset != 0 && vif0.vifstalled == true) DevCon.Warning("Offset on VIF0 FIFO start!");
@ -130,8 +126,7 @@ void __fastcall WriteFIFO_page_5(u32 mem, const mem128_t *value)
VIF_LOG("WriteFIFO/VIF1, addr=0x%08X", mem);
psHu64(VIF1_FIFO) = value[0];
psHu64(VIF1_FIFO + 8) = value[1];
CopyQWC(&psHu128(VIF1_FIFO), value);
if (vif1Regs->stat.FDR)
DevCon.Warning("writing to fifo when fdr is set!");
@ -168,15 +163,10 @@ void __fastcall WriteFIFO_page_6(u32 mem, const mem128_t *value)
pxAssert( (mem >= GIF_FIFO) && (mem < IPUout_FIFO) );
GIF_LOG("WriteFIFO/GIF, addr=0x%08X", mem);
psHu64(GIF_FIFO) = value[0];
psHu64(GIF_FIFO + 8) = value[1];
CopyQWC(&psHu128(GIF_FIFO), value);
CopyQWC(nloop0_packet, value);
nloop0_packet[0] = psHu32(GIF_FIFO);
nloop0_packet[1] = psHu32(GIF_FIFO + 4);
nloop0_packet[2] = psHu32(GIF_FIFO + 8);
nloop0_packet[3] = psHu32(GIF_FIFO + 12);
GetMTGS().PrepDataPacket(GIF_PATH_3, 1);
//u64* data = (u64*)GetMTGS().GetDataPacketPtr();
GIFPath_CopyTag( GIF_PATH_3, (u128*)nloop0_packet, 1 );
GetMTGS().SendDataPacket();
if(GSTransferStatus.PTH3 == STOPPED_MODE && gifRegs->stat.APATH == GIF_APATH3 )

View File

@ -346,11 +346,7 @@ void __fastcall gsWrite128_generic( u32 mem, const mem128_t* value )
GIF_LOG("GS Write128 at %8.8lx with data %8.8x_%8.8x_%8.8x_%8.8x", mem,
srcval32[3], srcval32[2], srcval32[1], srcval32[0]);
const uint masked_mem = mem & 0x13ff;
u64* writeTo = (u64*)(&PS2MEM_GS[masked_mem]);
writeTo[0] = value[0];
writeTo[1] = value[1];
CopyQWC(PS2GS_BASE(mem), value);
}
__fi u8 gsRead8(u32 mem)

View File

@ -475,14 +475,14 @@ void __fastcall hwRead64_generic(u32 mem, mem64_t* result )
void __fastcall hwRead128_page_00(u32 mem, mem128_t* result )
{
result[0] = hwRead32_page_00( mem );
result[1] = 0;
result->lo = hwRead32_page_00( mem );
result->hi = 0;
}
void __fastcall hwRead128_page_01(u32 mem, mem128_t* result )
{
result[0] = hwRead32_page_01( mem );
result[1] = 0;
result->lo = hwRead32_page_01( mem );
result->hi = 0;
}
void __fastcall hwRead128_page_02(u32 mem, mem128_t* result )
@ -493,8 +493,6 @@ void __fastcall hwRead128_page_02(u32 mem, mem128_t* result )
void __fastcall hwRead128_generic(u32 mem, mem128_t* out)
{
out[0] = psHu64(mem);
out[1] = psHu64(mem+8);
CopyQWC(out, &psHu128(mem));
UnknownHW_LOG("Hardware Read 128 at %x",mem);
}

View File

@ -1227,25 +1227,27 @@ void __fastcall hwWrite128_generic(u32 mem, const mem128_t *srcval)
{
//hwWrite128( mem, srcval ); return;
const uint srcval32 = *srcval;
switch (mem)
{
case INTC_STAT:
HW_LOG("INTC_STAT Write 64bit %x", (u32)srcval[0]);
psHu32(INTC_STAT) &= ~srcval[0];
HW_LOG("INTC_STAT Write 128bit %x (lower 32bits effective)", srcval32);
psHu32(INTC_STAT) &= ~srcval32;
//cpuTestINTCInts();
break;
case INTC_MASK:
HW_LOG("INTC_MASK Write 64bit %x", (u32)srcval[0]);
psHu32(INTC_MASK) ^= (u16)srcval[0];
HW_LOG("INTC_MASK Write 128bit %x (lower 32bits effective)", srcval32);
psHu32(INTC_MASK) ^= (u16)srcval32;
cpuTestINTCInts();
break;
case DMAC_ENABLEW: // DMAC_ENABLEW
oldvalue = psHu8(DMAC_ENABLEW + 2);
psHu32(DMAC_ENABLEW) = srcval[0];
psHu32(DMAC_ENABLER) = srcval[0];
if (((oldvalue & 0x1) == 1) && (((srcval[0] >> 16) & 0x1) == 0))
psHu32(DMAC_ENABLEW) = srcval32;
psHu32(DMAC_ENABLER) = srcval32;
if (((oldvalue & 0x1) == 1) && (((srcval32 >> 16) & 0x1) == 0))
{
if (!QueuedDMA.empty()) StartQueuedDMA();
}
@ -1257,9 +1259,7 @@ void __fastcall hwWrite128_generic(u32 mem, const mem128_t *srcval)
break;
default:
psHu64(mem ) = srcval[0];
psHu64(mem+8) = srcval[1];
CopyQWC(&psHu128(mem), srcval);
UnknownHW_LOG("Unknown Hardware write 128 at %x with value %x_%x (status=%x)", mem, srcval[1], srcval[0], cpuRegs.CP0.n.Status.val);
break;
}

View File

@ -179,7 +179,7 @@ __fi bool decoder_t::ReadIpuData(u128* out)
return true;
}
void __fastcall ReadFIFO_page_7(u32 mem, u64 *out)
void __fastcall ReadFIFO_page_7(u32 mem, mem128_t* out)
{
pxAssert( (mem >= IPUout_FIFO) && (mem < D0_CHCR) );
@ -188,7 +188,7 @@ void __fastcall ReadFIFO_page_7(u32 mem, u64 *out)
if (mem == 0) // IPUout_FIFO
{
if (decoder.ReadIpuData((u128*)out))
if (decoder.ReadIpuData(out))
{
ipu_fifo.out.readpos = (ipu_fifo.out.readpos + 4) & 31;
}

View File

@ -245,7 +245,7 @@ static void __fastcall nullRead64(u32 mem, mem64_t *out) {
}
static void __fastcall nullRead128(u32 mem, mem128_t *out) {
MEM_LOG("Read uninstalled memory at address %08x", mem);
*out = 0;
ZeroQWC(out);
}
static void __fastcall nullWrite8(u32 mem, mem8_t value)
{
@ -363,8 +363,8 @@ static void __fastcall _ext_memRead128(u32 mem, mem128_t *out)
//case 1: // hwm
// hwRead128(mem & ~0xa0000000, out); return;
case 6: // gsm
out[0] = gsRead64(mem );
out[1] = gsRead64(mem+8); return;
CopyQWC(out,PS2GS_BASE(mem));
return;
}
MEM_LOG("Unknown Memory read128 from address %8.8x", mem);
@ -519,8 +519,7 @@ static void __fastcall vuMicroRead128(u32 addr,mem128_t* data)
addr&=(vunum==0)?0xfff:0x3fff;
VURegs* vu=(vunum==0)?&VU0:&VU1;
data[0]=*(u64*)&vu->Micro[addr];
data[1]=*(u64*)&vu->Micro[addr+8];
CopyQWC(data,&vu->Micro[addr]);
}
// Profiled VU writes: Happen very infrequently, with exception of BIOS initialization (at most twice per
@ -584,11 +583,10 @@ static void __fastcall vuMicroWrite128(u32 addr,const mem128_t* data)
addr &= (vunum==0) ? 0xfff : 0x3fff;
VURegs& vu = (vunum==0) ? VU0 : VU1;
if (*(u64*)&vu.Micro[addr]!=data[0] || *(u64*)&vu.Micro[addr+8]!=data[1])
if ((u128&)vu.Micro[addr] != *data)
{
ClearVuFunc<vunum>(addr&(~7), 16);
*(u64*)&vu.Micro[addr]=data[0];
*(u64*)&vu.Micro[addr+8]=data[1];
CopyQWC(&vu.Micro[addr],data);
}
}

View File

@ -23,6 +23,18 @@
#include "MemoryTypes.h"
#include "vtlb.h"
#include <xmmintrin.h>
// Copies one quadword (16 bytes) from src to dest with a single SSE load/store
// pair.  _mm_load_ps / _mm_store_ps are the *aligned* forms, so both pointers
// must be 16-byte aligned.
static __fi void CopyQWC( void* dest, const void* src )
{
	const __m128 qword = _mm_load_ps( (const float*)src );
	_mm_store_ps( (float*)dest, qword );
}
// Writes one all-zero quadword (16 bytes) to dest.  _mm_store_ps is the
// *aligned* store, so dest must be 16-byte aligned.
static __fi void ZeroQWC( void* dest )
{
	const __m128 zero = _mm_setzero_ps();
	_mm_store_ps( (float*)dest, zero );
}
extern u8 *psM; //32mb Main Ram
extern u8 *psR; //4mb rom area
extern u8 *psR1; //256kb rom1 area (actually 196kb, but can't mask this)
@ -68,6 +80,7 @@ extern u8 *psMHW;
#define psHu16(mem) (*(u16*)&PS2MEM_HW[(mem) & 0xffff])
#define psHu32(mem) (*(u32*)&PS2MEM_HW[(mem) & 0xffff])
#define psHu64(mem) (*(u64*)&PS2MEM_HW[(mem) & 0xffff])
#define psHu128(mem)(*(u128*)&PS2MEM_HW[(mem) & 0xffff])
#define psMs8(mem) (*(s8 *)&PS2MEM_BASE[(mem) & 0x1ffffff])
#define psMs16(mem) (*(s16*)&PS2MEM_BASE[(mem) & 0x1ffffff])

View File

@ -35,4 +35,4 @@ typedef u8 mem8_t;
typedef u16 mem16_t;
typedef u32 mem32_t;
typedef u64 mem64_t;
typedef u64 mem128_t;
typedef u128 mem128_t;

View File

@ -50,6 +50,8 @@ extern s32 EEsCycle;
extern u32 EEoCycle;
union GPR_reg { // Declare union type GPR register
u128 UQ;
s128 SQ;
u64 UD[2]; //128 bits
s64 SD[2];
u32 UL[4];

View File

@ -627,9 +627,9 @@ static __aligned16 GPR_reg m_dummy_gpr_zero;
// Returns the x86 address of the requested GPR, which is safe for writing. (includes
// special handling for returning a dummy var for GPR0(zero), so that its value is
// always preserved)
static u64* gpr_GetWritePtr( uint gpr )
static GPR_reg* gpr_GetWritePtr( uint gpr )
{
return (u64*)(( gpr == 0 ) ? &m_dummy_gpr_zero : &cpuRegs.GPR.r[gpr]);
return (( gpr == 0 ) ? &m_dummy_gpr_zero : &cpuRegs.GPR.r[gpr]);
}
void LD()
@ -639,7 +639,7 @@ void LD()
if( addr & 7 )
throw R5900Exception::AddressError( addr, false );
memRead64(addr, gpr_GetWritePtr(_Rt_));
memRead64(addr, (u64*)gpr_GetWritePtr(_Rt_));
}
static const u64 LDL_MASK[8] =
@ -687,7 +687,7 @@ void LQ()
// an address error due to unaligned access isn't possible like it is on other loads/stores.
u32 addr = cpuRegs.GPR.r[_Rs_].UL[0] + _Imm_;
memRead128(addr & ~0xf, gpr_GetWritePtr(_Rt_));
memRead128(addr & ~0xf, (u128*)gpr_GetWritePtr(_Rt_));
}
void SB()
@ -816,7 +816,7 @@ void SQ()
// an address error due to unaligned access isn't possible like it is on other loads/stores.
u32 addr = cpuRegs.GPR.r[_Rs_].UL[0] + _Imm_;
memWrite128(addr & ~0xf, &cpuRegs.GPR.r[_Rt_].UD[0]);
memWrite128(addr & ~0xf, cpuRegs.GPR.r[_Rt_].UD);
}
/*********************************************************

View File

@ -88,7 +88,7 @@ namespace OpcodeImpl
void LQC2() {
u32 addr = cpuRegs.GPR.r[_Rs_].UL[0] + (s16)cpuRegs.code;
if (_Ft_) {
memRead128(addr, &VU0.VF[_Ft_].UD[0]);
memRead128(addr, VU0.VF[_Ft_].UD);
} else {
u64 val[2];
memRead128(addr, val);
@ -102,7 +102,7 @@ namespace OpcodeImpl
u32 addr = _Imm_ + cpuRegs.GPR.r[_Rs_].UL[0];
//memWrite64(addr, VU0.VF[_Ft_].UD[0]);
//memWrite64(addr+8,VU0.VF[_Ft_].UD[1]);
memWrite128(addr, &VU0.VF[_Ft_].UD[0]);
memWrite128(addr, VU0.VF[_Ft_].UD);
}
}}}

View File

@ -91,38 +91,6 @@ __fi DataType __fastcall MemOp_r0(u32 addr)
return 0; // technically unreachable, but suppresses warnings.
}
// ------------------------------------------------------------------------
// Interpreted VTLB lookup for 64 and 128 bit accesses.
//
// Template parameters:
//   DataSize - access width in bits; only 64 and 128 are handled by the switch below.
//   DataType - element type read through `data`.
// Parameters:
//   addr - address used to index vtlbdata.vmap (by addr >> VTLB_PAGE_BITS).
//   data - destination; data[0] is always written on the direct path, and data[1] is
//          written as well when DataSize is 128.
template<int DataSize,typename DataType>
__fi void MemOp_r1(u32 addr, DataType* data)
{
// Per-page map entry; added to the address to form either a direct pointer or a handler reference.
u32 vmv=vtlbdata.vmap[addr>>VTLB_PAGE_BITS];
s32 ppf=addr+vmv;
// A non-negative sum is used directly as a pointer to the data.
if (!(ppf<0))
{
data[0]=*reinterpret_cast<DataType*>(ppf);
// 128-bit reads fetch a second element located 8 bytes past the first.
if (DataSize==128)
data[1]=*reinterpret_cast<DataType*>(ppf+8);
}
else
{
//has to: translate, find function, call function
// The low 8 bits of the map entry select the handler; removing them from the sum and
// adding 0x80000000 yields the address passed to that handler.
u32 hand=(u8)vmv;
u32 paddr=ppf-hand+0x80000000;
//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
//return reinterpret_cast<TemplateHelper<DataSize,false>::HandlerType*>(RWFT[TemplateHelper<DataSize,false>::sidx][0][hand])(paddr,data);
// RWFT is indexed [3] for 64-bit and [4] for 128-bit accesses; the middle index 0 is the read table.
switch( DataSize )
{
case 64: ((vtlbMemR64FP*)vtlbdata.RWFT[3][0][hand])(paddr, data); break;
case 128: ((vtlbMemR128FP*)vtlbdata.RWFT[4][0][hand])(paddr, data); break;
jNO_DEFAULT;
}
}
}
// ------------------------------------------------------------------------
template<int DataSize,typename DataType>
__fi void MemOp_w0(u32 addr, DataType data)
@ -151,35 +119,6 @@ __fi void MemOp_w0(u32 addr, DataType data)
}
}
// ------------------------------------------------------------------------
// Interpreted VTLB store for 64 and 128 bit accesses (write counterpart of MemOp_r1).
//
// Template parameters:
//   DataSize - access width in bits; verified below to be 64 or 128.
//   DataType - element type read through `data`.
// Parameters:
//   addr - address used to index vtlbdata.vmap (by addr >> VTLB_PAGE_BITS).
//   data - source; *data is always stored on the direct path, and data[1] is stored as
//          well when DataSize is 128.
template<int DataSize,typename DataType>
__fi void MemOp_w1(u32 addr,const DataType* data)
{
verify(DataSize==128 || DataSize==64);
// Per-page map entry; added to the address to form either a direct pointer or a handler reference.
u32 vmv=vtlbdata.vmap[addr>>VTLB_PAGE_BITS];
s32 ppf=addr+vmv;
// A non-negative sum is used directly as a pointer to the destination.
if (!(ppf<0))
{
*reinterpret_cast<DataType*>(ppf)=*data;
// 128-bit writes store a second element 8 bytes past the first.
if (DataSize==128)
*reinterpret_cast<DataType*>(ppf+8)=data[1];
}
else
{
//has to: translate, find function, call function
// The low 8 bits of the map entry select the handler; removing them from the sum and
// adding 0x80000000 yields the address passed to that handler.
u32 hand=(u8)vmv;
u32 paddr=ppf-hand+0x80000000;
//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
// RWFT is indexed [3] for 64-bit and [4] for 128-bit accesses; the middle index 1 is the write table.
switch( DataSize )
{
case 64: return ((vtlbMemW64FP*)vtlbdata.RWFT[3][1][hand])(paddr, data);
case 128: return ((vtlbMemW128FP*)vtlbdata.RWFT[4][1][hand])(paddr, data);
jNO_DEFAULT;
}
}
}
mem8_t __fastcall vtlb_memRead8(u32 mem)
{
return MemOp_r0<8,mem8_t>(mem);
@ -192,14 +131,49 @@ mem32_t __fastcall vtlb_memRead32(u32 mem)
{
return MemOp_r0<32,mem32_t>(mem);
}
void __fastcall vtlb_memRead64(u32 mem, u64 *out)
void __fastcall vtlb_memRead64(u32 mem, mem64_t *out)
{
return MemOp_r1<64,mem64_t>(mem,out);
}
void __fastcall vtlb_memRead128(u32 mem, u64 *out)
u32 vmv=vtlbdata.vmap[mem>>VTLB_PAGE_BITS];
s32 ppf=mem+vmv;
if (!(ppf<0))
{
return MemOp_r1<128,mem128_t>(mem,out);
*out = *(mem64_t*)ppf;
}
else
{
//has to: translate, find function, call function
u32 hand=(u8)vmv;
u32 paddr=ppf-hand+0x80000000;
//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
((vtlbMemR64FP*)vtlbdata.RWFT[3][0][hand])(paddr, out);
}
}
// 128-bit VTLB read.
//   mem - address used to index vtlbdata.vmap (by mem >> VTLB_PAGE_BITS).
//   out - receives the quadword; written via CopyQWC on the direct path, or
//         handed to the registered 128-bit read handler otherwise.
void __fastcall vtlb_memRead128(u32 mem, mem128_t *out)
{
	const u32 mapEntry = vtlbdata.vmap[mem>>VTLB_PAGE_BITS];
	const s32 target   = mem+mapEntry;

	// Non-negative sum: it is used directly as the source pointer.
	if (target >= 0)
	{
		CopyQWC(out,(void*)target);
		return;
	}

	// Otherwise: translate, find function, call function.  The low 8 bits of
	// the map entry select the handler in the 128-bit read table.
	const u32 handlerIdx = (u8)mapEntry;
	const u32 paddr      = target-handlerIdx+0x80000000;
	//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
	((vtlbMemR128FP*)vtlbdata.RWFT[4][0][handlerIdx])(paddr, out);
}
// Convenience overload: lets callers pass a u64[2] pair as the 128-bit
// destination; it is reinterpreted as a mem128_t and forwarded.
// NOTE(review): the mem128_t* overload stores through CopyQWC, which uses an
// aligned SSE store -- confirm every u64[2] passed here is 16-byte aligned.
void __fastcall vtlb_memRead128(u32 mem, u64 (&out)[2])
{
	mem128_t* const qword = (mem128_t*)out;
	vtlb_memRead128(mem, qword);
}
void __fastcall vtlb_memWrite8 (u32 mem, mem8_t value)
{
MemOp_w0<8,mem8_t>(mem,value);
@ -214,11 +188,45 @@ void __fastcall vtlb_memWrite32(u32 mem, mem32_t value)
}
void __fastcall vtlb_memWrite64(u32 mem, const mem64_t* value)
{
MemOp_w1<64,mem64_t>(mem,value);
u32 vmv=vtlbdata.vmap[mem>>VTLB_PAGE_BITS];
s32 ppf=mem+vmv;
if (!(ppf<0))
{
*(mem64_t*)ppf = *value;
}
else
{
//has to: translate, find function, call function
u32 hand=(u8)vmv;
u32 paddr=ppf-hand+0x80000000;
//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
((vtlbMemW64FP*)vtlbdata.RWFT[3][1][hand])(paddr, value);
}
}
void __fastcall vtlb_memWrite128(u32 mem, const mem128_t *value)
{
MemOp_w1<128,mem128_t>(mem,value);
u32 vmv=vtlbdata.vmap[mem>>VTLB_PAGE_BITS];
s32 ppf=mem+vmv;
if (!(ppf<0))
{
CopyQWC((void*)ppf, value);
}
else
{
//has to: translate, find function, call function
u32 hand=(u8)vmv;
u32 paddr=ppf-hand+0x80000000;
//Console.WriteLn("Translated 0x%08X to 0x%08X", addr,paddr);
((vtlbMemW128FP*)vtlbdata.RWFT[4][1][hand])(paddr, value);
}
}
// Convenience overload: lets callers pass a u64[2] pair as the 128-bit source
// value; it is reinterpreted as a const mem128_t and forwarded.
// (Parameter is named `value` to match the declaration and its role as input.)
// NOTE(review): the mem128_t* overload reads through CopyQWC, which uses an
// aligned SSE load -- confirm every u64[2] passed here is 16-byte aligned.
void __fastcall vtlb_memWrite128(u32 mem, const u64 (&value)[2])
{
	vtlb_memWrite128(mem, (const mem128_t*)value);
}
// ===========================================================================================

View File

@ -68,13 +68,16 @@ extern void vtlb_VMapUnmap(u32 vaddr,u32 sz);
extern mem8_t __fastcall vtlb_memRead8(u32 mem);
extern mem16_t __fastcall vtlb_memRead16(u32 mem);
extern u32 __fastcall vtlb_memRead32(u32 mem);
extern void __fastcall vtlb_memRead64(u32 mem, u64 *out);
extern void __fastcall vtlb_memRead128(u32 mem, u64 *out);
extern void __fastcall vtlb_memRead64(u32 mem, mem64_t *out);
extern void __fastcall vtlb_memRead128(u32 mem, mem128_t *out);
extern void __fastcall vtlb_memRead128(u32 mem, u64 (&out)[2]);
extern void __fastcall vtlb_memWrite8 (u32 mem, mem8_t value);
extern void __fastcall vtlb_memWrite16(u32 mem, mem16_t value);
extern void __fastcall vtlb_memWrite32(u32 mem, u32 value);
extern void __fastcall vtlb_memWrite64(u32 mem, const u64* value);
extern void __fastcall vtlb_memWrite128(u32 mem, const u64* value);
extern void __fastcall vtlb_memWrite64(u32 mem, const mem64_t* value);
extern void __fastcall vtlb_memWrite128(u32 mem, const mem128_t* value);
extern void __fastcall vtlb_memWrite128(u32 mem, const u64 (&value)[2]);
extern void vtlb_DynGenWrite(u32 sz);
extern void vtlb_DynGenRead32(u32 bits, bool sign);