mirror of https://github.com/PCSX2/pcsx2.git
experimental new vif unpack rewrite... (currently disabled and slow)
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2345 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
dbb6898cd6
commit
79ee87a90f
|
@ -313,15 +313,25 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
|
|||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef newVif1
|
||||
extern void initNewVif(int idx);
|
||||
extern int nVifUnpack(int idx, u32 *data);
|
||||
static int testVif = 0;
|
||||
#endif
|
||||
static int __fastcall Vif1TransUnpack(u32 *data)
|
||||
{
|
||||
#ifdef newVif1
|
||||
if (!testVif) { initNewVif(1); testVif = 1; }
|
||||
//int temp = nVifUnpack(1, data);
|
||||
//if (temp >= 0) return temp;
|
||||
return nVifUnpack(1, data);
|
||||
#endif
|
||||
XMMRegisters::Freeze();
|
||||
|
||||
if (vif1.vifpacketsize < vif1.tag.size)
|
||||
{
|
||||
int ret = vif1.tag.size;
|
||||
/* size is less that the total size, transfer is 'in pieces' */
|
||||
// size is less that the total size, transfer is 'in pieces'
|
||||
if (vif1Regs->offset != 0 || vif1.cl != 0)
|
||||
{
|
||||
vif1.tag.size -= vif1.vifpacketsize - VIFalign<1>(data, &vif1.tag, vif1.vifpacketsize);
|
||||
|
|
|
@ -460,11 +460,13 @@ template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
|
|||
}
|
||||
return size>>2;
|
||||
}
|
||||
|
||||
#include "newVif.h"
|
||||
#ifndef newVif
|
||||
template void VIFunpack<0>(u32 *data, vifCode *v, u32 size);
|
||||
template void VIFunpack<1>(u32 *data, vifCode *v, u32 size);
|
||||
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||
{
|
||||
//DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
|
||||
u32 *dest;
|
||||
u32 unpackType;
|
||||
UNPACKFUNCTYPE func;
|
||||
|
@ -786,6 +788,7 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
|||
}
|
||||
}
|
||||
}
|
||||
#endif // #ifndef newVif
|
||||
|
||||
template void vuExecMicro<0>(u32 addr);
|
||||
template void vuExecMicro<1>(u32 addr);
|
||||
|
|
|
@ -60,4 +60,7 @@ static __forceinline u32 vif_size(u8 num)
|
|||
return (num == 0) ? 0x1000 : 0x4000;
|
||||
}
|
||||
|
||||
//#define newVif // Enable 'newVif' Code (if the below macros are not defined, it will use old non-sse code)
|
||||
//#define newVif1 // Use New Code for Vif1 Unpacks (needs newVif defined)
|
||||
//#define newVif0 // Use New Code for Vif0 Unpacks (not implemented)
|
||||
#endif
|
||||
|
|
|
@ -836,6 +836,30 @@
|
|||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="newVif"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\x86\newVif.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\newVif_BlockBuffer.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\newVif_OldUnpack.inl"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\newVif_Unpack.inl"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\newVif_UnpackGen.inl"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="SPR"
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef newVif
|
||||
#include "x86emitter/x86emitter.h"
|
||||
using namespace x86Emitter;
|
||||
extern void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW = 0);
|
||||
extern void _nVifUnpack(int idx, u8 *data, u32 size);
|
||||
|
||||
struct instBlock { u8 data[16*64]; };
|
||||
static __pagealigned instBlock nVifUpk[2][2][4][3][16]; // [USN][Masking][curCycle][CyclesToWrite-1][Unpack Type]
|
||||
static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
|
||||
typedef u32 (__fastcall *nVifCall)(void*, void*);
|
||||
#define nVifUnpackF(dest, src, usn, doMask, curCycle, cycles, unpackType) { \
|
||||
(((nVifCall)((void*)&nVifUpk[usn][doMask][curCycle][cycles][unpackType]))(dest, src)); \
|
||||
}
|
||||
|
||||
#define _v0 0
|
||||
#define _v1 0x55
|
||||
#define _v2 0xaa
|
||||
#define _v3 0xff
|
||||
#define aMax(x, y) (((x) > (y) ? (x) : (y)))
|
||||
#define aMin(x, y) (((x) < (y) ? (x) : (y)))
|
||||
#define _f __forceinline
|
||||
|
||||
#define xShiftR(regX, n) { \
|
||||
if (usn) { xPSRL.D(regX, n); } \
|
||||
else { xPSRA.D(regX, n); } \
|
||||
}
|
||||
|
||||
// Size in bytes of one packed source vector for each of the 16 unpack types
// (index = low four bits of the unpack command). Unused type slots are 0.
// The table is only ever read, and it lives in a header next to the other
// 'static' tables, so it is declared 'static const' to give it internal
// linkage and to prevent accidental modification.
static const u32 nVifT[16] = {
	4, // S-32
	2, // S-16
	1, // S-8
	0, // ----
	8, // V2-32
	4, // V2-16
	2, // V2-8
	0, // ----
	12,// V3-32
	6, // V3-16
	3, // V3-8
	0, // ----
	16,// V4-32
	8, // V4-16
	4, // V4-8
	2, // V4-5
};
|
||||
|
||||
#include "newVif_BlockBuffer.h"
|
||||
#include "newVif_OldUnpack.inl"
|
||||
#include "newVif_UnpackGen.inl"
|
||||
#include "newVif_Unpack.inl"
|
||||
|
||||
#endif
|
|
@ -0,0 +1,40 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
class BlockBuffer {
|
||||
private:
|
||||
u32 mSize; // Cur Size
|
||||
u32 mSizeT; // Total Size
|
||||
u8* mData; // Data Ptr
|
||||
void grow(u32 newSize) {
|
||||
u8* temp = new u8[newSize];
|
||||
memcpy(temp, mData, mSizeT);
|
||||
safe_delete( mData );
|
||||
mData = temp;
|
||||
}
|
||||
public:
|
||||
BlockBuffer(u32 tSize) { mSizeT = tSize; mSize = 0; mData = new u8[mSizeT]; }
|
||||
virtual ~BlockBuffer() { safe_delete(mData); }
|
||||
void append(void *addr, u32 size) {
|
||||
if (mSize + size > mSizeT) grow(mSize*2 + size);
|
||||
memcpy(&mData[mSize], addr, size);
|
||||
mSize += size;
|
||||
}
|
||||
void clear() { mSize = 0; }
|
||||
u32 getSize() { return mSize; }
|
||||
u8* getBlock() { return mData; }
|
||||
};
|
|
@ -0,0 +1,167 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Old Vif Unpack Code
|
||||
// Only here for testing/reference
|
||||
// If newVif is defined and newVif1 isn't, vif1 will use this code
|
||||
// same goes for vif0...
|
||||
template void VIFunpack<0>(u32 *data, vifCode *v, u32 size);
|
||||
template void VIFunpack<1>(u32 *data, vifCode *v, u32 size);
|
||||
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
|
||||
//if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
|
||||
UNPACKFUNCTYPE func;
|
||||
const VIFUnpackFuncTable *ft;
|
||||
VURegs * VU;
|
||||
u8 *cdata = (u8*)data;
|
||||
u32 tempsize = 0;
|
||||
const u32 memlimit = vif_size(VIFdmanum);
|
||||
|
||||
if (VIFdmanum == 0) {
|
||||
VU = &VU0;
|
||||
vifRegs = vif0Regs;
|
||||
vifMaskRegs = g_vif0Masks;
|
||||
vif = &vif0;
|
||||
vifRow = g_vifmask.Row0;
|
||||
}
|
||||
else {
|
||||
VU = &VU1;
|
||||
vifRegs = vif1Regs;
|
||||
vifMaskRegs = g_vif1Masks;
|
||||
vif = &vif1;
|
||||
vifRow = g_vifmask.Row1;
|
||||
}
|
||||
|
||||
u32 *dest = (u32*)(VU->Mem + v->addr);
|
||||
u32 unpackType = v->cmd & 0xf;
|
||||
|
||||
ft = &VIFfuncTable[ unpackType ];
|
||||
func = vif->usn ? ft->funcU : ft->funcS;
|
||||
size <<= 2;
|
||||
|
||||
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write
|
||||
if (v->addr >= memlimit) {
|
||||
DevCon.Warning("Overflown at the start");
|
||||
v->addr &= (memlimit - 1);
|
||||
dest = (u32*)(VU->Mem + v->addr);
|
||||
}
|
||||
|
||||
size = min(size, (int)vifRegs->num * ft->gsize); //size will always be the same or smaller
|
||||
|
||||
tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
|
||||
(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
|
||||
|
||||
//Sanity Check (memory overflow)
|
||||
if (tempsize > memlimit) {
|
||||
if (((vifRegs->cycle.cl != vifRegs->cycle.wl) &&
|
||||
((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) {
|
||||
//It's a red herring, so ignore it! SSE unpacks will be much quicker.
|
||||
DevCon.WriteLn("what!!!!!!!!!");
|
||||
//tempsize = 0;
|
||||
tempsize = size;
|
||||
size = 0;
|
||||
}
|
||||
else {
|
||||
DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000);
|
||||
tempsize = size;
|
||||
size = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
tempsize = size;
|
||||
size = 0;
|
||||
}
|
||||
if (tempsize) {
|
||||
int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
|
||||
size = 0;
|
||||
int addrstart = v->addr;
|
||||
//if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize");
|
||||
|
||||
VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr);
|
||||
|
||||
while ((tempsize >= ft->gsize) && (vifRegs->num > 0)) {
|
||||
if(v->addr >= memlimit) {
|
||||
DevCon.Warning("Mem limit overflow");
|
||||
v->addr &= (memlimit - 1);
|
||||
dest = (u32*)(VU->Mem + v->addr);
|
||||
}
|
||||
|
||||
func(dest, (u32*)cdata, ft->qsize);
|
||||
cdata += ft->gsize;
|
||||
tempsize -= ft->gsize;
|
||||
|
||||
vifRegs->num--;
|
||||
vif->cl++;
|
||||
|
||||
if (vif->cl == vifRegs->cycle.wl) {
|
||||
dest += incdest;
|
||||
v->addr +=(incdest * 4);
|
||||
vif->cl = 0;
|
||||
}
|
||||
else {
|
||||
dest += 4;
|
||||
v->addr += 16;
|
||||
}
|
||||
}
|
||||
if (v->addr >= memlimit) {
|
||||
v->addr &=(memlimit - 1);
|
||||
dest = (u32*)(VU->Mem + v->addr);
|
||||
}
|
||||
v->addr = addrstart;
|
||||
if(tempsize > 0) size = tempsize;
|
||||
}
|
||||
|
||||
if (size >= ft->dsize && vifRegs->num > 0) { //Else write what we do have
|
||||
DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!");
|
||||
VIF_LOG("warning, end with size = %d", size);
|
||||
// unpack one qword
|
||||
//v->addr += (size / ft->dsize) * 4;
|
||||
func(dest, (u32*)cdata, size / ft->dsize);
|
||||
size = 0;
|
||||
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr);
|
||||
}
|
||||
}
|
||||
else { // filling write
|
||||
if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
|
||||
if((u32)(((size / ft->gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
|
||||
DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft->gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
|
||||
|
||||
DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType, vif->tag.addr);
|
||||
while (vifRegs->num > 0) {
|
||||
if (vif->cl == vifRegs->cycle.wl) {
|
||||
vif->cl = 0;
|
||||
}
|
||||
// unpack one qword
|
||||
if (vif->cl < vifRegs->cycle.cl) {
|
||||
if(size < ft->gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
|
||||
func(dest, (u32*)cdata, ft->qsize);
|
||||
cdata += ft->gsize;
|
||||
size -= ft->gsize;
|
||||
vif->cl++;
|
||||
vifRegs->num--;
|
||||
if (vif->cl == vifRegs->cycle.wl) {
|
||||
vif->cl = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
func(dest, (u32*)cdata, ft->qsize);
|
||||
v->addr += 16;
|
||||
vifRegs->num--;
|
||||
vif->cl++;
|
||||
}
|
||||
dest += 4;
|
||||
if (vifRegs->num == 0) break;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,190 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// newVif! - author: cottonvibes(@gmail.com)
|
||||
|
||||
#pragma once
|
||||
|
||||
struct nVifStruct {
|
||||
u32 idx; // VIF0 or VIF1
|
||||
vifStruct* vif; // Vif Struct ptr
|
||||
VIFregisters* vifRegs; // Vif Regs ptr
|
||||
VURegs* VU; // VU Regs ptr
|
||||
u8* vuMemEnd; // End of VU Memory
|
||||
u32 vuMemLimit; // Use for fast AND
|
||||
BlockBuffer* vifBlock; // Block Buffer
|
||||
};
|
||||
nVifStruct nVif[2];
|
||||
|
||||
void initNewVif(int idx) {
|
||||
nVif[idx].idx = idx;
|
||||
nVif[idx].VU = idx ? &VU1 : &VU0;
|
||||
nVif[idx].vif = idx ? &vif1 : &vif0;
|
||||
nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs;
|
||||
nVif[idx].vifBlock = new BlockBuffer(0x2000); // 8kb Block Buffer
|
||||
nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000));
|
||||
nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0;
|
||||
memset_8<0xcc,sizeof(nVifUpk)>(nVifUpk);
|
||||
for (int a = 0; a < 2; a++) {
|
||||
for (int b = 0; b < 2; b++) {
|
||||
for (int c = 0; c < 4; c++) {
|
||||
for (int d = 0; d < 3; d++) {
|
||||
nVifGen(a, b, c, d); //nVifUpk[2][2][4][3][16];
|
||||
}}}}
|
||||
}
|
||||
|
||||
int nVifUnpack(int idx, u32 *data) {
|
||||
XMMRegisters::Freeze();
|
||||
BlockBuffer* vB = nVif[idx].vifBlock;
|
||||
int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
|
||||
vif1.tag.size -= ret;
|
||||
_nVifUnpack(idx, (u8*)data, ret<<2);
|
||||
if (vif1.tag.size <= 0) vif1.tag.size = 0;
|
||||
if (vif1.tag.size <= 0) vif1.cmd = 0;
|
||||
XMMRegisters::Thaw();
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Returns a pointer into the VU memory of unit 'idx' at 'offset', with the
// offset masked by that unit's vuMemLimit.
_f u8* setVUptr(int idx, int offset) {
	const nVifStruct& unit = nVif[idx];
	return (u8*)(unit.VU->Mem + (offset & unit.vuMemLimit));
}
|
||||
|
||||
// Advances 'ptr' by 'amount' bytes inside the VU memory of unit 'idx'. When the
// result reaches or passes vuMemEnd it is moved back to the start of VU memory,
// keeping the overshoot. Logs a message if the result is not 16-byte aligned.
_f void incVUptr(int idx, u8* &ptr, int amount) {
	ptr += amount;

	const int overshoot = ptr - nVif[idx].vuMemEnd;
	if (overshoot >= 0) {
		ptr = nVif[idx].VU->Mem + overshoot;
	}

	if ((uptr)ptr & 0xf) DevCon.WriteLn("unaligned wtf :(");
}
|
||||
|
||||
// Expands the 16 two-bit fields of v->mask into the three nVifMask tables used
// by the generated masking code (see xMaskWrite):
//   nVifMask[0] - AND mask applied to the unpacked data
//   nVifMask[1] - AND mask applied to the value already at the destination
//   nVifMask[2] - constant OR-ed into the result (row / column register value)
// Field i controls cycle i/4, vector element i%4.
_f void setMasks(VIFregisters* v) {
	for (int i = 0; i < 16; i++) {
		const int cycle = i / 4;
		const int elem  = i % 4;

		u32 keepData = 0;	// -> nVifMask[0]
		u32 keepDest = 0;	// -> nVifMask[1]
		u32 constant = 0;	// -> nVifMask[2]

		switch ((v->mask >> (i*2)) & 3) {
			case 0: // Data
				keepData = 0xffffffff;
				break;
			case 1: // Row
				constant = ((u32*)&v->r0)[elem*4];
				break;
			case 2: // Col
				constant = ((u32*)&v->c0)[cycle*4];
				break;
			case 3: // Write Protect
				keepDest = 0xffffffff;
				break;
		}

		nVifMask[0][cycle][elem] = keepData;
		nVifMask[1][cycle][elem] = keepDest;
		nVifMask[2][cycle][elem] = constant;
	}
}
|
||||
|
||||
// Core loop of the new unpacker: writes vectors to VU memory until
// vifRegs->num reaches zero or the source data runs out.
//   idx  - unit to work on (index into nVif)
//   data - packed source data
//   size - number of source bytes available
// Sets the file-level 'vif' and 'vifRegs' pointers as a side effect.
//
// Fix: the current-cycle index handed to nVifUnpackF() is clamped to 3. The
// curCycle dimension of nVifUpk has four entries (0..3); the previous clamp of
// 4 indexed one past the end, i.e. into a neighbouring [usn][mask] slot or
// beyond the table.
//
// NOTE(review): 'size' is unsigned, so it wraps instead of going negative if a
// vector needs more bytes than remain. Also blockSize is used as a modulus and
// can be 0 when cycle.cl == cycle.wl == 0 - both worth confirming with callers.
_f void _nVifUnpack(int idx, u8 *data, u32 size) {
	// The old-code fallback for skipping writes is disabled:
	/*if (nVif[idx].vifRegs->cycle.cl >= nVif[idx].vifRegs->cycle.wl) { // skipping write
		if (!idx) VIFunpack<0>((u32*)data, &vif0.tag, size>>2);
		else	  VIFunpack<1>((u32*)data, &vif1.tag, size>>2);
		return;
	}*/

	vif		= nVif[idx].vif;
	vifRegs	= nVif[idx].vifRegs;

	int isFill	= !!(vifRegs->cycle.cl < vifRegs->cycle.wl);
	int usn		= !!(vif->usn);
	int doMask	= !!(vif->tag.cmd & 0x10);
	int upkNum	= vif->tag.cmd & 0xf;
	int doMode	= !!(vifRegs->mode);

	if (doMask) setMasks(vifRegs);

	//DevCon.WriteLn("%s Write! [mask = %08x][type = %02d][num = %d]", (isFill?"Filling":"Skipping"), vifRegs->mask, upkNum, vifRegs->num);

	u8* dest = setVUptr(idx, vif->tag.addr);
	const VIFUnpackFuncTable* ft = &VIFfuncTable[vif->tag.cmd & 0xf];
	UNPACKFUNCTYPE func = vif->usn ? ft->funcU : ft->funcS;

	// cycleSize: slots per block that consume source data.
	// blockSize: total slots per block before vif->cl wraps.
	int cycleSize = isFill ? vifRegs->cycle.cl : vifRegs->cycle.wl;
	int blockSize = isFill ? vifRegs->cycle.wl : vifRegs->cycle.cl;
	//vif->cl = 0;

	while (vifRegs->num > 0) {
		if (vif->cl >= blockSize) {
			vif->cl = 0;
		}

		if (vif->cl < cycleSize) {
			if (size == 0) { DevCon.WriteLn("_nVifUnpack: Out of Data!"); break; }

			if (doMode /*|| doMask*/) {
				// Mode processing is not generated yet: use the old C unpack functions.
				func((u32*)dest, (u32*)data, ft->qsize);
				data += ft->gsize;
				size -= ft->gsize;
				vifRegs->num--;
			}
			else if (1) {
				// Generated SSE routine, one vector per call.
				nVifUnpackF(dest, data, usn, doMask, aMin(vif->cl, 3), 0, upkNum);
				data += nVifT[upkNum];
				size -= nVifT[upkNum];
				vifRegs->num--;
			}
			else {
				// Experimental multi-vector path; unreachable while the branch
				// above is 'else if (1)'.
				int c = aMin((cycleSize - vif->cl), 3);
				int t = nVifT[upkNum];
				size -= t * c;
				//if (c>1) { DevCon.WriteLn("C > 1!"); }
				if (c<0||c>3) { DevCon.WriteLn("C wtf!"); }
				if (size < 0) { DevCon.WriteLn("Size Shit"); size+=t*c;c=1;size-=t*c;}
				nVifUnpackF(dest, data, usn, doMask, aMin(vif->cl, 3), c-1, upkNum);
				data += t * c;
				vifRegs->num -= c;
			}
		}
		else if (isFill) {
			// Filling slot: write a vector without consuming any source data.
			func((u32*)dest, (u32*)data, ft->qsize);
			vifRegs->num--;
		}

		incVUptr(idx, dest, 16);
		vif->cl = (vif->cl+1) % blockSize;
	}
}
|
||||
|
||||
//int nVifUnpack(int idx, u32 *data) {
|
||||
// XMMRegisters::Freeze();
|
||||
// BlockBuffer* vB = nVif[idx].vifBlock;
|
||||
// int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
|
||||
// //vB->append(data, ret<<2);
|
||||
// vif1.tag.size -= ret;
|
||||
// //DevCon.WriteLn("2 [0x%x][%d][%d]", vif1.tag.addr, vB->getSize(), vif1.tag.size<<2);
|
||||
// //if (vif1.tag.size <= 0) {
|
||||
// //DevCon.WriteLn("3 [0x%x][%d][%d]", vif1.tag.addr, vB->getSize(), vif1.tag.size<<2);
|
||||
// //VIFunpack<1>(vB->getBlock(), &vif1.tag, vB->getSize()>>2);
|
||||
// //_nVifUnpack(idx, vB->getBlock(), vB->getSize());
|
||||
// _nVifUnpack(idx, (u8*)data, ret<<2);
|
||||
// if (vif1.tag.size <= 0) vif1.tag.size = 0;
|
||||
// if (vif1.tag.size <= 0) vif1.cmd = 0;
|
||||
// //vB->clear();
|
||||
// //}
|
||||
// //else { vif1.tag.size+=ret; ret = -1; vB->clear(); }
|
||||
// XMMRegisters::Thaw();
|
||||
// return ret;
|
||||
//}
|
|
@ -0,0 +1,223 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
// Emits a masked store of regX to the destination vector number x (0..2),
// i.e. to [ecx + x*0x10]:
//   result = (regX & nVifMask[0][cycle]) | (old dest & nVifMask[1][cycle]) | nVifMask[2][cycle]
// Uses xmm7 as scratch for the old destination value and expects 'curCycle' to
// be in scope at the expansion site.
//
// Fix: the mask row is clamped to 3. nVifMask has four cycle rows (0..3); the
// previous clamp of 4 addressed one row past the end of each table.
#define xMaskWrite(regX, x) { \
	if (x==0) xMOVAPS(xmm7, ptr32[ecx]); \
	if (x==1) xMOVAPS(xmm7, ptr32[ecx+0x10]); \
	if (x==2) xMOVAPS(xmm7, ptr32[ecx+0x20]); \
	int offX = aMin(curCycle+x, 3); \
	xPAND(regX, ptr32[nVifMask[0][offX]]); \
	xPAND(xmm7, ptr32[nVifMask[1][offX]]); \
	xPOR (regX, ptr32[nVifMask[2][offX]]); \
	xPOR (regX, xmm7); \
	if (x==0) xMOVAPS(ptr32[ecx], regX); \
	if (x==1) xMOVAPS(ptr32[ecx+0x10], regX); \
	if (x==2) xMOVAPS(ptr32[ecx+0x20], regX); \
}
|
||||
|
||||
// Emits the stores of up to three result registers to [ecx], [ecx+0x10] and
// [ecx+0x20]; vector k is stored when cycles >= k. With masking enabled each
// store goes through xMaskWrite, otherwise it is a plain aligned move.
// Expects 'mask' and 'cycles' to be in scope at the expansion site.
#define xMovDest(reg0, reg1, reg2) { \
	if (mask) { \
		if (cycles>=0) { xMaskWrite(reg0, 0); } \
		if (cycles>=1) { xMaskWrite(reg1, 1); } \
		if (cycles>=2) { xMaskWrite(reg2, 2); } \
	} \
	else { \
		if (cycles>=0) { xMOVAPS (ptr32[ecx],      reg0); } \
		if (cycles>=1) { xMOVAPS (ptr32[ecx+0x10], reg1); } \
		if (cycles>=2) { xMOVAPS (ptr32[ecx+0x20], reg2); } \
	} \
}
|
||||
|
||||
// Emits the code that expands one V4-5 value into four dwords.
// Input : xmm1, whose low dword holds the packed value (A | B5 | G5 | R5 in the
//         low 16 bits; higher bits are treated as garbage and masked off at the end).
// Output: xmm2 gets result (one component per dword, A|B|G|R as noted below).
// Clobbers xmm1, xmm3 and xmm4. The per-line comments show the fields that
// remain in the low dword after each step.
// NOTE(review): the lane selection depends on the xyzw bit meaning of
// mVUmergeRegs, which is declared elsewhere - confirm against its definition.
|
||||
void convertRGB() {
|
||||
xPSLL.D (xmm1, 3); // ABG|R5.000
|
||||
xMOVAPS (xmm2, xmm1);// R5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // ABG
|
||||
xPSLL.D (xmm1, 3); // AB|G5.000
|
||||
xMOVAPS (xmm3, xmm1);// G5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // AB
|
||||
xPSLL.D (xmm1, 3); // A|B5.000
|
||||
xMOVAPS (xmm4, xmm1);// B5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // A
|
||||
xPSLL.D (xmm1, 7); // A.0000000
|
||||
|
||||
xPSHUF.D (xmm1, xmm1, _v0); // A|A|A|A
|
||||
xPSHUF.D (xmm3, xmm3, _v0); // G|G|G|G
|
||||
xPSHUF.D (xmm4, xmm4, _v0); // B|B|B|B
|
||||
mVUmergeRegs(XMM2, XMM1, 0x3); // A|x|x|R
|
||||
mVUmergeRegs(XMM2, XMM3, 0x4); // A|x|G|R
|
||||
mVUmergeRegs(XMM2, XMM4, 0x2); // A|B|G|R
|
||||
|
||||
// Shifting left then right by 24 keeps only the low 8 bits of every dword,
// which removes the garbage upper bits noted above.
xPSLL.D (xmm2, 24); // can optimize to
|
||||
xPSRL.D (xmm2, 24); // single AND...
|
||||
}
|
||||
|
||||
// ecx = dest, edx = src
|
||||
void nVifGen(int usn, int mask, int curCycle, int cycles) {
|
||||
HostSys::MemProtect(nVifUpk, sizeof(nVifUpk), Protect_ReadWrite, false);
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0x0]); // S-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
|
||||
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
|
||||
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
|
||||
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
|
||||
xRET();
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0x1]); // S-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
|
||||
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
|
||||
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
|
||||
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
|
||||
xRET();
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0x2]); // S-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=0) xPSHUF.D (xmm1, xmm0, _v0);
|
||||
if (cycles>=1) xPSHUF.D (xmm2, xmm0, _v1);
|
||||
if (cycles>=2) xPSHUF.D (xmm3, xmm0, _v2);
|
||||
if (cycles>=0) xMovDest (xmm1, xmm2, xmm3);
|
||||
xRET();
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0x3]); // ----
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0x4]); // V2-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x10]);
|
||||
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0x5]); // V2-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=2) xPSHUF.D (xmm2, xmm0, _v2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=2) xShiftR (xmm2, 16);
|
||||
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0x6]); // V2-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=2) xPSHUF.D (xmm2, xmm0, _v2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=2) xShiftR (xmm2, 24);
|
||||
if (cycles>=1) xPSHUF.D (xmm1, xmm0, 0xe);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0x7]); // ----
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0x8]); // V3-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+12]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+24]);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0x9]); // V3-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+6]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+12]);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=1) xShiftR (xmm1, 16);
|
||||
if (cycles>=2) xShiftR (xmm2, 16);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0xa]); // V3-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+3]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+6]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LBW(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LBW(xmm2, xmm2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=1) xShiftR (xmm1, 24);
|
||||
if (cycles>=2) xShiftR (xmm2, 24);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0xb]); // ----
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0xc]); // V4-32
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+0x10]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x20]);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0xd]); // V4-16
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+0x10]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+0x20]);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 16);
|
||||
if (cycles>=1) xShiftR (xmm1, 16);
|
||||
if (cycles>=2) xShiftR (xmm2, 16);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0xe]); // V4-8
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=1) xMOVUPS (xmm1, ptr32[edx+4]);
|
||||
if (cycles>=2) xMOVUPS (xmm2, ptr32[edx+8]);
|
||||
if (cycles>=0) xPUNPCK.LBW(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LBW(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LBW(xmm2, xmm2);
|
||||
if (cycles>=0) xPUNPCK.LWD(xmm0, xmm0);
|
||||
if (cycles>=1) xPUNPCK.LWD(xmm1, xmm1);
|
||||
if (cycles>=2) xPUNPCK.LWD(xmm2, xmm2);
|
||||
if (cycles>=0) xShiftR (xmm0, 24);
|
||||
if (cycles>=1) xShiftR (xmm1, 24);
|
||||
if (cycles>=2) xShiftR (xmm2, 24);
|
||||
if (cycles>=0) xMovDest (xmm0, xmm1, xmm2);
|
||||
xRET();
|
||||
|
||||
// A | B5 | G5 | R5
|
||||
// ..0.. A 0000000 | ..0.. B 000 | ..0.. G 000 | ..0.. R 000
|
||||
xSetPtr(&nVifUpk[usn][mask][curCycle][cycles][0xf]); // V4-5
|
||||
if (cycles>=0) xMOVUPS (xmm0, ptr32[edx]);
|
||||
if (cycles>=0) xMOVAPS (xmm1, xmm0);
|
||||
if (cycles>=0) convertRGB();
|
||||
if (cycles>=0) xMOVAPS (ptr32[ecx], xmm2);
|
||||
if (cycles>=1) xMOVAPS (xmm1, xmm0);
|
||||
if (cycles>=1) xPSRL.D (xmm1, 16);
|
||||
if (cycles>=1) convertRGB();
|
||||
if (cycles>=1) xMOVAPS (ptr32[ecx+0x10], xmm2);
|
||||
if (cycles>=2) xPSHUF.D (xmm1, xmm0, _v1);
|
||||
if (cycles>=2) convertRGB();
|
||||
if (cycles>=2) xMOVAPS (ptr32[ecx+0x20], xmm2);
|
||||
xRET();
|
||||
HostSys::MemProtect(nVifUpk, sizeof(nVifUpk), Protect_ReadOnly, true);
|
||||
}
|
Loading…
Reference in New Issue