mirror of https://github.com/PCSX2/pcsx2.git
newVif: some minor changes and cleanups...
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2351 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
747de4ebde
commit
2b3b60511b
|
@ -16,6 +16,7 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef newVif
|
||||
#include "newVif_BlockBuffer.h"
|
||||
#include "x86emitter/x86emitter.h"
|
||||
using namespace x86Emitter;
|
||||
extern void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW = 0);
|
||||
|
@ -27,6 +28,7 @@ static __pagealigned u8 nVifUpkExec[__pagesize*16];
|
|||
static __aligned16 nVifCall nVifUpk[(2*2*16)*4]; // ([USN][Masking][Unpack Type]) [curCycle]
|
||||
static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
|
||||
|
||||
#define _1mb (0x100000)
|
||||
#define _v0 0
|
||||
#define _v1 0x55
|
||||
#define _v2 0xaa
|
||||
|
@ -35,10 +37,25 @@ static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Ve
|
|||
#define aMin(x, y) std::min(x,y)
|
||||
#define _f __forceinline
|
||||
|
||||
// xShiftR(regX, n): emits a packed 32-bit right shift of regX by n.
// Picks xPSRL.D (logical shift) when `usn` is non-zero and xPSRA.D
// (arithmetic shift) otherwise. `usn` is not a macro parameter; it is
// taken from whatever scope the macro is expanded in.
#define xShiftR(regX, n) { \
|
||||
if (usn) { xPSRL.D(regX, n); } \
|
||||
else { xPSRA.D(regX, n); } \
|
||||
}
|
||||
struct nVifBlock {
|
||||
u8 upkType; // Unpack Type
|
||||
u8 num; // Num Field
|
||||
u8 mode; // Mode Field
|
||||
u8 cl; // CL Field
|
||||
u8 wl; // WL Field
|
||||
u32 mask; // Mask Field
|
||||
u8* startPtr; // Start Ptr of RecGen Code
|
||||
};
|
||||
|
||||
struct nVifStruct {
|
||||
u32 idx; // VIF0 or VIF1
|
||||
vifStruct* vif; // Vif Struct ptr
|
||||
VIFregisters* vifRegs; // Vif Regs ptr
|
||||
VURegs* VU; // VU Regs ptr
|
||||
u8* vuMemEnd; // End of VU Memory
|
||||
u32 vuMemLimit; // Use for fast AND
|
||||
BlockBuffer* vifCache; // Block Buffer
|
||||
};
|
||||
|
||||
static const u32 nVifT[16] = {
|
||||
4, // S-32
|
||||
|
@ -59,9 +76,10 @@ static const u32 nVifT[16] = {
|
|||
2, // V4-5
|
||||
};
|
||||
|
||||
#include "newVif_BlockBuffer.h"
|
||||
#include "newVif_OldUnpack.inl"
|
||||
#include "newVif_UnpackGen.inl"
|
||||
#include "newVif_Unpack.inl"
|
||||
|
||||
//#include "newVif_Dynarec.inl"
|
||||
|
||||
#endif
|
||||
|
|
|
@ -20,20 +20,29 @@ private:
|
|||
u32 mSize; // Cur Size
|
||||
u32 mSizeT; // Total Size
|
||||
u8* mData; // Data Ptr
|
||||
void grow(u32 newSize) {
|
||||
u8* temp = new u8[newSize];
|
||||
memcpy(temp, mData, mSizeT);
|
||||
safe_delete( mData );
|
||||
mData = temp;
|
||||
void alloc(int size) {
|
||||
mData = SysMmapEx(NULL, size, 0, "nVif_BlockBuffer");
|
||||
if (!mData) throw Exception::OutOfMemory("nVif Error: Failed to allocate recompiler memory!");
|
||||
memset(mData, 0xcc, size);
|
||||
}
|
||||
// Releases the `size`-byte region at dPtr via HostSys::Munmap and resets
// the caller's pointer to NULL. Does nothing when dPtr is already NULL.
void dealloc(u8* &dPtr, int size) {
	if (dPtr == NULL) return;
	HostSys::Munmap(dPtr, size);
	dPtr = NULL;
}
|
||||
public:
|
||||
BlockBuffer(u32 tSize) { mSizeT = tSize; mSize = 0; mData = new u8[mSizeT]; }
|
||||
virtual ~BlockBuffer() { safe_delete(mData); }
|
||||
BlockBuffer(u32 tSize) { mSizeT = tSize; mSize = 0; alloc(mSizeT); }
|
||||
~BlockBuffer() { dealloc(mData, mSizeT); }
|
||||
void append(void *addr, u32 size) {
|
||||
if (mSize + size > mSizeT) grow(mSize*2 + size);
|
||||
memcpy(&mData[mSize], addr, size);
|
||||
mSize += size;
|
||||
}
|
||||
// Replaces the buffer with a new one of `newSize` bytes, preserving the
// first mSize bytes of content, then releases the old region.
//
// alloc() stores its result into mData *before* checking for failure, so
// a failed allocation would otherwise leave mData == NULL with the old
// region leaked. The previous pointer is therefore restored before the
// exception is propagated, leaving the buffer unchanged on failure.
//
// Throws whatever alloc() throws (Exception::OutOfMemory).
void grow(u32 newSize) {
	u8* oldData = mData;

	try {
		alloc(newSize);
	}
	catch (...) {
		mData = oldData; // undo alloc()'s overwrite; the old buffer stays valid
		throw;
	}

	memcpy (mData, oldData, mSize);
	dealloc(oldData, mSizeT); // mSizeT still holds the old region's size here
	mSizeT = newSize;
}
|
||||
void clear() { mSize = 0; }
|
||||
u32 getSize() { return mSize; }
|
||||
u8* getBlock() { return mData; }
|
||||
|
|
|
@ -13,20 +13,12 @@
|
|||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// newVif! - author: cottonvibes(@gmail.com)
|
||||
// newVif!
|
||||
// authors: cottonvibes(@gmail.com)
|
||||
// Jake.Stine (@gmail.com)
|
||||
|
||||
#pragma once
|
||||
|
||||
struct nVifStruct {
|
||||
u32 idx; // VIF0 or VIF1
|
||||
vifStruct* vif; // Vif Struct ptr
|
||||
VIFregisters* vifRegs; // Vif Regs ptr
|
||||
VURegs* VU; // VU Regs ptr
|
||||
u8* vuMemEnd; // End of VU Memory
|
||||
u32 vuMemLimit; // Use for fast AND
|
||||
BlockBuffer* vifBlock; // Block Buffer
|
||||
};
|
||||
|
||||
static __aligned16 nVifStruct nVif[2];
|
||||
|
||||
void initNewVif(int idx) {
|
||||
|
@ -34,9 +26,9 @@ void initNewVif(int idx) {
|
|||
nVif[idx].VU = idx ? &VU1 : &VU0;
|
||||
nVif[idx].vif = idx ? &vif1 : &vif0;
|
||||
nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs;
|
||||
nVif[idx].vifBlock = new BlockBuffer(0x2000); // 8kb Block Buffer
|
||||
nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000));
|
||||
nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0;
|
||||
nVif[idx].vifCache = NULL;
|
||||
|
||||
HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
|
||||
memset8<0xcc>( nVifUpkExec );
|
||||
|
@ -197,7 +189,7 @@ void _nVifUnpack(int idx, u8 *data, u32 size) {
|
|||
|
||||
vif = nVif[idx].vif;
|
||||
vifRegs = nVif[idx].vifRegs;
|
||||
const bool doMode = !!vifRegs->mode;
|
||||
const bool doMode = vifRegs->mode && !(vif->tag.cmd & 0x10);
|
||||
const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);
|
||||
|
||||
//UnpackLoopTable[idx][doMode][isFill]( data, size );
|
||||
|
@ -219,18 +211,3 @@ void _nVifUnpack(int idx, u8 *data, u32 size) {
|
|||
//DevCon.WriteLn("%s Write! [mask = %08x][type = %02d][num = %d]", (isFill?"Filling":"Skipping"), vifRegs->mask, upkNum, vifRegs->num);
|
||||
}
|
||||
}
|
||||
|
||||
//data += ft.gsize;
|
||||
//size -= ft.gsize;
|
||||
//vifRegs->num--;
|
||||
//else {
|
||||
// //DevCon.WriteLn("SSE Unpack!");
|
||||
// int c = aMin((cycleSize - vif->cl), 3);
|
||||
// size -= vift * c;
|
||||
// //if (c>1) { DevCon.WriteLn("C > 1!"); }
|
||||
// if (c<0||c>3) { DbgCon.WriteLn("C wtf!"); }
|
||||
// if (size < 0) { DbgCon.WriteLn("Size Shit"); size+=vift*c;c=1;size-=vift*c;}
|
||||
// fnbase[(aMin(vif->cl, 4) * 4) + c-1](dest, data);
|
||||
// data += vift * c;
|
||||
// vifRegs->num -= c;
|
||||
//}
|
|
@ -15,47 +15,22 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#define xMaskWrite(regX, x) { \
|
||||
if (x==0) xMOVAPS(xmm7, ptr32[ecx]); \
|
||||
if (x==1) xMOVAPS(xmm7, ptr32[ecx+0x10]); \
|
||||
if (x==2) xMOVAPS(xmm7, ptr32[ecx+0x20]); \
|
||||
int offX = aMin(curCycle+x, 4); \
|
||||
#define xMaskWrite(regX) { \
|
||||
xMOVAPS(xmm7, ptr32[ecx]); \
|
||||
int offX = aMin(curCycle, 4); \
|
||||
xPAND(regX, ptr32[nVifMask[0][offX]]); \
|
||||
xPAND(xmm7, ptr32[nVifMask[1][offX]]); \
|
||||
xPOR (regX, ptr32[nVifMask[2][offX]]); \
|
||||
xPOR (regX, xmm7); \
|
||||
if (x==0) xMOVAPS(ptr32[ecx], regX); \
|
||||
if (x==1) xMOVAPS(ptr32[ecx+0x10], regX); \
|
||||
if (x==2) xMOVAPS(ptr32[ecx+0x20], regX); \
|
||||
xMOVAPS(ptr32[ecx], regX); \
|
||||
}
|
||||
|
||||
#define xMovDest(reg0) { \
|
||||
if (mask==0) { xMOVAPS (ptr32[ecx], reg0); } \
|
||||
else { xMaskWrite(reg0, 0); } \
|
||||
#define xMovDest(regX) { \
|
||||
if (mask==0) { xMOVAPS (ptr32[ecx], regX); } \
|
||||
else { xMaskWrite(regX); } \
|
||||
}
|
||||
|
||||
// xmm2 gets result
|
||||
void convertRGB() {
|
||||
xPSLL.D (xmm1, 3); // ABG|R5.000
|
||||
xMOVAPS (xmm2, xmm1);// R5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // ABG
|
||||
xPSLL.D (xmm1, 3); // AB|G5.000
|
||||
xMOVAPS (xmm3, xmm1);// G5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // AB
|
||||
xPSLL.D (xmm1, 3); // A|B5.000
|
||||
xMOVAPS (xmm4, xmm1);// B5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // A
|
||||
xPSLL.D (xmm1, 7); // A.0000000
|
||||
|
||||
xPSHUF.D (xmm1, xmm1, _v0); // A|A|A|A
|
||||
xPSHUF.D (xmm3, xmm3, _v0); // G|G|G|G
|
||||
xPSHUF.D (xmm4, xmm4, _v0); // B|B|B|B
|
||||
mVUmergeRegs(XMM2, XMM1, 0x3); // A|x|x|R
|
||||
mVUmergeRegs(XMM2, XMM3, 0x4); // A|x|G|R
|
||||
mVUmergeRegs(XMM2, XMM4, 0x2); // A|B|G|R
|
||||
|
||||
xPSLL.D (xmm2, 24); // can optimize to
|
||||
xPSRL.D (xmm2, 24); // single AND...
|
||||
// xShiftR(regX, n): emits a packed 32-bit right shift of regX by n.
// Picks xPSRL.D (logical shift) when `usn` is non-zero and xPSRA.D
// (arithmetic shift) otherwise. `usn` is not a macro parameter; it is
// taken from whatever scope the macro is expanded in.
#define xShiftR(regX, n) { \
|
||||
if (usn) { xPSRL.D(regX, n); } \
|
||||
else { xPSRA.D(regX, n); } \
|
||||
}
|
||||
|
||||
struct VifUnpackIndexer {
|
||||
|
@ -186,7 +161,24 @@ void nVifGen(int usn, int mask, int curCycle) {
|
|||
indexer.xSetCall(0xf); // V4-5
|
||||
xMOV16 (xmm0, ptr32[edx]);
|
||||
xMOVAPS (xmm1, xmm0);
|
||||
convertRGB();
|
||||
xPSLL.D (xmm1, 3); // ABG|R5.000
|
||||
xMOVAPS (xmm2, xmm1);// R5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // ABG
|
||||
xPSLL.D (xmm1, 3); // AB|G5.000
|
||||
xMOVAPS (xmm3, xmm1);// G5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // AB
|
||||
xPSLL.D (xmm1, 3); // A|B5.000
|
||||
xMOVAPS (xmm4, xmm1);// B5.000 (garbage upper bits)
|
||||
xPSRL.D (xmm1, 8); // A
|
||||
xPSLL.D (xmm1, 7); // A.0000000
|
||||
xPSHUF.D (xmm1, xmm1, _v0); // A|A|A|A
|
||||
xPSHUF.D (xmm3, xmm3, _v0); // G|G|G|G
|
||||
xPSHUF.D (xmm4, xmm4, _v0); // B|B|B|B
|
||||
mVUmergeRegs(XMM2, XMM1, 0x3); // A|x|x|R
|
||||
mVUmergeRegs(XMM2, XMM3, 0x4); // A|x|G|R
|
||||
mVUmergeRegs(XMM2, XMM4, 0x2); // A|B|G|R
|
||||
xPSLL.D (xmm2, 24); // can optimize to
|
||||
xPSRL.D (xmm2, 24); // single AND...
|
||||
xMovDest (xmm2);
|
||||
xRET();
|
||||
|
||||
|
|
Loading…
Reference in New Issue