From 83081605b4e429c518f831f184bc49d524bca01b Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Thu, 30 Jul 2009 00:05:42 +0000 Subject: [PATCH 01/18] microVU: fixed a bug git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1583 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Compile.inl | 1 - 1 file changed, 1 deletion(-) diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 3c57ced0cd..92a1757e69 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -371,7 +371,6 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { mVUsetupRange(mVU, startPC, 1); // Reset regAlloc - mVU->regAlloc->flushAll(); mVU->regAlloc->reset(); // First Pass From a0828bc8f9a22493cdd4d1b5b3c3aecc89659281 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Thu, 30 Jul 2009 00:21:33 +0000 Subject: [PATCH 02/18] GSdx: - Tripled the number of cached textures, many games constantly recreated them - Don't clear some shaders at each drawcall (in dx10), which is a nice speedup (but could potentially be bad, please check..) 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1584 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSDevice.cpp | 2 +- plugins/GSdx/GSDevice10.cpp | 2 +- plugins/GSdx/GSDevice11.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/GSdx/GSDevice.cpp b/plugins/GSdx/GSDevice.cpp index d023f45649..cd1317f2e3 100644 --- a/plugins/GSdx/GSDevice.cpp +++ b/plugins/GSdx/GSDevice.cpp @@ -127,7 +127,7 @@ void GSDevice::Recycle(GSTexture* t) { m_pool.push_front(t); - while(m_pool.size() > 200) + while(m_pool.size() > 600) { delete m_pool.back(); diff --git a/plugins/GSdx/GSDevice10.cpp b/plugins/GSdx/GSDevice10.cpp index 29e584d8a1..bba647ccce 100644 --- a/plugins/GSdx/GSDevice10.cpp +++ b/plugins/GSdx/GSDevice10.cpp @@ -269,7 +269,7 @@ void GSDevice10::DrawPrimitive() void GSDevice10::EndScene() { - PSSetShaderResources(NULL, NULL); + //PSSetShaderResources(NULL, NULL); // not clearing the rt/ds gives a little fps boost in complex games (5-10%) diff --git a/plugins/GSdx/GSDevice11.cpp b/plugins/GSdx/GSDevice11.cpp index 679b5016ec..10fb8d546e 100644 --- a/plugins/GSdx/GSDevice11.cpp +++ b/plugins/GSdx/GSDevice11.cpp @@ -268,7 +268,7 @@ void GSDevice11::DrawPrimitive() void GSDevice11::EndScene() { - PSSetShaderResources(NULL, NULL); + //PSSetShaderResources(NULL, NULL); // not clearing the rt/ds gives a little fps boost in complex games (5-10%) From b470a9ae31c7508824126c8ee4532a50e633be05 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Thu, 30 Jul 2009 01:35:25 +0000 Subject: [PATCH 03/18] microVU: Saw that SSE4.1 has ptest, and I wanted to try it out xD untested though cuz I don't have an SSE4.1 cpu :) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1585 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/ix86_legacy_instructions.h | 1 + pcsx2/x86/ix86/ix86_legacy_sse.cpp | 1 + pcsx2/x86/microVU_Lower.inl | 29 +++++++++++++---------- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git 
a/pcsx2/x86/ix86/ix86_legacy_instructions.h b/pcsx2/x86/ix86/ix86_legacy_instructions.h index 1fe98cb92a..a026464e66 100644 --- a/pcsx2/x86/ix86/ix86_legacy_instructions.h +++ b/pcsx2/x86/ix86/ix86_legacy_instructions.h @@ -1364,6 +1364,7 @@ extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from); extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from); extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from); extern void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PTEST_XMM_to_XMM(x86SSERegType to, x86SSERegType from); //********************* // 3DNOW instructions * diff --git a/pcsx2/x86/ix86/ix86_legacy_sse.cpp b/pcsx2/x86/ix86/ix86_legacy_sse.cpp index 66074ca087..684316edaa 100644 --- a/pcsx2/x86/ix86/ix86_legacy_sse.cpp +++ b/pcsx2/x86/ix86/ix86_legacy_sse.cpp @@ -374,6 +374,7 @@ emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) { xBLEND.VPS( xRegisterSSE(to), (void*)from ); } emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMOVSX.DQ( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_PTEST_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPTEST(xRegisterSSE(to), xRegisterSSE(from)); } emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index fb017f23e8..d760b0c4a7 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -26,20 +26,25 @@ // DIV/SQRT/RSQRT //------------------------------------------------------------------ -#define testZero(xmmReg, xmmTemp, gprTemp) { \ - SSE_XORPS_XMM_to_XMM(xmmTemp, xmmTemp); /* Clear xmmTemp (make it 0) */ \ - SSE_CMPEQPS_XMM_to_XMM(xmmTemp, xmmReg); /* Set all F's if zero */ \ - SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmTemp); /* Move the sign bits */ \ - TEST32ItoR(gprTemp, 1); /* Test "Is Zero" bit */ \ +// Test if 
Vector is +/- Zero +#define testZero(xmmReg, xmmTemp, gprTemp) { \ + SSE_XORPS_XMM_to_XMM(xmmTemp, xmmTemp); \ + SSE_CMPEQSS_XMM_to_XMM(xmmTemp, xmmReg); \ + if (!cpucaps.hasStreamingSIMD4Extensions) { \ + SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmTemp); \ + TEST32ItoR(gprTemp, 1); \ + } \ + else SSE4_PTEST_XMM_to_XMM(xmmTemp, xmmTemp); \ } -#define testNeg(xmmReg, gprTemp, aJump) { \ - SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmReg); \ - TEST32ItoR(gprTemp, 1); /* Check sign bit */ \ - aJump = JZ8(0); /* Skip if positive */ \ - MOV32ItoM((uptr)&mVU->divFlag, divI); /* Set Invalid Flags */ \ - SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); /* Abs(xmmReg) */ \ - x86SetJ8(aJump); \ +// Test if Vector is Negative (Set Flags and Makes Positive) +#define testNeg(xmmReg, gprTemp, aJump) { \ + SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmReg); \ + TEST32ItoR(gprTemp, 1); \ + aJump = JZ8(0); \ + MOV32ItoM((uptr)&mVU->divFlag, divI); \ + SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); \ + x86SetJ8(aJump); \ } mVUop(mVU_DIV) { From 3d18ddf16ba330f34df2acdf085f89b3c0afc001 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Thu, 30 Jul 2009 01:50:18 +0000 Subject: [PATCH 04/18] minor fix for broken compiling git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1586 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/ix86/ix86_simd.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/x86/ix86/ix86_simd.cpp b/pcsx2/x86/ix86/ix86_simd.cpp index 2c1c18f469..f65af2cd2d 100644 --- a/pcsx2/x86/ix86/ix86_simd.cpp +++ b/pcsx2/x86/ix86/ix86_simd.cpp @@ -116,7 +116,7 @@ const SimdImpl_DestRegEither<0x66,0xef> xPXOR; // [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag // only if all bits in the result are 0. 
PTEST also sets the CF flag according // to the following condition: (xmm2/m128 AND NOT xmm1) == 0; -extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST; +const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST; const SimdImpl_Compare xCMPEQ; const SimdImpl_Compare xCMPLT; From d1f8bf4d71d215a9754078ee31b91e6c0e3d4996 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Thu, 30 Jul 2009 22:41:36 +0000 Subject: [PATCH 05/18] Some cleanup around iCore. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1587 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Gif.cpp | 2 +- pcsx2/IopHw.cpp | 2 +- pcsx2/x86/iCore.cpp | 8 +- pcsx2/x86/iCore.h | 85 ++++++++++++------ pcsx2/x86/ix86-32/iCore-32.cpp | 151 ++++++++++++++++++++------------ pcsx2/x86/ix86-32/iR5900-32.cpp | 2 - 6 files changed, 155 insertions(+), 95 deletions(-) diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index fb29f830b5..5f531da3e9 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -41,7 +41,7 @@ enum gifstate_t // Should be a gifstate_t rather then int, but I don't feel like possibly interfering with savestates right now. static int gifstate = GIF_STATE_READY; -static u64 s_gstag = 0; // used for querying the last tag +//static u64 s_gstag = 0; // used for querying the last tag // This should be a bool, as should the return value of hwDmacSrcChainWithStack. // Next time I feel like breaking the save state, it will be. --arcum42 diff --git a/pcsx2/IopHw.cpp b/pcsx2/IopHw.cpp index 53ad416e20..57efac37c4 100644 --- a/pcsx2/IopHw.cpp +++ b/pcsx2/IopHw.cpp @@ -667,7 +667,7 @@ void psxHwWrite8(u32 add, u8 value) { case 0x1f80380c: { - bool flush = false; + //bool flush = false; // Terminate lines on CR or full buffers, and ignore \n's if the string contents // are empty (otherwise terminate on \n too!) 
diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index 48c94d210e..08d968b908 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -99,7 +99,7 @@ int _getFreeXMMreg() for (i=0; iregs[xmmregs[i].reg] & (EEINST_LIVE0|EEINST_LIVE1|EEINST_LIVE2)) ) { + if (!(EEINST_ISLIVEXMM(xmmregs[i].reg))) { _freeXMMreg(i); return i; } @@ -144,12 +144,10 @@ int _getFreeXMMreg() } int _allocTempXMMreg(XMMSSEType type, int xmmreg) { - if (xmmreg == -1) { + if (xmmreg == -1) xmmreg = _getFreeXMMreg(); - } - else { + else _freeXMMreg(xmmreg); - } xmmregs[xmmreg].inuse = 1; xmmregs[xmmreg].type = XMMTYPE_TEMP; diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index 6c947f47b8..01a4cc3fa0 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -102,7 +102,11 @@ #define X86TYPE_VU1 0x80 -#define X86_ISVI(type) ((type&~X86TYPE_VU1) == X86TYPE_VI) +//#define X86_ISVI(type) ((type&~X86TYPE_VU1) == X86TYPE_VI) +static __forceinline int X86_ISVI(int type) +{ + return ((type&~X86TYPE_VU1) == X86TYPE_VI); +} struct _x86regs { u8 inuse; @@ -199,25 +203,37 @@ int _signExtendXMMtoM(u32 to, x86SSERegType from, int candestroy); // returns tr // only valid during writes. If write128, then upper 64bits are in an mmxreg // (mmreg&0xf). 
Constant is used from gprreg ((mmreg>>16)&0x1f) -#define MEM_EECONSTTAG 0x0100 // argument is a GPR and comes from g_cpuConstRegs -#define MEM_PSXCONSTTAG 0x0200 -#define MEM_MEMORYTAG 0x0400 -#define MEM_MMXTAG 0x0800 // mmreg is mmxreg -#define MEM_XMMTAG 0x8000 // mmreg is xmmreg -#define MEM_X86TAG 0x4000 // ignored most of the time -#define MEM_GPRTAG 0x2000 // argument is a GPR reg -#define MEM_CONSTTAG 0x1000 // argument is a const +enum memtag +{ + MEM_EECONSTTAG = 0x0100, // argument is a GPR and comes from g_cpuConstRegs + MEM_PSXCONSTTAG = 0x0200, + MEM_MEMORYTAG = 0x0400, + MEM_MMXTAG = 0x0800, // mmreg is mmxreg + MEM_XMMTAG = 0x8000, // mmreg is xmmreg + MEM_X86TAG = 0x4000, // ignored most of the time + MEM_GPRTAG = 0x2000, // argument is a GPR reg + MEM_CONSTTAG = 0x1000 // argument is a const +}; -#define IS_EECONSTREG(reg) (reg>=0&&((reg)&MEM_EECONSTTAG)) -#define IS_PSXCONSTREG(reg) (reg>=0&&((reg)&MEM_PSXCONSTTAG)) -#define IS_MMXREG(reg) (reg>=0&&((reg)&MEM_MMXTAG)) -#define IS_XMMREG(reg) (reg>=0&&((reg)&MEM_XMMTAG)) +template static __forceinline bool IS_REG(s32 reg) +{ + return ((reg >= 0) && (reg & tag)); +} -// fixme - these 4 are only called for u32 registers; should the reg>=0 really be there? 
-#define IS_X86REG(reg) (reg>=0&&((reg)&MEM_X86TAG)) -#define IS_GPRREG(reg) (reg>=0&&((reg)&MEM_GPRTAG)) -#define IS_CONSTREG(reg) (reg>=0&&((reg)&MEM_CONSTTAG)) -#define IS_MEMORYREG(reg) (reg>=0&&((reg)&MEM_MEMORYTAG)) +template static __forceinline bool IS_REG(u32 reg) +{ + return (reg & tag); +} + +#define IS_EECONSTREG(reg) IS_REG(reg) +#define IS_PSXCONSTREG(reg) IS_REG(reg) +#define IS_MMXREG(reg) IS_REG(reg) +#define IS_XMMREG(reg) IS_REG(reg) + +#define IS_X86REG(reg) IS_REG(reg) +#define IS_GPRREG(reg) IS_REG(reg) +#define IS_CONSTREG(reg) IS_REG(reg) +#define IS_MEMORYREG(reg) IS_REG(reg) ////////////////////// // Instruction Info // @@ -265,13 +281,13 @@ extern u32 _recIsRegWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg); extern u32 _recIsRegUsed(EEINST* pinst, int size, u8 xmmtype, u8 reg); extern void _recFillRegister(EEINST& pinst, int type, int reg, int write); -#define EEINST_ISLIVE64(reg) (g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1)) -#define EEINST_ISLIVEXMM(reg) (g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1|EEINST_LIVE2)) -#define EEINST_ISLIVE1(reg) (g_pCurInstInfo->regs[reg] & EEINST_LIVE1) -#define EEINST_ISLIVE2(reg) (g_pCurInstInfo->regs[reg] & EEINST_LIVE2) +static __forceinline bool EEINST_ISLIVE64(u32 reg) { return (g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1)); } +static __forceinline bool EEINST_ISLIVEXMM(u32 reg) { return (g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1|EEINST_LIVE2)); } +static __forceinline bool EEINST_ISLIVE1(u32 reg) { return (g_pCurInstInfo->regs[reg] & EEINST_LIVE1); } +static __forceinline bool EEINST_ISLIVE2(u32 reg) { return (g_pCurInstInfo->regs[reg] & EEINST_LIVE2); } -#define FPUINST_ISLIVE(reg) (g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0) -#define FPUINST_LASTUSE(reg) (g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE) +static __forceinline bool FPUINST_ISLIVE(u32 reg) { return (g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0); } +static __forceinline bool 
FPUINST_LASTUSE(u32 reg) { return (g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE); } // if set, then the variable at this inst really has its upper 32 bits valid // The difference between EEINST_LIVE1 is that the latter is used in back propagation @@ -309,8 +325,8 @@ void SetMMXstate(); void SetFPUstate(); // max is 0x7f, when 0x80 is set, need to flush reg -#define MMX_GET_CACHE(ptr, index) ((u8*)ptr)[index] -#define MMX_SET_CACHE(ptr, ind3, ind2, ind1, ind0) ((u32*)ptr)[0] = (ind3<<24)|(ind2<<16)|(ind1<<8)|ind0; +//#define MMX_GET_CACHE(ptr, index) ((u8*)ptr)[index] +//#define MMX_SET_CACHE(ptr, ind3, ind2, ind1, ind0) ((u32*)ptr)[0] = (ind3<<24)|(ind2<<16)|(ind1<<8)|ind0; #define MMX_GPR 0 #define MMX_HI XMMGPR_HI #define MMX_LO XMMGPR_LO @@ -319,9 +335,20 @@ void SetFPUstate(); #define MMX_COP0 96 #define MMX_TEMP 0x7f -#define MMX_IS32BITS(x) (((x)>=MMX_FPU&&(x)= MMX_GPR && (x) < MMX_GPR+34) +static __forceinline bool MMX_IS32BITS(s32 x) +{ + return (((x >= MMX_FPU) && (x < MMX_COP0 + 32)) || (x == MMX_FPUACC)); +} + +static __forceinline bool MMX_ISGPR(s32 x) +{ + return ((x >= MMX_GPR) && (x < MMX_GPR + 34)); +} + +static __forceinline bool MMX_ISGPR(u32 x) +{ + return (x < MMX_GPR + 34); +} struct _mmxregs { u8 inuse; diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 70cffa7fcc..5e8181a61a 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -32,12 +32,11 @@ using namespace std; // landmass of shared code. 
(air) extern u32 g_psxConstRegs[32]; - u16 x86FpuState; -u16 g_mmxAllocCounter = 0; +static u16 g_mmxAllocCounter = 0; // X86 caching -int g_x86checknext; +static int g_x86checknext; // use special x86 register allocation for ia32 @@ -49,38 +48,83 @@ void _initX86regs() { u32 _x86GetAddr(int type, int reg) { - switch(type&~X86TYPE_VU1) { - case X86TYPE_GPR: return (u32)&cpuRegs.GPR.r[reg]; - case X86TYPE_VI: { - //assert( reg < 16 || reg == REG_R ); - return (type&X86TYPE_VU1)?(u32)&VU1.VI[reg]:(u32)&VU0.VI[reg]; - } - case X86TYPE_MEMOFFSET: return 0; - case X86TYPE_VIMEMOFFSET: return 0; - case X86TYPE_VUQREAD: return (type&X86TYPE_VU1)?(u32)&VU1.VI[REG_Q]:(u32)&VU0.VI[REG_Q]; - case X86TYPE_VUPREAD: return (type&X86TYPE_VU1)?(u32)&VU1.VI[REG_P]:(u32)&VU0.VI[REG_P]; - case X86TYPE_VUQWRITE: return (type&X86TYPE_VU1)?(u32)&VU1.q:(u32)&VU0.q; - case X86TYPE_VUPWRITE: return (type&X86TYPE_VU1)?(u32)&VU1.p:(u32)&VU0.p; - case X86TYPE_PSX: return (u32)&psxRegs.GPR.r[reg]; + u32 ret = 0; + + switch(type&~X86TYPE_VU1) + { + case X86TYPE_GPR: + ret = &cpuRegs.GPR.r[reg]; + break; + + case X86TYPE_VI: + if (type & X86TYPE_VU1) + ret = &VU1.VI[reg]; + else + ret = &VU0.VI[reg]; + break; + + case X86TYPE_MEMOFFSET: + ret = 0; + break; + + case X86TYPE_VIMEMOFFSET: + ret = 0; + break; + + case X86TYPE_VUQREAD: + if (type & X86TYPE_VU1) + ret = &VU1.VI[REG_Q]; + else + ret =&VU0.VI[REG_Q]; + break; + + case X86TYPE_VUPREAD: + if (type & X86TYPE_VU1) + ret = &VU1.VI[REG_P]; + else + ret =&VU0.VI[REG_P]; + break; + + case X86TYPE_VUQWRITE: + if (type & X86TYPE_VU1) + ret = &VU1.q; + else + ret =&VU0.q; + break; + + case X86TYPE_VUPWRITE: + if (type & X86TYPE_VU1) + ret = &VU1.p; + else + ret =&VU0.p; + break; + + case X86TYPE_PSX: + ret = (u32)&psxRegs.GPR.r[reg]; + break; + case X86TYPE_PCWRITEBACK: - return (u32)&g_recWriteback; + ret = (u32)&g_recWriteback; + break; + case X86TYPE_VUJUMP: - return (u32)&g_recWriteback; + ret = (u32)&g_recWriteback; + break; jNO_DEFAULT; } - 
return 0; + return ret; } int _getFreeX86reg(int mode) { - int i, tempi; + int tempi = -1; u32 bestcount = 0x10000; int maxreg = (mode&MODE_8BITREG)?4:iREGCNT_GPR; - for (i=0; i= maxreg ) continue; @@ -92,8 +136,7 @@ int _getFreeX86reg(int mode) } } - tempi = -1; - for (i=1; i 0) XOR32RtoR(EAX, EAX), eaxval = 0; if (eaxval == 0) NOT32R(EAX), eaxval = -1; @@ -184,17 +223,17 @@ void _flushConstRegs() done[1] |= done[3]; } - for (i = 1; i < 32; ++i) { + for (int i = 1; i < 32; ++i) { if (GPR_IS_CONST1(i)) { if (!(g_cpuFlushedConstReg&(1<= maxreg) ) { @@ -272,12 +309,10 @@ int _allocX86reg(int x86reg, int type, int reg, int mode) } } - if (x86reg == -1) { + if (x86reg == -1) x86reg = _getFreeX86reg(oldmode); - } - else { + else _freeX86reg(x86reg); - } x86regs[x86reg].type = type; x86regs[x86reg].reg = reg; @@ -378,6 +413,7 @@ void _deleteX86reg(int type, int reg, int flush) case 0: _freeX86reg(i); break; + case 1: if( x86regs[i].mode & MODE_WRITE) { @@ -391,6 +427,7 @@ void _deleteX86reg(int type, int reg, int flush) x86regs[i].mode |= MODE_READ; } return; + case 2: x86regs[i].inuse = 0; break; @@ -470,7 +507,7 @@ int _getFreeMMXreg() // check for dead regs for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { // mmxregs[i] is unsigned, and MMX_GPR == 0, so the first part is always true. 
+ if (MMX_ISGPR(mmxregs[i].reg)) { if( !(g_pCurInstInfo->regs[mmxregs[i].reg-MMX_GPR] & (EEINST_LIVE0|EEINST_LIVE1)) ) { _freeMMXreg(i); return i; @@ -485,7 +522,7 @@ int _getFreeMMXreg() // check for future xmm usage for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { + if (MMX_ISGPR(mmxregs[i].reg)) { if( !(g_pCurInstInfo->regs[mmxregs[i].reg] & EEINST_MMX) ) { _freeMMXreg(i); return i; @@ -612,7 +649,7 @@ int _checkMMXreg(int reg, int mode) PXORRtoR(i, i); } else { - if( MMX_ISGPR(reg) && (mode&(MODE_READHALF|MODE_READ)) ) _flushConstReg(reg-MMX_GPR); + if (MMX_ISGPR(reg) && (mode&(MODE_READHALF|MODE_READ))) _flushConstReg(reg-MMX_GPR); if( (mode & MODE_READHALF) || (MMX_IS32BITS(reg)&&(mode&MODE_READ)) ) MOVDMtoMMX(i, (u32)_MMXGetAddr(reg)); else @@ -713,7 +750,7 @@ u8 _hasFreeMMXreg() // check for dead regs for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { + if (MMX_ISGPR(mmxregs[i].reg)) { if( !EEINST_ISLIVE64(mmxregs[i].reg-MMX_GPR) ) { return 1; } @@ -723,7 +760,7 @@ u8 _hasFreeMMXreg() // check for dead regs for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { + if (MMX_ISGPR(mmxregs[i].reg)) { if( !(g_pCurInstInfo->regs[mmxregs[i].reg-MMX_GPR]&EEINST_USED) ) { return 1; } @@ -739,8 +776,8 @@ void _freeMMXreg(int mmxreg) if (!mmxregs[mmxreg].inuse) return; if (mmxregs[mmxreg].mode & MODE_WRITE ) { - - if( mmxregs[mmxreg].reg >= MMX_GPR && mmxregs[mmxreg].reg < MMX_GPR+32 ) + // Not sure if this line is accurate, since if the 32 was 34, it would be MMX_ISGPR. + if ( /*mmxregs[mmxreg].reg >= MMX_GPR &&*/ mmxregs[mmxreg].reg < MMX_GPR+32 ) // Checking if a u32 is >=0 is pointless. 
assert( !(g_cpuHasConstReg & (1<<(mmxregs[mmxreg].reg-MMX_GPR))) ); assert( mmxregs[mmxreg].reg != MMX_GPR ); diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 8647cf8311..6f9ae84543 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -478,8 +478,6 @@ void recResetEE( void ) __asm__("emms"); #endif - #define GET_HWADDR(mem) - for (int i = 0; i < 0x10000; i++) recLUT_SetPage(recLUT, 0, 0, 0, i, 0); From a4e0009a7e7d3faebf2e43e6aae8c4fe6fc3b888 Mon Sep 17 00:00:00 2001 From: arcum42 Date: Thu, 30 Jul 2009 23:50:39 +0000 Subject: [PATCH 06/18] Stub out some functions in the cache code. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1588 96395faa-99c1-11dd-bbfe-3dabce05a288 --- common/include/Pcsx2Config.h | 4 ++++ pcsx2/Cache.cpp | 40 +++++++++++++++++----------------- pcsx2/Cache.h | 11 ++++++++++ pcsx2/Memory.cpp | 4 ++-- pcsx2/x86/ix86-32/iCore-32.cpp | 32 +++++++++++++-------------- 5 files changed, 53 insertions(+), 38 deletions(-) diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index 17b6bfe6f1..01be808273 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -120,6 +120,10 @@ extern SessionOverrideFlags g_Session; #define EE_CONST_PROP // rec2 - enables constant propagation (faster) +// These are broken, so don't enable. +//#define PCSX2_CACHE_EMU_MEM +//#define ENABLECACHE + // Memory Card configuration, per slot. struct McdConfig { diff --git a/pcsx2/Cache.cpp b/pcsx2/Cache.cpp index 7a996a81f7..1e066755ee 100644 --- a/pcsx2/Cache.cpp +++ b/pcsx2/Cache.cpp @@ -26,9 +26,8 @@ _cacheS pCache[64]; namespace R5900{ namespace Interpreter { - -// fixme - this code no longer compiles if PCSX2_CACHE_EMU_MEM is defined - do we need it any more? 
#ifdef PCSX2_CACHE_EMU_MEM + int getFreeCache(u32 mem, int mode, int * way) { u8 * out; u32 paddr; @@ -37,15 +36,16 @@ int getFreeCache(u32 mem, int mode, int * way) { int number; int i = (mem >> 6) & 0x3F; - paddr = memLUTR[mem >> 12]; - taddr[0] = memLUTW[pCache[i].tag[0]>>12]; - taddr[1] = memLUTW[pCache[i].tag[1]>>12]; + paddr = getMemR(mem); + taddr[0] = getMemW(pCache[i].tag[0]); + taddr[1] = getMemW(pCache[i].tag[1]); if (taddr[0] == paddr && (pCache[i].tag[0] & 0x20)) { *way = 0; return i; - }else if(taddr[1] == paddr && (pCache[i].tag[1] & 0x20)) + } + else if(taddr[1] == paddr && (pCache[i].tag[1] & 0x20)) { *way = 1; return i; @@ -66,8 +66,6 @@ int getFreeCache(u32 mem, int mode, int * way) { ((u64*)out)[6] = ((u64*)pCache[i].data[number][3].b8._8)[0]; ((u64*)out)[7] = ((u64*)pCache[i].data[number][3].b8._8)[1]; } - - if(mode == 1) { @@ -89,8 +87,10 @@ int getFreeCache(u32 mem, int mode, int * way) { ((u64*)pCache[i].data[number][3].b8._8)[0] = ((u64*)out)[6]; ((u64*)pCache[i].data[number][3].b8._8)[1] = ((u64*)out)[7]; - if(pCache[i].tag[number] & 0x10) pCache[i].tag[number] &= ~(0x10); - else pCache[i].tag[number] |= 0x10; + if(pCache[i].tag[number] & 0x10) + pCache[i].tag[number] &= ~(0x10); + else + pCache[i].tag[number] |= 0x10; pCache[i].tag[number] |= 0x20; *way = number; @@ -163,9 +163,9 @@ void CACHE() { int index = (addr >> 6) & 0x3F; u32 paddr[2]; int way; - u32 taddr = memLUTR[addr >> 12]; - paddr[0] = memLUTW[pCache[index].tag[0] >> 12]; - paddr[1] = memLUTW[pCache[index].tag[1] >> 12]; + u32 taddr = getMemR(addr); + paddr[0] = getMemW(pCache[index].tag[0]); + paddr[1] = getMemW(pCache[index].tag[1]); if(paddr[0] == taddr && (pCache[index].tag[0] & 0x20)) { @@ -199,9 +199,9 @@ void CACHE() { int index = (addr >> 6) & 0x3F; u32 paddr[2]; int way; - u32 taddr = memLUTW[addr >> 12]; - paddr[0] = memLUTW[pCache[index].tag[0] >> 12]; - paddr[1] = memLUTW[pCache[index].tag[1] >> 12]; + u32 taddr = getMemW(addr); + paddr[0] = 
getMemW(pCache[index].tag[0]); + paddr[1] = getMemW(pCache[index].tag[1]); if(paddr[0] == taddr && (pCache[index].tag[0] & 0x20)) { @@ -250,9 +250,9 @@ void CACHE() { int index = (addr >> 6) & 0x3F; u32 paddr[2]; int way; - u32 taddr = memLUTW[addr >> 12]; - paddr[0] = memLUTW[pCache[index].tag[0] >> 12]; - paddr[1] = memLUTW[pCache[index].tag[1] >> 12]; + u32 taddr = getMemW(addr); + paddr[0] = getMemW(pCache[index].tag[0]); + paddr[1] = getMemW(pCache[index].tag[1]); if(paddr[0] == taddr && (pCache[index].tag[0] & 0x20)) { @@ -360,7 +360,7 @@ void CACHE() { if(pCache[index].tag[way] & 0x60) // Dirty { - u32 paddr = memLUTW[pCache[index].tag[way] >> 12]; + u32 paddr = getMemW(pCache[index].tag[way]); char * t = (char *)(paddr); out = (u8*)(t + (addr & 0xFC0)); ((u64*)out)[0] = ((u64*)pCache[index].data[way][0].b8._8)[0]; diff --git a/pcsx2/Cache.h b/pcsx2/Cache.h index e9126fec69..de0ef9ca53 100644 --- a/pcsx2/Cache.h +++ b/pcsx2/Cache.h @@ -45,4 +45,15 @@ void writeCache64(u32 mem, u64 value); void writeCache128(u32 mem, u64 *value); u8 *readCache(u32 mem); +// Fixme - these two functions do nothing, and the cache code relies on these two functions. +static __forceinline u32 getMemR(s32 mem) +{ + return 0;//memLUTR[mem >> 12]; +} + +static __forceinline u32 getMemW(s32 mem) +{ + return 0;//memLUTW[mem>>12]; +} + #endif /* __CACHE_H__ */ diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index 9cdc11a4e0..22898c391f 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -594,8 +594,8 @@ void memClearPageAddr(u32 vaddr) vtlb_VMapUnmap(vaddr,0x1000); // -> whut ? 
#ifdef FULLTLB - memLUTRK[vaddr >> 12] = 0; - memLUTWK[vaddr >> 12] = 0; +// memLUTRK[vaddr >> 12] = 0; +// memLUTWK[vaddr >> 12] = 0; #endif } diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 5e8181a61a..5446c147f3 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -53,62 +53,62 @@ u32 _x86GetAddr(int type, int reg) switch(type&~X86TYPE_VU1) { case X86TYPE_GPR: - ret = &cpuRegs.GPR.r[reg]; + ret = (u32)&cpuRegs.GPR.r[reg]; break; case X86TYPE_VI: if (type & X86TYPE_VU1) - ret = &VU1.VI[reg]; + ret = (u32)&VU1.VI[reg]; else - ret = &VU0.VI[reg]; + ret = (u32)&VU0.VI[reg]; break; case X86TYPE_MEMOFFSET: - ret = 0; + ret = 0; break; case X86TYPE_VIMEMOFFSET: - ret = 0; + ret = 0; break; case X86TYPE_VUQREAD: if (type & X86TYPE_VU1) - ret = &VU1.VI[REG_Q]; + ret = (u32)&VU1.VI[REG_Q]; else - ret =&VU0.VI[REG_Q]; + ret = (u32)&VU0.VI[REG_Q]; break; case X86TYPE_VUPREAD: if (type & X86TYPE_VU1) - ret = &VU1.VI[REG_P]; + ret = (u32)&VU1.VI[REG_P]; else - ret =&VU0.VI[REG_P]; + ret = (u32)&VU0.VI[REG_P]; break; case X86TYPE_VUQWRITE: if (type & X86TYPE_VU1) - ret = &VU1.q; + ret = (u32)&VU1.q; else - ret =&VU0.q; + ret = (u32)&VU0.q; break; case X86TYPE_VUPWRITE: if (type & X86TYPE_VU1) - ret = &VU1.p; + ret = (u32)&VU1.p; else - ret =&VU0.p; + ret = (u32)&VU0.p; break; case X86TYPE_PSX: - ret = (u32)&psxRegs.GPR.r[reg]; + ret = (u32)&psxRegs.GPR.r[reg]; break; case X86TYPE_PCWRITEBACK: - ret = (u32)&g_recWriteback; + ret = (u32)&g_recWriteback; break; case X86TYPE_VUJUMP: - ret = (u32)&g_recWriteback; + ret = (u32)&g_recWriteback; break; jNO_DEFAULT; From dea6f1ced2d002bd34dfa69611497ad0d02da89c Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Fri, 31 Jul 2009 02:30:32 +0000 Subject: [PATCH 07/18] microVU: Optimizations, cleanup, and fixed a bug... 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1589 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Alloc.inl | 62 ++++++++++++++++++----------------- pcsx2/x86/microVU_Compile.inl | 10 +++--- pcsx2/x86/microVU_Lower.inl | 20 +++++------ pcsx2/x86/microVU_Misc.h | 12 ------- pcsx2/x86/microVU_Upper.inl | 58 ++++++++++++++++---------------- 5 files changed, 76 insertions(+), 86 deletions(-) diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index d929e00126..7fb35067bb 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -26,17 +26,17 @@ // Flag Allocators //------------------------------------------------------------------ -#define getFlagReg(regX, fInst) { \ - switch (fInst) { \ - case 0: regX = gprF0; break; \ - case 1: regX = gprF1; break; \ - case 2: regX = gprF2; break; \ - case 3: regX = gprF3; break; \ - default: \ - Console::Error("microVU: Flag Instance Error (fInst = %d)", params fInst); \ - regX = gprF0; \ - break; \ - } \ +#define getFlagReg(regX, fInst) { \ + switch (fInst) { \ + case 0: regX = gprF0; break; \ + case 1: regX = gprF1; break; \ + case 2: regX = gprF2; break; \ + case 3: regX = gprF3; break; \ + default: \ + Console::Error("microVU Error: fInst = %d", params fInst); \ + regX = gprF0; \ + break; \ + } \ } #define setBitSFLAG(bitTest, bitSet) { \ @@ -114,31 +114,33 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) { } //------------------------------------------------------------------ -// I/Q/P Reg Allocators +// I/P/Q Reg Allocators //------------------------------------------------------------------ -#define getIreg(reg, modXYZW) { \ - SSE_MOVSS_M32_to_XMM(reg, (uptr)&mVU->regs->VI[REG_I].UL); \ - if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, -1, 8); \ - if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } \ +microVUt(void) getIreg(mV, int reg, bool modXYZW) { + SSE_MOVSS_M32_to_XMM(reg, (uptr)&mVU->regs->VI[REG_I].UL); + if 
(CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, -1, 8); + if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } } -#define getQreg(reg) { \ - mVUunpack_xyzw(reg, xmmPQ, mVUinfo.readQ); \ - /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ \ +microVUt(void) getPreg(mV, int reg) { + mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP)); + /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ } -#define getPreg(reg) { \ - mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP)); \ - /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ \ +microVUt(void) getQreg(mV, int reg) { + mVUunpack_xyzw(reg, xmmPQ, mVUinfo.readQ); + /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ } -//------------------------------------------------------------------ -// Lower Instruction Allocator Helpers -//------------------------------------------------------------------ - -// VF to GPR -#define getReg8(GPRreg, _reg_, _fxf_) { \ - if (!_reg_ && (_fxf_ < 3)) { XOR32RtoR(GPRreg, GPRreg); } \ - else { MOV32MtoR(GPRreg, (uptr)&mVU->regs->VF[_reg_].UL[0]); } \ +microVUt(void) writeQreg(mV, int reg, int qInstance) { + if (qInstance) { + if (!cpucaps.hasStreamingSIMD4Extensions) { + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); + SSE_MOVSS_XMM_to_XMM(xmmPQ, reg); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); + } + else SSE4_INSERTPS_XMM_to_XMM(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0)); + } + else SSE_MOVSS_XMM_to_XMM(xmmPQ, reg); } diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 92a1757e69..28555dd9fd 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -31,10 +31,12 @@ } \ } -#define startLoop() { \ - mVUdebug1(); \ - memset(&mVUinfo, 0, sizeof(mVUinfo)); \ - memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); \ +#define startLoop() { \ + if (curI & _Mbit_) { Console::Status("microVU%d: M-bit set!", params getIndex); } \ + if (curI & _Dbit_) { DevCon::Status ("microVU%d: D-bit set!", params getIndex); } \ 
+ if (curI & _Tbit_) { DevCon::Status ("microVU%d: T-bit set!", params getIndex); } \ + memset(&mVUinfo, 0, sizeof(mVUinfo)); \ + memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); \ } #define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index d760b0c4a7..8bf0601e97 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -51,8 +51,10 @@ mVUop(mVU_DIV) { pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 7); } pass2 { u8 *ajmp, *bjmp, *cjmp, *djmp; + int Ft; + if (_Ftf_) Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_))); + else Ft = mVU->regAlloc->allocReg(_Ft_); int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); - int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_))); int t1 = mVU->regAlloc->allocReg(); testZero(Ft, t1, gprT1); // Test if Ft is zero @@ -77,9 +79,7 @@ mVUop(mVU_DIV) { mVUclamp1(Fs, t1, 8); x86SetJ8(djmp); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); - SSE_MOVSS_XMM_to_XMM(xmmPQ, Fs); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); + writeQreg(mVU, Fs, mVUinfo.writeQ); mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Ft); @@ -99,9 +99,7 @@ mVUop(mVU_SQRT) { if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(Ft, (uptr)mVU_maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive) SSE_SQRTSS_XMM_to_XMM(Ft, Ft); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); - SSE_MOVSS_XMM_to_XMM(xmmPQ, Ft); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); + writeQreg(mVU, Ft, mVUinfo.writeQ); mVU->regAlloc->clearNeeded(Ft); } @@ -140,9 +138,7 @@ mVUop(mVU_RSQRT) { mVUclamp1(Fs, t1, 8); x86SetJ8(djmp); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); - SSE_MOVSS_XMM_to_XMM(xmmPQ, Fs); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); + writeQreg(mVU, Fs, mVUinfo.writeQ); mVU->regAlloc->clearNeeded(Fs); 
mVU->regAlloc->clearNeeded(Ft); @@ -741,7 +737,7 @@ mVUop(mVU_MFP) { pass1 { mVUanalyzeMFP(mVU, _Ft_); } pass2 { int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); - getPreg(Ft); + getPreg(mVU, Ft); mVU->regAlloc->clearNeeded(Ft); } pass3 { mVUlog("MFP.%s vf%02d, P", _XYZW_String, _Ft_); } @@ -759,7 +755,7 @@ mVUop(mVU_MOVE) { mVUop(mVU_MR32) { pass1 { mVUanalyzeMR32(mVU, _Fs_, _Ft_); } pass2 { - int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf); + int Fs = mVU->regAlloc->allocReg(_Fs_); int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); if (_XYZW_SS) mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 3 : 0)))); else SSE2_PSHUFD_XMM_to_XMM(Ft, Fs, 0x39); diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index ec61a5757a..195c22a415 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -242,20 +242,8 @@ typedef u32 (__fastcall *mVUCall)(void*, void*); // Debug Stuff... #ifdef mVUdebug #define mVUprint Console::Status -#define mVUdebug1() { \ - if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ - if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \ - if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \ - if (curI & _Dbit_) { SysPrintf("microVU: D-bit set!\n"); } \ - if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \ -} #else #define mVUprint 0&& -#define mVUdebug1() { \ - if (curI & _Mbit_) { Console::Status("microVU%d: M-bit set!", params getIndex); } \ - if (curI & _Dbit_) { DevCon::Status ("microVU%d: D-bit set!", params getIndex); } \ - if (curI & _Tbit_) { DevCon::Status ("microVU%d: T-bit set!", params getIndex); } \ -} #endif // Program Logging... 
diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index a4801f65c9..e65ca956de 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -34,7 +34,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1 = -1, int regT2 = -1, bool //SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag); if (mVUsFlagHack) { sFLAG.doFlag = 0; } if (!sFLAG.doFlag && !mFLAG.doFlag) { return; } - if (!(!mFLAG.doFlag || (_XYZW_SS && modXYZW))) { + if ((mFLAG.doFlag && !(_XYZW_SS && modXYZW))) { if (regT2 < 0) { regT2 = mVU->regAlloc->allocReg(); regT2b = 1; } SSE2_PSHUFD_XMM_to_XMM(regT2, reg, 0x1B); // Flip wzyx to xyzw } @@ -119,27 +119,28 @@ void setupPass1(microVU* mVU, int opCase, bool isACC, bool noFlagUpdate) { } // Sets Up Ft Reg for Normal, BC, I, and Q Cases -void setupFtReg(microVU* mVU, int& Ft, int opCase) { +void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) { opCase1 { - if (_XYZW_SS2) Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); - else Ft = mVU->regAlloc->allocReg(_Ft_); + if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; } + else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; } } opCase2 { - int tempFt = mVU->regAlloc->allocReg(_Ft_); + tempFt = mVU->regAlloc->allocReg(_Ft_); Ft = mVU->regAlloc->allocReg(); mVUunpack_xyzw(Ft, tempFt, _bc_); mVU->regAlloc->clearNeeded(tempFt); + tempFt = Ft; } - opCase3 { Ft = mVU->regAlloc->allocReg(); getIreg(Ft, 1); } - opCase4 { Ft = mVU->regAlloc->allocReg(); getQreg(Ft); } + opCase3 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getIreg(mVU, Ft, 1); } + opCase4 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(mVU, Ft); } } // Normal FMAC Opcodes void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, const char* opName) { pass1 { setupPass1(mVU, opCase, isACC, ((opType == 3) || (opType == 4))); } pass2 { - int Fs, Ft, ACC; - setupFtReg(mVU, Ft, opCase); + int Fs, Ft, ACC, tempFt; + setupFtReg(mVU, Ft, 
tempFt, opCase); if (isACC) { Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); @@ -156,11 +157,11 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co if (isACC) { if (_XYZW_SS) SSE_MOVSS_XMM_to_XMM(ACC, Fs); else mVUmergeRegs(ACC, Fs, _X_Y_Z_W); - mVUupdateFlags(mVU, ACC, Fs, ((opCase==2) ? Ft : (((opCase==1) && _XYZW_SS2) ? Ft : -1))); + mVUupdateFlags(mVU, ACC, Fs, tempFt); if (_XYZW_SS2) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); mVU->regAlloc->clearNeeded(ACC); } - else mVUupdateFlags(mVU, Fs, ((opCase==2) ? Ft : -1), (((opCase==1) && _XYZW_SS2) ? Ft : -1)); + else mVUupdateFlags(mVU, Fs, tempFt); mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First mVU->regAlloc->clearNeeded(Ft); @@ -172,8 +173,8 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* opName) { pass1 { setupPass1(mVU, opCase, 1, 0); } pass2 { - int Fs, Ft, ACC; - setupFtReg(mVU, Ft, opCase); + int Fs, Ft, ACC, tempFt; + setupFtReg(mVU, Ft, tempFt, opCase); Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); ACC = mVU->regAlloc->allocReg(32, 32, 0xf, 0); @@ -185,16 +186,17 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op else SSE_PS[2](mVU, Fs, Ft, -1, -1); if (_XYZW_SS || _X_Y_Z_W == 0xf) { - if (_XYZW_SS) SSE_SS[opType](mVU, ACC, Fs, -1, -1); - else SSE_PS[opType](mVU, ACC, Fs, -1, -1); - mVUupdateFlags(mVU, ACC, Fs, ((opCase==2) ? Ft : (((opCase==1) && _XYZW_SS2) ? 
Ft : -1))); + if (_XYZW_SS) SSE_SS[opType](mVU, ACC, Fs, tempFt, -1); + else SSE_PS[opType](mVU, ACC, Fs, tempFt, -1); + mVUupdateFlags(mVU, ACC, Fs, tempFt); if (_XYZW_SS && _X_Y_Z_W != 8) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); } else { int tempACC = mVU->regAlloc->allocReg(); SSE_MOVAPS_XMM_to_XMM(tempACC, ACC); - SSE_PS[opType](mVU, tempACC, Fs, -1, -1); + SSE_PS[opType](mVU, tempACC, Fs, tempFt, -1); mVUmergeRegs(ACC, tempACC, _X_Y_Z_W); + mVUupdateFlags(mVU, ACC, Fs, tempFt); mVU->regAlloc->clearNeeded(tempACC); } @@ -209,8 +211,8 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) { pass1 { setupPass1(mVU, opCase, 0, 0); } pass2 { - int Fs, Ft, ACC; - setupFtReg(mVU, Ft, opCase); + int Fs, Ft, ACC, tempFt; + setupFtReg(mVU, Ft, tempFt, opCase); ACC = mVU->regAlloc->allocReg(32); Fs = mVU->regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); @@ -218,12 +220,12 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) { if (_XYZW_SS2) { SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); } opCase2 { mVUclamp1(Fs, -1, _X_Y_Z_W); } // Clamp Needed for alot of games (TOTA, DoM, etc...) - if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[0](mVU, Fs, ACC, -1, -1); } - else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[0](mVU, Fs, ACC, -1, -1); } + if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[0](mVU, Fs, ACC, tempFt, -1); } + else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[0](mVU, Fs, ACC, tempFt, -1); } if (_XYZW_SS2) { SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); } - mVUupdateFlags(mVU, Fs, ((opCase==2) ? Ft : -1), (((opCase==1) && _XYZW_SS2) ? 
Ft : -1)); + mVUupdateFlags(mVU, Fs, tempFt); mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First mVU->regAlloc->clearNeeded(Ft); @@ -236,16 +238,16 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) { void mVU_FMACd(microVU* mVU, int recPass, int opCase, const char* opName) { pass1 { setupPass1(mVU, opCase, 0, 0); } pass2 { - int Fs, Ft, Fd; - setupFtReg(mVU, Ft, opCase); + int Fs, Ft, Fd, tempFt; + setupFtReg(mVU, Ft, tempFt, opCase); - Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); + Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); Fd = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W); - if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[1](mVU, Fd, Fs, -1, -1); } - else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[1](mVU, Fd, Fs, -1, -1); } + if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[1](mVU, Fd, Fs, tempFt, -1); } + else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[1](mVU, Fd, Fs, tempFt, -1); } - mVUupdateFlags(mVU, Fd, Fs, ((opCase==2) ? Ft : (((opCase==1) && _XYZW_SS2) ? 
Ft : -1))); + mVUupdateFlags(mVU, Fd, Fs, tempFt); mVU->regAlloc->clearNeeded(Fd); // Always Clear Written Reg First mVU->regAlloc->clearNeeded(Ft); From 8b288e89170344465ee696c881994b4c4c5766e0 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Fri, 31 Jul 2009 07:34:03 +0000 Subject: [PATCH 08/18] microVU: more optimizations git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1590 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Alloc.inl | 6 +++--- pcsx2/x86/microVU_Analyze.inl | 31 ++++++++++++++++--------------- pcsx2/x86/microVU_Flags.inl | 8 ++++---- pcsx2/x86/microVU_IR.h | 2 +- pcsx2/x86/microVU_Lower.inl | 6 +++--- pcsx2/x86/microVU_Upper.inl | 5 ++++- 6 files changed, 31 insertions(+), 27 deletions(-) diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index 7fb35067bb..6f1d741b35 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -128,12 +128,12 @@ microVUt(void) getPreg(mV, int reg) { /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ } -microVUt(void) getQreg(mV, int reg) { - mVUunpack_xyzw(reg, xmmPQ, mVUinfo.readQ); +microVUt(void) getQreg(int reg, int qInstance) { + mVUunpack_xyzw(reg, xmmPQ, qInstance); /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ } -microVUt(void) writeQreg(mV, int reg, int qInstance) { +microVUt(void) writeQreg(int reg, int qInstance) { if (qInstance) { if (!cpucaps.hasStreamingSIMD4Extensions) { SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 4349b9fdf7..ff19ae7d33 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -275,13 +275,18 @@ microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP) { //------------------------------------------------------------------ // Sflag - Status Flag Opcodes //------------------------------------------------------------------ -#define flagSet(xFLAG) { \ - int curPC = iPC; \ - for (int i = mVUcount, j = 0; i > 0; i--, 
j++) { \ - incPC2(-2); \ - if (sFLAG.doFlag) { xFLAG = 1; if (j >= 3) { break; } } \ - } \ - iPC = curPC; \ +microVUt(void) flagSet(mV, bool setMacFlag) { + int curPC = iPC; + for (int i = mVUcount, j = 0; i > 0; i--, j++) { + j += mVUstall; + incPC2(-2); + if (sFLAG.doFlag && (j >= 3)) { + if (setMacFlag) { mFLAG.doFlag = 1; } + else { sFLAG.doNonSticky = 1; } + break; + } + } + iPC = curPC; } microVUt(void) mVUanalyzeSflag(mV, int It) { @@ -289,14 +294,10 @@ microVUt(void) mVUanalyzeSflag(mV, int It) { analyzeVIreg2(It, mVUlow.VI_write, 1); if (!It) { mVUlow.isNOP = 1; } else { - mVUinfo.swapOps = 1; mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block - flagSet(sFLAG.doNonSticky); - if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf; } - if (mVUcount >= 1) { incPC2(-2); mVUlow.useSflag = 1; incPC2(2); } - // Note: useSflag is used for status flag optimizations when a FSSET instruction is called. - // Do to stalls, it can only be set one instruction prior to the status flag read instruction - // if we were guaranteed no-stalls were to happen, it could be set 4 instruction prior. 
+ mVUinfo.swapOps = 1; + flagSet(mVU, 0); + if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf; } } } @@ -316,8 +317,8 @@ microVUt(void) mVUanalyzeMflag(mV, int Is, int It) { if (!It) { mVUlow.isNOP = 1; } else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed) mVUinfo.swapOps = 1; + flagSet(mVU, 1); if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << 4; } - flagSet(mFLAG.doFlag); } } diff --git a/pcsx2/x86/microVU_Flags.inl b/pcsx2/x86/microVU_Flags.inl index 8ad213e870..ca4ccb1fd3 100644 --- a/pcsx2/x86/microVU_Flags.inl +++ b/pcsx2/x86/microVU_Flags.inl @@ -34,18 +34,18 @@ microVUt(void) mVUstatusFlagOp(mV) { int curPC = iPC; int i = mVUcount; bool runLoop = 1; - if (sFLAG.doFlag) { mVUlow.useSflag = 1; } + if (sFLAG.doFlag) { sFLAG.doNonSticky = 1; } else { for (; i > 0; i--) { incPC2(-2); - if (mVUlow.useSflag) { runLoop = 0; break; } - if (sFLAG.doFlag) { mVUlow.useSflag = 1; break; } + if (sFLAG.doNonSticky) { runLoop = 0; break; } + else if (sFLAG.doFlag) { sFLAG.doNonSticky = 1; break; } } } if (runLoop) { for (; i > 0; i--) { incPC2(-2); - if (mVUlow.useSflag) break; + if (sFLAG.doNonSticky) break; sFLAG.doFlag = 0; } } diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index 1ce311dda5..44a1de55a3 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -107,7 +107,6 @@ struct microLowerOp { u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 
2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR) bool isNOP; // This instruction is a NOP bool isFSSET; // This instruction is a FSSET - bool useSflag; // This instruction uses/reads Sflag bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR) bool memReadIs; // Read Is (VI reg) from memory (used by branches) @@ -245,6 +244,7 @@ public: clearReg(reg); // Clear Reg } void clearNeeded(int reg) { + if ((reg < 0) || (reg >= xmmTotal)) return; xmmReg[reg].isNeeded = 0; if (xmmReg[reg].xyzw) { // Reg was modified if (xmmReg[reg].reg > 0) { diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 8bf0601e97..ce5004e1f3 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -79,7 +79,7 @@ mVUop(mVU_DIV) { mVUclamp1(Fs, t1, 8); x86SetJ8(djmp); - writeQreg(mVU, Fs, mVUinfo.writeQ); + writeQreg(Fs, mVUinfo.writeQ); mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Ft); @@ -99,7 +99,7 @@ mVUop(mVU_SQRT) { if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(Ft, (uptr)mVU_maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive) SSE_SQRTSS_XMM_to_XMM(Ft, Ft); - writeQreg(mVU, Ft, mVUinfo.writeQ); + writeQreg(Ft, mVUinfo.writeQ); mVU->regAlloc->clearNeeded(Ft); } @@ -138,7 +138,7 @@ mVUop(mVU_RSQRT) { mVUclamp1(Fs, t1, 8); x86SetJ8(djmp); - writeQreg(mVU, Fs, mVUinfo.writeQ); + writeQreg(Fs, mVUinfo.writeQ); mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Ft); diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index e65ca956de..920ad96da2 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -132,7 +132,10 @@ void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) { tempFt = Ft; } opCase3 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getIreg(mVU, Ft, 1); } 
- opCase4 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(mVU, Ft); } + opCase4 { + if (_XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; } + else { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); } + } } // Normal FMAC Opcodes From 3f4f3db3e63aafa75a7bb7767709d3cbb67a46e2 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Fri, 31 Jul 2009 23:59:06 +0000 Subject: [PATCH 09/18] GSdx: little code cleanup git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1591 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSDevice.cpp | 7 + plugins/GSdx/GSDevice.h | 3 +- plugins/GSdx/GSDevice10.cpp | 131 +++---- plugins/GSdx/GSDevice10.h | 49 ++- plugins/GSdx/GSDevice11.cpp | 137 +++---- plugins/GSdx/GSDevice11.h | 49 ++- plugins/GSdx/GSDevice9.cpp | 168 ++++----- plugins/GSdx/GSDevice9.h | 46 ++- plugins/GSdx/GSDeviceOGL.cpp | 10 - plugins/GSdx/GSDeviceOGL.h | 2 - plugins/GSdx/GSRendererHW.h | 2 +- plugins/GSdx/GSTextureCache.cpp | 556 +++++++++++++++-------------- plugins/GSdx/GSTextureCache.h | 24 +- plugins/GSdx/GSTextureCache10.cpp | 34 +- plugins/GSdx/GSTextureCache10.h | 22 +- plugins/GSdx/GSTextureCache11.cpp | 34 +- plugins/GSdx/GSTextureCache11.h | 22 +- plugins/GSdx/GSTextureCache9.cpp | 32 +- plugins/GSdx/GSTextureCache9.h | 22 +- plugins/GSdx/GSTextureCacheOGL.cpp | 9 - plugins/GSdx/GSTextureCacheOGL.h | 22 +- plugins/GSdx/vsprops/common.props | 2 +- 22 files changed, 607 insertions(+), 776 deletions(-) diff --git a/plugins/GSdx/GSDevice.cpp b/plugins/GSdx/GSDevice.cpp index cd1317f2e3..2b59f856f9 100644 --- a/plugins/GSdx/GSDevice.cpp +++ b/plugins/GSdx/GSDevice.cpp @@ -31,6 +31,7 @@ GSDevice::GSDevice() , m_blend(NULL) , m_1x1(NULL) { + memset(&m_vertices, 0, sizeof(m_vertices)); } GSDevice::~GSDevice() @@ -121,6 +122,12 @@ GSTexture* GSDevice::Fetch(int type, int w, int h, int format) return Create(type, w, h, format); } +void GSDevice::EndScene() +{ + m_vertices.start += m_vertices.count; + m_vertices.count = 0; +} + void 
GSDevice::Recycle(GSTexture* t) { if(t) diff --git a/plugins/GSdx/GSDevice.h b/plugins/GSdx/GSDevice.h index 087e49edfc..b87374608f 100644 --- a/plugins/GSdx/GSDevice.h +++ b/plugins/GSdx/GSDevice.h @@ -63,6 +63,7 @@ protected: GSTexture* m_1x1; GSTexture* m_current; struct {D3D_FEATURE_LEVEL level; string model, vs, gs, ps;} m_shader; + struct {size_t stride, start, count, limit;} m_vertices; virtual GSTexture* Create(int type, int w, int h, int format) = 0; @@ -85,7 +86,7 @@ public: virtual void BeginScene() {} virtual void DrawPrimitive() {}; - virtual void EndScene() {} + virtual void EndScene(); virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {} virtual void ClearRenderTarget(GSTexture* t, uint32 c) {} diff --git a/plugins/GSdx/GSDevice10.cpp b/plugins/GSdx/GSDevice10.cpp index bba647ccce..7c40689e1d 100644 --- a/plugins/GSdx/GSDevice10.cpp +++ b/plugins/GSdx/GSDevice10.cpp @@ -25,31 +25,11 @@ #include "resource.h" GSDevice10::GSDevice10() - : m_vb(NULL) - , m_vb_stride(0) - , m_layout(NULL) - , m_topology(D3D10_PRIMITIVE_TOPOLOGY_UNDEFINED) - , m_vs(NULL) - , m_vs_cb(NULL) - , m_gs(NULL) - , m_ps(NULL) - , m_ps_cb(NULL) - , m_scissor(0, 0, 0, 0) - , m_viewport(0, 0) - , m_dss(NULL) - , m_sref(0) - , m_bs(NULL) - , m_bf(-1) - , m_rtv(NULL) - , m_dsv(NULL) { - memset(m_ps_srv, 0, sizeof(m_ps_srv)); - memset(m_ps_ss, 0, sizeof(m_ps_ss)); + memset(&m_state, 0, sizeof(m_state)); - m_vertices.stride = 0; - m_vertices.start = 0; - m_vertices.count = 0; - m_vertices.limit = 0; + m_state.topology = D3D10_PRIMITIVE_TOPOLOGY_UNDEFINED; + m_state.bf = -1; } GSDevice10::~GSDevice10() @@ -258,27 +238,11 @@ void GSDevice10::Flip(bool limit) m_swapchain->Present(m_vsync && limit ? 
1 : 0, 0); } -void GSDevice10::BeginScene() -{ -} - void GSDevice10::DrawPrimitive() { m_dev->Draw(m_vertices.count, m_vertices.start); } -void GSDevice10::EndScene() -{ - //PSSetShaderResources(NULL, NULL); - - // not clearing the rt/ds gives a little fps boost in complex games (5-10%) - - // OMSetRenderTargets(NULL, NULL); - - m_vertices.start += m_vertices.count; - m_vertices.count = 0; -} - void GSDevice10::ClearRenderTarget(GSTexture* t, const GSVector4& c) { m_dev->ClearRenderTargetView(*(GSTexture10*)t, c.v); @@ -483,6 +447,8 @@ void GSDevice10::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, // EndScene(); + + PSSetShaderResources(NULL, NULL); } void GSDevice10::DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c) @@ -525,14 +491,15 @@ void GSDevice10::IASetVertexBuffer(const void* vertices, size_t stride, size_t c if(count * stride > m_vertices.limit * m_vertices.stride) { - m_vertices.vb_old = m_vertices.vb; - m_vertices.vb = NULL; + m_vb_old = m_vb; + m_vb = NULL; + m_vertices.start = 0; m_vertices.count = 0; m_vertices.limit = std::max(count * 3 / 2, 10000); } - if(m_vertices.vb == NULL) + if(m_vb == NULL) { D3D10_BUFFER_DESC bd; @@ -545,7 +512,7 @@ void GSDevice10::IASetVertexBuffer(const void* vertices, size_t stride, size_t c HRESULT hr; - hr = m_dev->CreateBuffer(&bd, NULL, &m_vertices.vb); + hr = m_dev->CreateBuffer(&bd, NULL, &m_vb); if(FAILED(hr)) return; } @@ -561,25 +528,25 @@ void GSDevice10::IASetVertexBuffer(const void* vertices, size_t stride, size_t c void* v = NULL; - if(SUCCEEDED(m_vertices.vb->Map(type, 0, &v))) + if(SUCCEEDED(m_vb->Map(type, 0, &v))) { GSVector4i::storent((uint8*)v + m_vertices.start * stride, vertices, count * stride); - m_vertices.vb->Unmap(); + m_vb->Unmap(); } m_vertices.count = count; m_vertices.stride = stride; - IASetVertexBuffer(m_vertices.vb, stride); + IASetVertexBuffer(m_vb, stride); } void GSDevice10::IASetVertexBuffer(ID3D10Buffer* 
vb, size_t stride) { - if(m_vb != vb || m_vb_stride != stride) + if(m_state.vb != vb || m_state.vb_stride != stride) { - m_vb = vb; - m_vb_stride = stride; + m_state.vb = vb; + m_state.vb_stride = stride; uint32 offset = 0; @@ -589,9 +556,9 @@ void GSDevice10::IASetVertexBuffer(ID3D10Buffer* vb, size_t stride) void GSDevice10::IASetInputLayout(ID3D10InputLayout* layout) { - if(m_layout != layout) + if(m_state.layout != layout) { - m_layout = layout; + m_state.layout = layout; m_dev->IASetInputLayout(layout); } @@ -599,9 +566,9 @@ void GSDevice10::IASetInputLayout(ID3D10InputLayout* layout) void GSDevice10::IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY topology) { - if(m_topology != topology) + if(m_state.topology != topology) { - m_topology = topology; + m_state.topology = topology; m_dev->IASetPrimitiveTopology(topology); } @@ -609,16 +576,16 @@ void GSDevice10::IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY topology) void GSDevice10::VSSetShader(ID3D10VertexShader* vs, ID3D10Buffer* vs_cb) { - if(m_vs != vs) + if(m_state.vs != vs) { - m_vs = vs; + m_state.vs = vs; m_dev->VSSetShader(vs); } - if(m_vs_cb != vs_cb) + if(m_state.vs_cb != vs_cb) { - m_vs_cb = vs_cb; + m_state.vs_cb = vs_cb; m_dev->VSSetConstantBuffers(0, 1, &vs_cb); } @@ -626,9 +593,9 @@ void GSDevice10::VSSetShader(ID3D10VertexShader* vs, ID3D10Buffer* vs_cb) void GSDevice10::GSSetShader(ID3D10GeometryShader* gs) { - if(m_gs != gs) + if(m_state.gs != gs) { - m_gs = gs; + m_state.gs = gs; m_dev->GSSetShader(gs); } @@ -642,10 +609,10 @@ void GSDevice10::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) if(sr0) srv0 = *(GSTexture10*)sr0; if(sr1) srv1 = *(GSTexture10*)sr1; - if(m_ps_srv[0] != srv0 || m_ps_srv[1] != srv1) + if(m_state.ps_srv[0] != srv0 || m_state.ps_srv[1] != srv1) { - m_ps_srv[0] = srv0; - m_ps_srv[1] = srv1; + m_state.ps_srv[0] = srv0; + m_state.ps_srv[1] = srv1; ID3D10ShaderResourceView* srvs[] = {srv0, srv1}; @@ -655,16 +622,16 @@ void GSDevice10::PSSetShaderResources(GSTexture* 
sr0, GSTexture* sr1) void GSDevice10::PSSetShader(ID3D10PixelShader* ps, ID3D10Buffer* ps_cb) { - if(m_ps != ps) + if(m_state.ps != ps) { - m_ps = ps; + m_state.ps = ps; m_dev->PSSetShader(ps); } - if(m_ps_cb != ps_cb) + if(m_state.ps_cb != ps_cb) { - m_ps_cb = ps_cb; + m_state.ps_cb = ps_cb; m_dev->PSSetConstantBuffers(0, 1, &ps_cb); } @@ -672,10 +639,10 @@ void GSDevice10::PSSetShader(ID3D10PixelShader* ps, ID3D10Buffer* ps_cb) void GSDevice10::PSSetSamplerState(ID3D10SamplerState* ss0, ID3D10SamplerState* ss1) { - if(m_ps_ss[0] != ss0 || m_ps_ss[1] != ss1) + if(m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1) { - m_ps_ss[0] = ss0; - m_ps_ss[1] = ss1; + m_state.ps_ss[0] = ss0; + m_state.ps_ss[1] = ss1; ID3D10SamplerState* sss[] = {ss0, ss1}; @@ -685,10 +652,10 @@ void GSDevice10::PSSetSamplerState(ID3D10SamplerState* ss0, ID3D10SamplerState* void GSDevice10::OMSetDepthStencilState(ID3D10DepthStencilState* dss, uint8 sref) { - if(m_dss != dss || m_sref != sref) + if(m_state.dss != dss || m_state.sref != sref) { - m_dss = dss; - m_sref = sref; + m_state.dss = dss; + m_state.sref = sref; m_dev->OMSetDepthStencilState(dss, sref); } @@ -696,10 +663,10 @@ void GSDevice10::OMSetDepthStencilState(ID3D10DepthStencilState* dss, uint8 sref void GSDevice10::OMSetBlendState(ID3D10BlendState* bs, float bf) { - if(m_bs != bs || m_bf != bf) + if(m_state.bs != bs || m_state.bf != bf) { - m_bs = bs; - m_bf = bf; + m_state.bs = bs; + m_state.bf = bf; float BlendFactor[] = {bf, bf, bf, 0}; @@ -715,17 +682,17 @@ void GSDevice10::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector if(rt) rtv = *(GSTexture10*)rt; if(ds) dsv = *(GSTexture10*)ds; - if(m_rtv != rtv || m_dsv != dsv) + if(m_state.rtv != rtv || m_state.dsv != dsv) { - m_rtv = rtv; - m_dsv = dsv; + m_state.rtv = rtv; + m_state.dsv = dsv; m_dev->OMSetRenderTargets(1, &rtv, dsv); } - if(m_viewport != rt->m_size) + if(m_state.viewport != rt->m_size) { - m_viewport = rt->m_size; + m_state.viewport = rt->m_size; 
D3D10_VIEWPORT vp; @@ -743,9 +710,9 @@ void GSDevice10::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy(); - if(!m_scissor.eq(r)) + if(!m_state.scissor.eq(r)) { - m_scissor = r; + m_state.scissor = r; m_dev->RSSetScissorRects(1, r); } diff --git a/plugins/GSdx/GSDevice10.h b/plugins/GSdx/GSDevice10.h index 49fd700a2c..c48f708ef3 100644 --- a/plugins/GSdx/GSDevice10.h +++ b/plugins/GSdx/GSDevice10.h @@ -26,28 +26,6 @@ class GSDevice10 : public GSDevice { - ID3D10Buffer* m_vb; - size_t m_vb_stride; - ID3D10InputLayout* m_layout; - D3D10_PRIMITIVE_TOPOLOGY m_topology; - ID3D10VertexShader* m_vs; - ID3D10Buffer* m_vs_cb; - ID3D10GeometryShader* m_gs; - ID3D10ShaderResourceView* m_ps_srv[2]; - ID3D10PixelShader* m_ps; - ID3D10Buffer* m_ps_cb; - ID3D10SamplerState* m_ps_ss[2]; - GSVector2i m_viewport; - GSVector4i m_scissor; - ID3D10DepthStencilState* m_dss; - uint8 m_sref; - ID3D10BlendState* m_bs; - float m_bf; - ID3D10RenderTargetView* m_rtv; - ID3D10DepthStencilView* m_dsv; - - // - GSTexture* Create(int type, int w, int h, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); @@ -57,12 +35,31 @@ class GSDevice10 : public GSDevice CComPtr m_dev; CComPtr m_swapchain; + CComPtr m_vb; + CComPtr m_vb_old; struct { - CComPtr vb, vb_old; - size_t stride, start, count, limit; - } m_vertices; + ID3D10Buffer* vb; + size_t vb_stride; + ID3D10InputLayout* layout; + D3D10_PRIMITIVE_TOPOLOGY topology; + ID3D10VertexShader* vs; + ID3D10Buffer* vs_cb; + ID3D10GeometryShader* gs; + ID3D10ShaderResourceView* ps_srv[2]; + ID3D10PixelShader* ps; + ID3D10Buffer* ps_cb; + ID3D10SamplerState* ps_ss[2]; + GSVector2i viewport; + GSVector4i scissor; + ID3D10DepthStencilState* dss; + uint8 sref; + ID3D10BlendState* bs; + float bf; + ID3D10RenderTargetView* rtv; + ID3D10DepthStencilView* dsv; + } m_state; public: // TODO CComPtr m_rs; @@ 
-99,9 +96,7 @@ public: bool Reset(int w, int h, int mode); void Flip(bool limit); - void BeginScene(); void DrawPrimitive(); - void EndScene(); void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, uint32 c); diff --git a/plugins/GSdx/GSDevice11.cpp b/plugins/GSdx/GSDevice11.cpp index 10fb8d546e..d85a8f05ed 100644 --- a/plugins/GSdx/GSDevice11.cpp +++ b/plugins/GSdx/GSDevice11.cpp @@ -25,31 +25,11 @@ #include "resource.h" GSDevice11::GSDevice11() - : m_vb(NULL) - , m_vb_stride(0) - , m_layout(NULL) - , m_topology(D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED) - , m_vs(NULL) - , m_vs_cb(NULL) - , m_gs(NULL) - , m_ps(NULL) - , m_ps_cb(NULL) - , m_scissor(0, 0, 0, 0) - , m_viewport(0, 0) - , m_dss(NULL) - , m_sref(0) - , m_bs(NULL) - , m_bf(-1) - , m_rtv(NULL) - , m_dsv(NULL) { - memset(m_ps_srv, 0, sizeof(m_ps_srv)); - memset(m_ps_ss, 0, sizeof(m_ps_ss)); + memset(&m_state, 0, sizeof(m_state)); - m_vertices.stride = 0; - m_vertices.start = 0; - m_vertices.count = 0; - m_vertices.limit = 0; + m_state.topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + m_state.bf = -1; } GSDevice11::~GSDevice11() @@ -257,27 +237,11 @@ void GSDevice11::Flip(bool limit) m_swapchain->Present(m_vsync && limit ? 
1 : 0, 0); } -void GSDevice11::BeginScene() -{ -} - void GSDevice11::DrawPrimitive() { m_ctx->Draw(m_vertices.count, m_vertices.start); } -void GSDevice11::EndScene() -{ - //PSSetShaderResources(NULL, NULL); - - // not clearing the rt/ds gives a little fps boost in complex games (5-10%) - - // OMSetRenderTargets(NULL, NULL); - - m_vertices.start += m_vertices.count; - m_vertices.count = 0; -} - void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c) { m_ctx->ClearRenderTargetView(*(GSTexture11*)t, c.v); @@ -482,6 +446,8 @@ void GSDevice11::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, // EndScene(); + + PSSetShaderResources(NULL, NULL); } void GSDevice11::DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c) @@ -524,14 +490,15 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c if(count * stride > m_vertices.limit * m_vertices.stride) { - m_vertices.vb_old = m_vertices.vb; - m_vertices.vb = NULL; + m_vb_old = m_vb; + m_vb = NULL; + m_vertices.start = 0; m_vertices.count = 0; m_vertices.limit = std::max(count * 3 / 2, 10000); } - if(m_vertices.vb == NULL) + if(m_vb == NULL) { D3D11_BUFFER_DESC bd; @@ -544,7 +511,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c HRESULT hr; - hr = m_dev->CreateBuffer(&bd, NULL, &m_vertices.vb); + hr = m_dev->CreateBuffer(&bd, NULL, &m_vb); if(FAILED(hr)) return; } @@ -560,25 +527,25 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c D3D11_MAPPED_SUBRESOURCE m; - if(SUCCEEDED(m_ctx->Map(m_vertices.vb, 0, type, 0, &m))) + if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m))) { GSVector4i::storent((uint8*)m.pData + m_vertices.start * stride, vertices, count * stride); - m_ctx->Unmap(m_vertices.vb, 0); + m_ctx->Unmap(m_vb, 0); } m_vertices.count = count; m_vertices.stride = stride; - IASetVertexBuffer(m_vertices.vb, stride); + IASetVertexBuffer(m_vb, 
stride); } void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride) { - if(m_vb != vb || m_vb_stride != stride) + if(m_state.vb != vb || m_state.vb_stride != stride) { - m_vb = vb; - m_vb_stride = stride; + m_state.vb = vb; + m_state.vb_stride = stride; uint32 offset = 0; @@ -588,9 +555,9 @@ void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride) void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout) { - if(m_layout != layout) + if(m_state.layout != layout) { - m_layout = layout; + m_state.layout = layout; m_ctx->IASetInputLayout(layout); } @@ -598,9 +565,9 @@ void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout) void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology) { - if(m_topology != topology) + if(m_state.topology != topology) { - m_topology = topology; + m_state.topology = topology; m_ctx->IASetPrimitiveTopology(topology); } @@ -608,16 +575,16 @@ void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology) void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb) { - if(m_vs != vs) + if(m_state.vs != vs) { - m_vs = vs; + m_state.vs = vs; m_ctx->VSSetShader(vs, NULL, 0); } - if(m_vs_cb != vs_cb) + if(m_state.vs_cb != vs_cb) { - m_vs_cb = vs_cb; + m_state.vs_cb = vs_cb; m_ctx->VSSetConstantBuffers(0, 1, &vs_cb); } @@ -625,11 +592,11 @@ void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb) void GSDevice11::GSSetShader(ID3D11GeometryShader* gs) { - if(m_gs != gs) + if(m_state.gs != gs) { - m_ctx->GSSetShader(gs, NULL, 0); + m_state.gs = gs; - m_gs = gs; + m_ctx->GSSetShader(gs, NULL, 0); } } @@ -641,10 +608,10 @@ void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) if(sr0) srv0 = *(GSTexture11*)sr0; if(sr1) srv1 = *(GSTexture11*)sr1; - if(m_ps_srv[0] != srv0 || m_ps_srv[1] != srv1) + if(m_state.ps_srv[0] != srv0 || m_state.ps_srv[1] != srv1) { - m_ps_srv[0] = srv0; - m_ps_srv[1] = srv1; + m_state.ps_srv[0] = srv0; + m_state.ps_srv[1] = srv1; 
ID3D11ShaderResourceView* srvs[] = {srv0, srv1}; @@ -654,16 +621,16 @@ void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb) { - if(m_ps != ps) + if(m_state.ps != ps) { - m_ps = ps; + m_state.ps = ps; m_ctx->PSSetShader(ps, NULL, 0); } - if(m_ps_cb != ps_cb) + if(m_state.ps_cb != ps_cb) { - m_ps_cb = ps_cb; + m_state.ps_cb = ps_cb; m_ctx->PSSetConstantBuffers(0, 1, &ps_cb); } @@ -671,10 +638,10 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb) void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1) { - if(m_ps_ss[0] != ss0 || m_ps_ss[1] != ss1) + if(m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1) { - m_ps_ss[0] = ss0; - m_ps_ss[1] = ss1; + m_state.ps_ss[0] = ss0; + m_state.ps_ss[1] = ss1; ID3D11SamplerState* sss[] = {ss0, ss1}; @@ -684,25 +651,25 @@ void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref) { - if(m_dss != dss || m_sref != sref) + if(m_state.dss != dss || m_state.sref != sref) { - m_ctx->OMSetDepthStencilState(dss, sref); + m_state.dss = dss; + m_state.sref = sref; - m_dss = dss; - m_sref = sref; + m_ctx->OMSetDepthStencilState(dss, sref); } } void GSDevice11::OMSetBlendState(ID3D11BlendState* bs, float bf) { - if(m_bs != bs || m_bf != bf) + if(m_state.bs != bs || m_state.bf != bf) { + m_state.bs = bs; + m_state.bf = bf; + float BlendFactor[] = {bf, bf, bf, 0}; m_ctx->OMSetBlendState(bs, BlendFactor, 0xffffffff); - - m_bs = bs; - m_bf = bf; } } @@ -714,17 +681,17 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector if(rt) rtv = *(GSTexture11*)rt; if(ds) dsv = *(GSTexture11*)ds; - if(m_rtv != rtv || m_dsv != dsv) + if(m_state.rtv != rtv || m_state.dsv != dsv) { - m_rtv = rtv; - m_dsv = dsv; + m_state.rtv = rtv; + m_state.dsv = dsv; m_ctx->OMSetRenderTargets(1, &rtv, dsv); 
} - if(m_viewport != rt->m_size) + if(m_state.viewport != rt->m_size) { - m_viewport = rt->m_size; + m_state.viewport = rt->m_size; D3D11_VIEWPORT vp; @@ -742,9 +709,9 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy(); - if(!m_scissor.eq(r)) + if(!m_state.scissor.eq(r)) { - m_scissor = r; + m_state.scissor = r; m_ctx->RSSetScissorRects(1, r); } diff --git a/plugins/GSdx/GSDevice11.h b/plugins/GSdx/GSDevice11.h index 523e7ba301..e87187de31 100644 --- a/plugins/GSdx/GSDevice11.h +++ b/plugins/GSdx/GSDevice11.h @@ -26,28 +26,6 @@ class GSDevice11 : public GSDevice { - ID3D11Buffer* m_vb; - size_t m_vb_stride; - ID3D11InputLayout* m_layout; - D3D11_PRIMITIVE_TOPOLOGY m_topology; - ID3D11VertexShader* m_vs; - ID3D11Buffer* m_vs_cb; - ID3D11GeometryShader* m_gs; - ID3D11ShaderResourceView* m_ps_srv[2]; - ID3D11PixelShader* m_ps; - ID3D11Buffer* m_ps_cb; - ID3D11SamplerState* m_ps_ss[2]; - GSVector2i m_viewport; - GSVector4i m_scissor; - ID3D11DepthStencilState* m_dss; - uint8 m_sref; - ID3D11BlendState* m_bs; - float m_bf; - ID3D11RenderTargetView* m_rtv; - ID3D11DepthStencilView* m_dsv; - - // - GSTexture* Create(int type, int w, int h, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); @@ -58,12 +36,31 @@ class GSDevice11 : public GSDevice CComPtr m_dev; CComPtr m_ctx; CComPtr m_swapchain; + CComPtr m_vb; + CComPtr m_vb_old; struct { - CComPtr vb, vb_old; - size_t stride, start, count, limit; - } m_vertices; + ID3D11Buffer* vb; + size_t vb_stride; + ID3D11InputLayout* layout; + D3D11_PRIMITIVE_TOPOLOGY topology; + ID3D11VertexShader* vs; + ID3D11Buffer* vs_cb; + ID3D11GeometryShader* gs; + ID3D11ShaderResourceView* ps_srv[2]; + ID3D11PixelShader* ps; + ID3D11Buffer* ps_cb; + ID3D11SamplerState* ps_ss[2]; + GSVector2i viewport; + GSVector4i scissor; + ID3D11DepthStencilState* dss; + uint8 
sref; + ID3D11BlendState* bs; + float bf; + ID3D11RenderTargetView* rtv; + ID3D11DepthStencilView* dsv; + } m_state; public: // TODO CComPtr m_rs; @@ -100,9 +97,7 @@ public: bool Reset(int w, int h, int mode); void Flip(bool limit); - void BeginScene(); void DrawPrimitive(); - void EndScene(); void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, uint32 c); diff --git a/plugins/GSdx/GSDevice9.cpp b/plugins/GSdx/GSDevice9.cpp index 5971220633..9d5f7a919c 100644 --- a/plugins/GSdx/GSDevice9.cpp +++ b/plugins/GSdx/GSDevice9.cpp @@ -25,42 +25,23 @@ #include "resource.h" GSDevice9::GSDevice9() - : m_vb(NULL) - , m_vb_stride(0) - , m_layout(NULL) - , m_topology((D3DPRIMITIVETYPE)0) - , m_vs(NULL) - , m_vs_cb(NULL) - , m_vs_cb_len(0) - , m_ps(NULL) - , m_ps_cb(NULL) - , m_ps_cb_len(0) - , m_ps_ss(NULL) - , m_scissor(0, 0, 0, 0) - , m_dss(NULL) - , m_bs(NULL) - , m_bf(0xffffffff) - , m_rtv(NULL) - , m_dsv(NULL) - , m_lost(false) + : m_lost(false) { m_rbswapped = true; memset(&m_pp, 0, sizeof(m_pp)); memset(&m_ddcaps, 0, sizeof(m_ddcaps)); memset(&m_d3dcaps, 0, sizeof(m_d3dcaps)); - memset(m_ps_srvs, 0, sizeof(m_ps_srvs)); - m_vertices.stride = 0; - m_vertices.start = 0; - m_vertices.count = 0; - m_vertices.limit = 0; + memset(&m_state, 0, sizeof(m_state)); + + m_state.bf = 0xffffffff; } GSDevice9::~GSDevice9() { - if(m_vs_cb) _aligned_free(m_vs_cb); - if(m_ps_cb) _aligned_free(m_ps_cb); + if(m_state.vs_cb) _aligned_free(m_state.vs_cb); + if(m_state.ps_cb) _aligned_free(m_state.ps_cb); } bool GSDevice9::Create(GSWnd* wnd, bool vsync) @@ -112,9 +93,14 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync) if(m_d3dcaps.VertexShaderVersion < (m_d3dcaps.PixelShaderVersion & ~0x10000)) { - ASSERT(0); + if(m_d3dcaps.VertexShaderVersion > D3DVS_VERSION(0, 0)) + { + ASSERT(0); - return false; + return false; + } + + // else vertex shader should be emulated in software (gma950) } m_d3dcaps.VertexShaderVersion = m_d3dcaps.PixelShaderVersion & 
~0x10000; @@ -246,30 +232,18 @@ bool GSDevice9::Reset(int w, int h, int mode) m_swapchain = NULL; - m_vertices.vb = NULL; - m_vertices.vb_old = NULL; + m_vb = NULL; + m_vb_old = NULL; + m_vertices.start = 0; m_vertices.count = 0; - if(m_vs_cb) _aligned_free(m_vs_cb); - if(m_ps_cb) _aligned_free(m_ps_cb); + if(m_state.vs_cb) _aligned_free(m_state.vs_cb); + if(m_state.ps_cb) _aligned_free(m_state.ps_cb); - m_vb = NULL; - m_vb_stride = 0; - m_layout = NULL; - m_vs = NULL; - m_vs_cb = NULL; - m_vs_cb_len = 0; - m_ps = NULL; - m_ps_cb = NULL; - m_ps_cb_len = 0; - m_ps_ss = NULL; - m_scissor = GSVector4i::zero(); - m_dss = NULL; - m_bs = NULL; - m_bf = 0xffffffff; - m_rtv = NULL; - m_dsv = NULL; + memset(&m_state, 0, sizeof(m_state)); + + m_state.bf = 0xffffffff; memset(&m_pp, 0, sizeof(m_pp)); @@ -403,7 +377,7 @@ void GSDevice9::DrawPrimitive() { int prims = 0; - switch(m_topology) + switch(m_state.topology) { case D3DPT_TRIANGLELIST: prims = m_vertices.count / 3; @@ -423,15 +397,14 @@ void GSDevice9::DrawPrimitive() break; } - m_dev->DrawPrimitive(m_topology, m_vertices.start, prims); + m_dev->DrawPrimitive(m_state.topology, m_vertices.start, prims); } void GSDevice9::EndScene() { // m_dev->EndScene(); - m_vertices.start += m_vertices.count; - m_vertices.count = 0; + __super::EndScene(); } void GSDevice9::ClearRenderTarget(GSTexture* t, const GSVector4& c) @@ -707,18 +680,19 @@ void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t co if(count * stride > m_vertices.limit * m_vertices.stride) { - m_vertices.vb_old = m_vertices.vb; - m_vertices.vb = NULL; + m_vb_old = m_vb; + m_vb = NULL; + m_vertices.start = 0; m_vertices.count = 0; m_vertices.limit = std::max(count * 3 / 2, 10000); } - if(m_vertices.vb == NULL) + if(m_vb == NULL) { HRESULT hr; - hr = m_dev->CreateVertexBuffer(m_vertices.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vertices.vb, NULL); + hr = m_dev->CreateVertexBuffer(m_vertices.limit * stride, 
D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL); if(FAILED(hr)) return; } @@ -734,25 +708,25 @@ void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t co void* v = NULL; - if(SUCCEEDED(m_vertices.vb->Lock(m_vertices.start * stride, count * stride, &v, flags))) + if(SUCCEEDED(m_vb->Lock(m_vertices.start * stride, count * stride, &v, flags))) { GSVector4i::storent(v, vertices, count * stride); - m_vertices.vb->Unlock(); + m_vb->Unlock(); } m_vertices.count = count; m_vertices.stride = stride; - IASetVertexBuffer(m_vertices.vb, stride); + IASetVertexBuffer(m_vb, stride); } void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride) { - if(m_vb != vb || m_vb_stride != stride) + if(m_state.vb != vb || m_state.vb_stride != stride) { - m_vb = vb; - m_vb_stride = stride; + m_state.vb = vb; + m_state.vb_stride = stride; m_dev->SetStreamSource(0, vb, 0, stride); } @@ -760,9 +734,9 @@ void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride) void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout) { - if(m_layout != layout) + if(m_state.layout != layout) { - m_layout = layout; + m_state.layout = layout; m_dev->SetVertexDeclaration(layout); } @@ -770,14 +744,14 @@ void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout) void GSDevice9::IASetPrimitiveTopology(D3DPRIMITIVETYPE topology) { - m_topology = topology; + m_state.topology = topology; } void GSDevice9::VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int vs_cb_len) { - if(m_vs != vs) + if(m_state.vs != vs) { - m_vs = vs; + m_state.vs = vs; m_dev->SetVertexShader(vs); } @@ -786,18 +760,18 @@ void GSDevice9::VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int { int size = vs_cb_len * sizeof(float) * 4; - if(m_vs_cb_len != vs_cb_len || m_vs_cb == NULL || memcmp(m_vs_cb, vs_cb, size)) + if(m_state.vs_cb_len != vs_cb_len || m_state.vs_cb == NULL || memcmp(m_state.vs_cb, vs_cb, size)) { - 
if(m_vs_cb == NULL || m_vs_cb_len < vs_cb_len) + if(m_state.vs_cb == NULL || m_state.vs_cb_len < vs_cb_len) { - if(m_vs_cb) _aligned_free(m_vs_cb); + if(m_state.vs_cb) _aligned_free(m_state.vs_cb); - m_vs_cb = (float*)_aligned_malloc(size, 16); + m_state.vs_cb = (float*)_aligned_malloc(size, 16); } - m_vs_cb_len = vs_cb_len; + m_state.vs_cb_len = vs_cb_len; - memcpy(m_vs_cb, vs_cb, size); + memcpy(m_state.vs_cb, vs_cb, size); m_dev->SetVertexShaderConstantF(0, vs_cb, vs_cb_len); } @@ -812,16 +786,16 @@ void GSDevice9::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) if(sr0) srv0 = *(GSTexture9*)sr0; if(sr1) srv1 = *(GSTexture9*)sr1; - if(m_ps_srvs[0] != srv0) + if(m_state.ps_srvs[0] != srv0) { - m_ps_srvs[0] = srv0; + m_state.ps_srvs[0] = srv0; m_dev->SetTexture(0, srv0); } - if(m_ps_srvs[1] != srv1) + if(m_state.ps_srvs[1] != srv1) { - m_ps_srvs[1] = srv1; + m_state.ps_srvs[1] = srv1; m_dev->SetTexture(1, srv1); } @@ -829,9 +803,9 @@ void GSDevice9::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) void GSDevice9::PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len) { - if(m_ps != ps) + if(m_state.ps != ps) { - m_ps = ps; + m_state.ps = ps; m_dev->SetPixelShader(ps); } @@ -840,18 +814,18 @@ void GSDevice9::PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int p { int size = ps_cb_len * sizeof(float) * 4; - if(m_ps_cb_len != ps_cb_len || m_ps_cb == NULL || memcmp(m_ps_cb, ps_cb, size)) + if(m_state.ps_cb_len != ps_cb_len || m_state.ps_cb == NULL || memcmp(m_state.ps_cb, ps_cb, size)) { - if(m_ps_cb == NULL || m_ps_cb_len < ps_cb_len) + if(m_state.ps_cb == NULL || m_state.ps_cb_len < ps_cb_len) { - if(m_ps_cb) _aligned_free(m_ps_cb); + if(m_state.ps_cb) _aligned_free(m_state.ps_cb); - m_ps_cb = (float*)_aligned_malloc(size, 16); + m_state.ps_cb = (float*)_aligned_malloc(size, 16); } - m_ps_cb_len = ps_cb_len; + m_state.ps_cb_len = ps_cb_len; - memcpy(m_ps_cb, ps_cb, size); + memcpy(m_state.ps_cb, ps_cb, size); 
m_dev->SetPixelShaderConstantF(0, ps_cb, ps_cb_len); } @@ -860,9 +834,9 @@ void GSDevice9::PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int p void GSDevice9::PSSetSamplerState(Direct3DSamplerState9* ss) { - if(ss && m_ps_ss != ss) + if(ss && m_state.ps_ss != ss) { - m_ps_ss = ss; + m_state.ps_ss = ss; m_dev->SetSamplerState(0, D3DSAMP_ADDRESSU, ss->AddressU); m_dev->SetSamplerState(0, D3DSAMP_ADDRESSV, ss->AddressV); @@ -885,9 +859,9 @@ void GSDevice9::PSSetSamplerState(Direct3DSamplerState9* ss) void GSDevice9::OMSetDepthStencilState(Direct3DDepthStencilState9* dss) { - if(m_dss != dss) + if(m_state.dss != dss) { - m_dss = dss; + m_state.dss = dss; m_dev->SetRenderState(D3DRS_ZENABLE, dss->DepthEnable); m_dev->SetRenderState(D3DRS_ZWRITEENABLE, dss->DepthWriteMask); @@ -914,10 +888,10 @@ void GSDevice9::OMSetDepthStencilState(Direct3DDepthStencilState9* dss) void GSDevice9::OMSetBlendState(Direct3DBlendState9* bs, uint32 bf) { - if(m_bs != bs || m_bf != bf) + if(m_state.bs != bs || m_state.bf != bf) { - m_bs = bs; - m_bf = bf; + m_state.bs = bs; + m_state.bf = bf; m_dev->SetRenderState(D3DRS_ALPHABLENDENABLE, bs->BlendEnable); @@ -945,25 +919,25 @@ void GSDevice9::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4 if(rt) rtv = *(GSTexture9*)rt; if(ds) dsv = *(GSTexture9*)ds; - if(m_rtv != rtv) + if(m_state.rtv != rtv) { - m_rtv = rtv; + m_state.rtv = rtv; m_dev->SetRenderTarget(0, rtv); } - if(m_dsv != dsv) + if(m_state.dsv != dsv) { - m_dsv = dsv; + m_state.dsv = dsv; m_dev->SetDepthStencilSurface(dsv); } GSVector4i r = scissor ? 
*scissor : GSVector4i(rt->m_size).zwxy(); - if(!m_scissor.eq(r)) + if(!m_state.scissor.eq(r)) { - m_scissor = r; + m_state.scissor = r; m_dev->SetScissorRect(r); } diff --git a/plugins/GSdx/GSDevice9.h b/plugins/GSdx/GSDevice9.h index bee59f70ea..8069f96198 100644 --- a/plugins/GSdx/GSDevice9.h +++ b/plugins/GSdx/GSDevice9.h @@ -61,28 +61,6 @@ struct Direct3DBlendState9 class GSDevice9 : public GSDevice { -private: - IDirect3DVertexBuffer9* m_vb; - size_t m_vb_stride; - IDirect3DVertexDeclaration9* m_layout; - D3DPRIMITIVETYPE m_topology; - IDirect3DVertexShader9* m_vs; - float* m_vs_cb; - int m_vs_cb_len; - IDirect3DTexture9* m_ps_srvs[2]; - IDirect3DPixelShader9* m_ps; - float* m_ps_cb; - int m_ps_cb_len; - Direct3DSamplerState9* m_ps_ss; - GSVector4i m_scissor; - Direct3DDepthStencilState9* m_dss; - Direct3DBlendState9* m_bs; - uint32 m_bf; - IDirect3DSurface9* m_rtv; - IDirect3DSurface9* m_dsv; - - // - GSTexture* Create(int type, int w, int h, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); @@ -96,13 +74,31 @@ private: CComPtr m_d3d; CComPtr m_dev; CComPtr m_swapchain; + CComPtr m_vb; + CComPtr m_vb_old; bool m_lost; struct { - CComPtr vb, vb_old; - size_t stride, start, count, limit; - } m_vertices; + IDirect3DVertexBuffer9* vb; + size_t vb_stride; + IDirect3DVertexDeclaration9* layout; + D3DPRIMITIVETYPE topology; + IDirect3DVertexShader9* vs; + float* vs_cb; + int vs_cb_len; + IDirect3DTexture9* ps_srvs[2]; + IDirect3DPixelShader9* ps; + float* ps_cb; + int ps_cb_len; + Direct3DSamplerState9* ps_ss; + GSVector4i scissor; + Direct3DDepthStencilState9* dss; + Direct3DBlendState9* bs; + uint32 bf; + IDirect3DSurface9* rtv; + IDirect3DSurface9* dsv; + } m_state; public: // TODO diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 26c6e70139..784e201471 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -203,21 +203,11 @@ 
void GSDeviceOGL::Flip(bool limit) #endif } -void GSDeviceOGL::BeginScene() -{ -} - void GSDeviceOGL::DrawPrimitive() { glDrawArrays(m_topology, m_vertices.count, m_vertices.start); CheckError(); } -void GSDeviceOGL::EndScene() -{ - m_vertices.start += m_vertices.count; - m_vertices.count = 0; -} - void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) { GLuint texture = *(GSTextureOGL*)t; diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 10d58a9138..3d3c375258 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -114,9 +114,7 @@ public: void Present(const GSVector4i& r, int shader, bool limit); void Flip(bool limit); - void BeginScene(); void DrawPrimitive(); - void EndScene(); void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, uint32 c); diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index 41cc49b4db..5bdd74f0bf 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -495,7 +495,7 @@ protected: GSTexture* t = NULL; - if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true, true)) + if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height)) { t = rt->m_texture; diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 9c70530e9c..d9a254e9c0 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -106,21 +106,12 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con if(src == NULL) { - src = CreateSource(); + src = CreateSource(TEX0, TEXA, dst); - if(!(dst ? 
src->Create(dst) : src->Create(m_paltex))) + if(src == NULL) { - delete src; - return NULL; } - - if(psm.pal > 0) - { - memcpy(src->m_clut, clut, psm.pal * sizeof(clut[0])); - } - - m_src.Add(src, TEX0, m_renderer->m_context->offset.tex); } if(psm.pal > 0) @@ -144,7 +135,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con return src; } -GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, bool fb) +GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used) { uint32 bp = TEX0.TBP0; @@ -160,41 +151,20 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int dst = t; - if(!fb) dst->m_TEX0 = TEX0; + dst->m_TEX0 = TEX0; break; } } - if(dst == NULL && fb) - { - // HACK: try to find something close to the base pointer - - for(list::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++) - { - Target* t = *i; - - if(t->m_TEX0.TBP0 <= bp && bp < t->m_TEX0.TBP0 + 0x700 && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0)) - { - dst = t; - } - } - } - if(dst == NULL) { - dst = CreateTarget(); + dst = CreateTarget(TEX0, w, h, type); - dst->m_TEX0 = TEX0; - - if(!dst->Create(w, h, type)) + if(dst == NULL) { - delete dst; - return NULL; } - - m_dst[type].push_front(dst); } else { @@ -212,12 +182,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int { hh *= 2; } -/* - if(hh < 512) + + if(hh < 512 && m_renderer->m_context->SCISSOR.SCAY1 == 511) // vp2 { hh = 512; } -*/ + if(ww > 0 && hh > 0) { dst->m_texture->m_scale.x = (float)w / ww; @@ -233,6 +203,52 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int return dst; } +GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h) +{ + uint32 bp = TEX0.TBP0; + + Target* dst = NULL; + + for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); 
i++) + { + Target* t = *i; + + if(bp == t->m_TEX0.TBP0) + { + dst = t; + + break; + } + else + { + // HACK: try to find something close to the base pointer + + if(t->m_TEX0.TBP0 <= bp && bp < t->m_TEX0.TBP0 + 0x700 && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0)) + { + dst = t; + } + } + } + + if(dst == NULL) + { + dst = CreateTarget(TEX0, w, h, RenderTarget); + + if(dst == NULL) + { + return NULL; + } + } + else + { + dst->Update(); + } + + dst->m_used = true; + + return dst; +} + void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rect, bool target) { uint32 bp = o->bp; @@ -368,12 +384,7 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset* o, const GSVector4i& r) { if(GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) { - GSVector4i r2 = r.rintersect(t->m_valid); - - if(!r2.rempty()) - { - t->Read(r2); - } + Read(t, r.rintersect(t->m_valid)); return; } @@ -381,12 +392,7 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset* o, const GSVector4i& r) { // ffx-2 riku changing to her default (shoots some reflecting glass at the end), 16-bit rt read as 32-bit - GSVector4i r2 = GSVector4i(r.left, r.top, r.right, r.top + (r.bottom - r.top) * 2).rintersect(t->m_valid); - - if(!r2.rempty()) - { - t->Read(r2); - } + Read(t, GSVector4i(r.left, r.top, r.right, r.top + (r.bottom - r.top) * 2).rintersect(t->m_valid)); return; } @@ -477,6 +483,241 @@ void GSTextureCache::IncAge() } } +GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst) +{ + Source* src = new Source(m_renderer); + + src->m_TEX0 = TEX0; + src->m_TEXA = TEXA; + + int tw = 1 << TEX0.TW; + int th = 1 << TEX0.TH; + int tp = (int)TEX0.TW << 6; + + if(dst == NULL) + { + if(m_paltex && GSLocalMemory::m_psm[TEX0.PSM].pal > 0) + { + src->m_fmt = GSTextureFX::FMT_8; + + src->m_texture = m_renderer->m_dev->CreateTexture(tw, th, Get8bitFormat()); + src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); + } + else + { + src->m_fmt 
= GSTextureFX::FMT_32; + + src->m_texture = m_renderer->m_dev->CreateTexture(tw, th); + } + } + else + { + // TODO: clean up this mess + + src->m_target = true; + + if(dst->m_type != RenderTarget) + { + // TODO + + delete src; + + return NULL; + } + + dst->Update(); + + // do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows) + + int w = (int)(dst->m_texture->m_scale.x * tw); + int h = (int)(dst->m_texture->m_scale.y * th); + + GSVector2i dstsize = dst->m_texture->GetSize(); + + // pitch conversion + + if(dst->m_TEX0.TBW != TEX0.TBW) // && dst->m_TEX0.PSM == TEX0.PSM + { + // sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left) + + // ASSERT(dst->m_TEX0.TBW > TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO) + + src->m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y); + + GSVector4 size = GSVector4(dstsize).xyxy(); + GSVector4 scale = GSVector4(dst->m_texture->m_scale).xyxy(); + + int bw = 64; + int bh = TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24 ? 
32 : 64; + + GSVector4i br(0, 0, bw, bh); + + int sw = (int)dst->m_TEX0.TBW << 6; + + int dw = (int)TEX0.TBW << 6; + int dh = 1 << TEX0.TH; + + if(sw != 0) + for(int dy = 0; dy < dh; dy += bh) + { + for(int dx = 0; dx < dw; dx += bw) + { + int o = dy * dw / bh + dx; + + int sx = o % sw; + int sy = o / sw; + + GSVector4 sr = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size; + GSVector4 dr = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale; + + m_renderer->m_dev->StretchRect(dst->m_texture, sr, src->m_texture, dr); + + // TODO: this is quite a lot of StretchRect, do it with one Draw + } + } + } + else if(tw < tp) + { + // FIXME: timesplitters blurs the render target by blending itself over a couple of times + + if(tw == 256 && th == 128 && tp == 512 && (TEX0.TBP0 == 0 || TEX0.TBP0 == 0x00e00)) + { + return false; + } + } + + // width/height conversion + + GSVector2 scale = dst->m_texture->m_scale; + + GSVector4 dr(0, 0, w, h); + + if(w > dstsize.x) + { + scale.x = (float)dstsize.x / tw; + dr.z = (float)dstsize.x * scale.x / dst->m_texture->m_scale.x; + w = dstsize.x; + } + + if(h > dstsize.y) + { + scale.y = (float)dstsize.y / th; + dr.w = (float)dstsize.y * scale.y / dst->m_texture->m_scale.y; + h = dstsize.y; + } + + GSVector4 sr(0, 0, w, h); + + GSTexture* st = src->m_texture ? 
src->m_texture : dst->m_texture; + GSTexture* dt = m_renderer->m_dev->CreateRenderTarget(w, h); + + if(!src->m_texture) + { + src->m_texture = dt; + } + + if((sr == dr).alltrue()) + { + m_renderer->m_dev->CopyRect(st, dt, GSVector4i(0, 0, w, h)); + } + else + { + sr.z /= st->m_size.x; + sr.w /= st->m_size.y; + + m_renderer->m_dev->StretchRect(st, sr, dt, dr); + } + + if(dt != src->m_texture) + { + m_renderer->m_dev->Recycle(src->m_texture); + + src->m_texture = dt; + } + + src->m_texture->m_scale = scale; + + switch(TEX0.PSM) + { + default: + ASSERT(0); + case PSM_PSMCT32: + src->m_fmt = GSTextureFX::FMT_32; + break; + case PSM_PSMCT24: + src->m_fmt = GSTextureFX::FMT_24; + break; + case PSM_PSMCT16: + case PSM_PSMCT16S: + src->m_fmt = GSTextureFX::FMT_16; + break; + case PSM_PSMT8H: + src->m_fmt = GSTextureFX::FMT_8H; + src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); + break; + case PSM_PSMT4HL: + src->m_fmt = GSTextureFX::FMT_4HL; + src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); + break; + case PSM_PSMT4HH: + src->m_fmt = GSTextureFX::FMT_4HH; + src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); + break; + } + } + + if(src->m_texture == NULL) + { + ASSERT(0); + + return NULL; + } + + const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; + + if(psm.pal > 0) + { + memcpy(src->m_clut, (const uint32*)m_renderer->m_mem.m_clut, psm.pal * sizeof(uint32)); + } + + m_src.Add(src, TEX0, m_renderer->m_context->offset.tex); + + return src; +} + +GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type) +{ + Target* t = new Target(m_renderer); + + t->m_TEX0 = TEX0; + + // FIXME: initial data should be unswizzled from local mem in Update() if dirty + + t->m_type = type; + + if(type == RenderTarget) + { + t->m_texture = m_renderer->m_dev->CreateRenderTarget(w, h); + + t->m_used = true; // FIXME + } + else if(type == DepthStencil) + { + t->m_texture = m_renderer->m_dev->CreateDepthStencil(w, 
h); + } + + if(t->m_texture == NULL) + { + ASSERT(0); + + return NULL; + } + + m_dst[type].push_front(t); + + return t; +} + // GSTextureCache::Surface GSTextureCache::Surface::Surface(GSRenderer* r) @@ -526,197 +767,6 @@ GSTextureCache::Source::~Source() _aligned_free(m_write.rect); } -bool GSTextureCache::Source::Create(bool paltex) -{ - m_TEX0 = m_renderer->m_context->TEX0; - m_TEXA = m_renderer->m_env.TEXA; - - ASSERT(m_texture == NULL); - - if(paltex && GSLocalMemory::m_psm[m_TEX0.PSM].pal > 0) - { - m_fmt = GSTextureFX::FMT_8; - - m_texture = m_renderer->m_dev->CreateTexture(1 << m_TEX0.TW, 1 << m_TEX0.TH, Get8bitFormat()); - m_palette = m_renderer->m_dev->CreateTexture(256, 1); - } - else - { - m_fmt = GSTextureFX::FMT_32; - - m_texture = m_renderer->m_dev->CreateTexture(1 << m_TEX0.TW, 1 << m_TEX0.TH); - } - - return m_texture != NULL; -} - -bool GSTextureCache::Source::Create(Target* dst) -{ - m_target = true; - - if(dst->m_type != RenderTarget) - { - // TODO - - return false; - } - - // TODO: clean up this mess - - dst->Update(); - - // m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1); - - m_TEX0 = m_renderer->m_context->TEX0; - m_TEXA = m_renderer->m_env.TEXA; - - int tw = 1 << m_TEX0.TW; - int th = 1 << m_TEX0.TH; - int tp = (int)m_TEX0.TW << 6; - - // do not round here!!! 
if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows) - - int w = (int)(dst->m_texture->m_scale.x * tw); - int h = (int)(dst->m_texture->m_scale.y * th); - - GSVector2i dstsize = dst->m_texture->GetSize(); - - // pitch conversion - - if(dst->m_TEX0.TBW != m_TEX0.TBW) // && dst->m_TEX0.PSM == m_TEX0.PSM - { - // sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left) - - // ASSERT(dst->m_TEX0.TBW > m_TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO) - - ASSERT(m_texture == NULL); - - m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y); - - GSVector4 size = GSVector4(dstsize).xyxy(); - GSVector4 scale = GSVector4(dst->m_texture->m_scale).xyxy(); - - int bw = 64; - int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64; - - GSVector4i br(0, 0, bw, bh); - - int sw = (int)dst->m_TEX0.TBW << 6; - - int dw = (int)m_TEX0.TBW << 6; - int dh = 1 << m_TEX0.TH; - - if(sw != 0) - for(int dy = 0; dy < dh; dy += bh) - { - for(int dx = 0; dx < dw; dx += bw) - { - int o = dy * dw / bh + dx; - - int sx = o % sw; - int sy = o / sw; - - GSVector4 sr = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size; - GSVector4 dr = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale; - - m_renderer->m_dev->StretchRect(dst->m_texture, sr, m_texture, dr); - - // TODO: this is quite a lot of StretchRect, do it with one Draw - } - } - } - else if(tw < tp) - { - // FIXME: timesplitters blurs the render target by blending itself over a couple of times - - if(tw == 256 && th == 128 && tp == 512 && (m_TEX0.TBP0 == 0 || m_TEX0.TBP0 == 0x00e00)) - { - return false; - } - } - - // width/height conversion - - GSVector2 scale = dst->m_texture->m_scale; - - GSVector4 dr(0, 0, w, h); - - if(w > dstsize.x) - { - scale.x = (float)dstsize.x / tw; - dr.z = (float)dstsize.x * scale.x / dst->m_texture->m_scale.x; - w = dstsize.x; - } - - if(h > 
dstsize.y) - { - scale.y = (float)dstsize.y / th; - dr.w = (float)dstsize.y * scale.y / dst->m_texture->m_scale.y; - h = dstsize.y; - } - - GSVector4 sr(0, 0, w, h); - - GSTexture* st = m_texture ? m_texture : dst->m_texture; - GSTexture* dt = m_renderer->m_dev->CreateRenderTarget(w, h); - - if(!m_texture) - { - m_texture = dt; - } - - if((sr == dr).alltrue()) - { - m_renderer->m_dev->CopyRect(st, dt, GSVector4i(0, 0, w, h)); - } - else - { - sr.z /= st->m_size.x; - sr.w /= st->m_size.y; - - m_renderer->m_dev->StretchRect(st, sr, dt, dr); - } - - if(dt != m_texture) - { - m_renderer->m_dev->Recycle(m_texture); - - m_texture = dt; - } - - m_texture->m_scale = scale; - - switch(m_TEX0.PSM) - { - default: - ASSERT(0); - case PSM_PSMCT32: - m_fmt = GSTextureFX::FMT_32; - break; - case PSM_PSMCT24: - m_fmt = GSTextureFX::FMT_24; - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - m_fmt = GSTextureFX::FMT_16; - break; - case PSM_PSMT8H: - m_fmt = GSTextureFX::FMT_8H; - m_palette = m_renderer->m_dev->CreateTexture(256, 1); - break; - case PSM_PSMT4HL: - m_fmt = GSTextureFX::FMT_4HL; - m_palette = m_renderer->m_dev->CreateTexture(256, 1); - break; - case PSM_PSMT4HH: - m_fmt = GSTextureFX::FMT_4HH; - m_palette = m_renderer->m_dev->CreateTexture(256, 1); - break; - } - - return true; -} - void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect) { __super::Update(); @@ -912,28 +962,6 @@ GSTextureCache::Target::Target(GSRenderer* r) m_valid = GSVector4i::zero(); } -bool GSTextureCache::Target::Create(int w, int h, int type) -{ - ASSERT(m_texture == NULL); - - // FIXME: initial data should be unswizzled from local mem in Update() if dirty - - m_type = type; - - if(type == RenderTarget) - { - m_texture = m_renderer->m_dev->CreateRenderTarget(w, h); - - m_used = true; - } - else if(type == DepthStencil) - { - m_texture = m_renderer->m_dev->CreateDepthStencil(w, h); - } - - return m_texture != NULL; -} - void 
GSTextureCache::Target::Update() { __super::Update(); diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index af407edcaf..0b8af03050 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -46,8 +46,6 @@ public: virtual void Update(); }; - class Target; - class Source : public Surface { struct {GSVector4i* rect; uint32 count;} m_write; @@ -55,9 +53,6 @@ public: void Write(const GSVector4i& r); void Flush(uint32 count); - protected: - virtual int Get8bitFormat() = 0; - public: GSTexture* m_palette; bool m_initpalette; @@ -68,11 +63,9 @@ public: bool m_complete; public: - explicit Source(GSRenderer* renderer); + explicit Source(GSRenderer* r); virtual ~Source(); - virtual bool Create(bool paltex); - virtual bool Create(Target* dst); virtual void Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect); }; @@ -87,9 +80,7 @@ public: public: explicit Target(GSRenderer* r); - virtual bool Create(int w, int h, int type); virtual void Update(); - virtual void Read(const GSVector4i& r) = 0; }; protected: @@ -113,8 +104,14 @@ protected: list m_dst[2]; - virtual Source* CreateSource() = 0; - virtual Target* CreateTarget() = 0; + virtual Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL); + virtual Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type); + + virtual int Get8bitFormat() = 0; + + // TODO: virtual void Write(Source* s, const GSVector4i& r) = 0; + // TODO: virtual void Write(Target* t, const GSVector4i& r) = 0; + virtual void Read(Target* t, const GSVector4i& r) = 0; public: GSTextureCache(GSRenderer* r); @@ -123,7 +120,8 @@ public: void RemoveAll(); Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r); - Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, bool fb = false); + Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used); + Target* 
LookupTarget(const GIFRegTEX0& TEX0, int w, int h); void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r, bool target = true); void InvalidateLocalMem(const GSOffset* o, const GSVector4i& r); diff --git a/plugins/GSdx/GSTextureCache10.cpp b/plugins/GSdx/GSTextureCache10.cpp index b4819a17e8..4e02c95e84 100644 --- a/plugins/GSdx/GSTextureCache10.cpp +++ b/plugins/GSdx/GSTextureCache10.cpp @@ -29,46 +29,42 @@ GSTextureCache10::GSTextureCache10(GSRenderer* r) { } -// Source10 - -// Target10 - -void GSTextureCache10::Target10::Read(const GSVector4i& r) +void GSTextureCache10::Read(Target* t, const GSVector4i& r) { - if(m_type != RenderTarget) + if(t->m_type != RenderTarget) { // TODO return; } - if(m_TEX0.PSM != PSM_PSMCT32 - && m_TEX0.PSM != PSM_PSMCT24 - && m_TEX0.PSM != PSM_PSMCT16 - && m_TEX0.PSM != PSM_PSMCT16S) + const GIFRegTEX0& TEX0 = t->m_TEX0; + + if(TEX0.PSM != PSM_PSMCT32 + && TEX0.PSM != PSM_PSMCT24 + && TEX0.PSM != PSM_PSMCT16 + && TEX0.PSM != PSM_PSMCT16S) { //ASSERT(0); return; } - if(!m_dirty.empty()) + if(!t->m_dirty.empty()) { return; } - // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, m_TEX0.TBP0); - - // m_renderer->m_perfmon.Put(GSPerfMon::ReadRT, 1); + // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0); int w = r.width(); int h = r.height(); - GSVector4 src = GSVector4(r) * GSVector4(m_texture->m_scale).xyxy() / GSVector4(m_texture->GetSize()).xyxy(); + GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->m_scale).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); - DXGI_FORMAT format = m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; + DXGI_FORMAT format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; - if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(m_texture, src, w, h, format)) + if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h, format)) { GSTexture::GSMap m; @@ -76,9 +72,9 @@ void GSTextureCache10::Target10::Read(const GSVector4i& r) { // TODO: block level write - GSOffset* o = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); + GSOffset* o = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - switch(m_TEX0.PSM) + switch(TEX0.PSM) { case PSM_PSMCT32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r); diff --git a/plugins/GSdx/GSTextureCache10.h b/plugins/GSdx/GSTextureCache10.h index 89ae14f438..199494ad7d 100644 --- a/plugins/GSdx/GSTextureCache10.h +++ b/plugins/GSdx/GSTextureCache10.h @@ -26,26 +26,10 @@ class GSTextureCache10 : public GSTextureCache { - class Source10 : public Source - { - protected: - int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;} - - public: - explicit Source10(GSRenderer* r) : Source(r) {} - }; - - class Target10 : public Target - { - public: - explicit Target10(GSRenderer* r) : Target(r) {} - - void Read(const GSVector4i& r); - }; - protected: - Source* CreateSource() {return new Source10(m_renderer);} - Target* CreateTarget() {return new Target10(m_renderer);} + int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;} + + void Read(Target* t, const GSVector4i& r); public: GSTextureCache10(GSRenderer* r); diff --git a/plugins/GSdx/GSTextureCache11.cpp b/plugins/GSdx/GSTextureCache11.cpp index 99e1e70ff8..0af2c06da7 100644 --- a/plugins/GSdx/GSTextureCache11.cpp +++ b/plugins/GSdx/GSTextureCache11.cpp @@ -29,46 +29,42 @@ GSTextureCache11::GSTextureCache11(GSRenderer* r) { } -// Source11 - -// Target11 - -void GSTextureCache11::Target11::Read(const GSVector4i& r) +void GSTextureCache11::Read(Target* t, const GSVector4i& r) { - if(m_type != RenderTarget) + if(t->m_type != RenderTarget) { // TODO return; } - 
if(m_TEX0.PSM != PSM_PSMCT32 - && m_TEX0.PSM != PSM_PSMCT24 - && m_TEX0.PSM != PSM_PSMCT16 - && m_TEX0.PSM != PSM_PSMCT16S) + const GIFRegTEX0& TEX0 = t->m_TEX0; + + if(TEX0.PSM != PSM_PSMCT32 + && TEX0.PSM != PSM_PSMCT24 + && TEX0.PSM != PSM_PSMCT16 + && TEX0.PSM != PSM_PSMCT16S) { //ASSERT(0); return; } - if(!m_dirty.empty()) + if(!t->m_dirty.empty()) { return; } - // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, m_TEX0.TBP0); - - // m_renderer->m_perfmon.Put(GSPerfMon::ReadRT, 1); + // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0); int w = r.width(); int h = r.height(); - GSVector4 src = GSVector4(r) * GSVector4(m_texture->m_scale).xyxy() / GSVector4(m_texture->GetSize()).xyxy(); + GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->m_scale).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); - DXGI_FORMAT format = m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; + DXGI_FORMAT format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; - if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(m_texture, src, w, h, format)) + if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h, format)) { GSTexture::GSMap m; @@ -76,9 +72,9 @@ void GSTextureCache11::Target11::Read(const GSVector4i& r) { // TODO: block level write - GSOffset* o = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); + GSOffset* o = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - switch(m_TEX0.PSM) + switch(TEX0.PSM) { case PSM_PSMCT32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r); diff --git a/plugins/GSdx/GSTextureCache11.h b/plugins/GSdx/GSTextureCache11.h index 4f86f98221..f4da837984 100644 --- a/plugins/GSdx/GSTextureCache11.h +++ b/plugins/GSdx/GSTextureCache11.h @@ -26,26 +26,10 @@ class GSTextureCache11 : public GSTextureCache { - class Source11 : public Source - { - protected: - int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;} - - public: - explicit Source11(GSRenderer* r) : Source(r) {} - }; - - class Target11 : public Target - { - public: - explicit Target11(GSRenderer* r) : Target(r) {} - - void Read(const GSVector4i& r); - }; - protected: - Source* CreateSource() {return new Source11(m_renderer);} - Target* CreateTarget() {return new Target11(m_renderer);} + int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;} + + void Read(Target* t, const GSVector4i& r); public: GSTextureCache11(GSRenderer* r); diff --git a/plugins/GSdx/GSTextureCache9.cpp b/plugins/GSdx/GSTextureCache9.cpp index ad9f2ddd18..f7b8c14c0e 100644 --- a/plugins/GSdx/GSTextureCache9.cpp +++ b/plugins/GSdx/GSTextureCache9.cpp @@ -29,44 +29,40 @@ GSTextureCache9::GSTextureCache9(GSRenderer* r) { } -// Source9 - -// Target9 - -void GSTextureCache9::Target9::Read(const GSVector4i& r) +void GSTextureCache9::Read(Target* t, const GSVector4i& r) { - if(m_type != RenderTarget) + if(t->m_type != RenderTarget) { // TODO return; } - if(m_TEX0.PSM != 
PSM_PSMCT32 - && m_TEX0.PSM != PSM_PSMCT24 - && m_TEX0.PSM != PSM_PSMCT16 - && m_TEX0.PSM != PSM_PSMCT16S) + const GIFRegTEX0& TEX0 = t->m_TEX0; + + if(TEX0.PSM != PSM_PSMCT32 + && TEX0.PSM != PSM_PSMCT24 + && TEX0.PSM != PSM_PSMCT16 + && TEX0.PSM != PSM_PSMCT16S) { //ASSERT(0); return; } - if(!m_dirty.empty()) + if(!t->m_dirty.empty()) { return; } - // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, m_TEX0.TBP0); - - // m_renderer->m_perfmon.Put(GSPerfMon::ReadRT, 1); + // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0); int w = r.width(); int h = r.height(); - GSVector4 src = GSVector4(r) * GSVector4(m_texture->m_scale).xyxy() / GSVector4(m_texture->GetSize()).xyxy(); + GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->m_scale).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); - if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(m_texture, src, w, h)) + if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h)) { GSTexture::GSMap m; @@ -74,9 +70,9 @@ void GSTextureCache9::Target9::Read(const GSVector4i& r) { // TODO: block level write - GSOffset* o = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); + GSOffset* o = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - switch(m_TEX0.PSM) + switch(TEX0.PSM) { case PSM_PSMCT32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r); diff --git a/plugins/GSdx/GSTextureCache9.h b/plugins/GSdx/GSTextureCache9.h index 30fe108d35..20437f5ce8 100644 --- a/plugins/GSdx/GSTextureCache9.h +++ b/plugins/GSdx/GSTextureCache9.h @@ -26,26 +26,10 @@ class GSTextureCache9 : public GSTextureCache { - class Source9 : public Source - { - protected: - int Get8bitFormat() {return D3DFMT_A8;} - - public: - explicit Source9(GSRenderer* r) : Source(r) {} - }; - - class Target9 : public Target - { - public: - explicit Target9(GSRenderer* r) : Target(r) {} - - void Read(const GSVector4i& 
r); - }; - protected: - Source* CreateSource() {return new Source9(m_renderer);} - Target* CreateTarget() {return new Target9(m_renderer);} + int Get8bitFormat() {return D3DFMT_A8;} + + void Read(Target* t, const GSVector4i& r); public: GSTextureCache9(GSRenderer* r); diff --git a/plugins/GSdx/GSTextureCacheOGL.cpp b/plugins/GSdx/GSTextureCacheOGL.cpp index b9a8c06cb7..05dc1c4478 100644 --- a/plugins/GSdx/GSTextureCacheOGL.cpp +++ b/plugins/GSdx/GSTextureCacheOGL.cpp @@ -28,12 +28,3 @@ GSTextureCacheOGL::GSTextureCacheOGL(GSRenderer* r) : GSTextureCache(r) { } - -// SourceOGL - -// TargetOGL - -void GSTextureCacheOGL::TargetOGL::Read(const GSVector4i& r) -{ - // TODO -} diff --git a/plugins/GSdx/GSTextureCacheOGL.h b/plugins/GSdx/GSTextureCacheOGL.h index 07c4913e7a..49e9c69419 100644 --- a/plugins/GSdx/GSTextureCacheOGL.h +++ b/plugins/GSdx/GSTextureCacheOGL.h @@ -26,26 +26,10 @@ class GSTextureCacheOGL : public GSTextureCache { - class SourceOGL : public Source - { - protected: - int Get8bitFormat() {return 0;} // TODO - - public: - explicit SourceOGL(GSRenderer* r) : Source(r) {} - }; - - class TargetOGL : public Target - { - public: - explicit TargetOGL(GSRenderer* r) : Target(r) {} - - void Read(const GSVector4i& r); - }; - protected: - Source* CreateSource() {return new SourceOGL(m_renderer);} - Target* CreateTarget() {return new TargetOGL(m_renderer);} + int Get8bitFormat() {return 0;} // TODO + + void Read(Target* t, const GSVector4i& r) {} // TODO public: GSTextureCacheOGL(GSRenderer* r); diff --git a/plugins/GSdx/vsprops/common.props b/plugins/GSdx/vsprops/common.props index f2b3e7153f..86d9f38569 100644 --- a/plugins/GSdx/vsprops/common.props +++ b/plugins/GSdx/vsprops/common.props @@ -1,7 +1,7 @@  <_PropertySheetDisplayName>common - $(SolutionDir)\bin\$(PcsxSubsection)\ + $(SolutionDir)bin\$(PcsxSubsection)\ $(PlatformName)\$(Configuration)\ $(ProjectName)-$(SSEtype) From 5c38811967c12b7cf6b024b7935393d4a640c42d Mon Sep 17 00:00:00 2001 From: 
cottonvibes Date: Sat, 1 Aug 2009 02:44:22 +0000 Subject: [PATCH 10/18] pcsx2: removed some msvc++ warnings from one of arcum42's cleanups microVU: minor fixes. The ps2's VUs (and FPU) pretty much always do sqrt(abs(x)) whenever doing sqrt's. SSE's sqrt will give you a NaN if x is negative instead, so force abs(x) before doing sqrt (unless the value is known to be positive). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1592 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/iCore.h | 14 +++++++------- pcsx2/x86/microVU_Lower.inl | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index 01a4cc3fa0..316a56c066 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -222,7 +222,7 @@ template static __forceinline bool IS_REG(s32 reg) template static __forceinline bool IS_REG(u32 reg) { - return (reg & tag); + return !!(reg & tag); } #define IS_EECONSTREG(reg) IS_REG(reg) @@ -281,13 +281,13 @@ extern u32 _recIsRegWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg); extern u32 _recIsRegUsed(EEINST* pinst, int size, u8 xmmtype, u8 reg); extern void _recFillRegister(EEINST& pinst, int type, int reg, int write); -static __forceinline bool EEINST_ISLIVE64(u32 reg) { return (g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1)); } -static __forceinline bool EEINST_ISLIVEXMM(u32 reg) { return (g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1|EEINST_LIVE2)); } -static __forceinline bool EEINST_ISLIVE1(u32 reg) { return (g_pCurInstInfo->regs[reg] & EEINST_LIVE1); } -static __forceinline bool EEINST_ISLIVE2(u32 reg) { return (g_pCurInstInfo->regs[reg] & EEINST_LIVE2); } +static __forceinline bool EEINST_ISLIVE64(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1)); } +static __forceinline bool EEINST_ISLIVEXMM(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1|EEINST_LIVE2)); } +static __forceinline bool EEINST_ISLIVE1(u32 reg) { return 
!!(g_pCurInstInfo->regs[reg] & EEINST_LIVE1); } +static __forceinline bool EEINST_ISLIVE2(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & EEINST_LIVE2); } -static __forceinline bool FPUINST_ISLIVE(u32 reg) { return (g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0); } -static __forceinline bool FPUINST_LASTUSE(u32 reg) { return (g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE); } +static __forceinline bool FPUINST_ISLIVE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0); } +static __forceinline bool FPUINST_LASTUSE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE); } // if set, then the variable at this inst really has its upper 32 bits valid // The difference between EEINST_LIVE1 is that the latter is used in back propagation diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index ce5004e1f3..d46065e580 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -355,6 +355,7 @@ mVUop(mVU_ERSQRT) { pass2 { int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVU_absclip); SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one); SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ); @@ -417,6 +418,7 @@ mVUop(mVU_ESQRT) { pass2 { int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVU_absclip); SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); From 4bb4bebd46ba42ad8900bd5b933174972ed5f4e6 Mon Sep 17 00:00:00 2001 From: gigaherz Date: Sat, 1 Aug 2009 14:25:19 +0000 Subject: [PATCH 11/18] KiNGKiMO noticed a crash when specifying an iso filename in the cmdline, to make pcsx2 auto-boot that file.
I fixed that, and while I was at it, changed the bootmodes around. The way it works now is: - Base mode: 0=normal (cdvd plugin), 1=load elf, 2=use iso loader, 3=emulate no disc - Bios flag: 65536+base = mode startup through bios boot process. Default is to skip bios. So for example to load X.iso using the internal iso loader, and executing through the bios, you would now do: pcsx2.exe -bootmode 65538 X.iso This needs implementing on the Linux side, and maybe changing it so it's nicer user-wise (for example, reading the number as HEX would make it 10002 instead of 65538). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1593 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/CDVD/CDVDisoReader.cpp | 3 ++- pcsx2/HostGui.h | 13 +++++++++---- pcsx2/Plugins.cpp | 4 ++++ pcsx2/windows/WinMain.cpp | 14 ++++++++++++-- pcsx2/windows/WinSysExec.cpp | 2 +- 5 files changed, 28 insertions(+), 8 deletions(-) diff --git a/pcsx2/CDVD/CDVDisoReader.cpp b/pcsx2/CDVD/CDVDisoReader.cpp index c7f70c4e53..645b44172c 100644 --- a/pcsx2/CDVD/CDVDisoReader.cpp +++ b/pcsx2/CDVD/CDVDisoReader.cpp @@ -49,7 +49,8 @@ void CALLBACK ISOshutdown() s32 CALLBACK ISOopen(const char* pTitle) { - //if (pTitle != NULL) strcpy(isoFileName, pTitle); + if ((pTitle != NULL) && (strlen(pTitle) > 0)) + strcpy(isoFileName, pTitle); iso = isoOpen(isoFileName); if (iso == NULL) diff --git a/pcsx2/HostGui.h b/pcsx2/HostGui.h index d411f95acb..1d506c0352 100644 --- a/pcsx2/HostGui.h +++ b/pcsx2/HostGui.h @@ -23,10 +23,15 @@ enum StartupMode { - BootMode_Bios, - BootMode_Quick, - BootMode_Elf, - BootMode_Normal + BootMode_Normal = 0, + BootMode_Elf = 1, // not compatible with bios flag, probably + BootMode_Iso = 2, + BootMode_NoDisc = 3, // nodisc implies bios.
+ + BootMode_Quick = 0, + BootMode_Bios = 0x10000, + + BootMode_ModeMask = 0xFFFF, }; class StartupParams diff --git a/pcsx2/Plugins.cpp b/pcsx2/Plugins.cpp index edc4682081..11d80109c3 100644 --- a/pcsx2/Plugins.cpp +++ b/pcsx2/Plugins.cpp @@ -965,6 +965,10 @@ int OpenPlugins(const char* pTitleFilename) { if (!plugins_initialized) { + // prevent a crash + if(CDVD.init == NULL) + CDVD = ISO; // CDVD_plugin; + if( InitPlugins() == -1 ) return -1; } diff --git a/pcsx2/windows/WinMain.cpp b/pcsx2/windows/WinMain.cpp index a1ab0e8b93..c3e0241878 100644 --- a/pcsx2/windows/WinMain.cpp +++ b/pcsx2/windows/WinMain.cpp @@ -373,16 +373,26 @@ void RunGui() { // Initially bypass GUI and start PCSX2 directly. // Manually load plugins using the user's configured image (if non-elf). + + int mode = g_Startup.BootMode & BootMode_ModeMask; - if( g_Startup.Enabled && (g_Startup.BootMode != BootMode_Elf) ) + if( g_Startup.Enabled && (mode != BootMode_Elf) ) { + + if(mode == BootMode_Iso) + CDVD=ISO; + else if(mode == BootMode_NoDisc) + CDVD=NODISC; + else + CDVD=CDVD_plugin; + if (OpenPlugins(g_Startup.ImageName) == -1) return; } SysPrepareExecution( (g_Startup.BootMode == BootMode_Elf) ? g_Startup.ImageName : NULL, - (g_Startup.BootMode == BootMode_Bios) + ((g_Startup.BootMode & BootMode_Bios) != 0) ); } diff --git a/pcsx2/windows/WinSysExec.cpp b/pcsx2/windows/WinSysExec.cpp index b23150014f..071ec150ba 100644 --- a/pcsx2/windows/WinSysExec.cpp +++ b/pcsx2/windows/WinSysExec.cpp @@ -66,7 +66,7 @@ int SysPageFaultExceptionFilter( EXCEPTION_POINTERS* eps ) int ParseCommandLine( int tokenCount, TCHAR *const *const tokens ) { int tidx = 0; - g_Startup.BootMode = BootMode_Bios; + g_Startup.BootMode = BootMode_Normal; while( tidx < tokenCount ) { From 3d87e4845abeeb71d831eef04635715a89ab27fd Mon Sep 17 00:00:00 2001 From: arcum42 Date: Sun, 2 Aug 2009 07:21:17 +0000 Subject: [PATCH 12/18] Add the changes from r1593 to Linux. 
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1594 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/Linux/LnxMain.cpp | 1 - pcsx2/Linux/LnxMain.h | 3 +++ pcsx2/Linux/LnxSysExec.cpp | 14 ++++++++++++-- pcsx2/Linux/LnxSysExec.h | 3 +++ 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/pcsx2/Linux/LnxMain.cpp b/pcsx2/Linux/LnxMain.cpp index 219864c43f..d61d69676a 100644 --- a/pcsx2/Linux/LnxMain.cpp +++ b/pcsx2/Linux/LnxMain.cpp @@ -17,7 +17,6 @@ */ #include "LnxMain.h" -#include "CDVD/CDVDisoReader.h" using namespace R5900; diff --git a/pcsx2/Linux/LnxMain.h b/pcsx2/Linux/LnxMain.h index 86b74cebd0..7639f9f7b6 100644 --- a/pcsx2/Linux/LnxMain.h +++ b/pcsx2/Linux/LnxMain.h @@ -23,6 +23,9 @@ #include "LnxMain.h" #include "HostGui.h" +// For Linux ISO loading +#include "CDVD/CDVDisoReader.h" + extern bool applychanges; extern bool Slots[5]; diff --git a/pcsx2/Linux/LnxSysExec.cpp b/pcsx2/Linux/LnxSysExec.cpp index e5da6434f0..2bd55604c5 100644 --- a/pcsx2/Linux/LnxSysExec.cpp +++ b/pcsx2/Linux/LnxSysExec.cpp @@ -183,16 +183,26 @@ void RunGui() { // Initially bypass GUI and start PCSX2 directly. // Manually load plugins using the user's configured image (if non-elf). + + int mode = g_Startup.BootMode & BootMode_ModeMask; - if( g_Startup.Enabled && (g_Startup.BootMode != BootMode_Elf) ) + if( g_Startup.Enabled && (mode != BootMode_Elf) ) { + + if(mode == BootMode_Iso) + CDVD = ISO; + else if(mode == BootMode_NoDisc) + CDVD = NODISC; + else + CDVD = CDVD_plugin; + if (OpenPlugins(g_Startup.ImageName) == -1) return; } SysPrepareExecution( (g_Startup.BootMode == BootMode_Elf) ? 
g_Startup.ImageName : NULL, - (g_Startup.BootMode == BootMode_Bios) + ((g_Startup.BootMode & BootMode_Bios) != 0) ); } diff --git a/pcsx2/Linux/LnxSysExec.h b/pcsx2/Linux/LnxSysExec.h index 639a7e7671..b429712848 100644 --- a/pcsx2/Linux/LnxSysExec.h +++ b/pcsx2/Linux/LnxSysExec.h @@ -24,6 +24,9 @@ #include #include "x86/iR5900.h" +// For Linux ISO loading +#include "CDVD/CDVDisoReader.h" + void SysPageFaultExceptionFilter( int signal, siginfo_t *info, void * ); void __fastcall InstallLinuxExceptionHandler(); void __fastcall ReleaseLinuxExceptionHandler(); From 37b63f8a0ca0e4c13f362da4f82f9311b84df801 Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Sun, 2 Aug 2009 18:46:39 +0000 Subject: [PATCH 13/18] Zeropad/Win32: Make what appears to be a fairly significant bugfix, for what it's worth. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1596 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/zeropad/keyboard.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/plugins/zeropad/keyboard.cpp b/plugins/zeropad/keyboard.cpp index 855bf89a1b..2384b927a1 100644 --- a/plugins/zeropad/keyboard.cpp +++ b/plugins/zeropad/keyboard.cpp @@ -31,16 +31,20 @@ return -1; } +#ifdef _WINDOWS_ +WORD toCharTemp; +#endif + char* KeysymToChar(int keysym) { - #ifdef __LINUX__ +#ifdef __LINUX__ return XKeysymToString(keysym); #else - LPWORD temp; - - ToAscii((UINT) keysym, NULL, NULL, temp, NULL); - return (char*)temp; - #endif + // fixed this to return *valid* results, and not some pointer + // to the fourth oblivion-- air + ToAscii((UINT) keysym, NULL, NULL, &toCharTemp, NULL); + return (char*)(&toCharTemp); +#endif } void PollForKeyboardInput(int pad) From cea0f395aa9b82280740408e543811b273571486 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Sun, 2 Aug 2009 23:07:30 +0000 Subject: [PATCH 14/18] GSdx: experimenting with msaa, add msaa=N to gsdx.ini to activate it (N=2,4,8) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1597 
96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSDevice.cpp | 59 +++++++++++++++++------- plugins/GSdx/GSDevice.h | 12 +++-- plugins/GSdx/GSDevice10.cpp | 74 ++++++++++++++++++++++++------- plugins/GSdx/GSDevice10.h | 8 ++-- plugins/GSdx/GSDevice11.cpp | 72 +++++++++++++++++++++++------- plugins/GSdx/GSDevice11.h | 8 ++-- plugins/GSdx/GSDevice7.cpp | 4 +- plugins/GSdx/GSDevice7.h | 2 +- plugins/GSdx/GSDevice9.cpp | 70 +++++++++++++++++++++++------ plugins/GSdx/GSDevice9.h | 8 ++-- plugins/GSdx/GSDeviceNull.cpp | 2 +- plugins/GSdx/GSDeviceNull.h | 2 +- plugins/GSdx/GSDeviceOGL.cpp | 18 ++++---- plugins/GSdx/GSDeviceOGL.h | 6 +-- plugins/GSdx/GSRenderer.cpp | 6 +-- plugins/GSdx/GSRendererDX.h | 14 +++--- plugins/GSdx/GSRendererDX10.cpp | 12 +++-- plugins/GSdx/GSRendererDX11.cpp | 12 +++-- plugins/GSdx/GSRendererDX9.cpp | 14 +++--- plugins/GSdx/GSTexture.cpp | 8 ++++ plugins/GSdx/GSTexture.h | 20 ++++++--- plugins/GSdx/GSTexture10.cpp | 21 ++++----- plugins/GSdx/GSTexture10.h | 3 -- plugins/GSdx/GSTexture11.cpp | 21 ++++----- plugins/GSdx/GSTexture11.h | 3 -- plugins/GSdx/GSTexture7.cpp | 20 ++++----- plugins/GSdx/GSTexture7.h | 4 -- plugins/GSdx/GSTexture9.cpp | 32 +++++++------ plugins/GSdx/GSTexture9.h | 3 -- plugins/GSdx/GSTextureCache.cpp | 47 +++++++++++++------- plugins/GSdx/GSTextureCache10.cpp | 2 +- plugins/GSdx/GSTextureCache11.cpp | 2 +- plugins/GSdx/GSTextureCache9.cpp | 2 +- plugins/GSdx/GSTextureOGL.cpp | 16 ++----- plugins/GSdx/GSTextureOGL.h | 3 -- plugins/GSdx/GSdx_vs2010.vcxproj | 8 ---- 36 files changed, 395 insertions(+), 223 deletions(-) diff --git a/plugins/GSdx/GSDevice.cpp b/plugins/GSdx/GSDevice.cpp index 2b59f856f9..8d1dd4007a 100644 --- a/plugins/GSdx/GSDevice.cpp +++ b/plugins/GSdx/GSDevice.cpp @@ -20,6 +20,7 @@ */ #include "StdAfx.h" +#include "GSdx.h" #include "GSDevice.h" GSDevice::GSDevice() @@ -32,6 +33,11 @@ GSDevice::GSDevice() , m_1x1(NULL) { memset(&m_vertices, 0, sizeof(m_vertices)); + + m_msaa = theApp.GetConfig("msaa", 
0); + + m_msaa_desc.Count = 1; + m_msaa_desc.Quality = 0; } GSDevice::~GSDevice() @@ -83,7 +89,7 @@ void GSDevice::Present(const GSVector4i& r, int shader, bool limit) int w = std::max(cr.width(), 1); int h = std::max(cr.height(), 1); - if(!m_backbuffer || m_backbuffer->m_size.x != w || m_backbuffer->m_size.y != h) + if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h, DontCare)) { @@ -103,15 +109,20 @@ void GSDevice::Present(const GSVector4i& r, int shader, bool limit) Flip(limit); } -GSTexture* GSDevice::Fetch(int type, int w, int h, int format) +GSTexture* GSDevice::Fetch(int type, int w, int h, bool msaa, int format) { + if(m_msaa < 2) + { + msaa = false; + } + GSVector2i size(w, h); for(list::iterator i = m_pool.begin(); i != m_pool.end(); i++) { GSTexture* t = *i; - if(t->GetType() == type && t->GetFormat() == format && t->GetSize() == size) + if(t->GetType() == type && t->GetFormat() == format && t->GetSize() == size && t->IsMSAA() == msaa) { m_pool.erase(i); @@ -119,7 +130,7 @@ GSTexture* GSDevice::Fetch(int type, int w, int h, int format) } } - return Create(type, w, h, format); + return Create(type, w, h, msaa, format); } void GSDevice::EndScene() @@ -143,24 +154,24 @@ void GSDevice::Recycle(GSTexture* t) } } -GSTexture* GSDevice::CreateRenderTarget(int w, int h, int format) +GSTexture* GSDevice::CreateRenderTarget(int w, int h, bool msaa, int format) { - return Fetch(GSTexture::RenderTarget, w, h, format); + return Fetch(GSTexture::RenderTarget, w, h, msaa, format); } -GSTexture* GSDevice::CreateDepthStencil(int w, int h, int format) +GSTexture* GSDevice::CreateDepthStencil(int w, int h, bool msaa, int format) { - return Fetch(GSTexture::DepthStencil, w, h, format); + return Fetch(GSTexture::DepthStencil, w, h, msaa, format); } GSTexture* GSDevice::CreateTexture(int w, int h, int format) { - return Fetch(GSTexture::Texture, w, h, format); + return Fetch(GSTexture::Texture, w, h, false, format); } 
GSTexture* GSDevice::CreateOffscreen(int w, int h, int format) { - return Fetch(GSTexture::Offscreen, w, h, format); + return Fetch(GSTexture::Offscreen, w, h, false, format); } void GSDevice::StretchRect(GSTexture* st, GSTexture* dt, const GSVector4& dr, int shader, bool linear) @@ -177,7 +188,7 @@ void GSDevice::Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVec { if(!m_merge || !(m_merge->GetSize() == fs)) { - m_merge = CreateRenderTarget(fs.x, fs.y); + m_merge = CreateRenderTarget(fs.x, fs.y, false); } // TODO: m_1x1 @@ -188,7 +199,25 @@ void GSDevice::Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVec if(m_merge) { - DoMerge(st, sr, dr, m_merge, slbg, mmod, c); + GSTexture* tex[2] = {NULL, NULL}; + + for(int i = 0; i < countof(tex); i++) + { + if(st[i] != NULL) + { + tex[i] = st[i]->IsMSAA() ? Resolve(st[i]) : st[i]; + } + } + + DoMerge(tex, sr, dr, m_merge, slbg, mmod, c); + + for(int i = 0; i < countof(tex); i++) + { + if(tex[i] != st[i]) + { + Recycle(tex[i]); + } + } } else { @@ -202,7 +231,7 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse { if(!m_weavebob || !(m_weavebob->GetSize() == ds)) { - m_weavebob = CreateRenderTarget(ds.x, ds.y); + m_weavebob = CreateRenderTarget(ds.x, ds.y, false); } if(mode == 0 || mode == 2) // weave or blend @@ -217,7 +246,7 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse if(!m_blend || !(m_blend->GetSize() == ds)) { - m_blend = CreateRenderTarget(ds.x, ds.y); + m_blend = CreateRenderTarget(ds.x, ds.y, false); } DoInterlace(m_weavebob, m_blend, 2, false, 0); @@ -247,7 +276,7 @@ bool GSDevice::ResizeTexture(GSTexture** t, int w, int h) GSTexture* t2 = *t; - if(t2 == NULL || t2->m_size.x != w || t2->m_size.y != h) + if(t2 == NULL || t2->GetWidth() != w || t2->GetHeight() != h) { delete t2; diff --git a/plugins/GSdx/GSDevice.h b/plugins/GSdx/GSDevice.h index b87374608f..8fb542ce94 100644 --- a/plugins/GSdx/GSDevice.h +++ 
b/plugins/GSdx/GSDevice.h @@ -50,7 +50,7 @@ class GSDevice : public GSAlignedClass<16> { list m_pool; - GSTexture* Fetch(int type, int w, int h, int format); + GSTexture* Fetch(int type, int w, int h, bool msaa, int format); protected: GSWnd* m_wnd; @@ -64,8 +64,10 @@ protected: GSTexture* m_current; struct {D3D_FEATURE_LEVEL level; string model, vs, gs, ps;} m_shader; struct {size_t stride, start, count, limit;} m_vertices; + uint32 m_msaa; + DXGI_SAMPLE_DESC m_msaa_desc; - virtual GSTexture* Create(int type, int w, int h, int format) = 0; + virtual GSTexture* Create(int type, int w, int h, bool msaa, int format) = 0; virtual void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c) = 0; virtual void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset) = 0; @@ -93,11 +95,13 @@ public: virtual void ClearDepth(GSTexture* t, float c) {} virtual void ClearStencil(GSTexture* t, uint8 c) {} - virtual GSTexture* CreateRenderTarget(int w, int h, int format = 0); - virtual GSTexture* CreateDepthStencil(int w, int h, int format = 0); + virtual GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); + virtual GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); virtual GSTexture* CreateTexture(int w, int h, int format = 0); virtual GSTexture* CreateOffscreen(int w, int h, int format = 0); + virtual GSTexture* Resolve(GSTexture* t) {return NULL;} + virtual GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0) {return NULL;} virtual void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r) {} diff --git a/plugins/GSdx/GSDevice10.cpp b/plugins/GSdx/GSDevice10.cpp index 7c40689e1d..976b62b8c3 100644 --- a/plugins/GSdx/GSDevice10.cpp +++ b/plugins/GSdx/GSDevice10.cpp @@ -43,7 +43,7 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync) return false; } - HRESULT hr; + HRESULT hr = E_FAIL; DXGI_SWAP_CHAIN_DESC scd; 
D3D10_BUFFER_DESC bd; @@ -66,7 +66,7 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync) scd.SampleDesc.Quality = 0; scd.Windowed = TRUE; - uint32 flags = D3D10_CREATE_DEVICE_SINGLETHREADED; //disables thread safety, should be fine (speedup) + uint32 flags = D3D10_CREATE_DEVICE_SINGLETHREADED; #ifdef DEBUG flags |= D3D10_CREATE_DEVICE_DEBUG; @@ -95,6 +95,22 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync) if(FAILED(hr)) return false; + // msaa + + for(uint32 i = 2; i <= D3D10_MAX_MULTISAMPLE_SAMPLE_COUNT; i++) + { + uint32 quality[2] = {0, 0}; + + if(SUCCEEDED(m_dev->CheckMultisampleQualityLevels(DXGI_FORMAT_R8G8B8A8_UNORM, i, &quality[0])) && quality[0] > 0 + && SUCCEEDED(m_dev->CheckMultisampleQualityLevels(DXGI_FORMAT_D32_FLOAT_S8X24_UINT, i, &quality[1])) && quality[1] > 0) + { + m_msaa_desc.Count = i; + m_msaa_desc.Quality = std::min(quality[0] - 1, quality[1] - 1); + + if(i >= m_msaa) break; + } + } + // convert D3D10_INPUT_ELEMENT_DESC il_convert[] = @@ -179,7 +195,7 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync) rd.SlopeScaledDepthBias = 0; rd.DepthClipEnable = false; // ??? 
rd.ScissorEnable = true; - rd.MultisampleEnable = false; + rd.MultisampleEnable = true; rd.AntialiasedLineEnable = false; hr = m_dev->CreateRasterizerState(&rd, &m_rs); @@ -265,7 +281,7 @@ void GSDevice10::ClearStencil(GSTexture* t, uint8 c) m_dev->ClearDepthStencilView(*(GSTexture10*)t, D3D10_CLEAR_STENCIL, 0, c); } -GSTexture* GSDevice10::Create(int type, int w, int h, int format) +GSTexture* GSDevice10::Create(int type, int w, int h, bool msaa, int format) { HRESULT hr; @@ -282,6 +298,11 @@ GSTexture* GSDevice10::Create(int type, int w, int h, int format) desc.SampleDesc.Quality = 0; desc.Usage = D3D10_USAGE_DEFAULT; + if(msaa) + { + desc.SampleDesc = m_msaa_desc; + } + switch(type) { case GSTexture::RenderTarget: @@ -323,14 +344,14 @@ GSTexture* GSDevice10::Create(int type, int w, int h, int format) return t; } -GSTexture* GSDevice10::CreateRenderTarget(int w, int h, int format) +GSTexture* GSDevice10::CreateRenderTarget(int w, int h, bool msaa, int format) { - return __super::CreateRenderTarget(w, h, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); + return __super::CreateRenderTarget(w, h, msaa, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); } -GSTexture* GSDevice10::CreateDepthStencil(int w, int h, int format) +GSTexture* GSDevice10::CreateDepthStencil(int w, int h, bool msaa, int format) { - return __super::CreateDepthStencil(w, h, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); // DXGI_FORMAT_R32G8X24_TYPELESS + return __super::CreateDepthStencil(w, h, msaa, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); // DXGI_FORMAT_R32G8X24_TYPELESS } GSTexture* GSDevice10::CreateTexture(int w, int h, int format) @@ -343,6 +364,22 @@ GSTexture* GSDevice10::CreateOffscreen(int w, int h, int format) return __super::CreateOffscreen(w, h, format ? 
format : DXGI_FORMAT_R8G8B8A8_UNORM); } +GSTexture* GSDevice10::Resolve(GSTexture* t) +{ + ASSERT(t != NULL && t->IsMSAA()); + + if(GSTexture* dst = CreateRenderTarget(t->GetWidth(), t->GetHeight(), false, t->GetFormat())) + { + dst->SetScale(t->GetScale()); + + m_dev->ResolveSubresource(*(GSTexture10*)dst, 0, *(GSTexture10*)t, 0, (DXGI_FORMAT)t->GetFormat()); + + return dst; + } + + return NULL; +} + GSTexture* GSDevice10::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format) { GSTexture* dst = NULL; @@ -359,11 +396,16 @@ GSTexture* GSDevice10::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, return false; } - if(GSTexture* rt = CreateRenderTarget(w, h, format)) + if(GSTexture* rt = CreateRenderTarget(w, h, false, format)) { GSVector4 dr(0, 0, w, h); - StretchRect(src, sr, rt, dr, m_convert.ps[format == DXGI_FORMAT_R16_UINT ? 1 : 0], NULL); + if(GSTexture* src2 = src->IsMSAA() ? Resolve(src) : src) + { + StretchRect(src2, sr, rt, dr, m_convert.ps[format == DXGI_FORMAT_R16_UINT ? 
1 : 0], NULL); + + if(src2 != src) Recycle(src2); + } dst = CreateOffscreen(w, h, format); @@ -605,7 +647,7 @@ void GSDevice10::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) { ID3D10ShaderResourceView* srv0 = NULL; ID3D10ShaderResourceView* srv1 = NULL; - + if(sr0) srv0 = *(GSTexture10*)sr0; if(sr1) srv1 = *(GSTexture10*)sr1; @@ -690,9 +732,9 @@ void GSDevice10::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector m_dev->OMSetRenderTargets(1, &rtv, dsv); } - if(m_state.viewport != rt->m_size) + if(m_state.viewport != rt->GetSize()) { - m_state.viewport = rt->m_size; + m_state.viewport = rt->GetSize(); D3D10_VIEWPORT vp; @@ -700,15 +742,15 @@ void GSDevice10::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector vp.TopLeftX = 0; vp.TopLeftY = 0; - vp.Width = rt->m_size.x; - vp.Height = rt->m_size.y; + vp.Width = rt->GetWidth(); + vp.Height = rt->GetHeight(); vp.MinDepth = 0.0f; vp.MaxDepth = 1.0f; m_dev->RSSetViewports(1, &vp); } - GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy(); + GSVector4i r = scissor ? 
*scissor : GSVector4i(rt->GetSize()).zwxy(); if(!m_state.scissor.eq(r)) { diff --git a/plugins/GSdx/GSDevice10.h b/plugins/GSdx/GSDevice10.h index c48f708ef3..6c427da15e 100644 --- a/plugins/GSdx/GSDevice10.h +++ b/plugins/GSdx/GSDevice10.h @@ -26,7 +26,7 @@ class GSDevice10 : public GSDevice { - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0); @@ -103,11 +103,13 @@ public: void ClearDepth(GSTexture* t, float c); void ClearStencil(GSTexture* t, uint8 c); - GSTexture* CreateRenderTarget(int w, int h, int format = 0); - GSTexture* CreateDepthStencil(int w, int h, int format = 0); + GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); + GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); GSTexture* CreateTexture(int w, int h, int format = 0); GSTexture* CreateOffscreen(int w, int h, int format = 0); + GSTexture* Resolve(GSTexture* t); + GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0); void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r); diff --git a/plugins/GSdx/GSDevice11.cpp b/plugins/GSdx/GSDevice11.cpp index d85a8f05ed..d38eb25979 100644 --- a/plugins/GSdx/GSDevice11.cpp +++ b/plugins/GSdx/GSDevice11.cpp @@ -43,7 +43,7 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync) return false; } - HRESULT hr; + HRESULT hr = E_FAIL; DXGI_SWAP_CHAIN_DESC scd; D3D11_BUFFER_DESC bd; @@ -66,7 +66,7 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync) scd.SampleDesc.Quality = 0; scd.Windowed = TRUE; - uint32 flags = D3D11_CREATE_DEVICE_SINGLETHREADED; //disables thread safety, should be fine (speedup) + uint32 flags = D3D11_CREATE_DEVICE_SINGLETHREADED; #ifdef DEBUG flags |= D3D11_CREATE_DEVICE_DEBUG; @@ 
-95,6 +95,22 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync) hr = m_dev->CheckFeatureSupport(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &options, sizeof(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS)); + // msaa + + for(uint32 i = 2; i <= D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT; i++) + { + uint32 quality[2] = {0, 0}; + + if(SUCCEEDED(m_dev->CheckMultisampleQualityLevels(DXGI_FORMAT_R8G8B8A8_UNORM, i, &quality[0])) && quality[0] > 0 + && SUCCEEDED(m_dev->CheckMultisampleQualityLevels(DXGI_FORMAT_D32_FLOAT_S8X24_UINT, i, &quality[1])) && quality[1] > 0) + { + m_msaa_desc.Count = i; + m_msaa_desc.Quality = std::min(quality[0] - 1, quality[1] - 1); + + if(i >= m_msaa) break; + } + } + // convert D3D11_INPUT_ELEMENT_DESC il_convert[] = @@ -178,7 +194,7 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync) rd.SlopeScaledDepthBias = 0; rd.DepthClipEnable = false; // ??? rd.ScissorEnable = true; - rd.MultisampleEnable = false; + rd.MultisampleEnable = true; rd.AntialiasedLineEnable = false; hr = m_dev->CreateRasterizerState(&rd, &m_rs); @@ -264,7 +280,7 @@ void GSDevice11::ClearStencil(GSTexture* t, uint8 c) m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_STENCIL, 0, c); } -GSTexture* GSDevice11::Create(int type, int w, int h, int format) +GSTexture* GSDevice11::Create(int type, int w, int h, bool msaa, int format) { HRESULT hr; @@ -281,6 +297,11 @@ GSTexture* GSDevice11::Create(int type, int w, int h, int format) desc.SampleDesc.Quality = 0; desc.Usage = D3D11_USAGE_DEFAULT; + if(msaa) + { + desc.SampleDesc = m_msaa_desc; + } + switch(type) { case GSTexture::RenderTarget: @@ -322,14 +343,14 @@ GSTexture* GSDevice11::Create(int type, int w, int h, int format) return t; } -GSTexture* GSDevice11::CreateRenderTarget(int w, int h, int format) +GSTexture* GSDevice11::CreateRenderTarget(int w, int h, bool msaa, int format) { - return __super::CreateRenderTarget(w, h, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); + return __super::CreateRenderTarget(w, h, msaa, format ? 
format : DXGI_FORMAT_R8G8B8A8_UNORM); } -GSTexture* GSDevice11::CreateDepthStencil(int w, int h, int format) +GSTexture* GSDevice11::CreateDepthStencil(int w, int h, bool msaa, int format) { - return __super::CreateDepthStencil(w, h, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); + return __super::CreateDepthStencil(w, h, msaa, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); } GSTexture* GSDevice11::CreateTexture(int w, int h, int format) @@ -342,6 +363,22 @@ GSTexture* GSDevice11::CreateOffscreen(int w, int h, int format) return __super::CreateOffscreen(w, h, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); } +GSTexture* GSDevice11::Resolve(GSTexture* t) +{ + ASSERT(t != NULL && t->IsMSAA()); + + if(GSTexture* dst = CreateRenderTarget(t->GetWidth(), t->GetHeight(), false, t->GetFormat())) + { + dst->SetScale(t->GetScale()); + + m_ctx->ResolveSubresource(*(GSTexture11*)dst, 0, *(GSTexture11*)t, 0, (DXGI_FORMAT)t->GetFormat()); + + return dst; + } + + return NULL; +} + GSTexture* GSDevice11::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format) { GSTexture* dst = NULL; @@ -358,11 +395,16 @@ GSTexture* GSDevice11::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, return false; } - if(GSTexture* rt = CreateRenderTarget(w, h, format)) + if(GSTexture* rt = CreateRenderTarget(w, h, false, format)) { GSVector4 dr(0, 0, w, h); - StretchRect(src, sr, rt, dr, m_convert.ps[format == DXGI_FORMAT_R16_UINT ? 1 : 0], NULL); + if(GSTexture* src2 = src->IsMSAA() ? Resolve(src) : src) + { + StretchRect(src2, sr, rt, dr, m_convert.ps[format == DXGI_FORMAT_R16_UINT ? 
1 : 0], NULL); + + if(src2 != src) Recycle(src2); + } dst = CreateOffscreen(w, h, format); @@ -689,9 +731,9 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector m_ctx->OMSetRenderTargets(1, &rtv, dsv); } - if(m_state.viewport != rt->m_size) + if(m_state.viewport != rt->GetSize()) { - m_state.viewport = rt->m_size; + m_state.viewport = rt->GetSize(); D3D11_VIEWPORT vp; @@ -699,15 +741,15 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector vp.TopLeftX = 0; vp.TopLeftY = 0; - vp.Width = rt->m_size.x; - vp.Height = rt->m_size.y; + vp.Width = rt->GetWidth(); + vp.Height = rt->GetHeight(); vp.MinDepth = 0.0f; vp.MaxDepth = 1.0f; m_ctx->RSSetViewports(1, &vp); } - GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy(); + GSVector4i r = scissor ? *scissor : GSVector4i(rt->GetSize()).zwxy(); if(!m_state.scissor.eq(r)) { diff --git a/plugins/GSdx/GSDevice11.h b/plugins/GSdx/GSDevice11.h index e87187de31..b607153c7b 100644 --- a/plugins/GSdx/GSDevice11.h +++ b/plugins/GSdx/GSDevice11.h @@ -26,7 +26,7 @@ class GSDevice11 : public GSDevice { - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0); @@ -104,11 +104,13 @@ public: void ClearDepth(GSTexture* t, float c); void ClearStencil(GSTexture* t, uint8 c); - GSTexture* CreateRenderTarget(int w, int h, int format = 0); - GSTexture* CreateDepthStencil(int w, int h, int format = 0); + GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); + GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); GSTexture* CreateTexture(int w, int h, int format = 0); GSTexture* CreateOffscreen(int w, int h, int format = 0); + GSTexture* Resolve(GSTexture* t); + 
GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0); void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r); diff --git a/plugins/GSdx/GSDevice7.cpp b/plugins/GSdx/GSDevice7.cpp index aa3c42abdd..2bc18d6e96 100644 --- a/plugins/GSdx/GSDevice7.cpp +++ b/plugins/GSdx/GSDevice7.cpp @@ -145,7 +145,7 @@ void GSDevice7::Present(const GSVector4i& r, int shader, bool limit) int w = std::max(cr.width(), 1); int h = std::max(cr.height(), 1); - if(!m_backbuffer || m_backbuffer->m_size.x != w || m_backbuffer->m_size.y != h) + if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h, DontCare)) { @@ -198,7 +198,7 @@ void GSDevice7::Present(const GSVector4i& r, int shader, bool limit) } } -GSTexture* GSDevice7::Create(int type, int w, int h, int format) +GSTexture* GSDevice7::Create(int type, int w, int h, bool msaa, int format) { HRESULT hr; diff --git a/plugins/GSdx/GSDevice7.h b/plugins/GSdx/GSDevice7.h index baa2caf9ad..eb5ed4bbfb 100644 --- a/plugins/GSdx/GSDevice7.h +++ b/plugins/GSdx/GSDevice7.h @@ -31,7 +31,7 @@ private: CComPtr m_primary; bool m_lost; - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0); diff --git a/plugins/GSdx/GSDevice9.cpp b/plugins/GSdx/GSDevice9.cpp index 9d5f7a919c..e47ca99785 100644 --- a/plugins/GSdx/GSDevice9.cpp +++ b/plugins/GSdx/GSDevice9.cpp @@ -124,6 +124,22 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync) return false; } + // msaa + + for(uint32 i = 2; i <= 16; i++) + { + DWORD quality[2] = {0, 0}; + + if(SUCCEEDED(m_d3d->CheckDeviceMultiSampleType(m_d3dcaps.AdapterOrdinal, m_d3dcaps.DeviceType, D3DFMT_A8R8G8B8, TRUE, (D3DMULTISAMPLE_TYPE)i, &quality[0])) && 
quality[0] > 0 + && SUCCEEDED(m_d3d->CheckDeviceMultiSampleType(m_d3dcaps.AdapterOrdinal, m_d3dcaps.DeviceType, D3DFMT_D24S8, TRUE, (D3DMULTISAMPLE_TYPE)i, &quality[1])) && quality[1] > 0) + { + m_msaa_desc.Count = i; + m_msaa_desc.Quality = std::min(quality[0] - 1, quality[1] - 1); + + if(i >= m_msaa) break; + } + } + // if(!Reset(1, 1, theApp.GetConfig("windowed", 1) ? Windowed : Fullscreen)) @@ -278,9 +294,13 @@ bool GSDevice9::Reset(int w, int h, int mode) if(!m_dev) { - //D3DCREATE_MULTITHREADED flag shouldn't be needed uint32 flags = m_d3dcaps.VertexProcessingCaps ? D3DCREATE_HARDWARE_VERTEXPROCESSING : D3DCREATE_SOFTWARE_VERTEXPROCESSING; + if(flags & D3DCREATE_HARDWARE_VERTEXPROCESSING) + { + flags |= D3DCREATE_PUREDEVICE; + } + hr = m_d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, (HWND)m_wnd->GetHandle(), flags, &m_pp, &m_dev); if(FAILED(hr)) return false; @@ -423,7 +443,7 @@ void GSDevice9::ClearRenderTarget(GSTexture* rt, uint32 c) void GSDevice9::ClearDepth(GSTexture* t, float c) { - GSTexture* rt = CreateRenderTarget(t->m_size.x, t->m_size.y); + GSTexture* rt = CreateRenderTarget(t->GetWidth(), t->GetHeight(), t->IsMSAA()); CComPtr rtsurface; CComPtr dssurface; @@ -444,7 +464,7 @@ void GSDevice9::ClearDepth(GSTexture* t, float c) void GSDevice9::ClearStencil(GSTexture* t, uint8 c) { - GSTexture* rt = CreateRenderTarget(t->m_size.x, t->m_size.y); + GSTexture* rt = CreateRenderTarget(t->GetWidth(), t->GetHeight(), t->IsMSAA()); CComPtr rtsurface; CComPtr dssurface; @@ -463,20 +483,23 @@ void GSDevice9::ClearStencil(GSTexture* t, uint8 c) Recycle(rt); } -GSTexture* GSDevice9::Create(int type, int w, int h, int format) +GSTexture* GSDevice9::Create(int type, int w, int h, bool msaa, int format) { HRESULT hr; CComPtr texture; CComPtr surface; + // TODO: msaa + switch(type) { case GSTexture::RenderTarget: - hr = m_dev->CreateTexture(w, h, 1, D3DUSAGE_RENDERTARGET, (D3DFORMAT)format, D3DPOOL_DEFAULT, &texture, NULL); + if(msaa) hr = 
m_dev->CreateRenderTarget(w, h, (D3DFORMAT)format, (D3DMULTISAMPLE_TYPE)m_msaa_desc.Count, m_msaa_desc.Quality, FALSE, &surface, NULL); + else hr = m_dev->CreateTexture(w, h, 1, D3DUSAGE_RENDERTARGET, (D3DFORMAT)format, D3DPOOL_DEFAULT, &texture, NULL); break; case GSTexture::DepthStencil: - hr = m_dev->CreateDepthStencilSurface(w, h, (D3DFORMAT)format, D3DMULTISAMPLE_NONE, 0, FALSE, &surface, NULL); + hr = m_dev->CreateDepthStencilSurface(w, h, (D3DFORMAT)format, (D3DMULTISAMPLE_TYPE)m_msaa_desc.Count, m_msaa_desc.Quality, FALSE, &surface, NULL); break; case GSTexture::Texture: hr = m_dev->CreateTexture(w, h, 1, 0, (D3DFORMAT)format, D3DPOOL_MANAGED, &texture, NULL); @@ -514,14 +537,14 @@ GSTexture* GSDevice9::Create(int type, int w, int h, int format) return t; } -GSTexture* GSDevice9::CreateRenderTarget(int w, int h, int format) +GSTexture* GSDevice9::CreateRenderTarget(int w, int h, bool msaa, int format) { - return __super::CreateRenderTarget(w, h, format ? format : D3DFMT_A8R8G8B8); + return __super::CreateRenderTarget(w, h, msaa, format ? format : D3DFMT_A8R8G8B8); } -GSTexture* GSDevice9::CreateDepthStencil(int w, int h, int format) +GSTexture* GSDevice9::CreateDepthStencil(int w, int h, bool msaa, int format) { - return __super::CreateDepthStencil(w, h, format ? format : D3DFMT_D24S8); + return __super::CreateDepthStencil(w, h, msaa, format ? format : D3DFMT_D24S8); } GSTexture* GSDevice9::CreateTexture(int w, int h, int format) @@ -534,6 +557,22 @@ GSTexture* GSDevice9::CreateOffscreen(int w, int h, int format) return __super::CreateOffscreen(w, h, format ? 
format : D3DFMT_A8R8G8B8); } +GSTexture* GSDevice9::Resolve(GSTexture* t) +{ + ASSERT(t != NULL && t->IsMSAA()); + + if(GSTexture* dst = CreateRenderTarget(t->GetWidth(), t->GetHeight(), false, t->GetFormat())) + { + dst->SetScale(t->GetScale()); + + m_dev->StretchRect(*(GSTexture9*)t, NULL, *(GSTexture9*)dst, NULL, D3DTEXF_POINT); + + return dst; + } + + return NULL; +} + GSTexture* GSDevice9::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format) { GSTexture* dst = NULL; @@ -550,11 +589,16 @@ GSTexture* GSDevice9::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, return false; } - if(GSTexture* rt = CreateRenderTarget(w, h, format)) + if(GSTexture* rt = CreateRenderTarget(w, h, false, format)) { GSVector4 dr(0, 0, w, h); - StretchRect(src, sr, rt, dr, m_convert.ps[1], NULL, 0); + if(GSTexture* src2 = src->IsMSAA() ? Resolve(src) : src) + { + StretchRect(src2, sr, rt, dr, m_convert.ps[1], NULL, 0); + + if(src2 != src) Recycle(src2); + } dst = CreateOffscreen(w, h, format); @@ -933,7 +977,7 @@ void GSDevice9::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4 m_dev->SetDepthStencilSurface(dsv); } - GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy(); + GSVector4i r = scissor ? 
*scissor : GSVector4i(rt->GetSize()).zwxy(); if(!m_state.scissor.eq(r)) { diff --git a/plugins/GSdx/GSDevice9.h b/plugins/GSdx/GSDevice9.h index 8069f96198..07f4382a42 100644 --- a/plugins/GSdx/GSDevice9.h +++ b/plugins/GSdx/GSDevice9.h @@ -61,7 +61,7 @@ struct Direct3DBlendState9 class GSDevice9 : public GSDevice { - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0); @@ -142,11 +142,13 @@ public: void ClearDepth(GSTexture* t, float c); void ClearStencil(GSTexture* t, uint8 c); - GSTexture* CreateRenderTarget(int w, int h, int format = 0); - GSTexture* CreateDepthStencil(int w, int h, int format = 0); + GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); + GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); GSTexture* CreateTexture(int w, int h, int format = 0); GSTexture* CreateOffscreen(int w, int h, int format = 0); + GSTexture* Resolve(GSTexture* t); + GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0); void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r); diff --git a/plugins/GSdx/GSDeviceNull.cpp b/plugins/GSdx/GSDeviceNull.cpp index 473782856f..9839643e29 100644 --- a/plugins/GSdx/GSDeviceNull.cpp +++ b/plugins/GSdx/GSDeviceNull.cpp @@ -42,7 +42,7 @@ bool GSDeviceNull::Reset(int w, int h, int mode) return true; } -GSTexture* GSDeviceNull::Create(int type, int w, int h, int format) +GSTexture* GSDeviceNull::Create(int type, int w, int h, bool msaa, int format) { return new GSTextureNull(type, w, h, format); } diff --git a/plugins/GSdx/GSDeviceNull.h b/plugins/GSdx/GSDeviceNull.h index b69841d70a..dbd19feb28 100644 --- a/plugins/GSdx/GSDeviceNull.h +++ b/plugins/GSdx/GSDeviceNull.h @@ -27,7 
+27,7 @@ class GSDeviceNull : public GSDevice { private: - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c) {} void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0) {} diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 784e201471..147809639b 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -252,7 +252,7 @@ void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) glClear(GL_STENCIL_BUFFER_BIT); CheckError(); } -GSTexture* GSDeviceOGL::Create(int type, int w, int h, int format) +GSTexture* GSDeviceOGL::Create(int type, int w, int h, bool msaa, int format) { GLuint texture = 0; @@ -296,14 +296,14 @@ GSTexture* GSDeviceOGL::Create(int type, int w, int h, int format) return t; } -GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, int format) +GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format) { - return __super::CreateRenderTarget(w, h, format ? format : GL_RGBA8); + return __super::CreateRenderTarget(w, h, msaa, format ? format : GL_RGBA8); } -GSTexture* GSDeviceOGL::CreateDepthStencil(int w, int h, int format) +GSTexture* GSDeviceOGL::CreateDepthStencil(int w, int h, bool msaa, int format) { - return __super::CreateDepthStencil(w, h, format ? format : GL_DEPTH32F_STENCIL8); // TODO: GL_DEPTH24_STENCIL8_EXT, GL_DEPTH24_STENCIL8 + return __super::CreateDepthStencil(w, h, msaa, format ? 
format : GL_DEPTH32F_STENCIL8); // TODO: GL_DEPTH24_STENCIL8_EXT, GL_DEPTH24_STENCIL8 } GSTexture* GSDeviceOGL::CreateTexture(int w, int h, int format) @@ -509,14 +509,14 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_DEPTH_COMPONENT, dsi); CheckError(); } - if(m_viewport != rt->m_size) + if(m_viewport != rt->GetSize()) { - m_viewport = rt->m_size; + m_viewport = rt->GetSize(); - glViewport(0, 0, rt->m_size.x, rt->m_size.y); CheckError(); + glViewport(0, 0, rt->GetWidth(), rt->GetHeight()); CheckError(); } - GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy(); + GSVector4i r = scissor ? *scissor : GSVector4i(rt->GetSize()).zwxy(); if(!m_scissor.eq(r)) { diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 3d3c375258..eb0ecf6533 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -100,7 +100,7 @@ class GSDeviceOGL : public GSDevice // - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0); @@ -121,8 +121,8 @@ public: void ClearDepth(GSTexture* t, float c); void ClearStencil(GSTexture* t, uint8 c); - GSTexture* CreateRenderTarget(int w, int h, int format = 0); - GSTexture* CreateDepthStencil(int w, int h, int format = 0); + GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); + GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); GSTexture* CreateTexture(int w, int h, int format = 0); GSTexture* CreateOffscreen(int w, int h, int format = 0); diff --git a/plugins/GSdx/GSRenderer.cpp b/plugins/GSdx/GSRenderer.cpp index a191366f9c..7964640209 100644 --- a/plugins/GSdx/GSRenderer.cpp +++ 
b/plugins/GSdx/GSRenderer.cpp @@ -208,7 +208,7 @@ bool GSRenderer::Merge(int field) r += GSVector4i(0, 1).xyxy(); } - GSVector4 scale = GSVector4(tex[i]->m_scale).xyxy(); + GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy(); @@ -216,7 +216,7 @@ bool GSRenderer::Merge(int field) if(dr[i].top - baseline >= 4) // 2? { - o.y = tex[i]->m_scale.y * (dr[i].top - baseline); + o.y = tex[i]->GetScale().y * (dr[i].top - baseline); if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { @@ -251,7 +251,7 @@ bool GSRenderer::Merge(int field) int field2 = 1 - ((m_interlace - 1) & 1); int mode = (m_interlace - 1) >> 1; - m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->m_scale.y : tex[0]->m_scale.y); + m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } } diff --git a/plugins/GSdx/GSRendererDX.h b/plugins/GSdx/GSRendererDX.h index 7ff1187eca..2d0bd04b23 100644 --- a/plugins/GSdx/GSRendererDX.h +++ b/plugins/GSdx/GSRendererDX.h @@ -163,12 +163,12 @@ public: GSTextureFX::VSConstantBuffer vs_cb; - float sx = 2.0f * rt->m_scale.x / (rt->m_size.x << 4); - float sy = 2.0f * rt->m_scale.y / (rt->m_size.y << 4); + float sx = 2.0f * rt->GetScale().x / (rt->GetWidth() << 4); + float sy = 2.0f * rt->GetScale().y / (rt->GetHeight() << 4); float ox = (float)(int)context->XYOFFSET.OFX; float oy = (float)(int)context->XYOFFSET.OFY; - float ox2 = 2.0f * m_pixelcenter.x / rt->m_size.x; - float oy2 = 2.0f * m_pixelcenter.y / rt->m_size.y; + float ox2 = 2.0f * m_pixelcenter.x / rt->GetWidth(); + float oy2 = 2.0f * m_pixelcenter.y / rt->GetHeight(); vs_cb.VertexScale = GSVector4(sx, -sy, 1.0f / UINT_MAX, 0.0f); vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); @@ -230,8 +230,8 @@ public: ps_sel.ltf = m_filter == 2 ? 
IsLinear() : m_filter; ps_sel.rt = tex->m_target; - int w = tex->m_texture->m_size.x; - int h = tex->m_texture->m_size.y; + int w = tex->m_texture->GetWidth(); + int h = tex->m_texture->GetHeight(); int tw = (int)(1 << context->TEX0.TW); int th = (int)(1 << context->TEX0.TH); @@ -266,7 +266,7 @@ public: // rs - GSVector4i scissor = GSVector4i(GSVector4(rt->m_scale).xyxy() * context->scissor.in).rintersect(GSVector4i(rt->GetSize()).zwxy()); + GSVector4i scissor = GSVector4i(GSVector4(rt->GetScale()).xyxy() * context->scissor.in).rintersect(GSVector4i(rt->GetSize()).zwxy()); m_dev->OMSetRenderTargets(rt, ds, &scissor); m_dev->PSSetShaderResources(tex ? tex->m_texture : NULL, tex ? tex->m_palette : NULL); diff --git a/plugins/GSdx/GSRendererDX10.cpp b/plugins/GSdx/GSRendererDX10.cpp index e4da9d823f..3f44ea790f 100644 --- a/plugins/GSdx/GSRendererDX10.cpp +++ b/plugins/GSdx/GSRendererDX10.cpp @@ -199,9 +199,9 @@ void GSRendererDX10::SetupDATE(GSTexture* rt, GSTexture* ds) GSDevice10* dev = (GSDevice10*)m_dev; - const GSVector2i& size = rt->m_size; + const GSVector2i& size = rt->GetSize(); - if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y)) + if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y, rt->IsMSAA())) { // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows @@ -217,7 +217,7 @@ void GSRendererDX10::SetupDATE(GSTexture* rt, GSTexture* ds) // ia - GSVector4 s = GSVector4(rt->m_scale.x / size.x, rt->m_scale.y / size.y); + GSVector4 s = GSVector4(rt->GetScale().x / size.x, rt->GetScale().y / size.y); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); @@ -245,7 +245,9 @@ void GSRendererDX10::SetupDATE(GSTexture* rt, GSTexture* ds) // ps - dev->PSSetShaderResources(rt, NULL); + GSTexture* rt2 = rt->IsMSAA() ? 
dev->Resolve(rt) : rt; + + dev->PSSetShaderResources(rt2, NULL); dev->PSSetShader(dev->m_convert.ps[m_context->TEST.DATM ? 2 : 3], NULL); dev->PSSetSamplerState(dev->m_convert.pt, NULL); @@ -258,5 +260,7 @@ void GSRendererDX10::SetupDATE(GSTexture* rt, GSTexture* ds) dev->EndScene(); dev->Recycle(t); + + if(rt2 != rt) dev->Recycle(rt2); } } diff --git a/plugins/GSdx/GSRendererDX11.cpp b/plugins/GSdx/GSRendererDX11.cpp index ac14308100..55930cdef2 100644 --- a/plugins/GSdx/GSRendererDX11.cpp +++ b/plugins/GSdx/GSRendererDX11.cpp @@ -199,9 +199,9 @@ void GSRendererDX11::SetupDATE(GSTexture* rt, GSTexture* ds) GSDevice11* dev = (GSDevice11*)m_dev; - const GSVector2i& size = rt->m_size; + const GSVector2i& size = rt->GetSize(); - if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y)) + if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y, rt->IsMSAA())) { // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows @@ -217,7 +217,7 @@ void GSRendererDX11::SetupDATE(GSTexture* rt, GSTexture* ds) // ia - GSVector4 s = GSVector4(rt->m_scale.x / size.x, rt->m_scale.y / size.y); + GSVector4 s = GSVector4(rt->GetScale().x / size.x, rt->GetScale().y / size.y); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); @@ -245,7 +245,9 @@ void GSRendererDX11::SetupDATE(GSTexture* rt, GSTexture* ds) // ps - dev->PSSetShaderResources(rt, NULL); + GSTexture* rt2 = rt->IsMSAA() ? dev->Resolve(rt) : rt; + + dev->PSSetShaderResources(rt2, NULL); dev->PSSetShader(dev->m_convert.ps[m_context->TEST.DATM ? 
2 : 3], NULL); dev->PSSetSamplerState(dev->m_convert.pt, NULL); @@ -258,5 +260,7 @@ void GSRendererDX11::SetupDATE(GSTexture* rt, GSTexture* ds) dev->EndScene(); dev->Recycle(t); + + if(rt2 != rt) dev->Recycle(rt2); } } diff --git a/plugins/GSdx/GSRendererDX9.cpp b/plugins/GSdx/GSRendererDX9.cpp index 819c81d2e1..6e3774bdc5 100644 --- a/plugins/GSdx/GSRendererDX9.cpp +++ b/plugins/GSdx/GSRendererDX9.cpp @@ -216,9 +216,9 @@ void GSRendererDX9::SetupDATE(GSTexture* rt, GSTexture* ds) GSDevice9* dev = (GSDevice9*)m_dev; - const GSVector2i& size = rt->m_size; + const GSVector2i& size = rt->GetSize(); - if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y)) + if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y, rt->IsMSAA())) { // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows @@ -234,7 +234,7 @@ void GSRendererDX9::SetupDATE(GSTexture* rt, GSTexture* ds) // ia - GSVector4 s = GSVector4(rt->m_scale.x / size.x, rt->m_scale.y / size.y); + GSVector4 s = GSVector4(rt->GetScale().x / size.x, rt->GetScale().y / size.y); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); @@ -258,7 +258,9 @@ void GSRendererDX9::SetupDATE(GSTexture* rt, GSTexture* ds) // ps - dev->PSSetShaderResources(rt, NULL); + GSTexture* rt2 = rt->IsMSAA() ? dev->Resolve(rt) : rt; + + dev->PSSetShaderResources(rt2, NULL); dev->PSSetShader(dev->m_convert.ps[m_context->TEST.DATM ? 
2 : 3], NULL, 0); dev->PSSetSamplerState(&dev->m_convert.pt); @@ -271,6 +273,8 @@ void GSRendererDX9::SetupDATE(GSTexture* rt, GSTexture* ds) dev->EndScene(); dev->Recycle(t); + + if(rt2 != rt) dev->Recycle(rt2); } } @@ -287,7 +291,7 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt) // ia - GSVector4 s = GSVector4(rt->m_scale.x / rt->m_size.x, rt->m_scale.y / rt->m_size.y); + GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight()); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); diff --git a/plugins/GSdx/GSTexture.cpp b/plugins/GSdx/GSTexture.cpp index ebdbadedfa..055fc1c8a7 100644 --- a/plugins/GSdx/GSTexture.cpp +++ b/plugins/GSdx/GSTexture.cpp @@ -21,3 +21,11 @@ #include "StdAfx.h" #include "GSTexture.h" + +GSTexture::GSTexture() + : m_scale(1, 1) + , m_size(0, 0) + , m_type(None) + , m_msaa(false) +{ +} diff --git a/plugins/GSdx/GSTexture.h b/plugins/GSdx/GSTexture.h index c661b28405..74986c7224 100644 --- a/plugins/GSdx/GSTexture.h +++ b/plugins/GSdx/GSTexture.h @@ -25,30 +25,38 @@ class GSTexture { -public: +protected: GSVector2 m_scale; GSVector2i m_size; + int m_type; + int m_format; + bool m_msaa; +public: struct GSMap {uint8* bits; int pitch;}; enum {None, RenderTarget, DepthStencil, Texture, Offscreen}; public: - GSTexture() : m_scale(1, 1), m_size(0, 0) {} + GSTexture(); virtual ~GSTexture() {} virtual operator bool() {ASSERT(0); return false;} - virtual int GetType() const = 0; - virtual int GetFormat() const = 0; - virtual bool Update(const GSVector4i& r, const void* data, int pitch) = 0; virtual bool Map(GSMap& m, const GSVector4i* r = NULL) = 0; virtual void Unmap() = 0; virtual bool Save(const string& fn, bool dds = false) = 0; + GSVector2 GetScale() const {return m_scale;} + void SetScale(const GSVector2& scale) {m_scale = scale;} + int GetWidth() const {return m_size.x;} int GetHeight() const {return m_size.y;} - GSVector2i 
GetSize() const {return m_size;} + + int GetType() const {return m_type;} + int GetFormat() const {return m_format;} + + bool IsMSAA() const {return m_msaa;} }; diff --git a/plugins/GSdx/GSTexture10.cpp b/plugins/GSdx/GSTexture10.cpp index 4d02646f42..3d7a4be8eb 100644 --- a/plugins/GSdx/GSTexture10.cpp +++ b/plugins/GSdx/GSTexture10.cpp @@ -32,20 +32,15 @@ GSTexture10::GSTexture10(ID3D10Texture2D* texture) m_size.x = (int)m_desc.Width; m_size.y = (int)m_desc.Height; -} -int GSTexture10::GetType() const -{ - if(m_desc.BindFlags & D3D10_BIND_RENDER_TARGET) return GSTexture::RenderTarget; - if(m_desc.BindFlags & D3D10_BIND_DEPTH_STENCIL) return GSTexture::DepthStencil; - if(m_desc.BindFlags & D3D10_BIND_SHADER_RESOURCE) return GSTexture::Texture; - if(m_desc.Usage == D3D10_USAGE_STAGING) return GSTexture::Offscreen; - return GSTexture::None; -} + if(m_desc.BindFlags & D3D10_BIND_RENDER_TARGET) m_type = RenderTarget; + else if(m_desc.BindFlags & D3D10_BIND_DEPTH_STENCIL) m_type = DepthStencil; + else if(m_desc.BindFlags & D3D10_BIND_SHADER_RESOURCE) m_type = Texture; + else if(m_desc.Usage == D3D10_USAGE_STAGING) m_type = Offscreen; -int GSTexture10::GetFormat() const -{ - return m_desc.Format; + m_format = (int)m_desc.Format; + + m_msaa = m_desc.SampleDesc.Count > 1; } bool GSTexture10::Update(const GSVector4i& r, const void* data, int pitch) @@ -162,6 +157,8 @@ GSTexture10::operator ID3D10ShaderResourceView*() { if(!m_srv && m_dev && m_texture) { + ASSERT(!m_msaa); + D3D10_SHADER_RESOURCE_VIEW_DESC* desc = NULL; if(m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS) diff --git a/plugins/GSdx/GSTexture10.h b/plugins/GSdx/GSTexture10.h index c98ebcd051..da9d927279 100644 --- a/plugins/GSdx/GSTexture10.h +++ b/plugins/GSdx/GSTexture10.h @@ -35,9 +35,6 @@ class GSTexture10 : public GSTexture public: explicit GSTexture10(ID3D10Texture2D* texture); - int GetType() const; - int GetFormat() const; - bool Update(const GSVector4i& r, const void* data, int pitch); bool Map(GSMap& 
m, const GSVector4i* r); void Unmap(); diff --git a/plugins/GSdx/GSTexture11.cpp b/plugins/GSdx/GSTexture11.cpp index 3b3073dacd..286aec3b70 100644 --- a/plugins/GSdx/GSTexture11.cpp +++ b/plugins/GSdx/GSTexture11.cpp @@ -34,20 +34,15 @@ GSTexture11::GSTexture11(ID3D11Texture2D* texture) m_size.x = (int)m_desc.Width; m_size.y = (int)m_desc.Height; -} -int GSTexture11::GetType() const -{ - if(m_desc.BindFlags & D3D11_BIND_RENDER_TARGET) return GSTexture::RenderTarget; - if(m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL) return GSTexture::DepthStencil; - if(m_desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) return GSTexture::Texture; - if(m_desc.Usage == D3D11_USAGE_STAGING) return GSTexture::Offscreen; - return GSTexture::None; -} + if(m_desc.BindFlags & D3D11_BIND_RENDER_TARGET) m_type = RenderTarget; + else if(m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL) m_type = DepthStencil; + else if(m_desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) m_type = Texture; + else if(m_desc.Usage == D3D11_USAGE_STAGING) m_type = Offscreen; -int GSTexture11::GetFormat() const -{ - return m_desc.Format; + m_format = (int)m_desc.Format; + + m_msaa = m_desc.SampleDesc.Count > 1; } bool GSTexture11::Update(const GSVector4i& r, const void* data, int pitch) @@ -164,6 +159,8 @@ GSTexture11::operator ID3D11ShaderResourceView*() { if(!m_srv && m_dev && m_texture) { + ASSERT(!m_msaa); + m_dev->CreateShaderResourceView(m_texture, NULL, &m_srv); } diff --git a/plugins/GSdx/GSTexture11.h b/plugins/GSdx/GSTexture11.h index fc2a7061b6..8aad1258bd 100644 --- a/plugins/GSdx/GSTexture11.h +++ b/plugins/GSdx/GSTexture11.h @@ -36,9 +36,6 @@ class GSTexture11 : public GSTexture public: explicit GSTexture11(ID3D11Texture2D* texture); - int GetType() const; - int GetFormat() const; - bool Update(const GSVector4i& r, const void* data, int pitch); bool Map(GSMap& m, const GSVector4i* r); void Unmap(); diff --git a/plugins/GSdx/GSTexture7.cpp b/plugins/GSdx/GSTexture7.cpp index a3055c014a..3f52beecd4 100644 --- 
a/plugins/GSdx/GSTexture7.cpp +++ b/plugins/GSdx/GSTexture7.cpp @@ -23,8 +23,7 @@ #include "GSTexture7.h" GSTexture7::GSTexture7(int type, IDirectDrawSurface7* system) - : m_type(type) - , m_system(system) + : m_system(system) { memset(&m_desc, 0, sizeof(m_desc)); @@ -34,11 +33,14 @@ GSTexture7::GSTexture7(int type, IDirectDrawSurface7* system) m_size.x = (int)m_desc.dwWidth; m_size.y = (int)m_desc.dwHeight; + + m_type = type; + + m_format = (int)m_desc.ddpfPixelFormat.dwFourCC; } GSTexture7::GSTexture7(int type, IDirectDrawSurface7* system, IDirectDrawSurface7* video) - : m_type(type) - , m_system(system) + : m_system(system) , m_video(video) { memset(&m_desc, 0, sizeof(m_desc)); @@ -49,16 +51,10 @@ GSTexture7::GSTexture7(int type, IDirectDrawSurface7* system, IDirectDrawSurface m_size.x = (int)m_desc.dwWidth; m_size.y = (int)m_desc.dwHeight; -} -int GSTexture7::GetType() const -{ - return m_type; -} + m_type = type; -int GSTexture7::GetFormat() const -{ - return (int)m_desc.ddpfPixelFormat.dwFourCC; + m_format = (int)m_desc.ddpfPixelFormat.dwFourCC; } bool GSTexture7::Update(const GSVector4i& r, const void* data, int pitch) diff --git a/plugins/GSdx/GSTexture7.h b/plugins/GSdx/GSTexture7.h index 7d73cb0670..1daf555366 100644 --- a/plugins/GSdx/GSTexture7.h +++ b/plugins/GSdx/GSTexture7.h @@ -26,7 +26,6 @@ class GSTexture7 : public GSTexture { - int m_type; CComPtr m_system; CComPtr m_video; DDSURFACEDESC2 m_desc; @@ -35,9 +34,6 @@ public: GSTexture7(int type, IDirectDrawSurface7* system); GSTexture7(int type, IDirectDrawSurface7* system, IDirectDrawSurface7* video); - int GetType() const; - int GetFormat() const; - bool Update(const GSVector4i& r, const void* data, int pitch); bool Map(GSMap& m, const GSVector4i* r); void Unmap(); diff --git a/plugins/GSdx/GSTexture9.cpp b/plugins/GSdx/GSTexture9.cpp index 26106feb34..477ee37a27 100644 --- a/plugins/GSdx/GSTexture9.cpp +++ b/plugins/GSdx/GSTexture9.cpp @@ -38,6 +38,15 @@ GSTexture9::GSTexture9(IDirect3DSurface9* 
surface) m_size.x = (int)m_desc.Width; m_size.y = (int)m_desc.Height; + + if(m_desc.Usage & D3DUSAGE_RENDERTARGET) m_type = RenderTarget; + else if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) m_type = DepthStencil; + else if(m_desc.Pool == D3DPOOL_MANAGED) m_type = Texture; + else if(m_desc.Pool == D3DPOOL_SYSTEMMEM) m_type = Offscreen; + + m_format = (int)m_desc.Format; + + m_msaa = m_desc.MultiSampleType != D3DMULTISAMPLE_NONE; } GSTexture9::GSTexture9(IDirect3DTexture9* texture) @@ -52,26 +61,21 @@ GSTexture9::GSTexture9(IDirect3DTexture9* texture) m_size.x = (int)m_desc.Width; m_size.y = (int)m_desc.Height; + + if(m_desc.Usage & D3DUSAGE_RENDERTARGET) m_type = RenderTarget; + else if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) m_type = DepthStencil; + else if(m_desc.Pool == D3DPOOL_MANAGED) m_type = Texture; + else if(m_desc.Pool == D3DPOOL_SYSTEMMEM) m_type = Offscreen; + + m_format = (int)m_desc.Format; + + m_msaa = m_desc.MultiSampleType > 1; } GSTexture9::~GSTexture9() { } -int GSTexture9::GetType() const -{ - if(m_desc.Usage & D3DUSAGE_RENDERTARGET) return GSTexture::RenderTarget; - if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) return GSTexture::DepthStencil; - if(m_desc.Pool == D3DPOOL_MANAGED) return GSTexture::Texture; - if(m_desc.Pool == D3DPOOL_SYSTEMMEM) return GSTexture::Offscreen; - return GSTexture::None; -} - -int GSTexture9::GetFormat() const -{ - return m_desc.Format; -} - bool GSTexture9::Update(const GSVector4i& r, const void* data, int pitch) { if(m_surface) diff --git a/plugins/GSdx/GSTexture9.h b/plugins/GSdx/GSTexture9.h index a2a0297dba..e28b085278 100644 --- a/plugins/GSdx/GSTexture9.h +++ b/plugins/GSdx/GSTexture9.h @@ -35,9 +35,6 @@ public: explicit GSTexture9(IDirect3DTexture9* texture); virtual ~GSTexture9(); - int GetType() const; - int GetFormat() const; - bool Update(const GSVector4i& r, const void* data, int pitch); bool Map(GSMap& m, const GSVector4i* r); void Unmap(); diff --git a/plugins/GSdx/GSTextureCache.cpp 
b/plugins/GSdx/GSTextureCache.cpp index d9a254e9c0..29835c9857 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -190,8 +190,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int if(ww > 0 && hh > 0) { - dst->m_texture->m_scale.x = (float)w / ww; - dst->m_texture->m_scale.y = (float)h / hh; + dst->m_texture->SetScale(GSVector2((float)w / ww, (float)h / hh)); } } @@ -527,10 +526,19 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con dst->Update(); + GSTexture* tmp = NULL; + + if(dst->m_texture->IsMSAA()) + { + tmp = dst->m_texture; + + dst->m_texture = m_renderer->m_dev->Resolve(dst->m_texture); + } + // do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows) - int w = (int)(dst->m_texture->m_scale.x * tw); - int h = (int)(dst->m_texture->m_scale.y * th); + int w = (int)(dst->m_texture->GetScale().x * tw); + int h = (int)(dst->m_texture->GetScale().y * th); GSVector2i dstsize = dst->m_texture->GetSize(); @@ -542,10 +550,10 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // ASSERT(dst->m_TEX0.TBW > TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO) - src->m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y); + src->m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y, false); GSVector4 size = GSVector4(dstsize).xyxy(); - GSVector4 scale = GSVector4(dst->m_texture->m_scale).xyxy(); + GSVector4 scale = GSVector4(dst->m_texture->GetScale()).xyxy(); int bw = 64; int bh = TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24 ? 
32 : 64; @@ -588,28 +596,28 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // width/height conversion - GSVector2 scale = dst->m_texture->m_scale; + GSVector2 scale = dst->m_texture->GetScale(); GSVector4 dr(0, 0, w, h); if(w > dstsize.x) { scale.x = (float)dstsize.x / tw; - dr.z = (float)dstsize.x * scale.x / dst->m_texture->m_scale.x; + dr.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x; w = dstsize.x; } if(h > dstsize.y) { scale.y = (float)dstsize.y / th; - dr.w = (float)dstsize.y * scale.y / dst->m_texture->m_scale.y; + dr.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y; h = dstsize.y; } GSVector4 sr(0, 0, w, h); GSTexture* st = src->m_texture ? src->m_texture : dst->m_texture; - GSTexture* dt = m_renderer->m_dev->CreateRenderTarget(w, h); + GSTexture* dt = m_renderer->m_dev->CreateRenderTarget(w, h, false); if(!src->m_texture) { @@ -622,8 +630,8 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } else { - sr.z /= st->m_size.x; - sr.w /= st->m_size.y; + sr.z /= st->GetWidth(); + sr.w /= st->GetHeight(); m_renderer->m_dev->StretchRect(st, sr, dt, dr); } @@ -635,7 +643,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_texture = dt; } - src->m_texture->m_scale = scale; + src->m_texture->SetScale(scale); switch(TEX0.PSM) { @@ -664,6 +672,13 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); break; } + + if(tmp != NULL) + { + m_renderer->m_dev->Recycle(dst->m_texture); + + dst->m_texture = tmp; + } } if(src->m_texture == NULL) @@ -697,13 +712,13 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int if(type == RenderTarget) { - t->m_texture = m_renderer->m_dev->CreateRenderTarget(w, h); + t->m_texture = m_renderer->m_dev->CreateRenderTarget(w, h, true); t->m_used = true; // FIXME } else if(type == 
DepthStencil) { - t->m_texture = m_renderer->m_dev->CreateDepthStencil(w, h); + t->m_texture = m_renderer->m_dev->CreateDepthStencil(w, h, true); } if(t->m_texture == NULL) @@ -1008,7 +1023,7 @@ void GSTextureCache::Target::Update() // m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4); - m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->m_scale).xyxy()); + m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy()); m_renderer->m_dev->Recycle(t); } diff --git a/plugins/GSdx/GSTextureCache10.cpp b/plugins/GSdx/GSTextureCache10.cpp index 4e02c95e84..cc71f28341 100644 --- a/plugins/GSdx/GSTextureCache10.cpp +++ b/plugins/GSdx/GSTextureCache10.cpp @@ -60,7 +60,7 @@ void GSTextureCache10::Read(Target* t, const GSVector4i& r) int w = r.width(); int h = r.height(); - GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->m_scale).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); + GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); DXGI_FORMAT format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; diff --git a/plugins/GSdx/GSTextureCache11.cpp b/plugins/GSdx/GSTextureCache11.cpp index 0af2c06da7..9fd664d026 100644 --- a/plugins/GSdx/GSTextureCache11.cpp +++ b/plugins/GSdx/GSTextureCache11.cpp @@ -60,7 +60,7 @@ void GSTextureCache11::Read(Target* t, const GSVector4i& r) int w = r.width(); int h = r.height(); - GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->m_scale).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); + GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); DXGI_FORMAT format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; diff --git a/plugins/GSdx/GSTextureCache9.cpp b/plugins/GSdx/GSTextureCache9.cpp index f7b8c14c0e..571db7a759 100644 --- a/plugins/GSdx/GSTextureCache9.cpp +++ b/plugins/GSdx/GSTextureCache9.cpp @@ -60,7 +60,7 @@ void GSTextureCache9::Read(Target* t, const GSVector4i& r) int w = r.width(); int h = r.height(); - GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->m_scale).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); + GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h)) { diff --git a/plugins/GSdx/GSTextureOGL.cpp b/plugins/GSdx/GSTextureOGL.cpp index 49c4f7b2da..5b109e0abd 100644 --- a/plugins/GSdx/GSTextureOGL.cpp +++ b/plugins/GSdx/GSTextureOGL.cpp @@ -25,8 +25,6 @@ GSTextureOGL::GSTextureOGL(GLuint texture, int type, int width, int height, int format) : m_texture(texture) - , m_type(type) - , m_format(format) { m_size.x = width; m_size.y = height; @@ -34,6 +32,10 @@ GSTextureOGL::GSTextureOGL(GLuint texture, int type, int width, int height, int // TODO: offscreen type should be just a memory array, also returned in Map glGenBuffers(1, &m_pbo); GSDeviceOGL::CheckError(); + + m_type = type; + + m_format = format; } GSTextureOGL::~GSTextureOGL() @@ -57,16 +59,6 @@ GSTextureOGL::~GSTextureOGL() } } -int GSTextureOGL::GetType() const -{ - return m_type; -} - -int GSTextureOGL::GetFormat() const -{ - return m_format; -} - bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) { glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pbo); GSDeviceOGL::CheckError(); diff --git a/plugins/GSdx/GSTextureOGL.h b/plugins/GSdx/GSTextureOGL.h index 5a992223d1..f6169cd4fc 100644 --- a/plugins/GSdx/GSTextureOGL.h +++ b/plugins/GSdx/GSTextureOGL.h @@ -35,9 +35,6 @@ public: GSTextureOGL(GLuint texture, int type, int width, int height, int format = 0); virtual 
~GSTextureOGL(); - int GetType() const; - int GetFormat() const; - bool Update(const GSVector4i& r, const void* data, int pitch); bool Map(GSMap& m, const GSVector4i* r); void Unmap(); diff --git a/plugins/GSdx/GSdx_vs2010.vcxproj b/plugins/GSdx/GSdx_vs2010.vcxproj index e10b7f43a0..9ad2e1e88f 100644 --- a/plugins/GSdx/GSdx_vs2010.vcxproj +++ b/plugins/GSdx/GSdx_vs2010.vcxproj @@ -138,12 +138,10 @@ DynamicLibrary - false MultiByte DynamicLibrary - false MultiByte true @@ -160,7 +158,6 @@ DynamicLibrary - false MultiByte @@ -171,7 +168,6 @@ DynamicLibrary - false MultiByte true @@ -183,7 +179,6 @@ DynamicLibrary - false MultiByte true @@ -195,7 +190,6 @@ DynamicLibrary - false MultiByte @@ -205,7 +199,6 @@ DynamicLibrary - false MultiByte @@ -215,7 +208,6 @@ DynamicLibrary - false MultiByte true From 570e4ce85ecbbd49067a53c02b401093d921490f Mon Sep 17 00:00:00 2001 From: gabest11 Date: Mon, 3 Aug 2009 00:17:35 +0000 Subject: [PATCH 15/18] GSdx: fixed dx9 + msaa=0 git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1598 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSDevice9.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/plugins/GSdx/GSDevice9.cpp b/plugins/GSdx/GSDevice9.cpp index e47ca99785..a3b45ed01e 100644 --- a/plugins/GSdx/GSDevice9.cpp +++ b/plugins/GSdx/GSDevice9.cpp @@ -490,8 +490,6 @@ GSTexture* GSDevice9::Create(int type, int w, int h, bool msaa, int format) CComPtr texture; CComPtr surface; - // TODO: msaa - switch(type) { case GSTexture::RenderTarget: @@ -499,7 +497,8 @@ GSTexture* GSDevice9::Create(int type, int w, int h, bool msaa, int format) else hr = m_dev->CreateTexture(w, h, 1, D3DUSAGE_RENDERTARGET, (D3DFORMAT)format, D3DPOOL_DEFAULT, &texture, NULL); break; case GSTexture::DepthStencil: - hr = m_dev->CreateDepthStencilSurface(w, h, (D3DFORMAT)format, (D3DMULTISAMPLE_TYPE)m_msaa_desc.Count, m_msaa_desc.Quality, FALSE, &surface, NULL); + if(msaa) hr = m_dev->CreateDepthStencilSurface(w, h, (D3DFORMAT)format, 
(D3DMULTISAMPLE_TYPE)m_msaa_desc.Count, m_msaa_desc.Quality, FALSE, &surface, NULL); + else hr = m_dev->CreateDepthStencilSurface(w, h, (D3DFORMAT)format, D3DMULTISAMPLE_NONE, 0, FALSE, &surface, NULL); break; case GSTexture::Texture: hr = m_dev->CreateTexture(w, h, 1, 0, (D3DFORMAT)format, D3DPOOL_MANAGED, &texture, NULL); From a2c4aad28e579e1912da9875aaa0ae2138004254 Mon Sep 17 00:00:00 2001 From: cottonvibes Date: Mon, 3 Aug 2009 00:43:14 +0000 Subject: [PATCH 16/18] microVU: - Normal Clamp Mode clamps a few more stuff (Fixes Ice Age 3 falling through floor bug) git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1599 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/x86/microVU_Compile.inl | 16 ++++++++++++++-- pcsx2/x86/microVU_Misc.h | 3 ++- pcsx2/x86/microVU_Upper.inl | 6 ++++-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 28555dd9fd..e7c5769bb8 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -46,7 +46,6 @@ #define incQ() { mVU->q = (mVU->q+1) & 1; } #define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); } #define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); } -#define doIbit() { if (mVUup.iBit) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } } #define blockCreate(addr) { if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager(); } //------------------------------------------------------------------ @@ -76,6 +75,19 @@ microVUt(void) doSwapOp(mV) { else { mVUopL(mVU, 1); incPC(1); doUpperOp(); } } +microVUt(void) doIbit(mV) { + if (mVUup.iBit) { + incPC(-1); + if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) { + Console::Status("microVU%d: Clamping I Reg", params mVU->index); + int tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg + MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI); + } + else MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); + incPC(1); + } +} + // Used by mVUsetupRange 
microVUt(void) mVUcheckIsSame(mV) { @@ -434,7 +446,7 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { for (x = 0; x < endCount; x++) { if (mVUinfo.isEOB) { x = 0xffff; } if (mVUup.mBit) { OR32ItoM((uptr)&mVU->regs->flags, VUFLAG_MFLAGSET); } - if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(); } + if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(mVU); } else if (!mVUinfo.swapOps) { incPC(1); doUpperOp(); doLowerOp(); } else { doSwapOp(mVU); } if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index 195c22a415..44c17a32fe 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -80,9 +80,10 @@ declareAllVariables #define _Z ((mVU->code>>22) & 0x1) #define _W ((mVU->code>>21) & 0x1) -#define _X_Y_Z_W (((mVU->code >> 21 ) & 0xF )) +#define _X_Y_Z_W (((mVU->code >> 21 ) & 0xF)) #define _XYZW_SS (_X+_Y+_Z+_W==1) #define _XYZW_SS2 (_XYZW_SS && (_X_Y_Z_W != 8)) +#define _XYZW_PS (_X_Y_Z_W == 0xf) #define _bc_ (mVU->code & 0x3) #define _bc_x ((mVU->code & 0x3) == 0) diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index 920ad96da2..d7c7f69acc 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -152,8 +152,10 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co } else { Fs = mVU->regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); } - opCase2 { if (opType == 2) { mVUclamp1(Fs, -1, _X_Y_Z_W); } } // Clamp Needed for alot of games (TOTA, DoM, etc...) - + opCase1 { if((opType == 2) && _XYZW_PS) { mVUclamp1(Ft, -1, _X_Y_Z_W); } } // Clamp Needed for Ice Age 3 (VU0) + opCase1 { if((opType == 2) && _XYZW_PS) { mVUclamp1(Fs, -1, _X_Y_Z_W); } } // Clamp Needed for Ice Age 3 (VU0) + opCase2 { if (opType == 2) { mVUclamp1(Fs, -1, _X_Y_Z_W); } } // Clamp Needed for alot of games (TOTA, DoM, etc...) 
+ if (_XYZW_SS) SSE_SS[opType](mVU, Fs, Ft, -1, -1); else SSE_PS[opType](mVU, Fs, Ft, -1, -1); From 3435ca8b749fd9d14d0559e2f0c5b36996f8f3d8 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Mon, 3 Aug 2009 11:01:36 +0000 Subject: [PATCH 17/18] GSdx: fixed the crash when booting through the bios git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1600 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GS.h | 31 ++++++++++++++++++++++--------- plugins/GSdx/GSClut.h | 2 +- plugins/GSdx/GSRenderer.h | 2 +- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index ef73ea85f5..657f96d5d0 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -377,15 +377,28 @@ REG64_(GSReg, IMR) REG_END REG64_(GSReg, PMODE) - uint32 EN1:1; - uint32 EN2:1; - uint32 CRTMD:3; - uint32 MMOD:1; - uint32 AMOD:1; - uint32 SLBG:1; - uint32 ALP:8; - uint32 _PAD:16; - uint32 _PAD1:32; +union +{ + struct + { + uint32 EN1:1; + uint32 EN2:1; + uint32 CRTMD:3; + uint32 MMOD:1; + uint32 AMOD:1; + uint32 SLBG:1; + uint32 ALP:8; + uint32 _PAD:16; + uint32 _PAD1:32; + }; + + struct + { + uint32 EN:2; + uint32 _PAD2:30; + uint32 _PAD3:32; + }; +}; REG_END REG64_(GSReg, SIGLBLID) diff --git a/plugins/GSdx/GSClut.h b/plugins/GSdx/GSClut.h index e4301ac6fd..1ac00ba328 100644 --- a/plugins/GSdx/GSClut.h +++ b/plugins/GSdx/GSClut.h @@ -71,7 +71,7 @@ __declspec(align(16)) class GSClut : public GSAlignedClass<16> template void WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); template void WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) {ASSERT(0);} // xenosaga 3 + void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) {} // xenosaga 3, bios static void WriteCLUT_T32_I8_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut); static void WriteCLUT_T32_I4_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut); diff --git 
a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index d671bdc7ea..484a144561 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -80,7 +80,7 @@ public: virtual bool CanUpscale() { - return !m_nativeres; + return !m_nativeres && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition) } }; From a41f80cfcaba47b312224605c4f10660af316490 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Mon, 3 Aug 2009 11:05:24 +0000 Subject: [PATCH 18/18] memcpy on PcsxConfig made pcsx2 crash upon exiting, when the string members were deallocated, I guess they were added later. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1601 96395faa-99c1-11dd-bbfe-3dabce05a288 --- pcsx2/windows/WinMain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/windows/WinMain.cpp b/pcsx2/windows/WinMain.cpp index c3e0241878..9451239bf8 100644 --- a/pcsx2/windows/WinMain.cpp +++ b/pcsx2/windows/WinMain.cpp @@ -195,7 +195,7 @@ void WinRun() // Load the command line overrides for plugins. // Back up the user's preferences in winConfig. - memcpy( &winConfig, &Config, sizeof( PcsxConfig ) ); + winConfig = Config; // don't use memcpy _doPluginOverride( "GS", g_Startup.gsdll, Config.Plugins.GS ); _doPluginOverride( "CDVD", g_Startup.cdvddll, Config.Plugins.CDVD );