diff --git a/common/include/Pcsx2Config.h b/common/include/Pcsx2Config.h index e7a891d8f0..94cdec2542 100644 --- a/common/include/Pcsx2Config.h +++ b/common/include/Pcsx2Config.h @@ -118,6 +118,10 @@ extern SessionOverrideFlags g_Session; #define EE_CONST_PROP // rec2 - enables constant propagation (faster) +// These are broken, so don't enable. +//#define PCSX2_CACHE_EMU_MEM +//#define ENABLECACHE + // Memory Card configuration, per slot. struct McdConfig { diff --git a/common/include/x86emitter/legacy_instructions.h b/common/include/x86emitter/legacy_instructions.h index 192601504b..3cdd3422b9 100644 --- a/common/include/x86emitter/legacy_instructions.h +++ b/common/include/x86emitter/legacy_instructions.h @@ -1363,6 +1363,7 @@ extern void SSE4_PMINSD_M128_to_XMM(x86SSERegType to, uptr from); extern void SSE4_PMAXUD_M128_to_XMM(x86SSERegType to, uptr from); extern void SSE4_PMINUD_M128_to_XMM(x86SSERegType to, uptr from); extern void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +extern void SSE4_PTEST_XMM_to_XMM(x86SSERegType to, x86SSERegType from); //********************* // 3DNOW instructions * diff --git a/common/src/x86emitter/legacy_sse.cpp b/common/src/x86emitter/legacy_sse.cpp index 93bc311d2a..c3c4163264 100644 --- a/common/src/x86emitter/legacy_sse.cpp +++ b/common/src/x86emitter/legacy_sse.cpp @@ -334,8 +334,6 @@ emitterT void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) emitterT void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } emitterT void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ) { xPSHUF.HW( xRegisterSSE(to), (void*)from, imm8 ); } -emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMUL.DQ( xRegisterSSE(to), xRegisterSSE(from) ); } - emitterT void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.LPS( xRegisterSSE(to), (void*)from ); } emitterT void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { xUNPCK.LPS( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ) { xUNPCK.HPS( xRegisterSSE(to), (void*)from ); } @@ -360,7 +358,7 @@ emitterT void SSE2_PMADDWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { emitterT void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xHADD.PS( xRegisterSSE(to), xRegisterSSE(from) ); } emitterT void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from) { xHADD.PS( xRegisterSSE(to), (void*)from ); } -emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8) { xPINSR.D( xRegisterSSE(to), xRegister32(from), imm8 ); } +emitterT void SSE4_PINSRD_R32_to_XMM(x86SSERegType to, x86IntRegType from, u8 imm8) { xPINSR.D( xRegisterSSE(to), xRegister32(from), imm8 ); } emitterT void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) { xINSERTPS( xRegisterSSE(to), xRegisterSSE(from), imm8 ); } emitterT void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) { xEXTRACTPS( xRegister32(to), xRegisterSSE(from), imm8 ); } @@ -374,6 +372,8 @@ emitterT void SSE4_BLENDVPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { emitterT void SSE4_BLENDVPS_M128_to_XMM(x86SSERegType to, uptr from) { xBLEND.VPS( xRegisterSSE(to), (void*)from ); } emitterT void SSE4_PMOVSXDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMOVSX.DQ( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_PMULDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPMUL.DQ( xRegisterSSE(to), xRegisterSSE(from) ); } +emitterT void SSE4_PTEST_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { xPTEST(xRegisterSSE(to), xRegisterSSE(from)); } emitterT void SSE_LDMXCSR( uptr from ) { xLDMXCSR( (u32*)from ); } diff --git a/common/src/x86emitter/simd.cpp b/common/src/x86emitter/simd.cpp index 7a4b02f24e..323ec7302f 100644 --- a/common/src/x86emitter/simd.cpp +++ b/common/src/x86emitter/simd.cpp @@ -114,7 +114,7 @@ const SimdImpl_DestRegEither<0x66,0xef> xPXOR; // [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag // only if all bits in the result are 0. PTEST also sets the CF flag according // to the following condition: (xmm2/m128 AND NOT xmm1) == 0; -extern const Internal::SimdImpl_DestRegSSE<0x66,0x1738> xPTEST; +const SimdImpl_DestRegSSE<0x66,0x1738> xPTEST; const SimdImpl_Compare xCMPEQ; const SimdImpl_Compare xCMPLT; diff --git a/pcsx2/CDVD/CDVDisoReader.cpp b/pcsx2/CDVD/CDVDisoReader.cpp index 4ded3aafde..c4a9e52d80 100644 --- a/pcsx2/CDVD/CDVDisoReader.cpp +++ b/pcsx2/CDVD/CDVDisoReader.cpp @@ -43,7 +43,8 @@ void CALLBACK ISOclose() s32 CALLBACK ISOopen(const char* pTitle) { - //if (pTitle != NULL) strcpy(isoFileName, pTitle); + if ((pTitle != NULL) && (strlen(pTitle) > 0)) + strcpy(isoFileName, pTitle); ISOclose(); // just in case diff --git a/pcsx2/Cache.cpp b/pcsx2/Cache.cpp index 7a996a81f7..1e066755ee 100644 --- a/pcsx2/Cache.cpp +++ b/pcsx2/Cache.cpp @@ -26,9 +26,8 @@ _cacheS pCache[64]; namespace R5900{ namespace Interpreter { - -// fixme - this code no longer compiles if PCSX2_CACHE_EMU_MEM is defined - do we need it any more? #ifdef PCSX2_CACHE_EMU_MEM + int getFreeCache(u32 mem, int mode, int * way) { u8 * out; u32 paddr; @@ -37,15 +36,16 @@ int getFreeCache(u32 mem, int mode, int * way) { int number; int i = (mem >> 6) & 0x3F; - paddr = memLUTR[mem >> 12]; - taddr[0] = memLUTW[pCache[i].tag[0]>>12]; - taddr[1] = memLUTW[pCache[i].tag[1]>>12]; + paddr = getMemR(mem); + taddr[0] = getMemW(pCache[i].tag[0]); + taddr[1] = getMemW(pCache[i].tag[1]); if (taddr[0] == paddr && (pCache[i].tag[0] & 0x20)) { *way = 0; return i; - }else if(taddr[1] == paddr && (pCache[i].tag[1] & 0x20)) + } + else if(taddr[1] == paddr && (pCache[i].tag[1] & 0x20)) { *way = 1; return i; @@ -66,8 +66,6 @@ int getFreeCache(u32 mem, int mode, int * way) { ((u64*)out)[6] = ((u64*)pCache[i].data[number][3].b8._8)[0]; ((u64*)out)[7] = ((u64*)pCache[i].data[number][3].b8._8)[1]; } - - if(mode == 1) { @@ -89,8 +87,10 @@ int getFreeCache(u32 mem, int mode, int * way) { ((u64*)pCache[i].data[number][3].b8._8)[0] = ((u64*)out)[6]; ((u64*)pCache[i].data[number][3].b8._8)[1] = ((u64*)out)[7]; - if(pCache[i].tag[number] & 0x10) pCache[i].tag[number] &= ~(0x10); - else pCache[i].tag[number] |= 0x10; + if(pCache[i].tag[number] & 0x10) + pCache[i].tag[number] &= ~(0x10); + else + pCache[i].tag[number] |= 0x10; pCache[i].tag[number] |= 0x20; *way = number; @@ -163,9 +163,9 @@ void CACHE() { int index = (addr >> 6) & 0x3F; u32 paddr[2]; int way; - u32 taddr = memLUTR[addr >> 12]; - paddr[0] = memLUTW[pCache[index].tag[0] >> 12]; - paddr[1] = memLUTW[pCache[index].tag[1] >> 12]; + u32 taddr = getMemR(addr); + paddr[0] = getMemW(pCache[index].tag[0]); + paddr[1] = getMemW(pCache[index].tag[1]); if(paddr[0] == taddr && (pCache[index].tag[0] & 0x20)) { @@ -199,9 +199,9 @@ void CACHE() { int index = (addr >> 6) & 0x3F; u32 paddr[2]; int way; - u32 taddr = memLUTW[addr >> 12]; - paddr[0] = memLUTW[pCache[index].tag[0] >> 12]; - paddr[1] = memLUTW[pCache[index].tag[1] >> 12]; + u32 taddr = getMemW(addr); + paddr[0] = getMemW(pCache[index].tag[0]); + paddr[1] = getMemW(pCache[index].tag[1]); if(paddr[0] == taddr && (pCache[index].tag[0] & 0x20)) { @@ -250,9 +250,9 @@ void CACHE() { int index = (addr >> 6) & 0x3F; u32 paddr[2]; int way; - u32 taddr = memLUTW[addr >> 12]; - paddr[0] = memLUTW[pCache[index].tag[0] >> 12]; - paddr[1] = memLUTW[pCache[index].tag[1] >> 12]; + u32 taddr = getMemW(addr); + paddr[0] = getMemW(pCache[index].tag[0]); + paddr[1] = getMemW(pCache[index].tag[1]); if(paddr[0] == taddr && (pCache[index].tag[0] & 0x20)) { @@ -360,7 +360,7 @@ void CACHE() { if(pCache[index].tag[way] & 0x60) // Dirty { - u32 paddr = memLUTW[pCache[index].tag[way] >> 12]; + u32 paddr = getMemW(pCache[index].tag[way]); char * t = (char *)(paddr); out = (u8*)(t + (addr & 0xFC0)); ((u64*)out)[0] = ((u64*)pCache[index].data[way][0].b8._8)[0]; diff --git a/pcsx2/Cache.h b/pcsx2/Cache.h index e9126fec69..de0ef9ca53 100644 --- a/pcsx2/Cache.h +++ b/pcsx2/Cache.h @@ -45,4 +45,15 @@ void writeCache64(u32 mem, u64 value); void writeCache128(u32 mem, u64 *value); u8 *readCache(u32 mem); +// Fixme - these two functions do nothing, and the cache code relies on these two functions. +static __forceinline u32 getMemR(s32 mem) +{ + return 0;//memLUTR[mem >> 12]; +} + +static __forceinline u32 getMemW(s32 mem) +{ + return 0;//memLUTW[mem>>12]; +} + #endif /* __CACHE_H__ */ diff --git a/pcsx2/Gif.cpp b/pcsx2/Gif.cpp index a61aa8b38b..34729262bc 100644 --- a/pcsx2/Gif.cpp +++ b/pcsx2/Gif.cpp @@ -41,7 +41,7 @@ enum gifstate_t // Should be a gifstate_t rather then int, but I don't feel like possibly interfering with savestates right now. static int gifstate = GIF_STATE_READY; -static u64 s_gstag = 0; // used for querying the last tag +//static u64 s_gstag = 0; // used for querying the last tag // This should be a bool, as should the return value of hwDmacSrcChainWithStack. // Next time I feel like breaking the save state, it will be. --arcum42 diff --git a/pcsx2/HostGui.h b/pcsx2/HostGui.h index 684a26dc7c..53f6b8a7e8 100644 --- a/pcsx2/HostGui.h +++ b/pcsx2/HostGui.h @@ -23,10 +23,15 @@ enum StartupMode { - BootMode_Bios, - BootMode_Quick, - BootMode_Elf, - BootMode_Normal + BootMode_Normal = 0, + BootMode_Elf = 1, // not compatible with bios flag, probably + BootMode_Iso = 2, + BootMode_NoDisc = 3, // nodisc implies bios. + + BootMode_Quick = 0, + BootMode_Bios = 0x10000, + + BootMode_ModeMask = 0xFFFF, }; class StartupParams diff --git a/pcsx2/IopHw.cpp b/pcsx2/IopHw.cpp index 69ed0cafc7..e85c8d16fd 100644 --- a/pcsx2/IopHw.cpp +++ b/pcsx2/IopHw.cpp @@ -667,7 +667,7 @@ void psxHwWrite8(u32 add, u8 value) { case 0x1f80380c: { - bool flush = false; + //bool flush = false; // Terminate lines on CR or full buffers, and ignore \n's if the string contents // are empty (otherwise terminate on \n too!) diff --git a/pcsx2/Memory.cpp b/pcsx2/Memory.cpp index 8733430063..99d7bc76d5 100644 --- a/pcsx2/Memory.cpp +++ b/pcsx2/Memory.cpp @@ -595,8 +595,8 @@ void memClearPageAddr(u32 vaddr) vtlb_VMapUnmap(vaddr,0x1000); // -> whut ? #ifdef FULLTLB - memLUTRK[vaddr >> 12] = 0; - memLUTWK[vaddr >> 12] = 0; +// memLUTRK[vaddr >> 12] = 0; +// memLUTWK[vaddr >> 12] = 0; #endif } diff --git a/pcsx2/Plugins.cpp b/pcsx2/Plugins.cpp index ebedc6c410..2fd07ec9a8 100644 --- a/pcsx2/Plugins.cpp +++ b/pcsx2/Plugins.cpp @@ -1430,6 +1430,10 @@ int OpenPlugins(const char* pTitleFilename) { if (!plugins_initialized) { + // prevent a crash + if(CDVD.init == NULL) + CDVD = ISO; // CDVD_plugin; + if( InitPlugins() == -1 ) return -1; } diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index 48c94d210e..08d968b908 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -99,7 +99,7 @@ int _getFreeXMMreg() for (i=0; iregs[xmmregs[i].reg] & (EEINST_LIVE0|EEINST_LIVE1|EEINST_LIVE2)) ) { + if (!(EEINST_ISLIVEXMM(xmmregs[i].reg))) { _freeXMMreg(i); return i; } @@ -144,12 +144,10 @@ int _getFreeXMMreg() } int _allocTempXMMreg(XMMSSEType type, int xmmreg) { - if (xmmreg == -1) { + if (xmmreg == -1) xmmreg = _getFreeXMMreg(); - } - else { + else _freeXMMreg(xmmreg); - } xmmregs[xmmreg].inuse = 1; xmmregs[xmmreg].type = XMMTYPE_TEMP; diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index 05897c981b..3aa9fcfbec 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -102,7 +102,11 @@ #define X86TYPE_VU1 0x80 -#define X86_ISVI(type) ((type&~X86TYPE_VU1) == X86TYPE_VI) +//#define X86_ISVI(type) ((type&~X86TYPE_VU1) == X86TYPE_VI) +static __forceinline int X86_ISVI(int type) +{ + return ((type&~X86TYPE_VU1) == X86TYPE_VI); +} struct _x86regs { u8 inuse; @@ -199,25 +203,37 @@ int _signExtendXMMtoM(u32 to, x86SSERegType from, int candestroy); // returns tr // only valid during writes. If write128, then upper 64bits are in an mmxreg // (mmreg&0xf). Constant is used from gprreg ((mmreg>>16)&0x1f) -#define MEM_EECONSTTAG 0x0100 // argument is a GPR and comes from g_cpuConstRegs -#define MEM_PSXCONSTTAG 0x0200 -#define MEM_MEMORYTAG 0x0400 -#define MEM_MMXTAG 0x0800 // mmreg is mmxreg -#define MEM_XMMTAG 0x8000 // mmreg is xmmreg -#define MEM_X86TAG 0x4000 // ignored most of the time -#define MEM_GPRTAG 0x2000 // argument is a GPR reg -#define MEM_CONSTTAG 0x1000 // argument is a const +enum memtag +{ + MEM_EECONSTTAG = 0x0100, // argument is a GPR and comes from g_cpuConstRegs + MEM_PSXCONSTTAG = 0x0200, + MEM_MEMORYTAG = 0x0400, + MEM_MMXTAG = 0x0800, // mmreg is mmxreg + MEM_XMMTAG = 0x8000, // mmreg is xmmreg + MEM_X86TAG = 0x4000, // ignored most of the time + MEM_GPRTAG = 0x2000, // argument is a GPR reg + MEM_CONSTTAG = 0x1000 // argument is a const +}; -#define IS_EECONSTREG(reg) (reg>=0&&((reg)&MEM_EECONSTTAG)) -#define IS_PSXCONSTREG(reg) (reg>=0&&((reg)&MEM_PSXCONSTTAG)) -#define IS_MMXREG(reg) (reg>=0&&((reg)&MEM_MMXTAG)) -#define IS_XMMREG(reg) (reg>=0&&((reg)&MEM_XMMTAG)) +template static __forceinline bool IS_REG(s32 reg) +{ + return ((reg >= 0) && (reg & tag)); +} -// fixme - these 4 are only called for u32 registers; should the reg>=0 really be there? -#define IS_X86REG(reg) (reg>=0&&((reg)&MEM_X86TAG)) -#define IS_GPRREG(reg) (reg>=0&&((reg)&MEM_GPRTAG)) -#define IS_CONSTREG(reg) (reg>=0&&((reg)&MEM_CONSTTAG)) -#define IS_MEMORYREG(reg) (reg>=0&&((reg)&MEM_MEMORYTAG)) +template static __forceinline bool IS_REG(u32 reg) +{ + return !!(reg & tag); +} + +#define IS_EECONSTREG(reg) IS_REG(reg) +#define IS_PSXCONSTREG(reg) IS_REG(reg) +#define IS_MMXREG(reg) IS_REG(reg) +#define IS_XMMREG(reg) IS_REG(reg) + +#define IS_X86REG(reg) IS_REG(reg) +#define IS_GPRREG(reg) IS_REG(reg) +#define IS_CONSTREG(reg) IS_REG(reg) +#define IS_MEMORYREG(reg) IS_REG(reg) ////////////////////// // Instruction Info // @@ -265,13 +281,13 @@ extern u32 _recIsRegWritten(EEINST* pinst, int size, u8 xmmtype, u8 reg); extern u32 _recIsRegUsed(EEINST* pinst, int size, u8 xmmtype, u8 reg); extern void _recFillRegister(EEINST& pinst, int type, int reg, int write); -#define EEINST_ISLIVE64(reg) (g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1)) -#define EEINST_ISLIVEXMM(reg) (g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1|EEINST_LIVE2)) -#define EEINST_ISLIVE1(reg) (g_pCurInstInfo->regs[reg] & EEINST_LIVE1) -#define EEINST_ISLIVE2(reg) (g_pCurInstInfo->regs[reg] & EEINST_LIVE2) +static __forceinline bool EEINST_ISLIVE64(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1)); } +static __forceinline bool EEINST_ISLIVEXMM(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & (EEINST_LIVE0|EEINST_LIVE1|EEINST_LIVE2)); } +static __forceinline bool EEINST_ISLIVE1(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & EEINST_LIVE1); } +static __forceinline bool EEINST_ISLIVE2(u32 reg) { return !!(g_pCurInstInfo->regs[reg] & EEINST_LIVE2); } -#define FPUINST_ISLIVE(reg) (g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0) -#define FPUINST_LASTUSE(reg) (g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE) +static __forceinline bool FPUINST_ISLIVE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LIVE0); } +static __forceinline bool FPUINST_LASTUSE(u32 reg) { return !!(g_pCurInstInfo->fpuregs[reg] & EEINST_LASTUSE); } // if set, then the variable at this inst really has its upper 32 bits valid // The difference between EEINST_LIVE1 is that the latter is used in back propagation @@ -309,8 +325,8 @@ void SetMMXstate(); void SetFPUstate(); // max is 0x7f, when 0x80 is set, need to flush reg -#define MMX_GET_CACHE(ptr, index) ((u8*)ptr)[index] -#define MMX_SET_CACHE(ptr, ind3, ind2, ind1, ind0) ((u32*)ptr)[0] = (ind3<<24)|(ind2<<16)|(ind1<<8)|ind0; +//#define MMX_GET_CACHE(ptr, index) ((u8*)ptr)[index] +//#define MMX_SET_CACHE(ptr, ind3, ind2, ind1, ind0) ((u32*)ptr)[0] = (ind3<<24)|(ind2<<16)|(ind1<<8)|ind0; #define MMX_GPR 0 #define MMX_HI XMMGPR_HI #define MMX_LO XMMGPR_LO @@ -319,9 +335,20 @@ void SetFPUstate(); #define MMX_COP0 96 #define MMX_TEMP 0x7f -#define MMX_IS32BITS(x) (((x)>=MMX_FPU&&(x)= MMX_GPR && (x) < MMX_GPR+34) +static __forceinline bool MMX_IS32BITS(s32 x) +{ + return (((x >= MMX_FPU) && (x < MMX_COP0 + 32)) || (x == MMX_FPUACC)); +} + +static __forceinline bool MMX_ISGPR(s32 x) +{ + return ((x >= MMX_GPR) && (x < MMX_GPR + 34)); +} + +static __forceinline bool MMX_ISGPR(u32 x) +{ + return (x < MMX_GPR + 34); +} struct _mmxregs { u8 inuse; diff --git a/pcsx2/x86/ix86-32/iCore-32.cpp b/pcsx2/x86/ix86-32/iCore-32.cpp index 6588fee7fb..095e7fd08f 100644 --- a/pcsx2/x86/ix86-32/iCore-32.cpp +++ b/pcsx2/x86/ix86-32/iCore-32.cpp @@ -32,12 +32,11 @@ using namespace std; // landmass of shared code. (air) extern u32 g_psxConstRegs[32]; - u16 x86FpuState; -u16 g_mmxAllocCounter = 0; +static u16 g_mmxAllocCounter = 0; // X86 caching -int g_x86checknext; +static int g_x86checknext; // use special x86 register allocation for ia32 @@ -49,38 +48,83 @@ void _initX86regs() { u32 _x86GetAddr(int type, int reg) { - switch(type&~X86TYPE_VU1) { - case X86TYPE_GPR: return (u32)&cpuRegs.GPR.r[reg]; - case X86TYPE_VI: { - //assert( reg < 16 || reg == REG_R ); - return (type&X86TYPE_VU1)?(u32)&VU1.VI[reg]:(u32)&VU0.VI[reg]; - } - case X86TYPE_MEMOFFSET: return 0; - case X86TYPE_VIMEMOFFSET: return 0; - case X86TYPE_VUQREAD: return (type&X86TYPE_VU1)?(u32)&VU1.VI[REG_Q]:(u32)&VU0.VI[REG_Q]; - case X86TYPE_VUPREAD: return (type&X86TYPE_VU1)?(u32)&VU1.VI[REG_P]:(u32)&VU0.VI[REG_P]; - case X86TYPE_VUQWRITE: return (type&X86TYPE_VU1)?(u32)&VU1.q:(u32)&VU0.q; - case X86TYPE_VUPWRITE: return (type&X86TYPE_VU1)?(u32)&VU1.p:(u32)&VU0.p; - case X86TYPE_PSX: return (u32)&psxRegs.GPR.r[reg]; + u32 ret = 0; + + switch(type&~X86TYPE_VU1) + { + case X86TYPE_GPR: + ret = (u32)&cpuRegs.GPR.r[reg]; + break; + + case X86TYPE_VI: + if (type & X86TYPE_VU1) + ret = (u32)&VU1.VI[reg]; + else + ret = (u32)&VU0.VI[reg]; + break; + + case X86TYPE_MEMOFFSET: + ret = 0; + break; + + case X86TYPE_VIMEMOFFSET: + ret = 0; + break; + + case X86TYPE_VUQREAD: + if (type & X86TYPE_VU1) + ret = (u32)&VU1.VI[REG_Q]; + else + ret = (u32)&VU0.VI[REG_Q]; + break; + + case X86TYPE_VUPREAD: + if (type & X86TYPE_VU1) + ret = (u32)&VU1.VI[REG_P]; + else + ret = (u32)&VU0.VI[REG_P]; + break; + + case X86TYPE_VUQWRITE: + if (type & X86TYPE_VU1) + ret = (u32)&VU1.q; + else + ret = (u32)&VU0.q; + break; + + case X86TYPE_VUPWRITE: + if (type & X86TYPE_VU1) + ret = (u32)&VU1.p; + else + ret = (u32)&VU0.p; + break; + + case X86TYPE_PSX: + ret = (u32)&psxRegs.GPR.r[reg]; + break; + case X86TYPE_PCWRITEBACK: - return (u32)&g_recWriteback; + ret = (u32)&g_recWriteback; + break; + case X86TYPE_VUJUMP: - return (u32)&g_recWriteback; + ret = (u32)&g_recWriteback; + break; jNO_DEFAULT; } - return 0; + return ret; } int _getFreeX86reg(int mode) { - int i, tempi; + int tempi = -1; u32 bestcount = 0x10000; int maxreg = (mode&MODE_8BITREG)?4:iREGCNT_GPR; - for (i=0; i= maxreg ) continue; @@ -92,8 +136,7 @@ int _getFreeX86reg(int mode) } } - tempi = -1; - for (i=1; i 0) XOR32RtoR(EAX, EAX), eaxval = 0; if (eaxval == 0) NOT32R(EAX), eaxval = -1; @@ -184,17 +223,17 @@ void _flushConstRegs() done[1] |= done[3]; } - for (i = 1; i < 32; ++i) { + for (int i = 1; i < 32; ++i) { if (GPR_IS_CONST1(i)) { if (!(g_cpuFlushedConstReg&(1<= maxreg) ) { @@ -272,12 +309,10 @@ int _allocX86reg(int x86reg, int type, int reg, int mode) } } - if (x86reg == -1) { + if (x86reg == -1) x86reg = _getFreeX86reg(oldmode); - } - else { + else _freeX86reg(x86reg); - } x86regs[x86reg].type = type; x86regs[x86reg].reg = reg; @@ -378,6 +413,7 @@ void _deleteX86reg(int type, int reg, int flush) case 0: _freeX86reg(i); break; + case 1: if( x86regs[i].mode & MODE_WRITE) { @@ -391,6 +427,7 @@ void _deleteX86reg(int type, int reg, int flush) x86regs[i].mode |= MODE_READ; } return; + case 2: x86regs[i].inuse = 0; break; @@ -470,7 +507,7 @@ int _getFreeMMXreg() // check for dead regs for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { // mmxregs[i] is unsigned, and MMX_GPR == 0, so the first part is always true. + if (MMX_ISGPR(mmxregs[i].reg)) { if( !(g_pCurInstInfo->regs[mmxregs[i].reg-MMX_GPR] & (EEINST_LIVE0|EEINST_LIVE1)) ) { _freeMMXreg(i); return i; @@ -485,7 +522,7 @@ int _getFreeMMXreg() // check for future xmm usage for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { + if (MMX_ISGPR(mmxregs[i].reg)) { if( !(g_pCurInstInfo->regs[mmxregs[i].reg] & EEINST_MMX) ) { _freeMMXreg(i); return i; @@ -612,7 +649,7 @@ int _checkMMXreg(int reg, int mode) PXORRtoR(i, i); } else { - if( MMX_ISGPR(reg) && (mode&(MODE_READHALF|MODE_READ)) ) _flushConstReg(reg-MMX_GPR); + if (MMX_ISGPR(reg) && (mode&(MODE_READHALF|MODE_READ))) _flushConstReg(reg-MMX_GPR); if( (mode & MODE_READHALF) || (MMX_IS32BITS(reg)&&(mode&MODE_READ)) ) MOVDMtoMMX(i, (u32)_MMXGetAddr(reg)); else @@ -713,7 +750,7 @@ u8 _hasFreeMMXreg() // check for dead regs for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { + if (MMX_ISGPR(mmxregs[i].reg)) { if( !EEINST_ISLIVE64(mmxregs[i].reg-MMX_GPR) ) { return 1; } @@ -723,7 +760,7 @@ u8 _hasFreeMMXreg() // check for dead regs for (i=0; i= MMX_GPR && mmxregs[i].reg < MMX_GPR+34 ) { + if (MMX_ISGPR(mmxregs[i].reg)) { if( !(g_pCurInstInfo->regs[mmxregs[i].reg-MMX_GPR]&EEINST_USED) ) { return 1; } @@ -739,8 +776,8 @@ void _freeMMXreg(int mmxreg) if (!mmxregs[mmxreg].inuse) return; if (mmxregs[mmxreg].mode & MODE_WRITE ) { - - if( mmxregs[mmxreg].reg >= MMX_GPR && mmxregs[mmxreg].reg < MMX_GPR+32 ) + // Not sure if this line is accurate, since if the 32 was 34, it would be MMX_ISGPR. + if ( /*mmxregs[mmxreg].reg >= MMX_GPR &&*/ mmxregs[mmxreg].reg < MMX_GPR+32 ) // Checking if a u32 is >=0 is pointless. assert( !(g_cpuHasConstReg & (1<<(mmxregs[mmxreg].reg-MMX_GPR))) ); assert( mmxregs[mmxreg].reg != MMX_GPR ); diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index f105575eb0..58dbc2e729 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -477,8 +477,6 @@ void recResetEE( void ) __asm__("emms"); #endif - #define GET_HWADDR(mem) - for (int i = 0; i < 0x10000; i++) recLUT_SetPage(recLUT, 0, 0, 0, i, 0); diff --git a/pcsx2/x86/microVU_Alloc.inl b/pcsx2/x86/microVU_Alloc.inl index d929e00126..d66d8a07d3 100644 --- a/pcsx2/x86/microVU_Alloc.inl +++ b/pcsx2/x86/microVU_Alloc.inl @@ -26,17 +26,17 @@ // Flag Allocators //------------------------------------------------------------------ -#define getFlagReg(regX, fInst) { \ - switch (fInst) { \ - case 0: regX = gprF0; break; \ - case 1: regX = gprF1; break; \ - case 2: regX = gprF2; break; \ - case 3: regX = gprF3; break; \ - default: \ - Console::Error("microVU: Flag Instance Error (fInst = %d)", params fInst); \ - regX = gprF0; \ - break; \ - } \ +#define getFlagReg(regX, fInst) { \ + switch (fInst) { \ + case 0: regX = gprF0; break; \ + case 1: regX = gprF1; break; \ + case 2: regX = gprF2; break; \ + case 3: regX = gprF3; break; \ + default: \ + Console::Error("microVU Error: fInst = %d", params fInst); \ + regX = gprF0; \ + break; \ + } \ } #define setBitSFLAG(bitTest, bitSet) { \ @@ -114,31 +114,33 @@ microVUt(void) mVUallocVIb(mV, int GPRreg, int _reg_) { } //------------------------------------------------------------------ -// I/Q/P Reg Allocators +// I/P/Q Reg Allocators //------------------------------------------------------------------ -#define getIreg(reg, modXYZW) { \ - SSE_MOVSS_M32_to_XMM(reg, (uptr)&mVU->regs->VI[REG_I].UL); \ - if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, -1, 8); \ - if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } \ +microVUt(void) getIreg(mV, int reg, bool modXYZW) { + SSE_MOVSS_M32_to_XMM(reg, (uptr)&mVU->regs->VI[REG_I].UL); + if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, -1, 8); + if (!((_XYZW_SS && modXYZW) || (_X_Y_Z_W == 8))) { mVUunpack_xyzw(reg, reg, 0); } } -#define getQreg(reg) { \ - mVUunpack_xyzw(reg, xmmPQ, mVUinfo.readQ); \ - /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ \ +microVUt(void) getPreg(mV, int reg) { + mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP)); + /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ } -#define getPreg(reg) { \ - mVUunpack_xyzw(reg, xmmPQ, (2 + mVUinfo.readP)); \ - /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ \ +microVUt(void) getQreg(int reg, int qInstance) { + mVUunpack_xyzw(reg, xmmPQ, qInstance); + /*if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2(reg, xmmT1, 15);*/ } -//------------------------------------------------------------------ -// Lower Instruction Allocator Helpers -//------------------------------------------------------------------ - -// VF to GPR -#define getReg8(GPRreg, _reg_, _fxf_) { \ - if (!_reg_ && (_fxf_ < 3)) { XOR32RtoR(GPRreg, GPRreg); } \ - else { MOV32MtoR(GPRreg, (uptr)&mVU->regs->VF[_reg_].UL[0]); } \ +microVUt(void) writeQreg(int reg, int qInstance) { + if (qInstance) { + if (!x86caps.hasStreamingSIMD4Extensions) { + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); + SSE_MOVSS_XMM_to_XMM(xmmPQ, reg); + SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); + } + else SSE4_INSERTPS_XMM_to_XMM(xmmPQ, reg, _MM_MK_INSERTPS_NDX(0, 1, 0)); + } + else SSE_MOVSS_XMM_to_XMM(xmmPQ, reg); } diff --git a/pcsx2/x86/microVU_Analyze.inl b/pcsx2/x86/microVU_Analyze.inl index 4349b9fdf7..ff19ae7d33 100644 --- a/pcsx2/x86/microVU_Analyze.inl +++ b/pcsx2/x86/microVU_Analyze.inl @@ -275,13 +275,18 @@ microVUt(void) mVUanalyzeR2(mV, int Ft, bool canBeNOP) { //------------------------------------------------------------------ // Sflag - Status Flag Opcodes //------------------------------------------------------------------ -#define flagSet(xFLAG) { \ - int curPC = iPC; \ - for (int i = mVUcount, j = 0; i > 0; i--, j++) { \ - incPC2(-2); \ - if (sFLAG.doFlag) { xFLAG = 1; if (j >= 3) { break; } } \ - } \ - iPC = curPC; \ +microVUt(void) flagSet(mV, bool setMacFlag) { + int curPC = iPC; + for (int i = mVUcount, j = 0; i > 0; i--, j++) { + j += mVUstall; + incPC2(-2); + if (sFLAG.doFlag && (j >= 3)) { + if (setMacFlag) { mFLAG.doFlag = 1; } + else { sFLAG.doNonSticky = 1; } + break; + } + } + iPC = curPC; } microVUt(void) mVUanalyzeSflag(mV, int It) { @@ -289,14 +294,10 @@ microVUt(void) mVUanalyzeSflag(mV, int It) { analyzeVIreg2(It, mVUlow.VI_write, 1); if (!It) { mVUlow.isNOP = 1; } else { - mVUinfo.swapOps = 1; mVUsFlagHack = 0; // Don't Optimize Out Status Flags for this block - flagSet(sFLAG.doNonSticky); - if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf; } - if (mVUcount >= 1) { incPC2(-2); mVUlow.useSflag = 1; incPC2(2); } - // Note: useSflag is used for status flag optimizations when a FSSET instruction is called. - // Do to stalls, it can only be set one instruction prior to the status flag read instruction - // if we were guaranteed no-stalls were to happen, it could be set 4 instruction prior. + mVUinfo.swapOps = 1; + flagSet(mVU, 0); + if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf; } } } @@ -316,8 +317,8 @@ microVUt(void) mVUanalyzeMflag(mV, int Is, int It) { if (!It) { mVUlow.isNOP = 1; } else { // Need set _doMac for 4 previous Ops (need to do all 4 because stalls could change the result needed) mVUinfo.swapOps = 1; + flagSet(mVU, 1); if (mVUcount < 4) { mVUpBlock->pState.needExactMatch |= 0xf << 4; } - flagSet(mFLAG.doFlag); } } diff --git a/pcsx2/x86/microVU_Compile.inl b/pcsx2/x86/microVU_Compile.inl index 3c57ced0cd..e7c5769bb8 100644 --- a/pcsx2/x86/microVU_Compile.inl +++ b/pcsx2/x86/microVU_Compile.inl @@ -31,10 +31,12 @@ } \ } -#define startLoop() { \ - mVUdebug1(); \ - memset(&mVUinfo, 0, sizeof(mVUinfo)); \ - memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); \ +#define startLoop() { \ + if (curI & _Mbit_) { Console::Status("microVU%d: M-bit set!", params getIndex); } \ + if (curI & _Dbit_) { DevCon::Status ("microVU%d: D-bit set!", params getIndex); } \ + if (curI & _Tbit_) { DevCon::Status ("microVU%d: T-bit set!", params getIndex); } \ + memset(&mVUinfo, 0, sizeof(mVUinfo)); \ + memset(&mVUregsTemp, 0, sizeof(mVUregsTemp)); \ } #define calcCycles(reg, x) { reg = ((reg > x) ? (reg - x) : 0); } @@ -44,7 +46,6 @@ #define incQ() { mVU->q = (mVU->q+1) & 1; } #define doUpperOp() { mVUopU(mVU, 1); mVUdivSet(mVU); } #define doLowerOp() { incPC(-1); mVUopL(mVU, 1); incPC(1); } -#define doIbit() { if (mVUup.iBit) { incPC(-1); MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); incPC(1); } } #define blockCreate(addr) { if (!mVUblocks[addr]) mVUblocks[addr] = new microBlockManager(); } //------------------------------------------------------------------ @@ -74,6 +75,19 @@ microVUt(void) doSwapOp(mV) { else { mVUopL(mVU, 1); incPC(1); doUpperOp(); } } +microVUt(void) doIbit(mV) { + if (mVUup.iBit) { + incPC(-1); + if (CHECK_VU_OVERFLOW && ((curI & 0x7fffffff) >= 0x7f800000)) { + Console::Status("microVU%d: Clamping I Reg", params mVU->index); + int tempI = (0x80000000 & curI) | 0x7f7fffff; // Clamp I Reg + MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, tempI); + } + else MOV32ItoM((uptr)&mVU->regs->VI[REG_I].UL, curI); + incPC(1); + } +} + // Used by mVUsetupRange microVUt(void) mVUcheckIsSame(mV) { @@ -371,7 +385,6 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { mVUsetupRange(mVU, startPC, 1); // Reset regAlloc - mVU->regAlloc->flushAll(); mVU->regAlloc->reset(); // First Pass @@ -433,7 +446,7 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) { for (x = 0; x < endCount; x++) { if (mVUinfo.isEOB) { x = 0xffff; } if (mVUup.mBit) { OR32ItoM((uptr)&mVU->regs->flags, VUFLAG_MFLAGSET); } - if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(); } + if (mVUlow.isNOP) { incPC(1); doUpperOp(); doIbit(mVU); } else if (!mVUinfo.swapOps) { incPC(1); doUpperOp(); doLowerOp(); } else { doSwapOp(mVU); } if (mVUinfo.doXGKICK) { mVU_XGKICK_DELAY(mVU, 1); } diff --git a/pcsx2/x86/microVU_Flags.inl b/pcsx2/x86/microVU_Flags.inl index 8ad213e870..ca4ccb1fd3 100644 --- a/pcsx2/x86/microVU_Flags.inl +++ b/pcsx2/x86/microVU_Flags.inl @@ -34,18 +34,18 @@ microVUt(void) mVUstatusFlagOp(mV) { int curPC = iPC; int i = mVUcount; bool runLoop = 1; - if (sFLAG.doFlag) { mVUlow.useSflag = 1; } + if (sFLAG.doFlag) { sFLAG.doNonSticky = 1; } else { for (; i > 0; i--) { incPC2(-2); - if (mVUlow.useSflag) { runLoop = 0; break; } - if (sFLAG.doFlag) { mVUlow.useSflag = 1; break; } + if (sFLAG.doNonSticky) { runLoop = 0; break; } + else if (sFLAG.doFlag) { sFLAG.doNonSticky = 1; break; } } } if (runLoop) { for (; i > 0; i--) { incPC2(-2); - if (mVUlow.useSflag) break; + if (sFLAG.doNonSticky) break; sFLAG.doFlag = 0; } } diff --git a/pcsx2/x86/microVU_IR.h b/pcsx2/x86/microVU_IR.h index 1ce311dda5..44a1de55a3 100644 --- a/pcsx2/x86/microVU_IR.h +++ b/pcsx2/x86/microVU_IR.h @@ -107,7 +107,6 @@ struct microLowerOp { u32 branch; // Branch Type (0 = Not a Branch, 1 = B. 2 = BAL, 3~8 = Conditional Branches, 9 = JALR, 10 = JR) bool isNOP; // This instruction is a NOP bool isFSSET; // This instruction is a FSSET - bool useSflag; // This instruction uses/reads Sflag bool noWriteVF; // Don't write back the result of a lower op to VF reg if upper op writes to same reg (or if VF = 0) bool backupVI; // Backup VI reg to memory if modified before branch (branch uses old VI value unless opcode is ILW or ILWR) bool memReadIs; // Read Is (VI reg) from memory (used by branches) @@ -245,6 +244,7 @@ public: clearReg(reg); // Clear Reg } void clearNeeded(int reg) { + if ((reg < 0) || (reg >= xmmTotal)) return; xmmReg[reg].isNeeded = 0; if (xmmReg[reg].xyzw) { // Reg was modified if (xmmReg[reg].reg > 0) { diff --git a/pcsx2/x86/microVU_Lower.inl b/pcsx2/x86/microVU_Lower.inl index 16beb54f14..87c79f9ae7 100644 --- a/pcsx2/x86/microVU_Lower.inl +++ b/pcsx2/x86/microVU_Lower.inl @@ -26,28 +26,35 @@ // DIV/SQRT/RSQRT //------------------------------------------------------------------ -#define testZero(xmmReg, xmmTemp, gprTemp) { \ - SSE_XORPS_XMM_to_XMM(xmmTemp, xmmTemp); /* Clear xmmTemp (make it 0) */ \ - SSE_CMPEQPS_XMM_to_XMM(xmmTemp, xmmReg); /* Set all F's if zero */ \ - SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmTemp); /* Move the sign bits */ \ - TEST32ItoR(gprTemp, 1); /* Test "Is Zero" bit */ \ +// Test if Vector is +/- Zero +#define testZero(xmmReg, xmmTemp, gprTemp) { \ + SSE_XORPS_XMM_to_XMM(xmmTemp, xmmTemp); \ + SSE_CMPEQSS_XMM_to_XMM(xmmTemp, xmmReg); \ + if (!x86caps.hasStreamingSIMD4Extensions) { \ + SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmTemp); \ + TEST32ItoR(gprTemp, 1); \ + } \ + else SSE4_PTEST_XMM_to_XMM(xmmTemp, xmmTemp); \ } -#define testNeg(xmmReg, gprTemp, aJump) { \ - SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmReg); \ - TEST32ItoR(gprTemp, 1); /* Check sign bit */ \ - aJump = JZ8(0); /* Skip if positive */ \ - MOV32ItoM((uptr)&mVU->divFlag, divI); /* Set Invalid Flags */ \ - SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); /* Abs(xmmReg) */ \ - x86SetJ8(aJump); \ +// Test if Vector is Negative (Set Flags and Makes Positive) +#define testNeg(xmmReg, gprTemp, aJump) { \ + SSE_MOVMSKPS_XMM_to_R32(gprTemp, xmmReg); \ + TEST32ItoR(gprTemp, 1); \ + aJump = JZ8(0); \ + MOV32ItoM((uptr)&mVU->divFlag, divI); \ + SSE_ANDPS_M128_to_XMM(xmmReg, (uptr)mVU_absclip); \ + x86SetJ8(aJump); \ } mVUop(mVU_DIV) { pass1 { mVUanalyzeFDIV(mVU, _Fs_, _Fsf_, _Ft_, _Ftf_, 7); } pass2 { u8 *ajmp, *bjmp, *cjmp, *djmp; + int Ft; + if (_Ftf_) Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_))); + else Ft = mVU->regAlloc->allocReg(_Ft_); int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); - int Ft = mVU->regAlloc->allocReg(_Ft_, 0, (1 << (3 - _Ftf_))); int t1 = mVU->regAlloc->allocReg(); testZero(Ft, t1, gprT1); // Test if Ft is zero @@ -72,9 +79,7 @@ mVUop(mVU_DIV) { mVUclamp1(Fs, t1, 8); x86SetJ8(djmp); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); - SSE_MOVSS_XMM_to_XMM(xmmPQ, Fs); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); + writeQreg(Fs, mVUinfo.writeQ); mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Ft); @@ -94,9 +99,7 @@ mVUop(mVU_SQRT) { if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(Ft, (uptr)mVU_maxvals); // Clamp infinities (only need to do positive clamp since xmmFt is positive) SSE_SQRTSS_XMM_to_XMM(Ft, Ft); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); - SSE_MOVSS_XMM_to_XMM(xmmPQ, Ft); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); + writeQreg(Ft, mVUinfo.writeQ); mVU->regAlloc->clearNeeded(Ft); } @@ -135,9 +138,7 @@ mVUop(mVU_RSQRT) { mVUclamp1(Fs, t1, 8); x86SetJ8(djmp); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); - SSE_MOVSS_XMM_to_XMM(xmmPQ, Fs); - if (mVUinfo.writeQ) SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, 0xe1); + writeQreg(Fs, mVUinfo.writeQ); mVU->regAlloc->clearNeeded(Fs); mVU->regAlloc->clearNeeded(Ft); @@ -354,6 +355,7 @@ mVUop(mVU_ERSQRT) { pass2 { int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVU_absclip); SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs); SSE_MOVSS_M32_to_XMM (Fs, (uptr)mVU_one); SSE_DIVSS_XMM_to_XMM (Fs, xmmPQ); @@ -416,6 +418,7 @@ mVUop(mVU_ESQRT) { pass2 { int Fs = mVU->regAlloc->allocReg(_Fs_, 0, (1 << (3 - _Fsf_))); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip xmmPQ to get Valid P instance + SSE_ANDPS_M128_to_XMM (Fs, (uptr)mVU_absclip); SSE_SQRTSS_XMM_to_XMM (xmmPQ, Fs); SSE2_PSHUFD_XMM_to_XMM(xmmPQ, xmmPQ, mVUinfo.writeP ? 0x27 : 0xC6); // Flip back mVU->regAlloc->clearNeeded(Fs); @@ -736,7 +739,7 @@ mVUop(mVU_MFP) { pass1 { mVUanalyzeMFP(mVU, _Ft_); } pass2 { int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); - getPreg(Ft); + getPreg(mVU, Ft); mVU->regAlloc->clearNeeded(Ft); } pass3 { mVUlog("MFP.%s vf%02d, P", _XYZW_String, _Ft_); } @@ -754,7 +757,7 @@ mVUop(mVU_MOVE) { mVUop(mVU_MR32) { pass1 { mVUanalyzeMR32(mVU, _Fs_, _Ft_); } pass2 { - int Fs = mVU->regAlloc->allocReg(_Fs_, 0, 0xf); + int Fs = mVU->regAlloc->allocReg(_Fs_); int Ft = mVU->regAlloc->allocReg(-1, _Ft_, _X_Y_Z_W); if (_XYZW_SS) mVUunpack_xyzw(Ft, Fs, (_X ? 1 : (_Y ? 2 : (_Z ? 3 : 0)))); else SSE2_PSHUFD_XMM_to_XMM(Ft, Fs, 0x39); diff --git a/pcsx2/x86/microVU_Misc.h b/pcsx2/x86/microVU_Misc.h index be447d82ed..226fe77cb9 100644 --- a/pcsx2/x86/microVU_Misc.h +++ b/pcsx2/x86/microVU_Misc.h @@ -80,9 +80,10 @@ declareAllVariables #define _Z ((mVU->code>>22) & 0x1) #define _W ((mVU->code>>21) & 0x1) -#define _X_Y_Z_W (((mVU->code >> 21 ) & 0xF )) +#define _X_Y_Z_W (((mVU->code >> 21 ) & 0xF)) #define _XYZW_SS (_X+_Y+_Z+_W==1) #define _XYZW_SS2 (_XYZW_SS && (_X_Y_Z_W != 8)) +#define _XYZW_PS (_X_Y_Z_W == 0xf) #define _bc_ (mVU->code & 0x3) #define _bc_x ((mVU->code & 0x3) == 0) @@ -242,20 +243,8 @@ typedef u32 (__fastcall *mVUCall)(void*, void*); // Debug Stuff... #ifdef mVUdebug #define mVUprint Console::Status -#define mVUdebug1() { \ - if (curI & _Ibit_) { SysPrintf("microVU: I-bit set!\n"); } \ - if (curI & _Ebit_) { SysPrintf("microVU: E-bit set!\n"); } \ - if (curI & _Mbit_) { SysPrintf("microVU: M-bit set!\n"); } \ - if (curI & _Dbit_) { SysPrintf("microVU: D-bit set!\n"); } \ - if (curI & _Tbit_) { SysPrintf("microVU: T-bit set!\n"); } \ -} #else #define mVUprint 0&& -#define mVUdebug1() { \ - if (curI & _Mbit_) { Console::Status("microVU%d: M-bit set!", params getIndex); } \ - if (curI & _Dbit_) { DevCon::Status ("microVU%d: D-bit set!", params getIndex); } \ - if (curI & _Tbit_) { DevCon::Status ("microVU%d: T-bit set!", params getIndex); } \ -} #endif // Program Logging... diff --git a/pcsx2/x86/microVU_Upper.inl b/pcsx2/x86/microVU_Upper.inl index a4801f65c9..d7c7f69acc 100644 --- a/pcsx2/x86/microVU_Upper.inl +++ b/pcsx2/x86/microVU_Upper.inl @@ -34,7 +34,7 @@ microVUt(void) mVUupdateFlags(mV, int reg, int regT1 = -1, int regT2 = -1, bool //SysPrintf("Status = %d; Mac = %d\n", sFLAG.doFlag, mFLAG.doFlag); if (mVUsFlagHack) { sFLAG.doFlag = 0; } if (!sFLAG.doFlag && !mFLAG.doFlag) { return; } - if (!(!mFLAG.doFlag || (_XYZW_SS && modXYZW))) { + if ((mFLAG.doFlag && !(_XYZW_SS && modXYZW))) { if (regT2 < 0) { regT2 = mVU->regAlloc->allocReg(); regT2b = 1; } SSE2_PSHUFD_XMM_to_XMM(regT2, reg, 0x1B); // Flip wzyx to xyzw } @@ -119,27 +119,31 @@ void setupPass1(microVU* mVU, int opCase, bool isACC, bool noFlagUpdate) { } // Sets Up Ft Reg for Normal, BC, I, and Q Cases -void setupFtReg(microVU* mVU, int& Ft, int opCase) { +void setupFtReg(microVU* mVU, int& Ft, int& tempFt, int opCase) { opCase1 { - if (_XYZW_SS2) Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); - else Ft = mVU->regAlloc->allocReg(_Ft_); + if (_XYZW_SS2) { Ft = mVU->regAlloc->allocReg(_Ft_, 0, _X_Y_Z_W); tempFt = Ft; } + else { Ft = mVU->regAlloc->allocReg(_Ft_); tempFt = -1; } } opCase2 { - int tempFt = mVU->regAlloc->allocReg(_Ft_); + tempFt = mVU->regAlloc->allocReg(_Ft_); Ft = mVU->regAlloc->allocReg(); mVUunpack_xyzw(Ft, tempFt, _bc_); mVU->regAlloc->clearNeeded(tempFt); + tempFt = Ft; + } + opCase3 { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getIreg(mVU, Ft, 1); } + opCase4 { + if (_XYZW_SS && !mVUinfo.readQ) { Ft = xmmPQ; tempFt = -1; } + else { Ft = mVU->regAlloc->allocReg(); tempFt = Ft; getQreg(Ft, mVUinfo.readQ); } } - opCase3 { Ft = mVU->regAlloc->allocReg(); getIreg(Ft, 1); } - opCase4 { Ft = mVU->regAlloc->allocReg(); getQreg(Ft); } } // Normal FMAC Opcodes void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, const char* opName) { pass1 { setupPass1(mVU, opCase, isACC, ((opType == 3) || (opType == 4))); } pass2 { - int Fs, Ft, ACC; - setupFtReg(mVU, Ft, opCase); + int Fs, Ft, ACC, tempFt; + setupFtReg(mVU, Ft, tempFt, opCase); if (isACC) { Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); @@ -148,19 +152,21 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co } else { Fs = mVU->regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); } - opCase2 { if (opType == 2) { mVUclamp1(Fs, -1, _X_Y_Z_W); } } // Clamp Needed for alot of games (TOTA, DoM, etc...) - + opCase1 { if((opType == 2) && _XYZW_PS) { mVUclamp1(Ft, -1, _X_Y_Z_W); } } // Clamp Needed for Ice Age 3 (VU0) + opCase1 { if((opType == 2) && _XYZW_PS) { mVUclamp1(Fs, -1, _X_Y_Z_W); } } // Clamp Needed for Ice Age 3 (VU0) + opCase2 { if (opType == 2) { mVUclamp1(Fs, -1, _X_Y_Z_W); } } // Clamp Needed for alot of games (TOTA, DoM, etc...) + if (_XYZW_SS) SSE_SS[opType](mVU, Fs, Ft, -1, -1); else SSE_PS[opType](mVU, Fs, Ft, -1, -1); if (isACC) { if (_XYZW_SS) SSE_MOVSS_XMM_to_XMM(ACC, Fs); else mVUmergeRegs(ACC, Fs, _X_Y_Z_W); - mVUupdateFlags(mVU, ACC, Fs, ((opCase==2) ? Ft : (((opCase==1) && _XYZW_SS2) ? Ft : -1))); + mVUupdateFlags(mVU, ACC, Fs, tempFt); if (_XYZW_SS2) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); mVU->regAlloc->clearNeeded(ACC); } - else mVUupdateFlags(mVU, Fs, ((opCase==2) ? Ft : -1), (((opCase==1) && _XYZW_SS2) ? Ft : -1)); + else mVUupdateFlags(mVU, Fs, tempFt); mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First mVU->regAlloc->clearNeeded(Ft); @@ -172,8 +178,8 @@ void mVU_FMACa(microVU* mVU, int recPass, int opCase, int opType, bool isACC, co void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* opName) { pass1 { setupPass1(mVU, opCase, 1, 0); } pass2 { - int Fs, Ft, ACC; - setupFtReg(mVU, Ft, opCase); + int Fs, Ft, ACC, tempFt; + setupFtReg(mVU, Ft, tempFt, opCase); Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); ACC = mVU->regAlloc->allocReg(32, 32, 0xf, 0); @@ -185,16 +191,17 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op else SSE_PS[2](mVU, Fs, Ft, -1, -1); if (_XYZW_SS || _X_Y_Z_W == 0xf) { - if (_XYZW_SS) SSE_SS[opType](mVU, ACC, Fs, -1, -1); - else SSE_PS[opType](mVU, ACC, Fs, -1, -1); - mVUupdateFlags(mVU, ACC, Fs, ((opCase==2) ? Ft : (((opCase==1) && _XYZW_SS2) ? Ft : -1))); + if (_XYZW_SS) SSE_SS[opType](mVU, ACC, Fs, tempFt, -1); + else SSE_PS[opType](mVU, ACC, Fs, tempFt, -1); + mVUupdateFlags(mVU, ACC, Fs, tempFt); if (_XYZW_SS && _X_Y_Z_W != 8) SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); } else { int tempACC = mVU->regAlloc->allocReg(); SSE_MOVAPS_XMM_to_XMM(tempACC, ACC); - SSE_PS[opType](mVU, tempACC, Fs, -1, -1); + SSE_PS[opType](mVU, tempACC, Fs, tempFt, -1); mVUmergeRegs(ACC, tempACC, _X_Y_Z_W); + mVUupdateFlags(mVU, ACC, Fs, tempFt); mVU->regAlloc->clearNeeded(tempACC); } @@ -209,8 +216,8 @@ void mVU_FMACb(microVU* mVU, int recPass, int opCase, int opType, const char* op void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) { pass1 { setupPass1(mVU, opCase, 0, 0); } pass2 { - int Fs, Ft, ACC; - setupFtReg(mVU, Ft, opCase); + int Fs, Ft, ACC, tempFt; + setupFtReg(mVU, Ft, tempFt, opCase); ACC = mVU->regAlloc->allocReg(32); Fs = mVU->regAlloc->allocReg(_Fs_, _Fd_, _X_Y_Z_W); @@ -218,12 +225,12 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) { if (_XYZW_SS2) { SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); } opCase2 { mVUclamp1(Fs, -1, _X_Y_Z_W); } // Clamp Needed for alot of games (TOTA, DoM, etc...) - if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[0](mVU, Fs, ACC, -1, -1); } - else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[0](mVU, Fs, ACC, -1, -1); } + if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[0](mVU, Fs, ACC, tempFt, -1); } + else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[0](mVU, Fs, ACC, tempFt, -1); } if (_XYZW_SS2) { SSE2_PSHUFD_XMM_to_XMM(ACC, ACC, shuffleSS(_X_Y_Z_W)); } - mVUupdateFlags(mVU, Fs, ((opCase==2) ? Ft : -1), (((opCase==1) && _XYZW_SS2) ? Ft : -1)); + mVUupdateFlags(mVU, Fs, tempFt); mVU->regAlloc->clearNeeded(Fs); // Always Clear Written Reg First mVU->regAlloc->clearNeeded(Ft); @@ -236,16 +243,16 @@ void mVU_FMACc(microVU* mVU, int recPass, int opCase, const char* opName) { void mVU_FMACd(microVU* mVU, int recPass, int opCase, const char* opName) { pass1 { setupPass1(mVU, opCase, 0, 0); } pass2 { - int Fs, Ft, Fd; - setupFtReg(mVU, Ft, opCase); + int Fs, Ft, Fd, tempFt; + setupFtReg(mVU, Ft, tempFt, opCase); - Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); + Fs = mVU->regAlloc->allocReg(_Fs_, 0, _X_Y_Z_W); Fd = mVU->regAlloc->allocReg(32, _Fd_, _X_Y_Z_W); - if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[1](mVU, Fd, Fs, -1, -1); } - else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[1](mVU, Fd, Fs, -1, -1); } + if (_XYZW_SS) { SSE_SS[2](mVU, Fs, Ft, -1, -1); SSE_SS[1](mVU, Fd, Fs, tempFt, -1); } + else { SSE_PS[2](mVU, Fs, Ft, -1, -1); SSE_PS[1](mVU, Fd, Fs, tempFt, -1); } - mVUupdateFlags(mVU, Fd, Fs, ((opCase==2) ? Ft : (((opCase==1) && _XYZW_SS2) ? Ft : -1))); + mVUupdateFlags(mVU, Fd, Fs, tempFt); mVU->regAlloc->clearNeeded(Fd); // Always Clear Written Reg First mVU->regAlloc->clearNeeded(Ft); diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index ef73ea85f5..657f96d5d0 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -377,15 +377,28 @@ REG64_(GSReg, IMR) REG_END REG64_(GSReg, PMODE) - uint32 EN1:1; - uint32 EN2:1; - uint32 CRTMD:3; - uint32 MMOD:1; - uint32 AMOD:1; - uint32 SLBG:1; - uint32 ALP:8; - uint32 _PAD:16; - uint32 _PAD1:32; +union +{ + struct + { + uint32 EN1:1; + uint32 EN2:1; + uint32 CRTMD:3; + uint32 MMOD:1; + uint32 AMOD:1; + uint32 SLBG:1; + uint32 ALP:8; + uint32 _PAD:16; + uint32 _PAD1:32; + }; + + struct + { + uint32 EN:2; + uint32 _PAD2:30; + uint32 _PAD3:32; + }; +}; REG_END REG64_(GSReg, SIGLBLID) diff --git a/plugins/GSdx/GSClut.h b/plugins/GSdx/GSClut.h index e4301ac6fd..1ac00ba328 100644 --- a/plugins/GSdx/GSClut.h +++ b/plugins/GSdx/GSClut.h @@ -71,7 +71,7 @@ __declspec(align(16)) class GSClut : public GSAlignedClass<16> template void WriteCLUT16_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); template void WriteCLUT16S_CSM2(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); - void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) {ASSERT(0);} // xenosaga 3 + void WriteCLUT_NULL(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) {} // xenosaga 3, bios static void WriteCLUT_T32_I8_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut); static void WriteCLUT_T32_I4_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut); diff --git a/plugins/GSdx/GSDevice.cpp b/plugins/GSdx/GSDevice.cpp index d023f45649..8d1dd4007a 100644 --- a/plugins/GSdx/GSDevice.cpp +++ b/plugins/GSdx/GSDevice.cpp @@ -20,6 +20,7 @@ */ #include "StdAfx.h" +#include "GSdx.h" #include "GSDevice.h" GSDevice::GSDevice() @@ -31,6 +32,12 @@ GSDevice::GSDevice() , m_blend(NULL) , m_1x1(NULL) { + memset(&m_vertices, 0, sizeof(m_vertices)); + + m_msaa = theApp.GetConfig("msaa", 0); + + m_msaa_desc.Count = 1; + m_msaa_desc.Quality = 0; } GSDevice::~GSDevice() @@ -82,7 +89,7 @@ void GSDevice::Present(const GSVector4i& r, int shader, bool limit) int w = std::max(cr.width(), 1); int h = std::max(cr.height(), 1); - if(!m_backbuffer || m_backbuffer->m_size.x != w || m_backbuffer->m_size.y != h) + if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h, DontCare)) { @@ -102,15 +109,20 @@ void GSDevice::Present(const GSVector4i& r, int shader, bool limit) Flip(limit); } -GSTexture* GSDevice::Fetch(int type, int w, int h, int format) +GSTexture* GSDevice::Fetch(int type, int w, int h, bool msaa, int format) { + if(m_msaa < 2) + { + msaa = false; + } + GSVector2i size(w, h); for(list::iterator i = m_pool.begin(); i != m_pool.end(); i++) { GSTexture* t = *i; - if(t->GetType() == type && t->GetFormat() == format && t->GetSize() == size) + if(t->GetType() == type && t->GetFormat() == format && t->GetSize() == size && t->IsMSAA() == msaa) { m_pool.erase(i); @@ -118,7 +130,13 @@ GSTexture* GSDevice::Fetch(int type, int w, int h, int format) } } - return Create(type, w, h, format); + return Create(type, w, h, msaa, format); +} + +void GSDevice::EndScene() +{ + m_vertices.start += m_vertices.count; + m_vertices.count = 0; } void GSDevice::Recycle(GSTexture* t) @@ -127,7 +145,7 @@ void GSDevice::Recycle(GSTexture* t) { m_pool.push_front(t); - while(m_pool.size() > 200) + while(m_pool.size() > 600) { delete m_pool.back(); @@ -136,24 +154,24 @@ void GSDevice::Recycle(GSTexture* t) } } -GSTexture* GSDevice::CreateRenderTarget(int w, int h, int format) +GSTexture* GSDevice::CreateRenderTarget(int w, int h, bool msaa, int format) { - return Fetch(GSTexture::RenderTarget, w, h, format); + return Fetch(GSTexture::RenderTarget, w, h, msaa, format); } -GSTexture* GSDevice::CreateDepthStencil(int w, int h, int format) +GSTexture* GSDevice::CreateDepthStencil(int w, int h, bool msaa, int format) { - return Fetch(GSTexture::DepthStencil, w, h, format); + return Fetch(GSTexture::DepthStencil, w, h, msaa, format); } GSTexture* GSDevice::CreateTexture(int w, int h, int format) { - return Fetch(GSTexture::Texture, w, h, format); + return Fetch(GSTexture::Texture, w, h, false, format); } GSTexture* GSDevice::CreateOffscreen(int w, int h, int format) { - return Fetch(GSTexture::Offscreen, w, h, format); + return Fetch(GSTexture::Offscreen, w, h, false, format); } void GSDevice::StretchRect(GSTexture* st, GSTexture* dt, const GSVector4& dr, int shader, bool linear) @@ -170,7 +188,7 @@ void GSDevice::Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVec { if(!m_merge || !(m_merge->GetSize() == fs)) { - m_merge = CreateRenderTarget(fs.x, fs.y); + m_merge = CreateRenderTarget(fs.x, fs.y, false); } // TODO: m_1x1 @@ -181,7 +199,25 @@ void GSDevice::Merge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, const GSVec if(m_merge) { - DoMerge(st, sr, dr, m_merge, slbg, mmod, c); + GSTexture* tex[2] = {NULL, NULL}; + + for(int i = 0; i < countof(tex); i++) + { + if(st[i] != NULL) + { + tex[i] = st[i]->IsMSAA() ? Resolve(st[i]) : st[i]; + } + } + + DoMerge(tex, sr, dr, m_merge, slbg, mmod, c); + + for(int i = 0; i < countof(tex); i++) + { + if(tex[i] != st[i]) + { + Recycle(tex[i]); + } + } } else { @@ -195,7 +231,7 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse { if(!m_weavebob || !(m_weavebob->GetSize() == ds)) { - m_weavebob = CreateRenderTarget(ds.x, ds.y); + m_weavebob = CreateRenderTarget(ds.x, ds.y, false); } if(mode == 0 || mode == 2) // weave or blend @@ -210,7 +246,7 @@ void GSDevice::Interlace(const GSVector2i& ds, int field, int mode, float yoffse if(!m_blend || !(m_blend->GetSize() == ds)) { - m_blend = CreateRenderTarget(ds.x, ds.y); + m_blend = CreateRenderTarget(ds.x, ds.y, false); } DoInterlace(m_weavebob, m_blend, 2, false, 0); @@ -240,7 +276,7 @@ bool GSDevice::ResizeTexture(GSTexture** t, int w, int h) GSTexture* t2 = *t; - if(t2 == NULL || t2->m_size.x != w || t2->m_size.y != h) + if(t2 == NULL || t2->GetWidth() != w || t2->GetHeight() != h) { delete t2; diff --git a/plugins/GSdx/GSDevice.h b/plugins/GSdx/GSDevice.h index 087e49edfc..8fb542ce94 100644 --- a/plugins/GSdx/GSDevice.h +++ b/plugins/GSdx/GSDevice.h @@ -50,7 +50,7 @@ class GSDevice : public GSAlignedClass<16> { list m_pool; - GSTexture* Fetch(int type, int w, int h, int format); + GSTexture* Fetch(int type, int w, int h, bool msaa, int format); protected: GSWnd* m_wnd; @@ -63,8 +63,11 @@ protected: GSTexture* m_1x1; GSTexture* m_current; struct {D3D_FEATURE_LEVEL level; string model, vs, gs, ps;} m_shader; + struct {size_t stride, start, count, limit;} m_vertices; + uint32 m_msaa; + DXGI_SAMPLE_DESC m_msaa_desc; - virtual GSTexture* Create(int type, int w, int h, int format) = 0; + virtual GSTexture* Create(int type, int w, int h, bool msaa, int format) = 0; virtual void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c) = 0; virtual void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset) = 0; @@ -85,18 +88,20 @@ public: virtual void BeginScene() {} virtual void DrawPrimitive() {}; - virtual void EndScene() {} + virtual void EndScene(); virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {} virtual void ClearRenderTarget(GSTexture* t, uint32 c) {} virtual void ClearDepth(GSTexture* t, float c) {} virtual void ClearStencil(GSTexture* t, uint8 c) {} - virtual GSTexture* CreateRenderTarget(int w, int h, int format = 0); - virtual GSTexture* CreateDepthStencil(int w, int h, int format = 0); + virtual GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); + virtual GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); virtual GSTexture* CreateTexture(int w, int h, int format = 0); virtual GSTexture* CreateOffscreen(int w, int h, int format = 0); + virtual GSTexture* Resolve(GSTexture* t) {return NULL;} + virtual GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0) {return NULL;} virtual void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r) {} diff --git a/plugins/GSdx/GSDevice10.cpp b/plugins/GSdx/GSDevice10.cpp index 29e584d8a1..976b62b8c3 100644 --- a/plugins/GSdx/GSDevice10.cpp +++ b/plugins/GSdx/GSDevice10.cpp @@ -25,31 +25,11 @@ #include "resource.h" GSDevice10::GSDevice10() - : m_vb(NULL) - , m_vb_stride(0) - , m_layout(NULL) - , m_topology(D3D10_PRIMITIVE_TOPOLOGY_UNDEFINED) - , m_vs(NULL) - , m_vs_cb(NULL) - , m_gs(NULL) - , m_ps(NULL) - , m_ps_cb(NULL) - , m_scissor(0, 0, 0, 0) - , m_viewport(0, 0) - , m_dss(NULL) - , m_sref(0) - , m_bs(NULL) - , m_bf(-1) - , m_rtv(NULL) - , m_dsv(NULL) { - memset(m_ps_srv, 0, sizeof(m_ps_srv)); - memset(m_ps_ss, 0, sizeof(m_ps_ss)); + memset(&m_state, 0, sizeof(m_state)); - m_vertices.stride = 0; - m_vertices.start = 0; - m_vertices.count = 0; - m_vertices.limit = 0; + m_state.topology = D3D10_PRIMITIVE_TOPOLOGY_UNDEFINED; + m_state.bf = -1; } GSDevice10::~GSDevice10() @@ -63,7 +43,7 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync) return false; } - HRESULT hr; + HRESULT hr = E_FAIL; DXGI_SWAP_CHAIN_DESC scd; D3D10_BUFFER_DESC bd; @@ -86,7 +66,7 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync) scd.SampleDesc.Quality = 0; scd.Windowed = TRUE; - uint32 flags = D3D10_CREATE_DEVICE_SINGLETHREADED; //disables thread safety, should be fine (speedup) + uint32 flags = D3D10_CREATE_DEVICE_SINGLETHREADED; #ifdef DEBUG flags |= D3D10_CREATE_DEVICE_DEBUG; @@ -115,6 +95,22 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync) if(FAILED(hr)) return false; + // msaa + + for(uint32 i = 2; i <= D3D10_MAX_MULTISAMPLE_SAMPLE_COUNT; i++) + { + uint32 quality[2] = {0, 0}; + + if(SUCCEEDED(m_dev->CheckMultisampleQualityLevels(DXGI_FORMAT_R8G8B8A8_UNORM, i, &quality[0])) && quality[0] > 0 + && SUCCEEDED(m_dev->CheckMultisampleQualityLevels(DXGI_FORMAT_D32_FLOAT_S8X24_UINT, i, &quality[1])) && quality[1] > 0) + { + m_msaa_desc.Count = i; + m_msaa_desc.Quality = std::min(quality[0] - 1, quality[1] - 1); + + if(i >= m_msaa) break; + } + } + // convert D3D10_INPUT_ELEMENT_DESC il_convert[] = @@ -199,7 +195,7 @@ bool GSDevice10::Create(GSWnd* wnd, bool vsync) rd.SlopeScaledDepthBias = 0; rd.DepthClipEnable = false; // ??? rd.ScissorEnable = true; - rd.MultisampleEnable = false; + rd.MultisampleEnable = true; rd.AntialiasedLineEnable = false; hr = m_dev->CreateRasterizerState(&rd, &m_rs); @@ -258,27 +254,11 @@ void GSDevice10::Flip(bool limit) m_swapchain->Present(m_vsync && limit ? 1 : 0, 0); } -void GSDevice10::BeginScene() -{ -} - void GSDevice10::DrawPrimitive() { m_dev->Draw(m_vertices.count, m_vertices.start); } -void GSDevice10::EndScene() -{ - PSSetShaderResources(NULL, NULL); - - // not clearing the rt/ds gives a little fps boost in complex games (5-10%) - - // OMSetRenderTargets(NULL, NULL); - - m_vertices.start += m_vertices.count; - m_vertices.count = 0; -} - void GSDevice10::ClearRenderTarget(GSTexture* t, const GSVector4& c) { m_dev->ClearRenderTargetView(*(GSTexture10*)t, c.v); @@ -301,7 +281,7 @@ void GSDevice10::ClearStencil(GSTexture* t, uint8 c) m_dev->ClearDepthStencilView(*(GSTexture10*)t, D3D10_CLEAR_STENCIL, 0, c); } -GSTexture* GSDevice10::Create(int type, int w, int h, int format) +GSTexture* GSDevice10::Create(int type, int w, int h, bool msaa, int format) { HRESULT hr; @@ -318,6 +298,11 @@ GSTexture* GSDevice10::Create(int type, int w, int h, int format) desc.SampleDesc.Quality = 0; desc.Usage = D3D10_USAGE_DEFAULT; + if(msaa) + { + desc.SampleDesc = m_msaa_desc; + } + switch(type) { case GSTexture::RenderTarget: @@ -359,14 +344,14 @@ GSTexture* GSDevice10::Create(int type, int w, int h, int format) return t; } -GSTexture* GSDevice10::CreateRenderTarget(int w, int h, int format) +GSTexture* GSDevice10::CreateRenderTarget(int w, int h, bool msaa, int format) { - return __super::CreateRenderTarget(w, h, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); + return __super::CreateRenderTarget(w, h, msaa, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); } -GSTexture* GSDevice10::CreateDepthStencil(int w, int h, int format) +GSTexture* GSDevice10::CreateDepthStencil(int w, int h, bool msaa, int format) { - return __super::CreateDepthStencil(w, h, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); // DXGI_FORMAT_R32G8X24_TYPELESS + return __super::CreateDepthStencil(w, h, msaa, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); // DXGI_FORMAT_R32G8X24_TYPELESS } GSTexture* GSDevice10::CreateTexture(int w, int h, int format) @@ -379,6 +364,22 @@ GSTexture* GSDevice10::CreateOffscreen(int w, int h, int format) return __super::CreateOffscreen(w, h, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); } +GSTexture* GSDevice10::Resolve(GSTexture* t) +{ + ASSERT(t != NULL && t->IsMSAA()); + + if(GSTexture* dst = CreateRenderTarget(t->GetWidth(), t->GetHeight(), false, t->GetFormat())) + { + dst->SetScale(t->GetScale()); + + m_dev->ResolveSubresource(*(GSTexture10*)dst, 0, *(GSTexture10*)t, 0, (DXGI_FORMAT)t->GetFormat()); + + return dst; + } + + return NULL; +} + GSTexture* GSDevice10::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format) { GSTexture* dst = NULL; @@ -395,11 +396,16 @@ GSTexture* GSDevice10::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, return false; } - if(GSTexture* rt = CreateRenderTarget(w, h, format)) + if(GSTexture* rt = CreateRenderTarget(w, h, false, format)) { GSVector4 dr(0, 0, w, h); - StretchRect(src, sr, rt, dr, m_convert.ps[format == DXGI_FORMAT_R16_UINT ? 1 : 0], NULL); + if(GSTexture* src2 = src->IsMSAA() ? Resolve(src) : src) + { + StretchRect(src2, sr, rt, dr, m_convert.ps[format == DXGI_FORMAT_R16_UINT ? 1 : 0], NULL); + + if(src2 != src) Recycle(src2); + } dst = CreateOffscreen(w, h, format); @@ -483,6 +489,8 @@ void GSDevice10::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, // EndScene(); + + PSSetShaderResources(NULL, NULL); } void GSDevice10::DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c) @@ -525,14 +533,15 @@ void GSDevice10::IASetVertexBuffer(const void* vertices, size_t stride, size_t c if(count * stride > m_vertices.limit * m_vertices.stride) { - m_vertices.vb_old = m_vertices.vb; - m_vertices.vb = NULL; + m_vb_old = m_vb; + m_vb = NULL; + m_vertices.start = 0; m_vertices.count = 0; m_vertices.limit = std::max(count * 3 / 2, 10000); } - if(m_vertices.vb == NULL) + if(m_vb == NULL) { D3D10_BUFFER_DESC bd; @@ -545,7 +554,7 @@ void GSDevice10::IASetVertexBuffer(const void* vertices, size_t stride, size_t c HRESULT hr; - hr = m_dev->CreateBuffer(&bd, NULL, &m_vertices.vb); + hr = m_dev->CreateBuffer(&bd, NULL, &m_vb); if(FAILED(hr)) return; } @@ -561,25 +570,25 @@ void GSDevice10::IASetVertexBuffer(const void* vertices, size_t stride, size_t c void* v = NULL; - if(SUCCEEDED(m_vertices.vb->Map(type, 0, &v))) + if(SUCCEEDED(m_vb->Map(type, 0, &v))) { GSVector4i::storent((uint8*)v + m_vertices.start * stride, vertices, count * stride); - m_vertices.vb->Unmap(); + m_vb->Unmap(); } m_vertices.count = count; m_vertices.stride = stride; - IASetVertexBuffer(m_vertices.vb, stride); + IASetVertexBuffer(m_vb, stride); } void GSDevice10::IASetVertexBuffer(ID3D10Buffer* vb, size_t stride) { - if(m_vb != vb || m_vb_stride != stride) + if(m_state.vb != vb || m_state.vb_stride != stride) { - m_vb = vb; - m_vb_stride = stride; + m_state.vb = vb; + m_state.vb_stride = stride; uint32 offset = 0; @@ -589,9 +598,9 @@ void GSDevice10::IASetVertexBuffer(ID3D10Buffer* vb, size_t stride) void GSDevice10::IASetInputLayout(ID3D10InputLayout* layout) { - if(m_layout != layout) + if(m_state.layout != layout) { - m_layout = layout; + m_state.layout = layout; m_dev->IASetInputLayout(layout); } @@ -599,9 +608,9 @@ void GSDevice10::IASetInputLayout(ID3D10InputLayout* layout) void GSDevice10::IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY topology) { - if(m_topology != topology) + if(m_state.topology != topology) { - m_topology = topology; + m_state.topology = topology; m_dev->IASetPrimitiveTopology(topology); } @@ -609,16 +618,16 @@ void GSDevice10::IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY topology) void GSDevice10::VSSetShader(ID3D10VertexShader* vs, ID3D10Buffer* vs_cb) { - if(m_vs != vs) + if(m_state.vs != vs) { - m_vs = vs; + m_state.vs = vs; m_dev->VSSetShader(vs); } - if(m_vs_cb != vs_cb) + if(m_state.vs_cb != vs_cb) { - m_vs_cb = vs_cb; + m_state.vs_cb = vs_cb; m_dev->VSSetConstantBuffers(0, 1, &vs_cb); } @@ -626,9 +635,9 @@ void GSDevice10::VSSetShader(ID3D10VertexShader* vs, ID3D10Buffer* vs_cb) void GSDevice10::GSSetShader(ID3D10GeometryShader* gs) { - if(m_gs != gs) + if(m_state.gs != gs) { - m_gs = gs; + m_state.gs = gs; m_dev->GSSetShader(gs); } @@ -638,14 +647,14 @@ void GSDevice10::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) { ID3D10ShaderResourceView* srv0 = NULL; ID3D10ShaderResourceView* srv1 = NULL; - + if(sr0) srv0 = *(GSTexture10*)sr0; if(sr1) srv1 = *(GSTexture10*)sr1; - if(m_ps_srv[0] != srv0 || m_ps_srv[1] != srv1) + if(m_state.ps_srv[0] != srv0 || m_state.ps_srv[1] != srv1) { - m_ps_srv[0] = srv0; - m_ps_srv[1] = srv1; + m_state.ps_srv[0] = srv0; + m_state.ps_srv[1] = srv1; ID3D10ShaderResourceView* srvs[] = {srv0, srv1}; @@ -655,16 +664,16 @@ void GSDevice10::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) void GSDevice10::PSSetShader(ID3D10PixelShader* ps, ID3D10Buffer* ps_cb) { - if(m_ps != ps) + if(m_state.ps != ps) { - m_ps = ps; + m_state.ps = ps; m_dev->PSSetShader(ps); } - if(m_ps_cb != ps_cb) + if(m_state.ps_cb != ps_cb) { - m_ps_cb = ps_cb; + m_state.ps_cb = ps_cb; m_dev->PSSetConstantBuffers(0, 1, &ps_cb); } @@ -672,10 +681,10 @@ void GSDevice10::PSSetShader(ID3D10PixelShader* ps, ID3D10Buffer* ps_cb) void GSDevice10::PSSetSamplerState(ID3D10SamplerState* ss0, ID3D10SamplerState* ss1) { - if(m_ps_ss[0] != ss0 || m_ps_ss[1] != ss1) + if(m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1) { - m_ps_ss[0] = ss0; - m_ps_ss[1] = ss1; + m_state.ps_ss[0] = ss0; + m_state.ps_ss[1] = ss1; ID3D10SamplerState* sss[] = {ss0, ss1}; @@ -685,10 +694,10 @@ void GSDevice10::PSSetSamplerState(ID3D10SamplerState* ss0, ID3D10SamplerState* void GSDevice10::OMSetDepthStencilState(ID3D10DepthStencilState* dss, uint8 sref) { - if(m_dss != dss || m_sref != sref) + if(m_state.dss != dss || m_state.sref != sref) { - m_dss = dss; - m_sref = sref; + m_state.dss = dss; + m_state.sref = sref; m_dev->OMSetDepthStencilState(dss, sref); } @@ -696,10 +705,10 @@ void GSDevice10::OMSetDepthStencilState(ID3D10DepthStencilState* dss, uint8 sref void GSDevice10::OMSetBlendState(ID3D10BlendState* bs, float bf) { - if(m_bs != bs || m_bf != bf) + if(m_state.bs != bs || m_state.bf != bf) { - m_bs = bs; - m_bf = bf; + m_state.bs = bs; + m_state.bf = bf; float BlendFactor[] = {bf, bf, bf, 0}; @@ -715,17 +724,17 @@ void GSDevice10::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector if(rt) rtv = *(GSTexture10*)rt; if(ds) dsv = *(GSTexture10*)ds; - if(m_rtv != rtv || m_dsv != dsv) + if(m_state.rtv != rtv || m_state.dsv != dsv) { - m_rtv = rtv; - m_dsv = dsv; + m_state.rtv = rtv; + m_state.dsv = dsv; m_dev->OMSetRenderTargets(1, &rtv, dsv); } - if(m_viewport != rt->m_size) + if(m_state.viewport != rt->GetSize()) { - m_viewport = rt->m_size; + m_state.viewport = rt->GetSize(); D3D10_VIEWPORT vp; @@ -733,19 +742,19 @@ void GSDevice10::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector vp.TopLeftX = 0; vp.TopLeftY = 0; - vp.Width = rt->m_size.x; - vp.Height = rt->m_size.y; + vp.Width = rt->GetWidth(); + vp.Height = rt->GetHeight(); vp.MinDepth = 0.0f; vp.MaxDepth = 1.0f; m_dev->RSSetViewports(1, &vp); } - GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy(); + GSVector4i r = scissor ? *scissor : GSVector4i(rt->GetSize()).zwxy(); - if(!m_scissor.eq(r)) + if(!m_state.scissor.eq(r)) { - m_scissor = r; + m_state.scissor = r; m_dev->RSSetScissorRects(1, r); } diff --git a/plugins/GSdx/GSDevice10.h b/plugins/GSdx/GSDevice10.h index 49fd700a2c..6c427da15e 100644 --- a/plugins/GSdx/GSDevice10.h +++ b/plugins/GSdx/GSDevice10.h @@ -26,29 +26,7 @@ class GSDevice10 : public GSDevice { - ID3D10Buffer* m_vb; - size_t m_vb_stride; - ID3D10InputLayout* m_layout; - D3D10_PRIMITIVE_TOPOLOGY m_topology; - ID3D10VertexShader* m_vs; - ID3D10Buffer* m_vs_cb; - ID3D10GeometryShader* m_gs; - ID3D10ShaderResourceView* m_ps_srv[2]; - ID3D10PixelShader* m_ps; - ID3D10Buffer* m_ps_cb; - ID3D10SamplerState* m_ps_ss[2]; - GSVector2i m_viewport; - GSVector4i m_scissor; - ID3D10DepthStencilState* m_dss; - uint8 m_sref; - ID3D10BlendState* m_bs; - float m_bf; - ID3D10RenderTargetView* m_rtv; - ID3D10DepthStencilView* m_dsv; - - // - - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0); @@ -57,12 +35,31 @@ class GSDevice10 : public GSDevice CComPtr m_dev; CComPtr m_swapchain; + CComPtr m_vb; + CComPtr m_vb_old; struct { - CComPtr vb, vb_old; - size_t stride, start, count, limit; - } m_vertices; + ID3D10Buffer* vb; + size_t vb_stride; + ID3D10InputLayout* layout; + D3D10_PRIMITIVE_TOPOLOGY topology; + ID3D10VertexShader* vs; + ID3D10Buffer* vs_cb; + ID3D10GeometryShader* gs; + ID3D10ShaderResourceView* ps_srv[2]; + ID3D10PixelShader* ps; + ID3D10Buffer* ps_cb; + ID3D10SamplerState* ps_ss[2]; + GSVector2i viewport; + GSVector4i scissor; + ID3D10DepthStencilState* dss; + uint8 sref; + ID3D10BlendState* bs; + float bf; + ID3D10RenderTargetView* rtv; + ID3D10DepthStencilView* dsv; + } m_state; public: // TODO CComPtr m_rs; @@ -99,20 +96,20 @@ public: bool Reset(int w, int h, int mode); void Flip(bool limit); - void BeginScene(); void DrawPrimitive(); - void EndScene(); void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, uint32 c); void ClearDepth(GSTexture* t, float c); void ClearStencil(GSTexture* t, uint8 c); - GSTexture* CreateRenderTarget(int w, int h, int format = 0); - GSTexture* CreateDepthStencil(int w, int h, int format = 0); + GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); + GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); GSTexture* CreateTexture(int w, int h, int format = 0); GSTexture* CreateOffscreen(int w, int h, int format = 0); + GSTexture* Resolve(GSTexture* t); + GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0); void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r); diff --git a/plugins/GSdx/GSDevice11.cpp b/plugins/GSdx/GSDevice11.cpp index 679b5016ec..d38eb25979 100644 --- a/plugins/GSdx/GSDevice11.cpp +++ b/plugins/GSdx/GSDevice11.cpp @@ -25,31 +25,11 @@ #include "resource.h" GSDevice11::GSDevice11() - : m_vb(NULL) - , m_vb_stride(0) - , m_layout(NULL) - , m_topology(D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED) - , m_vs(NULL) - , m_vs_cb(NULL) - , m_gs(NULL) - , m_ps(NULL) - , m_ps_cb(NULL) - , m_scissor(0, 0, 0, 0) - , m_viewport(0, 0) - , m_dss(NULL) - , m_sref(0) - , m_bs(NULL) - , m_bf(-1) - , m_rtv(NULL) - , m_dsv(NULL) { - memset(m_ps_srv, 0, sizeof(m_ps_srv)); - memset(m_ps_ss, 0, sizeof(m_ps_ss)); + memset(&m_state, 0, sizeof(m_state)); - m_vertices.stride = 0; - m_vertices.start = 0; - m_vertices.count = 0; - m_vertices.limit = 0; + m_state.topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + m_state.bf = -1; } GSDevice11::~GSDevice11() @@ -63,7 +43,7 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync) return false; } - HRESULT hr; + HRESULT hr = E_FAIL; DXGI_SWAP_CHAIN_DESC scd; D3D11_BUFFER_DESC bd; @@ -86,7 +66,7 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync) scd.SampleDesc.Quality = 0; scd.Windowed = TRUE; - uint32 flags = D3D11_CREATE_DEVICE_SINGLETHREADED; //disables thread safety, should be fine (speedup) + uint32 flags = D3D11_CREATE_DEVICE_SINGLETHREADED; #ifdef DEBUG flags |= D3D11_CREATE_DEVICE_DEBUG; @@ -115,6 +95,22 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync) hr = m_dev->CheckFeatureSupport(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &options, sizeof(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS)); + // msaa + + for(uint32 i = 2; i <= D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT; i++) + { + uint32 quality[2] = {0, 0}; + + if(SUCCEEDED(m_dev->CheckMultisampleQualityLevels(DXGI_FORMAT_R8G8B8A8_UNORM, i, &quality[0])) && quality[0] > 0 + && SUCCEEDED(m_dev->CheckMultisampleQualityLevels(DXGI_FORMAT_D32_FLOAT_S8X24_UINT, i, &quality[1])) && quality[1] > 0) + { + m_msaa_desc.Count = i; + m_msaa_desc.Quality = std::min(quality[0] - 1, quality[1] - 1); + + if(i >= m_msaa) break; + } + } + // convert D3D11_INPUT_ELEMENT_DESC il_convert[] = @@ -198,7 +194,7 @@ bool GSDevice11::Create(GSWnd* wnd, bool vsync) rd.SlopeScaledDepthBias = 0; rd.DepthClipEnable = false; // ??? rd.ScissorEnable = true; - rd.MultisampleEnable = false; + rd.MultisampleEnable = true; rd.AntialiasedLineEnable = false; hr = m_dev->CreateRasterizerState(&rd, &m_rs); @@ -257,27 +253,11 @@ void GSDevice11::Flip(bool limit) m_swapchain->Present(m_vsync && limit ? 1 : 0, 0); } -void GSDevice11::BeginScene() -{ -} - void GSDevice11::DrawPrimitive() { m_ctx->Draw(m_vertices.count, m_vertices.start); } -void GSDevice11::EndScene() -{ - PSSetShaderResources(NULL, NULL); - - // not clearing the rt/ds gives a little fps boost in complex games (5-10%) - - // OMSetRenderTargets(NULL, NULL); - - m_vertices.start += m_vertices.count; - m_vertices.count = 0; -} - void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c) { m_ctx->ClearRenderTargetView(*(GSTexture11*)t, c.v); @@ -300,7 +280,7 @@ void GSDevice11::ClearStencil(GSTexture* t, uint8 c) m_ctx->ClearDepthStencilView(*(GSTexture11*)t, D3D11_CLEAR_STENCIL, 0, c); } -GSTexture* GSDevice11::Create(int type, int w, int h, int format) +GSTexture* GSDevice11::Create(int type, int w, int h, bool msaa, int format) { HRESULT hr; @@ -317,6 +297,11 @@ GSTexture* GSDevice11::Create(int type, int w, int h, int format) desc.SampleDesc.Quality = 0; desc.Usage = D3D11_USAGE_DEFAULT; + if(msaa) + { + desc.SampleDesc = m_msaa_desc; + } + switch(type) { case GSTexture::RenderTarget: @@ -358,14 +343,14 @@ GSTexture* GSDevice11::Create(int type, int w, int h, int format) return t; } -GSTexture* GSDevice11::CreateRenderTarget(int w, int h, int format) +GSTexture* GSDevice11::CreateRenderTarget(int w, int h, bool msaa, int format) { - return __super::CreateRenderTarget(w, h, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); + return __super::CreateRenderTarget(w, h, msaa, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); } -GSTexture* GSDevice11::CreateDepthStencil(int w, int h, int format) +GSTexture* GSDevice11::CreateDepthStencil(int w, int h, bool msaa, int format) { - return __super::CreateDepthStencil(w, h, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); + return __super::CreateDepthStencil(w, h, msaa, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); } GSTexture* GSDevice11::CreateTexture(int w, int h, int format) @@ -378,6 +363,22 @@ GSTexture* GSDevice11::CreateOffscreen(int w, int h, int format) return __super::CreateOffscreen(w, h, format ? format : DXGI_FORMAT_R8G8B8A8_UNORM); } +GSTexture* GSDevice11::Resolve(GSTexture* t) +{ + ASSERT(t != NULL && t->IsMSAA()); + + if(GSTexture* dst = CreateRenderTarget(t->GetWidth(), t->GetHeight(), false, t->GetFormat())) + { + dst->SetScale(t->GetScale()); + + m_ctx->ResolveSubresource(*(GSTexture11*)dst, 0, *(GSTexture11*)t, 0, (DXGI_FORMAT)t->GetFormat()); + + return dst; + } + + return NULL; +} + GSTexture* GSDevice11::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format) { GSTexture* dst = NULL; @@ -394,11 +395,16 @@ GSTexture* GSDevice11::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, return false; } - if(GSTexture* rt = CreateRenderTarget(w, h, format)) + if(GSTexture* rt = CreateRenderTarget(w, h, false, format)) { GSVector4 dr(0, 0, w, h); - StretchRect(src, sr, rt, dr, m_convert.ps[format == DXGI_FORMAT_R16_UINT ? 1 : 0], NULL); + if(GSTexture* src2 = src->IsMSAA() ? Resolve(src) : src) + { + StretchRect(src2, sr, rt, dr, m_convert.ps[format == DXGI_FORMAT_R16_UINT ? 1 : 0], NULL); + + if(src2 != src) Recycle(src2); + } dst = CreateOffscreen(w, h, format); @@ -482,6 +488,8 @@ void GSDevice11::StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, // EndScene(); + + PSSetShaderResources(NULL, NULL); } void GSDevice11::DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c) @@ -524,14 +532,15 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c if(count * stride > m_vertices.limit * m_vertices.stride) { - m_vertices.vb_old = m_vertices.vb; - m_vertices.vb = NULL; + m_vb_old = m_vb; + m_vb = NULL; + m_vertices.start = 0; m_vertices.count = 0; m_vertices.limit = std::max(count * 3 / 2, 10000); } - if(m_vertices.vb == NULL) + if(m_vb == NULL) { D3D11_BUFFER_DESC bd; @@ -544,7 +553,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c HRESULT hr; - hr = m_dev->CreateBuffer(&bd, NULL, &m_vertices.vb); + hr = m_dev->CreateBuffer(&bd, NULL, &m_vb); if(FAILED(hr)) return; } @@ -560,25 +569,25 @@ void GSDevice11::IASetVertexBuffer(const void* vertices, size_t stride, size_t c D3D11_MAPPED_SUBRESOURCE m; - if(SUCCEEDED(m_ctx->Map(m_vertices.vb, 0, type, 0, &m))) + if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m))) { GSVector4i::storent((uint8*)m.pData + m_vertices.start * stride, vertices, count * stride); - m_ctx->Unmap(m_vertices.vb, 0); + m_ctx->Unmap(m_vb, 0); } m_vertices.count = count; m_vertices.stride = stride; - IASetVertexBuffer(m_vertices.vb, stride); + IASetVertexBuffer(m_vb, stride); } void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride) { - if(m_vb != vb || m_vb_stride != stride) + if(m_state.vb != vb || m_state.vb_stride != stride) { - m_vb = vb; - m_vb_stride = stride; + m_state.vb = vb; + m_state.vb_stride = stride; uint32 offset = 0; @@ -588,9 +597,9 @@ void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride) void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout) { - if(m_layout != layout) + if(m_state.layout != layout) { - m_layout = layout; + m_state.layout = layout; m_ctx->IASetInputLayout(layout); } @@ -598,9 +607,9 @@ void GSDevice11::IASetInputLayout(ID3D11InputLayout* layout) void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology) { - if(m_topology != topology) + if(m_state.topology != topology) { - m_topology = topology; + m_state.topology = topology; m_ctx->IASetPrimitiveTopology(topology); } @@ -608,16 +617,16 @@ void GSDevice11::IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY topology) void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb) { - if(m_vs != vs) + if(m_state.vs != vs) { - m_vs = vs; + m_state.vs = vs; m_ctx->VSSetShader(vs, NULL, 0); } - if(m_vs_cb != vs_cb) + if(m_state.vs_cb != vs_cb) { - m_vs_cb = vs_cb; + m_state.vs_cb = vs_cb; m_ctx->VSSetConstantBuffers(0, 1, &vs_cb); } @@ -625,11 +634,11 @@ void GSDevice11::VSSetShader(ID3D11VertexShader* vs, ID3D11Buffer* vs_cb) void GSDevice11::GSSetShader(ID3D11GeometryShader* gs) { - if(m_gs != gs) + if(m_state.gs != gs) { - m_ctx->GSSetShader(gs, NULL, 0); + m_state.gs = gs; - m_gs = gs; + m_ctx->GSSetShader(gs, NULL, 0); } } @@ -641,10 +650,10 @@ void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) if(sr0) srv0 = *(GSTexture11*)sr0; if(sr1) srv1 = *(GSTexture11*)sr1; - if(m_ps_srv[0] != srv0 || m_ps_srv[1] != srv1) + if(m_state.ps_srv[0] != srv0 || m_state.ps_srv[1] != srv1) { - m_ps_srv[0] = srv0; - m_ps_srv[1] = srv1; + m_state.ps_srv[0] = srv0; + m_state.ps_srv[1] = srv1; ID3D11ShaderResourceView* srvs[] = {srv0, srv1}; @@ -654,16 +663,16 @@ void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb) { - if(m_ps != ps) + if(m_state.ps != ps) { - m_ps = ps; + m_state.ps = ps; m_ctx->PSSetShader(ps, NULL, 0); } - if(m_ps_cb != ps_cb) + if(m_state.ps_cb != ps_cb) { - m_ps_cb = ps_cb; + m_state.ps_cb = ps_cb; m_ctx->PSSetConstantBuffers(0, 1, &ps_cb); } @@ -671,10 +680,10 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb) void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1) { - if(m_ps_ss[0] != ss0 || m_ps_ss[1] != ss1) + if(m_state.ps_ss[0] != ss0 || m_state.ps_ss[1] != ss1) { - m_ps_ss[0] = ss0; - m_ps_ss[1] = ss1; + m_state.ps_ss[0] = ss0; + m_state.ps_ss[1] = ss1; ID3D11SamplerState* sss[] = {ss0, ss1}; @@ -684,25 +693,25 @@ void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref) { - if(m_dss != dss || m_sref != sref) + if(m_state.dss != dss || m_state.sref != sref) { - m_ctx->OMSetDepthStencilState(dss, sref); + m_state.dss = dss; + m_state.sref = sref; - m_dss = dss; - m_sref = sref; + m_ctx->OMSetDepthStencilState(dss, sref); } } void GSDevice11::OMSetBlendState(ID3D11BlendState* bs, float bf) { - if(m_bs != bs || m_bf != bf) + if(m_state.bs != bs || m_state.bf != bf) { + m_state.bs = bs; + m_state.bf = bf; + float BlendFactor[] = {bf, bf, bf, 0}; m_ctx->OMSetBlendState(bs, BlendFactor, 0xffffffff); - - m_bs = bs; - m_bf = bf; } } @@ -714,17 +723,17 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector if(rt) rtv = *(GSTexture11*)rt; if(ds) dsv = *(GSTexture11*)ds; - if(m_rtv != rtv || m_dsv != dsv) + if(m_state.rtv != rtv || m_state.dsv != dsv) { - m_rtv = rtv; - m_dsv = dsv; + m_state.rtv = rtv; + m_state.dsv = dsv; m_ctx->OMSetRenderTargets(1, &rtv, dsv); } - if(m_viewport != rt->m_size) + if(m_state.viewport != rt->GetSize()) { - m_viewport = rt->m_size; + m_state.viewport = rt->GetSize(); D3D11_VIEWPORT vp; @@ -732,19 +741,19 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector vp.TopLeftX = 0; vp.TopLeftY = 0; - vp.Width = rt->m_size.x; - vp.Height = rt->m_size.y; + vp.Width = rt->GetWidth(); + vp.Height = rt->GetHeight(); vp.MinDepth = 0.0f; vp.MaxDepth = 1.0f; m_ctx->RSSetViewports(1, &vp); } - GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy(); + GSVector4i r = scissor ? *scissor : GSVector4i(rt->GetSize()).zwxy(); - if(!m_scissor.eq(r)) + if(!m_state.scissor.eq(r)) { - m_scissor = r; + m_state.scissor = r; m_ctx->RSSetScissorRects(1, r); } diff --git a/plugins/GSdx/GSDevice11.h b/plugins/GSdx/GSDevice11.h index 523e7ba301..b607153c7b 100644 --- a/plugins/GSdx/GSDevice11.h +++ b/plugins/GSdx/GSDevice11.h @@ -26,29 +26,7 @@ class GSDevice11 : public GSDevice { - ID3D11Buffer* m_vb; - size_t m_vb_stride; - ID3D11InputLayout* m_layout; - D3D11_PRIMITIVE_TOPOLOGY m_topology; - ID3D11VertexShader* m_vs; - ID3D11Buffer* m_vs_cb; - ID3D11GeometryShader* m_gs; - ID3D11ShaderResourceView* m_ps_srv[2]; - ID3D11PixelShader* m_ps; - ID3D11Buffer* m_ps_cb; - ID3D11SamplerState* m_ps_ss[2]; - GSVector2i m_viewport; - GSVector4i m_scissor; - ID3D11DepthStencilState* m_dss; - uint8 m_sref; - ID3D11BlendState* m_bs; - float m_bf; - ID3D11RenderTargetView* m_rtv; - ID3D11DepthStencilView* m_dsv; - - // - - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0); @@ -58,12 +36,31 @@ class GSDevice11 : public GSDevice CComPtr m_dev; CComPtr m_ctx; CComPtr m_swapchain; + CComPtr m_vb; + CComPtr m_vb_old; struct { - CComPtr vb, vb_old; - size_t stride, start, count, limit; - } m_vertices; + ID3D11Buffer* vb; + size_t vb_stride; + ID3D11InputLayout* layout; + D3D11_PRIMITIVE_TOPOLOGY topology; + ID3D11VertexShader* vs; + ID3D11Buffer* vs_cb; + ID3D11GeometryShader* gs; + ID3D11ShaderResourceView* ps_srv[2]; + ID3D11PixelShader* ps; + ID3D11Buffer* ps_cb; + ID3D11SamplerState* ps_ss[2]; + GSVector2i viewport; + GSVector4i scissor; + ID3D11DepthStencilState* dss; + uint8 sref; + ID3D11BlendState* bs; + float bf; + ID3D11RenderTargetView* rtv; + ID3D11DepthStencilView* dsv; + } m_state; public: // TODO CComPtr m_rs; @@ -100,20 +97,20 @@ public: bool Reset(int w, int h, int mode); void Flip(bool limit); - void BeginScene(); void DrawPrimitive(); - void EndScene(); void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, uint32 c); void ClearDepth(GSTexture* t, float c); void ClearStencil(GSTexture* t, uint8 c); - GSTexture* CreateRenderTarget(int w, int h, int format = 0); - GSTexture* CreateDepthStencil(int w, int h, int format = 0); + GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); + GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); GSTexture* CreateTexture(int w, int h, int format = 0); GSTexture* CreateOffscreen(int w, int h, int format = 0); + GSTexture* Resolve(GSTexture* t); + GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0); void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r); diff --git a/plugins/GSdx/GSDevice7.cpp b/plugins/GSdx/GSDevice7.cpp index aa3c42abdd..2bc18d6e96 100644 --- a/plugins/GSdx/GSDevice7.cpp +++ b/plugins/GSdx/GSDevice7.cpp @@ -145,7 +145,7 @@ void GSDevice7::Present(const GSVector4i& r, int shader, bool limit) int w = std::max(cr.width(), 1); int h = std::max(cr.height(), 1); - if(!m_backbuffer || m_backbuffer->m_size.x != w || m_backbuffer->m_size.y != h) + if(!m_backbuffer || m_backbuffer->GetWidth() != w || m_backbuffer->GetHeight() != h) { if(!Reset(w, h, DontCare)) { @@ -198,7 +198,7 @@ void GSDevice7::Present(const GSVector4i& r, int shader, bool limit) } } -GSTexture* GSDevice7::Create(int type, int w, int h, int format) +GSTexture* GSDevice7::Create(int type, int w, int h, bool msaa, int format) { HRESULT hr; diff --git a/plugins/GSdx/GSDevice7.h b/plugins/GSdx/GSDevice7.h index baa2caf9ad..eb5ed4bbfb 100644 --- a/plugins/GSdx/GSDevice7.h +++ b/plugins/GSdx/GSDevice7.h @@ -31,7 +31,7 @@ private: CComPtr m_primary; bool m_lost; - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0); diff --git a/plugins/GSdx/GSDevice9.cpp b/plugins/GSdx/GSDevice9.cpp index 5971220633..a3b45ed01e 100644 --- a/plugins/GSdx/GSDevice9.cpp +++ b/plugins/GSdx/GSDevice9.cpp @@ -25,42 +25,23 @@ #include "resource.h" GSDevice9::GSDevice9() - : m_vb(NULL) - , m_vb_stride(0) - , m_layout(NULL) - , m_topology((D3DPRIMITIVETYPE)0) - , m_vs(NULL) - , m_vs_cb(NULL) - , m_vs_cb_len(0) - , m_ps(NULL) - , m_ps_cb(NULL) - , m_ps_cb_len(0) - , m_ps_ss(NULL) - , m_scissor(0, 0, 0, 0) - , m_dss(NULL) - , m_bs(NULL) - , m_bf(0xffffffff) - , m_rtv(NULL) - , m_dsv(NULL) - , m_lost(false) + : m_lost(false) { m_rbswapped = true; memset(&m_pp, 0, sizeof(m_pp)); memset(&m_ddcaps, 0, sizeof(m_ddcaps)); memset(&m_d3dcaps, 0, sizeof(m_d3dcaps)); - memset(m_ps_srvs, 0, sizeof(m_ps_srvs)); - m_vertices.stride = 0; - m_vertices.start = 0; - m_vertices.count = 0; - m_vertices.limit = 0; + memset(&m_state, 0, sizeof(m_state)); + + m_state.bf = 0xffffffff; } GSDevice9::~GSDevice9() { - if(m_vs_cb) _aligned_free(m_vs_cb); - if(m_ps_cb) _aligned_free(m_ps_cb); + if(m_state.vs_cb) _aligned_free(m_state.vs_cb); + if(m_state.ps_cb) _aligned_free(m_state.ps_cb); } bool GSDevice9::Create(GSWnd* wnd, bool vsync) @@ -112,9 +93,14 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync) if(m_d3dcaps.VertexShaderVersion < (m_d3dcaps.PixelShaderVersion & ~0x10000)) { - ASSERT(0); + if(m_d3dcaps.VertexShaderVersion > D3DVS_VERSION(0, 0)) + { + ASSERT(0); - return false; + return false; + } + + // else vertex shader should be emulated in software (gma950) } m_d3dcaps.VertexShaderVersion = m_d3dcaps.PixelShaderVersion & ~0x10000; @@ -138,6 +124,22 @@ bool GSDevice9::Create(GSWnd* wnd, bool vsync) return false; } + // msaa + + for(uint32 i = 2; i <= 16; i++) + { + DWORD quality[2] = {0, 0}; + + if(SUCCEEDED(m_d3d->CheckDeviceMultiSampleType(m_d3dcaps.AdapterOrdinal, m_d3dcaps.DeviceType, D3DFMT_A8R8G8B8, TRUE, (D3DMULTISAMPLE_TYPE)i, &quality[0])) && quality[0] > 0 + && SUCCEEDED(m_d3d->CheckDeviceMultiSampleType(m_d3dcaps.AdapterOrdinal, m_d3dcaps.DeviceType, D3DFMT_D24S8, TRUE, (D3DMULTISAMPLE_TYPE)i, &quality[1])) && quality[1] > 0) + { + m_msaa_desc.Count = i; + m_msaa_desc.Quality = std::min(quality[0] - 1, quality[1] - 1); + + if(i >= m_msaa) break; + } + } + // if(!Reset(1, 1, theApp.GetConfig("windowed", 1) ? Windowed : Fullscreen)) @@ -246,30 +248,18 @@ bool GSDevice9::Reset(int w, int h, int mode) m_swapchain = NULL; - m_vertices.vb = NULL; - m_vertices.vb_old = NULL; + m_vb = NULL; + m_vb_old = NULL; + m_vertices.start = 0; m_vertices.count = 0; - if(m_vs_cb) _aligned_free(m_vs_cb); - if(m_ps_cb) _aligned_free(m_ps_cb); + if(m_state.vs_cb) _aligned_free(m_state.vs_cb); + if(m_state.ps_cb) _aligned_free(m_state.ps_cb); - m_vb = NULL; - m_vb_stride = 0; - m_layout = NULL; - m_vs = NULL; - m_vs_cb = NULL; - m_vs_cb_len = 0; - m_ps = NULL; - m_ps_cb = NULL; - m_ps_cb_len = 0; - m_ps_ss = NULL; - m_scissor = GSVector4i::zero(); - m_dss = NULL; - m_bs = NULL; - m_bf = 0xffffffff; - m_rtv = NULL; - m_dsv = NULL; + memset(&m_state, 0, sizeof(m_state)); + + m_state.bf = 0xffffffff; memset(&m_pp, 0, sizeof(m_pp)); @@ -304,9 +294,13 @@ bool GSDevice9::Reset(int w, int h, int mode) if(!m_dev) { - //D3DCREATE_MULTITHREADED flag shouldn't be needed uint32 flags = m_d3dcaps.VertexProcessingCaps ? D3DCREATE_HARDWARE_VERTEXPROCESSING : D3DCREATE_SOFTWARE_VERTEXPROCESSING; + if(flags & D3DCREATE_HARDWARE_VERTEXPROCESSING) + { + flags |= D3DCREATE_PUREDEVICE; + } + hr = m_d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, (HWND)m_wnd->GetHandle(), flags, &m_pp, &m_dev); if(FAILED(hr)) return false; @@ -403,7 +397,7 @@ void GSDevice9::DrawPrimitive() { int prims = 0; - switch(m_topology) + switch(m_state.topology) { case D3DPT_TRIANGLELIST: prims = m_vertices.count / 3; @@ -423,15 +417,14 @@ void GSDevice9::DrawPrimitive() break; } - m_dev->DrawPrimitive(m_topology, m_vertices.start, prims); + m_dev->DrawPrimitive(m_state.topology, m_vertices.start, prims); } void GSDevice9::EndScene() { // m_dev->EndScene(); - m_vertices.start += m_vertices.count; - m_vertices.count = 0; + __super::EndScene(); } void GSDevice9::ClearRenderTarget(GSTexture* t, const GSVector4& c) @@ -450,7 +443,7 @@ void GSDevice9::ClearRenderTarget(GSTexture* rt, uint32 c) void GSDevice9::ClearDepth(GSTexture* t, float c) { - GSTexture* rt = CreateRenderTarget(t->m_size.x, t->m_size.y); + GSTexture* rt = CreateRenderTarget(t->GetWidth(), t->GetHeight(), t->IsMSAA()); CComPtr rtsurface; CComPtr dssurface; @@ -471,7 +464,7 @@ void GSDevice9::ClearDepth(GSTexture* t, float c) void GSDevice9::ClearStencil(GSTexture* t, uint8 c) { - GSTexture* rt = CreateRenderTarget(t->m_size.x, t->m_size.y); + GSTexture* rt = CreateRenderTarget(t->GetWidth(), t->GetHeight(), t->IsMSAA()); CComPtr rtsurface; CComPtr dssurface; @@ -490,7 +483,7 @@ void GSDevice9::ClearStencil(GSTexture* t, uint8 c) Recycle(rt); } -GSTexture* GSDevice9::Create(int type, int w, int h, int format) +GSTexture* GSDevice9::Create(int type, int w, int h, bool msaa, int format) { HRESULT hr; @@ -500,10 +493,12 @@ GSTexture* GSDevice9::Create(int type, int w, int h, int format) switch(type) { case GSTexture::RenderTarget: - hr = m_dev->CreateTexture(w, h, 1, D3DUSAGE_RENDERTARGET, (D3DFORMAT)format, D3DPOOL_DEFAULT, &texture, NULL); + if(msaa) hr = m_dev->CreateRenderTarget(w, h, (D3DFORMAT)format, (D3DMULTISAMPLE_TYPE)m_msaa_desc.Count, m_msaa_desc.Quality, FALSE, &surface, NULL); + else hr = m_dev->CreateTexture(w, h, 1, D3DUSAGE_RENDERTARGET, (D3DFORMAT)format, D3DPOOL_DEFAULT, &texture, NULL); break; case GSTexture::DepthStencil: - hr = m_dev->CreateDepthStencilSurface(w, h, (D3DFORMAT)format, D3DMULTISAMPLE_NONE, 0, FALSE, &surface, NULL); + if(msaa) hr = m_dev->CreateDepthStencilSurface(w, h, (D3DFORMAT)format, (D3DMULTISAMPLE_TYPE)m_msaa_desc.Count, m_msaa_desc.Quality, FALSE, &surface, NULL); + else hr = m_dev->CreateDepthStencilSurface(w, h, (D3DFORMAT)format, D3DMULTISAMPLE_NONE, 0, FALSE, &surface, NULL); break; case GSTexture::Texture: hr = m_dev->CreateTexture(w, h, 1, 0, (D3DFORMAT)format, D3DPOOL_MANAGED, &texture, NULL); @@ -541,14 +536,14 @@ GSTexture* GSDevice9::Create(int type, int w, int h, int format) return t; } -GSTexture* GSDevice9::CreateRenderTarget(int w, int h, int format) +GSTexture* GSDevice9::CreateRenderTarget(int w, int h, bool msaa, int format) { - return __super::CreateRenderTarget(w, h, format ? format : D3DFMT_A8R8G8B8); + return __super::CreateRenderTarget(w, h, msaa, format ? format : D3DFMT_A8R8G8B8); } -GSTexture* GSDevice9::CreateDepthStencil(int w, int h, int format) +GSTexture* GSDevice9::CreateDepthStencil(int w, int h, bool msaa, int format) { - return __super::CreateDepthStencil(w, h, format ? format : D3DFMT_D24S8); + return __super::CreateDepthStencil(w, h, msaa, format ? format : D3DFMT_D24S8); } GSTexture* GSDevice9::CreateTexture(int w, int h, int format) @@ -561,6 +556,22 @@ GSTexture* GSDevice9::CreateOffscreen(int w, int h, int format) return __super::CreateOffscreen(w, h, format ? format : D3DFMT_A8R8G8B8); } +GSTexture* GSDevice9::Resolve(GSTexture* t) +{ + ASSERT(t != NULL && t->IsMSAA()); + + if(GSTexture* dst = CreateRenderTarget(t->GetWidth(), t->GetHeight(), false, t->GetFormat())) + { + dst->SetScale(t->GetScale()); + + m_dev->StretchRect(*(GSTexture9*)t, NULL, *(GSTexture9*)dst, NULL, D3DTEXF_POINT); + + return dst; + } + + return NULL; +} + GSTexture* GSDevice9::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format) { GSTexture* dst = NULL; @@ -577,11 +588,16 @@ GSTexture* GSDevice9::CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, return false; } - if(GSTexture* rt = CreateRenderTarget(w, h, format)) + if(GSTexture* rt = CreateRenderTarget(w, h, false, format)) { GSVector4 dr(0, 0, w, h); - StretchRect(src, sr, rt, dr, m_convert.ps[1], NULL, 0); + if(GSTexture* src2 = src->IsMSAA() ? Resolve(src) : src) + { + StretchRect(src2, sr, rt, dr, m_convert.ps[1], NULL, 0); + + if(src2 != src) Recycle(src2); + } dst = CreateOffscreen(w, h, format); @@ -707,18 +723,19 @@ void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t co if(count * stride > m_vertices.limit * m_vertices.stride) { - m_vertices.vb_old = m_vertices.vb; - m_vertices.vb = NULL; + m_vb_old = m_vb; + m_vb = NULL; + m_vertices.start = 0; m_vertices.count = 0; m_vertices.limit = std::max(count * 3 / 2, 10000); } - if(m_vertices.vb == NULL) + if(m_vb == NULL) { HRESULT hr; - hr = m_dev->CreateVertexBuffer(m_vertices.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vertices.vb, NULL); + hr = m_dev->CreateVertexBuffer(m_vertices.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL); if(FAILED(hr)) return; } @@ -734,25 +751,25 @@ void GSDevice9::IASetVertexBuffer(const void* vertices, size_t stride, size_t co void* v = NULL; - if(SUCCEEDED(m_vertices.vb->Lock(m_vertices.start * stride, count * stride, &v, flags))) + if(SUCCEEDED(m_vb->Lock(m_vertices.start * stride, count * stride, &v, flags))) { GSVector4i::storent(v, vertices, count * stride); - m_vertices.vb->Unlock(); + m_vb->Unlock(); } m_vertices.count = count; m_vertices.stride = stride; - IASetVertexBuffer(m_vertices.vb, stride); + IASetVertexBuffer(m_vb, stride); } void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride) { - if(m_vb != vb || m_vb_stride != stride) + if(m_state.vb != vb || m_state.vb_stride != stride) { - m_vb = vb; - m_vb_stride = stride; + m_state.vb = vb; + m_state.vb_stride = stride; m_dev->SetStreamSource(0, vb, 0, stride); } @@ -760,9 +777,9 @@ void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride) void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout) { - if(m_layout != layout) + if(m_state.layout != layout) { - m_layout = layout; + m_state.layout = layout; m_dev->SetVertexDeclaration(layout); } @@ -770,14 +787,14 @@ void GSDevice9::IASetInputLayout(IDirect3DVertexDeclaration9* layout) void GSDevice9::IASetPrimitiveTopology(D3DPRIMITIVETYPE topology) { - m_topology = topology; + m_state.topology = topology; } void GSDevice9::VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int vs_cb_len) { - if(m_vs != vs) + if(m_state.vs != vs) { - m_vs = vs; + m_state.vs = vs; m_dev->SetVertexShader(vs); } @@ -786,18 +803,18 @@ void GSDevice9::VSSetShader(IDirect3DVertexShader9* vs, const float* vs_cb, int { int size = vs_cb_len * sizeof(float) * 4; - if(m_vs_cb_len != vs_cb_len || m_vs_cb == NULL || memcmp(m_vs_cb, vs_cb, size)) + if(m_state.vs_cb_len != vs_cb_len || m_state.vs_cb == NULL || memcmp(m_state.vs_cb, vs_cb, size)) { - if(m_vs_cb == NULL || m_vs_cb_len < vs_cb_len) + if(m_state.vs_cb == NULL || m_state.vs_cb_len < vs_cb_len) { - if(m_vs_cb) _aligned_free(m_vs_cb); + if(m_state.vs_cb) _aligned_free(m_state.vs_cb); - m_vs_cb = (float*)_aligned_malloc(size, 16); + m_state.vs_cb = (float*)_aligned_malloc(size, 16); } - m_vs_cb_len = vs_cb_len; + m_state.vs_cb_len = vs_cb_len; - memcpy(m_vs_cb, vs_cb, size); + memcpy(m_state.vs_cb, vs_cb, size); m_dev->SetVertexShaderConstantF(0, vs_cb, vs_cb_len); } @@ -812,16 +829,16 @@ void GSDevice9::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) if(sr0) srv0 = *(GSTexture9*)sr0; if(sr1) srv1 = *(GSTexture9*)sr1; - if(m_ps_srvs[0] != srv0) + if(m_state.ps_srvs[0] != srv0) { - m_ps_srvs[0] = srv0; + m_state.ps_srvs[0] = srv0; m_dev->SetTexture(0, srv0); } - if(m_ps_srvs[1] != srv1) + if(m_state.ps_srvs[1] != srv1) { - m_ps_srvs[1] = srv1; + m_state.ps_srvs[1] = srv1; m_dev->SetTexture(1, srv1); } @@ -829,9 +846,9 @@ void GSDevice9::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1) void GSDevice9::PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len) { - if(m_ps != ps) + if(m_state.ps != ps) { - m_ps = ps; + m_state.ps = ps; m_dev->SetPixelShader(ps); } @@ -840,18 +857,18 @@ void GSDevice9::PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int p { int size = ps_cb_len * sizeof(float) * 4; - if(m_ps_cb_len != ps_cb_len || m_ps_cb == NULL || memcmp(m_ps_cb, ps_cb, size)) + if(m_state.ps_cb_len != ps_cb_len || m_state.ps_cb == NULL || memcmp(m_state.ps_cb, ps_cb, size)) { - if(m_ps_cb == NULL || m_ps_cb_len < ps_cb_len) + if(m_state.ps_cb == NULL || m_state.ps_cb_len < ps_cb_len) { - if(m_ps_cb) _aligned_free(m_ps_cb); + if(m_state.ps_cb) _aligned_free(m_state.ps_cb); - m_ps_cb = (float*)_aligned_malloc(size, 16); + m_state.ps_cb = (float*)_aligned_malloc(size, 16); } - m_ps_cb_len = ps_cb_len; + m_state.ps_cb_len = ps_cb_len; - memcpy(m_ps_cb, ps_cb, size); + memcpy(m_state.ps_cb, ps_cb, size); m_dev->SetPixelShaderConstantF(0, ps_cb, ps_cb_len); } @@ -860,9 +877,9 @@ void GSDevice9::PSSetShader(IDirect3DPixelShader9* ps, const float* ps_cb, int p void GSDevice9::PSSetSamplerState(Direct3DSamplerState9* ss) { - if(ss && m_ps_ss != ss) + if(ss && m_state.ps_ss != ss) { - m_ps_ss = ss; + m_state.ps_ss = ss; m_dev->SetSamplerState(0, D3DSAMP_ADDRESSU, ss->AddressU); m_dev->SetSamplerState(0, D3DSAMP_ADDRESSV, ss->AddressV); @@ -885,9 +902,9 @@ void GSDevice9::PSSetSamplerState(Direct3DSamplerState9* ss) void GSDevice9::OMSetDepthStencilState(Direct3DDepthStencilState9* dss) { - if(m_dss != dss) + if(m_state.dss != dss) { - m_dss = dss; + m_state.dss = dss; m_dev->SetRenderState(D3DRS_ZENABLE, dss->DepthEnable); m_dev->SetRenderState(D3DRS_ZWRITEENABLE, dss->DepthWriteMask); @@ -914,10 +931,10 @@ void GSDevice9::OMSetDepthStencilState(Direct3DDepthStencilState9* dss) void GSDevice9::OMSetBlendState(Direct3DBlendState9* bs, uint32 bf) { - if(m_bs != bs || m_bf != bf) + if(m_state.bs != bs || m_state.bf != bf) { - m_bs = bs; - m_bf = bf; + m_state.bs = bs; + m_state.bf = bf; m_dev->SetRenderState(D3DRS_ALPHABLENDENABLE, bs->BlendEnable); @@ -945,25 +962,25 @@ void GSDevice9::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4 if(rt) rtv = *(GSTexture9*)rt; if(ds) dsv = *(GSTexture9*)ds; - if(m_rtv != rtv) + if(m_state.rtv != rtv) { - m_rtv = rtv; + m_state.rtv = rtv; m_dev->SetRenderTarget(0, rtv); } - if(m_dsv != dsv) + if(m_state.dsv != dsv) { - m_dsv = dsv; + m_state.dsv = dsv; m_dev->SetDepthStencilSurface(dsv); } - GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy(); + GSVector4i r = scissor ? *scissor : GSVector4i(rt->GetSize()).zwxy(); - if(!m_scissor.eq(r)) + if(!m_state.scissor.eq(r)) { - m_scissor = r; + m_state.scissor = r; m_dev->SetScissorRect(r); } diff --git a/plugins/GSdx/GSDevice9.h b/plugins/GSdx/GSDevice9.h index bee59f70ea..07f4382a42 100644 --- a/plugins/GSdx/GSDevice9.h +++ b/plugins/GSdx/GSDevice9.h @@ -61,29 +61,7 @@ struct Direct3DBlendState9 class GSDevice9 : public GSDevice { -private: - IDirect3DVertexBuffer9* m_vb; - size_t m_vb_stride; - IDirect3DVertexDeclaration9* m_layout; - D3DPRIMITIVETYPE m_topology; - IDirect3DVertexShader9* m_vs; - float* m_vs_cb; - int m_vs_cb_len; - IDirect3DTexture9* m_ps_srvs[2]; - IDirect3DPixelShader9* m_ps; - float* m_ps_cb; - int m_ps_cb_len; - Direct3DSamplerState9* m_ps_ss; - GSVector4i m_scissor; - Direct3DDepthStencilState9* m_dss; - Direct3DBlendState9* m_bs; - uint32 m_bf; - IDirect3DSurface9* m_rtv; - IDirect3DSurface9* m_dsv; - - // - - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0); @@ -96,13 +74,31 @@ private: CComPtr m_d3d; CComPtr m_dev; CComPtr m_swapchain; + CComPtr m_vb; + CComPtr m_vb_old; bool m_lost; struct { - CComPtr vb, vb_old; - size_t stride, start, count, limit; - } m_vertices; + IDirect3DVertexBuffer9* vb; + size_t vb_stride; + IDirect3DVertexDeclaration9* layout; + D3DPRIMITIVETYPE topology; + IDirect3DVertexShader9* vs; + float* vs_cb; + int vs_cb_len; + IDirect3DTexture9* ps_srvs[2]; + IDirect3DPixelShader9* ps; + float* ps_cb; + int ps_cb_len; + Direct3DSamplerState9* ps_ss; + GSVector4i scissor; + Direct3DDepthStencilState9* dss; + Direct3DBlendState9* bs; + uint32 bf; + IDirect3DSurface9* rtv; + IDirect3DSurface9* dsv; + } m_state; public: // TODO @@ -146,11 +142,13 @@ public: void ClearDepth(GSTexture* t, float c); void ClearStencil(GSTexture* t, uint8 c); - GSTexture* CreateRenderTarget(int w, int h, int format = 0); - GSTexture* CreateDepthStencil(int w, int h, int format = 0); + GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); + GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); GSTexture* CreateTexture(int w, int h, int format = 0); GSTexture* CreateOffscreen(int w, int h, int format = 0); + GSTexture* Resolve(GSTexture* t); + GSTexture* CopyOffscreen(GSTexture* src, const GSVector4& sr, int w, int h, int format = 0); void CopyRect(GSTexture* st, GSTexture* dt, const GSVector4i& r); diff --git a/plugins/GSdx/GSDeviceNull.cpp b/plugins/GSdx/GSDeviceNull.cpp index 473782856f..9839643e29 100644 --- a/plugins/GSdx/GSDeviceNull.cpp +++ b/plugins/GSdx/GSDeviceNull.cpp @@ -42,7 +42,7 @@ bool GSDeviceNull::Reset(int w, int h, int mode) return true; } -GSTexture* GSDeviceNull::Create(int type, int w, int h, int format) +GSTexture* GSDeviceNull::Create(int type, int w, int h, bool msaa, int format) { return new GSTextureNull(type, w, h, format); } diff --git a/plugins/GSdx/GSDeviceNull.h b/plugins/GSdx/GSDeviceNull.h index b69841d70a..dbd19feb28 100644 --- a/plugins/GSdx/GSDeviceNull.h +++ b/plugins/GSdx/GSDeviceNull.h @@ -27,7 +27,7 @@ class GSDeviceNull : public GSDevice { private: - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c) {} void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0) {} diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 26c6e70139..147809639b 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -203,21 +203,11 @@ void GSDeviceOGL::Flip(bool limit) #endif } -void GSDeviceOGL::BeginScene() -{ -} - void GSDeviceOGL::DrawPrimitive() { glDrawArrays(m_topology, m_vertices.count, m_vertices.start); CheckError(); } -void GSDeviceOGL::EndScene() -{ - m_vertices.start += m_vertices.count; - m_vertices.count = 0; -} - void GSDeviceOGL::ClearRenderTarget(GSTexture* t, const GSVector4& c) { GLuint texture = *(GSTextureOGL*)t; @@ -262,7 +252,7 @@ void GSDeviceOGL::ClearStencil(GSTexture* t, uint8 c) glClear(GL_STENCIL_BUFFER_BIT); CheckError(); } -GSTexture* GSDeviceOGL::Create(int type, int w, int h, int format) +GSTexture* GSDeviceOGL::Create(int type, int w, int h, bool msaa, int format) { GLuint texture = 0; @@ -306,14 +296,14 @@ GSTexture* GSDeviceOGL::Create(int type, int w, int h, int format) return t; } -GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, int format) +GSTexture* GSDeviceOGL::CreateRenderTarget(int w, int h, bool msaa, int format) { - return __super::CreateRenderTarget(w, h, format ? format : GL_RGBA8); + return __super::CreateRenderTarget(w, h, msaa, format ? format : GL_RGBA8); } -GSTexture* GSDeviceOGL::CreateDepthStencil(int w, int h, int format) +GSTexture* GSDeviceOGL::CreateDepthStencil(int w, int h, bool msaa, int format) { - return __super::CreateDepthStencil(w, h, format ? format : GL_DEPTH32F_STENCIL8); // TODO: GL_DEPTH24_STENCIL8_EXT, GL_DEPTH24_STENCIL8 + return __super::CreateDepthStencil(w, h, msaa, format ? format : GL_DEPTH32F_STENCIL8); // TODO: GL_DEPTH24_STENCIL8_EXT, GL_DEPTH24_STENCIL8 } GSTexture* GSDeviceOGL::CreateTexture(int w, int h, int format) @@ -519,14 +509,14 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_DEPTH_COMPONENT, dsi); CheckError(); } - if(m_viewport != rt->m_size) + if(m_viewport != rt->GetSize()) { - m_viewport = rt->m_size; + m_viewport = rt->GetSize(); - glViewport(0, 0, rt->m_size.x, rt->m_size.y); CheckError(); + glViewport(0, 0, rt->GetWidth(), rt->GetHeight()); CheckError(); } - GSVector4i r = scissor ? *scissor : GSVector4i(rt->m_size).zwxy(); + GSVector4i r = scissor ? *scissor : GSVector4i(rt->GetSize()).zwxy(); if(!m_scissor.eq(r)) { diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 10d58a9138..eb0ecf6533 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -100,7 +100,7 @@ class GSDeviceOGL : public GSDevice // - GSTexture* Create(int type, int w, int h, int format); + GSTexture* Create(int type, int w, int h, bool msaa, int format); void DoMerge(GSTexture* st[2], GSVector4* sr, GSVector4* dr, GSTexture* dt, bool slbg, bool mmod, const GSVector4& c); void DoInterlace(GSTexture* st, GSTexture* dt, int shader, bool linear, float yoffset = 0); @@ -114,17 +114,15 @@ public: void Present(const GSVector4i& r, int shader, bool limit); void Flip(bool limit); - void BeginScene(); void DrawPrimitive(); - void EndScene(); void ClearRenderTarget(GSTexture* t, const GSVector4& c); void ClearRenderTarget(GSTexture* t, uint32 c); void ClearDepth(GSTexture* t, float c); void ClearStencil(GSTexture* t, uint8 c); - GSTexture* CreateRenderTarget(int w, int h, int format = 0); - GSTexture* CreateDepthStencil(int w, int h, int format = 0); + GSTexture* CreateRenderTarget(int w, int h, bool msaa, int format = 0); + GSTexture* CreateDepthStencil(int w, int h, bool msaa, int format = 0); GSTexture* CreateTexture(int w, int h, int format = 0); GSTexture* CreateOffscreen(int w, int h, int format = 0); diff --git a/plugins/GSdx/GSRenderer.cpp b/plugins/GSdx/GSRenderer.cpp index f6532ed5f1..88dc480379 100644 --- a/plugins/GSdx/GSRenderer.cpp +++ b/plugins/GSdx/GSRenderer.cpp @@ -208,7 +208,7 @@ bool GSRenderer::Merge(int field) r += GSVector4i(0, 1).xyxy(); } - GSVector4 scale = GSVector4(tex[i]->m_scale).xyxy(); + GSVector4 scale = GSVector4(tex[i]->GetScale()).xyxy(); src[i] = GSVector4(r) * scale / GSVector4(tex[i]->GetSize()).xyxy(); @@ -216,7 +216,7 @@ bool GSRenderer::Merge(int field) if(dr[i].top - baseline >= 4) // 2? { - o.y = tex[i]->m_scale.y * (dr[i].top - baseline); + o.y = tex[i]->GetScale().y * (dr[i].top - baseline); if(m_regs->SMODE2.INT && m_regs->SMODE2.FFMD) { @@ -251,7 +251,7 @@ bool GSRenderer::Merge(int field) int field2 = 1 - ((m_interlace - 1) & 1); int mode = (m_interlace - 1) >> 1; - m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->m_scale.y : tex[0]->m_scale.y); + m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y); } } diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index d671bdc7ea..484a144561 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -80,7 +80,7 @@ public: virtual bool CanUpscale() { - return !m_nativeres; + return !m_nativeres && m_regs->PMODE.EN != 0; // upscale ratio depends on the display size, with no output it may not be set correctly (ps2 logo to game transition) } }; diff --git a/plugins/GSdx/GSRendererDX.h b/plugins/GSdx/GSRendererDX.h index 7ff1187eca..2d0bd04b23 100644 --- a/plugins/GSdx/GSRendererDX.h +++ b/plugins/GSdx/GSRendererDX.h @@ -163,12 +163,12 @@ public: GSTextureFX::VSConstantBuffer vs_cb; - float sx = 2.0f * rt->m_scale.x / (rt->m_size.x << 4); - float sy = 2.0f * rt->m_scale.y / (rt->m_size.y << 4); + float sx = 2.0f * rt->GetScale().x / (rt->GetWidth() << 4); + float sy = 2.0f * rt->GetScale().y / (rt->GetHeight() << 4); float ox = (float)(int)context->XYOFFSET.OFX; float oy = (float)(int)context->XYOFFSET.OFY; - float ox2 = 2.0f * m_pixelcenter.x / rt->m_size.x; - float oy2 = 2.0f * m_pixelcenter.y / rt->m_size.y; + float ox2 = 2.0f * m_pixelcenter.x / rt->GetWidth(); + float oy2 = 2.0f * m_pixelcenter.y / rt->GetHeight(); vs_cb.VertexScale = GSVector4(sx, -sy, 1.0f / UINT_MAX, 0.0f); vs_cb.VertexOffset = GSVector4(ox * sx + ox2 + 1, -(oy * sy + oy2 + 1), 0.0f, -1.0f); @@ -230,8 +230,8 @@ public: ps_sel.ltf = m_filter == 2 ? IsLinear() : m_filter; ps_sel.rt = tex->m_target; - int w = tex->m_texture->m_size.x; - int h = tex->m_texture->m_size.y; + int w = tex->m_texture->GetWidth(); + int h = tex->m_texture->GetHeight(); int tw = (int)(1 << context->TEX0.TW); int th = (int)(1 << context->TEX0.TH); @@ -266,7 +266,7 @@ public: // rs - GSVector4i scissor = GSVector4i(GSVector4(rt->m_scale).xyxy() * context->scissor.in).rintersect(GSVector4i(rt->GetSize()).zwxy()); + GSVector4i scissor = GSVector4i(GSVector4(rt->GetScale()).xyxy() * context->scissor.in).rintersect(GSVector4i(rt->GetSize()).zwxy()); m_dev->OMSetRenderTargets(rt, ds, &scissor); m_dev->PSSetShaderResources(tex ? tex->m_texture : NULL, tex ? tex->m_palette : NULL); diff --git a/plugins/GSdx/GSRendererDX10.cpp b/plugins/GSdx/GSRendererDX10.cpp index e4da9d823f..3f44ea790f 100644 --- a/plugins/GSdx/GSRendererDX10.cpp +++ b/plugins/GSdx/GSRendererDX10.cpp @@ -199,9 +199,9 @@ void GSRendererDX10::SetupDATE(GSTexture* rt, GSTexture* ds) GSDevice10* dev = (GSDevice10*)m_dev; - const GSVector2i& size = rt->m_size; + const GSVector2i& size = rt->GetSize(); - if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y)) + if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y, rt->IsMSAA())) { // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows @@ -217,7 +217,7 @@ void GSRendererDX10::SetupDATE(GSTexture* rt, GSTexture* ds) // ia - GSVector4 s = GSVector4(rt->m_scale.x / size.x, rt->m_scale.y / size.y); + GSVector4 s = GSVector4(rt->GetScale().x / size.x, rt->GetScale().y / size.y); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); @@ -245,7 +245,9 @@ void GSRendererDX10::SetupDATE(GSTexture* rt, GSTexture* ds) // ps - dev->PSSetShaderResources(rt, NULL); + GSTexture* rt2 = rt->IsMSAA() ? dev->Resolve(rt) : rt; + + dev->PSSetShaderResources(rt2, NULL); dev->PSSetShader(dev->m_convert.ps[m_context->TEST.DATM ? 2 : 3], NULL); dev->PSSetSamplerState(dev->m_convert.pt, NULL); @@ -258,5 +260,7 @@ void GSRendererDX10::SetupDATE(GSTexture* rt, GSTexture* ds) dev->EndScene(); dev->Recycle(t); + + if(rt2 != rt) dev->Recycle(rt2); } } diff --git a/plugins/GSdx/GSRendererDX11.cpp b/plugins/GSdx/GSRendererDX11.cpp index ac14308100..55930cdef2 100644 --- a/plugins/GSdx/GSRendererDX11.cpp +++ b/plugins/GSdx/GSRendererDX11.cpp @@ -199,9 +199,9 @@ void GSRendererDX11::SetupDATE(GSTexture* rt, GSTexture* ds) GSDevice11* dev = (GSDevice11*)m_dev; - const GSVector2i& size = rt->m_size; + const GSVector2i& size = rt->GetSize(); - if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y)) + if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y, rt->IsMSAA())) { // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows @@ -217,7 +217,7 @@ void GSRendererDX11::SetupDATE(GSTexture* rt, GSTexture* ds) // ia - GSVector4 s = GSVector4(rt->m_scale.x / size.x, rt->m_scale.y / size.y); + GSVector4 s = GSVector4(rt->GetScale().x / size.x, rt->GetScale().y / size.y); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); @@ -245,7 +245,9 @@ void GSRendererDX11::SetupDATE(GSTexture* rt, GSTexture* ds) // ps - dev->PSSetShaderResources(rt, NULL); + GSTexture* rt2 = rt->IsMSAA() ? dev->Resolve(rt) : rt; + + dev->PSSetShaderResources(rt2, NULL); dev->PSSetShader(dev->m_convert.ps[m_context->TEST.DATM ? 2 : 3], NULL); dev->PSSetSamplerState(dev->m_convert.pt, NULL); @@ -258,5 +260,7 @@ void GSRendererDX11::SetupDATE(GSTexture* rt, GSTexture* ds) dev->EndScene(); dev->Recycle(t); + + if(rt2 != rt) dev->Recycle(rt2); } } diff --git a/plugins/GSdx/GSRendererDX9.cpp b/plugins/GSdx/GSRendererDX9.cpp index 819c81d2e1..6e3774bdc5 100644 --- a/plugins/GSdx/GSRendererDX9.cpp +++ b/plugins/GSdx/GSRendererDX9.cpp @@ -216,9 +216,9 @@ void GSRendererDX9::SetupDATE(GSTexture* rt, GSTexture* ds) GSDevice9* dev = (GSDevice9*)m_dev; - const GSVector2i& size = rt->m_size; + const GSVector2i& size = rt->GetSize(); - if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y)) + if(GSTexture* t = dev->CreateRenderTarget(size.x, size.y, rt->IsMSAA())) { // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows @@ -234,7 +234,7 @@ void GSRendererDX9::SetupDATE(GSTexture* rt, GSTexture* ds) // ia - GSVector4 s = GSVector4(rt->m_scale.x / size.x, rt->m_scale.y / size.y); + GSVector4 s = GSVector4(rt->GetScale().x / size.x, rt->GetScale().y / size.y); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); @@ -258,7 +258,9 @@ void GSRendererDX9::SetupDATE(GSTexture* rt, GSTexture* ds) // ps - dev->PSSetShaderResources(rt, NULL); + GSTexture* rt2 = rt->IsMSAA() ? dev->Resolve(rt) : rt; + + dev->PSSetShaderResources(rt2, NULL); dev->PSSetShader(dev->m_convert.ps[m_context->TEST.DATM ? 2 : 3], NULL, 0); dev->PSSetSamplerState(&dev->m_convert.pt); @@ -271,6 +273,8 @@ void GSRendererDX9::SetupDATE(GSTexture* rt, GSTexture* ds) dev->EndScene(); dev->Recycle(t); + + if(rt2 != rt) dev->Recycle(rt2); } } @@ -287,7 +291,7 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt) // ia - GSVector4 s = GSVector4(rt->m_scale.x / rt->m_size.x, rt->m_scale.y / rt->m_size.y); + GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight()); GSVector4 o = GSVector4(-1.0f, 1.0f); GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy()); diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index 41cc49b4db..5bdd74f0bf 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -495,7 +495,7 @@ protected: GSTexture* t = NULL; - if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true, true)) + if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height)) { t = rt->m_texture; diff --git a/plugins/GSdx/GSTexture.cpp b/plugins/GSdx/GSTexture.cpp index ebdbadedfa..055fc1c8a7 100644 --- a/plugins/GSdx/GSTexture.cpp +++ b/plugins/GSdx/GSTexture.cpp @@ -21,3 +21,11 @@ #include "StdAfx.h" #include "GSTexture.h" + +GSTexture::GSTexture() + : m_scale(1, 1) + , m_size(0, 0) + , m_type(None) + , m_msaa(false) +{ +} diff --git a/plugins/GSdx/GSTexture.h b/plugins/GSdx/GSTexture.h index c661b28405..74986c7224 100644 --- a/plugins/GSdx/GSTexture.h +++ b/plugins/GSdx/GSTexture.h @@ -25,30 +25,38 @@ class GSTexture { -public: +protected: GSVector2 m_scale; GSVector2i m_size; + int m_type; + int m_format; + bool m_msaa; +public: struct GSMap {uint8* bits; int pitch;}; enum {None, RenderTarget, DepthStencil, Texture, Offscreen}; public: - GSTexture() : m_scale(1, 1), m_size(0, 0) {} + GSTexture(); virtual ~GSTexture() {} virtual operator bool() {ASSERT(0); return false;} - virtual int GetType() const = 0; - virtual int GetFormat() const = 0; - virtual bool Update(const GSVector4i& r, const void* data, int pitch) = 0; virtual bool Map(GSMap& m, const GSVector4i* r = NULL) = 0; virtual void Unmap() = 0; virtual bool Save(const string& fn, bool dds = false) = 0; + GSVector2 GetScale() const {return m_scale;} + void SetScale(const GSVector2& scale) {m_scale = scale;} + int GetWidth() const {return m_size.x;} int GetHeight() const {return m_size.y;} - GSVector2i GetSize() const {return m_size;} + + int GetType() const {return m_type;} + int GetFormat() const {return m_format;} + + bool IsMSAA() const {return m_msaa;} }; diff --git a/plugins/GSdx/GSTexture10.cpp b/plugins/GSdx/GSTexture10.cpp index 4d02646f42..3d7a4be8eb 100644 --- a/plugins/GSdx/GSTexture10.cpp +++ b/plugins/GSdx/GSTexture10.cpp @@ -32,20 +32,15 @@ GSTexture10::GSTexture10(ID3D10Texture2D* texture) m_size.x = (int)m_desc.Width; m_size.y = (int)m_desc.Height; -} -int GSTexture10::GetType() const -{ - if(m_desc.BindFlags & D3D10_BIND_RENDER_TARGET) return GSTexture::RenderTarget; - if(m_desc.BindFlags & D3D10_BIND_DEPTH_STENCIL) return GSTexture::DepthStencil; - if(m_desc.BindFlags & D3D10_BIND_SHADER_RESOURCE) return GSTexture::Texture; - if(m_desc.Usage == D3D10_USAGE_STAGING) return GSTexture::Offscreen; - return GSTexture::None; -} + if(m_desc.BindFlags & D3D10_BIND_RENDER_TARGET) m_type = RenderTarget; + else if(m_desc.BindFlags & D3D10_BIND_DEPTH_STENCIL) m_type = DepthStencil; + else if(m_desc.BindFlags & D3D10_BIND_SHADER_RESOURCE) m_type = Texture; + else if(m_desc.Usage == D3D10_USAGE_STAGING) m_type = Offscreen; -int GSTexture10::GetFormat() const -{ - return m_desc.Format; + m_format = (int)m_desc.Format; + + m_msaa = m_desc.SampleDesc.Count > 1; } bool GSTexture10::Update(const GSVector4i& r, const void* data, int pitch) @@ -162,6 +157,8 @@ GSTexture10::operator ID3D10ShaderResourceView*() { if(!m_srv && m_dev && m_texture) { + ASSERT(!m_msaa); + D3D10_SHADER_RESOURCE_VIEW_DESC* desc = NULL; if(m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS) diff --git a/plugins/GSdx/GSTexture10.h b/plugins/GSdx/GSTexture10.h index c98ebcd051..da9d927279 100644 --- a/plugins/GSdx/GSTexture10.h +++ b/plugins/GSdx/GSTexture10.h @@ -35,9 +35,6 @@ class GSTexture10 : public GSTexture public: explicit GSTexture10(ID3D10Texture2D* texture); - int GetType() const; - int GetFormat() const; - bool Update(const GSVector4i& r, const void* data, int pitch); bool Map(GSMap& m, const GSVector4i* r); void Unmap(); diff --git a/plugins/GSdx/GSTexture11.cpp b/plugins/GSdx/GSTexture11.cpp index 3b3073dacd..286aec3b70 100644 --- a/plugins/GSdx/GSTexture11.cpp +++ b/plugins/GSdx/GSTexture11.cpp @@ -34,20 +34,15 @@ GSTexture11::GSTexture11(ID3D11Texture2D* texture) m_size.x = (int)m_desc.Width; m_size.y = (int)m_desc.Height; -} -int GSTexture11::GetType() const -{ - if(m_desc.BindFlags & D3D11_BIND_RENDER_TARGET) return GSTexture::RenderTarget; - if(m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL) return GSTexture::DepthStencil; - if(m_desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) return GSTexture::Texture; - if(m_desc.Usage == D3D11_USAGE_STAGING) return GSTexture::Offscreen; - return GSTexture::None; -} + if(m_desc.BindFlags & D3D11_BIND_RENDER_TARGET) m_type = RenderTarget; + else if(m_desc.BindFlags & D3D11_BIND_DEPTH_STENCIL) m_type = DepthStencil; + else if(m_desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) m_type = Texture; + else if(m_desc.Usage == D3D11_USAGE_STAGING) m_type = Offscreen; -int GSTexture11::GetFormat() const -{ - return m_desc.Format; + m_format = (int)m_desc.Format; + + m_msaa = m_desc.SampleDesc.Count > 1; } bool GSTexture11::Update(const GSVector4i& r, const void* data, int pitch) @@ -164,6 +159,8 @@ GSTexture11::operator ID3D11ShaderResourceView*() { if(!m_srv && m_dev && m_texture) { + ASSERT(!m_msaa); + m_dev->CreateShaderResourceView(m_texture, NULL, &m_srv); } diff --git a/plugins/GSdx/GSTexture11.h b/plugins/GSdx/GSTexture11.h index fc2a7061b6..8aad1258bd 100644 --- a/plugins/GSdx/GSTexture11.h +++ b/plugins/GSdx/GSTexture11.h @@ -36,9 +36,6 @@ class GSTexture11 : public GSTexture public: explicit GSTexture11(ID3D11Texture2D* texture); - int GetType() const; - int GetFormat() const; - bool Update(const GSVector4i& r, const void* data, int pitch); bool Map(GSMap& m, const GSVector4i* r); void Unmap(); diff --git a/plugins/GSdx/GSTexture7.cpp b/plugins/GSdx/GSTexture7.cpp index a3055c014a..3f52beecd4 100644 --- a/plugins/GSdx/GSTexture7.cpp +++ b/plugins/GSdx/GSTexture7.cpp @@ -23,8 +23,7 @@ #include "GSTexture7.h" GSTexture7::GSTexture7(int type, IDirectDrawSurface7* system) - : m_type(type) - , m_system(system) + : m_system(system) { memset(&m_desc, 0, sizeof(m_desc)); @@ -34,11 +33,14 @@ GSTexture7::GSTexture7(int type, IDirectDrawSurface7* system) m_size.x = (int)m_desc.dwWidth; m_size.y = (int)m_desc.dwHeight; + + m_type = type; + + m_format = (int)m_desc.ddpfPixelFormat.dwFourCC; } GSTexture7::GSTexture7(int type, IDirectDrawSurface7* system, IDirectDrawSurface7* video) - : m_type(type) - , m_system(system) + : m_system(system) , m_video(video) { memset(&m_desc, 0, sizeof(m_desc)); @@ -49,16 +51,10 @@ GSTexture7::GSTexture7(int type, IDirectDrawSurface7* system, IDirectDrawSurface m_size.x = (int)m_desc.dwWidth; m_size.y = (int)m_desc.dwHeight; -} -int GSTexture7::GetType() const -{ - return m_type; -} + m_type = type; -int GSTexture7::GetFormat() const -{ - return (int)m_desc.ddpfPixelFormat.dwFourCC; + m_format = (int)m_desc.ddpfPixelFormat.dwFourCC; } bool GSTexture7::Update(const GSVector4i& r, const void* data, int pitch) diff --git a/plugins/GSdx/GSTexture7.h b/plugins/GSdx/GSTexture7.h index 7d73cb0670..1daf555366 100644 --- a/plugins/GSdx/GSTexture7.h +++ b/plugins/GSdx/GSTexture7.h @@ -26,7 +26,6 @@ class GSTexture7 : public GSTexture { - int m_type; CComPtr m_system; CComPtr m_video; DDSURFACEDESC2 m_desc; @@ -35,9 +34,6 @@ public: GSTexture7(int type, IDirectDrawSurface7* system); GSTexture7(int type, IDirectDrawSurface7* system, IDirectDrawSurface7* video); - int GetType() const; - int GetFormat() const; - bool Update(const GSVector4i& r, const void* data, int pitch); bool Map(GSMap& m, const GSVector4i* r); void Unmap(); diff --git a/plugins/GSdx/GSTexture9.cpp b/plugins/GSdx/GSTexture9.cpp index 26106feb34..477ee37a27 100644 --- a/plugins/GSdx/GSTexture9.cpp +++ b/plugins/GSdx/GSTexture9.cpp @@ -38,6 +38,15 @@ GSTexture9::GSTexture9(IDirect3DSurface9* surface) m_size.x = (int)m_desc.Width; m_size.y = (int)m_desc.Height; + + if(m_desc.Usage & D3DUSAGE_RENDERTARGET) m_type = RenderTarget; + else if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) m_type = DepthStencil; + else if(m_desc.Pool == D3DPOOL_MANAGED) m_type = Texture; + else if(m_desc.Pool == D3DPOOL_SYSTEMMEM) m_type = Offscreen; + + m_format = (int)m_desc.Format; + + m_msaa = m_desc.MultiSampleType != D3DMULTISAMPLE_NONE; } GSTexture9::GSTexture9(IDirect3DTexture9* texture) @@ -52,26 +61,21 @@ GSTexture9::GSTexture9(IDirect3DTexture9* texture) m_size.x = (int)m_desc.Width; m_size.y = (int)m_desc.Height; + + if(m_desc.Usage & D3DUSAGE_RENDERTARGET) m_type = RenderTarget; + else if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) m_type = DepthStencil; + else if(m_desc.Pool == D3DPOOL_MANAGED) m_type = Texture; + else if(m_desc.Pool == D3DPOOL_SYSTEMMEM) m_type = Offscreen; + + m_format = (int)m_desc.Format; + + m_msaa = m_desc.MultiSampleType > 1; } GSTexture9::~GSTexture9() { } -int GSTexture9::GetType() const -{ - if(m_desc.Usage & D3DUSAGE_RENDERTARGET) return GSTexture::RenderTarget; - if(m_desc.Usage & D3DUSAGE_DEPTHSTENCIL) return GSTexture::DepthStencil; - if(m_desc.Pool == D3DPOOL_MANAGED) return GSTexture::Texture; - if(m_desc.Pool == D3DPOOL_SYSTEMMEM) return GSTexture::Offscreen; - return GSTexture::None; -} - -int GSTexture9::GetFormat() const -{ - return m_desc.Format; -} - bool GSTexture9::Update(const GSVector4i& r, const void* data, int pitch) { if(m_surface) diff --git a/plugins/GSdx/GSTexture9.h b/plugins/GSdx/GSTexture9.h index a2a0297dba..e28b085278 100644 --- a/plugins/GSdx/GSTexture9.h +++ b/plugins/GSdx/GSTexture9.h @@ -35,9 +35,6 @@ public: explicit GSTexture9(IDirect3DTexture9* texture); virtual ~GSTexture9(); - int GetType() const; - int GetFormat() const; - bool Update(const GSVector4i& r, const void* data, int pitch); bool Map(GSMap& m, const GSVector4i* r); void Unmap(); diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 9c70530e9c..29835c9857 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -106,21 +106,12 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con if(src == NULL) { - src = CreateSource(); + src = CreateSource(TEX0, TEXA, dst); - if(!(dst ? src->Create(dst) : src->Create(m_paltex))) + if(src == NULL) { - delete src; - return NULL; } - - if(psm.pal > 0) - { - memcpy(src->m_clut, clut, psm.pal * sizeof(clut[0])); - } - - m_src.Add(src, TEX0, m_renderer->m_context->offset.tex); } if(psm.pal > 0) @@ -144,7 +135,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con return src; } -GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, bool fb) +GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used) { uint32 bp = TEX0.TBP0; @@ -160,41 +151,20 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int dst = t; - if(!fb) dst->m_TEX0 = TEX0; + dst->m_TEX0 = TEX0; break; } } - if(dst == NULL && fb) - { - // HACK: try to find something close to the base pointer - - for(list::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++) - { - Target* t = *i; - - if(t->m_TEX0.TBP0 <= bp && bp < t->m_TEX0.TBP0 + 0x700 && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0)) - { - dst = t; - } - } - } - if(dst == NULL) { - dst = CreateTarget(); + dst = CreateTarget(TEX0, w, h, type); - dst->m_TEX0 = TEX0; - - if(!dst->Create(w, h, type)) + if(dst == NULL) { - delete dst; - return NULL; } - - m_dst[type].push_front(dst); } else { @@ -212,16 +182,15 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int { hh *= 2; } -/* - if(hh < 512) + + if(hh < 512 && m_renderer->m_context->SCISSOR.SCAY1 == 511) // vp2 { hh = 512; } -*/ + if(ww > 0 && hh > 0) { - dst->m_texture->m_scale.x = (float)w / ww; - dst->m_texture->m_scale.y = (float)h / hh; + dst->m_texture->SetScale(GSVector2((float)w / ww, (float)h / hh)); } } @@ -233,6 +202,52 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int return dst; } +GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h) +{ + uint32 bp = TEX0.TBP0; + + Target* dst = NULL; + + for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); i++) + { + Target* t = *i; + + if(bp == t->m_TEX0.TBP0) + { + dst = t; + + break; + } + else + { + // HACK: try to find something close to the base pointer + + if(t->m_TEX0.TBP0 <= bp && bp < t->m_TEX0.TBP0 + 0x700 && (!dst || t->m_TEX0.TBP0 >= dst->m_TEX0.TBP0)) + { + dst = t; + } + } + } + + if(dst == NULL) + { + dst = CreateTarget(TEX0, w, h, RenderTarget); + + if(dst == NULL) + { + return NULL; + } + } + else + { + dst->Update(); + } + + dst->m_used = true; + + return dst; +} + void GSTextureCache::InvalidateVideoMem(const GSOffset* o, const GSVector4i& rect, bool target) { uint32 bp = o->bp; @@ -368,12 +383,7 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset* o, const GSVector4i& r) { if(GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) { - GSVector4i r2 = r.rintersect(t->m_valid); - - if(!r2.rempty()) - { - t->Read(r2); - } + Read(t, r.rintersect(t->m_valid)); return; } @@ -381,12 +391,7 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset* o, const GSVector4i& r) { // ffx-2 riku changing to her default (shoots some reflecting glass at the end), 16-bit rt read as 32-bit - GSVector4i r2 = GSVector4i(r.left, r.top, r.right, r.top + (r.bottom - r.top) * 2).rintersect(t->m_valid); - - if(!r2.rempty()) - { - t->Read(r2); - } + Read(t, GSVector4i(r.left, r.top, r.right, r.top + (r.bottom - r.top) * 2).rintersect(t->m_valid)); return; } @@ -477,6 +482,257 @@ void GSTextureCache::IncAge() } } +GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst) +{ + Source* src = new Source(m_renderer); + + src->m_TEX0 = TEX0; + src->m_TEXA = TEXA; + + int tw = 1 << TEX0.TW; + int th = 1 << TEX0.TH; + int tp = (int)TEX0.TW << 6; + + if(dst == NULL) + { + if(m_paltex && GSLocalMemory::m_psm[TEX0.PSM].pal > 0) + { + src->m_fmt = GSTextureFX::FMT_8; + + src->m_texture = m_renderer->m_dev->CreateTexture(tw, th, Get8bitFormat()); + src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); + } + else + { + src->m_fmt = GSTextureFX::FMT_32; + + src->m_texture = m_renderer->m_dev->CreateTexture(tw, th); + } + } + else + { + // TODO: clean up this mess + + src->m_target = true; + + if(dst->m_type != RenderTarget) + { + // TODO + + delete src; + + return NULL; + } + + dst->Update(); + + GSTexture* tmp = NULL; + + if(dst->m_texture->IsMSAA()) + { + tmp = dst->m_texture; + + dst->m_texture = m_renderer->m_dev->Resolve(dst->m_texture); + } + + // do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows) + + int w = (int)(dst->m_texture->GetScale().x * tw); + int h = (int)(dst->m_texture->GetScale().y * th); + + GSVector2i dstsize = dst->m_texture->GetSize(); + + // pitch conversion + + if(dst->m_TEX0.TBW != TEX0.TBW) // && dst->m_TEX0.PSM == TEX0.PSM + { + // sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left) + + // ASSERT(dst->m_TEX0.TBW > TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO) + + src->m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y, false); + + GSVector4 size = GSVector4(dstsize).xyxy(); + GSVector4 scale = GSVector4(dst->m_texture->GetScale()).xyxy(); + + int bw = 64; + int bh = TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24 ? 32 : 64; + + GSVector4i br(0, 0, bw, bh); + + int sw = (int)dst->m_TEX0.TBW << 6; + + int dw = (int)TEX0.TBW << 6; + int dh = 1 << TEX0.TH; + + if(sw != 0) + for(int dy = 0; dy < dh; dy += bh) + { + for(int dx = 0; dx < dw; dx += bw) + { + int o = dy * dw / bh + dx; + + int sx = o % sw; + int sy = o / sw; + + GSVector4 sr = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size; + GSVector4 dr = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale; + + m_renderer->m_dev->StretchRect(dst->m_texture, sr, src->m_texture, dr); + + // TODO: this is quite a lot of StretchRect, do it with one Draw + } + } + } + else if(tw < tp) + { + // FIXME: timesplitters blurs the render target by blending itself over a couple of times + + if(tw == 256 && th == 128 && tp == 512 && (TEX0.TBP0 == 0 || TEX0.TBP0 == 0x00e00)) + { + return false; + } + } + + // width/height conversion + + GSVector2 scale = dst->m_texture->GetScale(); + + GSVector4 dr(0, 0, w, h); + + if(w > dstsize.x) + { + scale.x = (float)dstsize.x / tw; + dr.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x; + w = dstsize.x; + } + + if(h > dstsize.y) + { + scale.y = (float)dstsize.y / th; + dr.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y; + h = dstsize.y; + } + + GSVector4 sr(0, 0, w, h); + + GSTexture* st = src->m_texture ? src->m_texture : dst->m_texture; + GSTexture* dt = m_renderer->m_dev->CreateRenderTarget(w, h, false); + + if(!src->m_texture) + { + src->m_texture = dt; + } + + if((sr == dr).alltrue()) + { + m_renderer->m_dev->CopyRect(st, dt, GSVector4i(0, 0, w, h)); + } + else + { + sr.z /= st->GetWidth(); + sr.w /= st->GetHeight(); + + m_renderer->m_dev->StretchRect(st, sr, dt, dr); + } + + if(dt != src->m_texture) + { + m_renderer->m_dev->Recycle(src->m_texture); + + src->m_texture = dt; + } + + src->m_texture->SetScale(scale); + + switch(TEX0.PSM) + { + default: + ASSERT(0); + case PSM_PSMCT32: + src->m_fmt = GSTextureFX::FMT_32; + break; + case PSM_PSMCT24: + src->m_fmt = GSTextureFX::FMT_24; + break; + case PSM_PSMCT16: + case PSM_PSMCT16S: + src->m_fmt = GSTextureFX::FMT_16; + break; + case PSM_PSMT8H: + src->m_fmt = GSTextureFX::FMT_8H; + src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); + break; + case PSM_PSMT4HL: + src->m_fmt = GSTextureFX::FMT_4HL; + src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); + break; + case PSM_PSMT4HH: + src->m_fmt = GSTextureFX::FMT_4HH; + src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); + break; + } + + if(tmp != NULL) + { + m_renderer->m_dev->Recycle(dst->m_texture); + + dst->m_texture = tmp; + } + } + + if(src->m_texture == NULL) + { + ASSERT(0); + + return NULL; + } + + const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; + + if(psm.pal > 0) + { + memcpy(src->m_clut, (const uint32*)m_renderer->m_mem.m_clut, psm.pal * sizeof(uint32)); + } + + m_src.Add(src, TEX0, m_renderer->m_context->offset.tex); + + return src; +} + +GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type) +{ + Target* t = new Target(m_renderer); + + t->m_TEX0 = TEX0; + + // FIXME: initial data should be unswizzled from local mem in Update() if dirty + + t->m_type = type; + + if(type == RenderTarget) + { + t->m_texture = m_renderer->m_dev->CreateRenderTarget(w, h, true); + + t->m_used = true; // FIXME + } + else if(type == DepthStencil) + { + t->m_texture = m_renderer->m_dev->CreateDepthStencil(w, h, true); + } + + if(t->m_texture == NULL) + { + ASSERT(0); + + return NULL; + } + + m_dst[type].push_front(t); + + return t; +} + // GSTextureCache::Surface GSTextureCache::Surface::Surface(GSRenderer* r) @@ -526,197 +782,6 @@ GSTextureCache::Source::~Source() _aligned_free(m_write.rect); } -bool GSTextureCache::Source::Create(bool paltex) -{ - m_TEX0 = m_renderer->m_context->TEX0; - m_TEXA = m_renderer->m_env.TEXA; - - ASSERT(m_texture == NULL); - - if(paltex && GSLocalMemory::m_psm[m_TEX0.PSM].pal > 0) - { - m_fmt = GSTextureFX::FMT_8; - - m_texture = m_renderer->m_dev->CreateTexture(1 << m_TEX0.TW, 1 << m_TEX0.TH, Get8bitFormat()); - m_palette = m_renderer->m_dev->CreateTexture(256, 1); - } - else - { - m_fmt = GSTextureFX::FMT_32; - - m_texture = m_renderer->m_dev->CreateTexture(1 << m_TEX0.TW, 1 << m_TEX0.TH); - } - - return m_texture != NULL; -} - -bool GSTextureCache::Source::Create(Target* dst) -{ - m_target = true; - - if(dst->m_type != RenderTarget) - { - // TODO - - return false; - } - - // TODO: clean up this mess - - dst->Update(); - - // m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1); - - m_TEX0 = m_renderer->m_context->TEX0; - m_TEXA = m_renderer->m_env.TEXA; - - int tw = 1 << m_TEX0.TW; - int th = 1 << m_TEX0.TH; - int tp = (int)m_TEX0.TW << 6; - - // do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows) - - int w = (int)(dst->m_texture->m_scale.x * tw); - int h = (int)(dst->m_texture->m_scale.y * th); - - GSVector2i dstsize = dst->m_texture->GetSize(); - - // pitch conversion - - if(dst->m_TEX0.TBW != m_TEX0.TBW) // && dst->m_TEX0.PSM == m_TEX0.PSM - { - // sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left) - - // ASSERT(dst->m_TEX0.TBW > m_TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO) - - ASSERT(m_texture == NULL); - - m_texture = m_renderer->m_dev->CreateRenderTarget(dstsize.x, dstsize.y); - - GSVector4 size = GSVector4(dstsize).xyxy(); - GSVector4 scale = GSVector4(dst->m_texture->m_scale).xyxy(); - - int bw = 64; - int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64; - - GSVector4i br(0, 0, bw, bh); - - int sw = (int)dst->m_TEX0.TBW << 6; - - int dw = (int)m_TEX0.TBW << 6; - int dh = 1 << m_TEX0.TH; - - if(sw != 0) - for(int dy = 0; dy < dh; dy += bh) - { - for(int dx = 0; dx < dw; dx += bw) - { - int o = dy * dw / bh + dx; - - int sx = o % sw; - int sy = o / sw; - - GSVector4 sr = GSVector4(GSVector4i(sx, sy).xyxy() + br) * scale / size; - GSVector4 dr = GSVector4(GSVector4i(dx, dy).xyxy() + br) * scale; - - m_renderer->m_dev->StretchRect(dst->m_texture, sr, m_texture, dr); - - // TODO: this is quite a lot of StretchRect, do it with one Draw - } - } - } - else if(tw < tp) - { - // FIXME: timesplitters blurs the render target by blending itself over a couple of times - - if(tw == 256 && th == 128 && tp == 512 && (m_TEX0.TBP0 == 0 || m_TEX0.TBP0 == 0x00e00)) - { - return false; - } - } - - // width/height conversion - - GSVector2 scale = dst->m_texture->m_scale; - - GSVector4 dr(0, 0, w, h); - - if(w > dstsize.x) - { - scale.x = (float)dstsize.x / tw; - dr.z = (float)dstsize.x * scale.x / dst->m_texture->m_scale.x; - w = dstsize.x; - } - - if(h > dstsize.y) - { - scale.y = (float)dstsize.y / th; - dr.w = (float)dstsize.y * scale.y / dst->m_texture->m_scale.y; - h = dstsize.y; - } - - GSVector4 sr(0, 0, w, h); - - GSTexture* st = m_texture ? m_texture : dst->m_texture; - GSTexture* dt = m_renderer->m_dev->CreateRenderTarget(w, h); - - if(!m_texture) - { - m_texture = dt; - } - - if((sr == dr).alltrue()) - { - m_renderer->m_dev->CopyRect(st, dt, GSVector4i(0, 0, w, h)); - } - else - { - sr.z /= st->m_size.x; - sr.w /= st->m_size.y; - - m_renderer->m_dev->StretchRect(st, sr, dt, dr); - } - - if(dt != m_texture) - { - m_renderer->m_dev->Recycle(m_texture); - - m_texture = dt; - } - - m_texture->m_scale = scale; - - switch(m_TEX0.PSM) - { - default: - ASSERT(0); - case PSM_PSMCT32: - m_fmt = GSTextureFX::FMT_32; - break; - case PSM_PSMCT24: - m_fmt = GSTextureFX::FMT_24; - break; - case PSM_PSMCT16: - case PSM_PSMCT16S: - m_fmt = GSTextureFX::FMT_16; - break; - case PSM_PSMT8H: - m_fmt = GSTextureFX::FMT_8H; - m_palette = m_renderer->m_dev->CreateTexture(256, 1); - break; - case PSM_PSMT4HL: - m_fmt = GSTextureFX::FMT_4HL; - m_palette = m_renderer->m_dev->CreateTexture(256, 1); - break; - case PSM_PSMT4HH: - m_fmt = GSTextureFX::FMT_4HH; - m_palette = m_renderer->m_dev->CreateTexture(256, 1); - break; - } - - return true; -} - void GSTextureCache::Source::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect) { __super::Update(); @@ -912,28 +977,6 @@ GSTextureCache::Target::Target(GSRenderer* r) m_valid = GSVector4i::zero(); } -bool GSTextureCache::Target::Create(int w, int h, int type) -{ - ASSERT(m_texture == NULL); - - // FIXME: initial data should be unswizzled from local mem in Update() if dirty - - m_type = type; - - if(type == RenderTarget) - { - m_texture = m_renderer->m_dev->CreateRenderTarget(w, h); - - m_used = true; - } - else if(type == DepthStencil) - { - m_texture = m_renderer->m_dev->CreateDepthStencil(w, h); - } - - return m_texture != NULL; -} - void GSTextureCache::Target::Update() { __super::Update(); @@ -980,7 +1023,7 @@ void GSTextureCache::Target::Update() // m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4); - m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->m_scale).xyxy()); + m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy()); m_renderer->m_dev->Recycle(t); } diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index af407edcaf..0b8af03050 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -46,8 +46,6 @@ public: virtual void Update(); }; - class Target; - class Source : public Surface { struct {GSVector4i* rect; uint32 count;} m_write; @@ -55,9 +53,6 @@ public: void Write(const GSVector4i& r); void Flush(uint32 count); - protected: - virtual int Get8bitFormat() = 0; - public: GSTexture* m_palette; bool m_initpalette; @@ -68,11 +63,9 @@ public: bool m_complete; public: - explicit Source(GSRenderer* renderer); + explicit Source(GSRenderer* r); virtual ~Source(); - virtual bool Create(bool paltex); - virtual bool Create(Target* dst); virtual void Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect); }; @@ -87,9 +80,7 @@ public: public: explicit Target(GSRenderer* r); - virtual bool Create(int w, int h, int type); virtual void Update(); - virtual void Read(const GSVector4i& r) = 0; }; protected: @@ -113,8 +104,14 @@ protected: list m_dst[2]; - virtual Source* CreateSource() = 0; - virtual Target* CreateTarget() = 0; + virtual Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL); + virtual Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type); + + virtual int Get8bitFormat() = 0; + + // TODO: virtual void Write(Source* s, const GSVector4i& r) = 0; + // TODO: virtual void Write(Target* t, const GSVector4i& r) = 0; + virtual void Read(Target* t, const GSVector4i& r) = 0; public: GSTextureCache(GSRenderer* r); @@ -123,7 +120,8 @@ public: void RemoveAll(); Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r); - Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used, bool fb = false); + Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used); + Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h); void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r, bool target = true); void InvalidateLocalMem(const GSOffset* o, const GSVector4i& r); diff --git a/plugins/GSdx/GSTextureCache10.cpp b/plugins/GSdx/GSTextureCache10.cpp index b4819a17e8..cc71f28341 100644 --- a/plugins/GSdx/GSTextureCache10.cpp +++ b/plugins/GSdx/GSTextureCache10.cpp @@ -29,46 +29,42 @@ GSTextureCache10::GSTextureCache10(GSRenderer* r) { } -// Source10 - -// Target10 - -void GSTextureCache10::Target10::Read(const GSVector4i& r) +void GSTextureCache10::Read(Target* t, const GSVector4i& r) { - if(m_type != RenderTarget) + if(t->m_type != RenderTarget) { // TODO return; } - if(m_TEX0.PSM != PSM_PSMCT32 - && m_TEX0.PSM != PSM_PSMCT24 - && m_TEX0.PSM != PSM_PSMCT16 - && m_TEX0.PSM != PSM_PSMCT16S) + const GIFRegTEX0& TEX0 = t->m_TEX0; + + if(TEX0.PSM != PSM_PSMCT32 + && TEX0.PSM != PSM_PSMCT24 + && TEX0.PSM != PSM_PSMCT16 + && TEX0.PSM != PSM_PSMCT16S) { //ASSERT(0); return; } - if(!m_dirty.empty()) + if(!t->m_dirty.empty()) { return; } - // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, m_TEX0.TBP0); - - // m_renderer->m_perfmon.Put(GSPerfMon::ReadRT, 1); + // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0); int w = r.width(); int h = r.height(); - GSVector4 src = GSVector4(r) * GSVector4(m_texture->m_scale).xyxy() / GSVector4(m_texture->GetSize()).xyxy(); + GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); - DXGI_FORMAT format = m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; + DXGI_FORMAT format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; - if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(m_texture, src, w, h, format)) + if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h, format)) { GSTexture::GSMap m; @@ -76,9 +72,9 @@ void GSTextureCache10::Target10::Read(const GSVector4i& r) { // TODO: block level write - GSOffset* o = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); + GSOffset* o = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - switch(m_TEX0.PSM) + switch(TEX0.PSM) { case PSM_PSMCT32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r); diff --git a/plugins/GSdx/GSTextureCache10.h b/plugins/GSdx/GSTextureCache10.h index 89ae14f438..199494ad7d 100644 --- a/plugins/GSdx/GSTextureCache10.h +++ b/plugins/GSdx/GSTextureCache10.h @@ -26,26 +26,10 @@ class GSTextureCache10 : public GSTextureCache { - class Source10 : public Source - { - protected: - int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;} - - public: - explicit Source10(GSRenderer* r) : Source(r) {} - }; - - class Target10 : public Target - { - public: - explicit Target10(GSRenderer* r) : Target(r) {} - - void Read(const GSVector4i& r); - }; - protected: - Source* CreateSource() {return new Source10(m_renderer);} - Target* CreateTarget() {return new Target10(m_renderer);} + int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;} + + void Read(Target* t, const GSVector4i& r); public: GSTextureCache10(GSRenderer* r); diff --git a/plugins/GSdx/GSTextureCache11.cpp b/plugins/GSdx/GSTextureCache11.cpp index 99e1e70ff8..9fd664d026 100644 --- a/plugins/GSdx/GSTextureCache11.cpp +++ b/plugins/GSdx/GSTextureCache11.cpp @@ -29,46 +29,42 @@ GSTextureCache11::GSTextureCache11(GSRenderer* r) { } -// Source11 - -// Target11 - -void GSTextureCache11::Target11::Read(const GSVector4i& r) +void GSTextureCache11::Read(Target* t, const GSVector4i& r) { - if(m_type != RenderTarget) + if(t->m_type != RenderTarget) { // TODO return; } - if(m_TEX0.PSM != PSM_PSMCT32 - && m_TEX0.PSM != PSM_PSMCT24 - && m_TEX0.PSM != PSM_PSMCT16 - && m_TEX0.PSM != PSM_PSMCT16S) + const GIFRegTEX0& TEX0 = t->m_TEX0; + + if(TEX0.PSM != PSM_PSMCT32 + && TEX0.PSM != PSM_PSMCT24 + && TEX0.PSM != PSM_PSMCT16 + && TEX0.PSM != PSM_PSMCT16S) { //ASSERT(0); return; } - if(!m_dirty.empty()) + if(!t->m_dirty.empty()) { return; } - // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, m_TEX0.TBP0); - - // m_renderer->m_perfmon.Put(GSPerfMon::ReadRT, 1); + // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0); int w = r.width(); int h = r.height(); - GSVector4 src = GSVector4(r) * GSVector4(m_texture->m_scale).xyxy() / GSVector4(m_texture->GetSize()).xyxy(); + GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); - DXGI_FORMAT format = m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; + DXGI_FORMAT format = TEX0.PSM == PSM_PSMCT16 || TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM; - if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(m_texture, src, w, h, format)) + if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h, format)) { GSTexture::GSMap m; @@ -76,9 +72,9 @@ void GSTextureCache11::Target11::Read(const GSVector4i& r) { // TODO: block level write - GSOffset* o = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); + GSOffset* o = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - switch(m_TEX0.PSM) + switch(TEX0.PSM) { case PSM_PSMCT32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r); diff --git a/plugins/GSdx/GSTextureCache11.h b/plugins/GSdx/GSTextureCache11.h index 4f86f98221..f4da837984 100644 --- a/plugins/GSdx/GSTextureCache11.h +++ b/plugins/GSdx/GSTextureCache11.h @@ -26,26 +26,10 @@ class GSTextureCache11 : public GSTextureCache { - class Source11 : public Source - { - protected: - int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;} - - public: - explicit Source11(GSRenderer* r) : Source(r) {} - }; - - class Target11 : public Target - { - public: - explicit Target11(GSRenderer* r) : Target(r) {} - - void Read(const GSVector4i& r); - }; - protected: - Source* CreateSource() {return new Source11(m_renderer);} - Target* CreateTarget() {return new Target11(m_renderer);} + int Get8bitFormat() {return DXGI_FORMAT_A8_UNORM;} + + void Read(Target* t, const GSVector4i& r); public: GSTextureCache11(GSRenderer* r); diff --git a/plugins/GSdx/GSTextureCache9.cpp b/plugins/GSdx/GSTextureCache9.cpp index ad9f2ddd18..571db7a759 100644 --- a/plugins/GSdx/GSTextureCache9.cpp +++ b/plugins/GSdx/GSTextureCache9.cpp @@ -29,44 +29,40 @@ GSTextureCache9::GSTextureCache9(GSRenderer* r) { } -// Source9 - -// Target9 - -void GSTextureCache9::Target9::Read(const GSVector4i& r) +void GSTextureCache9::Read(Target* t, const GSVector4i& r) { - if(m_type != RenderTarget) + if(t->m_type != RenderTarget) { // TODO return; } - if(m_TEX0.PSM != PSM_PSMCT32 - && m_TEX0.PSM != PSM_PSMCT24 - && m_TEX0.PSM != PSM_PSMCT16 - && m_TEX0.PSM != PSM_PSMCT16S) + const GIFRegTEX0& TEX0 = t->m_TEX0; + + if(TEX0.PSM != PSM_PSMCT32 + && TEX0.PSM != PSM_PSMCT24 + && TEX0.PSM != PSM_PSMCT16 + && TEX0.PSM != PSM_PSMCT16S) { //ASSERT(0); return; } - if(!m_dirty.empty()) + if(!t->m_dirty.empty()) { return; } - // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, m_TEX0.TBP0); - - // m_renderer->m_perfmon.Put(GSPerfMon::ReadRT, 1); + // printf("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n", r.left, r.top, r.right, r.bottom, TEX0.TBP0); int w = r.width(); int h = r.height(); - GSVector4 src = GSVector4(r) * GSVector4(m_texture->m_scale).xyxy() / GSVector4(m_texture->GetSize()).xyxy(); + GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); - if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(m_texture, src, w, h)) + if(GSTexture* offscreen = m_renderer->m_dev->CopyOffscreen(t->m_texture, src, w, h)) { GSTexture::GSMap m; @@ -74,9 +70,9 @@ void GSTextureCache9::Target9::Read(const GSVector4i& r) { // TODO: block level write - GSOffset* o = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); + GSOffset* o = m_renderer->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM); - switch(m_TEX0.PSM) + switch(TEX0.PSM) { case PSM_PSMCT32: m_renderer->m_mem.WritePixel32(m.bits, m.pitch, o, r); diff --git a/plugins/GSdx/GSTextureCache9.h b/plugins/GSdx/GSTextureCache9.h index 30fe108d35..20437f5ce8 100644 --- a/plugins/GSdx/GSTextureCache9.h +++ b/plugins/GSdx/GSTextureCache9.h @@ -26,26 +26,10 @@ class GSTextureCache9 : public GSTextureCache { - class Source9 : public Source - { - protected: - int Get8bitFormat() {return D3DFMT_A8;} - - public: - explicit Source9(GSRenderer* r) : Source(r) {} - }; - - class Target9 : public Target - { - public: - explicit Target9(GSRenderer* r) : Target(r) {} - - void Read(const GSVector4i& r); - }; - protected: - Source* CreateSource() {return new Source9(m_renderer);} - Target* CreateTarget() {return new Target9(m_renderer);} + int Get8bitFormat() {return D3DFMT_A8;} + + void Read(Target* t, const GSVector4i& r); public: GSTextureCache9(GSRenderer* r); diff --git a/plugins/GSdx/GSTextureCacheOGL.cpp b/plugins/GSdx/GSTextureCacheOGL.cpp index b9a8c06cb7..05dc1c4478 100644 --- a/plugins/GSdx/GSTextureCacheOGL.cpp +++ b/plugins/GSdx/GSTextureCacheOGL.cpp @@ -28,12 +28,3 @@ GSTextureCacheOGL::GSTextureCacheOGL(GSRenderer* r) : GSTextureCache(r) { } - -// SourceOGL - -// TargetOGL - -void GSTextureCacheOGL::TargetOGL::Read(const GSVector4i& r) -{ - // TODO -} diff --git a/plugins/GSdx/GSTextureCacheOGL.h b/plugins/GSdx/GSTextureCacheOGL.h index 07c4913e7a..49e9c69419 100644 --- a/plugins/GSdx/GSTextureCacheOGL.h +++ b/plugins/GSdx/GSTextureCacheOGL.h @@ -26,26 +26,10 @@ class GSTextureCacheOGL : public GSTextureCache { - class SourceOGL : public Source - { - protected: - int Get8bitFormat() {return 0;} // TODO - - public: - explicit SourceOGL(GSRenderer* r) : Source(r) {} - }; - - class TargetOGL : public Target - { - public: - explicit TargetOGL(GSRenderer* r) : Target(r) {} - - void Read(const GSVector4i& r); - }; - protected: - Source* CreateSource() {return new SourceOGL(m_renderer);} - Target* CreateTarget() {return new TargetOGL(m_renderer);} + int Get8bitFormat() {return 0;} // TODO + + void Read(Target* t, const GSVector4i& r) {} // TODO public: GSTextureCacheOGL(GSRenderer* r); diff --git a/plugins/GSdx/GSTextureOGL.cpp b/plugins/GSdx/GSTextureOGL.cpp index 49c4f7b2da..5b109e0abd 100644 --- a/plugins/GSdx/GSTextureOGL.cpp +++ b/plugins/GSdx/GSTextureOGL.cpp @@ -25,8 +25,6 @@ GSTextureOGL::GSTextureOGL(GLuint texture, int type, int width, int height, int format) : m_texture(texture) - , m_type(type) - , m_format(format) { m_size.x = width; m_size.y = height; @@ -34,6 +32,10 @@ GSTextureOGL::GSTextureOGL(GLuint texture, int type, int width, int height, int // TODO: offscreen type should be just a memory array, also returned in Map glGenBuffers(1, &m_pbo); GSDeviceOGL::CheckError(); + + m_type = type; + + m_format = format; } GSTextureOGL::~GSTextureOGL() @@ -57,16 +59,6 @@ GSTextureOGL::~GSTextureOGL() } } -int GSTextureOGL::GetType() const -{ - return m_type; -} - -int GSTextureOGL::GetFormat() const -{ - return m_format; -} - bool GSTextureOGL::Update(const GSVector4i& r, const void* data, int pitch) { glBindBuffer(GL_PIXEL_UNPACK_BUFFER, m_pbo); GSDeviceOGL::CheckError(); diff --git a/plugins/GSdx/GSTextureOGL.h b/plugins/GSdx/GSTextureOGL.h index 5a992223d1..f6169cd4fc 100644 --- a/plugins/GSdx/GSTextureOGL.h +++ b/plugins/GSdx/GSTextureOGL.h @@ -35,9 +35,6 @@ public: GSTextureOGL(GLuint texture, int type, int width, int height, int format = 0); virtual ~GSTextureOGL(); - int GetType() const; - int GetFormat() const; - bool Update(const GSVector4i& r, const void* data, int pitch); bool Map(GSMap& m, const GSVector4i* r); void Unmap(); diff --git a/plugins/GSdx/GSdx_vs2010.vcxproj b/plugins/GSdx/GSdx_vs2010.vcxproj index e10b7f43a0..9ad2e1e88f 100644 --- a/plugins/GSdx/GSdx_vs2010.vcxproj +++ b/plugins/GSdx/GSdx_vs2010.vcxproj @@ -138,12 +138,10 @@ DynamicLibrary - false MultiByte DynamicLibrary - false MultiByte true @@ -160,7 +158,6 @@ DynamicLibrary - false MultiByte @@ -171,7 +168,6 @@ DynamicLibrary - false MultiByte true @@ -183,7 +179,6 @@ DynamicLibrary - false MultiByte true @@ -195,7 +190,6 @@ DynamicLibrary - false MultiByte @@ -205,7 +199,6 @@ DynamicLibrary - false MultiByte @@ -215,7 +208,6 @@ DynamicLibrary - false MultiByte true diff --git a/plugins/GSdx/vsprops/common.props b/plugins/GSdx/vsprops/common.props index f2b3e7153f..86d9f38569 100644 --- a/plugins/GSdx/vsprops/common.props +++ b/plugins/GSdx/vsprops/common.props @@ -1,7 +1,7 @@  <_PropertySheetDisplayName>common - $(SolutionDir)\bin\$(PcsxSubsection)\ + $(SolutionDir)bin\$(PcsxSubsection)\ $(PlatformName)\$(Configuration)\ $(ProjectName)-$(SSEtype) diff --git a/plugins/zeropad/keyboard.cpp b/plugins/zeropad/keyboard.cpp index 855bf89a1b..2384b927a1 100644 --- a/plugins/zeropad/keyboard.cpp +++ b/plugins/zeropad/keyboard.cpp @@ -31,16 +31,20 @@ return -1; } +#ifdef _WINDOWS_ +WORD toCharTemp; +#endif + char* KeysymToChar(int keysym) { - #ifdef __LINUX__ +#ifdef __LINUX__ return XKeysymToString(keysym); #else - LPWORD temp; - - ToAscii((UINT) keysym, NULL, NULL, temp, NULL); - return (char*)temp; - #endif + // fixed this to return *valid* results, and not some pointer + // to the fourth oblivion-- air + ToAscii((UINT) keysym, NULL, NULL, &toCharTemp, NULL); + return (char*)(&toCharTemp); +#endif } void PollForKeyboardInput(int pad)