mirror of https://github.com/PCSX2/pcsx2.git
GSdx-ogl: LINUX only. sync from trunk (5068:5090)
git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@5091 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
commit
4a00648d9f
|
@ -57,7 +57,11 @@ include(SelectPcsx2Plugins)
|
|||
# add additional project-wide include directories
|
||||
include_directories(${PROJECT_SOURCE_DIR}/common/include
|
||||
${PROJECT_SOURCE_DIR}/common/include/Utilities
|
||||
${PROJECT_SOURCE_DIR}/common/include/x86emitter)
|
||||
${PROJECT_SOURCE_DIR}/common/include/x86emitter
|
||||
# WORKAROUND Some issue with multiarch on Debian/Ubuntu
|
||||
/usr/include/i386-linux-gnu
|
||||
/usr/include/x86_64-linux-gnu
|
||||
)
|
||||
|
||||
# make the translation
|
||||
if(EXISTS "${PROJECT_SOURCE_DIR}/locales")
|
||||
|
|
|
@ -5649,6 +5649,7 @@ Serial = SLUS-20911
|
|||
Name = Shin Megami Tensei - Nocturne
|
||||
Region = NTSC-U
|
||||
Compat = 5
|
||||
eeRoundMode = 0 // Ladder glitch in "Assembly of Nihilo B11" level
|
||||
---------------------------------------------
|
||||
Serial = SLUS-20912
|
||||
Name = Superbikes TT
|
||||
|
@ -10338,6 +10339,7 @@ Region = NTSC-U
|
|||
Serial = SLUS-28045
|
||||
Name = Shin Megami Tensei - Nocturne [Trade Demo]
|
||||
Region = NTSC-U
|
||||
eeRoundMode = 0 // Ladder glitch in "Assembly of Nihilo B11" level
|
||||
---------------------------------------------
|
||||
Serial = SLUS-28046
|
||||
Name = Guilty Gear Isuka [Trade Demo]
|
||||
|
@ -13611,6 +13613,7 @@ Region = NTSC-K
|
|||
Serial = SLKA-25160
|
||||
Name = Shin Megami Tensei III - Nocturne Maniax
|
||||
Region = NTSC-K
|
||||
eeRoundMode = 0 // Ladder glitch in "Assembly of Nihilo B11" level
|
||||
---------------------------------------------
|
||||
Serial = SLKA-25165
|
||||
Name = Mobile Suit Gundam - Seed Destiny - Rengou vs. Z.A.F.T. II Plus
|
||||
|
@ -17250,10 +17253,12 @@ Region = NTSC-J
|
|||
Serial = SLPM-65241
|
||||
Name = Shin Megami Tensei 3 - Nocturne [Limited Edition]
|
||||
Region = NTSC-J
|
||||
eeRoundMode = 0 // Ladder glitch in "Assembly of Nihilo B11" level
|
||||
---------------------------------------------
|
||||
Serial = SLPM-65242
|
||||
Name = Shin Megami Tensei 3 - Nocturne
|
||||
Region = NTSC-J
|
||||
eeRoundMode = 0 // Ladder glitch in "Assembly of Nihilo B11" level
|
||||
---------------------------------------------
|
||||
Serial = SLPM-65243
|
||||
Name = Densha de Go! Professional 2
|
||||
|
@ -18019,11 +18024,13 @@ Region = NTSC-J
|
|||
Serial = SLPM-65461
|
||||
Name = Shin Megami Tensei 3 - Nocturne - Maniacs
|
||||
Region = NTSC-J
|
||||
eeRoundMode = 0 // Ladder glitch in "Assembly of Nihilo B11" level
|
||||
---------------------------------------------
|
||||
Serial = SLPM-65462
|
||||
Name = Shin Megami Tensei 3 - Nocturne - Maniacs
|
||||
Region = NTSC-J
|
||||
Compat = 5
|
||||
eeRoundMode = 0 // Ladder glitch in "Assembly of Nihilo B11" level
|
||||
---------------------------------------------
|
||||
Serial = SLPM-65463
|
||||
Name = Rocky
|
||||
|
@ -23700,6 +23707,7 @@ Region = NTSC-J
|
|||
Serial = SLPM-74205
|
||||
Name = Shin Megami Tensei III - Nocturne [PlayStation 2 The Best]
|
||||
Region = NTSC-J
|
||||
eeRoundMode = 0 // Ladder glitch in "Assembly of Nihilo B11" level
|
||||
---------------------------------------------
|
||||
Serial = SLPM-74206
|
||||
Name = Onimusha [PlayStation 2 The Best]
|
||||
|
|
|
@ -169,6 +169,11 @@ static wxLanguage i18n_FallbackToAnotherLang( wxLanguage wxLangId )
|
|||
case wxLANGUAGE_CHINESE_SINGAPORE : return wxLANGUAGE_CHINESE_SIMPLIFIED;
|
||||
|
||||
case wxLANGUAGE_SAMI :
|
||||
// The correct fallback for Sami would be
|
||||
// however, currently wxWidgets (2.9.3) only supports wxLANGUAGE_SAMI.
|
||||
// case wxLANGUAGE_SAMI_LULE_SWEDEN :
|
||||
// case wxLANGUAGE_SAMI_NORTHERN_SWEDEN :
|
||||
// case wxLANGUAGE_SAMI_SOUTHERN_SWEDEN :
|
||||
case wxLANGUAGE_SWEDISH_FINLAND : return wxLANGUAGE_SWEDISH;
|
||||
|
||||
case wxLANGUAGE_PORTUGUESE : return wxLANGUAGE_PORTUGUESE_BRAZILIAN;
|
||||
|
@ -178,8 +183,30 @@ static wxLanguage i18n_FallbackToAnotherLang( wxLanguage wxLangId )
|
|||
case wxLANGUAGE_GERMAN_BELGIUM :
|
||||
case wxLANGUAGE_GERMAN_LIECHTENSTEIN :
|
||||
case wxLANGUAGE_GERMAN_LUXEMBOURG :
|
||||
// Currently wxWidgets (2.9.3) doesn't support Sorbian.
|
||||
// case wxLANGUAGE_LOWER_SORBIAN :
|
||||
// case wxLANGUAGE_UPPER_SORBIAN :
|
||||
case wxLANGUAGE_GERMAN_SWISS : return wxLANGUAGE_GERMAN;
|
||||
|
||||
case wxLANGUAGE_SPANISH_ARGENTINA:
|
||||
case wxLANGUAGE_SPANISH_BOLIVIA:
|
||||
case wxLANGUAGE_SPANISH_CHILE:
|
||||
case wxLANGUAGE_SPANISH_COLOMBIA:
|
||||
case wxLANGUAGE_SPANISH_COSTA_RICA:
|
||||
case wxLANGUAGE_SPANISH_DOMINICAN_REPUBLIC:
|
||||
case wxLANGUAGE_SPANISH_ECUADOR:
|
||||
case wxLANGUAGE_SPANISH_EL_SALVADOR:
|
||||
case wxLANGUAGE_SPANISH_GUATEMALA:
|
||||
case wxLANGUAGE_SPANISH_HONDURAS:
|
||||
case wxLANGUAGE_SPANISH_MEXICAN:
|
||||
case wxLANGUAGE_SPANISH_NICARAGUA:
|
||||
case wxLANGUAGE_SPANISH_PANAMA:
|
||||
case wxLANGUAGE_SPANISH_PARAGUAY:
|
||||
case wxLANGUAGE_SPANISH_PERU:
|
||||
case wxLANGUAGE_SPANISH_PUERTO_RICO:
|
||||
case wxLANGUAGE_SPANISH_URUGUAY:
|
||||
case wxLANGUAGE_SPANISH_VENEZUELA: return wxLANGUAGE_SPANISH_MODERN;
|
||||
|
||||
case wxLANGUAGE_ITALIAN_SWISS : return wxLANGUAGE_ITALIAN;
|
||||
|
||||
default : break;
|
||||
|
|
|
@ -107,10 +107,6 @@ set(GSdxSources
|
|||
GSUtil.cpp
|
||||
GSVector.cpp
|
||||
GSVertexTrace.cpp
|
||||
GSVertexTrace.x64.avx.cpp
|
||||
GSVertexTrace.x86.cpp
|
||||
GSVertexTrace.x86.avx.cpp
|
||||
GSVertexTrace.x64.cpp
|
||||
GSWnd.cpp
|
||||
GSdx.cpp
|
||||
stdafx.cpp
|
||||
|
|
|
@ -28,8 +28,8 @@ const GSVector4i GPULocalMemory::m_xxbx(0x00007c00);
|
|||
const GSVector4i GPULocalMemory::m_xgxx(0x000003e0);
|
||||
const GSVector4i GPULocalMemory::m_rxxx(0x0000001f);
|
||||
|
||||
#define VM_SIZE ((1 << (12 + 11)) * sizeof(uint16))
|
||||
#define VM_ALLOC_SIZE (VM_SIZE * 2)
|
||||
#define VM_REAL_SIZE ((1 << (12 + 11)) * sizeof(uint16))
|
||||
#define VM_ALLOC_SIZE (VM_REAL_SIZE * 2)
|
||||
#define TEX_ALLOC_SIZE (256 * 256 * (1 + 1 + 4) * 32)
|
||||
|
||||
GPULocalMemory::GPULocalMemory()
|
||||
|
@ -39,7 +39,7 @@ GPULocalMemory::GPULocalMemory()
|
|||
|
||||
//
|
||||
|
||||
int size = VM_SIZE;
|
||||
int size = VM_REAL_SIZE;
|
||||
|
||||
m_vm = (uint16*)vmalloc(VM_ALLOC_SIZE, false);
|
||||
|
||||
|
|
|
@ -214,7 +214,7 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
|
|||
s_gs = NULL;
|
||||
}
|
||||
|
||||
if(renderer == 12)
|
||||
if(renderer == 15)
|
||||
{
|
||||
#ifdef _WINDOWS
|
||||
|
||||
|
@ -225,12 +225,11 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
|
|||
return -1;
|
||||
}
|
||||
|
||||
if(s_gs == NULL)
|
||||
{
|
||||
s_gs = new GSRendererCS();
|
||||
delete s_gs;
|
||||
|
||||
s_renderer = renderer;
|
||||
}
|
||||
s_gs = new GSRendererCS();
|
||||
|
||||
s_renderer = renderer;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -90,6 +90,12 @@ enum GIF_REG
|
|||
GIF_REG_NOP = 0x0f,
|
||||
};
|
||||
|
||||
// Identifiers for combined register sequences that are handled as a single unit.
// NOTE(review): the names suggest ST + RGBAQ + XYZF2 and ST + RGBAQ + XYZ2
// triples - confirm against the code that consumes these values.
enum GIF_REG_COMPLEX
{
	GIF_REG_STQRGBAXYZF2 = 0,
	GIF_REG_STQRGBAXYZ2 = 1,
};
|
||||
|
||||
enum GIF_A_D_REG
|
||||
{
|
||||
GIF_A_D_REG_PRIM = 0x00,
|
||||
|
@ -821,7 +827,16 @@ union
|
|||
};
|
||||
};
|
||||
REG_END2
|
||||
__forceinline bool IsRepeating() {return (1 << TW) > (int)(TBW << 6) || (PSM == PSM_PSMT8 || PSM == PSM_PSMT4) && TBW == 1;}
|
||||
// Tells whether this texture register describes a "repeating" texture.
//
// When TBW is below 2, the 8-bit and 4-bit indexed formats are decided purely
// by thresholds on TW/TH. Every other combination compares TBW << 6 against
// 1 << TW (presumably buffer width in pixels vs. texture width - confirm).
__forceinline bool IsRepeating()
{
	const bool narrow = TBW < 2;

	if(narrow && PSM == PSM_PSMT8)
	{
		return TW > 7 || TH > 6;
	}

	if(narrow && PSM == PSM_PSMT4)
	{
		return TW > 7 || TH > 7;
	}

	return (TBW << 6) < (1u << TW);
}
|
||||
REG_END2
|
||||
|
||||
REG64_(GIFReg, TEX1)
|
||||
|
@ -1090,21 +1105,77 @@ REG_SET_END
|
|||
__aligned(struct, 32) GIFPath
|
||||
{
|
||||
GIFTag tag;
|
||||
uint32 reg;
|
||||
uint32 nreg;
|
||||
uint32 nloop;
|
||||
uint32 adonly;
|
||||
uint32 nreg;
|
||||
uint32 reg;
|
||||
uint32 type;
|
||||
GSVector4i regs;
|
||||
|
||||
void SetTag(const void* mem)
|
||||
enum {TYPE_UNKNOWN, TYPE_ADONLY, TYPE_STQRGBAXYZF2, TYPE_STQRGBAXYZ2};
|
||||
|
||||
__forceinline void SetTag(const void* mem)
|
||||
{
|
||||
GSVector4i v = GSVector4i::load<false>(mem);
|
||||
GSVector4i::store<true>(&tag, v);
|
||||
const GIFTag* RESTRICT src = (const GIFTag*)mem;
|
||||
|
||||
// the compiler has a hard time not reloading every time a field of src is accessed
|
||||
|
||||
uint32 a = src->u32[0];
|
||||
uint32 b = src->u32[1];
|
||||
|
||||
tag.u32[0] = a;
|
||||
tag.u32[1] = b;
|
||||
|
||||
nloop = a & 0x7fff;
|
||||
|
||||
if(nloop == 0) return;
|
||||
|
||||
GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though
|
||||
|
||||
nreg = (b & 0xf0000000) ? (b >> 28) : 16; // src->NREG
|
||||
regs = v.upl8(v >> 4) & GSVector4i::x0f(nreg);
|
||||
reg = 0;
|
||||
regs = v.uph8(v >> 4) & 0x0f0f0f0f;
|
||||
nreg = tag.NREG ? tag.NREG : 16;
|
||||
nloop = tag.NLOOP;
|
||||
adonly = regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1;
|
||||
|
||||
type = TYPE_UNKNOWN;
|
||||
|
||||
if(tag.FLG == GIF_FLG_PACKED)
|
||||
{
|
||||
if(regs.eq8(GSVector4i(0x0e0e0e0e)).mask() == (1 << nreg) - 1)
|
||||
{
|
||||
type = TYPE_ADONLY;
|
||||
}
|
||||
else
|
||||
{
|
||||
switch(nreg)
|
||||
{
|
||||
case 1: break;
|
||||
case 2: break;
|
||||
case 3:
|
||||
if(regs.u32[0] == 0x00040102) type = TYPE_STQRGBAXYZF2; // many games, TODO: formats mixed with NOPs (xeno2: 040f010f02, 04010f020f, mgs3: 04010f0f02, 0401020f0f, 04010f020f)
|
||||
if(regs.u32[0] == 0x00050102) type = TYPE_STQRGBAXYZ2; // GoW (has other crazy formats, like ...030503050103)
|
||||
// TODO: common types with UV instead
|
||||
break;
|
||||
case 4: break;
|
||||
case 5: break;
|
||||
case 6: break;
|
||||
case 7: break;
|
||||
case 8: break;
|
||||
case 9:
|
||||
if(regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x00000004) {type = TYPE_STQRGBAXYZF2; nreg = 3; nloop *= 3;} // ffx
|
||||
break;
|
||||
case 10: break;
|
||||
case 11: break;
|
||||
case 12:
|
||||
if(regs.u32[0] == 0x02040102 && regs.u32[1] == 0x01020401 && regs.u32[2] == 0x04010204) {type = TYPE_STQRGBAXYZF2; nreg = 3; nloop *= 4;} // dq8 (not many, mostly 040102)
|
||||
break;
|
||||
case 13: break;
|
||||
case 14: break;
|
||||
case 15: break;
|
||||
case 16: break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__forceinline uint8 GetReg()
|
||||
|
|
|
@ -884,7 +884,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
static void ExpandBlock16(const uint16* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs
|
||||
template<bool AEM> static void ExpandBlock16(const uint16* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs
|
||||
{
|
||||
const GSVector4i* s = (const GSVector4i*)src;
|
||||
|
||||
|
@ -895,44 +895,36 @@ public:
|
|||
GSVector4i bm = m_xxbx;
|
||||
GSVector4i l, h;
|
||||
|
||||
if(TEXA.AEM)
|
||||
for(int i = 0; i < 8; i++, dst += dstpitch)
|
||||
{
|
||||
for(int i = 0; i < 8; i++, dst += dstpitch)
|
||||
GSVector4i v0 = s[i * 2 + 0];
|
||||
|
||||
l = v0.upl16(v0);
|
||||
h = v0.uph16(v0);
|
||||
|
||||
if(AEM)
|
||||
{
|
||||
GSVector4i v0 = s[i * 2 + 0];
|
||||
|
||||
l = v0.upl16(v0);
|
||||
h = v0.uph16(v0);
|
||||
|
||||
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend8(TA1, l.sra16(15)).andnot(l == GSVector4i::zero());
|
||||
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero());
|
||||
}
|
||||
else
|
||||
{
|
||||
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15));
|
||||
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
|
||||
}
|
||||
|
||||
GSVector4i v1 = s[i * 2 + 1];
|
||||
GSVector4i v1 = s[i * 2 + 1];
|
||||
|
||||
l = v1.upl16(v1);
|
||||
h = v1.uph16(v1);
|
||||
l = v1.upl16(v1);
|
||||
h = v1.uph16(v1);
|
||||
|
||||
if(AEM)
|
||||
{
|
||||
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend8(TA1, l.sra16(15)).andnot(l == GSVector4i::zero());
|
||||
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < 8; i++, dst += dstpitch)
|
||||
else
|
||||
{
|
||||
GSVector4i v0 = s[i * 2 + 0];
|
||||
|
||||
l = v0.upl16(v0);
|
||||
h = v0.uph16(v0);
|
||||
|
||||
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15));
|
||||
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
|
||||
|
||||
GSVector4i v1 = s[i * 2 + 1];
|
||||
|
||||
l = v1.upl16(v1);
|
||||
h = v1.uph16(v1);
|
||||
|
||||
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15));
|
||||
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
|
||||
}
|
||||
|
@ -1432,6 +1424,56 @@ public:
|
|||
}
|
||||
}
|
||||
}
|
||||
// Expands 16-bit colors held in c to 32-bit colors.
//
// The three color fields are isolated with m_rxxx / m_xgxx / m_xxbx and shifted
// left by 3 / 6 / 9. The alpha part is picked between TA0 and TA1 using the
// sign of each 16-bit lane (c.sra16(15)). When AEM is set, the alpha part is
// additionally cleared wherever c compares equal to zero.
template<bool AEM> __forceinline static GSVector4i Expand16to32(const GSVector4i& c, const GSVector4i& TA0, const GSVector4i& TA1)
{
	GSVector4i r = (c & m_rxxx) << 3;
	GSVector4i g = (c & m_xgxx) << 6;
	GSVector4i b = (c & m_xxbx) << 9;

	GSVector4i sel = c.sra16(15);

	if(AEM)
	{
		return r | g | b | TA0.blend8(TA1, sel).andnot(c == GSVector4i::zero());
	}

	return r | g | b | TA0.blend(TA1, sel);
}
|
||||
|
||||
// Reads one 16-bit block from src and writes it to dst expanded to 32 bits.
//
// The compiled path works in two steps: ReadBlock16 fills a temporary
// 16 * 8 element uint16 buffer (row pitch sizeof(block) / 8), then
// ExpandBlock16<AEM> converts that buffer into dst using dstpitch and TEXA.
// The disabled path is a single-pass variant that is kept for reference only.
template<bool AEM> __forceinline static void ReadAndExpandBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
{
	#if 1

	// two passes through an aligned temporary block

	__aligned(uint16, 32) block[16 * 8];

	ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);

	ExpandBlock16<AEM>(block, dst, dstpitch, TEXA);

	#else // not faster

	const GSVector4i* s = (const GSVector4i*)src;

	GSVector4i TA0(TEXA.TA0 << 24);
	GSVector4i TA1(TEXA.TA1 << 24);

	for(int i = 0; i < 4; i++, dst += dstpitch * 2)
	{
		GSVector4i v0 = s[i * 4 + 0];
		GSVector4i v1 = s[i * 4 + 1];
		GSVector4i v2 = s[i * 4 + 2];
		GSVector4i v3 = s[i * 4 + 3];

		GSVector4i::sw16(v0, v1, v2, v3);
		GSVector4i::sw32(v0, v1, v2, v3);
		GSVector4i::sw16(v0, v2, v1, v3);

		GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];

		d0[0] = Expand16to32<AEM>(v0.upl16(v0), TA0, TA1);
		d0[1] = Expand16to32<AEM>(v0.uph16(v0), TA0, TA1);
		d0[2] = Expand16to32<AEM>(v1.upl16(v1), TA0, TA1);
		d0[3] = Expand16to32<AEM>(v1.uph16(v1), TA0, TA1);

		GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];

		d1[0] = Expand16to32<AEM>(v2.upl16(v2), TA0, TA1);
		d1[1] = Expand16to32<AEM>(v2.uph16(v2), TA0, TA1);
		d1[2] = Expand16to32<AEM>(v3.upl16(v3), TA0, TA1);
		d1[3] = Expand16to32<AEM>(v3.uph16(v3), TA0, TA1);
	}

	#endif
}
|
||||
|
||||
__forceinline static void ReadAndExpandBlock8_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
||||
{
|
||||
|
|
|
@ -389,6 +389,8 @@ void GSClut::GetAlphaMinMax32(int& amin, int& amax)
|
|||
|
||||
void GSClut::WriteCLUT_T32_I8_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut)
|
||||
{
|
||||
// 4 blocks
|
||||
|
||||
for(int i = 0; i < 64; i += 16)
|
||||
{
|
||||
WriteCLUT_T32_I4_CSM1(&src[i + 0], &clut[i * 2 + 0]);
|
||||
|
@ -400,6 +402,8 @@ void GSClut::WriteCLUT_T32_I8_CSM1(const uint32* RESTRICT src, uint16* RESTRICT
|
|||
|
||||
__forceinline void GSClut::WriteCLUT_T32_I4_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut)
|
||||
{
|
||||
// 1 block
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)clut;
|
||||
|
||||
|
@ -420,6 +424,8 @@ __forceinline void GSClut::WriteCLUT_T32_I4_CSM1(const uint32* RESTRICT src, uin
|
|||
|
||||
void GSClut::WriteCLUT_T16_I8_CSM1(const uint16* RESTRICT src, uint16* RESTRICT clut)
|
||||
{
|
||||
// 2 blocks
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)clut;
|
||||
|
||||
|
@ -443,6 +449,8 @@ void GSClut::WriteCLUT_T16_I8_CSM1(const uint16* RESTRICT src, uint16* RESTRICT
|
|||
|
||||
__forceinline void GSClut::WriteCLUT_T16_I4_CSM1(const uint16* RESTRICT src, uint16* RESTRICT clut)
|
||||
{
|
||||
// 1 block (half)
|
||||
|
||||
for(int i = 0; i < 16; i++)
|
||||
{
|
||||
clut[i] = src[clutTableT16I4[i]];
|
||||
|
|
|
@ -103,6 +103,7 @@ public:
|
|||
virtual void BeginScene() {}
|
||||
virtual void DrawPrimitive() {};
|
||||
virtual void DrawIndexedPrimitive() {}
|
||||
virtual void DrawIndexedPrimitive(int offset, int count) {}
|
||||
virtual void EndScene();
|
||||
|
||||
virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {}
|
||||
|
|
|
@ -98,8 +98,6 @@ bool GSDevice11::Create(GSWnd* wnd)
|
|||
hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags, levels, countof(levels), D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &level, &m_ctx);
|
||||
// hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_REFERENCE, NULL, flags, NULL, 0, D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &level, &m_ctx);
|
||||
|
||||
//return false;
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
if(!SetFeatureLevel(level, true))
|
||||
|
@ -360,6 +358,13 @@ void GSDevice11::DrawIndexedPrimitive()
|
|||
m_ctx->DrawIndexed(m_index.count, m_index.start, m_vertex.start);
|
||||
}
|
||||
|
||||
void GSDevice11::DrawIndexedPrimitive(int offset, int count)
|
||||
{
|
||||
ASSERT(offset + count <= m_index.count);
|
||||
|
||||
m_ctx->DrawIndexed(count, m_index.start + offset, m_vertex.start);
|
||||
}
|
||||
|
||||
void GSDevice11::Dispatch(uint32 x, uint32 y, uint32 z)
|
||||
{
|
||||
m_ctx->Dispatch(x, y, z);
|
||||
|
@ -720,6 +725,18 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert
|
|||
}
|
||||
|
||||
void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
|
||||
{
|
||||
void* ptr = NULL;
|
||||
|
||||
if(IAMapVertexBuffer(&ptr, stride, count))
|
||||
{
|
||||
GSVector4i::storent(ptr, vertex, count * stride);
|
||||
|
||||
IAUnmapVertexBuffer();
|
||||
}
|
||||
}
|
||||
|
||||
bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
|
||||
{
|
||||
ASSERT(m_vertex.count == 0);
|
||||
|
||||
|
@ -729,7 +746,6 @@ void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
|
|||
m_vb = NULL;
|
||||
|
||||
m_vertex.start = 0;
|
||||
m_vertex.count = 0;
|
||||
m_vertex.limit = std::max<int>(count * 3 / 2, 11000);
|
||||
}
|
||||
|
||||
|
@ -748,7 +764,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
|
|||
|
||||
hr = m_dev->CreateBuffer(&bd, NULL, &m_vb);
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
if(FAILED(hr)) return false;
|
||||
}
|
||||
|
||||
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
|
||||
|
@ -762,17 +778,24 @@ void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
|
|||
|
||||
D3D11_MAPPED_SUBRESOURCE m;
|
||||
|
||||
if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m)))
|
||||
if(FAILED(m_ctx->Map(m_vb, 0, type, 0, &m)))
|
||||
{
|
||||
GSVector4i::storent((uint8*)m.pData + m_vertex.start * stride, vertex, count * stride);
|
||||
|
||||
m_ctx->Unmap(m_vb, 0);
|
||||
return false;
|
||||
}
|
||||
|
||||
*vertex = (uint8*)m.pData + m_vertex.start * stride;
|
||||
|
||||
m_vertex.count = count;
|
||||
m_vertex.stride = stride;
|
||||
|
||||
IASetVertexBuffer(m_vb, stride);
|
||||
return true;
|
||||
}
|
||||
|
||||
void GSDevice11::IAUnmapVertexBuffer()
|
||||
{
|
||||
m_ctx->Unmap(m_vb, 0);
|
||||
|
||||
IASetVertexBuffer(m_vb, m_vertex.stride);
|
||||
}
|
||||
|
||||
void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride)
|
||||
|
@ -798,7 +821,7 @@ void GSDevice11::IASetIndexBuffer(const void* index, size_t count)
|
|||
m_ib_old = m_ib;
|
||||
m_ib = NULL;
|
||||
|
||||
m_index.count = 0;
|
||||
m_index.start = 0;
|
||||
m_index.limit = std::max<int>(count * 3 / 2, 11000);
|
||||
}
|
||||
|
||||
|
@ -904,7 +927,11 @@ void GSDevice11::PSSetShaderResources(GSTexture* sr0, GSTexture* sr1)
|
|||
{
|
||||
PSSetShaderResource(0, sr0);
|
||||
PSSetShaderResource(1, sr1);
|
||||
PSSetShaderResource(2, NULL);
|
||||
|
||||
for(int i = 2; i < countof(m_state.ps_srv); i++)
|
||||
{
|
||||
PSSetShaderResource(i, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDevice11::PSSetShaderResource(int i, GSTexture* sr)
|
||||
|
@ -913,6 +940,13 @@ void GSDevice11::PSSetShaderResource(int i, GSTexture* sr)
|
|||
|
||||
if(sr) srv = *(GSTexture11*)sr;
|
||||
|
||||
PSSetShaderResourceView(i, srv);
|
||||
}
|
||||
|
||||
void GSDevice11::PSSetShaderResourceView(int i, ID3D11ShaderResourceView* srv)
|
||||
{
|
||||
ASSERT(i < countof(m_state.ps_srv));
|
||||
|
||||
if(m_state.ps_srv[i] != srv)
|
||||
{
|
||||
m_state.ps_srv[i] = srv;
|
||||
|
@ -944,14 +978,14 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
|
|||
|
||||
if(m_srv_changed)
|
||||
{
|
||||
m_ctx->PSSetShaderResources(0, 3, m_state.ps_srv);
|
||||
m_ctx->PSSetShaderResources(0, countof(m_state.ps_srv), m_state.ps_srv);
|
||||
|
||||
m_srv_changed = false;
|
||||
}
|
||||
|
||||
if(m_ss_changed)
|
||||
{
|
||||
m_ctx->PSSetSamplers(0, 3, m_state.ps_ss);
|
||||
m_ctx->PSSetSamplers(0, countof(m_state.ps_ss), m_state.ps_ss);
|
||||
|
||||
m_ss_changed = false;
|
||||
}
|
||||
|
@ -966,9 +1000,9 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
|
|||
|
||||
void GSDevice11::CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv)
|
||||
{
|
||||
// TODO: if(m_state.cs_srv[i] != srv)
|
||||
if(m_state.cs_srv[i] != srv)
|
||||
{
|
||||
// TODO: m_state.cs_srv[i] = srv;
|
||||
m_state.cs_srv[i] = srv;
|
||||
|
||||
m_ctx->CSSetShaderResources(i, 1, &srv);
|
||||
}
|
||||
|
@ -976,17 +1010,14 @@ void GSDevice11::CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv)
|
|||
|
||||
void GSDevice11::CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav)
|
||||
{
|
||||
// TODO: if(m_state.cs_uav[i] != uav)
|
||||
{
|
||||
// TODO: m_state.cs_uav[i] = uav;
|
||||
uint32 counters[8];
|
||||
|
||||
memset(counters, 0, sizeof(counters));
|
||||
|
||||
// uint32 count[] = {-1};
|
||||
|
||||
m_ctx->CSSetUnorderedAccessViews(i, 1, &uav, NULL);
|
||||
}
|
||||
m_ctx->CSSetUnorderedAccessViews(i, 1, &uav, counters);
|
||||
}
|
||||
|
||||
void GSDevice11::CSSetShader(ID3D11ComputeShader* cs)
|
||||
void GSDevice11::CSSetShader(ID3D11ComputeShader* cs, ID3D11Buffer* cs_cb)
|
||||
{
|
||||
if(m_state.cs != cs)
|
||||
{
|
||||
|
@ -994,6 +1025,13 @@ void GSDevice11::CSSetShader(ID3D11ComputeShader* cs)
|
|||
|
||||
m_ctx->CSSetShader(cs, NULL, 0);
|
||||
}
|
||||
|
||||
if(m_state.cs_cb != cs_cb)
|
||||
{
|
||||
m_state.cs_cb = cs_cb;
|
||||
|
||||
m_ctx->CSSetConstantBuffers(0, 1, &cs_cb);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref)
|
||||
|
@ -1064,6 +1102,41 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
|
|||
}
|
||||
}
|
||||
|
||||
void GSDevice11::OMSetRenderTargets(const GSVector2i& rtsize, int count, ID3D11UnorderedAccessView** uav, uint32* counters, const GSVector4i* scissor)
|
||||
{
|
||||
m_ctx->OMSetRenderTargetsAndUnorderedAccessViews(0, NULL, NULL, 0, count, uav, counters);
|
||||
|
||||
m_state.rtv = NULL;
|
||||
m_state.dsv = NULL;
|
||||
|
||||
if(m_state.viewport != rtsize)
|
||||
{
|
||||
m_state.viewport = rtsize;
|
||||
|
||||
D3D11_VIEWPORT vp;
|
||||
|
||||
memset(&vp, 0, sizeof(vp));
|
||||
|
||||
vp.TopLeftX = 0;
|
||||
vp.TopLeftY = 0;
|
||||
vp.Width = (float)rtsize.x;
|
||||
vp.Height = (float)rtsize.y;
|
||||
vp.MinDepth = 0.0f;
|
||||
vp.MaxDepth = 1.0f;
|
||||
|
||||
m_ctx->RSSetViewports(1, &vp);
|
||||
}
|
||||
|
||||
GSVector4i r = scissor ? *scissor : GSVector4i(rtsize).zwxy();
|
||||
|
||||
if(!m_state.scissor.eq(r))
|
||||
{
|
||||
m_state.scissor = r;
|
||||
|
||||
m_ctx->RSSetScissorRects(1, r);
|
||||
}
|
||||
}
|
||||
|
||||
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
@ -1135,6 +1208,38 @@ HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MAC
|
|||
return hr;
|
||||
}
|
||||
|
||||
// Compiles a geometry shader from resource id and creates it with stream output.
//
// id     - resource identifier of the shader source
// entry  - entry point name
// macro  - caller supplied macros, merged into the final list by PrepareShaderMacro
// gs     - receives the created geometry shader
// layout - stream output declaration entries
// count  - number of entries in layout
//
// Compiler messages, if any, are printed to stdout. Returns the HRESULT of the
// compilation when it fails, otherwise the HRESULT of the shader creation.
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs, D3D11_SO_DECLARATION_ENTRY* layout, int count)
{
	vector<D3D11_SHADER_MACRO> m;

	PrepareShaderMacro(m, macro);

	CComPtr<ID3D11Blob> shader, error;

	HRESULT hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.gs.c_str(), 0, 0, NULL, &shader, &error, NULL);

	if(error)
	{
		printf("%s\n", (const char*)error->GetBufferPointer());
	}

	if(FAILED(hr))
	{
		return hr;
	}

	// no rasterized stream, the output goes to the stream output stage only

	return m_dev->CreateGeometryShaderWithStreamOutput((void*)shader->GetBufferPointer(), shader->GetBufferSize(), layout, count, NULL, 0, D3D11_SO_NO_RASTERIZED_STREAM, NULL, gs);
}
|
||||
|
||||
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
@ -1177,7 +1282,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MAC
|
|||
|
||||
CComPtr<ID3D11Blob> shader, error;
|
||||
|
||||
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.ps.c_str(), 0, 0, NULL, &shader, &error, NULL);
|
||||
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.cs.c_str(), 0, 0, NULL, &shader, &error, NULL);
|
||||
|
||||
if(error)
|
||||
{
|
||||
|
|
|
@ -60,11 +60,13 @@ class GSDevice11 : public GSDeviceDX
|
|||
ID3D11VertexShader* vs;
|
||||
ID3D11Buffer* vs_cb;
|
||||
ID3D11GeometryShader* gs;
|
||||
ID3D11ShaderResourceView* ps_srv[3];
|
||||
ID3D11ShaderResourceView* ps_srv[16];
|
||||
ID3D11PixelShader* ps;
|
||||
ID3D11Buffer* ps_cb;
|
||||
ID3D11SamplerState* ps_ss[3];
|
||||
ID3D11ShaderResourceView* cs_srv[16];
|
||||
ID3D11ComputeShader* cs;
|
||||
ID3D11Buffer* cs_cb;
|
||||
GSVector2i viewport;
|
||||
GSVector4i scissor;
|
||||
ID3D11DepthStencilState* dss;
|
||||
|
@ -146,6 +148,7 @@ public:
|
|||
|
||||
void DrawPrimitive();
|
||||
void DrawIndexedPrimitive();
|
||||
void DrawIndexedPrimitive(int offset, int count);
|
||||
void Dispatch(uint32 x, uint32 y, uint32 z);
|
||||
|
||||
void ClearRenderTarget(GSTexture* t, const GSVector4& c);
|
||||
|
@ -169,6 +172,8 @@ public:
|
|||
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
|
||||
|
||||
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
|
||||
bool IAMapVertexBuffer(void** vertex, size_t stride, size_t count);
|
||||
void IAUnmapVertexBuffer();
|
||||
void IASetVertexBuffer(ID3D11Buffer* vb, size_t stride);
|
||||
void IASetIndexBuffer(const void* index, size_t count);
|
||||
void IASetIndexBuffer(ID3D11Buffer* ib);
|
||||
|
@ -178,16 +183,17 @@ public:
|
|||
void GSSetShader(ID3D11GeometryShader* gs);
|
||||
void PSSetShaderResources(GSTexture* sr0, GSTexture* sr1);
|
||||
void PSSetShaderResource(int i, GSTexture* sr);
|
||||
void PSSetShaderResourceView(int i, ID3D11ShaderResourceView* srv);
|
||||
void PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb);
|
||||
void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1, ID3D11SamplerState* ss2 = NULL);
|
||||
void CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv);
|
||||
void CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav);
|
||||
void CSSetShader(ID3D11ComputeShader* cs);
|
||||
void CSSetShader(ID3D11ComputeShader* cs, ID3D11Buffer* cs_cb);
|
||||
void OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref);
|
||||
void OMSetBlendState(ID3D11BlendState* bs, float bf);
|
||||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
|
||||
void OMSetRenderTargets(const GSVector2i& rtsize, int count, ID3D11UnorderedAccessView** uav, uint32* counters, const GSVector4i* scissor = NULL);
|
||||
|
||||
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
|
||||
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
|
||||
void SetupGS(GSSelector sel);
|
||||
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
|
||||
|
@ -202,6 +208,7 @@ public:
|
|||
|
||||
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il);
|
||||
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs);
|
||||
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs, D3D11_SO_DECLARATION_ENTRY* layout, int count);
|
||||
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps);
|
||||
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs);
|
||||
HRESULT CompileShader(const char* fn, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs);
|
||||
|
|
|
@ -911,6 +911,18 @@ void GSDevice9::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* verti
|
|||
}
|
||||
|
||||
void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
|
||||
{
|
||||
void* ptr = NULL;
|
||||
|
||||
if(IAMapVertexBuffer(&ptr, stride, count))
|
||||
{
|
||||
GSVector4i::storent(ptr, vertex, count * stride);
|
||||
|
||||
IAUnmapVertexBuffer();
|
||||
}
|
||||
}
|
||||
|
||||
bool GSDevice9::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
|
||||
{
|
||||
ASSERT(m_vertex.count == 0);
|
||||
|
||||
|
@ -930,7 +942,7 @@ void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t coun
|
|||
|
||||
hr = m_dev->CreateVertexBuffer(m_vertex.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL);
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
if(FAILED(hr)) return false;
|
||||
}
|
||||
|
||||
uint32 flags = D3DLOCK_NOOVERWRITE;
|
||||
|
@ -942,19 +954,22 @@ void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t coun
|
|||
flags = D3DLOCK_DISCARD;
|
||||
}
|
||||
|
||||
void* ptr = NULL;
|
||||
|
||||
if(SUCCEEDED(m_vb->Lock(m_vertex.start * stride, count * stride, &ptr, flags)))
|
||||
if(FAILED(m_vb->Lock(m_vertex.start * stride, count * stride, vertex, flags)))
|
||||
{
|
||||
GSVector4i::storent(ptr, vertex, count * stride);
|
||||
|
||||
m_vb->Unlock();
|
||||
return false;
|
||||
}
|
||||
|
||||
m_vertex.count = count;
|
||||
m_vertex.stride = stride;
|
||||
|
||||
IASetVertexBuffer(m_vb, stride);
|
||||
return true;
|
||||
}
|
||||
|
||||
void GSDevice9::IAUnmapVertexBuffer()
|
||||
{
|
||||
m_vb->Unlock();
|
||||
|
||||
IASetVertexBuffer(m_vb, m_vertex.stride);
|
||||
}
|
||||
|
||||
void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride)
|
||||
|
|
|
@ -196,6 +196,8 @@ public:
|
|||
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true);
|
||||
|
||||
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
|
||||
bool IAMapVertexBuffer(void** vertex, size_t stride, size_t count);
|
||||
void IAUnmapVertexBuffer();
|
||||
void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride);
|
||||
void IASetIndexBuffer(const void* index, size_t count);
|
||||
void IASetIndexBuffer(IDirect3DIndexBuffer9* ib);
|
||||
|
@ -216,7 +218,6 @@ public:
|
|||
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il);
|
||||
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DPixelShader9** ps);
|
||||
|
||||
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
|
||||
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
|
||||
void SetupGS(GSSelector sel) {}
|
||||
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
|
||||
|
|
|
@ -279,7 +279,6 @@ public:
|
|||
bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode);
|
||||
void GetFeatureLevel(D3D_FEATURE_LEVEL& level) const {level = m_shader.level;}
|
||||
|
||||
virtual void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) = 0;
|
||||
virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0;
|
||||
virtual void SetupGS(GSSelector sel) = 0;
|
||||
virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0;
|
||||
|
|
|
@ -91,6 +91,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
|||
sel.fb = m_global.sel.fb;
|
||||
sel.zb = m_global.sel.zb;
|
||||
sel.zoverflow = m_global.sel.zoverflow;
|
||||
sel.notest = m_global.sel.notest;
|
||||
|
||||
m_sp = m_sp_map[sel];
|
||||
}
|
||||
|
@ -272,17 +273,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
// Init
|
||||
|
||||
int skip = left & 3;
|
||||
int skip, steps;
|
||||
|
||||
left -= skip;
|
||||
|
||||
int steps = pixels + skip - 4;
|
||||
if(!sel.notest)
|
||||
{
|
||||
skip = left & 3;
|
||||
steps = pixels + skip - 4;
|
||||
left -= skip;
|
||||
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
}
|
||||
else
|
||||
{
|
||||
skip = 0;
|
||||
steps = pixels - 4;
|
||||
}
|
||||
|
||||
const GSVector2i* fza_base = &m_global.fzbr[top];
|
||||
const GSVector2i* fza_offset = &m_global.fzbc[left >> 2];
|
||||
|
||||
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
if(sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
if(sel.fwrite && sel.fge)
|
||||
|
@ -318,7 +326,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
}
|
||||
else if(sel.ltf)
|
||||
{
|
||||
vf = v.xxzzlh().srl16(1);
|
||||
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
}
|
||||
|
||||
s = GSVector4::cast(u);
|
||||
|
@ -508,8 +516,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
u -= 0x8000;
|
||||
v -= 0x8000;
|
||||
|
||||
uf = u.xxzzlh().srl16(1);
|
||||
vf = v.xxzzlh().srl16(1);
|
||||
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
}
|
||||
|
||||
GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
|
||||
|
@ -629,8 +637,8 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
u -= 0x8000;
|
||||
v -= 0x8000;
|
||||
|
||||
uf = u.xxzzlh().srl16(1);
|
||||
vf = v.xxzzlh().srl16(1);
|
||||
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
}
|
||||
|
||||
GSVector4i uv0 = u.sra32(16).ps32(v.sra32(16));
|
||||
|
@ -764,11 +772,11 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
if(sel.ltf)
|
||||
{
|
||||
uf = u.xxzzlh().srl16(1);
|
||||
uf = u.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
|
||||
if(sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
vf = v.xxzzlh().srl16(1);
|
||||
vf = v.xxzzlh().srl16(16 - GS_BILINEAR_PRECISION).sll16(15 - GS_BILINEAR_PRECISION);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1000,27 +1008,30 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
int fzm = 0;
|
||||
|
||||
if(sel.fwrite)
|
||||
if(!sel.notest)
|
||||
{
|
||||
fm |= test;
|
||||
}
|
||||
if(sel.fwrite)
|
||||
{
|
||||
fm |= test;
|
||||
}
|
||||
|
||||
if(sel.zwrite)
|
||||
{
|
||||
zm |= test;
|
||||
}
|
||||
if(sel.zwrite)
|
||||
{
|
||||
zm |= test;
|
||||
}
|
||||
|
||||
if(sel.fwrite && sel.zwrite)
|
||||
{
|
||||
fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask();
|
||||
}
|
||||
else if(sel.fwrite)
|
||||
{
|
||||
fzm = ~(fm == GSVector4i::xffffffff()).ps32().mask();
|
||||
}
|
||||
else if(sel.zwrite)
|
||||
{
|
||||
fzm = ~(zm == GSVector4i::xffffffff()).ps32().mask();
|
||||
if(sel.fwrite && sel.zwrite)
|
||||
{
|
||||
fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask();
|
||||
}
|
||||
else if(sel.fwrite)
|
||||
{
|
||||
fzm = ~(fm == GSVector4i::xffffffff()).ps32().mask();
|
||||
}
|
||||
else if(sel.zwrite)
|
||||
{
|
||||
fzm = ~(zm == GSVector4i::xffffffff()).ps32().mask();
|
||||
}
|
||||
}
|
||||
|
||||
// WriteZBuf
|
||||
|
@ -1030,16 +1041,39 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
if(sel.ztest && sel.zpsm < 2)
|
||||
{
|
||||
zs = zs.blend8(zd, zm);
|
||||
}
|
||||
|
||||
if(fzm & 0x0f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs);
|
||||
if(fzm & 0xf000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs);
|
||||
bool fast = sel.ztest ? sel.zpsm < 2 : sel.zpsm == 0 && sel.notest;
|
||||
|
||||
if(sel.notest)
|
||||
{
|
||||
if(fast)
|
||||
{
|
||||
GSVector4i::storel((uint8*)m_global.vm + za * 2, zs);
|
||||
GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs);
|
||||
}
|
||||
else
|
||||
{
|
||||
WritePixel(zs, za, 0, sel.zpsm);
|
||||
WritePixel(zs, za, 1, sel.zpsm);
|
||||
WritePixel(zs, za, 2, sel.zpsm);
|
||||
WritePixel(zs, za, 3, sel.zpsm);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(fzm & 0x0300) WritePixel(zs, za, 0, sel.zpsm);
|
||||
if(fzm & 0x0c00) WritePixel(zs, za, 1, sel.zpsm);
|
||||
if(fzm & 0x3000) WritePixel(zs, za, 2, sel.zpsm);
|
||||
if(fzm & 0xc000) WritePixel(zs, za, 3, sel.zpsm);
|
||||
if(fast)
|
||||
{
|
||||
if(fzm & 0x0f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs);
|
||||
if(fzm & 0xf000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(fzm & 0x0300) WritePixel(zs, za, 0, sel.zpsm);
|
||||
if(fzm & 0x0c00) WritePixel(zs, za, 1, sel.zpsm);
|
||||
if(fzm & 0x3000) WritePixel(zs, za, 2, sel.zpsm);
|
||||
if(fzm & 0xc000) WritePixel(zs, za, 3, sel.zpsm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1197,17 +1231,37 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
fs = fs.blend(fd, fm);
|
||||
}
|
||||
|
||||
if(sel.rfb && sel.fpsm < 2)
|
||||
bool fast = sel.rfb ? sel.fpsm < 2 : sel.fpsm == 0 && sel.notest;
|
||||
|
||||
if(sel.notest)
|
||||
{
|
||||
if(fzm & 0x000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs);
|
||||
if(fzm & 0x00f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs);
|
||||
if(fast)
|
||||
{
|
||||
GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs);
|
||||
GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs);
|
||||
}
|
||||
else
|
||||
{
|
||||
WritePixel(fs, fa, 0, sel.fpsm);
|
||||
WritePixel(fs, fa, 1, sel.fpsm);
|
||||
WritePixel(fs, fa, 2, sel.fpsm);
|
||||
WritePixel(fs, fa, 3, sel.fpsm);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(fzm & 0x0003) WritePixel(fs, fa, 0, sel.fpsm);
|
||||
if(fzm & 0x000c) WritePixel(fs, fa, 1, sel.fpsm);
|
||||
if(fzm & 0x0030) WritePixel(fs, fa, 2, sel.fpsm);
|
||||
if(fzm & 0x00c0) WritePixel(fs, fa, 3, sel.fpsm);
|
||||
if(fast)
|
||||
{
|
||||
if(fzm & 0x000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs);
|
||||
if(fzm & 0x00f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(fzm & 0x0003) WritePixel(fs, fa, 0, sel.fpsm);
|
||||
if(fzm & 0x000c) WritePixel(fs, fa, 1, sel.fpsm);
|
||||
if(fzm & 0x0030) WritePixel(fs, fa, 2, sel.fpsm);
|
||||
if(fzm & 0x00c0) WritePixel(fs, fa, 3, sel.fpsm);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1273,7 +1327,10 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
}
|
||||
}
|
||||
|
||||
test = GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
if(!sel.notest)
|
||||
{
|
||||
test = GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1492,6 +1549,7 @@ void GSDrawScanline::DrawRectT(const int* RESTRICT row, const int* RESTRICT col,
|
|||
if(masked) ASSERT(mask.u32[0] != 0);
|
||||
|
||||
color = color.andnot(mask);
|
||||
c = color.extract32<0>();
|
||||
|
||||
GSVector4i br = r.ralign<Align_Inside>(GSVector2i(8 * 4 / sizeof(T), 8));
|
||||
|
||||
|
|
|
@ -250,31 +250,40 @@ L("exit");
|
|||
|
||||
void GSDrawScanlineCodeGenerator::Init()
|
||||
{
|
||||
// int skip = left & 3;
|
||||
if(!m_sel.notest)
|
||||
{
|
||||
// int skip = left & 3;
|
||||
|
||||
mov(ebx, edx);
|
||||
and(edx, 3);
|
||||
mov(ebx, edx);
|
||||
and(edx, 3);
|
||||
|
||||
// left -= skip;
|
||||
// int steps = pixels + skip - 4;
|
||||
|
||||
sub(ebx, edx);
|
||||
lea(ecx, ptr[ecx + edx - 4]);
|
||||
|
||||
// int steps = pixels + skip - 4;
|
||||
// left -= skip;
|
||||
|
||||
lea(ecx, ptr[ecx + edx - 4]);
|
||||
sub(ebx, edx);
|
||||
|
||||
// GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];
|
||||
// GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
shl(edx, 4);
|
||||
shl(edx, 4);
|
||||
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)&m_test[0]]);
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)&m_test[0]]);
|
||||
|
||||
mov(eax, ecx);
|
||||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
shl(eax, 4);
|
||||
mov(eax, ecx);
|
||||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
shl(eax, 4);
|
||||
|
||||
vpor(xmm7, ptr[eax + (size_t)&m_test[7]]);
|
||||
vpor(xmm7, ptr[eax + (size_t)&m_test[7]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
mov(ebx, edx); // left
|
||||
xor(edx, edx); // skip
|
||||
lea(ecx, ptr[ecx - 4]); // steps
|
||||
}
|
||||
|
||||
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
|
||||
|
||||
|
@ -380,7 +389,8 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
{
|
||||
vpshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm6, 1);
|
||||
vpsrlw(xmm6, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm6, 15 - GS_BILINEAR_PRECISION);
|
||||
vmovdqa(ptr[&m_local.temp.vf], xmm6);
|
||||
}
|
||||
}
|
||||
|
@ -573,14 +583,17 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
}
|
||||
}
|
||||
|
||||
// test = m_test[7 + (steps & (steps >> 31))];
|
||||
if(!m_sel.notest)
|
||||
{
|
||||
// test = m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
mov(edx, ecx);
|
||||
sar(edx, 31);
|
||||
and(edx, ecx);
|
||||
shl(edx, 4);
|
||||
mov(edx, ecx);
|
||||
sar(edx, 31);
|
||||
and(edx, ecx);
|
||||
shl(edx, 4);
|
||||
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||
|
@ -730,7 +743,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 1);
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vmovdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
|
@ -739,7 +753,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 1);
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vmovdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
}
|
||||
|
@ -1283,14 +1298,16 @@ return;
|
|||
|
||||
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 1);
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vmovdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
// GSVector4i vf = v.xxzzlh().srl16(1);
|
||||
|
||||
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 1);
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vmovdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
|
||||
|
@ -1524,14 +1541,16 @@ return;
|
|||
|
||||
vpshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 1);
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vmovdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
// GSVector4i vf = v.xxzzlh().srl16(1);
|
||||
|
||||
vpshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpsrlw(xmm0, 1);
|
||||
vpsrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) vpsllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
vmovdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
|
||||
|
@ -2302,6 +2321,11 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
|
||||
void GSDrawScanlineCodeGenerator::WriteMask()
|
||||
{
|
||||
if(m_sel.notest)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// fm |= test;
|
||||
// zm |= test;
|
||||
|
||||
|
@ -2348,17 +2372,17 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
return;
|
||||
}
|
||||
|
||||
bool fast = m_sel.ztest && m_sel.zpsm < 2;
|
||||
|
||||
vmovdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
|
||||
|
||||
if(fast)
|
||||
if(m_sel.ztest && m_sel.zpsm < 2)
|
||||
{
|
||||
// zs = zs.blend8(zd, zm);
|
||||
|
||||
vpblendvb(xmm1, ptr[&m_local.temp.zd], xmm4);
|
||||
}
|
||||
|
||||
bool fast = m_sel.ztest ? m_sel.zpsm < 2 : m_sel.zpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm1, ebp, dh, fast, m_sel.zpsm, 1);
|
||||
}
|
||||
|
||||
|
@ -2664,7 +2688,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
|
|||
blend(xmm5, xmm2, xmm3); // TODO: could be skipped in certain cases, depending on fpsm and fm
|
||||
}
|
||||
|
||||
bool fast = m_sel.rfb && m_sel.fpsm < 2;
|
||||
bool fast = m_sel.rfb ? m_sel.fpsm < 2 : m_sel.fpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm5, ebx, dl, fast, m_sel.fpsm, 0);
|
||||
}
|
||||
|
@ -2677,49 +2701,67 @@ void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
|
|||
|
||||
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz)
|
||||
{
|
||||
if(fast)
|
||||
if(m_sel.notest)
|
||||
{
|
||||
// if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs);
|
||||
// if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs);
|
||||
|
||||
test(mask, 0x0f);
|
||||
je("@f");
|
||||
vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xf0);
|
||||
je("@f");
|
||||
vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
L("@@");
|
||||
|
||||
// vmaskmovps?
|
||||
if(fast)
|
||||
{
|
||||
vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
}
|
||||
else
|
||||
{
|
||||
WritePixel(src, addr, 0, psm);
|
||||
WritePixel(src, addr, 1, psm);
|
||||
WritePixel(src, addr, 2, psm);
|
||||
WritePixel(src, addr, 3, psm);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>());
|
||||
// if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>());
|
||||
// if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>());
|
||||
// if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>());
|
||||
if(fast)
|
||||
{
|
||||
// if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs);
|
||||
// if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs);
|
||||
|
||||
test(mask, 0x03);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 0, psm);
|
||||
L("@@");
|
||||
test(mask, 0x0f);
|
||||
je("@f");
|
||||
vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x0c);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 1, psm);
|
||||
L("@@");
|
||||
test(mask, 0xf0);
|
||||
je("@f");
|
||||
vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x30);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 2, psm);
|
||||
L("@@");
|
||||
// vmaskmovps?
|
||||
}
|
||||
else
|
||||
{
|
||||
// if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>());
|
||||
// if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>());
|
||||
// if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>());
|
||||
// if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>());
|
||||
|
||||
test(mask, 0xc0);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 3, psm);
|
||||
L("@@");
|
||||
test(mask, 0x03);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 0, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x0c);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 1, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x30);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 2, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xc0);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 3, psm);
|
||||
L("@@");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -250,31 +250,40 @@ L("exit");
|
|||
|
||||
void GSDrawScanlineCodeGenerator::Init()
|
||||
{
|
||||
// int skip = left & 3;
|
||||
if(!m_sel.notest)
|
||||
{
|
||||
// int skip = left & 3;
|
||||
|
||||
mov(ebx, edx);
|
||||
and(edx, 3);
|
||||
mov(ebx, edx);
|
||||
and(edx, 3);
|
||||
|
||||
// left -= skip;
|
||||
// int steps = pixels + skip - 4;
|
||||
|
||||
sub(ebx, edx);
|
||||
lea(ecx, ptr[ecx + edx - 4]);
|
||||
|
||||
// int steps = pixels + skip - 4;
|
||||
// left -= skip;
|
||||
|
||||
lea(ecx, ptr[ecx + edx - 4]);
|
||||
sub(ebx, edx);
|
||||
|
||||
// GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];
|
||||
// GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
shl(edx, 4);
|
||||
shl(edx, 4);
|
||||
|
||||
movdqa(xmm7, ptr[edx + (size_t)&m_test[0]]);
|
||||
movdqa(xmm7, ptr[edx + (size_t)&m_test[0]]);
|
||||
|
||||
mov(eax, ecx);
|
||||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
shl(eax, 4);
|
||||
mov(eax, ecx);
|
||||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
shl(eax, 4);
|
||||
|
||||
por(xmm7, ptr[eax + (size_t)&m_test[7]]);
|
||||
por(xmm7, ptr[eax + (size_t)&m_test[7]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
mov(ebx, edx); // left
|
||||
xor(edx, edx); // skip
|
||||
lea(ecx, ptr[ecx - 4]); // steps
|
||||
}
|
||||
|
||||
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
|
||||
|
||||
|
@ -380,7 +389,8 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
{
|
||||
pshuflw(xmm6, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm6, xmm6, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm6, 1);
|
||||
psrlw(xmm6, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm6, 15 - GS_BILINEAR_PRECISION);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm6);
|
||||
}
|
||||
}
|
||||
|
@ -578,14 +588,17 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
}
|
||||
}
|
||||
|
||||
// test = m_test[7 + (steps & (steps >> 31))];
|
||||
if(!m_sel.notest)
|
||||
{
|
||||
// test = m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
mov(edx, ecx);
|
||||
sar(edx, 31);
|
||||
and(edx, ecx);
|
||||
shl(edx, 4);
|
||||
mov(edx, ecx);
|
||||
sar(edx, 31);
|
||||
and(edx, ecx);
|
||||
shl(edx, 4);
|
||||
|
||||
movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);
|
||||
movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||
|
@ -735,7 +748,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 1);
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
movdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
if(m_sel.prim != GS_SPRITE_CLASS)
|
||||
|
@ -744,7 +758,8 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 1);
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
}
|
||||
|
@ -1338,14 +1353,16 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
|
||||
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 1);
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
movdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
// GSVector4i vf = v.xxzzlh().srl16(1);
|
||||
|
||||
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 1);
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
|
||||
|
@ -1591,14 +1608,16 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
|
||||
pshuflw(xmm0, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 1);
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
movdqa(ptr[&m_local.temp.uf], xmm0);
|
||||
|
||||
// GSVector4i vf = v.xxzzlh().srl16(1);
|
||||
|
||||
pshuflw(xmm0, xmm3, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
pshufhw(xmm0, xmm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
psrlw(xmm0, 1);
|
||||
psrlw(xmm0, 16 - GS_BILINEAR_PRECISION);
|
||||
if(GS_BILINEAR_PRECISION < 15) psllw(xmm0, 15 - GS_BILINEAR_PRECISION);
|
||||
movdqa(ptr[&m_local.temp.vf], xmm0);
|
||||
}
|
||||
|
||||
|
@ -2415,6 +2434,11 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
|
||||
void GSDrawScanlineCodeGenerator::WriteMask()
|
||||
{
|
||||
if(m_sel.notest)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// fm |= test;
|
||||
// zm |= test;
|
||||
|
||||
|
@ -2462,11 +2486,9 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
return;
|
||||
}
|
||||
|
||||
bool fast = m_sel.ztest && m_sel.zpsm < 2;
|
||||
|
||||
movdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
|
||||
|
||||
if(fast)
|
||||
if(m_sel.ztest && m_sel.zpsm < 2)
|
||||
{
|
||||
// zs = zs.blend8(zd, zm);
|
||||
|
||||
|
@ -2475,6 +2497,8 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
blend8(xmm1, xmm7);
|
||||
}
|
||||
|
||||
bool fast = m_sel.ztest ? m_sel.zpsm < 2 : m_sel.zpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm1, ebp, dh, fast, m_sel.zpsm, 1);
|
||||
}
|
||||
|
||||
|
@ -2804,7 +2828,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
|
|||
blend(xmm5, xmm2, xmm3); // TODO: could be skipped in certain cases, depending on fpsm and fm
|
||||
}
|
||||
|
||||
bool fast = m_sel.rfb && m_sel.fpsm < 2;
|
||||
bool fast = m_sel.rfb ? m_sel.fpsm < 2 : m_sel.fpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm5, ebx, dl, fast, m_sel.fpsm, 0);
|
||||
}
|
||||
|
@ -2817,47 +2841,65 @@ void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
|
|||
|
||||
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz)
|
||||
{
|
||||
if(fast)
|
||||
if(m_sel.notest)
|
||||
{
|
||||
// if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs);
|
||||
// if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs);
|
||||
|
||||
test(mask, 0x0f);
|
||||
je("@f");
|
||||
movq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xf0);
|
||||
je("@f");
|
||||
movhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
L("@@");
|
||||
if(fast)
|
||||
{
|
||||
movq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
movhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
}
|
||||
else
|
||||
{
|
||||
WritePixel(src, addr, 0, psm);
|
||||
WritePixel(src, addr, 1, psm);
|
||||
WritePixel(src, addr, 2, psm);
|
||||
WritePixel(src, addr, 3, psm);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>());
|
||||
// if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>());
|
||||
// if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>());
|
||||
// if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>());
|
||||
if(fast)
|
||||
{
|
||||
// if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs);
|
||||
// if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs);
|
||||
|
||||
test(mask, 0x03);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 0, psm);
|
||||
L("@@");
|
||||
test(mask, 0x0f);
|
||||
je("@f");
|
||||
movq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x0c);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 1, psm);
|
||||
L("@@");
|
||||
test(mask, 0xf0);
|
||||
je("@f");
|
||||
movhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
L("@@");
|
||||
}
|
||||
else
|
||||
{
|
||||
// if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>());
|
||||
// if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>());
|
||||
// if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>());
|
||||
// if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>());
|
||||
|
||||
test(mask, 0x30);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 2, psm);
|
||||
L("@@");
|
||||
test(mask, 0x03);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 0, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xc0);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 3, psm);
|
||||
L("@@");
|
||||
test(mask, 0x0c);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 1, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x30);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 2, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xc0);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 3, psm);
|
||||
L("@@");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -53,7 +53,8 @@ public:
|
|||
GSOffset* fb;
|
||||
GSOffset* zb;
|
||||
GSOffset* tex;
|
||||
GSPixelOffset4* fzb;
|
||||
GSPixelOffset* fzb;
|
||||
GSPixelOffset4* fzb4;
|
||||
} offset;
|
||||
|
||||
GSDrawingContext()
|
||||
|
|
|
@ -342,55 +342,55 @@ GSLocalMemory::GSLocalMemory()
|
|||
|
||||
m_psm[PSM_PSMCT24].rtx = &GSLocalMemory::ReadTexture24;
|
||||
m_psm[PSM_PSMCT16].rtx = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMCT16S].rtx = &GSLocalMemory::ReadTexture16S;
|
||||
m_psm[PSM_PSMCT16S].rtx = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMT8].rtx = &GSLocalMemory::ReadTexture8;
|
||||
m_psm[PSM_PSMT4].rtx = &GSLocalMemory::ReadTexture4;
|
||||
m_psm[PSM_PSMT8H].rtx = &GSLocalMemory::ReadTexture8H;
|
||||
m_psm[PSM_PSMT4HL].rtx = &GSLocalMemory::ReadTexture4HL;
|
||||
m_psm[PSM_PSMT4HH].rtx = &GSLocalMemory::ReadTexture4HH;
|
||||
m_psm[PSM_PSMZ32].rtx = &GSLocalMemory::ReadTexture32Z;
|
||||
m_psm[PSM_PSMZ24].rtx = &GSLocalMemory::ReadTexture24Z;
|
||||
m_psm[PSM_PSMZ16].rtx = &GSLocalMemory::ReadTexture16Z;
|
||||
m_psm[PSM_PSMZ16S].rtx = &GSLocalMemory::ReadTexture16SZ;
|
||||
m_psm[PSM_PSMZ32].rtx = &GSLocalMemory::ReadTexture32;
|
||||
m_psm[PSM_PSMZ24].rtx = &GSLocalMemory::ReadTexture24;
|
||||
m_psm[PSM_PSMZ16].rtx = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMZ16S].rtx = &GSLocalMemory::ReadTexture16;
|
||||
|
||||
m_psm[PSM_PSMCT24].rtxP = &GSLocalMemory::ReadTexture24;
|
||||
m_psm[PSM_PSMCT16].rtxP = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMCT16S].rtxP = &GSLocalMemory::ReadTexture16S;
|
||||
m_psm[PSM_PSMCT16S].rtxP = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMT8].rtxP = &GSLocalMemory::ReadTexture8P;
|
||||
m_psm[PSM_PSMT4].rtxP = &GSLocalMemory::ReadTexture4P;
|
||||
m_psm[PSM_PSMT8H].rtxP = &GSLocalMemory::ReadTexture8HP;
|
||||
m_psm[PSM_PSMT4HL].rtxP = &GSLocalMemory::ReadTexture4HLP;
|
||||
m_psm[PSM_PSMT4HH].rtxP = &GSLocalMemory::ReadTexture4HHP;
|
||||
m_psm[PSM_PSMZ32].rtxP = &GSLocalMemory::ReadTexture32Z;
|
||||
m_psm[PSM_PSMZ24].rtxP = &GSLocalMemory::ReadTexture24Z;
|
||||
m_psm[PSM_PSMZ16].rtxP = &GSLocalMemory::ReadTexture16Z;
|
||||
m_psm[PSM_PSMZ16S].rtxP = &GSLocalMemory::ReadTexture16SZ;
|
||||
m_psm[PSM_PSMZ32].rtxP = &GSLocalMemory::ReadTexture32;
|
||||
m_psm[PSM_PSMZ24].rtxP = &GSLocalMemory::ReadTexture24;
|
||||
m_psm[PSM_PSMZ16].rtxP = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMZ16S].rtxP = &GSLocalMemory::ReadTexture16;
|
||||
|
||||
m_psm[PSM_PSMCT24].rtxb = &GSLocalMemory::ReadTextureBlock24;
|
||||
m_psm[PSM_PSMCT16].rtxb = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMCT16S].rtxb = &GSLocalMemory::ReadTextureBlock16S;
|
||||
m_psm[PSM_PSMCT16S].rtxb = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMT8].rtxb = &GSLocalMemory::ReadTextureBlock8;
|
||||
m_psm[PSM_PSMT4].rtxb = &GSLocalMemory::ReadTextureBlock4;
|
||||
m_psm[PSM_PSMT8H].rtxb = &GSLocalMemory::ReadTextureBlock8H;
|
||||
m_psm[PSM_PSMT4HL].rtxb = &GSLocalMemory::ReadTextureBlock4HL;
|
||||
m_psm[PSM_PSMT4HH].rtxb = &GSLocalMemory::ReadTextureBlock4HH;
|
||||
m_psm[PSM_PSMZ32].rtxb = &GSLocalMemory::ReadTextureBlock32Z;
|
||||
m_psm[PSM_PSMZ24].rtxb = &GSLocalMemory::ReadTextureBlock24Z;
|
||||
m_psm[PSM_PSMZ16].rtxb = &GSLocalMemory::ReadTextureBlock16Z;
|
||||
m_psm[PSM_PSMZ16S].rtxb = &GSLocalMemory::ReadTextureBlock16SZ;
|
||||
m_psm[PSM_PSMZ32].rtxb = &GSLocalMemory::ReadTextureBlock32;
|
||||
m_psm[PSM_PSMZ24].rtxb = &GSLocalMemory::ReadTextureBlock24;
|
||||
m_psm[PSM_PSMZ16].rtxb = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMZ16S].rtxb = &GSLocalMemory::ReadTextureBlock16;
|
||||
|
||||
m_psm[PSM_PSMCT24].rtxbP = &GSLocalMemory::ReadTextureBlock24;
|
||||
m_psm[PSM_PSMCT16].rtxbP = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMCT16S].rtxbP = &GSLocalMemory::ReadTextureBlock16S;
|
||||
m_psm[PSM_PSMCT16S].rtxbP = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMT8].rtxbP = &GSLocalMemory::ReadTextureBlock8P;
|
||||
m_psm[PSM_PSMT4].rtxbP = &GSLocalMemory::ReadTextureBlock4P;
|
||||
m_psm[PSM_PSMT8H].rtxbP = &GSLocalMemory::ReadTextureBlock8HP;
|
||||
m_psm[PSM_PSMT4HL].rtxbP = &GSLocalMemory::ReadTextureBlock4HLP;
|
||||
m_psm[PSM_PSMT4HH].rtxbP = &GSLocalMemory::ReadTextureBlock4HHP;
|
||||
m_psm[PSM_PSMZ32].rtxbP = &GSLocalMemory::ReadTextureBlock32Z;
|
||||
m_psm[PSM_PSMZ24].rtxbP = &GSLocalMemory::ReadTextureBlock24Z;
|
||||
m_psm[PSM_PSMZ16].rtxbP = &GSLocalMemory::ReadTextureBlock16Z;
|
||||
m_psm[PSM_PSMZ16S].rtxbP = &GSLocalMemory::ReadTextureBlock16SZ;
|
||||
m_psm[PSM_PSMZ32].rtxbP = &GSLocalMemory::ReadTextureBlock32;
|
||||
m_psm[PSM_PSMZ24].rtxbP = &GSLocalMemory::ReadTextureBlock24;
|
||||
m_psm[PSM_PSMZ16].rtxbP = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMZ16S].rtxbP = &GSLocalMemory::ReadTextureBlock16;
|
||||
|
||||
m_psm[PSM_PSMCT16].bpp = m_psm[PSM_PSMCT16S].bpp = 16;
|
||||
m_psm[PSM_PSMT8].bpp = 8;
|
||||
|
@ -473,6 +473,62 @@ GSOffset* GSLocalMemory::GetOffset(uint32 bp, uint32 bw, uint32 psm)
|
|||
return o;
|
||||
}
|
||||
|
||||
GSPixelOffset* GSLocalMemory::GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
{
	// Returns the offset table for this frame buffer / z buffer pair.
	// Tables are built once, stored in m_pomap and handed out again on every later lookup with the same key.

	const uint32 fbp = FRAME.Block();
	const uint32 zbp = ZBUF.Block();
	const uint32 fpsm = FRAME.PSM;
	const uint32 zpsm = ZBUF.PSM;
	const uint32 bw = FRAME.FBW;

	ASSERT(m_psm[fpsm].trbpp > 8 || m_psm[zpsm].trbpp > 8);

	// "(psm & 0x0f) ^ ((psm & 0x30) >> 2)" creates 4 bit unique identifiers for render target formats (only)

	const uint32 fid = (fpsm & 0x0f) ^ ((fpsm & 0x30) >> 2);
	const uint32 zid = (zpsm & 0x0f) ^ ((zpsm & 0x30) >> 2);

	// key layout: FBP from bit 0, ZBP from bit 9, FBW from bit 18, frame format id from bit 24, z format id from bit 28

	const uint32 key = (FRAME.FBP << 0) | (ZBUF.ZBP << 9) | (bw << 18) | (fid << 24) | (zid << 28);

	hash_map<uint32, GSPixelOffset*>::iterator cached = m_pomap.find(key);

	if(cached != m_pomap.end())
	{
		return cached->second;
	}

	// not cached yet, build a new table

	GSPixelOffset* po = (GSPixelOffset*)_aligned_malloc(sizeof(GSPixelOffset), 32);

	po->hash = key;
	po->fbp = fbp;
	po->zbp = zbp;
	po->fpsm = fpsm;
	po->zpsm = zpsm;
	po->bw = bw;

	pixelAddress fpa = m_psm[fpsm].pa;
	pixelAddress zpa = m_psm[zpsm].pa;

	// bpp >> 5 is 1 for a 32 bpp format and 0 for anything smaller; it is applied as a left shift to every offset below

	const int fshift = m_psm[fpsm].bpp >> 5;
	const int zshift = m_psm[zpsm].bpp >> 5;

	// per row part: address of the pixel at x = 0 for each y

	for(int y = 0; y < 2048; y++)
	{
		po->row[y].x = (int)fpa(0, y, fbp, bw) << fshift;
		po->row[y].y = (int)zpa(0, y, zbp, bw) << zshift;
	}

	// per column part: taken from the first rowOffset table of each format

	for(int x = 0; x < 2048; x++)
	{
		po->col[x].x = m_psm[fpsm].rowOffset[0][x] << fshift;
		po->col[x].y = m_psm[zpsm].rowOffset[0][x] << zshift;
	}

	m_pomap[key] = po;

	return po;
}
|
||||
|
||||
GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF)
|
||||
{
|
||||
uint32 fbp = FRAME.Block();
|
||||
|
@ -1550,28 +1606,22 @@ void GSLocalMemory::ReadTexture24(const GSOffset* RESTRICT o, const GSVector4i&
|
|||
|
||||
void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
if(TEXA.AEM)
|
||||
{
|
||||
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock16<true>(src, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture16S(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
else
|
||||
{
|
||||
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock16<false>(src, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture8(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
|
@ -1629,61 +1679,6 @@ void GSLocalMemory::ReadTexture4HH(const GSOffset* RESTRICT o, const GSVector4i&
|
|||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
// Texture read routine for the 32Z case: every block produced by the
// FOREACH_BLOCK_START(r, 8, 8, 32) / FOREACH_BLOCK_END pair is passed to
// ReadBlock32<true> together with dst and dstpitch.
// NOTE(review): src (and presumably the per-block dst) are not declared in this
// function, so they appear to be supplied by the FOREACH_BLOCK_* macros - confirm
// against the macro definition. TEXA is not referenced directly in this body.
void GSLocalMemory::ReadTexture32Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
{
|
||||
ReadBlock32<true>(src, dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture24Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
if(TEXA.AEM)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock24<true>(src, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
else
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock24<false>(src, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
}
|
||||
|
||||
// Texture read routine for the 16Z case: every block produced by the
// FOREACH_BLOCK_START(r, 16, 8, 32) / FOREACH_BLOCK_END pair is first read into a
// local scratch buffer with ReadBlock16<true> and then handed to ExpandBlock16
// together with dst, dstpitch and TEXA.
// NOTE(review): src (and presumably the per-block dst) are not declared in this
// function, so they appear to be supplied by the FOREACH_BLOCK_* macros - confirm
// against the macro definition.
void GSLocalMemory::ReadTexture16Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
// scratch storage for one block: 16 * 8 uint16 entries, declared through __aligned(uint16, 32)
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
// the last argument is one eighth of the scratch buffer size in bytes (presumably the pitch of one of its 8 rows)
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
// second step: the scratch block goes to ExpandBlock16, which receives dst, dstpitch and TEXA
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
// Texture read routine for the 16SZ case. The body is statement-for-statement the
// same as ReadTexture16Z: every block produced by the
// FOREACH_BLOCK_START(r, 16, 8, 32) / FOREACH_BLOCK_END pair is read into a local
// scratch buffer with ReadBlock16<true> and then handed to ExpandBlock16.
// NOTE(review): src (and presumably the per-block dst) are not declared in this
// function, so they appear to be supplied by the FOREACH_BLOCK_* macros - confirm
// against the macro definition.
void GSLocalMemory::ReadTexture16SZ(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
// scratch storage for one block: 16 * 8 uint16 entries, declared through __aligned(uint16, 32)
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
// the last argument is one eighth of the scratch buffer size in bytes (presumably the pitch of one of its 8 rows)
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
// second step: the scratch block goes to ExpandBlock16, which receives dst, dstpitch and TEXA
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
///////////////////
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
|
@ -1709,20 +1704,16 @@ void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, cons
|
|||
|
||||
void GSLocalMemory::ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
__aligned(uint16, 32) block[16 * 8];
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock16S(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
if(TEXA.AEM)
|
||||
{
|
||||
ReadAndExpandBlock16<true>(BlockPtr(bp), dst, dstpitch, TEXA);
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadAndExpandBlock16<false>(BlockPtr(bp), dst, dstpitch, TEXA);
|
||||
}
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock8(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
|
@ -1760,45 +1751,6 @@ void GSLocalMemory::ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, con
|
|||
ReadAndExpandBlock4HH_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||
}
|
||||
|
||||
// Single-block texture read for the 32Z case: the block located through
// BlockPtr(bp) is passed to ReadBlock32<true> together with dst and dstpitch.
// TEXA is accepted but not referenced in this body.
void GSLocalMemory::ReadTextureBlock32Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
// NOTE(review): presumably realigns the stack to 32 bytes before the block read - confirm against the macro definition
ALIGN_STACK(32);
|
||||
|
||||
ReadBlock32<true>(BlockPtr(bp), dst, dstpitch);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock24Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
{
	ALIGN_STACK(32);

	// TEXA.AEM picks the ReadAndExpandBlock24 instantiation used for the block at bp

	if(!TEXA.AEM)
	{
		ReadAndExpandBlock24<false>(BlockPtr(bp), dst, dstpitch, TEXA);
	}
	else
	{
		ReadAndExpandBlock24<true>(BlockPtr(bp), dst, dstpitch, TEXA);
	}
}
|
||||
|
||||
// Single-block texture read for the 16Z case: the block located through
// BlockPtr(bp) is read into a local scratch buffer with ReadBlock16<true> and the
// scratch buffer is then handed to ExpandBlock16 together with dst, dstpitch and TEXA.
void GSLocalMemory::ReadTextureBlock16Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
// scratch storage for one block: 16 * 8 uint16 entries, declared through __aligned(uint16, 32)
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
// the last argument is one eighth of the scratch buffer size in bytes (presumably the pitch of one of its 8 rows)
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
|
||||
// Single-block texture read for the 16SZ case. The body is statement-for-statement
// the same as ReadTextureBlock16Z: the block located through BlockPtr(bp) is read
// into a local scratch buffer with ReadBlock16<true> and the scratch buffer is then
// handed to ExpandBlock16 together with dst, dstpitch and TEXA.
void GSLocalMemory::ReadTextureBlock16SZ(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
// scratch storage for one block: 16 * 8 uint16 entries, declared through __aligned(uint16, 32)
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
// the last argument is one eighth of the scratch buffer size in bytes (presumably the pitch of one of its 8 rows)
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
|
||||
///////////////////
|
||||
|
||||
void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
|
|
|
@ -56,6 +56,16 @@ public:
|
|||
uint32* GetPages(const GSVector4i& rect, uint32* pages = NULL, GSVector4i* bbox = NULL);
|
||||
};
|
||||
|
||||
struct GSPixelOffset
|
||||
{
|
||||
// 16 bit offsets (m_vm16[...])
|
||||
|
||||
GSVector2i row[2048]; // f yn | z yn
|
||||
GSVector2i col[2048]; // f xn | z xn
|
||||
uint32 hash;
|
||||
uint32 fbp, zbp, fpsm, zpsm, bw;
|
||||
};
|
||||
|
||||
struct GSPixelOffset4
|
||||
{
|
||||
// 16 bit offsets (m_vm16[...])
|
||||
|
@ -158,6 +168,7 @@ protected:
|
|||
//
|
||||
|
||||
hash_map<uint32, GSOffset*> m_omap;
|
||||
hash_map<uint32, GSPixelOffset*> m_pomap;
|
||||
hash_map<uint32, GSPixelOffset4*> m_po4map;
|
||||
hash_map<uint64, vector<GSVector2i>*> m_p2tmap;
|
||||
|
||||
|
@ -166,6 +177,7 @@ public:
|
|||
virtual ~GSLocalMemory();
|
||||
|
||||
GSOffset* GetOffset(uint32 bp, uint32 bw, uint32 psm);
|
||||
GSPixelOffset* GetPixelOffset(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
|
||||
GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
|
||||
vector<GSVector2i>* GetPage2TileMap(const GIFRegTEX0& TEX0);
|
||||
|
||||
|
@ -863,32 +875,22 @@ public:
|
|||
void ReadTexture32(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture24(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture16(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture16S(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture8(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture4(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture8H(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture4HL(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture4HH(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture32Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture24Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture16Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture16SZ(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
|
||||
void ReadTexture(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
|
||||
void ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock16S(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock8(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock4(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock8H(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock4HL(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock32Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock24Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock16Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock16SZ(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
|
||||
// pal ? 8 : 32
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ public:
|
|||
|
||||
enum counter_t
|
||||
{
|
||||
Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad,
|
||||
Frame, Prim, Draw, Swizzle, Unswizzle, Fillrate, Quad, SyncPoint,
|
||||
CounterLast,
|
||||
};
|
||||
|
||||
|
|
|
@ -30,6 +30,8 @@
|
|||
|
||||
#define THREAD_HEIGHT 4
|
||||
|
||||
int GSRasterizerData::s_counter = 0;
|
||||
|
||||
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon)
|
||||
: m_ds(ds)
|
||||
, m_id(id)
|
||||
|
@ -40,7 +42,7 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
|
|||
m_edge.buff = (GSVertexSW*)vmalloc(sizeof(GSVertexSW) * 2048, false);
|
||||
m_edge.count = 0;
|
||||
|
||||
m_myscanline = (uint8*)_aligned_malloc((2048 >> THREAD_HEIGHT) + 16, 64);
|
||||
m_scanline = (uint8*)_aligned_malloc((2048 >> THREAD_HEIGHT) + 16, 64);
|
||||
|
||||
int row = 0;
|
||||
|
||||
|
@ -48,14 +50,14 @@ GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* pe
|
|||
{
|
||||
for(int i = 0; i < threads; i++, row++)
|
||||
{
|
||||
m_myscanline[row] = i == id ? 1 : 0;
|
||||
m_scanline[row] = i == id ? 1 : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GSRasterizer::~GSRasterizer()
|
||||
{
|
||||
_aligned_free(m_myscanline);
|
||||
_aligned_free(m_scanline);
|
||||
|
||||
if(m_edge.buff != NULL) vmfree(m_edge.buff, sizeof(GSVertexSW) * 2048);
|
||||
|
||||
|
@ -66,7 +68,7 @@ bool GSRasterizer::IsOneOfMyScanlines(int top) const
|
|||
{
|
||||
ASSERT(top >= 0 && top < 2048);
|
||||
|
||||
return m_myscanline[top >> THREAD_HEIGHT] != 0;
|
||||
return m_scanline[top >> THREAD_HEIGHT] != 0;
|
||||
}
|
||||
|
||||
bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
|
||||
|
@ -78,7 +80,7 @@ bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
|
|||
|
||||
while(top < bottom)
|
||||
{
|
||||
if(m_myscanline[top++])
|
||||
if(m_scanline[top++])
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
@ -91,9 +93,9 @@ int GSRasterizer::FindMyNextScanline(int top) const
|
|||
{
|
||||
int i = top >> THREAD_HEIGHT;
|
||||
|
||||
if(m_myscanline[i] == 0)
|
||||
if(m_scanline[i] == 0)
|
||||
{
|
||||
while(m_myscanline[++i] == 0);
|
||||
while(m_scanline[++i] == 0);
|
||||
|
||||
top = i << THREAD_HEIGHT;
|
||||
}
|
||||
|
@ -124,6 +126,8 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
|||
|
||||
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
|
||||
|
||||
data->start = __rdtsc();
|
||||
|
||||
m_ds->BeginDraw(data);
|
||||
|
||||
const GSVertexSW* vertex = data->vertex;
|
||||
|
@ -140,8 +144,6 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
|||
m_fscissor_x = GSVector4(data->scissor).xzxz();
|
||||
m_fscissor_y = GSVector4(data->scissor).ywyw();
|
||||
|
||||
uint64 start = __rdtsc();
|
||||
|
||||
switch(data->primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
|
@ -206,7 +208,9 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
|||
__assume(0);
|
||||
}
|
||||
|
||||
uint64 ticks = __rdtsc() - start;
|
||||
data->pixels = m_pixels;
|
||||
|
||||
uint64 ticks = __rdtsc() - data->start;
|
||||
|
||||
m_ds->EndDraw(data->frame, ticks, m_pixels);
|
||||
}
|
||||
|
@ -444,28 +448,18 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
|
|||
|
||||
GSVector4 dxy01c = dxy01 * cross;
|
||||
|
||||
GSVector4 _z = dxy01c * dv[1].p.zzzz(dv[0].p); // dx0 * z1, dy0 * z1, dx1 * z0, dy1 * z0
|
||||
GSVector4 _f = dxy01c * dv[1].p.wwww(dv[0].p); // dx0 * f1, dy0 * f1, dx1 * f0, dy1 * f0
|
||||
/*
|
||||
dscan = dv[1] * dxy01c.yyyy() - dv[0] * dxy01c.wwww();
|
||||
dedge = dv[0] * dxy01c.zzzz() - dv[1] * dxy01c.xxxx();
|
||||
*/
|
||||
|
||||
GSVector4 _zf = _z.ywyw(_f).hsub(_z.zxzx(_f)); // dy0 * z1 - dy1 * z0, dy0 * f1 - dy1 * f0, dx1 * z0 - dx0 * z1, dx1 * f0 - dx0 * f1
|
||||
dscan.p = dv[1].p * dxy01c.yyyy() - dv[0].p * dxy01c.wwww();
|
||||
dscan.t = dv[1].t * dxy01c.yyyy() - dv[0].t * dxy01c.wwww();
|
||||
dscan.c = dv[1].c * dxy01c.yyyy() - dv[0].c * dxy01c.wwww();
|
||||
|
||||
dscan.p = _zf.zwxy(); // dy0 * z1 - dy1 * z0, dy0 * f1 - dy1 * f0
|
||||
dedge.p = _zf; // dx1 * z0 - dx0 * z1, dx1 * f0 - dx0 * f1
|
||||
|
||||
GSVector4 _s = dxy01c * dv[1].t.xxxx(dv[0].t); // dx0 * s1, dy0 * s1, dx1 * s0, dy1 * s0
|
||||
GSVector4 _t = dxy01c * dv[1].t.yyyy(dv[0].t); // dx0 * t1, dy0 * t1, dx1 * t0, dy1 * t0
|
||||
GSVector4 _q = dxy01c * dv[1].t.zzzz(dv[0].t); // dx0 * q1, dy0 * q1, dx1 * q0, dy1 * q0
|
||||
|
||||
dscan.t = _s.ywyw(_t).hsub(_q.ywyw()); // dy0 * s1 - dy1 * s0, dy0 * t1 - dy1 * t0, dy0 * q1 - dy1 * q0
|
||||
dedge.t = _s.zxzx(_t).hsub(_q.zxzx()); // dx1 * s0 - dx0 * s1, dx1 * t0 - dx0 * t1, dx1 * q0 - dx0 * q1
|
||||
|
||||
GSVector4 _r = dxy01c * dv[1].c.xxxx(dv[0].c); // dx0 * r1, dy0 * r1, dx1 * r0, dy1 * r0
|
||||
GSVector4 _g = dxy01c * dv[1].c.yyyy(dv[0].c); // dx0 * g1, dy0 * g1, dx1 * g0, dy1 * g0
|
||||
GSVector4 _b = dxy01c * dv[1].c.zzzz(dv[0].c); // dx0 * b1, dy0 * b1, dx1 * b0, dy1 * b0
|
||||
GSVector4 _a = dxy01c * dv[1].c.wwww(dv[0].c); // dx0 * a1, dy0 * a1, dx1 * a0, dy1 * a0
|
||||
|
||||
dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0
|
||||
dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1
|
||||
dedge.p = dv[0].p * dxy01c.zzzz() - dv[1].p * dxy01c.xxxx();
|
||||
dedge.t = dv[0].t * dxy01c.zzzz() - dv[1].t * dxy01c.xxxx();
|
||||
dedge.c = dv[0].c * dxy01c.zzzz() - dv[1].c * dxy01c.xxxx();
|
||||
|
||||
if(m1 & 1)
|
||||
{
|
||||
|
@ -555,7 +549,13 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
|
|||
scan.t = edge.t + dedge.t * dy;
|
||||
scan.c = edge.c + dedge.c * dy;
|
||||
|
||||
AddScanline(e++, pixels, left, top, scan + dscan * (l - p0).xxxx());
|
||||
GSVector4 prestep = (l - p0).xxxx();
|
||||
|
||||
scan.p += dscan.p * prestep;
|
||||
scan.t += dscan.t * prestep;
|
||||
scan.c += dscan.c * prestep;
|
||||
|
||||
AddScanline(e++, pixels, left, top, scan);
|
||||
}
|
||||
|
||||
top++;
|
||||
|
@ -904,11 +904,20 @@ void GSRasterizer::Flush(const GSVertexSW* vertex, const uint32* index, const GS
|
|||
|
||||
//
|
||||
|
||||
GSRasterizerList::GSRasterizerList()
|
||||
: GSJobQueue<shared_ptr<GSRasterizerData> >()
|
||||
, m_sync_count(0)
|
||||
, m_syncpoint_count(0)
|
||||
GSRasterizerList::GSRasterizerList(int threads, GSPerfMon* perfmon)
	: m_perfmon(perfmon)
{
	// m_scanline maps every band of (1 << THREAD_HEIGHT) scanlines to the index of a worker,
	// Queue() looks the band up there to decide which workers a draw has to be pushed to.
	//
	// threads: number of workers the bands are distributed over, must be positive
	// (the fill loop below cannot make progress otherwise)
	// perfmon: stored in m_perfmon

	ASSERT(threads > 0);

	const int rows = 2048 >> THREAD_HEIGHT;
	const int capacity = rows + 16; // number of entries actually allocated

	m_scanline = (uint8*)_aligned_malloc(capacity, 64);

	// Bands are handed out round-robin: 0, 1, ..., threads - 1, 0, 1, ...
	// The last round may run past "rows" into the spare entries; it must never run past the
	// allocation itself, which an unbounded inner loop does for some thread counts above 17
	// (e.g. 25 threads would write up to index 149 of a 144 byte buffer).

	int row = 0;

	while(row < rows)
	{
		for(int i = 0; i < threads && row < capacity; i++, row++)
		{
			m_scanline[row] = i;
		}
	}
}
|
||||
|
||||
GSRasterizerList::~GSRasterizerList()
|
||||
|
@ -917,31 +926,49 @@ GSRasterizerList::~GSRasterizerList()
|
|||
{
|
||||
delete *i;
|
||||
}
|
||||
|
||||
_aligned_free(m_scanline);
|
||||
}
|
||||
|
||||
void GSRasterizerList::Queue(shared_ptr<GSRasterizerData> data)
|
||||
{
|
||||
// disable dispatcher thread for now and pass-through directly,
|
||||
// would only be relevant if data->syncpoint was utilized more,
|
||||
// it would hide the syncing latency from the main gs thread
|
||||
GSVector4i r = data->bbox.rintersect(data->scissor);
|
||||
|
||||
// Push(data);
|
||||
ASSERT(r.top >= 0 && r.top < 2048 && r.bottom >= 0 && r.bottom < 2048);
|
||||
|
||||
Process(data); m_count++;
|
||||
int top = r.top >> THREAD_HEIGHT;
|
||||
int bottom = std::min<int>((r.bottom + (1 << THREAD_HEIGHT) - 1) >> THREAD_HEIGHT, top + m_workers.size());
|
||||
|
||||
while(top < bottom)
|
||||
{
|
||||
m_workers[m_scanline[top++]]->Push(data);
|
||||
}
|
||||
}
|
||||
|
||||
void GSRasterizerList::Sync()
|
||||
{
|
||||
if(GetCount() == 0) return;
|
||||
if(!IsSynced())
|
||||
{
|
||||
for(size_t i = 0; i < m_workers.size(); i++)
|
||||
{
|
||||
m_workers[i]->Wait();
|
||||
}
|
||||
|
||||
Wait(); // first dispatch all items to workers
|
||||
m_perfmon->Put(GSPerfMon::SyncPoint, 1);
|
||||
}
|
||||
}
|
||||
|
||||
bool GSRasterizerList::IsSynced() const
|
||||
{
|
||||
for(size_t i = 0; i < m_workers.size(); i++)
|
||||
{
|
||||
m_workers[i]->Wait(); // then wait all workers to finish their jobs
|
||||
if(!m_workers[i]->IsEmpty())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
m_sync_count++;
|
||||
return true;
|
||||
}
|
||||
|
||||
int GSRasterizerList::GetPixels(bool reset)
|
||||
|
@ -956,24 +983,6 @@ int GSRasterizerList::GetPixels(bool reset)
|
|||
return pixels;
|
||||
}
|
||||
|
||||
void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
{
	// A sync point is only dispatched after Wait() has returned on every worker.

	if(item->syncpoint)
	{
		for(vector<GSWorker*>::iterator w = m_workers.begin(); w != m_workers.end(); ++w)
		{
			(*w)->Wait();
		}

		m_syncpoint_count++;
	}

	// The item is offered to all workers, in worker order.

	for(vector<GSWorker*>::iterator w = m_workers.begin(); w != m_workers.end(); ++w)
	{
		(*w)->Push(item);
	}
}
|
||||
|
||||
// GSRasterizerList::GSWorker
|
||||
|
||||
GSRasterizerList::GSWorker::GSWorker(GSRasterizer* r)
|
||||
|
@ -994,16 +1003,6 @@ int GSRasterizerList::GSWorker::GetPixels(bool reset)
|
|||
return m_r->GetPixels(reset);
|
||||
}
|
||||
|
||||
void GSRasterizerList::GSWorker::Push(const shared_ptr<GSRasterizerData>& item)
|
||||
{
|
||||
GSVector4i r = item->bbox.rintersect(item->scissor);
|
||||
|
||||
if(m_r->IsOneOfMyScanlines(r.top, r.bottom))
|
||||
{
|
||||
GSJobQueue<shared_ptr<GSRasterizerData> >::Push(item);
|
||||
}
|
||||
}
|
||||
|
||||
void GSRasterizerList::GSWorker::Process(shared_ptr<GSRasterizerData>& item)
|
||||
{
|
||||
m_r->Draw(item.get());
|
||||
|
|
|
@ -30,6 +30,8 @@
|
|||
|
||||
__aligned(class, 32) GSRasterizerData : public GSAlignedClass<32>
|
||||
{
|
||||
static int s_counter;
|
||||
|
||||
public:
|
||||
GSVector4i scissor;
|
||||
GSVector4i bbox;
|
||||
|
@ -39,8 +41,10 @@ public:
|
|||
int vertex_count;
|
||||
uint32* index;
|
||||
int index_count;
|
||||
bool syncpoint;
|
||||
uint64 frame;
|
||||
uint64 start;
|
||||
int pixels;
|
||||
int counter;
|
||||
|
||||
GSRasterizerData()
|
||||
: scissor(GSVector4i::zero())
|
||||
|
@ -51,9 +55,11 @@ public:
|
|||
, vertex_count(0)
|
||||
, index(NULL)
|
||||
, index_count(0)
|
||||
, syncpoint(false)
|
||||
, frame(0)
|
||||
, start(0)
|
||||
, pixels(0)
|
||||
{
|
||||
counter = s_counter++;
|
||||
}
|
||||
|
||||
virtual ~GSRasterizerData()
|
||||
|
@ -109,6 +115,7 @@ public:
|
|||
|
||||
virtual void Queue(shared_ptr<GSRasterizerData> data) = 0;
|
||||
virtual void Sync() = 0;
|
||||
virtual bool IsSynced() const = 0;
|
||||
virtual int GetPixels(bool reset = true) = 0;
|
||||
};
|
||||
|
||||
|
@ -119,7 +126,7 @@ protected:
|
|||
IDrawScanline* m_ds;
|
||||
int m_id;
|
||||
int m_threads;
|
||||
uint8* m_myscanline;
|
||||
uint8* m_scanline;
|
||||
GSVector4i m_scissor;
|
||||
GSVector4 m_fscissor_x;
|
||||
GSVector4 m_fscissor_y;
|
||||
|
@ -155,12 +162,12 @@ public:
|
|||
|
||||
void Queue(shared_ptr<GSRasterizerData> data);
|
||||
void Sync() {}
|
||||
bool IsSynced() const {return true;}
|
||||
int GetPixels(bool reset);
|
||||
};
|
||||
|
||||
class GSRasterizerList
|
||||
: public IRasterizer
|
||||
, private GSJobQueue<shared_ptr<GSRasterizerData> >
|
||||
{
|
||||
protected:
|
||||
class GSWorker : public GSJobQueue<shared_ptr<GSRasterizerData> >
|
||||
|
@ -175,17 +182,14 @@ protected:
|
|||
|
||||
// GSJobQueue
|
||||
|
||||
void Push(const shared_ptr<GSRasterizerData>& item);
|
||||
void Process(shared_ptr<GSRasterizerData>& item);
|
||||
};
|
||||
|
||||
GSPerfMon* m_perfmon;
|
||||
vector<GSWorker*> m_workers;
|
||||
uint8* m_scanline;
|
||||
|
||||
GSRasterizerList();
|
||||
|
||||
// GSJobQueue
|
||||
|
||||
void Process(shared_ptr<GSRasterizerData>& item);
|
||||
GSRasterizerList(int threads, GSPerfMon* perfmon);
|
||||
|
||||
public:
|
||||
virtual ~GSRasterizerList();
|
||||
|
@ -200,7 +204,7 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
GSRasterizerList* rl = new GSRasterizerList();
|
||||
GSRasterizerList* rl = new GSRasterizerList(threads, perfmon);
|
||||
|
||||
for(int i = 0; i < threads; i++)
|
||||
{
|
||||
|
@ -211,12 +215,10 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
int m_sync_count;
|
||||
int m_syncpoint_count;
|
||||
|
||||
// IRasterizer
|
||||
|
||||
void Queue(shared_ptr<GSRasterizerData> data);
|
||||
void Sync();
|
||||
bool IsSynced() const;
|
||||
int GetPixels(bool reset);
|
||||
};
|
||||
|
|
|
@ -22,9 +22,8 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSRenderer.h"
|
||||
|
||||
GSRenderer::GSRenderer(GSVertexTrace* vt, size_t vertex_stride)
|
||||
: GSState(vt, vertex_stride)
|
||||
, m_dev(NULL)
|
||||
GSRenderer::GSRenderer()
|
||||
: m_dev(NULL)
|
||||
, m_shader(0)
|
||||
, m_shift_key(false)
|
||||
, m_control_key(false)
|
||||
|
@ -38,12 +37,6 @@ GSRenderer::GSRenderer(GSVertexTrace* vt, size_t vertex_stride)
|
|||
m_aa1 = !!theApp.GetConfig("aa1", 0);
|
||||
m_mipmap = !!theApp.GetConfig("mipmap", 1);
|
||||
m_fxaa = !!theApp.GetConfig("fxaa", 0);
|
||||
|
||||
s_n = 0;
|
||||
s_dump = !!theApp.GetConfig("dump", 0);
|
||||
s_save = !!theApp.GetConfig("save", 0);
|
||||
s_savez = !!theApp.GetConfig("savez", 0);
|
||||
s_saven = theApp.GetConfig("saven", 0);
|
||||
}
|
||||
|
||||
GSRenderer::~GSRenderer()
|
||||
|
@ -259,7 +252,7 @@ bool GSRenderer::Merge(int field)
|
|||
{
|
||||
int field2 = 1 - ((m_interlace - 1) & 1);
|
||||
int mode = (m_interlace - 1) >> 1;
|
||||
|
||||
|
||||
m_dev->Interlace(ds, field ^ field2, mode, tex[1] ? tex[1]->GetScale().y : tex[0]->GetScale().y);
|
||||
}
|
||||
|
||||
|
@ -306,6 +299,8 @@ void GSRenderer::VSync(int field)
|
|||
ResetDevice();
|
||||
}
|
||||
|
||||
m_dev->AgePool();
|
||||
|
||||
// osd
|
||||
|
||||
if((m_perfmon.GetFrame() & 0x1f) == 0)
|
||||
|
@ -334,7 +329,7 @@ void GSRenderer::VSync(int field)
|
|||
s2.c_str(),
|
||||
theApp.m_gs_interlace[m_interlace].name.c_str(),
|
||||
theApp.m_gs_aspectratio[m_aspectratio].name.c_str(),
|
||||
(int)m_perfmon.Get(GSPerfMon::Quad),
|
||||
(int)m_perfmon.Get(GSPerfMon::SyncPoint),
|
||||
(int)m_perfmon.Get(GSPerfMon::Prim),
|
||||
(int)m_perfmon.Get(GSPerfMon::Draw),
|
||||
m_perfmon.CPU(),
|
||||
|
|
|
@ -55,14 +55,8 @@ public:
|
|||
GSWnd m_wnd;
|
||||
GSDevice* m_dev;
|
||||
|
||||
int s_n;
|
||||
bool s_dump;
|
||||
bool s_save;
|
||||
bool s_savez;
|
||||
int s_saven;
|
||||
|
||||
public:
|
||||
GSRenderer(GSVertexTrace* vt, size_t vertex_stride);
|
||||
GSRenderer();
|
||||
virtual ~GSRenderer();
|
||||
|
||||
virtual bool CreateWnd(const string& title, int w, int h);
|
||||
|
|
|
@ -22,18 +22,28 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSRendererCS.h"
|
||||
|
||||
#define PS_BATCH_SIZE 512
|
||||
|
||||
GSRendererCS::GSRendererCS()
|
||||
: GSRenderer(new GSVertexTraceCS(this), sizeof(GSVertex))
|
||||
: GSRenderer()
|
||||
{
|
||||
m_nativeres = true;
|
||||
|
||||
InitConvertVertex(GSRendererCS);
|
||||
|
||||
memset(m_vm_valid, 0, sizeof(m_vm_valid));
|
||||
|
||||
memset(m_texture, 0, sizeof(m_texture));
|
||||
|
||||
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
|
||||
}
|
||||
|
||||
GSRendererCS::~GSRendererCS()
{
	// delete whatever is stored in the m_texture slots, first slot first

	int slot = 0;

	while(slot < countof(m_texture))
	{
		delete m_texture[slot];

		slot++;
	}

	// m_output was obtained from _aligned_malloc in the constructor

	_aligned_free(m_output);
}
|
||||
|
||||
bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
|
||||
|
@ -41,27 +51,157 @@ bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
|
|||
if(!__super::CreateDevice(dev_unk))
|
||||
return false;
|
||||
|
||||
HRESULT hr;
|
||||
|
||||
D3D11_DEPTH_STENCIL_DESC dsd;
|
||||
D3D11_BLEND_DESC bsd;
|
||||
D3D11_SAMPLER_DESC sd;
|
||||
D3D11_BUFFER_DESC bd;
|
||||
D3D11_TEXTURE2D_DESC td;
|
||||
D3D11_UNORDERED_ACCESS_VIEW_DESC uavd;
|
||||
D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
|
||||
|
||||
D3D_FEATURE_LEVEL level;
|
||||
|
||||
((GSDeviceDX*)dev_unk)->GetFeatureLevel(level);
|
||||
|
||||
if(level < D3D_FEATURE_LEVEL_10_0)
|
||||
if(level < D3D_FEATURE_LEVEL_11_0)
|
||||
return false;
|
||||
|
||||
HRESULT hr;
|
||||
|
||||
GSDevice11* dev = (GSDevice11*)dev_unk;
|
||||
|
||||
D3D11_BUFFER_DESC bd;
|
||||
D3D11_UNORDERED_ACCESS_VIEW_DESC uavd;
|
||||
D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
|
||||
ID3D11DeviceContext* ctx = *dev;
|
||||
|
||||
// empty depth stencil state
|
||||
|
||||
memset(&dsd, 0, sizeof(dsd));
|
||||
|
||||
dsd.StencilEnable = false;
|
||||
dsd.DepthEnable = false;
|
||||
|
||||
hr = (*dev)->CreateDepthStencilState(&dsd, &m_dss);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// empty blend state
|
||||
|
||||
memset(&bsd, 0, sizeof(bsd));
|
||||
|
||||
bsd.RenderTarget[0].BlendEnable = false;
|
||||
|
||||
hr = (*dev)->CreateBlendState(&bsd, &m_bs);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// point sampler
|
||||
|
||||
memset(&sd, 0, sizeof(sd));
|
||||
|
||||
sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
|
||||
|
||||
sd.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
|
||||
sd.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
|
||||
sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
|
||||
|
||||
sd.MaxLOD = FLT_MAX;
|
||||
sd.MaxAnisotropy = 16;
|
||||
sd.ComparisonFunc = D3D11_COMPARISON_NEVER;
|
||||
|
||||
hr = (*dev)->CreateSamplerState(&sd, &m_ss);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// link buffer
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = 256 << 20; // 256 MB w00t
|
||||
bd.StructureByteStride = sizeof(uint32) * 4; // c, z, id, next
|
||||
bd.Usage = D3D11_USAGE_DEFAULT;
|
||||
bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
|
||||
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, NULL, &m_lb);
|
||||
|
||||
{
|
||||
uint32 data[] = {0, 0, 0xffffffff, 0};
|
||||
|
||||
D3D11_BOX box;
|
||||
memset(&box, 0, sizeof(box));
|
||||
box.right = sizeof(data);
|
||||
box.bottom = 1;
|
||||
box.back = 1;
|
||||
|
||||
ctx->UpdateSubresource(m_lb, 0, &box, data, 0, 0);
|
||||
}
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
memset(&uavd, 0, sizeof(uavd));
|
||||
|
||||
uavd.Format = DXGI_FORMAT_UNKNOWN;
|
||||
uavd.Buffer.NumElements = bd.ByteWidth / bd.StructureByteStride;
|
||||
uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_COUNTER;
|
||||
uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
|
||||
|
||||
hr = (*dev)->CreateUnorderedAccessView(m_lb, &uavd, &m_lb_uav);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
memset(&srvd, 0, sizeof(srvd));
|
||||
|
||||
srvd.Format = DXGI_FORMAT_UNKNOWN;
|
||||
srvd.Buffer.NumElements = bd.ByteWidth / bd.StructureByteStride;
|
||||
srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(m_lb, &srvd, &m_lb_srv);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// start offset buffer
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = sizeof(uint32) * 2048 * 2048; // index
|
||||
bd.Usage = D3D11_USAGE_DEFAULT;
|
||||
bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
|
||||
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, NULL, &m_sob);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
memset(&uavd, 0, sizeof(uavd));
|
||||
|
||||
uavd.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
uavd.Buffer.NumElements = bd.ByteWidth / sizeof(uint32);
|
||||
uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
|
||||
uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
|
||||
|
||||
hr = (*dev)->CreateUnorderedAccessView(m_sob, &uavd, &m_sob_uav);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
memset(&srvd, 0, sizeof(srvd));
|
||||
|
||||
srvd.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
srvd.BufferEx.NumElements = bd.ByteWidth / sizeof(uint32);
|
||||
srvd.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW;
|
||||
srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(m_sob, &srvd, &m_sob_srv);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
const uint32 tmp = 0;
|
||||
|
||||
ctx->ClearUnorderedAccessViewUint(m_sob_uav, &tmp); // initial clear, next time Draw should restore it in Step 2
|
||||
|
||||
// video memory (4MB)
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = 4 * 1024 * 1024;
|
||||
bd.StructureByteStride = 4;
|
||||
bd.Usage = D3D11_USAGE_DEFAULT;
|
||||
bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
|
||||
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
|
||||
|
@ -81,35 +221,32 @@ bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
|
|||
hr = (*dev)->CreateUnorderedAccessView(m_vm, &uavd, &m_vm_uav);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
/*
|
||||
memset(&td, 0, sizeof(td));
|
||||
|
||||
// vertex buffer
|
||||
td.Width = PAGE_SIZE;
|
||||
td.Height = MAX_PAGES;
|
||||
td.Format = DXGI_FORMAT_R8_UINT;
|
||||
td.MipLevels = 1;
|
||||
td.ArraySize = 1;
|
||||
td.SampleDesc.Count = 1;
|
||||
td.SampleDesc.Quality = 0;
|
||||
td.Usage = D3D11_USAGE_DEFAULT;
|
||||
td.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = sizeof(GSVertex) * 10000;
|
||||
bd.StructureByteStride = sizeof(GSVertex);
|
||||
bd.Usage = D3D11_USAGE_DYNAMIC;
|
||||
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, NULL, &m_vb);
|
||||
hr = (*dev)->CreateTexture2D(&td, NULL, &m_vm);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// index buffer
|
||||
memset(&uavd, 0, sizeof(uavd));
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
uavd.Format = DXGI_FORMAT_R8_UINT;
|
||||
uavd.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D;
|
||||
|
||||
bd.ByteWidth = sizeof(uint32) * 10000 * 3;
|
||||
bd.Usage = D3D11_USAGE_DYNAMIC;
|
||||
bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, NULL, &m_ib);
|
||||
hr = (*dev)->CreateUnorderedAccessView(m_vm, &uavd, &m_vm_uav);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
*/
|
||||
// one page, for copying between cpu<->gpu
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
@ -121,219 +258,429 @@ bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
|
|||
hr = (*dev)->CreateBuffer(&bd, NULL, &m_pb);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
/*
|
||||
memset(&td, 0, sizeof(td));
|
||||
|
||||
td.Width = PAGE_SIZE;
|
||||
td.Height = 1;
|
||||
td.Format = DXGI_FORMAT_R8_UINT;
|
||||
td.MipLevels = 1;
|
||||
td.ArraySize = 1;
|
||||
td.SampleDesc.Count = 1;
|
||||
td.SampleDesc.Quality = 0;
|
||||
td.Usage = D3D11_USAGE_STAGING;
|
||||
td.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
|
||||
|
||||
hr = (*dev)->CreateTexture2D(&td, NULL, &m_pb);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
*/
|
||||
// VSConstantBuffer
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = sizeof(VSConstantBuffer);
|
||||
bd.Usage = D3D11_USAGE_DEFAULT;
|
||||
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, NULL, &m_vs_cb);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// PS
|
||||
|
||||
D3D11_SHADER_MACRO macro[] =
|
||||
{
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
hr = dev->CompileShader(IDR_CS_FX, "ps_main0", macro, &m_ps0);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// PSConstantBuffer
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = sizeof(PSConstantBuffer);
|
||||
bd.Usage = D3D11_USAGE_DEFAULT;
|
||||
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, NULL, &m_ps_cb);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
//
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Drops the cached textures held in m_texture: every slot is deleted and then
// reset to NULL.
void GSRendererCS::ResetDevice()
|
||||
{
|
||||
for(int i = 0; i < countof(m_texture); i++)
|
||||
{
|
||||
delete m_texture[i];
|
||||
|
||||
// clear the slot so the deleted pointer is not left dangling
m_texture[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Vertical-sync hook: simply forwards the field value to the base class
// implementation. The commented-out printf below is a disabled debug trace of
// m_perfmon.GetFrame().
void GSRendererCS::VSync(int field)
|
||||
{
|
||||
__super::VSync(field);
|
||||
|
||||
//printf("%lld\n", m_perfmon.GetFrame());
|
||||
}
|
||||
|
||||
GSTexture* GSRendererCS::GetOutput(int i)
|
||||
{
|
||||
// TODO: create a compute shader which unswizzles the frame from m_vm to the output texture
|
||||
|
||||
return NULL;
|
||||
}
|
||||
const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererCS::ConvertVertex(size_t dst_index, size_t src_index)
|
||||
{
|
||||
// TODO: vertex format more fitting as the input for the compute shader
|
||||
int w = DISPFB.FBW * 64;
|
||||
int h = GetFrameRect(i).bottom;
|
||||
|
||||
if(src_index != dst_index)
|
||||
// TODO: round up bottom
|
||||
|
||||
if(m_dev->ResizeTexture(&m_texture[i], w, h))
|
||||
{
|
||||
GSVertex v = ((GSVertex*)m_vertex.buff)[src_index];
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[DISPFB.PSM];
|
||||
|
||||
((GSVertex*)m_vertex.buff)[dst_index] = v;
|
||||
GSVector4i r(0, 0, w, h);
|
||||
GSVector4i r2 = r.ralign<Align_Outside>(psm.bs);
|
||||
|
||||
GSOffset* o = m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM);
|
||||
|
||||
Read(o, r2, false);
|
||||
|
||||
(m_mem.*psm.rtx)(o, r2, m_output, 1024 * 4, m_env.TEXA);
|
||||
|
||||
m_texture[i]->Update(r, m_output, 1024 * 4);
|
||||
|
||||
if(s_dump)
|
||||
{
|
||||
if(s_save && s_n >= s_saven)
|
||||
{
|
||||
m_texture[i]->Save(format("c:\\temp1\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)DISPFB.Block(), (int)DISPFB.PSM));
|
||||
}
|
||||
|
||||
s_n++;
|
||||
}
|
||||
}
|
||||
|
||||
return m_texture[i];
|
||||
}
|
||||
|
||||
void GSRendererCS::Draw()
|
||||
{
|
||||
HRESULT hr;
|
||||
GSDrawingEnvironment& env = m_env;
|
||||
GSDrawingContext* context = m_context;
|
||||
|
||||
GSVector2i rtsize(2048, 2048);
|
||||
GSVector4i scissor = GSVector4i(context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
|
||||
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
||||
GSVector4i r = bbox.rintersect(scissor);
|
||||
|
||||
uint32 fm = context->FRAME.FBMSK;
|
||||
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||
|
||||
if(fm != 0xffffffff)
|
||||
{
|
||||
Write(context->offset.fb, r);
|
||||
|
||||
// TODO: m_tc->InvalidateVideoMem(context->offset.fb, r, false);
|
||||
}
|
||||
|
||||
if(zm != 0xffffffff)
|
||||
{
|
||||
Write(context->offset.zb, r);
|
||||
|
||||
// TODO: m_tc->InvalidateVideoMem(context->offset.zb, r, false);
|
||||
}
|
||||
|
||||
// TODO: if(24-bit) fm/zm |= 0xff000000;
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
|
||||
|
||||
// TODO: unswizzle pages of r to a texture, check m_vm_valid, bit not set cpu->gpu, set gpu->gpu
|
||||
|
||||
// TODO: Write transfer should directly write to m_vm, then Read/Write syncing won't be necessary, clut must be updated with the gpu also
|
||||
|
||||
// TODO: tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
|
||||
|
||||
// if(!tex) return;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
GSDevice11* dev = (GSDevice11*)m_dev;
|
||||
|
||||
|
||||
ID3D11DeviceContext* ctx = *dev;
|
||||
|
||||
D3D11_BUFFER_DESC bd;
|
||||
D3D11_UNORDERED_ACCESS_VIEW_DESC uavd;
|
||||
D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
|
||||
D3D11_MAPPED_SUBRESOURCE map;
|
||||
|
||||
CComPtr<ID3D11ShaderResourceView> vb_srv;
|
||||
CComPtr<ID3D11ShaderResourceView> ib_srv;
|
||||
|
||||
// TODO: cache these in hash_maps
|
||||
|
||||
CComPtr<ID3D11Buffer> fbr, fbc, zbr, zbc;
|
||||
CComPtr<ID3D11ShaderResourceView> fbr_srv, fbc_srv, zbr_srv, zbc_srv;
|
||||
|
||||
// TODO: grow m_vb, m_ib if needed
|
||||
|
||||
if(m_vertex.next > 10000) return;
|
||||
if(m_index.tail > 30000) return;
|
||||
|
||||
// TODO: fill/advance/discardwhenfull, as in GSDevice11::IASetVertexBuffer/IASetIndexBuffer
|
||||
|
||||
hr = ctx->Map(m_vb, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); // discarding, until properly advancing the start pointer around
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
|
||||
memcpy(map.pData, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);
|
||||
|
||||
ctx->Unmap(m_vb, 0);
|
||||
|
||||
//
|
||||
|
||||
hr = ctx->Map(m_ib, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); // discarding, until properly advancing the start pointer around
|
||||
dev->BeginScene();
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
// SetupOM
|
||||
|
||||
memcpy(map.pData, m_index.buff, sizeof(uint32) * m_index.tail);
|
||||
|
||||
ctx->Unmap(m_ib, 0);
|
||||
|
||||
// TODO: UpdateResource might be faster, based on my experience with the real vertex buffer, write-no-overwrite/discarded dynamic buffer + map is better
|
||||
|
||||
//
|
||||
|
||||
memset(&srvd, 0, sizeof(srvd));
|
||||
|
||||
srvd.Format = DXGI_FORMAT_UNKNOWN;
|
||||
srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
|
||||
srvd.Buffer.FirstElement = 0;
|
||||
srvd.Buffer.NumElements = m_vertex.next;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(m_vb, &srvd, &vb_srv); // TODO: have to create this dyncamically in Draw() or pass the start/count in a const reg
|
||||
|
||||
memset(&srvd, 0, sizeof(srvd));
|
||||
|
||||
srvd.Format = DXGI_FORMAT_R32_UINT;
|
||||
srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
|
||||
srvd.Buffer.FirstElement = 0;
|
||||
srvd.Buffer.NumElements = m_index.tail;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(m_ib, &srvd, &ib_srv); // TODO: have to create this dyncamically in Draw() or pass the start/count in a const reg
|
||||
|
||||
// fzb offsets
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = sizeof(int) * 4096;
|
||||
bd.StructureByteStride = sizeof(int);
|
||||
bd.Usage = D3D11_USAGE_IMMUTABLE;
|
||||
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
|
||||
D3D11_SUBRESOURCE_DATA data;
|
||||
|
||||
memset(&data, 0, sizeof(data));
|
||||
|
||||
data.pSysMem = m_context->offset.fb->pixel.row;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, &data, &fbr);
|
||||
|
||||
data.pSysMem = m_context->offset.fb->pixel.col[0]; // same column layout for every line in case of frame and zbuffer formats
|
||||
dev->OMSetDepthStencilState(m_dss, 0);
|
||||
dev->OMSetBlendState(m_bs, 0);
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, &data, &fbc);
|
||||
ID3D11UnorderedAccessView* uavs[] = {m_vm_uav, m_lb_uav, m_sob_uav};
|
||||
uint32 counters[] = {1, 0, 0};
|
||||
|
||||
data.pSysMem = m_context->offset.zb->pixel.row;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, &data, &zbr);
|
||||
dev->OMSetRenderTargets(rtsize, countof(uavs), uavs, counters, &scissor);
|
||||
|
||||
data.pSysMem = m_context->offset.zb->pixel.col[0]; // same column layout for every line in case of frame and zbuffer formats
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, &data, &zbc);
|
||||
// SetupIA
|
||||
|
||||
// TODO: D3D10_SHADER_MACRO (primclass, less frequently changing drawing attribs, etc.)
|
||||
D3D11_PRIMITIVE_TOPOLOGY topology;
|
||||
|
||||
uint32 sel = 0; // TODO
|
||||
|
||||
hash_map<uint32, CComPtr<ID3D11ComputeShader> >::iterator i = m_cs.find(sel);
|
||||
|
||||
CComPtr<ID3D11ComputeShader> cs;
|
||||
|
||||
if(i == m_cs.end())
|
||||
switch(m_vt.m_primclass)
|
||||
{
|
||||
// hr = dev->CompileShader(IDR_CS_FX, "cs_main", NULL, &cs);
|
||||
hr = dev->CompileShader("E:\\Progs\\pcsx2\\plugins\\GSdx\\res\\cs.fx", "cs_main", NULL, &cs);
|
||||
case GS_POINT_CLASS:
|
||||
topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
GSVector4i r2 = bbox.add32(GSVector4i(-1, -1, 1, 1)).rintersect(scissor);
|
||||
|
||||
m_cs[sel] = cs;
|
||||
m_vertex.buff[m_vertex.next + 0].XYZ.X = context->XYOFFSET.OFX + (r2.left << 4);
|
||||
m_vertex.buff[m_vertex.next + 0].XYZ.Y = context->XYOFFSET.OFY + (r2.top << 4);
|
||||
m_vertex.buff[m_vertex.next + 1].XYZ.X = context->XYOFFSET.OFX + (r2.right << 4);
|
||||
m_vertex.buff[m_vertex.next + 1].XYZ.Y = context->XYOFFSET.OFY + (r2.bottom << 4);
|
||||
|
||||
m_index.buff[m_index.tail + 0] = m_vertex.next + 0;
|
||||
m_index.buff[m_index.tail + 1] = m_vertex.next + 1;
|
||||
|
||||
dev->IASetVertexBuffer(m_vertex.buff, sizeof(GSVertex), m_vertex.next + 2);
|
||||
dev->IASetIndexBuffer(m_index.buff, m_index.tail + 2);
|
||||
|
||||
// SetupVS
|
||||
|
||||
VSSelector vs_sel;
|
||||
|
||||
vs_sel.tme = PRIM->TME;
|
||||
vs_sel.fst = PRIM->FST;
|
||||
|
||||
VSConstantBuffer vs_cb;
|
||||
|
||||
float sx = 2.0f / (rtsize.x << 4);
|
||||
float sy = 2.0f / (rtsize.y << 4);
|
||||
//float sx = 1.0f / 16;
|
||||
//float sy = 1.0f / 16;
|
||||
float ox = (float)(int)context->XYOFFSET.OFX;
|
||||
float oy = (float)(int)context->XYOFFSET.OFY;
|
||||
|
||||
vs_cb.VertexScale = GSVector4(sx, -sy, 0.0f, 0.0f);
|
||||
vs_cb.VertexOffset = GSVector4(ox * sx + 1, -(oy * sy + 1), 0.0f, -1.0f);
|
||||
//vs_cb.VertexScale = GSVector4(sx, sy, 0.0f, 0.0f);
|
||||
//vs_cb.VertexOffset = GSVector4(ox * sx, oy * sy, 0.0f, -1.0f);
|
||||
|
||||
{
|
||||
GSVertexShader11 vs;
|
||||
|
||||
hash_map<uint32, GSVertexShader11>::const_iterator i = m_vs.find(vs_sel);
|
||||
|
||||
if(i != m_vs.end())
|
||||
{
|
||||
vs = i->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
string str[2];
|
||||
|
||||
str[0] = format("%d", vs_sel.tme);
|
||||
str[1] = format("%d", vs_sel.fst);
|
||||
|
||||
D3D11_SHADER_MACRO macro[] =
|
||||
{
|
||||
{"VS_TME", str[0].c_str()},
|
||||
{"VS_FST", str[1].c_str()},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
D3D11_INPUT_ELEMENT_DESC layout[] =
|
||||
{
|
||||
{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
};
|
||||
|
||||
dev->CompileShader(IDR_CS_FX, "vs_main", macro, &vs.vs, layout, countof(layout), &vs.il);
|
||||
|
||||
m_vs[vs_sel] = vs;
|
||||
}
|
||||
|
||||
ctx->UpdateSubresource(m_vs_cb, 0, NULL, &vs_cb, 0, 0); // TODO: only update if changed
|
||||
|
||||
dev->VSSetShader(vs.vs, m_vs_cb);
|
||||
|
||||
dev->IASetInputLayout(vs.il);
|
||||
}
|
||||
|
||||
// SetupGS
|
||||
|
||||
GSSelector gs_sel;
|
||||
|
||||
gs_sel.iip = PRIM->IIP;
|
||||
|
||||
CComPtr<ID3D11GeometryShader> gs[2];
|
||||
|
||||
for(int j = 0; j < 2; j++)
|
||||
{
|
||||
gs_sel.prim = j == 0 ? m_vt.m_primclass : GS_SPRITE_CLASS;
|
||||
|
||||
hash_map<uint32, CComPtr<ID3D11GeometryShader> >::const_iterator i = m_gs.find(gs_sel);
|
||||
|
||||
if(i != m_gs.end())
|
||||
{
|
||||
gs[j] = i->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
string str[2];
|
||||
|
||||
str[0] = format("%d", gs_sel.iip);
|
||||
str[1] = format("%d", j == 0 ? gs_sel.prim : GS_SPRITE_CLASS);
|
||||
|
||||
D3D11_SHADER_MACRO macro[] =
|
||||
{
|
||||
{"GS_IIP", str[0].c_str()},
|
||||
{"GS_PRIM", str[1].c_str()},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
dev->CompileShader(IDR_CS_FX, "gs_main", macro, &gs[j]);
|
||||
|
||||
m_gs[gs_sel] = gs[j];
|
||||
}
|
||||
}
|
||||
|
||||
// SetupPS
|
||||
|
||||
dev->PSSetSamplerState(m_ss, NULL, NULL);
|
||||
|
||||
PSSelector ps_sel;
|
||||
|
||||
ps_sel.fpsm = context->FRAME.PSM;
|
||||
ps_sel.zpsm = context->ZBUF.PSM;
|
||||
|
||||
CComPtr<ID3D11PixelShader> ps[2] = {m_ps0, NULL};
|
||||
|
||||
hash_map<uint32, CComPtr<ID3D11PixelShader> >::const_iterator i = m_ps1.find(ps_sel);
|
||||
|
||||
if(i != m_ps1.end())
|
||||
{
|
||||
ps[1] = i->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
cs = i->second;
|
||||
string str[15];
|
||||
|
||||
str[0] = format("%d", PS_BATCH_SIZE);
|
||||
str[1] = format("%d", context->FRAME.PSM);
|
||||
str[2] = format("%d", context->ZBUF.PSM);
|
||||
|
||||
D3D11_SHADER_MACRO macro[] =
|
||||
{
|
||||
{"PS_BATCH_SIZE", str[0].c_str()},
|
||||
{"PS_FPSM", str[1].c_str()},
|
||||
{"PS_ZPSM", str[2].c_str()},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
dev->CompileShader(IDR_CS_FX, "ps_main1", macro, &ps[1]);
|
||||
|
||||
m_ps1[ps_sel] = ps[1];
|
||||
}
|
||||
|
||||
PSConstantBuffer ps_cb;
|
||||
|
||||
ps_cb.fm = fm;
|
||||
ps_cb.zm = zm;
|
||||
|
||||
ctx->UpdateSubresource(m_ps_cb, 0, NULL, &ps_cb, 0, 0); // TODO: only update if changed
|
||||
|
||||
OffsetBuffer* fzbo = NULL;
|
||||
|
||||
//
|
||||
GetOffsetBuffer(&fzbo);
|
||||
|
||||
dev->CSSetShaderUAV(0, m_vm_uav);
|
||||
|
||||
dev->CSSetShaderSRV(0, vb_srv);
|
||||
dev->CSSetShaderSRV(1, ib_srv);
|
||||
dev->CSSetShaderSRV(2, fbr_srv);
|
||||
dev->CSSetShaderSRV(3, fbc_srv);
|
||||
dev->CSSetShaderSRV(4, zbr_srv);
|
||||
dev->CSSetShaderSRV(5, zbc_srv);
|
||||
|
||||
dev->CSSetShader(cs);
|
||||
dev->PSSetShaderResourceView(0, fzbo->row_srv);
|
||||
dev->PSSetShaderResourceView(1, fzbo->col_srv);
|
||||
// TODO: palette, texture
|
||||
|
||||
GSVector4i bbox = GSVector4i(0, 0, 640, 512); // TODO: vertex trace
|
||||
int step = PS_BATCH_SIZE * GSUtil::GetVertexCount(PRIM->PRIM);
|
||||
|
||||
GSVector4i r = bbox.ralign<Align_Outside>(GSVector2i(16, 8));
|
||||
for(int i = 0; i < m_index.tail; i += step)
|
||||
{
|
||||
dev->IASetPrimitiveTopology(topology);
|
||||
dev->GSSetShader(gs[0]);
|
||||
dev->PSSetShader(ps[0], m_ps_cb);
|
||||
dev->DrawIndexedPrimitive(i, std::min<int>(m_index.tail - i, step));
|
||||
|
||||
bool fb = true; // TODO: frame buffer used
|
||||
bool zb = true; // TODO: z-buffer used
|
||||
dev->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_LINELIST);
|
||||
dev->GSSetShader(gs[1]);
|
||||
dev->PSSetShader(ps[1], m_ps_cb);
|
||||
dev->DrawIndexedPrimitive(m_index.tail, 2);
|
||||
|
||||
if(fb) Write(m_context->offset.fb, r);
|
||||
if(zb) Write(m_context->offset.zb, r);
|
||||
//printf("%d/%d, %d %d %d %d\n", i, m_index.tail, r2.x, r2.y, r2.z, r2.w);
|
||||
}
|
||||
|
||||
// TODO: constant buffer (frequently changing drawing attribs)
|
||||
// TODO: texture (implement texture cache)
|
||||
// TODO: clut to a palette texture (should be texture1d, not simply buffer, it is random accessed)
|
||||
// TODO: CSSetShaderSRV(6 7 8 ..., texture level 0 1 2 ...) or use Texture3D?
|
||||
// TODO: invalidate texture cache
|
||||
dev->EndScene();
|
||||
|
||||
/*
|
||||
CComPtr<ID3D11Query> q;
|
||||
if(0)
|
||||
{
|
||||
std::string s;
|
||||
/*
|
||||
s = format("c:\\temp1\\_%05d_f%lld_fb0_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), 0, 0);
|
||||
m_mem.SaveBMP(s, 0, 16, PSM_PSMCT32, 1024, 1024);
|
||||
Read(m_mem.GetOffset(0, 16, PSM_PSMCT32), GSVector4i(0, 0, 1024, 1024), false);
|
||||
*/
|
||||
//
|
||||
if(fm != 0xffffffff) Read(context->offset.fb, r, false);
|
||||
//
|
||||
if(zm != 0xffffffff) Read(context->offset.zb, r, false);
|
||||
|
||||
D3D11_QUERY_DESC qd;
|
||||
memset(&qd, 0, sizeof(qd));
|
||||
qd.Query = D3D11_QUERY_EVENT;
|
||||
s = format("c:\\temp1\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
|
||||
m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);
|
||||
|
||||
hr = (*dev)->CreateQuery(&qd, &q);
|
||||
s = format("c:\\temp1\\_%05d_f%lld_zt1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
|
||||
m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512);
|
||||
|
||||
ctx->Begin(q);
|
||||
*/
|
||||
|
||||
printf("[%lld] dispatch %05x %d %05x %d %05x %d %dx%d | %d %d %d\n",
|
||||
__rdtsc(),
|
||||
m_context->FRAME.Block(), m_context->FRAME.PSM,
|
||||
m_context->ZBUF.Block(), m_context->ZBUF.PSM,
|
||||
PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH,
|
||||
PRIM->PRIM, m_vertex.next, m_index.tail);
|
||||
/*
|
||||
s = format("c:\\temp1\\_%05d_f%lld_fb1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), 0, 0);
|
||||
m_mem.SaveBMP(s, 0, 16, PSM_PSMCT32, 1024, 1024);
|
||||
*/
|
||||
|
||||
GSVector4i rsize = r.rsize();
|
||||
|
||||
dev->Dispatch(rsize.z >> 4, rsize.w >> 3, 1); // TODO: pass upper-left corner offset (r.xy) in a const buffer
|
||||
|
||||
/*
|
||||
ctx->End(q);
|
||||
|
||||
uint64 t0 = __rdtsc();
|
||||
|
||||
BOOL b;
|
||||
|
||||
while(S_OK != ctx->GetData(q, &b, sizeof(BOOL), 0)) {}
|
||||
|
||||
printf("%lld\n", __rdtsc() - t0);
|
||||
*/
|
||||
s_n++;
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererCS::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
|
||||
{
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
||||
|
||||
Read(o, r, true); // TODO: fully overwritten pages are not needed to be read, only invalidated
|
||||
Read(o, r, true); // TODO: fully overwritten pages are not needed to be read, only invalidated (important)
|
||||
|
||||
// TODO: false deps, 8H/4HL/4HH texture sharing pages with 24-bit target
|
||||
// TODO: invalidate texture cache
|
||||
|
@ -356,6 +703,10 @@ void GSRendererCS::Write(GSOffset* o, const GSVector4i& r)
|
|||
|
||||
memset(&box, 0, sizeof(box));
|
||||
|
||||
box.right = 1;
|
||||
box.bottom = 1;
|
||||
box.back = 1;
|
||||
|
||||
uint32* pages = o->GetPages(r);
|
||||
|
||||
for(size_t i = 0; pages[i] != GSOffset::EOP; i++)
|
||||
|
@ -370,10 +721,20 @@ void GSRendererCS::Write(GSOffset* o, const GSVector4i& r)
|
|||
m_vm_valid[row] |= col;
|
||||
|
||||
box.left = page * PAGE_SIZE;
|
||||
box.right = box.left + PAGE_SIZE;
|
||||
box.right = (page + 1) * PAGE_SIZE;
|
||||
|
||||
ctx->UpdateSubresource(m_vm, 0, &box, m_mem.m_vm8 + box.left, 0, 0);
|
||||
ctx->UpdateSubresource(m_vm, 0, &box, m_mem.m_vm8 + page * PAGE_SIZE, 0, 0);
|
||||
/*
|
||||
// m_vm texture row is 2k in bytes, one page is 8k => starting row: addr / 4k, number of rows: 8k / 2k = 4
|
||||
|
||||
box.left = 0;
|
||||
box.right = PAGE_SIZE;
|
||||
box.top = page;
|
||||
box.bottom = box.top + 1;
|
||||
|
||||
ctx->UpdateSubresource(m_vm, 0, &box, m_mem.m_vm8 + page * PAGE_SIZE, 0, 0);
|
||||
*/
|
||||
if(0)
|
||||
printf("[%lld] write %05x %d %d (%d)\n", __rdtsc(), o->bp, o->bw, o->psm, page);
|
||||
}
|
||||
}
|
||||
|
@ -391,6 +752,10 @@ void GSRendererCS::Read(GSOffset* o, const GSVector4i& r, bool invalidate)
|
|||
|
||||
memset(&box, 0, sizeof(box));
|
||||
|
||||
box.right = 1;
|
||||
box.bottom = 1;
|
||||
box.back = 1;
|
||||
|
||||
uint32* pages = o->GetPages(r);
|
||||
|
||||
for(size_t i = 0; pages[i] != GSOffset::EOP; i++)
|
||||
|
@ -402,21 +767,34 @@ void GSRendererCS::Read(GSOffset* o, const GSVector4i& r, bool invalidate)
|
|||
|
||||
if(m_vm_valid[row] & col)
|
||||
{
|
||||
if(invalidate) m_vm_valid[row] ^= col;
|
||||
if(invalidate)
|
||||
{
|
||||
m_vm_valid[row] ^= col;
|
||||
}
|
||||
|
||||
box.left = page * PAGE_SIZE;
|
||||
box.right = box.left + PAGE_SIZE;
|
||||
box.right = (page + 1) * PAGE_SIZE;
|
||||
|
||||
ctx->CopySubresourceRegion(m_pb, 0, 0, 0, 0, m_vm, 0, &box);
|
||||
/*
|
||||
// m_vm texture row is 2k in bytes, one page is 8k => starting row: addr / 4k, number of rows: 8k / 2k = 4
|
||||
|
||||
box.left = 0;
|
||||
box.right = PAGE_SIZE;
|
||||
box.top = page;
|
||||
box.bottom = box.top + 1;
|
||||
|
||||
ctx->CopySubresourceRegion(m_pb, 0, 0, 0, 0, m_vm, 0, &box);
|
||||
*/
|
||||
D3D11_MAPPED_SUBRESOURCE map;
|
||||
|
||||
if(SUCCEEDED(ctx->Map(m_pb, 0, D3D11_MAP_READ_WRITE, 0, &map)))
|
||||
if(SUCCEEDED(ctx->Map(m_pb, 0, D3D11_MAP_READ, 0, &map)))
|
||||
{
|
||||
memcpy(m_mem.m_vm8 + box.left, map.pData, PAGE_SIZE);
|
||||
memcpy(m_mem.m_vm8 + page * PAGE_SIZE, map.pData, PAGE_SIZE);
|
||||
|
||||
ctx->Unmap(m_pb, 0);
|
||||
|
||||
|
||||
if(0)
|
||||
printf("[%lld] read %05x %d %d (%d)\n", __rdtsc(), o->bp, o->bw, o->psm, page);
|
||||
}
|
||||
}
|
||||
|
@ -424,3 +802,64 @@ void GSRendererCS::Read(GSOffset* o, const GSVector4i& r, bool invalidate)
|
|||
|
||||
delete [] pages;
|
||||
}
|
||||
|
||||
// Returns (through *fzbo) the cached OffsetBuffer for the current context's
// offset.fzb table, creating and caching it on first use.
//
// The cache (m_offset) is keyed by m_context->offset.fzb->hash. On a miss two
// immutable buffers are created from fzb->row and fzb->col, each with a
// shader resource view, and the result is stored in the cache.
//
// fzbo    out: receives a pointer to the cache entry (owned by m_offset).
// returns true on success; false if any D3D11 buffer/view creation fails, in
//         which case *fzbo is not written and nothing is added to the cache.
bool GSRendererCS::GetOffsetBuffer(OffsetBuffer** fzbo)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
GSDevice11* dev = (GSDevice11*)m_dev;
|
||||
|
||||
D3D11_BUFFER_DESC bd;
|
||||
D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
|
||||
D3D11_SUBRESOURCE_DATA data;
|
||||
|
||||
hash_map<uint32, OffsetBuffer>::iterator i = m_offset.find(m_context->offset.fzb->hash);
|
||||
|
||||
if(i == m_offset.end())
|
||||
{
|
||||
OffsetBuffer ob;
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
// the same description is used for both the row and the column buffer:
// 2048 GSVector2i entries, immutable, readable from shaders
bd.ByteWidth = sizeof(GSVector2i) * 2048;
|
||||
bd.Usage = D3D11_USAGE_IMMUTABLE;
|
||||
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
|
||||
memset(&data, 0, sizeof(data));
|
||||
|
||||
// row buffer, initialized from the fzb row table
data.pSysMem = m_context->offset.fzb->row;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, &data, &ob.row);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// column buffer, initialized from the fzb column table
data.pSysMem = m_context->offset.fzb->col;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, &data, &ob.col);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
memset(&srvd, 0, sizeof(srvd));
|
||||
|
||||
// one view description shared by both buffers: 2048 elements of two
// signed 32-bit integers each
srvd.Format = DXGI_FORMAT_R32G32_SINT;
|
||||
srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
|
||||
srvd.Buffer.FirstElement = 0;
|
||||
srvd.Buffer.NumElements = 2048;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(ob.row, &srvd, &ob.row_srv);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(ob.col, &srvd, &ob.col_srv);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// store the new entry, then look it up again so that i refers to the
// copy held by the cache rather than to the local ob
m_offset[m_context->offset.fzb->hash] = ob;
|
||||
|
||||
i = m_offset.find(m_context->offset.fzb->hash);
|
||||
}
|
||||
|
||||
*fzbo = &i->second;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -26,28 +26,114 @@
|
|||
|
||||
class GSRendererCS : public GSRenderer
|
||||
{
|
||||
class GSVertexTraceCS : public GSVertexTrace
|
||||
struct VSSelector
|
||||
{
|
||||
public:
|
||||
GSVertexTraceCS(const GSState* state) : GSVertexTrace(state) {}
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32 tme:1;
|
||||
uint32 fst:1;
|
||||
};
|
||||
|
||||
uint32 key;
|
||||
};
|
||||
|
||||
operator uint32() {return key & 0x3;}
|
||||
|
||||
VSSelector() : key(0) {}
|
||||
};
|
||||
|
||||
// CPU-side image of the vertex shader constants; Draw() fills one of these
// and copies it into m_vs_cb.
// NOTE(review): __aligned(struct, 32) is a project macro, presumably
// requesting 32-byte alignment - confirm against its definition.
__aligned(struct, 32) VSConstantBuffer
|
||||
{
|
||||
// scale applied to vertex positions
GSVector4 VertexScale;
|
||||
// offset applied to vertex positions
GSVector4 VertexOffset;
|
||||
};
|
||||
|
||||
// Selector describing one geometry shader variant. The bit-fields overlay
// `key`, and the uint32 conversion yields the value used as the key of the
// m_gs shader cache.
struct GSSelector
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
// taken from PRIM->IIP by Draw()
uint32 iip:1;
|
||||
// primitive class the shader is built for
uint32 prim:2;
|
||||
};
|
||||
|
||||
uint32 key;
|
||||
};
|
||||
|
||||
// mask keeps the 3 bits declared above (1 + 2)
operator uint32() {return key & 0x7;}
|
||||
|
||||
// all bits start cleared, so unused bits never leak into the key
GSSelector() : key(0) {}
|
||||
};
|
||||
|
||||
// Selector describing one pixel shader variant. The bit-fields overlay `key`,
// and the uint32 conversion yields the value used as the key of the m_ps1
// shader cache.
struct PSSelector
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
// frame buffer pixel storage format (context->FRAME.PSM)
uint32 fpsm:6;
|
||||
// z buffer pixel storage format (context->ZBUF.PSM)
uint32 zpsm:6;
|
||||
};
|
||||
|
||||
uint32 key;
|
||||
};
|
||||
|
||||
// The two fields occupy 12 bits (6 + 6), so the mask has to be 0xfff.
// The previous 0x3ff kept only 10 bits and silently dropped the two upper
// bits of zpsm, letting selectors with different z formats map to the
// same cache key. (Assumes the bit-fields are packed from the least
// significant bit, as the masks of the sibling selectors also do.)
operator uint32() {return key & 0xfff;}
|
||||
|
||||
// all bits start cleared, so unused bits never leak into the key
PSSelector() : key(0) {}
|
||||
};
|
||||
|
||||
// CPU-side image of the pixel shader constants; Draw() fills one of these and
// copies it into m_ps_cb.
// NOTE(review): __aligned(struct, 32) is a project macro, presumably
// requesting 32-byte alignment - confirm against its definition.
__aligned(struct, 32) PSConstantBuffer
|
||||
{
|
||||
// frame buffer mask (Draw() passes context->FRAME.FBMSK)
uint32 fm;
|
||||
// z buffer mask (Draw() passes 0xffffffff when z is masked or the z test is off, else 0)
uint32 zm;
|
||||
};
|
||||
|
||||
CComPtr<ID3D11DepthStencilState> m_dss;
|
||||
CComPtr<ID3D11BlendState> m_bs;
|
||||
CComPtr<ID3D11SamplerState> m_ss;
|
||||
CComPtr<ID3D11Buffer> m_lb;
|
||||
CComPtr<ID3D11UnorderedAccessView> m_lb_uav;
|
||||
CComPtr<ID3D11ShaderResourceView> m_lb_srv;
|
||||
CComPtr<ID3D11Buffer> m_sob;
|
||||
CComPtr<ID3D11UnorderedAccessView> m_sob_uav;
|
||||
CComPtr<ID3D11ShaderResourceView> m_sob_srv;
|
||||
CComPtr<ID3D11Buffer> m_vm;
|
||||
//CComPtr<ID3D11Texture2D> m_vm;
|
||||
CComPtr<ID3D11UnorderedAccessView> m_vm_uav;
|
||||
CComPtr<ID3D11Buffer> m_vb;
|
||||
CComPtr<ID3D11Buffer> m_ib;
|
||||
CComPtr<ID3D11Buffer> m_pb;
|
||||
hash_map<uint32, CComPtr<ID3D11ComputeShader> > m_cs;
|
||||
uint32 m_vm_valid[16];
|
||||
CComPtr<ID3D11Buffer> m_pb;
|
||||
//CComPtr<ID3D11Texture2D> m_pb;
|
||||
hash_map<uint32, GSVertexShader11 > m_vs;
|
||||
CComPtr<ID3D11Buffer> m_vs_cb;
|
||||
hash_map<uint32, CComPtr<ID3D11GeometryShader> > m_gs;
|
||||
CComPtr<ID3D11PixelShader> m_ps0;
|
||||
hash_map<uint32, CComPtr<ID3D11PixelShader> > m_ps1;
|
||||
CComPtr<ID3D11Buffer> m_ps_cb;
|
||||
|
||||
void Write(GSOffset* o, const GSVector4i& r);
|
||||
void Read(GSOffset* o, const GSVector4i& r, bool invalidate);
|
||||
|
||||
|
||||
// GPU copies of one row/column offset table pair, together with the shader
// resource views used to read them. Instances are created by
// GetOffsetBuffer() and kept in m_offset.
struct OffsetBuffer
|
||||
{
|
||||
// buffers holding the row and column tables
CComPtr<ID3D11Buffer> row, col;
|
||||
// views over row and col respectively
CComPtr<ID3D11ShaderResourceView> row_srv, col_srv;
|
||||
};
|
||||
|
||||
hash_map<uint32, OffsetBuffer> m_offset;
|
||||
|
||||
bool GetOffsetBuffer(OffsetBuffer** fzbo);
|
||||
|
||||
protected:
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index);
|
||||
GSTexture* m_texture[2];
|
||||
uint8* m_output;
|
||||
|
||||
bool CreateDevice(GSDevice* dev);
|
||||
void ResetDevice();
|
||||
void VSync(int field);
|
||||
GSTexture* GetOutput(int i);
|
||||
void Draw();
|
||||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
||||
|
|
|
@ -23,10 +23,9 @@
|
|||
#include "GSRendererDX.h"
|
||||
#include "GSDeviceDX.h"
|
||||
|
||||
GSRendererDX::GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter)
|
||||
: GSRendererHW(vt, vertex_stride, tc)
|
||||
GSRendererDX::GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter)
|
||||
: GSRendererHW(tc)
|
||||
, m_pixelcenter(pixelcenter)
|
||||
, m_topology(-1)
|
||||
{
|
||||
m_logz = !!theApp.GetConfig("logz", 0);
|
||||
m_fba = !!theApp.GetConfig("fba", 1);
|
||||
|
@ -61,7 +60,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
|
||||
GSVector4 o = GSVector4(-1.0f, 1.0f);
|
||||
|
||||
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 dst = src * 2.0f + o.xxxx();
|
||||
|
||||
GSVertexPT1 vertices[] =
|
||||
|
@ -111,7 +110,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
|
||||
if(!IsOpaque())
|
||||
{
|
||||
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt->m_primclass == GS_LINE_CLASS;
|
||||
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
|
||||
|
||||
om_bsel.a = context->ALPHA.A;
|
||||
om_bsel.b = context->ALPHA.B;
|
||||
|
@ -154,11 +153,11 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
{
|
||||
if(context->ZBUF.PSM == PSM_PSMZ24)
|
||||
{
|
||||
if(m_vt->m_max.p.z > 0xffffff)
|
||||
if(m_vt.m_max.p.z > 0xffffff)
|
||||
{
|
||||
ASSERT(m_vt->m_min.p.z > 0xffffff);
|
||||
ASSERT(m_vt.m_min.p.z > 0xffffff);
|
||||
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
|
||||
if (m_vt->m_min.p.z > 0xffffff)
|
||||
if (m_vt.m_min.p.z > 0xffffff)
|
||||
{
|
||||
vs_sel.bppz = 1;
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
|
@ -167,11 +166,11 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
}
|
||||
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
|
||||
{
|
||||
if(m_vt->m_max.p.z > 0xffff)
|
||||
if(m_vt.m_max.p.z > 0xffff)
|
||||
{
|
||||
ASSERT(m_vt->m_min.p.z > 0xffff); // sfex capcom logo
|
||||
ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
|
||||
// Fixme : Same as above, I guess.
|
||||
if (m_vt->m_min.p.z > 0xffff)
|
||||
if (m_vt.m_min.p.z > 0xffff)
|
||||
{
|
||||
vs_sel.bppz = 2;
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
|
@ -213,7 +212,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
GSDeviceDX::GSSelector gs_sel;
|
||||
|
||||
gs_sel.iip = PRIM->IIP;
|
||||
gs_sel.prim = m_vt->m_primclass;
|
||||
gs_sel.prim = m_vt.m_primclass;
|
||||
|
||||
// ps
|
||||
|
||||
|
@ -233,7 +232,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
}
|
||||
}
|
||||
|
||||
if (env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
|
||||
if(env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
|
||||
{
|
||||
ps_sel.colclip = 1;
|
||||
}
|
||||
|
@ -281,7 +280,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
ps_sel.aem = env.TEXA.AEM;
|
||||
ps_sel.tfx = context->TEX0.TFX;
|
||||
ps_sel.tcc = context->TEX0.TCC;
|
||||
ps_sel.ltf = m_filter == 2 ? m_vt->IsLinear() : m_filter;
|
||||
ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter;
|
||||
ps_sel.rt = tex->m_target;
|
||||
|
||||
int w = tex->m_texture->GetWidth();
|
||||
|
@ -330,8 +329,9 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
|
||||
uint8 afix = context->ALPHA.FIX;
|
||||
|
||||
SetupIA();
|
||||
|
||||
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||
dev->SetupIA(m_vertex.buff, m_vertex.next, m_index.buff, m_index.tail, m_topology);
|
||||
dev->SetupVS(vs_sel, &vs_cb);
|
||||
dev->SetupGS(gs_sel);
|
||||
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
|
||||
|
|
|
@ -32,13 +32,12 @@ class GSRendererDX : public GSRendererHW
|
|||
bool UserHacks_AlphaHack;
|
||||
|
||||
protected:
|
||||
int m_topology;
|
||||
|
||||
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
virtual void SetupIA() = 0;
|
||||
virtual void UpdateFBA(GSTexture* rt) {}
|
||||
|
||||
public:
|
||||
GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0));
|
||||
GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0));
|
||||
virtual ~GSRendererDX();
|
||||
|
||||
};
|
||||
|
|
|
@ -25,9 +25,8 @@
|
|||
#include "resource.h"
|
||||
|
||||
GSRendererDX11::GSRendererDX11()
|
||||
: GSRendererDX(new GSVertexTraceDX11(this), sizeof(GSVertexHW11), new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
|
||||
: GSRendererDX(new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
|
||||
{
|
||||
InitConvertVertex(GSRendererDX11);
|
||||
}
|
||||
|
||||
bool GSRendererDX11::CreateDevice(GSDevice* dev)
|
||||
|
@ -38,43 +37,38 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev)
|
|||
return true;
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererDX11::ConvertVertex(size_t dst_index, size_t src_index)
|
||||
void GSRendererDX11::SetupIA()
|
||||
{
|
||||
GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index);
|
||||
GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index;
|
||||
GSDevice11* dev = (GSDevice11*)m_dev;
|
||||
|
||||
GSVector4i v0 = ((GSVector4i*)s)[0];
|
||||
GSVector4i v1 = ((GSVector4i*)s)[1];
|
||||
void* ptr = NULL;
|
||||
|
||||
if(tme && fst)
|
||||
if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next))
|
||||
{
|
||||
// TODO: modify VertexTrace and the shaders to read uv from v1.u16[0], v1.u16[1], then this step is not needed
|
||||
GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);
|
||||
|
||||
v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st
|
||||
dev->IAUnmapVertexBuffer();
|
||||
}
|
||||
|
||||
((GSVector4i*)d)[0] = v0;
|
||||
((GSVector4i*)d)[1] = v1;
|
||||
}
|
||||
dev->IASetIndexBuffer(m_index.buff, m_index.tail);
|
||||
|
||||
void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
{
|
||||
switch(m_vt->m_primclass)
|
||||
D3D11_PRIMITIVE_TOPOLOGY t;
|
||||
|
||||
switch(m_vt.m_primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
m_topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
|
||||
t = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
m_topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
|
||||
t = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
m_topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
|
||||
t = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
__super::DrawPrims(rt, ds, tex);
|
||||
|
||||
dev->IASetPrimitiveTopology(t);
|
||||
}
|
||||
|
|
|
@ -28,14 +28,7 @@
|
|||
class GSRendererDX11 : public GSRendererDX
|
||||
{
|
||||
protected:
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index);
|
||||
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
|
||||
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;}
|
||||
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.y;}
|
||||
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW11*)vertex)->c0;}
|
||||
void SetColor(void* vertex, uint32 c) const {((GSVertexHW11*)vertex)->c0 = c;}
|
||||
void SetupIA();
|
||||
|
||||
public:
|
||||
GSRendererDX11();
|
||||
|
|
|
@ -25,9 +25,8 @@
|
|||
#include "resource.h"
|
||||
|
||||
GSRendererDX9::GSRendererDX9()
|
||||
: GSRendererDX(new GSVertexTraceDX9(this), sizeof(GSVertexHW9), new GSTextureCache9(this))
|
||||
: GSRendererDX(new GSTextureCache9(this))
|
||||
{
|
||||
InitConvertVertex(GSRendererDX9);
|
||||
}
|
||||
|
||||
bool GSRendererDX9::CreateDevice(GSDevice* dev)
|
||||
|
@ -57,56 +56,21 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev)
|
|||
return true;
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index)
|
||||
void GSRendererDX9::SetupIA()
|
||||
{
|
||||
GSVertex* s = (GSVertex*)((GSVertexHW9*)m_vertex.buff + src_index);
|
||||
GSVertexHW9* d = (GSVertexHW9*)m_vertex.buff + dst_index;
|
||||
D3DPRIMITIVETYPE topology;
|
||||
|
||||
GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());
|
||||
|
||||
if(tme && !fst)
|
||||
{
|
||||
p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
|
||||
}
|
||||
else
|
||||
{
|
||||
p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
|
||||
}
|
||||
|
||||
GSVector4 t = GSVector4::zero();
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(fst)
|
||||
{
|
||||
t = GSVector4(GSVector4i::load(s->UV).upl16());
|
||||
}
|
||||
else
|
||||
{
|
||||
t = GSVector4::loadl(&s->ST);
|
||||
}
|
||||
}
|
||||
|
||||
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));
|
||||
|
||||
d->p = p;
|
||||
d->t = t;
|
||||
}
|
||||
|
||||
void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
{
|
||||
switch(m_vt->m_primclass)
|
||||
switch(m_vt.m_primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
|
||||
m_topology = D3DPT_POINTLIST;
|
||||
topology = D3DPT_POINTLIST;
|
||||
|
||||
break;
|
||||
|
||||
case GS_LINE_CLASS:
|
||||
|
||||
m_topology = D3DPT_LINELIST;
|
||||
topology = D3DPT_LINELIST;
|
||||
|
||||
if(PRIM->IIP == 0)
|
||||
{
|
||||
|
@ -122,7 +86,7 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
case GS_TRIANGLE_CLASS:
|
||||
|
||||
m_topology = D3DPT_TRIANGLELIST;
|
||||
topology = D3DPT_TRIANGLELIST;
|
||||
|
||||
if(PRIM->IIP == 0)
|
||||
{
|
||||
|
@ -138,7 +102,7 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
case GS_SPRITE_CLASS:
|
||||
|
||||
m_topology = D3DPT_TRIANGLELIST;
|
||||
topology = D3DPT_TRIANGLELIST;
|
||||
|
||||
// each sprite converted to quad needs twice the space
|
||||
|
||||
|
@ -154,29 +118,35 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
size_t count = m_vertex.next;
|
||||
|
||||
int i = (int)count * 2 - 4;
|
||||
GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2;
|
||||
GSVertexHW9* q = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * (count * 2)] - 4;
|
||||
uint32* RESTRICT index = &m_index.buff[count * 3] - 6;
|
||||
GSVertex* s = &m_vertex.buff[count - 2];
|
||||
GSVertex* q = &m_vertex.buff[count * 2 - 4];
|
||||
uint32* RESTRICT index = &m_index.buff[count * 3 - 6];
|
||||
|
||||
for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
|
||||
{
|
||||
GSVertexHW9 v0 = s[0];
|
||||
GSVertexHW9 v1 = s[1];
|
||||
GSVertex v0 = s[0];
|
||||
GSVertex v1 = s[1];
|
||||
|
||||
v0.p = v0.p.xyzw(v1.p); // z, q
|
||||
v0.t = v0.t.xyzw(v1.t); // c, f
|
||||
v0.RGBAQ = v1.RGBAQ;
|
||||
v0.XYZ.Z = v1.XYZ.Z;
|
||||
v0.FOG = v1.FOG;
|
||||
|
||||
q[0] = v0;
|
||||
q[3] = v1;
|
||||
|
||||
// swap x, s
|
||||
// swap x, s, u
|
||||
|
||||
GSVector4 p = v0.p.insert<0, 0>(v1.p);
|
||||
GSVector4 t = v0.t.insert<0, 0>(v1.t);
|
||||
v1.p = v1.p.insert<0, 0>(v0.p);
|
||||
v1.t = v1.t.insert<0, 0>(v0.t);
|
||||
v0.p = p;
|
||||
v0.t = t;
|
||||
uint16 x = v0.XYZ.X;
|
||||
v0.XYZ.X = v1.XYZ.X;
|
||||
v1.XYZ.X = x;
|
||||
|
||||
float s = v0.ST.S;
|
||||
v0.ST.S = v1.ST.S;
|
||||
v1.ST.S = s;
|
||||
|
||||
uint16 u = v0.U;
|
||||
v0.U = v1.U;
|
||||
v1.U = u;
|
||||
|
||||
q[1] = v0;
|
||||
q[2] = v1;
|
||||
|
@ -199,9 +169,56 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
__assume(0);
|
||||
}
|
||||
|
||||
(*(GSDevice9*)m_dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO
|
||||
GSDevice9* dev = (GSDevice9*)m_dev;
|
||||
|
||||
__super::DrawPrims(rt, ds, tex);
|
||||
(*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO
|
||||
|
||||
void* ptr = NULL;
|
||||
|
||||
if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next))
|
||||
{
|
||||
GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff;
|
||||
GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr;
|
||||
|
||||
for(int i = 0; i < m_vertex.next; i++, s++, d++)
|
||||
{
|
||||
GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());
|
||||
|
||||
if(PRIM->TME && !PRIM->FST)
|
||||
{
|
||||
p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
|
||||
}
|
||||
else
|
||||
{
|
||||
p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
|
||||
}
|
||||
|
||||
GSVector4 t = GSVector4::zero();
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
if(PRIM->FST)
|
||||
{
|
||||
t = GSVector4(GSVector4i::load(s->UV).upl16());
|
||||
}
|
||||
else
|
||||
{
|
||||
t = GSVector4::loadl(&s->ST);
|
||||
}
|
||||
}
|
||||
|
||||
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));
|
||||
|
||||
d->p = p;
|
||||
d->t = t;
|
||||
}
|
||||
|
||||
dev->IAUnmapVertexBuffer();
|
||||
}
|
||||
|
||||
dev->IASetIndexBuffer(m_index.buff, m_index.tail);
|
||||
|
||||
dev->IASetPrimitiveTopology(topology);
|
||||
}
|
||||
|
||||
void GSRendererDX9::UpdateFBA(GSTexture* rt)
|
||||
|
@ -220,7 +237,7 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt)
|
|||
GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight());
|
||||
GSVector4 o = GSVector4(-1.0f, 1.0f);
|
||||
|
||||
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 dst = src * 2.0f + o.xxxx();
|
||||
|
||||
GSVertexPT1 vertices[] =
|
||||
|
|
|
@ -34,17 +34,9 @@ protected:
|
|||
Direct3DBlendState9 bs;
|
||||
} m_fba;
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index);
|
||||
|
||||
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
void SetupIA();
|
||||
void UpdateFBA(GSTexture* rt);
|
||||
|
||||
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.x;}
|
||||
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.y;}
|
||||
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW9*)vertex)->t.u32[2];}
|
||||
void SetColor(void* vertex, uint32 c) const {((GSVertexHW9*)vertex)->t.u32[2] = c;}
|
||||
|
||||
public:
|
||||
GSRendererDX9();
|
||||
virtual ~GSRendererDX9() {}
|
||||
|
|
|
@ -22,9 +22,8 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSRendererHW.h"
|
||||
|
||||
GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc)
|
||||
: GSRenderer(vt, vertex_stride)
|
||||
, m_tc(tc)
|
||||
GSRendererHW::GSRendererHW(GSTextureCache* tc)
|
||||
: m_tc(tc)
|
||||
, m_width(1024)
|
||||
, m_height(1024)
|
||||
, m_skip(0)
|
||||
|
@ -101,19 +100,18 @@ void GSRendererHW::Reset()
|
|||
|
||||
void GSRendererHW::VSync(int field)
|
||||
{
|
||||
GSRenderer::VSync(field);
|
||||
|
||||
m_tc->IncAge();
|
||||
m_dev->AgePool();
|
||||
|
||||
m_skip = 0;
|
||||
|
||||
if(m_reset)
|
||||
{
|
||||
m_tc->RemoveAll();
|
||||
|
||||
m_reset = false;
|
||||
}
|
||||
|
||||
GSRenderer::VSync(field);
|
||||
|
||||
m_tc->IncAge();
|
||||
|
||||
m_skip = 0;
|
||||
}
|
||||
|
||||
void GSRendererHW::ResetDevice()
|
||||
|
@ -212,7 +210,7 @@ void GSRendererHW::Draw()
|
|||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear());
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
|
||||
|
||||
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
|
||||
|
||||
|
@ -299,7 +297,7 @@ void GSRendererHW::Draw()
|
|||
|
||||
//
|
||||
|
||||
GSVector4i r = GSVector4i(m_vt->m_min.p.xyxy(m_vt->m_max.p)).rintersect(GSVector4i(context->scissor.in));
|
||||
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));
|
||||
|
||||
if(fm != 0xffffffff)
|
||||
{
|
||||
|
@ -411,14 +409,14 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
|||
|
||||
if(lines == 0)
|
||||
{
|
||||
if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2))
|
||||
if(m_vt.m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2))
|
||||
{
|
||||
lines = m_vertex.next / 2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_vt->m_primclass == GS_POINT_CLASS)
|
||||
if(m_vt.m_primclass == GS_POINT_CLASS)
|
||||
{
|
||||
if(m_vertex.next >= 16 * 512)
|
||||
{
|
||||
|
@ -429,14 +427,14 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
|||
int ox = m_context->XYOFFSET.OFX;
|
||||
int oy = m_context->XYOFFSET.OFY;
|
||||
|
||||
const uint8* RESTRICT v = m_vertex.buff;
|
||||
const GSVertex* RESTRICT v = m_vertex.buff;
|
||||
|
||||
for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride)
|
||||
for(int i = (int)m_vertex.next; i >= 0; i--, v++)
|
||||
{
|
||||
int x = (GetPosX(v) - ox) >> 4;
|
||||
int y = (GetPosY(v) - oy) >> 4;
|
||||
int x = (v->XYZ.X - ox) >> 4;
|
||||
int y = (v->XYZ.Y - oy) >> 4;
|
||||
|
||||
video[(y << 8) + (y << 7) + (y << 6) + x] = GetColor(v);
|
||||
video[(y << 8) + (y << 7) + (y << 6) + x] = v->RGBAQ.u32[0];
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -446,7 +444,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
|||
lines = 0;
|
||||
}
|
||||
}
|
||||
else if(m_vt->m_primclass == GS_LINE_CLASS)
|
||||
else if(m_vt.m_primclass == GS_LINE_CLASS)
|
||||
{
|
||||
if(m_vertex.next == lines * 2)
|
||||
{
|
||||
|
@ -459,10 +457,8 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
|||
|
||||
t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);
|
||||
|
||||
size_t stride = m_vertex.stride;
|
||||
|
||||
memcpy(&m_vertex.buff[stride * 2], &m_vertex.buff[stride * (m_vertex.next - 2)], stride);
|
||||
memcpy(&m_vertex.buff[stride * 3], &m_vertex.buff[stride * (m_vertex.next - 1)], stride);
|
||||
m_vertex.buff[2] = m_vertex.buff[m_vertex.next - 2];
|
||||
m_vertex.buff[3] = m_vertex.buff[m_vertex.next - 1];
|
||||
|
||||
m_index.buff[0] = 0;
|
||||
m_index.buff[1] = 1;
|
||||
|
@ -474,7 +470,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
|||
m_vertex.head = m_vertex.tail = m_vertex.next = 4;
|
||||
m_index.tail = 6;
|
||||
|
||||
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
|
||||
m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -506,11 +502,11 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S
|
|||
{
|
||||
// missing red channel fix (looks alright in pcsx2 r5000+)
|
||||
|
||||
uint8* RESTRICT v = m_vertex.buff;
|
||||
GSVertex* RESTRICT v = m_vertex.buff;
|
||||
|
||||
for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride)
|
||||
for(int i = (int)m_vertex.next; i >= 0; i--, v++)
|
||||
{
|
||||
uint32 c = GetColor(v);
|
||||
uint32 c = v->RGBAQ.u32[0];
|
||||
|
||||
uint32 r = (c >> 0) & 0xff;
|
||||
uint32 g = (c >> 8) & 0xff;
|
||||
|
@ -518,11 +514,11 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S
|
|||
|
||||
if(r == 0 && g != 0 && b != 0)
|
||||
{
|
||||
SetColor(v, (c & 0xffffff00) | ((g + b + 1) >> 1));
|
||||
v->RGBAQ.u32[0] = (c & 0xffffff00) | ((g + b + 1) >> 1);
|
||||
}
|
||||
}
|
||||
|
||||
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt->m_primclass);
|
||||
m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt.m_primclass);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -702,7 +698,7 @@ bool GSRendererHW::OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTex
|
|||
}
|
||||
else if(PRIM->TME)
|
||||
{
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_max.p.z == 0))
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_max.p.z == 0))
|
||||
{
|
||||
m_dev->ClearDepth(ds, 0);
|
||||
}
|
||||
|
@ -758,7 +754,7 @@ bool GSRendererHW::OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureC
|
|||
}
|
||||
else if(PRIM->TME)
|
||||
{
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0))
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_min.p.z == 0))
|
||||
{
|
||||
m_dev->ClearDepth(ds, 0);
|
||||
}
|
||||
|
@ -784,7 +780,7 @@ bool GSRendererHW::OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureC
|
|||
}
|
||||
else if(PRIM->TME)
|
||||
{
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0))
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_min.p.z == 0))
|
||||
{
|
||||
m_dev->ClearDepth(ds, 0);
|
||||
}
|
||||
|
@ -798,7 +794,7 @@ bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCac
|
|||
uint32 FBP = m_context->FRAME.Block();
|
||||
uint32 FPSM = m_context->FRAME.PSM;
|
||||
|
||||
if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt->m_max.p.z == m_vt->m_min.p.z)
|
||||
if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt.m_eq.z)
|
||||
{
|
||||
m_context->TEST.ZTST = ZTST_ALWAYS;
|
||||
//m_dev->ClearDepth(ds, 0);
|
||||
|
@ -810,7 +806,7 @@ bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCac
|
|||
|
||||
bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
|
||||
{
|
||||
if(m_vt->m_primclass == GS_POINT_CLASS && !PRIM->TME)
|
||||
if(m_vt.m_primclass == GS_POINT_CLASS && !PRIM->TME)
|
||||
{
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
uint32 FBW = m_context->FRAME.FBW;
|
||||
|
@ -819,16 +815,16 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
|
|||
{
|
||||
if(m_vertex.next == 16)
|
||||
{
|
||||
uint8* RESTRICT v = m_vertex.buff;
|
||||
GSVertex* RESTRICT v = m_vertex.buff;
|
||||
|
||||
for(int i = 0; i < 16; i++, v += m_vertex.stride)
|
||||
for(int i = 0; i < 16; i++, v++)
|
||||
{
|
||||
uint32 c = GetColor(v);
|
||||
uint32 c = v->RGBAQ.u32[0];
|
||||
uint32 a = c >> 24;
|
||||
|
||||
c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);
|
||||
|
||||
SetColor(v, c);
|
||||
v->RGBAQ.u32[0] = c;
|
||||
|
||||
m_mem.WritePixel32(i & 7, i >> 3, c, FBP, FBW);
|
||||
}
|
||||
|
@ -839,16 +835,16 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
|
|||
}
|
||||
else if(m_vertex.next == 256)
|
||||
{
|
||||
uint8* RESTRICT v = m_vertex.buff;
|
||||
GSVertex* RESTRICT v = m_vertex.buff;
|
||||
|
||||
for(int i = 0; i < 256; i++, v += m_vertex.stride)
|
||||
for(int i = 0; i < 256; i++, v++)
|
||||
{
|
||||
uint32 c = GetColor(v);
|
||||
uint32 c = v->RGBAQ.u32[0];
|
||||
uint32 a = c >> 24;
|
||||
|
||||
c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);
|
||||
|
||||
SetColor(v, c);
|
||||
v->RGBAQ.u32[0] = c;
|
||||
|
||||
m_mem.WritePixel32(i & 15, i >> 4, c, FBP, FBW);
|
||||
}
|
||||
|
|
|
@ -126,11 +126,6 @@ private:
|
|||
|
||||
} m_hacks;
|
||||
|
||||
virtual int GetPosX(const void* vertex) const = 0;
|
||||
virtual int GetPosY(const void* vertex) const = 0;
|
||||
virtual uint32 GetColor(const void* vertex) const = 0;
|
||||
virtual void SetColor(void* vertex, uint32 c) const = 0;
|
||||
|
||||
#pragma endregion
|
||||
|
||||
protected:
|
||||
|
@ -139,7 +134,7 @@ protected:
|
|||
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
|
||||
|
||||
public:
|
||||
GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc);
|
||||
GSRendererHW(GSTextureCache* tc);
|
||||
virtual ~GSRendererHW();
|
||||
|
||||
void SetGameCRC(uint32 crc, int options);
|
||||
|
|
|
@ -32,11 +32,6 @@ class GSRendererNull : public GSRenderer
|
|||
};
|
||||
|
||||
protected:
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index)
|
||||
{
|
||||
}
|
||||
|
||||
void Draw()
|
||||
{
|
||||
}
|
||||
|
@ -48,8 +43,7 @@ protected:
|
|||
|
||||
public:
|
||||
GSRendererNull()
|
||||
: GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertex))
|
||||
: GSRenderer()
|
||||
{
|
||||
InitConvertVertex(GSRendererNull);
|
||||
}
|
||||
};
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -29,27 +29,48 @@ class GSRendererSW : public GSRenderer
|
|||
{
|
||||
class SharedData : public GSDrawScanline::SharedData
|
||||
{
|
||||
__aligned(struct, 16) TextureLevel
|
||||
{
|
||||
GSVector4i r;
|
||||
GSTextureCacheSW::Texture* t;
|
||||
};
|
||||
|
||||
public:
|
||||
GSRendererSW* m_parent;
|
||||
const uint32* m_fb_pages;
|
||||
const uint32* m_zb_pages;
|
||||
const uint32* m_tex_pages[7 + 1]; // NULL terminated
|
||||
int m_fpsm;
|
||||
int m_zpsm;
|
||||
bool m_using_pages;
|
||||
TextureLevel m_tex[7 + 1]; // NULL terminated
|
||||
enum {SyncNone, SyncSource, SyncTarget} m_syncpoint;
|
||||
|
||||
public:
|
||||
SharedData(GSRendererSW* parent);
|
||||
virtual ~SharedData();
|
||||
|
||||
void UseTargetPages(const uint32* fb_pages, const uint32* zb_pages);
|
||||
void UseSourcePages(GSTextureCacheSW::Texture* t, int level);
|
||||
void UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm);
|
||||
void ReleasePages();
|
||||
|
||||
void SetSource(GSTextureCacheSW::Texture* t, const GSVector4i& r, int level);
|
||||
void UpdateSource();
|
||||
};
|
||||
|
||||
typedef void (GSRendererSW::*ConvertVertexBufferPtr)(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
||||
|
||||
ConvertVertexBufferPtr m_cvb[4][2][2];
|
||||
|
||||
template<uint32 primclass, uint32 tme, uint32 fst>
|
||||
void ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
||||
|
||||
protected:
|
||||
IRasterizer* m_rl;
|
||||
GSTextureCacheSW* m_tc;
|
||||
GSTexture* m_texture[2];
|
||||
uint8* m_output;
|
||||
bool m_reset;
|
||||
GSPixelOffset4* m_fzb;
|
||||
GSVector4i m_fzb_bbox;
|
||||
uint32 m_fzb_cur_pages[16];
|
||||
uint32 m_fzb_pages[512]; // uint16 frame/zbuf pages interleaved
|
||||
uint16 m_tex_pages[512];
|
||||
uint32 m_tmp_pages[512 + 1];
|
||||
|
@ -60,19 +81,19 @@ protected:
|
|||
GSTexture* GetOutput(int i);
|
||||
|
||||
void Draw();
|
||||
void Queue(shared_ptr<GSRasterizerData>& item);
|
||||
void Sync(int reason);
|
||||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
||||
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut = false);
|
||||
|
||||
void UsePages(const uint32* pages, int type);
|
||||
void ReleasePages(const uint32* pages, int type);
|
||||
template<uint32 mask> bool CheckTargetPages(const uint32* pages);
|
||||
|
||||
bool CheckTargetPages(const uint32* fb_pages, const uint32* zb_pages, const GSVector4i& r);
|
||||
bool CheckSourcePages(SharedData* sd);
|
||||
|
||||
bool GetScanlineGlobalData(SharedData* data);
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index);
|
||||
|
||||
public:
|
||||
GSRendererSW(int threads);
|
||||
virtual ~GSRendererSW();
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
#include "GSLocalMemory.h"
|
||||
#include "GSVector.h"
|
||||
|
||||
#define GS_BILINEAR_PRECISION 4 // max precision 15, but several games like okami, rogue galaxy, dq8 break above 4
|
||||
|
||||
union GSScanlineSelector
|
||||
{
|
||||
struct
|
||||
|
@ -65,8 +67,9 @@ union GSScanlineSelector
|
|||
|
||||
uint32 edge:1; // 48
|
||||
uint32 tw:3; // 49 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
|
||||
uint32 lcm:1; // 50
|
||||
uint32 mmin:2; // 51
|
||||
uint32 lcm:1; // 52
|
||||
uint32 mmin:2; // 53
|
||||
uint32 notest:1; // 54 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels)
|
||||
};
|
||||
|
||||
struct
|
||||
|
|
|
@ -315,7 +315,7 @@ void GSSettingsDlg::UpdateControls()
|
|||
EnableWindow(GetDlgItem(m_hWnd, IDC_NATIVERES), hw);
|
||||
EnableWindow(GetDlgItem(m_hWnd, IDC_FILTER), hw);
|
||||
EnableWindow(GetDlgItem(m_hWnd, IDC_PALTEX), hw);
|
||||
EnableWindow(GetDlgItem(m_hWnd, IDC_LOGZ), dx9 && hw && GSDevice9::GetMaxDepth(m_lastValidMsaa) < 32);
|
||||
EnableWindow(GetDlgItem(m_hWnd, IDC_LOGZ), dx9 && hw);
|
||||
EnableWindow(GetDlgItem(m_hWnd, IDC_FBA), dx9 && hw);
|
||||
//EnableWindow(GetDlgItem(m_hWnd, IDC_AA1), sw); // Let uers set software params regardless of renderer used
|
||||
//EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw);
|
||||
|
|
|
@ -38,7 +38,7 @@ void GSSetupPrimCodeGenerator::Generate()
|
|||
{
|
||||
mov(edx, dword[esp + _dscan]);
|
||||
|
||||
for(int i = 0; i < 5; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 2 : 5); i++)
|
||||
{
|
||||
vmovaps(Xmm(3 + i), ptr[&m_shift[i]]);
|
||||
}
|
||||
|
@ -80,7 +80,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vmovdqa(ptr[&m_local.d4.f], xmm2);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||
|
||||
|
@ -103,7 +103,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
vmulps(xmm1, xmm0, xmm3);
|
||||
vmovdqa(ptr[&m_local.d4.z], xmm1);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].z = dz * m_shift[i];
|
||||
|
||||
|
@ -139,36 +139,6 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
vmovdqa(xmm0, ptr[ecx + offsetof(GSVertexSW, t)]);
|
||||
vpshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
/*
|
||||
// GSVector4 z = p.zzzz();
|
||||
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
if(m_sel.zoverflow)
|
||||
{
|
||||
// m_local.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
|
||||
vbroadcastss(xmm1, ptr[&GSVector4::m_half]);
|
||||
vmulps(xmm1, xmm0);
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
vpslld(xmm1, 1);
|
||||
|
||||
vcvttps2dq(xmm0, xmm0);
|
||||
vpcmpeqd(xmm2, xmm2);
|
||||
vpsrld(xmm2, 31);
|
||||
vpand(xmm0, xmm2);
|
||||
|
||||
vpor(xmm0, xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_local.p.z = GSVector4i(z);
|
||||
|
||||
vcvttps2dq(xmm0, xmm0);
|
||||
}
|
||||
*/
|
||||
|
||||
vmovdqa(ptr[&m_local.p.z], xmm0);
|
||||
}
|
||||
}
|
||||
|
@ -210,7 +180,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4 v = ds/dt * m_shift[i];
|
||||
|
||||
|
@ -272,7 +242,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
|
||||
|
||||
|
@ -302,7 +272,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ void GSSetupPrimCodeGenerator::Generate()
|
|||
{
|
||||
mov(edx, dword[esp + _dscan]);
|
||||
|
||||
for(int i = 0; i < 5; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 2 : 5); i++)
|
||||
{
|
||||
movaps(Xmm(3 + i), ptr[&m_shift[i]]);
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
movdqa(ptr[&m_local.d4.f], xmm2);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||
|
||||
|
@ -107,7 +107,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
mulps(xmm1, xmm3);
|
||||
movdqa(ptr[&m_local.d4.z], xmm1);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].z = dz * m_shift[i];
|
||||
|
||||
|
@ -144,36 +144,6 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
movdqa(xmm0, ptr[ecx + offsetof(GSVertexSW, t)]);
|
||||
pshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
/*
|
||||
// GSVector4 z = p.zzzz();
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
if(m_sel.zoverflow)
|
||||
{
|
||||
// m_local.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
|
||||
movaps(xmm1, ptr[&GSVector4::m_half]);
|
||||
mulps(xmm1, xmm0);
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
pslld(xmm1, 1);
|
||||
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
pcmpeqd(xmm2, xmm2);
|
||||
psrld(xmm2, 31);
|
||||
pand(xmm0, xmm2);
|
||||
|
||||
por(xmm0, xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_local.p.z = GSVector4i(z);
|
||||
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
}
|
||||
*/
|
||||
|
||||
movdqa(ptr[&m_local.p.z], xmm0);
|
||||
}
|
||||
}
|
||||
|
@ -217,7 +187,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(xmm1, xmm0);
|
||||
shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4 v = ds/dt * m_shift[i];
|
||||
|
||||
|
@ -282,7 +252,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
|
||||
|
||||
|
@ -315,7 +285,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
//#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering
|
||||
//#define Offset_UV // Fixes / breaks various titles
|
||||
|
||||
GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
|
||||
GSState::GSState()
|
||||
: m_version(6)
|
||||
, m_mt(false)
|
||||
, m_irq(NULL)
|
||||
|
@ -35,24 +35,20 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
|
|||
, m_crc(0)
|
||||
, m_options(0)
|
||||
, m_frameskip(0)
|
||||
, m_vt(vt)
|
||||
, m_vt(this)
|
||||
, m_q(1.0f)
|
||||
, m_texflush(true)
|
||||
{
|
||||
m_nativeres = !!theApp.GetConfig("nativeres", 0);
|
||||
|
||||
memset(&m_v, 0, sizeof(m_v));
|
||||
m_q = 1.0f;
|
||||
memset(&m_vertex, 0, sizeof(m_vertex));
|
||||
memset(&m_index, 0, sizeof(m_index));
|
||||
|
||||
ASSERT(vertex_stride >= sizeof(GSVertex));
|
||||
|
||||
m_vertex.stride = vertex_stride;
|
||||
m_vertex.tmp = (uint8*)_aligned_malloc(m_vertex.stride * 2, 32);
|
||||
m_v.RGBAQ.Q = 1.0f;
|
||||
|
||||
GrowVertexBuffer();
|
||||
|
||||
memset(m_cv, 0, sizeof(m_cv));
|
||||
|
||||
m_sssize = 0;
|
||||
|
||||
m_sssize += sizeof(m_version);
|
||||
|
@ -110,12 +106,16 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
|
|||
Reset();
|
||||
|
||||
ResetHandlers();
|
||||
|
||||
s_n = 0;
|
||||
s_dump = !!theApp.GetConfig("dump", 0);
|
||||
s_save = !!theApp.GetConfig("save", 0);
|
||||
s_savez = !!theApp.GetConfig("savez", 0);
|
||||
s_saven = theApp.GetConfig("saven", 0);
|
||||
}
|
||||
|
||||
GSState::~GSState()
|
||||
{
|
||||
_aligned_free(m_vertex.tmp);
|
||||
|
||||
if(m_vertex.buff) _aligned_free(m_vertex.buff);
|
||||
if(m_index.buff) _aligned_free(m_index.buff);
|
||||
}
|
||||
|
@ -165,50 +165,28 @@ void GSState::SetFrameSkip(int skip)
|
|||
{
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerNOP;
|
||||
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerNOP;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerNOP;
|
||||
|
||||
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = &GSState::GIFPackedRegHandlerNOP;
|
||||
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2] = &GSState::GIFPackedRegHandlerNOP;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2<GS_INVALID, 0>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2<GS_INVALID, 0>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerXYZF2<GS_INVALID, 1>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerXYZ2<GS_INVALID, 1>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<0>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = (GIFPackedRegHandler)(GIFRegHandler)&GSState::GIFRegHandlerCLAMP<1>;
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
|
||||
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2<GS_INVALID, 0>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2<GS_INVALID, 0>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF2<GS_INVALID, 1>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ2<GS_INVALID, 1>;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT;
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE;
|
||||
|
||||
UpdateVertexKick();
|
||||
}
|
||||
}
|
||||
|
||||
void GSState::Reset()
|
||||
{
|
||||
printf("GS reset\n");
|
||||
|
||||
// FIXME: memset(m_mem.m_vm8, 0, m_mem.m_vmsize); // bios logo not shown cut in half after reset, missing graphics in GoW after first FMV
|
||||
memset(&m_path[0], 0, sizeof(m_path[0]) * countof(m_path));
|
||||
memset(&m_v, 0, sizeof(m_v));
|
||||
|
||||
|
@ -223,6 +201,8 @@ void GSState::Reset()
|
|||
m_vertex.tail = 0;
|
||||
m_vertex.next = 0;
|
||||
m_index.tail = 0;
|
||||
|
||||
m_texflush = true;
|
||||
}
|
||||
|
||||
void GSState::ResetHandlers()
|
||||
|
@ -253,6 +233,8 @@ void GSState::ResetHandlers()
|
|||
m_fpGIFRegHandlerXYZ[P][1] = &GSState::GIFRegHandlerXYZF2<P, 1>; \
|
||||
m_fpGIFRegHandlerXYZ[P][2] = &GSState::GIFRegHandlerXYZ2<P, 0>; \
|
||||
m_fpGIFRegHandlerXYZ[P][3] = &GSState::GIFRegHandlerXYZ2<P, 1>; \
|
||||
m_fpGIFPackedRegHandlerSTQRGBAXYZF2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZF2<P>; \
|
||||
m_fpGIFPackedRegHandlerSTQRGBAXYZ2[P] = &GSState::GIFPackedRegHandlerSTQRGBAXYZ2<P>; \
|
||||
|
||||
SetHandlerXYZ(GS_POINTLIST);
|
||||
SetHandlerXYZ(GS_LINELIST);
|
||||
|
@ -334,6 +316,8 @@ GSVector4i GSState::GetDisplayRect(int i)
|
|||
return r;
|
||||
}
|
||||
|
||||
// There's a problem when games expand/shrink and relocate the visible area since GSdx doesn't support
|
||||
// moving the output area. (Disgaea 2 intro FMV when upscaling is used, also those games hackfixed below.)
|
||||
GSVector4i GSState::GetFrameRect(int i)
|
||||
{
|
||||
if(i < 0) i = IsEnabled(1) ? 1 : 0;
|
||||
|
@ -356,12 +340,20 @@ GSVector4i GSState::GetFrameRect(int i)
|
|||
r.top = m_regs->DISP[i].DISPFB.DBY;
|
||||
r.right = r.left + w;
|
||||
r.bottom = r.top + h;
|
||||
//printf("%d %d %d %d %d %d\n",w,h,r.left,r.top,r.right,r.bottom);
|
||||
|
||||
/*static GSVector4i old_r = (GSVector4i) 0;
|
||||
if ((old_r.left != r.left) || (old_r.right != r.right) || (old_r.top != r.top) || (old_r.bottom != r.bottom)){
|
||||
printf("w %d h %d left %d top %d right %d bottom %d\n",w,h,r.left,r.top,r.right,r.bottom);
|
||||
}
|
||||
old_r = r;*/
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
GSVector2i GSState::GetDeviceSize(int i)
|
||||
{
|
||||
// TODO: return (m_regs->SMODE1.CMOD & 1) ? GSVector2i(640, 576) : GSVector2i(640, 480);
|
||||
|
||||
// TODO: other params of SMODE1 should affect the true device display size
|
||||
|
||||
// TODO2: pal games at 60Hz
|
||||
|
@ -439,19 +431,12 @@ void GSState::GIFPackedRegHandlerRGBA(const GIFPackedReg* RESTRICT r)
|
|||
|
||||
m_v.RGBAQ.u32[0] = (uint32)GSVector4i::store(v);
|
||||
|
||||
#elif _M_SSE >= 0x200
|
||||
#else
|
||||
|
||||
GSVector4i v = GSVector4i::load<false>(r) & GSVector4i::x000000ff();
|
||||
|
||||
m_v.RGBAQ.u32[0] = v.rgba32();
|
||||
|
||||
#else
|
||||
|
||||
m_v.RGBAQ.R = r->RGBA.R;
|
||||
m_v.RGBAQ.G = r->RGBA.G;
|
||||
m_v.RGBAQ.B = r->RGBA.B;
|
||||
m_v.RGBAQ.A = r->RGBA.A;
|
||||
|
||||
#endif
|
||||
|
||||
m_v.RGBAQ.Q = m_q;
|
||||
|
@ -463,16 +448,11 @@ void GSState::GIFPackedRegHandlerSTQ(const GIFPackedReg* RESTRICT r)
|
|||
|
||||
m_v.ST.u64 = r->u64[0];
|
||||
|
||||
#elif _M_SSE >= 0x200
|
||||
#else
|
||||
|
||||
GSVector4i v = GSVector4i::loadl(r);
|
||||
GSVector4i::storel(&m_v.ST.u64, v);
|
||||
|
||||
#else
|
||||
|
||||
m_v.ST.S = r->STQ.S;
|
||||
m_v.ST.T = r->STQ.T;
|
||||
|
||||
#endif
|
||||
|
||||
m_q = r->STQ.Q;
|
||||
|
@ -546,6 +526,69 @@ void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r)
|
|||
{
|
||||
}
|
||||
|
||||
// Batch handler for packed GIF data laid out as repeating (STQ, RGBA, XYZF2) register triples.
// r points at the first register and size is the number of registers, which must be a
// positive multiple of 3 (asserted below). Every triple is merged into m_v and then one
// vertex is kicked through VertexKick<prim>.
template<uint32 prim>
|
||||
void GSState::GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, uint32 size)
|
||||
{
|
||||
// The r[-3] read after the loop relies on size being non-zero.
ASSERT(size > 0 && size % 3 == 0);
|
||||
|
||||
const GIFPackedReg* RESTRICT r_end = r + size;
|
||||
|
||||
while(r < r_end)
|
||||
{
// r[0]: the low 64 bits are loaded as ST, the high 64 bits as Q.
GSVector4i st = GSVector4i::loadl(&r[0].u64[0]);
|
||||
GSVector4i q = GSVector4i::loadl(&r[0].u64[1]);
|
||||
// r[1]: each 32-bit lane is masked with the x000000ff constant and then narrowed by
// ps32().pu16() -- presumably down to four packed bytes; confirm against GSVector4i.
GSVector4i rgba = (GSVector4i::load<false>(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
|
||||
|
||||
// First half of the vertex: ST combined with the packed RGBA and Q.
m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
|
||||
|
||||
// r[2]: the low 64 bits carry X/Y, the high 64 bits carry Z/F.
GSVector4i xy = GSVector4i::loadl(&r[2].u64[0]);
|
||||
GSVector4i zf = GSVector4i::loadl(&r[2].u64[1]);
|
||||
// Interleaves xy with a shifted copy of itself, then with the current m_v.UV value
// (presumably to gather the X and Y fields and keep UV in the vertex -- TODO confirm).
xy = xy.upl16(xy.srl<4>()).upl32(GSVector4i::loadl(&m_v.UV));
|
||||
// Shifts each 32-bit lane right by 4 and masks with 0x00ffffff / 0x000000ff per the
// constant names (presumably a 24-bit Z and an 8-bit F -- TODO confirm).
zf = zf.srl32(4) & GSVector4i::x00ffffff().upl32(GSVector4i::x000000ff());
|
||||
|
||||
// Second half of the vertex.
m_v.m[1] = xy.upl32(zf); // TODO: only store the last one
|
||||
|
||||
// Kick the assembled vertex; the skip flag comes from the XYZF2 register itself.
VertexKick<prim>(r[2].XYZF2.Skip());
|
||||
|
||||
r += 3;
|
||||
}
|
||||
|
||||
// r == r_end here, so r[-3] is the STQ register of the last triple processed.
m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
|
||||
}
|
||||
|
||||
// Batch handler for packed GIF data laid out as repeating (STQ, RGBA, XYZ2) register triples.
// Same contract as the XYZF2 variant: r points at the first register, size is the register
// count and must be a positive multiple of 3 (asserted below). This variant carries no fog
// component in the third register.
template<uint32 prim>
|
||||
void GSState::GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, uint32 size)
|
||||
{
|
||||
// The r[-3] read after the loop relies on size being non-zero.
ASSERT(size > 0 && size % 3 == 0);
|
||||
|
||||
const GIFPackedReg* RESTRICT r_end = r + size;
|
||||
|
||||
while(r < r_end)
|
||||
{
// r[0]: the low 64 bits are loaded as ST, the high 64 bits as Q.
GSVector4i st = GSVector4i::loadl(&r[0].u64[0]);
|
||||
GSVector4i q = GSVector4i::loadl(&r[0].u64[1]);
|
||||
// r[1]: each 32-bit lane is masked with the x000000ff constant and then narrowed by
// ps32().pu16() -- presumably down to four packed bytes; confirm against GSVector4i.
GSVector4i rgba = (GSVector4i::load<false>(&r[1]) & GSVector4i::x000000ff()).ps32().pu16();
|
||||
|
||||
// First half of the vertex: ST combined with the packed RGBA and Q.
m_v.m[0] = st.upl64(rgba.upl32(q)); // TODO: only store the last one
|
||||
|
||||
// r[2]: the low 64 bits carry X/Y, the high 64 bits carry Z (taken as-is, no shift or mask).
GSVector4i xy = GSVector4i::loadl(&r[2].u64[0]);
|
||||
GSVector4i z = GSVector4i::loadl(&r[2].u64[1]);
|
||||
// Interleaves xy with a shifted copy of itself, then appends z
// (presumably gathering the X and Y fields ahead of Z -- TODO confirm).
GSVector4i xyz = xy.upl16(xy.srl<4>()).upl32(z);
|
||||
|
||||
// Second half of the vertex: XYZ in the low 64 bits, the current m_v.UV value above it.
m_v.m[1] = xyz.upl64(GSVector4i::loadl(&m_v.UV)); // TODO: only store the last one
|
||||
|
||||
// Kick the assembled vertex; the skip flag comes from the XYZ2 register itself.
VertexKick<prim>(r[2].XYZ2.Skip());
|
||||
|
||||
r += 3;
|
||||
}
|
||||
|
||||
// r == r_end here, so r[-3] is the STQ register of the last triple processed.
m_q = r[-3].STQ.Q; // remember the last one, STQ outputs this to the temp Q each time
|
||||
}
|
||||
|
||||
// No-op batch handler with the (register array, register count) signature.
// Both arguments are ignored and no state is modified.
void GSState::GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size)
|
||||
{
|
||||
}
|
||||
|
||||
// GIFRegHandler*
|
||||
|
||||
void GSState::GIFRegHandlerNull(const GIFReg* RESTRICT r)
|
||||
|
@ -553,13 +596,13 @@ void GSState::GIFRegHandlerNull(const GIFReg* RESTRICT r)
|
|||
// ASSERT(0);
|
||||
}
|
||||
|
||||
__forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
|
||||
__forceinline void GSState::ApplyPRIM(uint32 prim)
|
||||
{
|
||||
// ASSERT(r->PRIM.PRIM < 7);
|
||||
|
||||
if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim.PRIM)) // NOTE: assume strips/fans are converted to lists
|
||||
if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim & 7)) // NOTE: assume strips/fans are converted to lists
|
||||
{
|
||||
if((m_env.PRIM.u32[0] ^ prim.u32[0]) & 0x7f8) // all fields except PRIM
|
||||
if((m_env.PRIM.u32[0] ^ prim) & 0x7f8) // all fields except PRIM
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -569,8 +612,8 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
|
|||
Flush();
|
||||
}
|
||||
|
||||
m_env.PRIM = (GSVector4i)prim;
|
||||
m_env.PRMODE._PRIM = prim.PRIM;
|
||||
m_env.PRIM.u32[0] = prim;
|
||||
m_env.PRMODE._PRIM = prim;
|
||||
|
||||
UpdateContext();
|
||||
|
||||
|
@ -590,7 +633,7 @@ void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r)
|
|||
{
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ApplyPRIM(r->PRIM);
|
||||
ApplyPRIM(r->PRIM.u32[0]);
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
|
||||
|
@ -681,17 +724,49 @@ template<int i> void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
|
|||
if(wt)
|
||||
{
|
||||
GIFRegBITBLTBUF BITBLTBUF;
|
||||
|
||||
BITBLTBUF.SBP = TEX0.CBP;
|
||||
BITBLTBUF.SBW = 1;
|
||||
BITBLTBUF.SPSM = TEX0.CSM;
|
||||
GSVector4i r;
|
||||
|
||||
GSVector4i r = GSVector4i::zero();
|
||||
if(TEX0.CSM == 0)
|
||||
{
|
||||
BITBLTBUF.SBP = TEX0.CBP;
|
||||
BITBLTBUF.SBW = 1;
|
||||
BITBLTBUF.SPSM = TEX0.CSM;
|
||||
|
||||
r.right = GSLocalMemory::m_psm[TEX0.CPSM].pgs.x;
|
||||
r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].pgs.y;
|
||||
r.left = 0;
|
||||
r.top = 0;
|
||||
r.right = GSLocalMemory::m_psm[TEX0.CPSM].bs.x;
|
||||
r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].bs.y;
|
||||
|
||||
int blocks = 4;
|
||||
|
||||
if(GSLocalMemory::m_psm[TEX0.CPSM].bpp == 16)
|
||||
{
|
||||
blocks >>= 1;
|
||||
}
|
||||
|
||||
if(GSLocalMemory::m_psm[TEX0.PSM].bpp == 4)
|
||||
{
|
||||
blocks >>= 1;
|
||||
}
|
||||
|
||||
InvalidateLocalMem(BITBLTBUF, r, true);
|
||||
for(int j = 0; j < blocks; j++, BITBLTBUF.SBP++)
|
||||
{
|
||||
InvalidateLocalMem(BITBLTBUF, r, true);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
BITBLTBUF.SBP = TEX0.CBP;
|
||||
BITBLTBUF.SBW = m_env.TEXCLUT.CBW;
|
||||
BITBLTBUF.SPSM = TEX0.CSM;
|
||||
|
||||
r.left = m_env.TEXCLUT.COU;
|
||||
r.top = m_env.TEXCLUT.COV;
|
||||
r.right = r.left + GSLocalMemory::m_psm[TEX0.CPSM].pal;
|
||||
r.bottom = r.top + 1;
|
||||
|
||||
InvalidateLocalMem(BITBLTBUF, r, true);
|
||||
}
|
||||
|
||||
m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT);
|
||||
}
|
||||
|
@ -701,8 +776,13 @@ template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
|
|||
{
|
||||
GIFRegTEX0 TEX0 = r->TEX0;
|
||||
|
||||
if(TEX0.TW > 10) TEX0.TW = 10;
|
||||
if(TEX0.TH > 10) TEX0.TH = 10;
|
||||
// Tokyo Xtreme Racer Drift 2, TW/TH == 0, PRIM->FST == 1
|
||||
// Just setting the max texture size to make the texture cache allocate some surface.
|
||||
// The vertex trace will narrow the updated area down to the minimum, upper-left 8x8
|
||||
// for a single letter, but it may address the whole thing if it wants to.
|
||||
|
||||
if(TEX0.TW > 10 || TEX0.TW == 0) TEX0.TW = 10;
|
||||
if(TEX0.TH > 10 || TEX0.TH == 0) TEX0.TH = 10;
|
||||
|
||||
if((TEX0.TBW & 1) && (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT4))
|
||||
{
|
||||
|
@ -915,7 +995,7 @@ void GSState::GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r)
|
|||
|
||||
void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r)
|
||||
{
|
||||
// TRACE(_T("TEXFLUSH\n"));
|
||||
m_texflush = true;
|
||||
}
|
||||
|
||||
template<int i> void GSState::GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r)
|
||||
|
@ -1037,7 +1117,8 @@ template<int i> void GSState::GIFRegHandlerFRAME(const GIFReg* RESTRICT r)
|
|||
{
|
||||
m_env.CTXT[i].offset.fb = m_mem.GetOffset(r->FRAME.Block(), r->FRAME.FBW, r->FRAME.PSM);
|
||||
m_env.CTXT[i].offset.zb = m_mem.GetOffset(m_env.CTXT[i].ZBUF.Block(), r->FRAME.FBW, m_env.CTXT[i].ZBUF.PSM);
|
||||
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset4(r->FRAME, m_env.CTXT[i].ZBUF);
|
||||
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(r->FRAME, m_env.CTXT[i].ZBUF);
|
||||
m_env.CTXT[i].offset.fzb4 = m_mem.GetPixelOffset4(r->FRAME, m_env.CTXT[i].ZBUF);
|
||||
}
|
||||
|
||||
m_env.CTXT[i].FRAME = (GSVector4i)r->FRAME;
|
||||
|
@ -1075,7 +1156,8 @@ template<int i> void GSState::GIFRegHandlerZBUF(const GIFReg* RESTRICT r)
|
|||
if((m_env.CTXT[i].ZBUF.u32[0] ^ ZBUF.u32[0]) & 0x3f0001ff) // ZBP PSM
|
||||
{
|
||||
m_env.CTXT[i].offset.zb = m_mem.GetOffset(ZBUF.Block(), m_env.CTXT[i].FRAME.FBW, ZBUF.PSM);
|
||||
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, ZBUF);
|
||||
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(m_env.CTXT[i].FRAME, ZBUF);
|
||||
m_env.CTXT[i].offset.fzb4 = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, ZBUF);
|
||||
}
|
||||
|
||||
m_env.CTXT[i].ZBUF = (GSVector4i)ZBUF;
|
||||
|
@ -1230,40 +1312,8 @@ void GSState::FlushPrim()
|
|||
{
|
||||
if(m_index.tail > 0)
|
||||
{
|
||||
if(0)
|
||||
{
|
||||
uint8* buff = new uint8[m_vertex.next];
|
||||
GSVertex buff[2];
|
||||
|
||||
memset(buff, 0, m_vertex.next);
|
||||
|
||||
for(size_t i = 0; i < m_index.tail; i++)
|
||||
{
|
||||
ASSERT(m_index.buff[i] < m_vertex.next);
|
||||
|
||||
buff[m_index.buff[i]] = 1;
|
||||
}
|
||||
|
||||
size_t count = 0;
|
||||
|
||||
for(size_t i = 0; i < m_vertex.next; i++)
|
||||
{
|
||||
if(buff[i] == 0)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if(count > 0)
|
||||
{
|
||||
printf("unref %lld %d/%d\n", m_perfmon.GetFrame(), count, m_vertex.next);
|
||||
}
|
||||
|
||||
delete [] buff;
|
||||
}
|
||||
|
||||
uint8* buff = m_vertex.tmp;
|
||||
|
||||
size_t stride = m_vertex.stride;
|
||||
size_t head = m_vertex.head;
|
||||
size_t tail = m_vertex.tail;
|
||||
size_t next = m_vertex.next;
|
||||
|
@ -1282,11 +1332,11 @@ void GSState::FlushPrim()
|
|||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
unused = tail - head;
|
||||
memcpy(buff, &m_vertex.buff[stride * head], stride * unused);
|
||||
memcpy(buff, &m_vertex.buff[head], sizeof(GSVertex) * unused);
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
memcpy(buff, &m_vertex.buff[stride * head], stride); unused = 1;
|
||||
if(tail - 1 > head) {memcpy(&buff[stride], &m_vertex.buff[stride * (tail - 1)], stride); unused = 2;}
|
||||
buff[0] = m_vertex.buff[head]; unused = 1;
|
||||
if(tail - 1 > head) {buff[1] = m_vertex.buff[tail - 1]; unused = 2;}
|
||||
break;
|
||||
case GS_INVALID:
|
||||
break;
|
||||
|
@ -1301,7 +1351,7 @@ void GSState::FlushPrim()
|
|||
{
|
||||
// FIXME: berserk fpsm = 27 (8H)
|
||||
|
||||
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
|
||||
m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
|
||||
|
||||
Draw();
|
||||
|
||||
|
@ -1315,7 +1365,7 @@ void GSState::FlushPrim()
|
|||
|
||||
if(unused > 0)
|
||||
{
|
||||
memcpy(m_vertex.buff, buff, stride * unused);
|
||||
memcpy(m_vertex.buff, buff, sizeof(GSVertex) * unused);
|
||||
|
||||
m_vertex.tail = unused;
|
||||
m_vertex.next = next > head ? next - head : 0;
|
||||
|
@ -1641,7 +1691,7 @@ void GSState::SoftReset(uint32 mask)
|
|||
|
||||
m_env.TRXDIR.XDIR = 3; //-1 ; set it to invalid value
|
||||
|
||||
m_q = 1;
|
||||
m_q = 1.0f;
|
||||
}
|
||||
|
||||
void GSState::ReadFIFO(uint8* mem, int size)
|
||||
|
@ -1665,6 +1715,8 @@ template void GSState::Transfer<1>(const uint8* mem, uint32 size);
|
|||
template void GSState::Transfer<2>(const uint8* mem, uint32 size);
|
||||
template void GSState::Transfer<3>(const uint8* mem, uint32 size);
|
||||
|
||||
static hash_map<uint64, uint64> s_tags;
|
||||
|
||||
template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
||||
{
|
||||
GSPerfMonAutoTimer pmat(&m_perfmon);
|
||||
|
@ -1679,6 +1731,16 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
|||
{
|
||||
path.SetTag(mem);
|
||||
|
||||
if(0)
|
||||
{
|
||||
GIFTag* t = (GIFTag*)mem;
|
||||
uint64 hash;
|
||||
if(t->NREG < 8) hash = t->u32[2] & ((1 << t->NREG * 4) - 1);
|
||||
else if(t->NREG < 16) {hash = t->u32[2]; ((uint32*)&hash)[1] = t->u32[3] & ((1 << (t->NREG - 8) * 4) - 1);}
|
||||
else hash = t->u64[1];
|
||||
s_tags[hash] += path.nloop * path.nreg;
|
||||
}
|
||||
|
||||
mem += sizeof(GIFTag);
|
||||
size--;
|
||||
|
||||
|
@ -1690,9 +1752,7 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
|||
|
||||
if(path.tag.PRE && path.tag.FLG == GIF_FLG_PACKED)
|
||||
{
|
||||
GIFRegPRIM r;
|
||||
r.u64 = path.tag.PRIM;
|
||||
ApplyPRIM(r);
|
||||
ApplyPRIM(path.tag.PRIM);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1726,8 +1786,28 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
|||
{
|
||||
size -= total;
|
||||
|
||||
if(path.adonly)
|
||||
switch(path.type)
|
||||
{
|
||||
case GIFPath::TYPE_UNKNOWN:
|
||||
|
||||
{
|
||||
uint32 reg = 0;
|
||||
|
||||
do
|
||||
{
|
||||
(this->*m_fpGIFPackedRegHandlers[path.GetReg(reg++)])((GIFPackedReg*)mem);
|
||||
|
||||
mem += sizeof(GIFPackedReg);
|
||||
|
||||
reg = reg & ((int)(reg - path.nreg) >> 31); // resets reg back to 0 when it becomes equal to path.nreg
|
||||
}
|
||||
while(--total > 0);
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case GIFPath::TYPE_ADONLY: // very common
|
||||
|
||||
do
|
||||
{
|
||||
(this->*m_fpGIFRegHandlers[((GIFPackedReg*)mem)->A_D.ADDR])(&((GIFPackedReg*)mem)->r);
|
||||
|
@ -1735,20 +1815,28 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
|||
mem += sizeof(GIFPackedReg);
|
||||
}
|
||||
while(--total > 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32 reg = 0;
|
||||
|
||||
do
|
||||
{
|
||||
(this->*m_fpGIFPackedRegHandlers[path.GetReg(reg++)])((GIFPackedReg*)mem);
|
||||
break;
|
||||
|
||||
case GIFPath::TYPE_STQRGBAXYZF2: // majority of the vertices are formatted like this
|
||||
|
||||
mem += sizeof(GIFPackedReg);
|
||||
(this->*m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2])((GIFPackedReg*)mem, total);
|
||||
|
||||
reg = reg & ((int)(reg - path.nreg) >> 31); // resets reg back to 0 when it becomes equal to path.nreg
|
||||
}
|
||||
while(--total > 0);
|
||||
mem += total * sizeof(GIFPackedReg);
|
||||
|
||||
break;
|
||||
|
||||
case GIFPath::TYPE_STQRGBAXYZ2:
|
||||
|
||||
(this->*m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2])((GIFPackedReg*)mem, total);
|
||||
|
||||
mem += total * sizeof(GIFPackedReg);
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
path.nloop = 0;
|
||||
|
@ -1952,6 +2040,12 @@ int GSState::Freeze(GSFreezeData* fd, bool sizeonly)
|
|||
{
|
||||
m_path[i].tag.NREG = m_path[i].nreg;
|
||||
m_path[i].tag.NLOOP = m_path[i].nloop;
|
||||
m_path[i].tag.REGS = 0;
|
||||
|
||||
for(size_t j = 0; j < countof(m_path[i].regs.u8); j++)
|
||||
{
|
||||
m_path[i].tag.u32[2 + (j >> 3)] |= m_path[i].regs.u8[j] << ((j & 7) << 2);
|
||||
}
|
||||
|
||||
WriteState(data, &m_path[i].tag);
|
||||
WriteState(data, &m_path[i].reg);
|
||||
|
@ -2070,7 +2164,8 @@ int GSState::Defrost(const GSFreezeData* fd)
|
|||
m_env.CTXT[i].offset.fb = m_mem.GetOffset(m_env.CTXT[i].FRAME.Block(), m_env.CTXT[i].FRAME.FBW, m_env.CTXT[i].FRAME.PSM);
|
||||
m_env.CTXT[i].offset.zb = m_mem.GetOffset(m_env.CTXT[i].ZBUF.Block(), m_env.CTXT[i].FRAME.FBW, m_env.CTXT[i].ZBUF.PSM);
|
||||
m_env.CTXT[i].offset.tex = m_mem.GetOffset(m_env.CTXT[i].TEX0.TBP0, m_env.CTXT[i].TEX0.TBW, m_env.CTXT[i].TEX0.PSM);
|
||||
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF);
|
||||
m_env.CTXT[i].offset.fzb = m_mem.GetPixelOffset(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF);
|
||||
m_env.CTXT[i].offset.fzb4 = m_mem.GetPixelOffset4(m_env.CTXT[i].FRAME, m_env.CTXT[i].ZBUF);
|
||||
}
|
||||
|
||||
UpdateScissor();
|
||||
|
@ -2104,6 +2199,8 @@ void GSState::UpdateScissor()
|
|||
|
||||
void GSState::UpdateVertexKick()
|
||||
{
|
||||
if(m_frameskip) return;
|
||||
|
||||
uint32 prim = PRIM->PRIM;
|
||||
|
||||
m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = m_fpGIFPackedRegHandlerXYZ[prim][0];
|
||||
|
@ -2116,19 +2213,20 @@ void GSState::UpdateVertexKick()
|
|||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = m_fpGIFRegHandlerXYZ[prim][2];
|
||||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = m_fpGIFRegHandlerXYZ[prim][3];
|
||||
|
||||
m_cvf = m_cv[prim][PRIM->TME][PRIM->FST];
|
||||
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = m_fpGIFPackedRegHandlerSTQRGBAXYZF2[prim];
|
||||
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZ2] = m_fpGIFPackedRegHandlerSTQRGBAXYZ2[prim];
|
||||
}
|
||||
|
||||
void GSState::GrowVertexBuffer()
|
||||
{
|
||||
int maxcount = std::max<int>(m_vertex.maxcount * 3 / 2, 10000);
|
||||
|
||||
uint8* vertex = (uint8*)_aligned_malloc(m_vertex.stride * maxcount, 16);
|
||||
GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 16);
|
||||
uint32* index = (uint32*)_aligned_malloc(sizeof(uint32) * maxcount * 3, 16); // worst case is slightly less than vertex number * 3
|
||||
|
||||
if(m_vertex.buff != NULL)
|
||||
{
|
||||
memcpy(vertex, m_vertex.buff, m_vertex.stride * m_vertex.tail);
|
||||
memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail);
|
||||
|
||||
_aligned_free(m_vertex.buff);
|
||||
}
|
||||
|
@ -2160,17 +2258,13 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
GSVector4i v0(m_v.m[0]);
|
||||
GSVector4i v1(m_v.m[1]);
|
||||
|
||||
GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[m_vertex.stride * tail];
|
||||
GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[tail];
|
||||
|
||||
tailptr[0] = v0;
|
||||
tailptr[1] = v1;
|
||||
|
||||
m_vertex.xy[xy_tail & 3] = GSVector4(v1.upl32(v1.sub16(GSVector4i::load(m_ofxy)).sra16(4)).upl16()); // zw not sign extended, only useful for eq tests
|
||||
|
||||
#ifdef _DEBUG
|
||||
memset(&tailptr[2], 0, m_vertex.stride - sizeof(GSVertex));
|
||||
#endif
|
||||
|
||||
m_vertex.tail = ++tail;
|
||||
m_vertex.xy_tail = ++xy_tail;
|
||||
|
||||
|
@ -2286,8 +2380,6 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
|
||||
uint32* RESTRICT buff = &m_index.buff[m_index.tail];
|
||||
|
||||
size_t src_index = head;
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
|
@ -2295,7 +2387,6 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
m_vertex.head = head + 1;
|
||||
m_vertex.next = head + 1;
|
||||
m_index.tail += 1;
|
||||
(this->*m_cvf)(head, head);
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
buff[0] = head + 0;
|
||||
|
@ -2303,18 +2394,20 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
m_vertex.head = head + 2;
|
||||
m_vertex.next = head + 2;
|
||||
m_index.tail += 2;
|
||||
(this->*m_cvf)(head + 0, head + 0);
|
||||
(this->*m_cvf)(head + 1, head + 1);
|
||||
break;
|
||||
case GS_LINESTRIP:
|
||||
if(next < head) {head = next; m_vertex.tail = next + 2;}
|
||||
if(next < head)
|
||||
{
|
||||
m_vertex.buff[next + 0] = m_vertex.buff[head + 0];
|
||||
m_vertex.buff[next + 1] = m_vertex.buff[head + 1];
|
||||
head = next;
|
||||
m_vertex.tail = next + 2;
|
||||
}
|
||||
buff[0] = head + 0;
|
||||
buff[1] = head + 1;
|
||||
m_vertex.head = head + 1;
|
||||
m_vertex.next = head + 2;
|
||||
m_index.tail += 2;
|
||||
if(head + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
|
||||
/*if(head + 1 >= next)*/ (this->*m_cvf)(head + 1, src_index + 1); // this is always a new vertex
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
buff[0] = head + 0;
|
||||
|
@ -2323,21 +2416,22 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
m_vertex.head = head + 3;
|
||||
m_vertex.next = head + 3;
|
||||
m_index.tail += 3;
|
||||
(this->*m_cvf)(head + 0, head + 0);
|
||||
(this->*m_cvf)(head + 1, head + 1);
|
||||
(this->*m_cvf)(head + 2, head + 2);
|
||||
break;
|
||||
case GS_TRIANGLESTRIP:
|
||||
if(next < head) {head = next; m_vertex.tail = next + 3;}
|
||||
if(next < head)
|
||||
{
|
||||
m_vertex.buff[next + 0] = m_vertex.buff[head + 0];
|
||||
m_vertex.buff[next + 1] = m_vertex.buff[head + 1];
|
||||
m_vertex.buff[next + 2] = m_vertex.buff[head + 2];
|
||||
head = next;
|
||||
m_vertex.tail = next + 3;
|
||||
}
|
||||
buff[0] = head + 0;
|
||||
buff[1] = head + 1;
|
||||
buff[2] = head + 2;
|
||||
m_vertex.head = head + 1;
|
||||
m_vertex.next = head + 3;
|
||||
m_index.tail += 3;
|
||||
if(src_index + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
|
||||
if(src_index + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1);
|
||||
/*if(src_index + 2 >= next)*/ (this->*m_cvf)(head + 2, src_index + 2); // this is always a new vertex
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
// TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare)
|
||||
|
@ -2346,9 +2440,6 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
buff[2] = tail - 1;
|
||||
m_vertex.next = tail;
|
||||
m_index.tail += 3;
|
||||
if(head >= next) (this->*m_cvf)(head, head);
|
||||
if(tail - 2 >= next) (this->*m_cvf)(tail - 2, tail - 2);
|
||||
/*if(tail - 1 >= next)*/ (this->*m_cvf)(tail - 1, tail - 1); // this is always a new vertex
|
||||
break;
|
||||
case GS_SPRITE:
|
||||
buff[0] = head + 0;
|
||||
|
@ -2356,10 +2447,8 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
m_vertex.head = head + 2;
|
||||
m_vertex.next = head + 2;
|
||||
m_index.tail += 2;
|
||||
(this->*m_cvf)(head + 0, head + 0);
|
||||
(this->*m_cvf)(head + 1, head + 1);
|
||||
break;
|
||||
case GS_INVALID:
|
||||
case GS_INVALID:
|
||||
m_vertex.tail = head;
|
||||
break;
|
||||
default:
|
||||
|
@ -2425,7 +2514,7 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
|
|||
|
||||
if(wms + wmt < 6)
|
||||
{
|
||||
GSVector4 st = m_vt->m_min.t.xyxy(m_vt->m_max.t);
|
||||
GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t);
|
||||
|
||||
if(linear)
|
||||
{
|
||||
|
@ -2503,7 +2592,7 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
|
|||
|
||||
void GSState::GetAlphaMinMax()
|
||||
{
|
||||
if(m_vt->m_alpha.valid)
|
||||
if(m_vt.m_alpha.valid)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -2511,7 +2600,7 @@ void GSState::GetAlphaMinMax()
|
|||
const GSDrawingEnvironment& env = m_env;
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
GSVector4i a = m_vt->m_min.c.uph32(m_vt->m_max.c).zzww();
|
||||
GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww();
|
||||
|
||||
if(PRIM->TME && context->TEX0.TCC)
|
||||
{
|
||||
|
@ -2563,9 +2652,9 @@ void GSState::GetAlphaMinMax()
|
|||
}
|
||||
}
|
||||
|
||||
m_vt->m_alpha.min = a.x;
|
||||
m_vt->m_alpha.max = a.z;
|
||||
m_vt->m_alpha.valid = true;
|
||||
m_vt.m_alpha.min = a.x;
|
||||
m_vt.m_alpha.max = a.z;
|
||||
m_vt.m_alpha.valid = true;
|
||||
}
|
||||
|
||||
bool GSState::TryAlphaTest(uint32& fm, uint32& zm)
|
||||
|
@ -2582,8 +2671,8 @@ bool GSState::TryAlphaTest(uint32& fm, uint32& zm)
|
|||
{
|
||||
GetAlphaMinMax();
|
||||
|
||||
int amin = m_vt->m_alpha.min;
|
||||
int amax = m_vt->m_alpha.max;
|
||||
int amin = m_vt.m_alpha.min;
|
||||
int amax = m_vt.m_alpha.max;
|
||||
|
||||
int aref = context->TEST.AREF;
|
||||
|
||||
|
@ -2667,8 +2756,8 @@ bool GSState::IsOpaque()
|
|||
{
|
||||
GetAlphaMinMax();
|
||||
|
||||
amin = m_vt->m_alpha.min;
|
||||
amax = m_vt->m_alpha.max;
|
||||
amin = m_vt.m_alpha.min;
|
||||
amax = m_vt.m_alpha.max;
|
||||
}
|
||||
else if(context->ALPHA.C == 1)
|
||||
{
|
||||
|
|
|
@ -59,8 +59,18 @@ class GSState : public GSAlignedClass<32>
|
|||
GIFRegHandler m_fpGIFRegHandlers[256];
|
||||
GIFRegHandler m_fpGIFRegHandlerXYZ[8][4];
|
||||
|
||||
typedef void (GSState::*GIFPackedRegHandlerC)(const GIFPackedReg* RESTRICT r, uint32 size);
|
||||
|
||||
GIFPackedRegHandlerC m_fpGIFPackedRegHandlersC[2];
|
||||
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZF2[8];
|
||||
GIFPackedRegHandlerC m_fpGIFPackedRegHandlerSTQRGBAXYZ2[8];
|
||||
|
||||
template<uint32 prim> void GIFPackedRegHandlerSTQRGBAXYZF2(const GIFPackedReg* RESTRICT r, uint32 size);
|
||||
template<uint32 prim> void GIFPackedRegHandlerSTQRGBAXYZ2(const GIFPackedReg* RESTRICT r, uint32 size);
|
||||
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size);
|
||||
|
||||
template<int i> void ApplyTEX0(GIFRegTEX0& TEX0);
|
||||
void ApplyPRIM(const GIFRegPRIM& PRIM);
|
||||
void ApplyPRIM(uint32 prim);
|
||||
|
||||
void GIFRegHandlerNull(const GIFReg* RESTRICT r);
|
||||
void GIFRegHandlerPRIM(const GIFReg* RESTRICT r);
|
||||
|
@ -133,15 +143,14 @@ protected:
|
|||
float m_q;
|
||||
GSVector4 m_scissor;
|
||||
uint32 m_ofxy;
|
||||
bool m_texflush;
|
||||
|
||||
struct
|
||||
{
|
||||
uint8* buff;
|
||||
size_t stride;
|
||||
GSVertex* buff;
|
||||
size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
|
||||
GSVector4 xy[4];
|
||||
size_t xy_tail;
|
||||
uint8* tmp;
|
||||
} m_vertex;
|
||||
|
||||
struct
|
||||
|
@ -150,26 +159,6 @@ protected:
|
|||
size_t tail;
|
||||
} m_index;
|
||||
|
||||
typedef void (GSState::*ConvertVertexPtr)(size_t dst_index, size_t src_index);
|
||||
|
||||
ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST]
|
||||
|
||||
#define InitConvertVertex2(T, P) \
|
||||
m_cv[P][0][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 0>; \
|
||||
m_cv[P][0][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 1>; \
|
||||
m_cv[P][1][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 0>; \
|
||||
m_cv[P][1][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 1>; \
|
||||
|
||||
#define InitConvertVertex(T) \
|
||||
InitConvertVertex2(T, GS_POINTLIST) \
|
||||
InitConvertVertex2(T, GS_LINELIST) \
|
||||
InitConvertVertex2(T, GS_LINESTRIP) \
|
||||
InitConvertVertex2(T, GS_TRIANGLELIST) \
|
||||
InitConvertVertex2(T, GS_TRIANGLESTRIP) \
|
||||
InitConvertVertex2(T, GS_TRIANGLEFAN) \
|
||||
InitConvertVertex2(T, GS_SPRITE) \
|
||||
InitConvertVertex2(T, GS_INVALID) \
|
||||
|
||||
void UpdateContext();
|
||||
void UpdateScissor();
|
||||
|
||||
|
@ -182,7 +171,7 @@ protected:
|
|||
|
||||
// following functions need m_vt to be initialized
|
||||
|
||||
GSVertexTrace* m_vt;
|
||||
GSVertexTrace m_vt;
|
||||
|
||||
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
|
||||
void GetAlphaMinMax();
|
||||
|
@ -205,8 +194,14 @@ public:
|
|||
GSDump m_dump;
|
||||
bool m_nativeres;
|
||||
|
||||
int s_n;
|
||||
bool s_dump;
|
||||
bool s_save;
|
||||
bool s_savez;
|
||||
int s_saven;
|
||||
|
||||
public:
|
||||
GSState(GSVertexTrace* vt, size_t vertex_stride);
|
||||
GSState();
|
||||
virtual ~GSState();
|
||||
|
||||
void ResetHandlers();
|
||||
|
|
|
@ -167,6 +167,18 @@ GSTexture11::operator ID3D11ShaderResourceView*()
|
|||
return m_srv;
|
||||
}
|
||||
|
||||
// Returns the unordered access view of this texture, creating it on first use.
// Creation is only attempted when no view is cached yet and both the device
// and the texture are present; otherwise the cached value is handed back as is.
GSTexture11::operator ID3D11UnorderedAccessView*()
{
	const bool needs_view = !m_uav;

	if(!needs_view || !m_dev || !m_texture)
	{
		return m_uav;
	}

	ASSERT(!m_msaa);

	m_dev->CreateUnorderedAccessView(m_texture, NULL, &m_uav);

	return m_uav;
}
|
||||
|
||||
GSTexture11::operator ID3D11RenderTargetView*()
|
||||
{
|
||||
ASSERT(m_dev);
|
||||
|
|
|
@ -30,6 +30,7 @@ class GSTexture11 : public GSTexture
|
|||
CComPtr<ID3D11Texture2D> m_texture;
|
||||
D3D11_TEXTURE2D_DESC m_desc;
|
||||
CComPtr<ID3D11ShaderResourceView> m_srv;
|
||||
CComPtr<ID3D11UnorderedAccessView> m_uav;
|
||||
CComPtr<ID3D11RenderTargetView> m_rtv;
|
||||
CComPtr<ID3D11DepthStencilView> m_dsv;
|
||||
|
||||
|
@ -43,6 +44,7 @@ public:
|
|||
|
||||
operator ID3D11Texture2D*();
|
||||
operator ID3D11ShaderResourceView*();
|
||||
operator ID3D11UnorderedAccessView*();
|
||||
operator ID3D11RenderTargetView*();
|
||||
operator ID3D11DepthStencilView*();
|
||||
};
|
||||
|
|
|
@ -281,6 +281,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
|
|||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); // new frame buffers after reset should be cleared, don't display memory garbage
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -178,6 +178,11 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
|
|||
m_TEX0 = TEX0;
|
||||
m_TEXA = TEXA;
|
||||
|
||||
if(m_tw == 0)
|
||||
{
|
||||
m_tw = std::max<int>(m_TEX0.TW, GSLocalMemory::m_psm[m_TEX0.PSM].pal == 0 ? 3 : 5); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff
|
||||
}
|
||||
|
||||
memset(m_valid, 0, sizeof(m_valid));
|
||||
memset(m_pages.bm, 0, sizeof(m_pages.bm));
|
||||
|
||||
|
@ -239,17 +244,6 @@ bool GSTextureCacheSW::Texture::Update(const GSVector4i& rect)
|
|||
|
||||
if(m_buff == NULL)
|
||||
{
|
||||
uint32 tw0 = std::max<int>(m_TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated for m_buff
|
||||
|
||||
if(m_tw == 0)
|
||||
{
|
||||
m_tw = tw0;
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT(m_tw >= tw0);
|
||||
}
|
||||
|
||||
uint32 pitch = (1 << m_tw) << shift;
|
||||
|
||||
m_buff = _aligned_malloc(pitch * th * 4, 32);
|
||||
|
|
|
@ -82,13 +82,6 @@ bool GSDevice11::CreateTextureFX()
|
|||
return true;
|
||||
}
|
||||
|
||||
void GSDevice11::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
|
||||
{
|
||||
IASetVertexBuffer(vertex, sizeof(GSVertexHW11), vertex_count);
|
||||
IASetIndexBuffer(index, index_count);
|
||||
IASetPrimitiveTopology((D3D11_PRIMITIVE_TOPOLOGY)prim);
|
||||
}
|
||||
|
||||
void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
||||
{
|
||||
hash_map<uint32, GSVertexShader11 >::const_iterator i = m_vs.find(sel);
|
||||
|
@ -118,6 +111,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
|||
{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
};
|
||||
|
||||
|
|
|
@ -61,13 +61,6 @@ GSTexture* GSDevice9::CreateMskFix(uint32 size, uint32 msk, uint32 fix)
|
|||
return t;
|
||||
}
|
||||
|
||||
void GSDevice9::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
|
||||
{
|
||||
IASetVertexBuffer(vertex, sizeof(GSVertexHW9), vertex_count);
|
||||
IASetIndexBuffer(index, index_count);
|
||||
IASetPrimitiveTopology((D3DPRIMITIVETYPE)prim);
|
||||
}
|
||||
|
||||
void GSDevice9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
||||
{
|
||||
hash_map<uint32, GSVertexShader9>::const_iterator i = m_vs.find(sel);
|
||||
|
|
|
@ -28,9 +28,13 @@ InitializeConditionVariablePtr pInitializeConditionVariable;
|
|||
WakeConditionVariablePtr pWakeConditionVariable;
|
||||
WakeAllConditionVariablePtr pWakeAllConditionVariable;
|
||||
SleepConditionVariableSRWPtr pSleepConditionVariableSRW;
|
||||
InitializeSRWLockPtr pInitializeSRWLock;;
|
||||
InitializeSRWLockPtr pInitializeSRWLock;
|
||||
AcquireSRWLockExclusivePtr pAcquireSRWLockExclusive;
|
||||
TryAcquireSRWLockExclusivePtr pTryAcquireSRWLockExclusive;
|
||||
ReleaseSRWLockExclusivePtr pReleaseSRWLockExclusive;
|
||||
AcquireSRWLockSharedPtr pAcquireSRWLockShared;
|
||||
TryAcquireSRWLockSharedPtr pTryAcquireSRWLockShared;
|
||||
ReleaseSRWLockSharedPtr pReleaseSRWLockShared;
|
||||
|
||||
class InitCondVar
|
||||
{
|
||||
|
@ -47,7 +51,11 @@ public:
|
|||
pSleepConditionVariableSRW = (SleepConditionVariableSRWPtr)GetProcAddress(m_kernel32, "SleepConditionVariableSRW");
|
||||
pInitializeSRWLock = (InitializeSRWLockPtr)GetProcAddress(m_kernel32, "InitializeSRWLock");
|
||||
pAcquireSRWLockExclusive = (AcquireSRWLockExclusivePtr)GetProcAddress(m_kernel32, "AcquireSRWLockExclusive");
|
||||
pTryAcquireSRWLockExclusive = (TryAcquireSRWLockExclusivePtr)GetProcAddress(m_kernel32, "TryAcquireSRWLockExclusive");
|
||||
pReleaseSRWLockExclusive = (ReleaseSRWLockExclusivePtr)GetProcAddress(m_kernel32, "ReleaseSRWLockExclusive");
|
||||
pAcquireSRWLockShared = (AcquireSRWLockSharedPtr)GetProcAddress(m_kernel32, "AcquireSRWLockShared");
|
||||
pTryAcquireSRWLockShared = (TryAcquireSRWLockSharedPtr)GetProcAddress(m_kernel32, "TryAcquireSRWLockShared");
|
||||
pReleaseSRWLockShared = (ReleaseSRWLockSharedPtr)GetProcAddress(m_kernel32, "ReleaseSRWLockShared");
|
||||
}
|
||||
|
||||
virtual ~InitCondVar()
|
||||
|
|
|
@ -21,25 +21,56 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "GSdx.h"
|
||||
|
||||
// Common base of the platform-specific GSThread classes: exposes the
// ThreadProc hook that a concrete worker has to implement.
class IGSThread
{
protected:
	// Body of the worker; implemented by the class that derives from GSThread.
	virtual void ThreadProc() = 0;

public:
	// Polymorphic base: keep destruction through an IGSThread* well-defined.
	virtual ~IGSThread() {}
};
|
||||
|
||||
// Abstract lock interface shared by GSCritSec and GSCondVarLock so that
// callers (GSAutoLock, GSJobQueue) can work with either through a pointer.
class IGSLock
{
public:
	// Locks are owned and deleted through IGSLock* (see GSJobQueue), so the
	// destructor must be virtual for the derived destructor to run.
	virtual ~IGSLock() {}

	// Acquires the lock.
	virtual void Lock() = 0;
	// Attempts to acquire the lock; returns true on success.
	virtual bool TryLock() = 0;
	// Releases the lock.
	virtual void Unlock() = 0;
};
|
||||
|
||||
class IGSEvent
|
||||
{
|
||||
public:
|
||||
virtual void Set() = 0;
|
||||
virtual bool Wait(IGSLock* l) = 0;
|
||||
};
|
||||
|
||||
#ifdef _WINDOWS
|
||||
|
||||
typedef void (WINAPI * InitializeConditionVariablePtr)(CONDITION_VARIABLE* ConditionVariable);
|
||||
typedef void (WINAPI * WakeConditionVariablePtr)(CONDITION_VARIABLE* ConditionVariable);
|
||||
typedef void (WINAPI * WakeAllConditionVariablePtr)(CONDITION_VARIABLE* ConditionVariable);
|
||||
typedef void (WINAPI * SleepConditionVariableSRWPtr)(CONDITION_VARIABLE* ConditionVariable, SRWLOCK* SRWLock, DWORD dwMilliseconds, ULONG Flags);
|
||||
typedef BOOL (WINAPI * SleepConditionVariableSRWPtr)(CONDITION_VARIABLE* ConditionVariable, SRWLOCK* SRWLock, DWORD dwMilliseconds, ULONG Flags);
|
||||
typedef void (WINAPI * InitializeSRWLockPtr)(SRWLOCK* SRWLock);
|
||||
typedef void (WINAPI * AcquireSRWLockExclusivePtr)(SRWLOCK* SRWLock);
|
||||
typedef BOOLEAN (WINAPI * TryAcquireSRWLockExclusivePtr)(SRWLOCK* SRWLock);
|
||||
typedef void (WINAPI * ReleaseSRWLockExclusivePtr)(SRWLOCK* SRWLock);
|
||||
typedef void (WINAPI * AcquireSRWLockSharedPtr)(SRWLOCK* SRWLock);
|
||||
typedef BOOLEAN (WINAPI * TryAcquireSRWLockSharedPtr)(SRWLOCK* SRWLock);
|
||||
typedef void (WINAPI * ReleaseSRWLockSharedPtr)(SRWLOCK* SRWLock);
|
||||
|
||||
extern InitializeConditionVariablePtr pInitializeConditionVariable;
|
||||
extern WakeConditionVariablePtr pWakeConditionVariable;
|
||||
extern WakeAllConditionVariablePtr pWakeAllConditionVariable;
|
||||
extern SleepConditionVariableSRWPtr pSleepConditionVariableSRW;
|
||||
extern InitializeSRWLockPtr pInitializeSRWLock;;
|
||||
extern InitializeSRWLockPtr pInitializeSRWLock;
|
||||
extern AcquireSRWLockExclusivePtr pAcquireSRWLockExclusive;
|
||||
extern TryAcquireSRWLockExclusivePtr pTryAcquireSRWLockExclusive;
|
||||
extern ReleaseSRWLockExclusivePtr pReleaseSRWLockExclusive;
|
||||
extern AcquireSRWLockSharedPtr pAcquireSRWLockShared;
|
||||
extern TryAcquireSRWLockSharedPtr pTryAcquireSRWLockShared;
|
||||
extern ReleaseSRWLockSharedPtr pReleaseSRWLockShared;
|
||||
|
||||
class GSThread
|
||||
class GSThread : public IGSThread
|
||||
{
|
||||
DWORD m_ThreadId;
|
||||
HANDLE m_hThread;
|
||||
|
@ -47,8 +78,6 @@ class GSThread
|
|||
static DWORD WINAPI StaticThreadProc(void* lpParam);
|
||||
|
||||
protected:
|
||||
virtual void ThreadProc() = 0;
|
||||
|
||||
void CreateThread();
|
||||
void CloseThread();
|
||||
|
||||
|
@ -57,7 +86,7 @@ public:
|
|||
virtual ~GSThread();
|
||||
};
|
||||
|
||||
class GSCritSec
|
||||
class GSCritSec : public IGSLock
|
||||
{
|
||||
CRITICAL_SECTION m_cs;
|
||||
|
||||
|
@ -65,26 +94,25 @@ public:
|
|||
GSCritSec() {InitializeCriticalSection(&m_cs);}
|
||||
~GSCritSec() {DeleteCriticalSection(&m_cs);}
|
||||
|
||||
void Lock() {EnterCriticalSection(&m_cs);}
|
||||
bool TryLock() {return TryEnterCriticalSection(&m_cs) == TRUE;}
|
||||
void Unlock() {LeaveCriticalSection(&m_cs);}
|
||||
void Lock() {EnterCriticalSection(&m_cs);}
|
||||
bool TryLock() {return TryEnterCriticalSection(&m_cs) == TRUE;}
|
||||
void Unlock() {LeaveCriticalSection(&m_cs);}
|
||||
};
|
||||
|
||||
class GSEvent
|
||||
class GSEvent : public IGSEvent
|
||||
{
|
||||
protected:
|
||||
HANDLE m_hEvent;
|
||||
|
||||
public:
|
||||
GSEvent(bool manual = false, bool initial = false) {m_hEvent = CreateEvent(NULL, manual, initial, NULL);}
|
||||
GSEvent() {m_hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);}
|
||||
~GSEvent() {CloseHandle(m_hEvent);}
|
||||
|
||||
void Set() {SetEvent(m_hEvent);}
|
||||
void Reset() {ResetEvent(m_hEvent);}
|
||||
bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;}
|
||||
bool Wait(IGSLock* l) {if(l) l->Unlock(); bool b = WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0; if(l) l->Lock(); return b;}
|
||||
};
|
||||
|
||||
class GSCondVarLock
|
||||
class GSCondVarLock : public IGSLock
|
||||
{
|
||||
SRWLOCK m_lock;
|
||||
|
||||
|
@ -92,12 +120,13 @@ public:
|
|||
GSCondVarLock() {pInitializeSRWLock(&m_lock);}
|
||||
|
||||
void Lock() {pAcquireSRWLockExclusive(&m_lock);}
|
||||
// TryAcquireSRWLockExclusive returns a BOOLEAN that is nonzero on success, so compare against 0 instead of TRUE.
bool TryLock() {return pTryAcquireSRWLockExclusive(&m_lock) != 0;}
|
||||
void Unlock() {pReleaseSRWLockExclusive(&m_lock);}
|
||||
|
||||
|
||||
operator SRWLOCK* () {return &m_lock;}
|
||||
};
|
||||
|
||||
class GSCondVar
|
||||
class GSCondVar : public IGSEvent
|
||||
{
|
||||
CONDITION_VARIABLE m_cv;
|
||||
|
||||
|
@ -105,7 +134,7 @@ public:
|
|||
GSCondVar() {pInitializeConditionVariable(&m_cv);}
|
||||
|
||||
void Set() {pWakeConditionVariable(&m_cv);}
|
||||
void Wait(GSCondVarLock& lock) {pSleepConditionVariableSRW(&m_cv, lock, INFINITE, 0);}
|
||||
bool Wait(IGSLock* l) {return pSleepConditionVariableSRW(&m_cv, *(GSCondVarLock*)l, INFINITE, 0) != 0;}
|
||||
|
||||
operator CONDITION_VARIABLE* () {return &m_cv;}
|
||||
};
|
||||
|
@ -114,9 +143,8 @@ public:
|
|||
|
||||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
#include "GSdx.h"
|
||||
|
||||
class GSThread
|
||||
class GSThread : public IGSThread
|
||||
{
|
||||
pthread_attr_t m_thread_attr;
|
||||
pthread_t m_thread;
|
||||
|
@ -124,8 +152,6 @@ class GSThread
|
|||
static void* StaticThreadProc(void* param);
|
||||
|
||||
protected:
|
||||
virtual void ThreadProc() = 0;
|
||||
|
||||
void CreateThread();
|
||||
void CloseThread();
|
||||
|
||||
|
@ -134,16 +160,16 @@ public:
|
|||
virtual ~GSThread();
|
||||
};
|
||||
|
||||
class GSCritSec
|
||||
class GSCritSec : public IGSLock
|
||||
{
|
||||
pthread_mutexattr_t m_mutex_attr;
|
||||
pthread_mutex_t m_mutex;
|
||||
|
||||
public:
|
||||
GSCritSec()
|
||||
GSCritSec(bool recursive = true)
|
||||
{
|
||||
pthread_mutexattr_init(&m_mutex_attr);
|
||||
pthread_mutexattr_settype(&m_mutex_attr, PTHREAD_MUTEX_RECURSIVE);
|
||||
pthread_mutexattr_settype(&m_mutex_attr, recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL);
|
||||
pthread_mutex_init(&m_mutex, &m_mutex_attr);
|
||||
}
|
||||
|
||||
|
@ -158,7 +184,7 @@ public:
|
|||
void Unlock() {pthread_mutex_unlock(&m_mutex);}
|
||||
};
|
||||
|
||||
class GSEvent
|
||||
class GSEvent : public IGSEvent
|
||||
{
|
||||
protected:
|
||||
sem_t m_sem;
|
||||
|
@ -168,35 +194,18 @@ public:
|
|||
~GSEvent() {sem_destroy(&m_sem);}
|
||||
|
||||
void Set() {sem_post(&m_sem);}
|
||||
bool Wait() {return sem_wait(&m_sem) == 0;}
|
||||
bool Wait(IGSLock* l) {if(l) l->Unlock(); bool b = sem_wait(&m_sem) == 0; if(l) l->Lock(); return b;}
|
||||
};
|
||||
|
||||
// Note except the mutex attribute the code is same as GSCritSec object
|
||||
class GSCondVarLock
|
||||
class GSCondVarLock : public GSCritSec
|
||||
{
|
||||
pthread_mutexattr_t m_mutex_attr;
|
||||
pthread_mutex_t m_mutex;
|
||||
|
||||
public:
|
||||
GSCondVarLock()
|
||||
GSCondVarLock() : GSCritSec(false)
|
||||
{
|
||||
pthread_mutexattr_init(&m_mutex_attr);
|
||||
pthread_mutexattr_settype(&m_mutex_attr, PTHREAD_MUTEX_NORMAL);
|
||||
pthread_mutex_init(&m_mutex, &m_mutex_attr);
|
||||
}
|
||||
virtual ~GSCondVarLock()
|
||||
{
|
||||
pthread_mutex_destroy(&m_mutex);
|
||||
pthread_mutexattr_destroy(&m_mutex_attr);
|
||||
}
|
||||
|
||||
void Lock() {pthread_mutex_lock(&m_mutex);}
|
||||
void Unlock() {pthread_mutex_unlock(&m_mutex);}
|
||||
|
||||
operator pthread_mutex_t* () {return &m_mutex;}
|
||||
};
|
||||
|
||||
class GSCondVar
|
||||
class GSCondVar : public IGSEvent
|
||||
{
|
||||
pthread_cond_t m_cv;
|
||||
pthread_condattr_t m_cv_attr;
|
||||
|
@ -207,6 +216,7 @@ public:
|
|||
pthread_condattr_init(&m_cv_attr);
|
||||
pthread_cond_init(&m_cv, &m_cv_attr);
|
||||
}
|
||||
|
||||
virtual ~GSCondVar()
|
||||
{
|
||||
pthread_condattr_destroy(&m_cv_attr);
|
||||
|
@ -214,7 +224,7 @@ public:
|
|||
}
|
||||
|
||||
void Set() {pthread_cond_signal(&m_cv);}
|
||||
void Wait(GSCondVarLock& lock) {pthread_cond_wait(&m_cv, lock);}
|
||||
// Waits on the condition variable using the mutex of the GSCondVarLock that l points to (l must be a GSCondVarLock).
// Returns true when pthread_cond_wait succeeds. The comparison result was previously discarded without a return.
bool Wait(IGSLock* l) {return pthread_cond_wait(&m_cv, *(GSCondVarLock*)l) == 0;}
|
||||
|
||||
operator pthread_cond_t* () {return &m_cv;}
|
||||
};
|
||||
|
@ -223,102 +233,49 @@ public:
|
|||
|
||||
class GSAutoLock
|
||||
{
|
||||
protected:
|
||||
GSCritSec* m_cs;
|
||||
IGSLock* m_lock;
|
||||
|
||||
public:
|
||||
GSAutoLock(GSCritSec* cs) {m_cs = cs; m_cs->Lock();}
|
||||
~GSAutoLock() {m_cs->Unlock();}
|
||||
};
|
||||
|
||||
class GSEventSpin
|
||||
{
|
||||
protected:
|
||||
volatile long m_sync;
|
||||
volatile bool m_manual;
|
||||
|
||||
public:
|
||||
GSEventSpin(bool manual = false, bool initial = false) {m_sync = initial ? 1 : 0; m_manual = manual;}
|
||||
~GSEventSpin() {}
|
||||
|
||||
void Set() {_interlockedbittestandset(&m_sync, 0);}
|
||||
void Reset() {_interlockedbittestandreset(&m_sync, 0);}
|
||||
bool Wait()
|
||||
{
|
||||
if(m_manual) while(!m_sync) _mm_pause();
|
||||
else while(!_interlockedbittestandreset(&m_sync, 0)) _mm_pause();
|
||||
return true;
|
||||
}
|
||||
GSAutoLock(IGSLock* l) {(m_lock = l)->Lock();}
|
||||
~GSAutoLock() {m_lock->Unlock();}
|
||||
};
|
||||
|
||||
template<class T> class GSJobQueue : private GSThread
|
||||
{
|
||||
protected:
|
||||
int m_count;
|
||||
queue<T> m_queue;
|
||||
volatile long m_count; // NOTE: it is the safest to have our own counter because m_queue.pop() might decrement its own before the last item runs out of its scope and gets destroyed (implementation dependent)
|
||||
volatile bool m_exit;
|
||||
struct {GSCritSec lock; GSEvent notempty; volatile long count;} m_ev;
|
||||
struct {GSCondVar notempty, empty; GSCondVarLock lock; bool available;} m_cv;
|
||||
IGSEvent* m_notempty;
|
||||
IGSEvent* m_empty;
|
||||
IGSLock* m_lock;
|
||||
|
||||
void ThreadProc()
|
||||
{
|
||||
if(m_cv.available)
|
||||
m_lock->Lock();
|
||||
|
||||
while(true)
|
||||
{
|
||||
m_cv.lock.Lock();
|
||||
|
||||
while(true)
|
||||
while(m_queue.empty())
|
||||
{
|
||||
while(m_queue.empty())
|
||||
{
|
||||
m_cv.notempty.Wait(m_cv.lock);
|
||||
m_notempty->Wait(m_lock);
|
||||
|
||||
if(m_exit) {m_cv.lock.Unlock(); return;}
|
||||
}
|
||||
|
||||
T& item = m_queue.front();
|
||||
|
||||
m_cv.lock.Unlock();
|
||||
|
||||
Process(item);
|
||||
|
||||
m_cv.lock.Lock();
|
||||
|
||||
m_queue.pop();
|
||||
|
||||
if(m_queue.empty())
|
||||
{
|
||||
m_cv.empty.Set();
|
||||
}
|
||||
if(m_exit) {m_lock->Unlock(); return;}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_ev.lock.Lock();
|
||||
|
||||
while(true)
|
||||
T& item = m_queue.front();
|
||||
|
||||
m_lock->Unlock();
|
||||
|
||||
Process(item);
|
||||
|
||||
m_lock->Lock();
|
||||
|
||||
m_queue.pop();
|
||||
|
||||
if(--m_count == 0)
|
||||
{
|
||||
while(m_queue.empty())
|
||||
{
|
||||
m_ev.lock.Unlock();
|
||||
|
||||
m_ev.notempty.Wait();
|
||||
|
||||
if(m_exit) {return;}
|
||||
|
||||
m_ev.lock.Lock();
|
||||
}
|
||||
|
||||
T& item = m_queue.front();
|
||||
|
||||
m_ev.lock.Unlock();
|
||||
|
||||
Process(item);
|
||||
|
||||
m_ev.lock.Lock();
|
||||
|
||||
m_queue.pop();
|
||||
|
||||
_InterlockedDecrement(&m_ev.count);
|
||||
m_empty->Set();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -328,19 +285,30 @@ public:
|
|||
: m_count(0)
|
||||
, m_exit(false)
|
||||
{
|
||||
m_ev.count = 0;
|
||||
bool condvar = !!theApp.GetConfig("condvar", 1);
|
||||
|
||||
#ifdef _WINDOWS
|
||||
|
||||
m_cv.available = pInitializeConditionVariable != NULL;
|
||||
|
||||
#elif defined(_LINUX)
|
||||
|
||||
//m_cv.available = true;
|
||||
m_cv.available = !!theApp.GetConfig("condvar", 1);
|
||||
if(pInitializeConditionVariable == NULL)
|
||||
{
|
||||
condvar = false;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if(condvar)
|
||||
{
|
||||
m_notempty = new GSCondVar();
|
||||
m_empty = new GSCondVar();
|
||||
m_lock = new GSCondVarLock();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_notempty = new GSEvent();
|
||||
m_empty = new GSEvent();
|
||||
m_lock = new GSCritSec();
|
||||
}
|
||||
|
||||
CreateThread();
|
||||
}
|
||||
|
||||
|
@ -348,68 +316,51 @@ public:
|
|||
{
|
||||
m_exit = true;
|
||||
|
||||
if(m_cv.available)
|
||||
{
|
||||
m_cv.notempty.Set();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_ev.notempty.Set();
|
||||
}
|
||||
m_notempty->Set();
|
||||
|
||||
CloseThread();
|
||||
|
||||
delete m_notempty;
|
||||
delete m_empty;
|
||||
delete m_lock;
|
||||
}
|
||||
|
||||
int GetCount() const
|
||||
bool IsEmpty() const
|
||||
{
|
||||
return m_count;
|
||||
ASSERT(m_count >= 0);
|
||||
|
||||
return m_count == 0;
|
||||
}
|
||||
|
||||
virtual void Push(const T& item)
|
||||
void Push(const T& item)
|
||||
{
|
||||
if(m_cv.available)
|
||||
{
|
||||
m_cv.lock.Lock();
|
||||
m_lock->Lock();
|
||||
|
||||
m_queue.push(item);
|
||||
m_queue.push(item);
|
||||
|
||||
m_cv.lock.Unlock();
|
||||
|
||||
m_cv.notempty.Set();
|
||||
}
|
||||
else
|
||||
if(m_count++ == 0)
|
||||
{
|
||||
GSAutoLock l(&m_ev.lock);
|
||||
|
||||
m_queue.push(item);
|
||||
|
||||
_InterlockedIncrement(&m_ev.count);
|
||||
|
||||
m_ev.notempty.Set();
|
||||
m_notempty->Set();
|
||||
}
|
||||
|
||||
m_count++;
|
||||
m_lock->Unlock();
|
||||
}
|
||||
|
||||
virtual void Wait()
|
||||
void Wait()
|
||||
{
|
||||
if(m_cv.available)
|
||||
if(m_count > 0)
|
||||
{
|
||||
m_cv.lock.Lock();
|
||||
m_lock->Lock();
|
||||
|
||||
while(!m_queue.empty())
|
||||
while(m_count != 0)
|
||||
{
|
||||
m_cv.empty.Wait(m_cv.lock);
|
||||
m_empty->Wait(m_lock);
|
||||
}
|
||||
|
||||
m_cv.lock.Unlock();
|
||||
ASSERT(m_queue.empty());
|
||||
|
||||
m_lock->Unlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
// NOTE: it is the safest to have our own counter because m_queue.pop() might decrement its own before the last item runs out of its scope and gets destroyed (implementation dependent)
|
||||
|
||||
while(m_ev.count > 0) _mm_pause();
|
||||
}
|
||||
|
||||
m_count++;
|
||||
}
|
||||
|
||||
virtual void Process(T& item) = 0;
|
||||
|
|
|
@ -22,6 +22,48 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSVector.h"
|
||||
|
||||
const GSVector4i GSVector4i::m_xff[17] =
|
||||
{
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
};
|
||||
|
||||
const GSVector4i GSVector4i::m_x0f[17] =
|
||||
{
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||
};
|
||||
|
||||
const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f);
|
||||
const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
|
||||
const GSVector4 GSVector4::m_half(0.5f);
|
||||
|
|
|
@ -79,6 +79,9 @@ class GSVector4;
|
|||
|
||||
__aligned(class, 16) GSVector4i
|
||||
{
|
||||
static const GSVector4i m_xff[17];
|
||||
static const GSVector4i m_x0f[17];
|
||||
|
||||
public:
|
||||
union
|
||||
{
|
||||
|
@ -2343,6 +2346,9 @@ public:
|
|||
__forceinline static GSVector4i xfff8(const GSVector4i& v) {return xffffffff(v).sll16( 3);}
|
||||
__forceinline static GSVector4i xfffc(const GSVector4i& v) {return xffffffff(v).sll16( 2);}
|
||||
__forceinline static GSVector4i xfffe(const GSVector4i& v) {return xffffffff(v).sll16( 1);}
|
||||
|
||||
__forceinline static GSVector4i xff(int n) {return m_xff[n];}
|
||||
__forceinline static GSVector4i x0f(int n) {return m_x0f[n];}
|
||||
};
|
||||
|
||||
__aligned(class, 16) GSVector4
|
||||
|
@ -2909,6 +2915,11 @@ public:
|
|||
return GSVector4(aligned ? _mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p));
|
||||
}
|
||||
|
||||
__forceinline static void storent(void* p, const GSVector4& v)
|
||||
{
|
||||
_mm_stream_ps((float*)p, v.m);
|
||||
}
|
||||
|
||||
__forceinline static void storel(void* p, const GSVector4& v)
|
||||
{
|
||||
_mm_store_sd((double*)p, _mm_castps_pd(v.m));
|
||||
|
|
|
@ -37,7 +37,8 @@ __aligned(struct, 32) GSVertex
|
|||
GIFRegST ST;
|
||||
GIFRegRGBAQ RGBAQ;
|
||||
GIFRegXYZ XYZ;
|
||||
uint32 UV, FOG;
|
||||
union {uint32 UV; struct {uint16 U, V;};};
|
||||
uint32 FOG;
|
||||
};
|
||||
|
||||
__m128i m[2];
|
||||
|
|
|
@ -37,45 +37,4 @@ __aligned(struct, 32) GSVertexHW9
|
|||
GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;}
|
||||
};
|
||||
|
||||
__aligned(union, 32) GSVertexHW11
|
||||
{
|
||||
struct
|
||||
{
|
||||
union
|
||||
{
|
||||
struct {float x, y;} t;
|
||||
GIFRegST ST;
|
||||
};
|
||||
|
||||
union
|
||||
{
|
||||
union {struct {uint8 r, g, b, a; float q;}; uint32 c0;};
|
||||
GIFRegRGBAQ RGBAQ;
|
||||
};
|
||||
|
||||
union
|
||||
{
|
||||
struct {union {struct {uint16 x, y;}; uint32 xy;}; uint32 z;} p;
|
||||
GIFRegXYZ XYZ;
|
||||
};
|
||||
|
||||
union
|
||||
{
|
||||
struct {uint32 _pad; union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;};};
|
||||
GIFRegFOG FOG;
|
||||
};
|
||||
};
|
||||
|
||||
GSVertexHW11& operator = (GSVertexHW11& v)
|
||||
{
|
||||
GSVector4i* RESTRICT src = (GSVector4i*)&v;
|
||||
GSVector4i* RESTRICT dst = (GSVector4i*)this;
|
||||
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
#pragma pack(pop)
|
||||
|
|
|
@ -29,10 +29,38 @@ const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX);
|
|||
GSVertexTrace::GSVertexTrace(const GSState* state)
|
||||
: m_state(state)
|
||||
{
|
||||
#define InitUpdate3(P, IIP, TME, FST, COLOR) \
|
||||
m_fmm[COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR>;
|
||||
|
||||
#define InitUpdate2(P, IIP, TME) \
|
||||
InitUpdate3(P, IIP, TME, 0, 0) \
|
||||
InitUpdate3(P, IIP, TME, 0, 1) \
|
||||
InitUpdate3(P, IIP, TME, 1, 0) \
|
||||
InitUpdate3(P, IIP, TME, 1, 1) \
|
||||
|
||||
#define InitUpdate(P) \
|
||||
InitUpdate2(P, 0, 0) \
|
||||
InitUpdate2(P, 0, 1) \
|
||||
InitUpdate2(P, 1, 0) \
|
||||
InitUpdate2(P, 1, 1) \
|
||||
|
||||
InitUpdate(GS_POINT_CLASS);
|
||||
InitUpdate(GS_LINE_CLASS);
|
||||
InitUpdate(GS_TRIANGLE_CLASS);
|
||||
InitUpdate(GS_SPRITE_CLASS);
|
||||
}
|
||||
|
||||
void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_primclass = primclass;
|
||||
|
||||
uint32 iip = m_state->PRIM->IIP;
|
||||
uint32 tme = m_state->PRIM->TME;
|
||||
uint32 fst = m_state->PRIM->FST;
|
||||
uint32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC);
|
||||
|
||||
(this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, count);
|
||||
|
||||
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
|
||||
|
||||
m_alpha.valid = false;
|
||||
|
@ -82,90 +110,350 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, G
|
|||
}
|
||||
}
|
||||
|
||||
uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass)
|
||||
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
|
||||
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
|
||||
{
|
||||
m_primclass = primclass;
|
||||
|
||||
uint32 hash = m_primclass | (m_state->PRIM->IIP << 2) | (m_state->PRIM->TME << 3) | (m_state->PRIM->FST << 4);
|
||||
|
||||
if(!(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC))
|
||||
{
|
||||
hash |= 1 << 5;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
GSVertexTraceSW::GSVertexTraceSW(const GSState* state)
|
||||
: GSVertexTrace(state)
|
||||
, m_map("VertexTraceSW", NULL)
|
||||
{
|
||||
}
|
||||
|
||||
void GSVertexTraceSW::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
|
||||
|
||||
GSVertexTrace::Update(vertex, index, count, primclass);
|
||||
}
|
||||
|
||||
GSVertexTraceDX9::GSVertexTraceDX9(const GSState* state)
|
||||
: GSVertexTrace(state)
|
||||
, m_map("VertexTraceHW9", NULL)
|
||||
{
|
||||
}
|
||||
|
||||
void GSVertexTraceDX9::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
|
||||
|
||||
const GSDrawingContext* context = m_state->m_context;
|
||||
|
||||
GSVector4 o(context->XYOFFSET);
|
||||
GSVector4 s(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
|
||||
int n = 1;
|
||||
|
||||
m_min.p = (m_min.p - o) * s;
|
||||
m_max.p = (m_max.p - o) * s;
|
||||
|
||||
if(m_state->PRIM->TME)
|
||||
switch(primclass)
|
||||
{
|
||||
if(m_state->PRIM->FST)
|
||||
{
|
||||
s = GSVector4(1 << (16 - 4), 1).xxyy();
|
||||
}
|
||||
else
|
||||
{
|
||||
s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1);
|
||||
}
|
||||
|
||||
m_min.t *= s;
|
||||
m_max.t *= s;
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
GSVertexTrace::Update(vertex, index, count, primclass);
|
||||
}
|
||||
GSVector4 tmin = s_minmax.xxxx();
|
||||
GSVector4 tmax = s_minmax.yyyy();
|
||||
GSVector4i cmin = GSVector4i::xffffffff();
|
||||
GSVector4i cmax = GSVector4i::zero();
|
||||
|
||||
GSVertexTraceDX11::GSVertexTraceDX11(const GSState* state)
|
||||
: GSVertexTrace(state)
|
||||
, m_map("VertexTraceHW11", NULL)
|
||||
{
|
||||
}
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
|
||||
GSVector4i pmin = GSVector4i::xffffffff();
|
||||
GSVector4i pmax = GSVector4i::zero();
|
||||
|
||||
const GSDrawingContext* context = m_state->m_context;
|
||||
#else
|
||||
|
||||
GSVector4 pmin = s_minmax.xxxx();
|
||||
GSVector4 pmax = s_minmax.yyyy();
|
||||
|
||||
#endif
|
||||
|
||||
const GSVertex* RESTRICT v = (GSVertex*)vertex;
|
||||
|
||||
for(int i = 0; i < count; i += n)
|
||||
{
|
||||
if(primclass == GS_POINT_CLASS)
|
||||
{
|
||||
GSVector4i c(v[index[i]].m[0]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
cmin = cmin.min_u8(c);
|
||||
cmax = cmax.max_u8(c);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
GSVector4 stq = GSVector4::cast(c);
|
||||
|
||||
GSVector4 q = stq.wwww();
|
||||
|
||||
stq = (stq.xyww() * q.rcpnr()).xyww(q);
|
||||
|
||||
tmin = tmin.min(stq);
|
||||
tmax = tmax.max(stq);
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4i uv(v[index[i]].m[1]);
|
||||
|
||||
GSVector4 st = GSVector4(uv.uph16()).xyxy();
|
||||
|
||||
tmin = tmin.min(st);
|
||||
tmax = tmax.max(st);
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i xyzf(v[index[i]].m[1]);
|
||||
|
||||
GSVector4i xy = xyzf.upl16();
|
||||
GSVector4i z = xyzf.yyyy();
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i p = xy.blend16<0xf0>(z.uph32(xyzf));
|
||||
|
||||
pmin = pmin.min_u32(p);
|
||||
pmax = pmax.max_u32(p);
|
||||
|
||||
#else
|
||||
|
||||
GSVector4 p = GSVector4(xy.upl64(z.srl32(1).upl32(xyzf.wwww())));
|
||||
|
||||
pmin = pmin.min(p);
|
||||
pmax = pmax.max(p);
|
||||
|
||||
#endif
|
||||
}
|
||||
else if(primclass == GS_LINE_CLASS)
|
||||
{
|
||||
GSVector4i c0(v[index[i + 0]].m[0]);
|
||||
GSVector4i c1(v[index[i + 1]].m[0]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
if(iip)
|
||||
{
|
||||
cmin = cmin.min_u8(c0.min_u8(c1));
|
||||
cmax = cmax.max_u8(c0.max_u8(c1));
|
||||
}
|
||||
else
|
||||
{
|
||||
cmin = cmin.min_u8(c1);
|
||||
cmax = cmax.max_u8(c1);
|
||||
}
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
GSVector4 stq0 = GSVector4::cast(c0);
|
||||
GSVector4 stq1 = GSVector4::cast(c1);
|
||||
|
||||
GSVector4 q = stq0.wwww(stq1).rcpnr();
|
||||
|
||||
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
|
||||
stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1);
|
||||
|
||||
tmin = tmin.min(stq0.min(stq1));
|
||||
tmax = tmax.max(stq0.max(stq1));
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4i uv0(v[index[i + 0]].m[1]);
|
||||
GSVector4i uv1(v[index[i + 1]].m[1]);
|
||||
|
||||
GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
|
||||
GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();
|
||||
|
||||
tmin = tmin.min(st0.min(st1));
|
||||
tmax = tmax.max(st0.max(st1));
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i xyzf0(v[index[i + 0]].m[1]);
|
||||
GSVector4i xyzf1(v[index[i + 1]].m[1]);
|
||||
|
||||
GSVector4i xy0 = xyzf0.upl16();
|
||||
GSVector4i z0 = xyzf0.yyyy();
|
||||
GSVector4i xy1 = xyzf1.upl16();
|
||||
GSVector4i z1 = xyzf1.yyyy();
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
|
||||
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
|
||||
|
||||
pmin = pmin.min_u32(p0.min_u32(p1));
|
||||
pmax = pmax.max_u32(p0.max_u32(p1));
|
||||
|
||||
#else
|
||||
|
||||
GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
|
||||
GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
|
||||
|
||||
pmin = pmin.min(p0.min(p1));
|
||||
pmax = pmax.max(p0.max(p1));
|
||||
|
||||
#endif
|
||||
}
|
||||
else if(primclass == GS_TRIANGLE_CLASS)
|
||||
{
|
||||
GSVector4i c0(v[index[i + 0]].m[0]);
|
||||
GSVector4i c1(v[index[i + 1]].m[0]);
|
||||
GSVector4i c2(v[index[i + 2]].m[0]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
if(iip)
|
||||
{
|
||||
cmin = cmin.min_u8(c2).min_u8(c0.min_u8(c1));
|
||||
cmax = cmax.max_u8(c2).max_u8(c0.max_u8(c1));
|
||||
}
|
||||
else
|
||||
{
|
||||
cmin = cmin.min_u8(c2);
|
||||
cmax = cmax.max_u8(c2);
|
||||
}
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
GSVector4 stq0 = GSVector4::cast(c0);
|
||||
GSVector4 stq1 = GSVector4::cast(c1);
|
||||
GSVector4 stq2 = GSVector4::cast(c2);
|
||||
|
||||
GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr();
|
||||
|
||||
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
|
||||
stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1);
|
||||
stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2);
|
||||
|
||||
tmin = tmin.min(stq2).min(stq0.min(stq1));
|
||||
tmax = tmax.max(stq2).max(stq0.max(stq1));
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4i uv0(v[index[i + 0]].m[1]);
|
||||
GSVector4i uv1(v[index[i + 1]].m[1]);
|
||||
GSVector4i uv2(v[index[i + 2]].m[1]);
|
||||
|
||||
GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
|
||||
GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();
|
||||
GSVector4 st2 = GSVector4(uv2.uph16()).xyxy();
|
||||
|
||||
tmin = tmin.min(st2).min(st0.min(st1));
|
||||
tmax = tmax.max(st2).max(st0.max(st1));
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i xyzf0(v[index[i + 0]].m[1]);
|
||||
GSVector4i xyzf1(v[index[i + 1]].m[1]);
|
||||
GSVector4i xyzf2(v[index[i + 2]].m[1]);
|
||||
|
||||
GSVector4i xy0 = xyzf0.upl16();
|
||||
GSVector4i z0 = xyzf0.yyyy();
|
||||
GSVector4i xy1 = xyzf1.upl16();
|
||||
GSVector4i z1 = xyzf1.yyyy();
|
||||
GSVector4i xy2 = xyzf2.upl16();
|
||||
GSVector4i z2 = xyzf2.yyyy();
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
|
||||
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
|
||||
GSVector4i p2 = xy2.blend16<0xf0>(z2.uph32(xyzf2));
|
||||
|
||||
pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1));
|
||||
pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1));
|
||||
|
||||
#else
|
||||
|
||||
GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
|
||||
GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
|
||||
GSVector4 p2 = GSVector4(xy2.upl64(z2.srl32(1).upl32(xyzf2.wwww())));
|
||||
|
||||
pmin = pmin.min(p2).min(p0.min(p1));
|
||||
pmax = pmax.max(p2).max(p0.max(p1));
|
||||
|
||||
#endif
|
||||
}
|
||||
else if(primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
GSVector4i c0(v[index[i + 0]].m[0]);
|
||||
GSVector4i c1(v[index[i + 1]].m[0]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
if(iip)
|
||||
{
|
||||
cmin = cmin.min_u8(c0.min_u8(c1));
|
||||
cmax = cmax.max_u8(c0.max_u8(c1));
|
||||
}
|
||||
else
|
||||
{
|
||||
cmin = cmin.min_u8(c1);
|
||||
cmax = cmax.max_u8(c1);
|
||||
}
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
GSVector4 stq0 = GSVector4::cast(c0);
|
||||
GSVector4 stq1 = GSVector4::cast(c1);
|
||||
|
||||
GSVector4 q = stq1.wwww().rcpnr();
|
||||
|
||||
stq0 = (stq0.xyww() * q).xyww(stq1);
|
||||
stq1 = (stq1.xyww() * q).xyww(stq1);
|
||||
|
||||
tmin = tmin.min(stq0.min(stq1));
|
||||
tmax = tmax.max(stq0.max(stq1));
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4i uv0(v[index[i + 0]].m[1]);
|
||||
GSVector4i uv1(v[index[i + 1]].m[1]);
|
||||
|
||||
GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
|
||||
GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();
|
||||
|
||||
tmin = tmin.min(st0.min(st1));
|
||||
tmax = tmax.max(st0.max(st1));
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i xyzf0(v[index[i + 0]].m[1]);
|
||||
GSVector4i xyzf1(v[index[i + 1]].m[1]);
|
||||
|
||||
GSVector4i xy0 = xyzf0.upl16();
|
||||
GSVector4i z0 = xyzf0.yyyy();
|
||||
GSVector4i xy1 = xyzf1.upl16();
|
||||
GSVector4i z1 = xyzf1.yyyy();
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf1));
|
||||
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
|
||||
|
||||
pmin = pmin.min_u32(p0.min_u32(p1));
|
||||
pmax = pmax.max_u32(p0.max_u32(p1));
|
||||
|
||||
#else
|
||||
|
||||
GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf1.wwww())));
|
||||
GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
|
||||
|
||||
pmin = pmin.min(p0.min(p1));
|
||||
pmax = pmax.max(p0.max(p1));
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
pmin = pmin.blend16<0x30>(pmin.srl32(1));
|
||||
pmax = pmax.blend16<0x30>(pmax.srl32(1));
|
||||
|
||||
#endif
|
||||
|
||||
GSVector4 o(context->XYOFFSET);
|
||||
GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f);
|
||||
|
||||
m_min.p = (m_min.p - o) * s;
|
||||
m_max.p = (m_max.p - o) * s;
|
||||
m_min.p = (GSVector4(pmin) - o) * s;
|
||||
m_max.p = (GSVector4(pmax) - o) * s;
|
||||
|
||||
if(m_state->PRIM->TME)
|
||||
if(tme)
|
||||
{
|
||||
if(m_state->PRIM->FST)
|
||||
if(fst)
|
||||
{
|
||||
s = GSVector4(1 << (16 - 4), 1).xxyy();
|
||||
}
|
||||
|
@ -174,10 +462,23 @@ void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int coun
|
|||
s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1);
|
||||
}
|
||||
|
||||
m_min.t *= s;
|
||||
m_max.t *= s;
|
||||
m_min.t = tmin * s;
|
||||
m_max.t = tmax * s;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_min.t = GSVector4::zero();
|
||||
m_max.t = GSVector4::zero();
|
||||
}
|
||||
|
||||
GSVertexTrace::Update(vertex, index, count, primclass);
|
||||
if(color)
|
||||
{
|
||||
m_min.c = cmin.zzzz().u8to32();
|
||||
m_max.c = cmax.zzzz().u8to32();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_min.c = GSVector4i::zero();
|
||||
m_max.c = GSVector4i::zero();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -38,12 +38,15 @@ public:
|
|||
protected:
|
||||
const GSState* m_state;
|
||||
|
||||
uint32 Hash(GS_PRIM_CLASS primclass);
|
||||
|
||||
typedef void (*VertexTracePtr)(int count, const void* vertex, const uint32* index, Vertex& min, Vertex& max);
|
||||
|
||||
static const GSVector4 s_minmax;
|
||||
|
||||
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
|
||||
|
||||
FindMinMaxPtr m_fmm[2][2][2][2][4];
|
||||
|
||||
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
|
||||
void FindMinMax(const void* vertex, const uint32* index, int count);
|
||||
|
||||
public:
|
||||
GS_PRIM_CLASS m_primclass;
|
||||
|
||||
|
@ -69,55 +72,7 @@ public:
|
|||
GSVertexTrace(const GSState* state);
|
||||
virtual ~GSVertexTrace() {}
|
||||
|
||||
virtual void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
|
||||
bool IsLinear() const {return m_filter.linear;}
|
||||
};
|
||||
|
||||
// GSVertexTrace specialization for the software path.
// Declared through the project's __aligned(class, 32) macro
// (presumably requests 32-byte alignment -- confirm against the macro definition).
__aligned(class, 32) GSVertexTraceSW : public GSVertexTrace
|
||||
{
|
||||
// Code generator for this class; defined in a separate translation unit.
class CG : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
// param: opaque generator parameter; key: selects the routine variant;
// code / maxsize: output buffer and its capacity.
CG(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
// Maps a uint32 key to a VertexTracePtr routine produced by CG.
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
|
||||
|
||||
public:
|
||||
GSVertexTraceSW(const GSState* state);
|
||||
|
||||
// Per-batch update; takes the vertex data, index list, vertex count and primitive class.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
};
|
||||
|
||||
// GSVertexTrace specialization for the DX9 path.
// Declared through the project's __aligned(class, 32) macro
// (presumably requests 32-byte alignment -- confirm against the macro definition).
__aligned(class, 32) GSVertexTraceDX9 : public GSVertexTrace
|
||||
{
|
||||
// Code generator for this class; defined in a separate translation unit.
class CG : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
// param: opaque generator parameter; key: selects the routine variant;
// code / maxsize: output buffer and its capacity.
CG(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
// Maps a uint32 key to a VertexTracePtr routine produced by CG.
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
|
||||
|
||||
public:
|
||||
GSVertexTraceDX9(const GSState* state);
|
||||
|
||||
// Per-batch update; takes the vertex data, index list, vertex count and primitive class.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
};
|
||||
|
||||
// GSVertexTrace specialization for the DX11 path.
// Declared through the project's __aligned(class, 32) macro
// (presumably requests 32-byte alignment -- confirm against the macro definition).
__aligned(class, 32) GSVertexTraceDX11 : public GSVertexTrace
|
||||
{
|
||||
// Code generator for this class; defined in a separate translation unit.
class CG : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
// param: opaque generator parameter; key: selects the routine variant;
// code / maxsize: output buffer and its capacity.
CG(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
// Maps a uint32 key to a VertexTracePtr routine produced by CG.
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
|
||||
|
||||
public:
|
||||
GSVertexTraceDX11(const GSState* state);
|
||||
|
||||
// Per-batch update; takes the vertex data, index list, vertex count and primitive class.
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
};
|
||||
|
|
|
@ -1,496 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSVertexTrace.h"
|
||||
|
||||
#if _M_SSE >= 0x500 && (defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Emits, with AVX encodings, a routine that scans an array of GSVertexSW and
// records per-component minima and maxima.
//
// key layout (decoded below):
//   bits 0-1  primitive class
//   bit  2    iip   - when clear, only the last vertex of a primitive feeds the color range
//   bit  3    tme   - track the texture coordinate range
//   bit  4    fst   - when clear, texture coordinates are divided before comparison
//   bit  5    color - track the color range
//
// Register roles inside the generated routine:
//   ecx      remaining vertex count
//   rdx      current GSVertexSW
//   r8 / r9  GSVertexTrace::Vertex receiving the minimum / maximum
//   xmm2/3   color min/max, xmm4/5 position min/max, xmm6/7 texture min/max
//
// `param` is not read by this generator.
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
	: GSCodeGenerator(code, maxsize)
{
	const uint32 prim = key & 3;
	const bool iipOn = ((key >> 2) & 1) != 0;
	const bool tmeOn = ((key >> 3) & 1) != 0;
	const bool fstOn = ((key >> 4) & 1) != 0;
	const bool colorOn = ((key >> 5) & 1) != 0;

	// Vertices consumed per loop iteration.
	int verts = 1;

	if(prim == GS_LINE_CLASS || prim == GS_SPRITE_CLASS)
	{
		verts = 2;
	}
	else if(prim == GS_TRIANGLE_CLASS)
	{
		verts = 3;
	}

	const size_t stride = sizeof(GSVertexSW);

	// xmm6/xmm7 are spilled here and reloaded right before ret().
	sub(rsp, 8 + 2 * 16);

	vmovdqa(ptr[rsp + 0], xmm6);
	vmovdqa(ptr[rsp + 16], xmm7);

	// Seed the position range from s_minmax: first float -> min, second float -> max.
	mov(rax, (size_t)&s_minmax);

	vbroadcastss(xmm4, ptr[rax + 0]);
	vbroadcastss(xmm5, ptr[rax + 4]);

	if(colorOn)
	{
		// The color range starts from the same seeds.
		vmovaps(xmm2, xmm4);
		vmovaps(xmm3, xmm5);
	}

	if(tmeOn)
	{
		// ... and so does the texture range.
		vmovaps(xmm6, xmm4);
		vmovaps(xmm7, xmm5);
	}

	// Loop head: one primitive (`verts` vertices) per iteration.
	align(16);

	L("loop");

	if(tmeOn && !fstOn && prim == GS_SPRITE_CLASS)
	{
		// Sprites share one divisor: lane 2 of the second vertex's t, broadcast into xmm1.
		vmovaps(xmm1, ptr[rdx + 1 * stride + offsetof(GSVertexSW, t)]);
		vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
	}

	for(int j = 0; j < verts; ++j)
	{
		const bool lastVertex = (j == verts - 1);
		const size_t base = j * stride;

		if(colorOn && (iipOn || lastVertex))
		{
			// color range |= v[j].c
			vmovaps(xmm0, ptr[rdx + base + offsetof(GSVertexSW, c)]);

			vminps(xmm2, xmm0);
			vmaxps(xmm3, xmm0);
		}

		// position range |= v[j].p
		vmovaps(xmm0, ptr[rdx + base + offsetof(GSVertexSW, p)]);

		vminps(xmm4, xmm0);
		vmaxps(xmm5, xmm0);

		if(tmeOn)
		{
			// texture range |= v[j].t (divided first when fst is clear)
			vmovaps(xmm0, ptr[rdx + base + offsetof(GSVertexSW, t)]);

			if(!fstOn)
			{
				if(prim != GS_SPRITE_CLASS)
				{
					// Per-vertex divisor: lane 2 of this vertex's t.
					vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
				}

				vdivps(xmm0, xmm1);

				// Keep the divided low two lanes, take the upper two from the divisor.
				vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
			}

			vminps(xmm6, xmm0);
			vmaxps(xmm7, xmm0);
		}
	}

	add(rdx, verts * stride);
	sub(ecx, verts);

	jg("loop");

	// Loop done: write the collected ranges out.

	if(colorOn)
	{
		// Colors are truncated to integers and shifted right by 7 before being stored.
		vcvttps2dq(xmm2, xmm2);
		vpsrld(xmm2, 7);
		vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);

		vcvttps2dq(xmm3, xmm3);
		vpsrld(xmm3, 7);
		vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
	}

	vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
	vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);

	if(tmeOn)
	{
		vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
		vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
	}

	vmovdqa(xmm6, ptr[rsp + 0]);
	vmovdqa(xmm7, ptr[rsp + 16]);

	add(rsp, 8 + 2 * 16);

	ret();
}
|
||||
|
||||
// Emits, with AVX encodings, a routine that scans an array of GSVertexHW9 and
// records per-component minima and maxima.
//
// key layout (decoded below):
//   bits 0-1  primitive class
//   bit  2    iip   - when clear, only the last vertex of a primitive feeds the color range
//   bit  3    tme   - track the texture coordinate range
//   bit  4    fst   - when clear, texture coordinates are divided by p.w before comparison
//   bit  5    color - track the color range
//
// Register roles inside the generated routine:
//   ecx      remaining vertex count
//   rdx      current GSVertexHW9
//   r8 / r9  GSVertexTrace::Vertex receiving the minimum / maximum
//   xmm2/3   color min/max (unsigned bytes), xmm4/5 position min/max, xmm6/7 texture min/max
//
// `param` is not read by this generator.
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
	: GSCodeGenerator(code, maxsize)
{
	const uint32 prim = key & 3;
	const bool iipOn = ((key >> 2) & 1) != 0;
	const bool tmeOn = ((key >> 3) & 1) != 0;
	const bool fstOn = ((key >> 4) & 1) != 0;
	const bool colorOn = ((key >> 5) & 1) != 0;

	// Vertices consumed per loop iteration; sprites occupy six vertices in this format.
	int verts = 1;

	if(prim == GS_LINE_CLASS)
	{
		verts = 2;
	}
	else if(prim == GS_TRIANGLE_CLASS)
	{
		verts = 3;
	}
	else if(prim == GS_SPRITE_CLASS)
	{
		verts = 6;
	}

	const size_t stride = sizeof(GSVertexHW9);

	// xmm6/xmm7 are spilled here and reloaded right before ret().
	sub(rsp, 8 + 2 * 16);

	vmovdqa(ptr[rsp + 0], xmm6);
	vmovdqa(ptr[rsp + 16], xmm7);

	// Seed the position range from s_minmax: first float -> min, second float -> max.
	mov(rax, (size_t)&s_minmax);

	vbroadcastss(xmm4, ptr[rax + 0]);
	vbroadcastss(xmm5, ptr[rax + 4]);

	if(colorOn)
	{
		// color min = all ones, color max = zero
		vpcmpeqd(xmm2, xmm2);
		vpxor(xmm3, xmm3);
	}

	if(tmeOn)
	{
		// The texture range starts from the same seeds as the position range.
		vmovaps(xmm6, xmm4);
		vmovaps(xmm7, xmm5);
	}

	// Loop head: one primitive (`verts` vertices) per iteration.
	align(16);

	L("loop");

	if(tmeOn && !fstOn && prim == GS_SPRITE_CLASS)
	{
		// Sprites share one divisor: p.w of the sixth vertex, broadcast into xmm1.
		vmovaps(xmm1, ptr[rdx + 5 * stride + offsetof(GSVertexHW9, p)]);
		vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
	}

	for(int j = 0; j < verts; ++j)
	{
		const bool lastVertex = (j == verts - 1);
		const bool wantColor = colorOn && (iipOn || lastVertex);
		const size_t base = j * stride;

		// position range |= v[j].p
		vmovaps(xmm0, ptr[rdx + base + offsetof(GSVertexHW9, p)]);

		vminps(xmm4, xmm0);
		vmaxps(xmm5, xmm0);

		if(tmeOn && !fstOn && prim != GS_SPRITE_CLASS)
		{
			// Per-vertex divisor: this vertex's p.w.
			vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
		}

		if(wantColor || tmeOn)
		{
			// One load of the t member feeds both the color and the texture step below.
			vmovaps(xmm0, ptr[rdx + base + offsetof(GSVertexHW9, t)]);
		}

		if(wantColor)
		{
			// cmin = cmin.min_u8(v[j].c);
			// cmax = cmax.max_u8(v[j].c);

			vpminub(xmm2, xmm0);
			vpmaxub(xmm3, xmm0);
		}

		if(tmeOn)
		{
			vshufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral

			if(!fstOn)
			{
				// t /= p.wwww();

				vdivps(xmm0, xmm1);
			}

			// texture range |= t
			vminps(xmm6, xmm0);
			vmaxps(xmm7, xmm0);
		}
	}

	add(rdx, verts * stride);
	sub(ecx, verts);

	jg("loop");

	// Loop done: write the collected ranges out.

	if(colorOn)
	{
		// m_min.c = cmin.zzzz().u8to32();
		// m_max.c = cmax.zzzz().u8to32();

		vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
		vpmovzxbd(xmm2, xmm2);

		vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
		vpmovzxbd(xmm3, xmm3);

		vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
		vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
	}

	// m_min.p = pmin;
	// m_max.p = pmax;

	vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
	vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);

	if(tmeOn)
	{
		// m_min.t = tmin.xyww(pmin);
		// m_max.t = tmax.xyww(pmax);

		vshufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
		vshufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));

		vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
		vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
	}

	vmovdqa(xmm6, ptr[rsp + 0]);
	vmovdqa(xmm7, ptr[rsp + 16]);

	add(rsp, 8 + 2 * 16);

	ret();
}
|
||||
|
||||
// Emits, with AVX encodings, a routine that scans an array of GSVertexHW11 and
// records per-component minima and maxima.
//
// key layout (decoded below):
//   bits 0-1  primitive class
//   bit  2    iip   - when clear, only the last vertex of a primitive feeds the color range
//   bit  3    tme   - track the texture coordinate range
//   bit  4    fst   - when clear, texture coordinates are divided by q before comparison
//   bit  5    color - track the color range
//
// Each vertex is read as two 16-byte halves: offset 0 feeds the color and
// texture steps, offset 16 feeds the position step.
//
// Register roles inside the generated routine:
//   ecx      remaining vertex count
//   rdx      current GSVertexHW11
//   r8 / r9  GSVertexTrace::Vertex receiving the minimum / maximum
//   xmm2/3   color min/max (unsigned bytes), xmm4/5 position min/max, xmm6/7 texture min/max
//
// `param` is not read by this generator.
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
	: GSCodeGenerator(code, maxsize)
{
	const uint32 prim = key & 3;
	const bool iipOn = ((key >> 2) & 1) != 0;
	const bool tmeOn = ((key >> 3) & 1) != 0;
	const bool fstOn = ((key >> 4) & 1) != 0;
	const bool colorOn = ((key >> 5) & 1) != 0;

	// Vertices consumed per loop iteration.
	int verts = 1;

	if(prim == GS_LINE_CLASS || prim == GS_SPRITE_CLASS)
	{
		verts = 2;
	}
	else if(prim == GS_TRIANGLE_CLASS)
	{
		verts = 3;
	}

	const size_t stride = sizeof(GSVertexHW11);

	// xmm6/xmm7 are spilled here and reloaded right before ret().
	sub(rsp, 8 + 2 * 16);

	vmovdqa(ptr[rsp + 0], xmm6);
	vmovdqa(ptr[rsp + 16], xmm7);

	// Seed the position range from s_minmax: first float -> min, second float -> max.
	mov(rax, (size_t)&s_minmax);

	vbroadcastss(xmm4, ptr[rax + 0]);
	vbroadcastss(xmm5, ptr[rax + 4]);

	if(colorOn)
	{
		// color min = all ones, color max = zero
		vpcmpeqd(xmm2, xmm2);
		vpxor(xmm3, xmm3);
	}

	if(tmeOn)
	{
		// The texture range starts from the same seeds as the position range.
		vmovaps(xmm6, xmm4);
		vmovaps(xmm7, xmm5);
	}

	// Loop head: one primitive (`verts` vertices) per iteration.
	align(16);

	L("loop");

	for(int j = 0; j < verts; ++j)
	{
		const bool lastVertex = (j == verts - 1);
		const bool wantColor = colorOn && (iipOn || lastVertex);
		const size_t base = j * stride;

		if(wantColor || tmeOn)
		{
			// First 16 bytes of the vertex; used by the color and texture steps.
			vmovaps(xmm0, ptr[rdx + base]);
		}

		if(wantColor)
		{
			vpminub(xmm2, xmm0);
			vpmaxub(xmm3, xmm0);
		}

		if(tmeOn)
		{
			if(!fstOn)
			{
				// Keep an untouched copy so lane 3 can serve as the divisor.
				vmovaps(xmm1, xmm0);
			}

			vshufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral

			if(!fstOn)
			{
				vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
				vdivps(xmm0, xmm1);
				vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
			}

			vminps(xmm6, xmm0);
			vmaxps(xmm7, xmm0);
		}

		// Second 16 bytes: zero-extend the low four words into the low half of
		// xmm1, place the low qword of the source shifted right by one bit into
		// the high half, then convert all four lanes to float.
		vmovdqa(xmm0, ptr[rdx + base + 16]);
		vpmovzxwd(xmm1, xmm0);

		vpsrld(xmm0, 1);
		vpunpcklqdq(xmm1, xmm0);
		vcvtdq2ps(xmm1, xmm1);

		vminps(xmm4, xmm1);
		vmaxps(xmm5, xmm1);
	}

	add(rdx, verts * stride);
	sub(ecx, verts);

	jg("loop");

	// Loop done: write the collected ranges out.

	if(colorOn)
	{
		// m_min.c = cmin.zzzz().u8to32();
		// m_max.c = cmax.zzzz().u8to32();

		vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
		vpmovzxbd(xmm2, xmm2);

		vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
		vpmovzxbd(xmm3, xmm3);

		vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
		vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
	}

	// m_min.p = pmin.xyww();
	// m_max.p = pmax.xyww();

	vshufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
	vshufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));

	vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
	vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);

	if(tmeOn)
	{
		// m_min.t = tmin;
		// m_max.t = tmax;

		vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
		vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
	}

	vmovdqa(xmm6, ptr[rsp + 0]);
	vmovdqa(xmm7, ptr[rsp + 16]);

	add(rsp, 8 + 2 * 16);

	ret();
}
|
||||
|
||||
#endif
|
|
@ -1,543 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSVertexTrace.h"
|
||||
|
||||
#if _M_SSE < 0x500 && (defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Emits, with legacy SSE encodings, a routine that scans an array of
// GSVertexSW and records per-component minima and maxima.
//
// key layout (decoded below):
//   bits 0-1  primitive class
//   bit  2    iip   - when clear, only the last vertex of a primitive feeds the color range
//   bit  3    tme   - track the texture coordinate range
//   bit  4    fst   - when clear, texture coordinates are divided before comparison
//   bit  5    color - track the color range
//
// Register roles inside the generated routine:
//   ecx      remaining vertex count
//   rdx      current GSVertexSW
//   r8 / r9  GSVertexTrace::Vertex receiving the minimum / maximum
//   xmm2/3   color min/max, xmm4/5 position min/max, xmm6/7 texture min/max
//
// `param` is not read by this generator.
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
	: GSCodeGenerator(code, maxsize)
{
	uint32 primclass = (key >> 0) & 3;
	uint32 iip = (key >> 2) & 1;
	uint32 tme = (key >> 3) & 1;
	uint32 fst = (key >> 4) & 1;
	uint32 color = (key >> 5) & 1;

	// Vertices consumed per loop iteration.
	int n = 1;

	switch(primclass)
	{
	case GS_POINT_CLASS:
		n = 1;
		break;
	case GS_LINE_CLASS:
	case GS_SPRITE_CLASS:
		n = 2;
		break;
	case GS_TRIANGLE_CLASS:
		n = 3;
		break;
	}

	// xmm6/xmm7 are spilled here and reloaded right before ret().
	sub(rsp, 8 + 2 * 16);

	movdqa(ptr[rsp + 0], xmm6);
	movdqa(ptr[rsp + 16], xmm7);

	// min.p = FLT_MAX;
	// max.p = -FLT_MAX;

	mov(rax, (size_t)&s_minmax);

	movss(xmm4, ptr[rax + 0]);
	movss(xmm5, ptr[rax + 4]);

	// Broadcast each scalar seed to all four lanes.
	shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
	shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));

	if(color)
	{
		// min.c = FLT_MAX;
		// max.c = -FLT_MAX;

		movaps(xmm2, xmm4);
		movaps(xmm3, xmm5);
	}

	if(tme)
	{
		// min.t = FLT_MAX;
		// max.t = -FLT_MAX;

		movaps(xmm6, xmm4);
		movaps(xmm7, xmm5);
	}

	// for(int i = 0; i < count; i += step) {

	align(16);

	L("loop");

	if(tme && !fst && primclass == GS_SPRITE_CLASS)
	{
		// Sprites share one divisor: lane 2 of the second vertex's t, broadcast into xmm1.
		movaps(xmm1, ptr[rdx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
		shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
	}

	for(int j = 0; j < n; j++)
	{
		if(color && (iip || j == n - 1))
		{
			// min.c = min.c.minv(v[i + j].c);
			// max.c = max.c.maxv(v[i + j].c);

			movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]);

			minps(xmm2, xmm0);
			maxps(xmm3, xmm0);
		}

		// min.p = min.p.minv(v[i + j].p);
		// max.p = max.p.maxv(v[i + j].p);

		movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]);

		minps(xmm4, xmm0);
		maxps(xmm5, xmm0);

		if(tme)
		{
			// min.t = min.t.minv(v[i + j].t);
			// max.t = max.t.maxv(v[i + j].t);

			movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);

			if(!fst)
			{
				if(primclass != GS_SPRITE_CLASS)
				{
					// Per-vertex divisor: lane 2 of this vertex's t.
					movaps(xmm1, xmm0);
					shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
				}

				divps(xmm0, xmm1);

				// Keep the divided low two lanes, take the upper two from the divisor.
				shufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
			}

			minps(xmm6, xmm0);
			maxps(xmm7, xmm0);
		}
	}

	add(rdx, n * sizeof(GSVertexSW));

	// The counter is a 32-bit int argument, so it is decremented and tested as
	// ecx, matching every other generator in this file. The previous 64-bit
	// `sub(rcx, n)` also tested the upper half of rcx, which is not guaranteed
	// to be defined for a 32-bit argument.
	sub(ecx, n);

	jg("loop");

	// }

	if(color)
	{
		// Colors are truncated to integers and shifted right by 7 before being stored.
		cvttps2dq(xmm2, xmm2);
		psrld(xmm2, 7);
		movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);

		cvttps2dq(xmm3, xmm3);
		psrld(xmm3, 7);
		movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
	}

	movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
	movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);

	if(tme)
	{
		movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
		movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
	}

	movdqa(xmm6, ptr[rsp + 0]);
	movdqa(xmm7, ptr[rsp + 16]);

	add(rsp, 8 + 2 * 16);

	ret();
}
|
||||
|
||||
// Emits, with legacy SSE encodings, a routine that scans an array of
// GSVertexHW9 and records per-component minima and maxima.
//
// key layout (decoded below):
//   bits 0-1  primitive class
//   bit  2    iip   - when clear, only the last vertex of a primitive feeds the color range
//   bit  3    tme   - track the texture coordinate range
//   bit  4    fst   - when clear, texture coordinates are divided by p.w before comparison
//   bit  5    color - track the color range
//
// Register roles inside the generated routine:
//   ecx      remaining vertex count
//   rdx      current GSVertexHW9
//   r8 / r9  GSVertexTrace::Vertex receiving the minimum / maximum
//   xmm2/3   color min/max (unsigned bytes), xmm4/5 position min/max, xmm6/7 texture min/max
//
// `param` is not read by this generator.
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
	: GSCodeGenerator(code, maxsize)
{
	uint32 primclass = (key >> 0) & 3;
	uint32 iip = (key >> 2) & 1;
	uint32 tme = (key >> 3) & 1;
	uint32 fst = (key >> 4) & 1;
	uint32 color = (key >> 5) & 1;

	// Vertices consumed per loop iteration; sprites occupy six vertices in this format.
	int n = 1;

	switch(primclass)
	{
	case GS_POINT_CLASS:
		n = 1;
		break;
	case GS_LINE_CLASS:
		n = 2;
		break;
	case GS_TRIANGLE_CLASS:
		n = 3;
		break;
	case GS_SPRITE_CLASS:
		n = 6;
		break;
	}

	// xmm6/xmm7 are spilled here and reloaded right before ret().
	sub(rsp, 8 + 2 * 16);

	movdqa(ptr[rsp + 0], xmm6);
	movdqa(ptr[rsp + 16], xmm7);

	// min.p = FLT_MAX;
	// max.p = -FLT_MAX;

	mov(rax, (size_t)&s_minmax);

	// s_minmax is a single GSVector4: the min seed is its first float and the
	// max seed its second (offset 4), as in every other generator in this file.
	// The previous offset of 16 read past the end of the vector.
	movss(xmm4, ptr[rax + 0]);
	movss(xmm5, ptr[rax + 4]);

	// Broadcast each scalar seed to all four lanes.
	shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
	shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));

	if(color)
	{
		// min.c = 0xffffffff;
		// max.c = 0;

		pcmpeqd(xmm2, xmm2);
		pxor(xmm3, xmm3);
	}

	if(tme)
	{
		// min.t = FLT_MAX;
		// max.t = -FLT_MAX;

		movaps(xmm6, xmm4);
		movaps(xmm7, xmm5);
	}

	// for(int i = 0; i < count; i += step) {

	align(16);

	L("loop");

	if(tme && !fst && primclass == GS_SPRITE_CLASS)
	{
		// Sprites share one divisor: p.w of the sixth vertex, broadcast into xmm1.
		movaps(xmm1, ptr[rdx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
		shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
	}

	for(int j = 0; j < n; j++)
	{
		// min.p = min.p.minv(v[i + j].p);
		// max.p = max.p.maxv(v[i + j].p);

		movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);

		minps(xmm4, xmm0);
		maxps(xmm5, xmm0);

		if(tme && !fst && primclass != GS_SPRITE_CLASS)
		{
			// Per-vertex divisor: this vertex's p.w.
			movaps(xmm1, xmm0);
			shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
		}

		if((color && (iip || j == n - 1)) || tme)
		{
			// One load of the t member feeds both the color and the texture step below.
			movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]);
		}

		if(color && (iip || j == n - 1))
		{
			// min.c = min.c.min_u8(v[i + j].c);
			// max.c = max.c.max_u8(v[i + j].c);

			pminub(xmm2, xmm0);
			pmaxub(xmm3, xmm0);
		}

		if(tme)
		{
			shufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral

			if(!fst)
			{
				// t /= p.wwww();

				divps(xmm0, xmm1);
			}

			// min.t = min.t.minv(v[i + j].t);
			// max.t = max.t.maxv(v[i + j].t);

			minps(xmm6, xmm0);
			maxps(xmm7, xmm0);
		}
	}

	add(rdx, n * sizeof(GSVertexHW9));
	sub(ecx, n);

	jg("loop");

	// }

	if(color)
	{
		// m_min.c = cmin.zzzz().u8to32();
		// m_max.c = cmax.zzzz().u8to32();

		if(m_cpu.has(util::Cpu::tSSE41))
		{
			pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
			pmovzxbd(xmm2, xmm2);

			pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
			pmovzxbd(xmm3, xmm3);
		}
		else
		{
			// Without SSE4.1, widen bytes 8..11 by unpacking against zero twice.
			pxor(xmm0, xmm0);

			punpckhbw(xmm2, xmm0);
			punpcklwd(xmm2, xmm0);

			punpckhbw(xmm3, xmm0);
			punpcklwd(xmm3, xmm0);
		}

		movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
		movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
	}

	// m_min.p = pmin;
	// m_max.p = pmax;

	movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
	movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);

	if(tme)
	{
		// m_min.t = tmin.xyww(pmin);
		// m_max.t = tmax.xyww(pmax);

		shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
		shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));

		movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
		movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
	}

	movdqa(xmm6, ptr[rsp + 0]);
	movdqa(xmm7, ptr[rsp + 16]);

	add(rsp, 8 + 2 * 16);

	ret();
}
|
||||
|
||||
// Code generator for the x64 vertex-trace routine over GSVertexHW11 vertices.
// The emitted function walks the vertex array n vertices (one primitive) at a time and
// stores the component-wise minimum / maximum of position (p), texture coordinate (t)
// and color (c) into two GSVertexTrace::Vertex structures.
//
// key bits select what is emitted: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// param is not read by this constructor.
//
// Registers in the emitted code: ecx = remaining vertex count, rdx = current vertex,
// r8 / r9 = min / max output, xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texcoord min/max (saved on entry, restored before ret).
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of vertices consumed per iteration of the emitted loop
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// prologue: reserve 40 bytes of stack and spill xmm6/xmm7 into it
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
movdqa(ptr[rsp + 0], xmm6);
|
||||
movdqa(ptr[rsp + 16], xmm7);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
// load the scalars at byte offsets 0 and 16 of s_minmax and broadcast each to all four lanes
mov(rax, (size_t)&s_minmax);
|
||||
|
||||
movss(xmm4, ptr[rax + 0]);
|
||||
movss(xmm5, ptr[rax + 16]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
pcmpeqd(xmm2, xmm2);
|
||||
pxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
movaps(xmm6, xmm4);
|
||||
movaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// the body below is unrolled n times at generation time, once per vertex of the primitive
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// first 16 bytes of the vertex; only loaded when color or texcoords need it
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW11)]);
|
||||
}
|
||||
|
||||
// with iip == 0 only the last vertex of each primitive contributes to the color range
if(color && (iip || j == n - 1))
|
||||
{
|
||||
pminub(xmm2, xmm0);
|
||||
pmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
// keep an unshuffled copy so lane 3 (q) can be broadcast as the divisor
movaps(xmm1, xmm0);
|
||||
}
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
divps(xmm0, xmm1);
|
||||
shufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
|
||||
}
|
||||
|
||||
minps(xmm6, xmm0);
|
||||
maxps(xmm7, xmm0);
|
||||
}
|
||||
|
||||
// second 16 bytes of the vertex: integer position data, converted to float below
movdqa(xmm0, ptr[rdx + j * sizeof(GSVertexHW11) + 16]);
|
||||
|
||||
// xmm1 = low four 16-bit words zero-extended to dwords
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pmovzxwd(xmm1, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
// SSE2 fallback: duplicate each low word into a dword, then shift the copy out
movdqa(xmm1, xmm0);
|
||||
punpcklwd(xmm1, xmm1);
|
||||
psrld(xmm1, 16);
|
||||
}
|
||||
|
||||
// xmm1 = (word0, word1, dword0 >> 1, dword1 >> 1) as floats; lanes 0, 1 and 3 survive the final xyww shuffle
psrld(xmm0, 1);
|
||||
punpcklqdq(xmm1, xmm0);
|
||||
cvtdq2ps(xmm1, xmm1);
|
||||
|
||||
minps(xmm4, xmm1);
|
||||
maxps(xmm5, xmm1);
|
||||
}
|
||||
|
||||
add(rdx, n * sizeof(GSVertexHW11));
|
||||
sub(ecx, n);
|
||||
|
||||
// flags come from sub(ecx, n): loop again while the remaining count is still positive
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm2, xmm2);
|
||||
|
||||
pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm3, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// SSE2 fallback: zero-extend bytes 8..11 (the third dword) with two unpacks against zero
pxor(xmm0, xmm0);
|
||||
|
||||
punpckhbw(xmm2, xmm0);
|
||||
punpcklwd(xmm2, xmm0);
|
||||
|
||||
punpckhbw(xmm3, xmm0);
|
||||
punpcklwd(xmm3, xmm0);
|
||||
}
|
||||
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin.xyww();
|
||||
// m_max.p = pmax.xyww();
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin;
|
||||
// m_max.t = tmax;
|
||||
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
// epilogue: reload the spilled xmm6/xmm7 and release the stack space
movdqa(xmm6, ptr[rsp + 0]);
|
||||
movdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,513 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSVertexTrace.h"
|
||||
|
||||
#if _M_SSE >= 0x500 && !(defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Byte offsets from esp at which the generated functions in this file read their stack
// arguments; every generator emits push(ebx) before using them.
// NOTE(review): _args = 4 presumably accounts for the saved ebx, and the further +4 for the return address - confirm.
// NOTE(review): the trailing register names look like the x64 counterparts of each argument - confirm.
static const int _args = 4;
|
||||
static const int _count = _args + 4; // rcx
|
||||
static const int _vertex = _args + 8; // rdx
|
||||
static const int _index = _args + 12; // r8
|
||||
static const int _min = _args + 16; // r9
|
||||
static const int _max = _args + 20; // _args + 4
|
||||
|
||||
// Code generator for the 32-bit AVX vertex-trace routine over indexed GSVertexSW vertices.
// The emitted function walks the index array n indices (one primitive) at a time and stores
// the component-wise minimum / maximum of color (c), position (p) and texture coordinate (t)
// into two GSVertexTrace::Vertex structures.
//
// key bits select what is emitted: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// param is not read by this constructor.
//
// Registers in the emitted code: ecx = remaining index count, edx = vertex array base,
// ebx = current index pointer, eax = byte offset of the current vertex; after the loop
// eax / edx are reloaded with the min / max output pointers.
// xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max, xmm6/xmm7 = texcoord min/max.
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per iteration of the emitted loop
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx holds the index pointer below; it is restored by pop(ebx) before ret()
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
vbroadcastss(xmm4, ptr[&s_minmax.x]);
|
||||
vbroadcastss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = FLT_MAX;
|
||||
// max.c = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm2, xmm4);
|
||||
vmovaps(xmm3, xmm5);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm6, xmm4);
|
||||
vmovaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// sprites: the divisor is lane 2 of t of the vertex at index[1], reused for every vertex of the primitive
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
// the body below is unrolled n times at generation time, once per vertex of the primitive
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by index[j]
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
// with iip == 0 only the last vertex of each primitive contributes to the color range
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.minv(v[i + j].c);
|
||||
// max.c = max.c.maxv(v[i + j].c);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
|
||||
|
||||
vminps(xmm2, xmm0);
|
||||
vmaxps(xmm3, xmm0);
|
||||
}
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
|
||||
|
||||
vminps(xmm4, xmm0);
|
||||
vmaxps(xmm5, xmm0);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
// non-sprites divide by this vertex's own lane 2 of t
if(primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
// keep the two quotients in lanes 0-1 and put the divisor back into lanes 2-3
vdivps(xmm0, xmm1);
|
||||
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
|
||||
}
|
||||
|
||||
vminps(xmm6, xmm0);
|
||||
vmaxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
// flags come from sub(ecx, n): loop again while the remaining count is still positive
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// the loop is done with eax / edx; reuse them for the min / max output pointers
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// color range: truncate the floats to integers, then shift right by 7 before storing
vcvttps2dq(xmm2, xmm2);
|
||||
vpsrld(xmm2, 7);
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
|
||||
vcvttps2dq(xmm3, xmm3);
|
||||
vpsrld(xmm3, 7);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
// Code generator for the 32-bit AVX vertex-trace routine over indexed GSVertexHW9 vertices.
// The emitted function walks the index array n indices (one primitive) at a time and stores
// the component-wise minimum / maximum of position (p), texture coordinate (t) and color (c)
// into two GSVertexTrace::Vertex structures.
//
// key bits select what is emitted: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// param is not read by this constructor.
//
// Registers in the emitted code: ecx = remaining index count, edx = vertex array base,
// ebx = current index pointer, eax = byte offset of the current vertex; after the loop
// eax / edx are reloaded with the min / max output pointers.
// xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max, xmm6/xmm7 = texcoord min/max.
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per iteration of the emitted loop
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_SPRITE_CLASS:
|
||||
case GS_LINE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx holds the index pointer below; it is restored by pop(ebx) before ret()
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
vbroadcastss(xmm4, ptr[&s_minmax.x]);
|
||||
vbroadcastss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
vpcmpeqd(xmm2, xmm2);
|
||||
vpxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm6, xmm4);
|
||||
vmovaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// sprites: the divisor is lane 3 of p of the vertex at index[1], reused for every vertex of the primitive
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
// the body below is unrolled n times at generation time, once per vertex of the primitive
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by index[j]
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
|
||||
vminps(xmm4, xmm0);
|
||||
vmaxps(xmm5, xmm0);
|
||||
|
||||
// non-sprites divide by this vertex's own lane 3 of p; must be taken before xmm0 is reloaded below
if(tme && !fst && primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
// the 16 bytes at t are shared by the color and the texcoord paths
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
|
||||
}
|
||||
|
||||
// with iip == 0 only the last vertex of each primitive contributes to the color range
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.min_u8(v[i + j].c);
|
||||
// max.c = max.c.max_u8(v[i + j].c);
|
||||
|
||||
vpminub(xmm2, xmm0);
|
||||
vpmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
// t /= p.wwww();
|
||||
|
||||
vdivps(xmm0, xmm1);
|
||||
}
|
||||
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
vminps(xmm6, xmm0);
|
||||
vmaxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
// flags come from sub(ecx, n): loop again while the remaining count is still positive
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// the loop is done with eax / edx; reuse them for the min / max output pointers
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm2, xmm2);
|
||||
|
||||
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm3, xmm3);
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin;
|
||||
// m_max.p = pmax;
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin.xyww(pmin);
|
||||
// m_max.t = tmax.xyww(pmax);
|
||||
|
||||
vshufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
vshufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
// Code generator for the 32-bit AVX vertex-trace routine over indexed GSVertexHW11 vertices.
// The emitted function walks the index array n indices (one primitive) at a time and stores
// the component-wise minimum / maximum of position (p), texture coordinate (t) and color (c)
// into two GSVertexTrace::Vertex structures.
//
// key bits select what is emitted: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// param is not read by this constructor.
//
// Registers in the emitted code: ecx = remaining index count, edx = vertex array base,
// ebx = current index pointer, eax = byte offset of the current vertex; after the loop
// eax / edx are reloaded with the min / max output pointers.
// xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max, xmm6/xmm7 = texcoord min/max.
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per iteration of the emitted loop
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx holds the index pointer below; it is restored by pop(ebx) before ret()
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
vbroadcastss(xmm4, ptr[&s_minmax.x]);
|
||||
vbroadcastss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
vpcmpeqd(xmm2, xmm2);
|
||||
vpxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm6, xmm4);
|
||||
vmovaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// the body below is unrolled n times at generation time, once per vertex of the primitive
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by index[j]
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW11)
|
||||
|
||||
// first 16 bytes of the vertex; only loaded when color or texcoords need it
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
vmovaps(xmm0, ptr[edx + eax]);
|
||||
}
|
||||
|
||||
// with iip == 0 only the last vertex of each primitive contributes to the color range
if(color && (iip || j == n - 1))
|
||||
{
|
||||
vpminub(xmm2, xmm0);
|
||||
vpmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
// keep an unshuffled copy so lane 3 (q) can be broadcast as the divisor
vmovaps(xmm1, xmm0);
|
||||
}
|
||||
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
vdivps(xmm0, xmm1);
|
||||
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
|
||||
}
|
||||
|
||||
vminps(xmm6, xmm0);
|
||||
vmaxps(xmm7, xmm0);
|
||||
}
|
||||
|
||||
// second 16 bytes of the vertex: integer position data, converted to float below
vmovdqa(xmm0, ptr[edx + eax + 16]);
|
||||
vpmovzxwd(xmm1, xmm0);
|
||||
|
||||
// xmm1 = (word0, word1, dword0 >> 1, dword1 >> 1) as floats; lanes 0, 1 and 3 survive the final xyww shuffle
vpsrld(xmm0, 1);
|
||||
vpunpcklqdq(xmm1, xmm0);
|
||||
vcvtdq2ps(xmm1, xmm1);
|
||||
|
||||
vminps(xmm4, xmm1);
|
||||
vmaxps(xmm5, xmm1);
|
||||
}
|
||||
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
// flags come from sub(ecx, n): loop again while the remaining count is still positive
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// the loop is done with eax / edx; reuse them for the min / max output pointers
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm2, xmm2);
|
||||
|
||||
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm3, xmm3);
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin.xyww();
|
||||
// m_max.p = pmax.xyww();
|
||||
|
||||
vshufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
vshufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin;
|
||||
// m_max.t = tmax;
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,562 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSVertexTrace.h"
|
||||
|
||||
#if _M_SSE < 0x500 && !(defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Byte offsets from esp at which the generated functions in this file read their stack
// arguments; every generator emits push(ebx) before using them.
// NOTE(review): _args = 4 presumably accounts for the saved ebx, and the further +4 for the return address - confirm.
// NOTE(review): the trailing register names look like the x64 counterparts of each argument - confirm.
static const int _args = 4;
|
||||
static const int _count = _args + 4; // rcx
|
||||
static const int _vertex = _args + 8; // rdx
|
||||
static const int _index = _args + 12; // r8
|
||||
static const int _min = _args + 16; // r9
|
||||
static const int _max = _args + 20; // _args + 4
|
||||
|
||||
// Code generator for the 32-bit SSE vertex-trace routine over indexed GSVertexSW vertices.
// The emitted function walks the index array n indices (one primitive) at a time and stores
// the component-wise minimum / maximum of color (c), position (p) and texture coordinate (t)
// into two GSVertexTrace::Vertex structures.
//
// key bits select what is emitted: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// param is not read by this constructor.
//
// Registers in the emitted code: ecx = remaining index count, edx = vertex array base,
// ebx = current index pointer, eax = byte offset of the current vertex; after the loop
// eax / edx are reloaded with the min / max output pointers.
// xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max, xmm6/xmm7 = texcoord min/max.
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per iteration of the emitted loop
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx holds the index pointer below; it is restored by pop(ebx) before ret()
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
// load the two s_minmax scalars and broadcast each to all four lanes
movss(xmm4, ptr[&s_minmax.x]);
|
||||
movss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = FLT_MAX;
|
||||
// max.c = -FLT_MAX;
|
||||
|
||||
movaps(xmm2, xmm4);
|
||||
movaps(xmm3, xmm5);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
movaps(xmm6, xmm4);
|
||||
movaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// sprites: the divisor is lane 2 of t of the vertex at index[1], reused for every vertex of the primitive
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
// the body below is unrolled n times at generation time, once per vertex of the primitive
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by index[j]
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
// with iip == 0 only the last vertex of each primitive contributes to the color range
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.minv(v[i + j].c);
|
||||
// max.c = max.c.maxv(v[i + j].c);
|
||||
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
|
||||
|
||||
minps(xmm2, xmm0);
|
||||
maxps(xmm3, xmm0);
|
||||
}
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
|
||||
|
||||
minps(xmm4, xmm0);
|
||||
maxps(xmm5, xmm0);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
// non-sprites divide by this vertex's own lane 2 of t
if(primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, xmm0);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
// keep the two quotients in lanes 0-1 and put the divisor back into lanes 2-3
divps(xmm0, xmm1);
|
||||
shufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
|
||||
}
|
||||
|
||||
minps(xmm6, xmm0);
|
||||
maxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
// flags come from sub(ecx, n): loop again while the remaining count is still positive
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// the loop is done with eax / edx; reuse them for the min / max output pointers
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// color range: truncate the floats to integers, then shift right by 7 before storing
cvttps2dq(xmm2, xmm2);
|
||||
psrld(xmm2, 7);
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
|
||||
cvttps2dq(xmm3, xmm3);
|
||||
psrld(xmm3, 7);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
// Code generator for the 32-bit SSE vertex-trace routine over indexed GSVertexHW9 vertices.
// The emitted function walks the index array n indices (one primitive) at a time and stores
// the component-wise minimum / maximum of position (p), texture coordinate (t) and color (c)
// into two GSVertexTrace::Vertex structures.
//
// key bits select what is emitted: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// param is not read by this constructor.
//
// Registers in the emitted code: ecx = remaining index count, edx = vertex array base,
// ebx = current index pointer, eax = byte offset of the current vertex; after the loop
// eax / edx are reloaded with the min / max output pointers.
// xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max, xmm6/xmm7 = texcoord min/max.
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per iteration of the emitted loop
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
// NOTE(review): sprites consume 6 indices per iteration here, while the sprite divisor below is still read through index[1] - confirm this matches the index layout the caller provides
case GS_SPRITE_CLASS:
|
||||
n = 6;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx holds the index pointer below; it is restored by pop(ebx) before ret()
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
// load the two s_minmax scalars and broadcast each to all four lanes
movss(xmm4, ptr[&s_minmax.x]);
|
||||
movss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
pcmpeqd(xmm2, xmm2);
|
||||
pxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
movaps(xmm6, xmm4);
|
||||
movaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// sprites: the divisor is lane 3 of p of the vertex at index[1], reused for every vertex of the primitive
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
// the body below is unrolled n times at generation time, once per vertex of the primitive
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by index[j]
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
|
||||
minps(xmm4, xmm0);
|
||||
maxps(xmm5, xmm0);
|
||||
|
||||
// non-sprites divide by this vertex's own lane 3 of p; must be taken before xmm0 is reloaded below
if(tme && !fst && primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, xmm0);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
// the 16 bytes at t are shared by the color and the texcoord paths
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
|
||||
}
|
||||
|
||||
// with iip == 0 only the last vertex of each primitive contributes to the color range
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.min_u8(v[i + j].c);
|
||||
// max.c = max.c.max_u8(v[i + j].c);
|
||||
|
||||
pminub(xmm2, xmm0);
|
||||
pmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
// t /= p.wwww();
|
||||
|
||||
divps(xmm0, xmm1);
|
||||
}
|
||||
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
minps(xmm6, xmm0);
|
||||
maxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
// flags come from sub(ecx, n): loop again while the remaining count is still positive
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// the loop is done with eax / edx; reuse them for the min / max output pointers
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm2, xmm2);
|
||||
|
||||
pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm3, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// SSE2 fallback: zero-extend bytes 8..11 (the third dword) with two unpacks against zero
pxor(xmm0, xmm0);
|
||||
|
||||
punpckhbw(xmm2, xmm0);
|
||||
punpcklwd(xmm2, xmm0);
|
||||
|
||||
punpckhbw(xmm3, xmm0);
|
||||
punpcklwd(xmm3, xmm0);
|
||||
}
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin;
|
||||
// m_max.p = pmax;
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin.xyww(pmin);
|
||||
// m_max.t = tmax.xyww(pmax);
|
||||
|
||||
shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
// Code generator for the 32-bit SSE vertex-trace routine over indexed GSVertexHW11 vertices.
// The emitted function walks the index array n indices (one primitive) at a time and stores
// the component-wise minimum / maximum of position (p), texture coordinate (t) and color (c)
// into two GSVertexTrace::Vertex structures.
//
// key bits select what is emitted: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// param is not read by this constructor.
//
// Registers in the emitted code: ecx = remaining index count, edx = vertex array base,
// ebx = current index pointer, eax = byte offset of the current vertex; after the loop
// eax / edx are reloaded with the min / max output pointers.
// xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max, xmm6/xmm7 = texcoord min/max.
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per iteration of the emitted loop
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx holds the index pointer below; it is restored by pop(ebx) before ret()
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
// load the two s_minmax scalars and broadcast each to all four lanes
movss(xmm4, ptr[&s_minmax.x]);
|
||||
movss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
pcmpeqd(xmm2, xmm2);
|
||||
pxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
movaps(xmm6, xmm4);
|
||||
movaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// the body below is unrolled n times at generation time, once per vertex of the primitive
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by index[j]
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW11)
|
||||
|
||||
// first 16 bytes of the vertex; only loaded when color or texcoords need it
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
movaps(xmm0, ptr[edx + eax]);
|
||||
}
|
||||
|
||||
// with iip == 0 only the last vertex of each primitive contributes to the color range
if(color && (iip || j == n - 1))
|
||||
{
|
||||
pminub(xmm2, xmm0);
|
||||
pmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
// keep an unshuffled copy so lane 3 (q) can be broadcast as the divisor
movaps(xmm1, xmm0);
|
||||
}
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
divps(xmm0, xmm1);
|
||||
shufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
|
||||
}
|
||||
|
||||
minps(xmm6, xmm0);
|
||||
maxps(xmm7, xmm0);
|
||||
}
|
||||
|
||||
// second 16 bytes of the vertex: integer position data, converted to float below
movdqa(xmm0, ptr[edx + eax + 16]);
|
||||
|
||||
// xmm1 = low four 16-bit words zero-extended to dwords
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pmovzxwd(xmm1, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
// SSE2 fallback: duplicate each low word into a dword, then shift the copy out
movdqa(xmm1, xmm0);
|
||||
punpcklwd(xmm1, xmm1);
|
||||
psrld(xmm1, 16);
|
||||
}
|
||||
|
||||
// xmm1 = (word0, word1, dword0 >> 1, dword1 >> 1) as floats; lanes 0, 1 and 3 survive the final xyww shuffle
psrld(xmm0, 1);
|
||||
punpcklqdq(xmm1, xmm0);
|
||||
cvtdq2ps(xmm1, xmm1);
|
||||
|
||||
minps(xmm4, xmm1);
|
||||
maxps(xmm5, xmm1);
|
||||
}
|
||||
|
||||
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
// flags come from sub(ecx, n): loop again while the remaining count is still positive
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// the loop is done with eax / edx; reuse them for the min / max output pointers
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm2, xmm2);
|
||||
|
||||
pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm3, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// SSE2 fallback: zero-extend bytes 8..11 (the third dword) with two unpacks against zero
pxor(xmm0, xmm0);
|
||||
|
||||
punpckhbw(xmm2, xmm0);
|
||||
punpcklwd(xmm2, xmm0);
|
||||
|
||||
punpckhbw(xmm3, xmm0);
|
||||
punpcklwd(xmm3, xmm0);
|
||||
}
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin.xyww();
|
||||
// m_max.p = pmax.xyww();
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin;
|
||||
// m_max.t = tmax;
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
#endif
|
|
@ -618,62 +618,6 @@
|
|||
<ClCompile Include="GSVertexList.cpp" />
|
||||
<ClCompile Include="GSVertexSW.cpp" />
|
||||
<ClCompile Include="GSVertexTrace.cpp" />
|
||||
<ClCompile Include="GSVertexTrace.x64.avx.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexTrace.x64.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexTrace.x86.avx.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexTrace.x86.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSWnd.cpp" />
|
||||
<ClCompile Include="stdafx.cpp">
|
||||
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">Create</PrecompiledHeader>
|
||||
|
|
|
@ -288,18 +288,6 @@
|
|||
<ClCompile Include="GSDeviceSDL.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexTrace.x64.avx.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexTrace.x64.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexTrace.x86.avx.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexTrace.x86.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSSetupPrimCodeGenerator.x64.avx.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -1024,6 +1024,10 @@
|
|||
RelativePath=".\GSRenderer.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSRendererCS.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSRendererDX.cpp"
|
||||
>
|
||||
|
@ -1240,110 +1244,6 @@
|
|||
RelativePath=".\GSVertexTrace.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSVertexTrace.x64.cpp"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Debug SSE2|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSE2|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSSE3|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug SSSE3|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug SSE4|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSE4|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSVertexTrace.x86.cpp"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Debug SSE2|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSE2|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSSE3|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug SSSE3|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug SSE4|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSE4|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSWnd.cpp"
|
||||
>
|
||||
|
@ -1630,6 +1530,10 @@
|
|||
RelativePath=".\GSRenderer.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSRendererCS.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSRendererDX.h"
|
||||
>
|
||||
|
|
|
@ -1,73 +1,383 @@
|
|||
struct Vertex
|
||||
#ifndef VS_TME
|
||||
#define VS_TME 1
|
||||
#define VS_FST 1
|
||||
#endif
|
||||
|
||||
#ifndef GS_IIP
|
||||
#define GS_IIP 0
|
||||
#define GS_PRIM 2
|
||||
#endif
|
||||
|
||||
#ifndef PS_BATCH_SIZE
|
||||
#define PS_BATCH_SIZE 2048
|
||||
#define PS_FPSM PSM_PSMCT32
|
||||
#define PS_ZPSM PSM_PSMZ16
|
||||
#endif
|
||||
|
||||
#define PSM_PSMCT32 0
|
||||
#define PSM_PSMCT24 1
|
||||
#define PSM_PSMCT16 2
|
||||
#define PSM_PSMCT16S 10
|
||||
#define PSM_PSMT8 19
|
||||
#define PSM_PSMT4 20
|
||||
#define PSM_PSMT8H 27
|
||||
#define PSM_PSMT4HL 36
|
||||
#define PSM_PSMT4HH 44
|
||||
#define PSM_PSMZ32 48
|
||||
#define PSM_PSMZ24 49
|
||||
#define PSM_PSMZ16 50
|
||||
#define PSM_PSMZ16S 58
|
||||
|
||||
struct VS_INPUT
|
||||
{
|
||||
float2 st;
|
||||
uint c;
|
||||
float q;
|
||||
uint xy, z;
|
||||
uint uv, f;
|
||||
float2 st : TEXCOORD0;
|
||||
float4 c : COLOR0;
|
||||
float q : TEXCOORD1;
|
||||
uint2 p : POSITION0;
|
||||
uint z : POSITION1;
|
||||
uint2 uv : TEXCOORD2;
|
||||
float4 f : COLOR1;
|
||||
};
|
||||
|
||||
struct VS_OUTPUT
|
||||
{
|
||||
float4 p : SV_Position;
|
||||
float2 z : TEXCOORD0;
|
||||
float4 t : TEXCOORD1;
|
||||
float4 c : COLOR0;
|
||||
};
|
||||
|
||||
struct GS_OUTPUT
|
||||
{
|
||||
float4 p : SV_Position;
|
||||
float2 z : TEXCOORD0;
|
||||
float4 t : TEXCOORD1;
|
||||
float4 c : COLOR0;
|
||||
uint id : SV_PrimitiveID;
|
||||
};
|
||||
|
||||
cbuffer VSConstantBuffer : register(c0)
|
||||
{
|
||||
float4 VertexScale;
|
||||
float4 VertexOffset;
|
||||
};
|
||||
|
||||
cbuffer PSConstantBuffer : register(c0)
|
||||
{
|
||||
uint2 WriteMask;
|
||||
};
|
||||
|
||||
struct FragmentLinkItem
|
||||
{
|
||||
uint c, z, id, next;
|
||||
};
|
||||
|
||||
RWByteAddressBuffer VideoMemory : register(u0);
|
||||
RWStructuredBuffer<FragmentLinkItem> FragmentLinkBuffer : register(u1);
|
||||
RWByteAddressBuffer StartOffsetBuffer : register(u2);
|
||||
//RWTexture2D<uint> VideoMemory : register(u2); // 8192 * 512 R8_UINT
|
||||
|
||||
StructuredBuffer<Vertex> VertexBuffer : register(t0);
|
||||
Buffer<uint> IndexBuffer : register(t1);
|
||||
Buffer<int2> FZRowOffset : register(t0);
|
||||
Buffer<int2> FZColOffset : register(t1);
|
||||
Texture2D<float4> Palette : register(t2);
|
||||
Texture2D<float4> Texture : register(t3);
|
||||
|
||||
Buffer<int> FrameRowOffset : register(t2);
|
||||
Buffer<int> FrameColOffset : register(t3);
|
||||
Buffer<int> ZBufRowOffset : register(t4);
|
||||
Buffer<int> ZBufColOffset : register(t5);
|
||||
|
||||
cbuffer DrawingEnvironment : register(c0)
|
||||
VS_OUTPUT vs_main(VS_INPUT input)
|
||||
{
|
||||
// TODO
|
||||
};
|
||||
VS_OUTPUT output;
|
||||
|
||||
// one group is 16x8 pixels and one thread does 2 pixels, otherwise could not read-merge-write 16-bit targets safely
|
||||
// neighboring pixels are next to each other in memory, at least we don't have to calculate the address twice
|
||||
output.p = float4(input.p, 0.0f, 0.0f) * VertexScale - VertexOffset;
|
||||
output.z = float2(input.z & 0xffff, input.z >> 16); // TODO: min(input.z, 0xffffff00) ?
|
||||
|
||||
// TODO: they say groupshared memory is faster, try unswizzling the corresponding chunk of memory initially (how to do that once by only one thread?) then write-back when finished, unless it was untouched
|
||||
|
||||
[numthreads(8, 8, 1)]
|
||||
void cs_main(uint3 gid : SV_GroupID, uint3 tid : SV_GroupThreadID)
|
||||
{
|
||||
uint count;
|
||||
|
||||
IndexBuffer.GetDimensions(count);
|
||||
|
||||
// #if GS_PRIM == 2 (triangle)
|
||||
|
||||
for(uint i = 0; i < count; i += 3)
|
||||
if(VS_TME)
|
||||
{
|
||||
Vertex v0 = VertexBuffer[IndexBuffer[i + 0]];
|
||||
Vertex v1 = VertexBuffer[IndexBuffer[i + 1]];
|
||||
Vertex v2 = VertexBuffer[IndexBuffer[i + 2]];
|
||||
|
||||
uint x = gid.x + tid.x * 2;
|
||||
uint y = gid.y + tid.y;
|
||||
|
||||
uint fa = FrameRowOffset[y] + FrameColOffset[x];
|
||||
uint za = ZBufRowOffset[y] + ZBufColOffset[x];
|
||||
|
||||
// TODO: quickly reject if x, y is outside the triangle
|
||||
// TODO: calculate interpolated values at x, y
|
||||
// TODO: run the GS pipeline
|
||||
// TODO: repeat for x+1, y
|
||||
// TODO: output two pixels (might be better to process a single pixel, more threads, if there is no 16-bit target involved)
|
||||
|
||||
// testing...
|
||||
|
||||
uint4 c = VideoMemory.Load4(fa); // does this load 4*4 bytes? or 4 bytes each expanded uint?
|
||||
|
||||
c = (v0.c >> uint4(0, 8, 16, 24)) & 0xff; // => ushr r1.yzw, r1.xxxx, l(0, 8, 16, 24), v0.c auto-converted to uint4 and per-component shift in one instruction, SSE is embarrassed
|
||||
|
||||
VideoMemory.Store4(fa, c); // same question, 4*4 bytes or compressed to uint
|
||||
if(VS_FST)
|
||||
{
|
||||
output.t.xy = input.uv;
|
||||
output.t.w = 1.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
output.t.xy = input.st;
|
||||
output.t.w = input.q;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
output.t.xy = 0;
|
||||
output.t.w = 1.0f;
|
||||
}
|
||||
|
||||
// #endif
|
||||
output.c = input.c;
|
||||
output.t.z = input.f.r;
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
// TODO: DrawPoint (this is going to be a waste of resources)
|
||||
// TODO: DrawLine (line hit-test, will it work?)
|
||||
// TODO: DrawSprite (similar to DrawTriangle)
|
||||
// TODO: if read-backs are too slow, implement GSState::Write/FlushWrite/Read/clut.Write in a compute shader
|
||||
// TODO: unswizzle pages from VideoMemory to the texture cache (if they are marked as valid, otherwise upload from GSLocalMemory::m_vm8)
|
||||
#if GS_PRIM == 0
|
||||
|
||||
[maxvertexcount(1)]
void gs_main(point VS_OUTPUT input[1], inout PointStream<GS_OUTPUT> stream, uint id : SV_PrimitiveID)
{
	// Point primitive: the single incoming vertex is forwarded unchanged,
	// with the primitive id attached.

	VS_OUTPUT v = input[0];

	GS_OUTPUT o;

	o.id = id;
	o.c = v.c;
	o.t = v.t;
	o.z = v.z;
	o.p = v.p;

	stream.Append(o);
}
|
||||
|
||||
#elif GS_PRIM == 1
|
||||
|
||||
[maxvertexcount(2)]
void gs_main(line VS_OUTPUT input[2], inout LineStream<GS_OUTPUT> stream, uint id : SV_PrimitiveID)
{
	// Line primitive: both vertices are forwarded with the primitive id attached.

	[unroll]
	for(int n = 0; n < 2; n++)
	{
		GS_OUTPUT v;

		v.p = input[n].p;
		v.z = input[n].z;
		v.t = input[n].t;
		v.id = id;

		// With GS_IIP == 0 every vertex carries the color of the last vertex,
		// otherwise each vertex keeps its own color.

#if GS_IIP == 0
		v.c = input[1].c;
#else
		v.c = input[n].c;
#endif

		stream.Append(v);
	}
}
|
||||
|
||||
#elif GS_PRIM == 2
|
||||
|
||||
[maxvertexcount(3)]
void gs_main(triangle VS_OUTPUT input[3], inout TriangleStream<GS_OUTPUT> stream, uint id : SV_PrimitiveID)
{
	// Triangle primitive: all three vertices are forwarded with the primitive id attached.

	[unroll]
	for(int n = 0; n < 3; n++)
	{
		GS_OUTPUT v;

		v.p = input[n].p;
		v.z = input[n].z;
		v.t = input[n].t;
		v.id = id;

		// With GS_IIP == 0 every vertex carries the color of the last vertex,
		// otherwise each vertex keeps its own color.

#if GS_IIP == 0
		v.c = input[2].c;
#else
		v.c = input[n].c;
#endif

		stream.Append(v);
	}
}
|
||||
|
||||
#elif GS_PRIM == 3
|
||||
|
||||
[maxvertexcount(4)]
void gs_main(line VS_OUTPUT input[2], inout TriangleStream<GS_OUTPUT> stream, uint id : SV_PrimitiveID)
{
	// Sprite primitive: two corner vertices are expanded into a four-vertex
	// triangle strip. z and t.zw of every output vertex come from the second
	// input vertex.

	VS_OUTPUT a = input[0];
	VS_OUTPUT b = input[1];

	// Second corner: a straight copy of the second input vertex.

	GS_OUTPUT rb;

	rb.p = b.p;
	rb.z = b.z;
	rb.t = b.t;
	rb.c = b.c;
	rb.id = id;

	// First corner: position and t.xy from the first input vertex,
	// everything else shared with the second corner.

	GS_OUTPUT lt = rb;

	lt.p = a.p;
	lt.t.xy = a.t.xy;

	// With GS_IIP == 0 the first corner keeps the second vertex's color.

#if GS_IIP != 0
	lt.c = a.c;
#endif

	// Remaining two corners swap the y components of position and texture coordinate.

	GS_OUTPUT lb = lt;

	lb.p.y = rb.p.y;
	lb.t.y = rb.t.y;

	GS_OUTPUT rt = rb;

	rt.p.y = lt.p.y;
	rt.t.y = lt.t.y;

	stream.Append(lt);
	stream.Append(lb);
	stream.Append(rt);
	stream.Append(rb);
}
|
||||
|
||||
#endif
|
||||
|
||||
// Packs a float4 color into one 32-bit value: each component is scaled by 255,
// truncated to an integer, and placed in its own byte (r lowest, a highest).
uint CompressColor32(float4 f)
{
	uint4 ch = (uint4)(f * 0xff);

	return ch.r | (ch.g << 8) | (ch.b << 16) | (ch.a << 24);
}
|
||||
|
||||
// Expands a 16-bit 5:5:5:1 color (r in bits 0-4, g in 5-9, b in 10-14, a in bit 15)
// to a 32-bit 8:8:8:8 color. Each 5-bit field ends up in the top five bits of its
// byte, and the alpha bit ends up in the top bit of the alpha byte.
uint DecompressColor16(uint c)
{
	uint r = (c & 0x001f) << 3;
	uint g = (c & 0x03e0) << 6;
	uint b = (c & 0x7c00) << 9;

	// Bit 15 has to move to bit 31; a shift of 15 would leave it in bit 30.
	uint a = (c & 0x8000) << 16;

	return r | g | b | a;
}
|
||||
|
||||
// Reads the 32-bit word containing addr from VideoMemory and shifts it right by
// 16 when bit 1 of addr is set, so the addressed 16-bit half ends up in the low
// bits. The upper bits of the result are not masked off.
uint ReadPixel(uint addr)
{
	// Byte-address buffer loads require a 4-byte aligned address, so the
	// word address is formed explicitly instead of passing addr unchanged.
	uint word = VideoMemory.Load(addr & ~3);

	return word >> ((addr & 2) << 3);
}
|
||||
|
||||
// Writes value to VideoMemory at addr according to the pixel storage format psm.
// 32/24-bit formats store the whole word. 16-bit formats replace only the
// addressed half of the containing word. Any other psm writes nothing.
void WritePixel(uint addr, uint value, uint psm)
{
	switch(psm)
	{
	case PSM_PSMCT32:
	case PSM_PSMZ32:
	case PSM_PSMCT24:
	case PSM_PSMZ24:
		VideoMemory.Store(addr, value);
		break;
	case PSM_PSMCT16:
	case PSM_PSMCT16S:
	case PSM_PSMZ16:
	case PSM_PSMZ16S:
		{
			// Byte-address buffer operations require a 4-byte aligned address.
			uint word = addr & ~3;

			// 0 or 16: bit offset of the addressed half inside the word.
			uint shift = (addr & 2) << 3;

			// XOR-ing the word with (new ^ old), limited to the addressed half,
			// turns that half into the new value and leaves the other half alone.
			uint delta = ((value << shift) ^ VideoMemory.Load(word)) & (0x0000ffff << shift);

			uint previous;

			VideoMemory.InterlockedXor(word, delta, previous);
		}
		break;
	}
}
|
||||
|
||||
// First pass: appends the incoming fragment to the linked list of its pixel.
// The list head of each pixel lives in StartOffsetBuffer (2048 entries per row,
// 4 bytes each); the nodes live in FragmentLinkBuffer.
void ps_main0(GS_OUTPUT input)
{
	uint2 pos = (uint2)input.p.xy;

	FragmentLinkItem item;

	// TODO: preprocess color (tfx, alpha test), z-test

	item.c = CompressColor32(input.c);
	item.z = (uint)(input.z.y * 0x10000 + input.z.x);
	item.id = input.id;

	// Allocate a node and make it the new list head; the previous head
	// becomes this node's successor.
	// NOTE(review): a node index of 0 looks indistinguishable from the
	// "empty list" value used for next - confirm how the counter is initialized.

	uint tail = FragmentLinkBuffer.IncrementCounter();

	uint previous = 0;

	StartOffsetBuffer.InterlockedExchange(((pos.y << 11) + pos.x) * 4, tail, previous);

	item.next = previous;

	FragmentLinkBuffer[tail] = item;
}
|
||||
|
||||
// Second pass: collects the fragment list of this pixel, sorts it by primitive id,
// resolves it against the current frame/z values and writes the result back.
void ps_main1(GS_OUTPUT input)
{
	uint2 pos = (uint2)input.p.xy;

	// sort fragments

	uint StartOffsetIndex = (pos.y << 11) + pos.x;

	int index[PS_BATCH_SIZE];
	int count = 0;

	uint next = StartOffsetBuffer.Load(StartOffsetIndex * 4);

	// Reset the list head so the pixel starts empty for the next batch.

	StartOffsetBuffer.Store(StartOffsetIndex * 4, 0);

	// Walk the list. The count check keeps a list longer than PS_BATCH_SIZE
	// from writing past the end of index[]; surplus fragments are dropped.

	[allow_uav_condition]
	while(next != 0 && count < PS_BATCH_SIZE)
	{
		index[count++] = next;

		next = FragmentLinkBuffer[next].next;
	}

	// Round count up to a power of two for the bitonic network. Done with
	// integer bit scanning so an empty list does not go through log2(0).
	// NOTE(review): N2 can exceed PS_BATCH_SIZE if PS_BATCH_SIZE is overridden
	// with a value that is not a power of two - confirm it always is.

	int N2 = count > 1 ? 1 << ((int)firstbithigh((uint)(count - 1)) + 1) : 1;

	// Pad the unused slots with node 0.
	// NOTE(review): padding entries take part in the sort with whatever id
	// node 0 has, so they are not guaranteed to stay behind the real entries.

	[allow_uav_condition]
	for(int i = count; i < N2; i++)
	{
		index[i] = 0;
	}

	// Bitonic sort of index[] keyed on the fragment's primitive id.

	[allow_uav_condition]
	for(int k = 2; k <= N2; k = 2 * k)
	{
		[allow_uav_condition]
		for(int j = k >> 1; j > 0 ; j = j >> 1)
		{
			[allow_uav_condition]
			for(int i = 0; i < N2; i++)
			{
				int ixj = i ^ j;

				if(ixj > i)
				{
					uint i_id = FragmentLinkBuffer[index[i]].id;
					uint ixj_id = FragmentLinkBuffer[index[ixj]].id;

					// Blocks with (i & k) == 0 are ordered ascending, the others descending.
					bool ascending = (i & k) == 0;
					bool swap = ascending ? (i_id > ixj_id) : (i_id < ixj_id);

					if(swap)
					{
						int temp = index[i];
						index[i] = index[ixj];
						index[ixj] = temp;
					}
				}
			}
		}
	}

	// addr.x / addr.y: frame and z buffer addresses of this pixel.

	uint2 addr = (uint2)(FZRowOffset[pos.y] + FZColOffset[pos.x]) << 1;

	uint dc = ReadPixel(addr.x);
	uint dz = ReadPixel(addr.y);

	uint sc = dc;
	uint sz = dz;

	// Walk the collected fragments from the last slot to the first and keep
	// the color of the fragment with the greatest z seen so far.

	[allow_uav_condition]
	while(--count >= 0)
	{
		FragmentLinkItem f = FragmentLinkBuffer[index[count]];

		// TODO

		if(sz < f.z)
		{
			sc = f.c;
			sz = f.z;
		}
	}

	uint c = sc; // (dc & ~WriteMask.x) | (sc & WriteMask.x);
	uint z = 0;//sz; //(dz & ~WriteMask.y) | (sz & WriteMask.y);

	WritePixel(addr.x, c, PS_FPSM);
	WritePixel(addr.y, z, PS_ZPSM);
}
|
||||
|
|
|
@ -40,11 +40,12 @@
|
|||
|
||||
struct VS_INPUT
|
||||
{
|
||||
float2 st : TEXCOORD0;
|
||||
float4 c : COLOR0;
|
||||
float q : TEXCOORD1;
|
||||
uint2 p : POSITION0;
|
||||
uint z : POSITION1;
|
||||
float2 t : TEXCOORD0;
|
||||
float q : TEXCOORD1;
|
||||
float4 c : COLOR0;
|
||||
uint2 uv : TEXCOORD2;
|
||||
float4 f : COLOR1;
|
||||
};
|
||||
|
||||
|
@ -602,12 +603,12 @@ VS_OUTPUT vs_main(VS_INPUT input)
|
|||
{
|
||||
if(VS_FST)
|
||||
{
|
||||
output.t.xy = input.t * TextureScale;
|
||||
output.t.xy = input.uv * TextureScale;
|
||||
output.t.w = 1.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
output.t.xy = input.t;
|
||||
output.t.xy = input.st;
|
||||
output.t.w = input.q;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue