Grab bag of optimizations: many optimizations to the SSE functions, which now use intrinsic functions. This would enable these functions to work on GCC (I think) if our configuration supported it, but it will fail for some VC++ Express installations. Users of those installations will have to either #define SSE2_NOINTRIN or help me figure out which installations are broken, why they are broken, and how to fix them. Also, collapse the BG layer pixel blenders into a single function, so watch for regressions there.
This commit is contained in:
parent
baef153e05
commit
062a228877
|
@ -39,6 +39,7 @@
|
|||
|
||||
//#undef FORCEINLINE
|
||||
//#define FORCEINLINE
|
||||
//#define SSE2_NOINTRIN
|
||||
|
||||
ARM9_struct ARM9Mem;
|
||||
|
||||
|
@ -491,29 +492,6 @@ FORCEINLINE void GPU::setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
enum OBJFunc
|
||||
{
|
||||
None, Blend, Increase, Decrease
|
||||
};
|
||||
template<OBJFunc FUNC, bool WINDOW>
|
||||
static void _master_setFinalOBJColor(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
|
||||
static void setFinalOBJColorSpecialNoneWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
static void setFinalOBJColorSpecialBlendWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
static void setFinalOBJColorSpecialIncreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
static void setFinalOBJColorSpecialDecreaseWnd (GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
|
||||
const GPU::FinalOBJColFunct pixelBlittersOBJ[8] = {
|
||||
_master_setFinalOBJColor<None,false>,
|
||||
_master_setFinalOBJColor<Blend,false>,
|
||||
_master_setFinalOBJColor<Increase,false>,
|
||||
_master_setFinalOBJColor<Decrease,false>,
|
||||
_master_setFinalOBJColor<None,true>,
|
||||
_master_setFinalOBJColor<Blend,true>,
|
||||
_master_setFinalOBJColor<Increase,true>,
|
||||
_master_setFinalOBJColor<Decrease,true> };
|
||||
|
||||
/*****************************************************************************/
|
||||
// INITIALIZATION
|
||||
/*****************************************************************************/
|
||||
|
@ -591,7 +569,7 @@ GPU * GPU_Init(u8 l)
|
|||
g->need_update_winh[1] = true;
|
||||
g->setFinalColorBck_funcNum = 0;
|
||||
g->setFinalColor3d_funcNum = 0;
|
||||
g->setFinalColorSpr = _master_setFinalOBJColor<None,false>;
|
||||
g->setFinalColorSpr_funcNum = 0;
|
||||
|
||||
return g;
|
||||
}
|
||||
|
@ -602,7 +580,7 @@ void GPU_Reset(GPU *g, u8 l)
|
|||
|
||||
g->setFinalColorBck_funcNum = 0;
|
||||
g->setFinalColor3d_funcNum = 0;
|
||||
g->setFinalColorSpr = _master_setFinalOBJColor<None,false>;
|
||||
g->setFinalColorSpr_funcNum = 0;
|
||||
g->core = l;
|
||||
g->BGSize[0][0] = g->BGSize[1][0] = g->BGSize[2][0] = g->BGSize[3][0] = 256;
|
||||
g->BGSize[0][1] = g->BGSize[1][1] = g->BGSize[2][1] = g->BGSize[3][1] = 256;
|
||||
|
@ -729,7 +707,7 @@ void SetupFinalPixelBlitter (GPU *gpu)
|
|||
u8 windowUsed = (gpu->WIN0_ENABLED | gpu->WIN1_ENABLED | gpu->WINOBJ_ENABLED);
|
||||
u8 blendMode = (gpu->BLDCNT >> 6)&3;
|
||||
|
||||
gpu->setFinalColorSpr = pixelBlittersOBJ[windowUsed*4 + blendMode];
|
||||
gpu->setFinalColorSpr_funcNum = windowUsed*4 + blendMode;
|
||||
gpu->setFinalColorBck_funcNum = windowUsed*4 + blendMode;
|
||||
gpu->setFinalColor3d_funcNum = windowUsed*4 + blendMode;
|
||||
|
||||
|
@ -961,128 +939,47 @@ FORCEINLINE void GPU::renderline_checkWindows(u16 x, bool &draw, bool &effect) c
|
|||
// PIXEL RENDERING - BGS
|
||||
/*****************************************************************************/
|
||||
|
||||
template<bool BACKDROP> FORCEINLINE void GPU::setFinalBGColorSpecialNone(u16 &color, const u32 x)
|
||||
{
|
||||
}
|
||||
|
||||
template<bool BACKDROP> FORCEINLINE void GPU::setFinalBGColorSpecialBlend(u16 &color, const u32 x)
|
||||
template<bool BACKDROP, BlendFunc FUNC, bool WINDOW>
|
||||
FORCEINLINE FASTCALL bool GPU::_master_setFinalBGColor(u16 &color, const u32 x)
|
||||
{
|
||||
//no further analysis for no special effects. just draw it.
|
||||
if(FUNC == None) return true;
|
||||
|
||||
//blend backdrop with what?? this doesn't make sense
|
||||
if(BACKDROP) return;
|
||||
if(blend1)
|
||||
if(FUNC==Blend && BACKDROP) return true;
|
||||
|
||||
bool windowEffect = true;
|
||||
|
||||
if(WINDOW)
|
||||
{
|
||||
//If the layer we are drawing on is selected as 2nd source, we can blend
|
||||
int bg_under = bgPixels[x];
|
||||
if(blend2[bg_under])
|
||||
color = blend(color,T2ReadWord(currDst, x<<1));
|
||||
bool windowDraw;
|
||||
renderline_checkWindows(x, windowDraw, windowEffect);
|
||||
|
||||
//backdrop must always be drawn
|
||||
if(BACKDROP) windowDraw = true;
|
||||
|
||||
//we never have anything more to do if the window rejected us
|
||||
if(!windowDraw) return false;
|
||||
}
|
||||
}
|
||||
|
||||
template<bool BACKDROP> FORCEINLINE void GPU::setFinalBGColorSpecialIncrease (u16 &color, const u32 x)
|
||||
{
|
||||
if(blend1) // the bg to draw has a special color effect
|
||||
{
|
||||
color = currentFadeInColors[color];
|
||||
}
|
||||
}
|
||||
|
||||
template<bool BACKDROP> FORCEINLINE void GPU::setFinalBGColorSpecialDecrease(u16 &color, const u32 x)
|
||||
{
|
||||
if(blend1) // the bg to draw has a special color effect
|
||||
{
|
||||
color = currentFadeOutColors[color];
|
||||
}
|
||||
}
|
||||
|
||||
template<bool BACKDROP> FORCEINLINE bool GPU::setFinalBGColorSpecialNoneWnd(u16 &color, const u32 x)
|
||||
{
|
||||
bool windowDraw = true, windowEffect = true;
|
||||
|
||||
renderline_checkWindows(x, windowDraw, windowEffect);
|
||||
|
||||
if(BACKDROP) windowDraw = true; //backdrop must always be drawn
|
||||
|
||||
if (blend1 && windowEffect) // the bg to draw has a special color effect
|
||||
{
|
||||
//special effects rejected. just draw it.
|
||||
if(!(blend1 && windowEffect))
|
||||
return true;
|
||||
|
||||
const u8 bg_under = bgPixels[x];
|
||||
|
||||
//perform the special effect
|
||||
switch(FUNC) {
|
||||
case Blend: if(blend2[bg_under]) color = blend(color,T2ReadWord(currDst, x<<1)); break;
|
||||
case Increase: color = currentFadeInColors[color]; break;
|
||||
case Decrease: color = currentFadeOutColors[color]; break;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((windowEffect && (BLDCNT & (0x100 << currBgNum))) || windowDraw)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
template<bool BACKDROP> FORCEINLINE bool GPU::setFinalBGColorSpecialBlendWnd(u16 &color, const u32 x)
|
||||
{
|
||||
bool windowDraw = true, windowEffect = true;
|
||||
|
||||
renderline_checkWindows(x, windowDraw, windowEffect);
|
||||
|
||||
if(BACKDROP) windowDraw = true; //backdrop must always be drawn
|
||||
|
||||
if(windowDraw)
|
||||
{
|
||||
if(blend1 && windowEffect)
|
||||
{
|
||||
int bg_under = bgPixels[x];
|
||||
|
||||
// If the layer we are drawing on is selected as 2nd source, we can blend
|
||||
if(blend2[bg_under])
|
||||
color = blend(color,T2ReadWord(currDst, x<<1));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<bool BACKDROP> FORCEINLINE bool GPU::setFinalBGColorSpecialIncreaseWnd(u16 &color, const u32 x)
|
||||
{
|
||||
bool windowDraw = true, windowEffect = true;
|
||||
|
||||
renderline_checkWindows(x, windowDraw, windowEffect);
|
||||
|
||||
if(BACKDROP) windowDraw = true; //backdrop must always be drawn
|
||||
|
||||
if(windowDraw)
|
||||
{
|
||||
if(blend1 && windowEffect)
|
||||
{
|
||||
color = currentFadeInColors[color];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<bool BACKDROP> FORCEINLINE bool GPU::setFinalBGColorSpecialDecreaseWnd(u16 &color, const u32 x)
|
||||
{
|
||||
bool windowDraw = true, windowEffect = true;
|
||||
|
||||
renderline_checkWindows(x, windowDraw, windowEffect);
|
||||
|
||||
if(BACKDROP) windowDraw = true; //backdrop must always be drawn
|
||||
|
||||
if(windowDraw)
|
||||
{
|
||||
if(blend1 && windowEffect)
|
||||
{
|
||||
color = currentFadeOutColors[color];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
// PIXEL RENDERING - OBJS
|
||||
/*****************************************************************************/
|
||||
|
||||
template<OBJFunc FUNC, bool WINDOW>
|
||||
static void _master_setFinalOBJColor(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x)
|
||||
template<BlendFunc FUNC, bool WINDOW>
|
||||
static FORCEINLINE void _master_setFinalOBJColor(GPU *gpu, u8 *dst, u16 color, u8 alpha, u8 type, u16 x)
|
||||
{
|
||||
bool windowDraw = true, windowEffect = true;
|
||||
|
||||
|
@ -1094,19 +991,19 @@ static void _master_setFinalOBJColor(GPU *gpu, u32 passing, u8 *dst, u16 color,
|
|||
}
|
||||
|
||||
//this inspects the layer beneath the sprite to see if the current blend flags make it a candidate for blending
|
||||
int bg_under = gpu->bgPixels[x];
|
||||
bool allowBlend = ((bg_under != 4) && (gpu->BLDCNT & (0x100 << bg_under)));
|
||||
const int bg_under = gpu->bgPixels[x];
|
||||
const bool allowBlend = (bg_under != 4) && gpu->blend2[bg_under];
|
||||
|
||||
bool sourceEffectSelected = (gpu->BLDCNT & 0x10)!=0;
|
||||
const bool sourceEffectSelected = gpu->blend1;
|
||||
|
||||
//note that the fadein and fadeout is done here before blending,
|
||||
//so that a fade and blending can be applied at the same time
|
||||
//so that a fade and blending can be applied at the same time (actually, I don't think that is legal..)
|
||||
bool forceBlendingForNormal = false;
|
||||
if(windowEffect && sourceEffectSelected)
|
||||
switch(FUNC)
|
||||
{
|
||||
case Increase: if(!allowBlend) color = fadeInColors[gpu->BLDY_EVY][color&0x7FFF]; break;
|
||||
case Decrease: if(!allowBlend) color = fadeOutColors[gpu->BLDY_EVY][color&0x7FFF]; break;
|
||||
case Increase: if(!allowBlend) color = gpu->currentFadeInColors[color&0x7FFF]; break;
|
||||
case Decrease: if(!allowBlend) color = gpu->currentFadeOutColors[color&0x7FFF]; break;
|
||||
|
||||
//only when blend color effect is selected, ordinarily opaque sprites are blended with the color effect params
|
||||
case Blend: forceBlendingForNormal = true; break;
|
||||
|
@ -1115,7 +1012,7 @@ static void _master_setFinalOBJColor(GPU *gpu, u32 passing, u8 *dst, u16 color,
|
|||
|
||||
if(allowBlend)
|
||||
{
|
||||
u16 backColor = T2ReadWord(dst,passing);
|
||||
u16 backColor = T2ReadWord(dst,x<<1);
|
||||
//this hasn't been tested: this blending occurs without regard to the color effect,
|
||||
//but rather purely from the sprite's alpha
|
||||
if(type == GPU_OBJ_MODE_Bitmap)
|
||||
|
@ -1124,11 +1021,13 @@ static void _master_setFinalOBJColor(GPU *gpu, u32 passing, u8 *dst, u16 color,
|
|||
color = gpu->blend(color,backColor);
|
||||
}
|
||||
|
||||
T2WriteWord(dst, passing, (color | 0x8000));
|
||||
T2WriteWord(dst, x<<1, (color | 0x8000));
|
||||
gpu->bgPixels[x] = 4;
|
||||
}
|
||||
|
||||
template<bool BACKDROP> FORCEINLINE void GPU::setFinalColorBG(u16 color, const u32 x)
|
||||
//FUNCNUM is only set for backdrop, for an optimization of looking it up early
|
||||
template<bool BACKDROP, int FUNCNUM>
|
||||
FORCEINLINE void GPU::setFinalColorBG(u16 color, const u32 x)
|
||||
{
|
||||
//It is not safe to assert this here.
|
||||
//This is probably the best place to enforce it, since almost every single color that comes in here
|
||||
|
@ -1136,17 +1035,19 @@ template<bool BACKDROP> FORCEINLINE void GPU::setFinalColorBG(u16 color, const u
|
|||
//assert((color&0x8000)==0);
|
||||
if(!BACKDROP) color &= 0x7FFF; //but for the backdrop we can easily guarantee earlier that theres no bit here
|
||||
|
||||
bool draw=true;
|
||||
switch(setFinalColorBck_funcNum)
|
||||
bool draw;
|
||||
|
||||
const int test = BACKDROP?FUNCNUM:setFinalColorBck_funcNum;
|
||||
switch(test)
|
||||
{
|
||||
case 0x0: setFinalBGColorSpecialNone<BACKDROP>(color,x); break;
|
||||
case 0x1: setFinalBGColorSpecialBlend<BACKDROP>(color,x); break;
|
||||
case 0x2: setFinalBGColorSpecialIncrease<BACKDROP>(color,x); break;
|
||||
case 0x3: setFinalBGColorSpecialDecrease<BACKDROP>(color,x); break;
|
||||
case 0x4: draw=setFinalBGColorSpecialNoneWnd<BACKDROP>(color,x); break;
|
||||
case 0x5: draw=setFinalBGColorSpecialBlendWnd<BACKDROP>(color,x); break;
|
||||
case 0x6: draw=setFinalBGColorSpecialIncreaseWnd<BACKDROP>(color,x); break;
|
||||
case 0x7: draw=setFinalBGColorSpecialDecreaseWnd<BACKDROP>(color,x); break;
|
||||
case 0: draw = _master_setFinalBGColor<BACKDROP,None,false>(color,x); break;
|
||||
case 1: draw = _master_setFinalBGColor<BACKDROP,Blend,false>(color,x); break;
|
||||
case 2: draw = _master_setFinalBGColor<BACKDROP,Increase,false>(color,x); break;
|
||||
case 3: draw = _master_setFinalBGColor<BACKDROP,Decrease,false>(color,x); break;
|
||||
case 4: draw = _master_setFinalBGColor<BACKDROP,None,true>(color,x); break;
|
||||
case 5: draw = _master_setFinalBGColor<BACKDROP,Blend,true>(color,x); break;
|
||||
case 6: draw = _master_setFinalBGColor<BACKDROP,Increase,true>(color,x); break;
|
||||
case 7: draw = _master_setFinalBGColor<BACKDROP,Decrease,true>(color,x); break;
|
||||
};
|
||||
|
||||
if(BACKDROP || draw) //backdrop must always be drawn
|
||||
|
@ -1159,7 +1060,6 @@ template<bool BACKDROP> FORCEINLINE void GPU::setFinalColorBG(u16 color, const u
|
|||
|
||||
FORCEINLINE void GPU::setFinalColor3d(int dstX, int srcX)
|
||||
{
|
||||
//if someone disagrees with these, they could be reimplemented as a function pointer easily
|
||||
switch(setFinalColor3d_funcNum)
|
||||
{
|
||||
case 0x0: setFinal3DColorSpecialNone(dstX,srcX); break;
|
||||
|
@ -1173,9 +1073,31 @@ FORCEINLINE void GPU::setFinalColor3d(int dstX, int srcX)
|
|||
};
|
||||
}
|
||||
|
||||
//Draws one sprite (OBJ) pixel by dispatching to the _master_setFinalOBJColor
//instantiation selected by gpu->setFinalColorSpr_funcNum.
//Function numbers 0-3 pick None/Blend/Increase/Decrease with WINDOW=false,
//4-7 pick the same effects with WINDOW=true. Any other number draws nothing.
//All remaining arguments are forwarded unchanged.
//A switch over constant template arguments is used so that each case is a direct call.
FORCEINLINE void setFinalColorSpr(GPU* gpu, u8 *dst, u16 color, u8 alpha, u8 type, u16 x)
{
	const int funcNum = gpu->setFinalColorSpr_funcNum;

	switch(funcNum)
	{
		//window processing disabled
		case 0: _master_setFinalOBJColor<None,false>(gpu, dst, color, alpha, type, x); return;
		case 1: _master_setFinalOBJColor<Blend,false>(gpu, dst, color, alpha, type, x); return;
		case 2: _master_setFinalOBJColor<Increase,false>(gpu, dst, color, alpha, type, x); return;
		case 3: _master_setFinalOBJColor<Decrease,false>(gpu, dst, color, alpha, type, x); return;

		//window processing enabled
		case 4: _master_setFinalOBJColor<None,true>(gpu, dst, color, alpha, type, x); return;
		case 5: _master_setFinalOBJColor<Blend,true>(gpu, dst, color, alpha, type, x); return;
		case 6: _master_setFinalOBJColor<Increase,true>(gpu, dst, color, alpha, type, x); return;
		case 7: _master_setFinalOBJColor<Decrease,true>(gpu, dst, color, alpha, type, x); return;

		//unknown function number: nothing is drawn
		default: return;
	}
}
|
||||
|
||||
template<bool MOSAIC, bool BACKDROP>
|
||||
FORCEINLINE void GPU::__setFinalColorBck(u16 color, const u32 x, const int opaque)
|
||||
{
|
||||
return ___setFinalColorBck<MOSAIC, BACKDROP, 0>(color,x,opaque);
|
||||
}
|
||||
|
||||
//this was forced inline because most of the time it just falls through to setFinalColorBck() and the function call
|
||||
//overhead was ridiculous and terrible
|
||||
template<bool MOSAIC, bool BACKDROP> FORCEINLINE void GPU::__setFinalColorBck(u16 color, const u32 x, const bool opaque)
|
||||
template<bool MOSAIC, bool BACKDROP, int FUNCNUM>
|
||||
FORCEINLINE void GPU::___setFinalColorBck(u16 color, const u32 x, const int opaque)
|
||||
{
|
||||
//I commented out this line to make a point.
|
||||
//under ordinary circumstances, nobody should pass in something >=256
|
||||
|
@ -1206,7 +1128,7 @@ template<bool MOSAIC, bool BACKDROP> FORCEINLINE void GPU::__setFinalColorBck(u1
|
|||
if(color != 0xFFFF)
|
||||
{
|
||||
finish:
|
||||
setFinalColorBG<BACKDROP>(color,x);
|
||||
setFinalColorBG<BACKDROP,FUNCNUM>(color,x);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1244,7 +1166,7 @@ static void mosaicSpriteLinePixel(GPU * gpu, int x, u16 l, u8 * dst, u8 * dst_al
|
|||
if(!objColor.opaque) prioTab[x] = 0xFF;
|
||||
}
|
||||
|
||||
static void mosaicSpriteLine(GPU * gpu, u16 l, u8 * dst, u8 * dst_alpha, u8 * typeTab, u8 * prioTab)
|
||||
FORCEINLINE static void mosaicSpriteLine(GPU * gpu, u16 l, u8 * dst, u8 * dst_alpha, u8 * typeTab, u8 * prioTab)
|
||||
{
|
||||
//don't even try this unless the mosaic is effective
|
||||
if(gpu->mosaicLookup.widthValue != 0 || gpu->mosaicLookup.heightValue != 0)
|
||||
|
@ -1281,7 +1203,7 @@ template<bool MOSAIC> void lineLarge8bpp(GPU * gpu)
|
|||
XBG &= wmask;
|
||||
u8 pixel = map[XBG];
|
||||
u16 color = T1ReadWord(pal, pixel<<1);
|
||||
gpu->__setFinalColorBck<MOSAIC,false>(color,x,color!=0);
|
||||
gpu->__setFinalColorBck<MOSAIC,false>(color,x,color);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1821,14 +1743,6 @@ FORCEINLINE BOOL compute_sprite_vars(_OAM_ * spriteInfo, u16 l,
|
|||
// SPRITE RENDERING
|
||||
/*****************************************************************************/
|
||||
|
||||
void GPU::spriteRender(u8 * dst, u8 * dst_alpha, u8 * typeTab, u8 * prioTab)
|
||||
{
|
||||
if(spriteRenderMode == SPRITE_1D)
|
||||
_spriteRender<SPRITE_1D>(dst,dst_alpha,typeTab, prioTab);
|
||||
else
|
||||
_spriteRender<SPRITE_2D>(dst,dst_alpha,typeTab, prioTab);
|
||||
}
|
||||
|
||||
//TODO - refactor this so there isnt as much duped code between rotozoomed and non-rotozoomed versions
|
||||
|
||||
template<GPU::SpriteRenderMode MODE>
|
||||
|
@ -2324,13 +2238,6 @@ void GPU_set_DISPCAPCNT(u32 val)
|
|||
gpu->dispCapCnt.srcB = (val >> 25) & 0x01;
|
||||
gpu->dispCapCnt.capSrc = (val >> 29) & 0x03;
|
||||
|
||||
//gpu->dispCapCnt.dstBlock = = (gpu->dispCapCnt.writeBlock * 0x20000) +
|
||||
// (gpu->dispCapCnt.writeOffset * 0x8000);
|
||||
//
|
||||
//gpu->dispCapCnt.src = (gpu->dispCapCnt.readBlock * 0x20000) +
|
||||
// (gpu->dispCapCnt.readOffset * 0x8000);
|
||||
//
|
||||
|
||||
switch((val >> 20) & 0x03)
|
||||
{
|
||||
case 0:
|
||||
|
@ -2357,33 +2264,48 @@ void GPU_set_DISPCAPCNT(u32 val)
|
|||
gpu->dispCapCnt.capSrc, gpu->dispCapCnt.dst - ARM9Mem.ARM9_LCD, gpu->dispCapCnt.src - ARM9Mem.ARM9_LCD,
|
||||
gpu->dispCapCnt.srcA, gpu->dispCapCnt.srcB);*/
|
||||
}
|
||||
// #define BRIGHT_TABLES
|
||||
|
||||
static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
||||
{
|
||||
CACHE_ALIGN u8 spr[512];
|
||||
CACHE_ALIGN u8 sprAlpha[256];
|
||||
CACHE_ALIGN u8 sprType[256];
|
||||
CACHE_ALIGN u8 sprPrio[256];
|
||||
|
||||
GPU * gpu = screen->gpu;
|
||||
struct _DISPCNT * dispCnt = &(gpu->dispx_st)->dispx_DISPCNT.bits;
|
||||
itemsForPriority_t * item;
|
||||
u8 spr[512];
|
||||
u8 sprAlpha[256];
|
||||
u8 sprType[256];
|
||||
u8 sprPrio[256];
|
||||
u8 prio;
|
||||
u16 i16;
|
||||
BOOL BG_enabled = TRUE;
|
||||
|
||||
gpu->currentFadeInColors = &fadeInColors[gpu->BLDY_EVY][0];
|
||||
gpu->currentFadeOutColors = &fadeOutColors[gpu->BLDY_EVY][0];
|
||||
|
||||
u16 backdrop_color = T1ReadWord(ARM9Mem.ARM9_VMEM, gpu->core * 0x400) & 0x7FFF;
|
||||
|
||||
//we need to write backdrop colors in the same way as we do BG pixels in order to do correct window processing
|
||||
//this is currently eating up 2fps or so. it is a reasonable candidate for optimization.
|
||||
gpu->currBgNum = 5;
|
||||
for(int x=0;x<256;x++) {
|
||||
gpu->__setFinalColorBck<false,true>(backdrop_color,x,1);
|
||||
}
|
||||
memset(gpu->bgPixels,5,256);
|
||||
switch(gpu->setFinalColorBck_funcNum) {
|
||||
case 0: case 1: //for backdrops, (even with window enabled) none and blend are both the same: just copy the color
|
||||
case 4: case 5:
|
||||
memset_u16_le<256>(gpu->currDst,backdrop_color);
|
||||
break;
|
||||
case 2:
|
||||
//for non-windowed fade, we can just fade the color and fill
|
||||
memset_u16_le<256>(gpu->currDst,gpu->currentFadeInColors[backdrop_color]);
|
||||
break;
|
||||
case 3:
|
||||
//likewise for non-windowed fadeout
|
||||
memset_u16_le<256>(gpu->currDst,gpu->currentFadeOutColors[backdrop_color]);
|
||||
break;
|
||||
|
||||
//this check isnt really helpful. it just slows us down in the cases where we need the most speed
|
||||
//if (!gpu->LayersEnable[0] && !gpu->LayersEnable[1] && !gpu->LayersEnable[2] && !gpu->LayersEnable[3] && !gpu->LayersEnable[4]) return;
|
||||
//windowed fades need special treatment
|
||||
case 6: for(int x=0;x<256;x++) gpu->___setFinalColorBck<false,true,6>(backdrop_color,x,1); break;
|
||||
case 7: for(int x=0;x<256;x++) gpu->___setFinalColorBck<false,true,7>(backdrop_color,x,1); break;
|
||||
}
|
||||
|
||||
memset(gpu->bgPixels,5,256);
|
||||
|
||||
// init background color & priorities
|
||||
memset(sprAlpha, 0, 256);
|
||||
|
@ -2392,9 +2314,11 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
|||
memset(sprWin, 0, 256);
|
||||
|
||||
// init pixels priorities
|
||||
for (int i=0; i<NB_PRIORITIES; i++) {
|
||||
gpu->itemsForPriority[i].nbPixelsX = 0;
|
||||
}
|
||||
assert(NB_PRIORITIES==4);
|
||||
gpu->itemsForPriority[0].nbPixelsX = 0;
|
||||
gpu->itemsForPriority[1].nbPixelsX = 0;
|
||||
gpu->itemsForPriority[2].nbPixelsX = 0;
|
||||
gpu->itemsForPriority[3].nbPixelsX = 0;
|
||||
|
||||
// for all the pixels in the line
|
||||
if (gpu->LayersEnable[4])
|
||||
|
@ -2405,7 +2329,6 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
|||
//zero 06-may-09: I properly supported window color effects for backdrop, but I am not sure
|
||||
//how it interacts with this. I wish we knew why we needed this
|
||||
|
||||
|
||||
gpu->spriteRender(spr, sprAlpha, sprType, sprPrio);
|
||||
mosaicSpriteLine(gpu, l, spr, sprAlpha, sprType, sprPrio);
|
||||
|
||||
|
@ -2413,7 +2336,7 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
|||
for(int i = 0; i<256; i++)
|
||||
{
|
||||
// assign them to the good priority item
|
||||
prio = sprPrio[i];
|
||||
int prio = sprPrio[i];
|
||||
if (prio >=4) continue;
|
||||
|
||||
item = &(gpu->itemsForPriority[prio]);
|
||||
|
@ -2426,9 +2349,12 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
|||
if (!gpu->LayersEnable[0] && !gpu->LayersEnable[1] && !gpu->LayersEnable[2] && !gpu->LayersEnable[3])
|
||||
BG_enabled = FALSE;
|
||||
|
||||
for(int j=0;j<8;j++)
|
||||
gpu->blend2[j] = (gpu->BLDCNT & (0x100 << j));
|
||||
|
||||
// paint lower priorities first
|
||||
// then higher priorities on top
|
||||
for(prio=NB_PRIORITIES; prio > 0; )
|
||||
for(int prio=NB_PRIORITIES; prio > 0; )
|
||||
{
|
||||
prio--;
|
||||
item = &(gpu->itemsForPriority[prio]);
|
||||
|
@ -2442,18 +2368,10 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
|||
{
|
||||
gpu->currBgNum = i16;
|
||||
gpu->blend1 = gpu->BLDCNT & (1 << gpu->currBgNum);
|
||||
for(int j=0;j<8;j++)
|
||||
gpu->blend2[j] = (gpu->BLDCNT & (0x100 << j));
|
||||
gpu->currentFadeInColors = &fadeInColors[gpu->BLDY_EVY][0];
|
||||
gpu->currentFadeOutColors = &fadeOutColors[gpu->BLDY_EVY][0];
|
||||
//gpu->bgFunc = gpu->setFinalColorBck_funcNum;
|
||||
|
||||
struct _BGxCNT *bgCnt = &(gpu->dispx_st)->dispx_BGxCNT[i16].bits;
|
||||
gpu->curr_mosaic_enabled = bgCnt->Mosaic_Enable;
|
||||
|
||||
//mosaic test hacks
|
||||
//gpu->curr_mosaic_enabled = true;
|
||||
|
||||
if (gpu->core == GPU_MAIN)
|
||||
{
|
||||
if (i16 == 0 && dispCnt->BG0_3D)
|
||||
|
@ -2495,16 +2413,12 @@ static void GPU_ligne_layer(NDS_Screen * screen, u16 l)
|
|||
if (gpu->LayersEnable[4])
|
||||
{
|
||||
gpu->currBgNum = 4;
|
||||
////analyze mosaic configuration
|
||||
//u16 mosaic_control = T1ReadWord((u8 *)&gpu->dispx_st->dispx_MISC.MOSAIC, 0);
|
||||
//gpu->curr_mosaic_enabled
|
||||
|
||||
gpu->blend1 = gpu->BLDCNT & (1 << gpu->currBgNum);
|
||||
|
||||
for (int i=0; i < item->nbPixelsX; i++)
|
||||
{
|
||||
i16=item->PixelsX[i];
|
||||
// T2WriteWord(dst, i16 << 1, T2ReadWord(spr, i16 << 1));
|
||||
// gpu->bgPixels[i16] = 4;
|
||||
gpu->setFinalColorSpr(gpu, (i16<<1), gpu->currDst, T2ReadWord(spr, (i16<<1)), sprAlpha[i16], sprType[i16], i16);
|
||||
setFinalColorSpr(gpu, gpu->currDst, T2ReadWord(spr, (i16<<1)), sprAlpha[i16], sprType[i16], i16);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2686,10 +2600,8 @@ static INLINE void GPU_ligne_MasterBrightness(NDS_Screen * screen, u16 l)
|
|||
if(factor>16) factor=16;
|
||||
|
||||
|
||||
// Apply final brightness adjust (MASTER_BRIGHT)
|
||||
// Reference: http://nocash.emubase.de/gbatek.htm#dsvideo (Under MASTER_BRIGHTNESS)
|
||||
/* Mightymax> it should be more effective if the windowmanager applies brightness when drawing */
|
||||
/* it will most likly take acceleration, while we are stuck here with CPU power */
|
||||
//Apply final brightness adjust (MASTER_BRIGHT)
|
||||
//http://nocash.emubase.de/gbatek.htm#dsvideo (Under MASTER_BRIGHTNESS)
|
||||
|
||||
switch (gpu->MasterBrightMode)
|
||||
{
|
||||
|
@ -2841,9 +2753,6 @@ void GPU_ligne(NDS_Screen * screen, u16 l, bool skip)
|
|||
return;
|
||||
}
|
||||
|
||||
//if(gpu->core == 1)
|
||||
// printf("%d\n",l);
|
||||
|
||||
//blacken the screen if it is turned off by the user
|
||||
if(!CommonSettings.showGpu.screens[gpu->core])
|
||||
{
|
||||
|
@ -2852,12 +2761,6 @@ void GPU_ligne(NDS_Screen * screen, u16 l, bool skip)
|
|||
return;
|
||||
}
|
||||
|
||||
//{
|
||||
// extern int currFrameCounter;
|
||||
// u8 * dst = GPU_screen + (screen->offset + l) * 512;
|
||||
// memset(dst,currFrameCounter,512);
|
||||
//}
|
||||
|
||||
//cache some parameters which are assumed to be stable throughout the rendering of the entire line
|
||||
gpu->currLine = l;
|
||||
u16 mosaic_control = T1ReadWord((u8 *)&gpu->dispx_st->dispx_MISC.MOSAIC, 0);
|
||||
|
|
|
@ -123,6 +123,10 @@ typedef union
|
|||
#define BGxENABLED(cnt,num) ((num<8)? ((cnt.val>>8) & num):0)
|
||||
|
||||
|
||||
//Color special effect applied by the final pixel functions.
//The numeric values matter: the setFinalColor* dispatch switches map
//function numbers 0-3 (and 4-7 for the windowed variants) onto
//None, Blend, Increase, Decrease in exactly this order.
enum BlendFunc
{
	None = 0,
	Blend = 1,
	Increase = 2,
	Decrease = 3
};
|
||||
|
||||
|
||||
/*******************************************************************************
|
||||
|
@ -601,10 +605,19 @@ typedef struct
|
|||
#define NB_BG 4
|
||||
typedef struct
|
||||
{
|
||||
u8 BGs[NB_BG], nbBGs;
|
||||
u8 PixelsX[256];
|
||||
// doh ! yoda says : 256 pixels we can have...
|
||||
u8 BGs[NB_BG], nbBGs;
|
||||
u8 pad[1];
|
||||
u16 nbPixelsX;
|
||||
//256+8:
|
||||
u8 pad2[248];
|
||||
|
||||
//things were slower when i organized this struct this way. whatever.
|
||||
//u8 PixelsX[256];
|
||||
//int BGs[NB_BG], nbBGs;
|
||||
//int nbPixelsX;
|
||||
////<-- 256 + 24
|
||||
//u8 pad2[256-24];
|
||||
} itemsForPriority_t;
|
||||
#define ARM9MEM_ABG 0x06000000
|
||||
#define ARM9MEM_BBG 0x06200000
|
||||
|
@ -761,13 +774,13 @@ struct GPU
|
|||
|
||||
u16 blend(u16 colA, u16 colB);
|
||||
|
||||
typedef void (*FinalOBJColFunct)(GPU *gpu, u32 passing, u8 *dst, u16 color, u8 alpha, u8 type, u16 x);
|
||||
typedef void (*Final3DColFunct)(GPU *gpu, int dstX, int srcX);
|
||||
template<bool BACKDROP, BlendFunc FUNC, bool WINDOW>
|
||||
FORCEINLINE FASTCALL bool _master_setFinalBGColor(u16 &color, const u32 x);
|
||||
|
||||
int setFinalColorBck_funcNum;
|
||||
int bgFunc;
|
||||
int setFinalColor3d_funcNum;
|
||||
FinalOBJColFunct setFinalColorSpr;
|
||||
int setFinalColorSpr_funcNum;
|
||||
//Final3DColFunct setFinalColor3D;
|
||||
enum SpriteRenderMode {
|
||||
SPRITE_1D, SPRITE_2D
|
||||
|
@ -775,9 +788,17 @@ struct GPU
|
|||
|
||||
template<GPU::SpriteRenderMode MODE>
|
||||
void _spriteRender(u8 * dst, u8 * dst_alpha, u8 * typeTab, u8 * prioTab);
|
||||
void spriteRender(u8 * dst, u8 * dst_alpha, u8 * typeTab, u8 * prioTab);
|
||||
|
||||
//Renders sprites through the _spriteRender instantiation that matches
//spriteRenderMode: SPRITE_1D uses the 1D variant, every other value the 2D variant.
//The four buffers are handed over unchanged.
inline void spriteRender(u8 * dst, u8 * dst_alpha, u8 * typeTab, u8 * prioTab)
{
	if(spriteRenderMode != SPRITE_1D)
	{
		_spriteRender<SPRITE_2D>(dst, dst_alpha, typeTab, prioTab);
		return;
	}

	_spriteRender<SPRITE_1D>(dst, dst_alpha, typeTab, prioTab);
}
|
||||
|
||||
template<bool BACKDROP> void setFinalColorBG(u16 color, const u32 x);
|
||||
|
||||
template<bool BACKDROP, int FUNCNUM> void setFinalColorBG(u16 color, const u32 x);
|
||||
void setFinalColor3d(int dstX, int srcX);
|
||||
|
||||
template<bool BACKDROP> FORCEINLINE void setFinalBGColorSpecialNone(u16 &color, const u32 x);
|
||||
|
@ -799,7 +820,8 @@ struct GPU
|
|||
FORCEINLINE void setFinal3DColorSpecialDecreaseWnd(int dstX, int srcX);
|
||||
|
||||
|
||||
template<bool MOSAIC, bool BACKDROP> void __setFinalColorBck(u16 color, const u32 x, const bool opaque);
|
||||
template<bool MOSAIC, bool BACKDROP> FORCEINLINE void __setFinalColorBck(u16 color, const u32 x, const int opaque);
|
||||
template<bool MOSAIC, bool BACKDROP, int FUNCNUM> FORCEINLINE void ___setFinalColorBck(u16 color, const u32 x, const int opaque);
|
||||
void setAffineStart(int layer, int xy, u32 val);
|
||||
void setAffineStartWord(int layer, int xy, u16 val, int word);
|
||||
u32 getAffineStart(int layer, int xy);
|
||||
|
|
|
@ -24,6 +24,16 @@
|
|||
#include <math.h>
|
||||
|
||||
#include "types.h"
|
||||
#include "mem.h"
|
||||
|
||||
#if !defined(NOSSE2) && !defined(SSE2_NOINTRIN)
|
||||
#define SSE2_INTRIN
|
||||
#endif
|
||||
|
||||
#ifdef SSE2_INTRIN
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
|
||||
|
@ -108,7 +118,9 @@ void Vector4Copy(float *dst, const float *src);
|
|||
//this isnt as fast as it could be if we used a visual c++ intrinsic, but those appear not to be universally available
|
||||
FORCEINLINE u32 u32floor(float f)
|
||||
{
|
||||
#ifndef NOSSE2
|
||||
#if defined(SSE2_INTRIN)
|
||||
return (u32)_mm_cvttss_si32(_mm_set_ss(f));
|
||||
#elif !defined(NOSSE2)
|
||||
__asm cvttss2si eax, f;
|
||||
#else
|
||||
return (u32)f;
|
||||
|
@ -116,7 +128,9 @@ FORCEINLINE u32 u32floor(float f)
|
|||
}
|
||||
FORCEINLINE u32 u32floor(double d)
|
||||
{
|
||||
#ifndef NOSSE2
|
||||
#if defined(SSE2_INTRIN)
|
||||
return (u32)_mm_cvttsd_si32(_mm_set_sd(d));
|
||||
#elif !defined(NOSSE2)
|
||||
__asm cvttsd2si eax, d;
|
||||
#else
|
||||
return (u32)d;
|
||||
|
@ -127,7 +141,9 @@ FORCEINLINE u32 u32floor(double d)
|
|||
//be sure that the results are the same thing as floorf!
|
||||
FORCEINLINE s32 s32floor(float f)
|
||||
{
|
||||
#ifndef NOSSE2
|
||||
#if defined(SSE2_INTRIN)
|
||||
return _mm_cvttss_si32( _mm_add_ss(_mm_set_ss(-0.5f),_mm_add_ss(_mm_set_ss(f), _mm_set_ss(f))) ) >> 1;
|
||||
#elif !defined(NOSSE2)
|
||||
static const float c = -0.5f;
|
||||
__asm
|
||||
{
|
||||
|
@ -142,5 +158,49 @@ FORCEINLINE s32 s32floor(float f)
|
|||
#endif
|
||||
}
|
||||
|
||||
//now comes some sse2 functions coded solely with intrinsics.
|
||||
//let's wait and see how many people this upsets.
|
||||
//they can always #define SSE2_NOINTRIN in their userconfig.h....
|
||||
|
||||
#ifdef SSE2_INTRIN
|
||||
|
||||
template<int NUM>
|
||||
static FORCEINLINE void memset_u16_le(void* dst, u16 val)
|
||||
{
|
||||
u32 u32val;
|
||||
//just for the endian safety
|
||||
T1WriteWord((u8*)&u32val,0,val);
|
||||
T1WriteWord((u8*)&u32val,2,val);
|
||||
const __m128i temp = _mm_set_epi32(u32val,u32val,u32val,u32val);
|
||||
MACRODO_N(NUM/8,_mm_store_si128((__m128i*)((u8*)dst+(X)*16), temp));
|
||||
}
|
||||
#else
|
||||
template<int NUM>
|
||||
static FORCEINLINE void memset_u16_le(void* dst, u16 val)
|
||||
{
|
||||
for(int i=0;i<NUM;i++)
|
||||
T1WriteWord((u8*)dst,i<<1,val);
|
||||
}
|
||||
#endif
|
||||
|
||||
//WARNING: I do not think this is as fast as a memset, for some reason.
|
||||
//at least in vc2005 with sse enabled. better figure out why before using it
|
||||
#ifdef SSE2_INTRIN
|
||||
template<int NUM>
|
||||
static FORCEINLINE void memset_u8(void* _dst, u8 val)
|
||||
{
|
||||
const u8* dst = (u8*)_dst;
|
||||
u32 u32val = (val<<24)|(val<<16)|(val<<8)|val;
|
||||
const __m128i temp = _mm_set_epi32(u32val,u32val,u32val,u32val);
|
||||
MACRODO_N(NUM/16,_mm_store_si128((__m128i*)(dst+(X)*16), temp));
|
||||
}
|
||||
#else
|
||||
template<int NUM>
|
||||
static FORCEINLINE void memset_u8(void* dst, u8 val)
|
||||
{
|
||||
memset(dst,val,NUM);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -627,7 +627,7 @@ typedef int fixed28_4;
|
|||
static bool failure;
|
||||
|
||||
// handle floor divides and mods correctly
|
||||
INLINE void FloorDivMod(long Numerator, long Denominator, long &Floor, long &Mod)
|
||||
FORCEINLINE void FloorDivMod(long Numerator, long Denominator, long &Floor, long &Mod)
|
||||
{
|
||||
//These must be caused by invalid or degenerate shapes.. not sure yet.
|
||||
//check it out in the mario face intro of SM64
|
||||
|
@ -658,10 +658,10 @@ INLINE void FloorDivMod(long Numerator, long Denominator, long &Floor, long &Mod
|
|||
}
|
||||
}
|
||||
|
||||
INLINE fixed28_4 FloatToFixed28_4( float Value ) {
|
||||
FORCEINLINE fixed28_4 FloatToFixed28_4( float Value ) {
	// scale by 16 (four fractional bits), then convert to the fixed-point type
	const float scaled = Value * 16.0f;
	return static_cast<fixed28_4>(scaled);
}
|
||||
INLINE float Fixed28_4ToFloat( fixed28_4 Value ) {
|
||||
FORCEINLINE float Fixed28_4ToFloat( fixed28_4 Value ) {
	// the division is carried out in double precision, then narrowed to float
	const double unscaled = Value / 16.0;
	return static_cast<float>(unscaled);
}
|
||||
//inline fixed16_16 FloatToFixed16_16( float Value ) {
|
||||
|
@ -670,11 +670,11 @@ INLINE float Fixed28_4ToFloat( fixed28_4 Value ) {
|
|||
//inline float Fixed16_16ToFloat( fixed16_16 Value ) {
|
||||
// return Value / 65536.0;
|
||||
//}
|
||||
INLINE fixed28_4 Fixed28_4Mul( fixed28_4 A, fixed28_4 B ) {
|
||||
FORCEINLINE fixed28_4 Fixed28_4Mul( fixed28_4 A, fixed28_4 B ) {
	// 28.4 * 28.4 = 24.8, and dividing by 16 brings it back to 28.4.
	// The raw product is formed in 64 bits so that it cannot overflow before the
	// divide; results are unchanged wherever the old int product did not overflow.
	const long long product = static_cast<long long>(A) * B;
	return static_cast<fixed28_4>(product / 16);
}
|
||||
INLINE int Ceil28_4( fixed28_4 Value ) {
|
||||
FORCEINLINE int Ceil28_4( fixed28_4 Value ) {
|
||||
int ReturnValue;
|
||||
int Numerator = Value - 1 + 16;
|
||||
if(Numerator >= 0) {
|
||||
|
@ -700,7 +700,7 @@ struct edge_fx_fl {
|
|||
float curr, step, stepExtra;
|
||||
FORCEINLINE void doStep() { curr += step; }
|
||||
FORCEINLINE void doStepExtra() { curr += stepExtra; }
|
||||
void initialize(float top, float bottom, float dx, float dy, long XStep, float XPrestep, float YPrestep) {
|
||||
FORCEINLINE void initialize(float top, float bottom, float dx, float dy, long XStep, float XPrestep, float YPrestep) {
|
||||
dx = 0;
|
||||
dy *= (bottom-top);
|
||||
curr = top + YPrestep * dy + XPrestep * dx;
|
||||
|
@ -764,7 +764,7 @@ FORCEINLINE int edge_fx_fl::Step() {
|
|||
}
|
||||
|
||||
//draws a single scanline
|
||||
static void drawscanline(edge_fx_fl *pLeft, edge_fx_fl *pRight)
|
||||
FORCEINLINE static void drawscanline(edge_fx_fl *pLeft, edge_fx_fl *pRight)
|
||||
{
|
||||
int XStart = pLeft->X;
|
||||
int width = pRight->X - XStart;
|
||||
|
|
|
@ -30,6 +30,11 @@
|
|||
#define NOSSE2
|
||||
#endif
|
||||
|
||||
//without sse2 there is nothing for the intrinsics to target, so force them off too
#ifdef NOSSE2
#define SSE2_NOINTRIN
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#define strcasecmp(x,y) _stricmp(x,y)
|
||||
#else
|
||||
|
@ -331,5 +336,31 @@ char (*BLAHBLAHBLAH( UNALIGNED T (&)[N] ))[N];
|
|||
#endif
|
||||
|
||||
|
||||
//fairly standard for loop macros
|
||||
//Compile-time unrolling helpers.
//MACRODO1 runs STMT once inside its own scope with a local 'const int X' set to INDEX;
//STMT refers to the current iteration index through that X.
//Each MACRODOk(BASE,STMT) expands to two copies of the next smaller macro, covering
//indices BASE .. BASE+k-1 in ascending order.
#define MACRODO1(INDEX,STMT) { const int X = INDEX; STMT; }
#define MACRODO2(BASE,STMT) { MACRODO1((BASE),STMT) MACRODO1(((BASE)+1),STMT) }
#define MACRODO4(BASE,STMT) { MACRODO2((BASE),STMT) MACRODO2(((BASE)+2),STMT) }
#define MACRODO8(BASE,STMT) { MACRODO4((BASE),STMT) MACRODO4(((BASE)+4),STMT) }
#define MACRODO16(BASE,STMT) { MACRODO8((BASE),STMT) MACRODO8(((BASE)+8),STMT) }
#define MACRODO32(BASE,STMT) { MACRODO16((BASE),STMT) MACRODO16(((BASE)+16),STMT) }
#define MACRODO64(BASE,STMT) { MACRODO32((BASE),STMT) MACRODO32(((BASE)+32),STMT) }
#define MACRODO128(BASE,STMT) { MACRODO64((BASE),STMT) MACRODO64(((BASE)+64),STMT) }
#define MACRODO256(BASE,STMT) { MACRODO128((BASE),STMT) MACRODO128(((BASE)+128),STMT) }
|
||||
|
||||
//this one lets you loop any number of times (as long as N<512: only bits 0x100 down to 0x001 of N are examined)
|
||||
//Runs STMT COUNT times with X = 0 .. COUNT-1 by decomposing COUNT into powers of two.
//For every set bit, the matching MACRODOk is expanded starting at the index formed by
//the higher bits of COUNT (bits 0x100 and below only), so the pieces run back to back.
//Bits above 0x100 are ignored.
#define MACRODO_N(COUNT,STMT) { \
	if((COUNT)&0x100) { MACRODO256(0,STMT) } \
	if((COUNT)&0x080) { MACRODO128((COUNT)&0x100,STMT) } \
	if((COUNT)&0x040) { MACRODO64((COUNT)&0x180,STMT) } \
	if((COUNT)&0x020) { MACRODO32((COUNT)&0x1C0,STMT) } \
	if((COUNT)&0x010) { MACRODO16((COUNT)&0x1E0,STMT) } \
	if((COUNT)&0x008) { MACRODO8((COUNT)&0x1F0,STMT) } \
	if((COUNT)&0x004) { MACRODO4((COUNT)&0x1F8,STMT) } \
	if((COUNT)&0x002) { MACRODO2((COUNT)&0x1FC,STMT) } \
	if((COUNT)&0x001) { MACRODO1((COUNT)&0x1FE,STMT) } \
}
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
//#define NOSSE2 //disables SSE2 optimizations (better change it in the vc++ codegen options too)
|
||||
//#define DEVELOPER //enables dev+ features
|
||||
//#define GDB_STUB //enables the gdb stub. for some reason this is separate from dev+ for now
|
||||
//#define SSE2_NOINTRIN //indicates that you have a crippled compiler with no sse2 intrinsics (only relevant for SSE2 builds)
|
||||
|
||||
|
||||
#endif //_USERCONFIG_H
|
||||
|
|
Loading…
Reference in New Issue