GPU:
- Add new malloc_alignedN() functions for easier dynamic allocation of aligned memory blocks.
- Rework buffer allocations using the new malloc_alignedN() functions.
- To enable SSSE3, also require ENABLE_SSE2 and ENABLE_SSE3.
- Add some more SSE2/SSSE3 optimizations.
- CACHE_ALIGN and malloc_alignedCacheLine() now set 64-byte alignment on 64-bit systems.
- Do a bunch more code cleanup.
This commit is contained in:
parent
3b332f3a12
commit
d1db3dd7ef
|
@ -26,6 +26,15 @@
|
|||
#include <algorithm>
|
||||
#include <iostream>
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include "common.h"
|
||||
#include "MMU.h"
|
||||
#include "FIFO.h"
|
||||
#include "debug.h"
|
||||
|
@ -54,10 +63,7 @@ GPU::MosaicLookup GPU::mosaicLookup;
|
|||
|
||||
//#define DEBUG_TRI
|
||||
|
||||
//this value should be 32-byte aligned
|
||||
u16 *GPU_screen = NULL;
|
||||
//and this is the raw pointer
|
||||
u16 *GPU_screen_raw = NULL;
|
||||
|
||||
static size_t _gpuFramebufferWidth = GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
static size_t _gpuFramebufferHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT;
|
||||
|
@ -70,9 +76,9 @@ static size_t _gpuDstPitchIndex[GPU_FRAMEBUFFER_NATIVE_WIDTH]; // Key: Source
|
|||
static size_t _gpuDstLineCount[GPU_FRAMEBUFFER_NATIVE_HEIGHT]; // Key: Source line index / Value: Number of destination lines for the source line
|
||||
static size_t _gpuDstLineIndex[GPU_FRAMEBUFFER_NATIVE_HEIGHT]; // Key: Source line index / Value: First destination line that maps to the source line
|
||||
|
||||
CACHE_ALIGN u8 sprWin[GPU_FRAMEBUFFER_NATIVE_WIDTH * 8];
|
||||
static CACHE_ALIGN u8 sprWin[GPU_FRAMEBUFFER_NATIVE_WIDTH * 8];
|
||||
|
||||
u16 gpu_angle = 0;
|
||||
static u16 gpu_angle = 0;
|
||||
|
||||
const SpriteSize sprSizeTab[4][4] =
|
||||
{
|
||||
|
@ -110,7 +116,7 @@ const short sizeTab[8][4][2] =
|
|||
|
||||
static u8 *win_empty = NULL;
|
||||
static CACHE_ALIGN u16 fadeInColors[17][0x8000];
|
||||
CACHE_ALIGN u16 fadeOutColors[17][0x8000];
|
||||
static CACHE_ALIGN u16 fadeOutColors[17][0x8000];
|
||||
|
||||
//this should be public, because it gets used somewhere else
|
||||
CACHE_ALIGN u8 gpuBlendTable555[17][17][32][32];
|
||||
|
@ -264,15 +270,14 @@ void GPU_DeInit(GPU *gpu)
|
|||
{
|
||||
if (gpu == &GPU_main || gpu == &GPU_sub) return;
|
||||
|
||||
free(gpu->tempScanlineBufferRaw);
|
||||
free_aligned(gpu->tempScanlineBuffer);
|
||||
gpu->tempScanlineBuffer = NULL;
|
||||
gpu->tempScanlineBufferRaw = NULL;
|
||||
free(gpu->bgPixels);
|
||||
free_aligned(gpu->bgPixels);
|
||||
gpu->bgPixels = NULL;
|
||||
|
||||
free(gpu->h_win[0]);
|
||||
free_aligned(gpu->h_win[0]);
|
||||
gpu->h_win[0] = NULL;
|
||||
free(gpu->h_win[1]);
|
||||
free_aligned(gpu->h_win[1]);
|
||||
gpu->h_win[1] = NULL;
|
||||
|
||||
free(gpu);
|
||||
|
@ -608,7 +613,7 @@ FORCEINLINE void GPU::renderline_checkWindows(const size_t dstX, bool &draw, boo
|
|||
/*****************************************************************************/
|
||||
|
||||
template<BlendFunc FUNC, bool WINDOW>
|
||||
FORCEINLINE FASTCALL void GPU::_master_setFinal3dColor(const size_t dstX, u16 &outDst, u8 *bgPixelsLine, const FragmentColor src)
|
||||
FORCEINLINE FASTCALL void GPU::_master_setFinal3dColor(const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, const FragmentColor src)
|
||||
{
|
||||
u8 alpha = src.a;
|
||||
u16 final;
|
||||
|
@ -635,7 +640,7 @@ FORCEINLINE FASTCALL void GPU::_master_setFinal3dColor(const size_t dstX, u16 &o
|
|||
//if the layer underneath is a blend bottom layer, then 3d always alpha blends with it
|
||||
COLOR c2, cfinal;
|
||||
|
||||
c2.val = outDst;
|
||||
c2.val = dstLine[dstX];
|
||||
|
||||
cfinal.bits.red = ((src.r * alpha) + ((c2.bits.red<<1) * (32 - alpha)))>>6;
|
||||
cfinal.bits.green = ((src.g * alpha) + ((c2.bits.green<<1) * (32 - alpha)))>>6;
|
||||
|
@ -662,12 +667,12 @@ FORCEINLINE FASTCALL void GPU::_master_setFinal3dColor(const size_t dstX, u16 &o
|
|||
}
|
||||
}
|
||||
|
||||
outDst = final | 0x8000;
|
||||
dstLine[dstX] = final | 0x8000;
|
||||
bgPixelsLine[dstX] = 0;
|
||||
}
|
||||
|
||||
template<bool BACKDROP, BlendFunc FUNC, bool WINDOW>
|
||||
FORCEINLINE FASTCALL bool GPU::_master_setFinalBGColor(const u16 *dstLine, const u8 *bgPixelsLine, u16 &outColor, const size_t dstX)
|
||||
FORCEINLINE FASTCALL bool GPU::_master_setFinalBGColor(const size_t dstX, const u16 *dstLine, const u8 *bgPixelsLine, u16 &outColor)
|
||||
{
|
||||
//no further analysis for no special effects. on backdrops. just draw it.
|
||||
if ((FUNC == NoBlend) && BACKDROP) return true;
|
||||
|
@ -707,8 +712,10 @@ FORCEINLINE FASTCALL bool GPU::_master_setFinalBGColor(const u16 *dstLine, const
|
|||
}
|
||||
|
||||
template<BlendFunc FUNC, bool WINDOW>
|
||||
static FORCEINLINE void _master_setFinalOBJColor(GPU *gpu, u16 *dstLine, u8 *bgPixelsLine, u16 color, const u8 alpha, const u8 type, const size_t dstX)
|
||||
FORCEINLINE FASTCALL void GPU::_master_setFinalOBJColor(const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, const u16 src, const u8 alpha, const u8 type)
|
||||
{
|
||||
u16 finalDstColor = src;
|
||||
|
||||
const bool isObjTranslucentType = type == GPU_OBJ_MODE_Transparent || type == GPU_OBJ_MODE_Bitmap;
|
||||
|
||||
bool windowDraw = true;
|
||||
|
@ -716,7 +723,7 @@ static FORCEINLINE void _master_setFinalOBJColor(GPU *gpu, u16 *dstLine, u8 *bgP
|
|||
|
||||
if (WINDOW)
|
||||
{
|
||||
gpu->renderline_checkWindows(dstX, windowDraw, windowEffectSatisfied);
|
||||
renderline_checkWindows(dstX, windowDraw, windowEffectSatisfied);
|
||||
if (!windowDraw)
|
||||
return;
|
||||
}
|
||||
|
@ -724,13 +731,13 @@ static FORCEINLINE void _master_setFinalOBJColor(GPU *gpu, u16 *dstLine, u8 *bgP
|
|||
//if the window effect is satisfied, then we can do color effects to modify the color
|
||||
if (windowEffectSatisfied)
|
||||
{
|
||||
const bool firstTargetSatisfied = gpu->blend1;
|
||||
const size_t bg_under = bgPixelsLine[dstX];
|
||||
const bool secondTargetSatisfied = (bg_under != 4) && gpu->blend2[bg_under];
|
||||
const bool firstTargetSatisfied = blend1;
|
||||
const bool secondTargetSatisfied = (bg_under != 4) && blend2[bg_under];
|
||||
BlendFunc selectedFunc = NoBlend;
|
||||
|
||||
u8 eva = gpu->BLDALPHA_EVA;
|
||||
u8 evb = gpu->BLDALPHA_EVB;
|
||||
u8 eva = BLDALPHA_EVA;
|
||||
u8 evb = BLDALPHA_EVB;
|
||||
|
||||
//if normal BLDCNT layer target conditions are met, then we can use the BLDCNT-specified color effect
|
||||
if (FUNC == Blend)
|
||||
|
@ -764,15 +771,15 @@ static FORCEINLINE void _master_setFinalOBJColor(GPU *gpu, u16 *dstLine, u8 *bgP
|
|||
break;
|
||||
|
||||
case Increase:
|
||||
color = gpu->currentFadeInColors[color&0x7FFF];
|
||||
finalDstColor = currentFadeInColors[src & 0x7FFF];
|
||||
break;
|
||||
|
||||
case Decrease:
|
||||
color = gpu->currentFadeOutColors[color&0x7FFF];
|
||||
finalDstColor = currentFadeOutColors[src & 0x7FFF];
|
||||
break;
|
||||
|
||||
case Blend:
|
||||
color = _blend(color, dstLine[dstX], &gpuBlendTable555[eva][evb]);
|
||||
finalDstColor = _blend(src, dstLine[dstX], &gpuBlendTable555[eva][evb]);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -780,70 +787,70 @@ static FORCEINLINE void _master_setFinalOBJColor(GPU *gpu, u16 *dstLine, u8 *bgP
|
|||
}
|
||||
}
|
||||
|
||||
dstLine[dstX] = color | 0x8000;
|
||||
dstLine[dstX] = finalDstColor | 0x8000;
|
||||
bgPixelsLine[dstX] = 4;
|
||||
}
|
||||
|
||||
//FUNCNUM is only set for backdrop, for an optimization of looking it up early
|
||||
template<bool BACKDROP, int FUNCNUM>
|
||||
FORCEINLINE void GPU::setFinalColorBG(u16 *dstLine, u8 *bgPixelsLine, u16 color, const size_t dstX)
|
||||
FORCEINLINE void GPU::setFinalColorBG(const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, u16 src)
|
||||
{
|
||||
//It is not safe to assert this here.
|
||||
//This is probably the best place to enforce it, since almost every single color that comes in here
|
||||
//will be pulled from a palette that needs the top bit stripped off anyway.
|
||||
//assert((color&0x8000)==0);
|
||||
if (!BACKDROP) color &= 0x7FFF; //but for the backdrop we can easily guarantee earlier that theres no bit here
|
||||
//assert((src&0x8000)==0);
|
||||
if (!BACKDROP) src &= 0x7FFF; //but for the backdrop we can easily guarantee earlier that theres no bit here
|
||||
|
||||
bool draw = false;
|
||||
|
||||
const int test = (BACKDROP) ? FUNCNUM : setFinalColorBck_funcNum;
|
||||
switch (test)
|
||||
{
|
||||
case 0: draw = _master_setFinalBGColor<BACKDROP,NoBlend,false>(dstLine, bgPixelsLine, color, dstX); break;
|
||||
case 1: draw = _master_setFinalBGColor<BACKDROP,Blend,false>(dstLine, bgPixelsLine, color, dstX); break;
|
||||
case 2: draw = _master_setFinalBGColor<BACKDROP,Increase,false>(dstLine, bgPixelsLine, color, dstX); break;
|
||||
case 3: draw = _master_setFinalBGColor<BACKDROP,Decrease,false>(dstLine, bgPixelsLine, color, dstX); break;
|
||||
case 4: draw = _master_setFinalBGColor<BACKDROP,NoBlend,true>(dstLine, bgPixelsLine, color, dstX); break;
|
||||
case 5: draw = _master_setFinalBGColor<BACKDROP,Blend,true>(dstLine, bgPixelsLine, color, dstX); break;
|
||||
case 6: draw = _master_setFinalBGColor<BACKDROP,Increase,true>(dstLine, bgPixelsLine, color, dstX); break;
|
||||
case 7: draw = _master_setFinalBGColor<BACKDROP,Decrease,true>(dstLine, bgPixelsLine, color, dstX); break;
|
||||
case 0: draw = _master_setFinalBGColor<BACKDROP,NoBlend, false>(dstX, dstLine, bgPixelsLine, src); break;
|
||||
case 1: draw = _master_setFinalBGColor<BACKDROP,Blend, false>(dstX, dstLine, bgPixelsLine, src); break;
|
||||
case 2: draw = _master_setFinalBGColor<BACKDROP,Increase,false>(dstX, dstLine, bgPixelsLine, src); break;
|
||||
case 3: draw = _master_setFinalBGColor<BACKDROP,Decrease,false>(dstX, dstLine, bgPixelsLine, src); break;
|
||||
case 4: draw = _master_setFinalBGColor<BACKDROP,NoBlend, true>(dstX, dstLine, bgPixelsLine, src); break;
|
||||
case 5: draw = _master_setFinalBGColor<BACKDROP,Blend, true>(dstX, dstLine, bgPixelsLine, src); break;
|
||||
case 6: draw = _master_setFinalBGColor<BACKDROP,Increase, true>(dstX, dstLine, bgPixelsLine, src); break;
|
||||
case 7: draw = _master_setFinalBGColor<BACKDROP,Decrease, true>(dstX, dstLine, bgPixelsLine, src); break;
|
||||
default: break;
|
||||
};
|
||||
|
||||
if (BACKDROP || draw) //backdrop must always be drawn
|
||||
{
|
||||
dstLine[dstX] = color | 0x8000;
|
||||
dstLine[dstX] = src | 0x8000;
|
||||
if (!BACKDROP) bgPixelsLine[dstX] = currBgNum; //lets do this in the backdrop drawing loop, should be faster
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatches one 3D-layer pixel to the _master_setFinal3dColor variant selected
// by setFinalColor3d_funcNum.
//
// Variants 0x0-0x3 run without window checks, 0x4-0x7 with them; within each
// group the order is NoBlend, Blend, Increase, Decrease. Any other value does
// nothing.
//
// Parameters:
//   dstX         - index of the destination pixel within dstLine / bgPixelsLine.
//   dstLine      - destination color line.
//   bgPixelsLine - per-pixel record of which layer produced the pixel.
//   src          - source fragment color from the 3D layer.
FORCEINLINE void GPU::setFinalColor3d(const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, const FragmentColor src)
{
	switch (setFinalColor3d_funcNum)
	{
		case 0x0: _master_setFinal3dColor<NoBlend,  false>(dstX, dstLine, bgPixelsLine, src); break;
		case 0x1: _master_setFinal3dColor<Blend,    false>(dstX, dstLine, bgPixelsLine, src); break;
		case 0x2: _master_setFinal3dColor<Increase, false>(dstX, dstLine, bgPixelsLine, src); break;
		case 0x3: _master_setFinal3dColor<Decrease, false>(dstX, dstLine, bgPixelsLine, src); break;
		case 0x4: _master_setFinal3dColor<NoBlend,  true>(dstX, dstLine, bgPixelsLine, src); break;
		case 0x5: _master_setFinal3dColor<Blend,    true>(dstX, dstLine, bgPixelsLine, src); break;
		case 0x6: _master_setFinal3dColor<Increase, true>(dstX, dstLine, bgPixelsLine, src); break;
		case 0x7: _master_setFinal3dColor<Decrease, true>(dstX, dstLine, bgPixelsLine, src); break;
		default: break;
	}
}
|
||||
|
||||
// Dispatches one sprite (OBJ) pixel to the _master_setFinalOBJColor variant
// selected by setFinalColorSpr_funcNum.
//
// Variants 0x0-0x3 run without window checks, 0x4-0x7 with them; within each
// group the order is NoBlend, Blend, Increase, Decrease. Any other value does
// nothing.
//
// Parameters:
//   dstX         - index of the destination pixel within dstLine / bgPixelsLine.
//   dstLine      - destination color line.
//   bgPixelsLine - per-pixel record of which layer produced the pixel.
//   src          - source sprite color.
//   alpha        - sprite alpha value, forwarded unchanged.
//   type         - sprite mode, forwarded unchanged.
FORCEINLINE void GPU::setFinalColorSpr(const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, const u16 src, const u8 alpha, const u8 type)
{
	switch (setFinalColorSpr_funcNum)
	{
		case 0x0: _master_setFinalOBJColor<NoBlend,  false>(dstX, dstLine, bgPixelsLine, src, alpha, type); break;
		case 0x1: _master_setFinalOBJColor<Blend,    false>(dstX, dstLine, bgPixelsLine, src, alpha, type); break;
		case 0x2: _master_setFinalOBJColor<Increase, false>(dstX, dstLine, bgPixelsLine, src, alpha, type); break;
		case 0x3: _master_setFinalOBJColor<Decrease, false>(dstX, dstLine, bgPixelsLine, src, alpha, type); break;
		case 0x4: _master_setFinalOBJColor<NoBlend,  true>(dstX, dstLine, bgPixelsLine, src, alpha, type); break;
		case 0x5: _master_setFinalOBJColor<Blend,    true>(dstX, dstLine, bgPixelsLine, src, alpha, type); break;
		case 0x6: _master_setFinalOBJColor<Increase, true>(dstX, dstLine, bgPixelsLine, src, alpha, type); break;
		case 0x7: _master_setFinalOBJColor<Decrease, true>(dstX, dstLine, bgPixelsLine, src, alpha, type); break;
		default: break;
	}
}
|
||||
|
||||
|
@ -868,10 +875,10 @@ FORCEINLINE void GPU::___setFinalColorBck(u16 color, const size_t srcX, const bo
|
|||
{
|
||||
for (size_t p = 0; p < _gpuDstPitchCount[srcX]; p++)
|
||||
{
|
||||
setFinalColorBG<BACKDROP,FUNCNUM>(currDst + (line * _gpuFramebufferWidth),
|
||||
setFinalColorBG<BACKDROP,FUNCNUM>(_gpuDstPitchIndex[srcX] + p,
|
||||
currDst + (line * _gpuFramebufferWidth),
|
||||
bgPixels + (line * _gpuFramebufferWidth),
|
||||
color,
|
||||
_gpuDstPitchIndex[srcX] + p);
|
||||
color);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -902,10 +909,10 @@ FORCEINLINE void GPU::___setFinalColorBck(u16 color, const size_t srcX, const bo
|
|||
{
|
||||
for (size_t p = 0; p < _gpuDstPitchCount[srcX]; p++)
|
||||
{
|
||||
setFinalColorBG<BACKDROP,FUNCNUM>(currDst + (line * _gpuFramebufferWidth),
|
||||
setFinalColorBG<BACKDROP,FUNCNUM>(_gpuDstPitchIndex[srcX] + p,
|
||||
currDst + (line * _gpuFramebufferWidth),
|
||||
bgPixels + (line * _gpuFramebufferWidth),
|
||||
color,
|
||||
_gpuDstPitchIndex[srcX] + p);
|
||||
color);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1225,18 +1232,18 @@ FORCEINLINE void rot_BMP_map(GPU *gpu, const s32 auxX, const s32 auxY, const int
|
|||
typedef void (*rot_fun)(GPU *gpu, const s32 auxX, const s32 auxY, const int lg, const u32 map, const u32 tile, const u16 *pal, const size_t i);
|
||||
|
||||
template<rot_fun fun, bool WRAP>
|
||||
FORCEINLINE void rot_scale_op(GPU *gpu, s32 X, s32 Y, s16 PA, s16 PB, s16 PC, s16 PD, u16 LG, s32 wh, s32 ht, u32 map, u32 tile, const u16 *pal)
|
||||
FORCEINLINE void rot_scale_op(GPU *gpu, const BGxPARMS ¶m, const u16 LG, const s32 wh, const s32 ht, const u32 map, const u32 tile, const u16 *pal)
|
||||
{
|
||||
ROTOCOORD x, y;
|
||||
x.val = X;
|
||||
y.val = Y;
|
||||
x.val = param.BGxX;
|
||||
y.val = param.BGxY;
|
||||
|
||||
const s32 dx = (s32)PA;
|
||||
const s32 dy = (s32)PC;
|
||||
const s32 dx = (s32)param.BGxPA;
|
||||
const s32 dy = (s32)param.BGxPC;
|
||||
|
||||
// as an optimization, specially handle the fairly common case of
|
||||
// "unrotated + unscaled + no boundary checking required"
|
||||
if (dx==0x100 && dy==0)
|
||||
if (dx == GPU_FRAMEBUFFER_NATIVE_WIDTH && dy == 0)
|
||||
{
|
||||
s32 auxX = (WRAP) ? x.bits.Integer & (wh-1) : x.bits.Integer;
|
||||
const s32 auxY = (WRAP) ? y.bits.Integer & (ht-1) : y.bits.Integer;
|
||||
|
@ -1267,30 +1274,30 @@ FORCEINLINE void rot_scale_op(GPU *gpu, s32 X, s32 Y, s16 PA, s16 PB, s16 PC, s1
|
|||
}
|
||||
|
||||
template<rot_fun fun>
|
||||
FORCEINLINE void apply_rot_fun(GPU *gpu, s32 X, s32 Y, s16 PA, s16 PB, s16 PC, s16 PD, u16 LG, u32 map, u32 tile, const u16 *pal)
|
||||
FORCEINLINE void apply_rot_fun(GPU *gpu, const BGxPARMS ¶m, const u16 LG, const u32 map, const u32 tile, const u16 *pal)
|
||||
{
|
||||
struct _BGxCNT *bgCnt = &(gpu->dispx_st)->dispx_BGxCNT[gpu->currBgNum].bits;
|
||||
s32 wh = gpu->BGSize[gpu->currBgNum][0];
|
||||
s32 ht = gpu->BGSize[gpu->currBgNum][1];
|
||||
|
||||
if (bgCnt->PaletteSet_Wrap)
|
||||
rot_scale_op<fun,true>(gpu, X, Y, PA, PB, PC, PD, LG, wh, ht, map, tile, pal);
|
||||
rot_scale_op<fun,true>(gpu, param, LG, wh, ht, map, tile, pal);
|
||||
else
|
||||
rot_scale_op<fun,false>(gpu, X, Y, PA, PB, PC, PD, LG, wh, ht, map, tile, pal);
|
||||
rot_scale_op<fun,false>(gpu, param, LG, wh, ht, map, tile, pal);
|
||||
}
|
||||
|
||||
|
||||
template<bool MOSAIC>
|
||||
FORCEINLINE void rotBG2(GPU *gpu, s32 X, s32 Y, s16 PA, s16 PB, s16 PC, s16 PD, u16 LG)
|
||||
FORCEINLINE void rotBG2(GPU *gpu, const BGxPARMS ¶m, const u16 LG)
|
||||
{
|
||||
const size_t num = gpu->currBgNum;
|
||||
const u16 *pal = (u16 *)(MMU.ARM9_VMEM + gpu->core * ADDRESS_STEP_1KB);
|
||||
// printf("rot mode\n");
|
||||
apply_rot_fun< rot_tiled_8bit_entry<MOSAIC> >(gpu,X,Y,PA,PB,PC,PD,LG, gpu->BG_map_ram[num], gpu->BG_tile_ram[num], pal);
|
||||
apply_rot_fun< rot_tiled_8bit_entry<MOSAIC> >(gpu, param, LG, gpu->BG_map_ram[num], gpu->BG_tile_ram[num], pal);
|
||||
}
|
||||
|
||||
template<bool MOSAIC>
|
||||
FORCEINLINE void extRotBG2(GPU * gpu, s32 X, s32 Y, s16 PA, s16 PB, s16 PC, s16 PD, s16 LG)
|
||||
FORCEINLINE void extRotBG2(GPU *gpu, const BGxPARMS ¶m, const u16 LG)
|
||||
{
|
||||
const size_t num = gpu->currBgNum;
|
||||
struct _DISPCNT * dispCnt = &(gpu->dispx_st)->dispx_DISPCNT.bits;
|
||||
|
@ -1304,26 +1311,26 @@ FORCEINLINE void extRotBG2(GPU * gpu, s32 X, s32 Y, s16 PA, s16 PB, s16 PC, s16
|
|||
if (pal == NULL) return;
|
||||
// 16 bit bgmap entries
|
||||
if(dispCnt->ExBGxPalette_Enable)
|
||||
apply_rot_fun< rot_tiled_16bit_entry<MOSAIC, true> >(gpu,X,Y,PA,PB,PC,PD,LG, gpu->BG_map_ram[num], gpu->BG_tile_ram[num], pal);
|
||||
apply_rot_fun< rot_tiled_16bit_entry<MOSAIC, true> >(gpu, param, LG, gpu->BG_map_ram[num], gpu->BG_tile_ram[num], pal);
|
||||
else
|
||||
apply_rot_fun< rot_tiled_16bit_entry<MOSAIC, false> >(gpu,X,Y,PA,PB,PC,PD,LG, gpu->BG_map_ram[num], gpu->BG_tile_ram[num], pal);
|
||||
apply_rot_fun< rot_tiled_16bit_entry<MOSAIC, false> >(gpu, param, LG, gpu->BG_map_ram[num], gpu->BG_tile_ram[num], pal);
|
||||
break;
|
||||
|
||||
case BGType_AffineExt_256x1:
|
||||
// 256 colors
|
||||
pal = (u16 *)(MMU.ARM9_VMEM + gpu->core * ADDRESS_STEP_1KB);
|
||||
apply_rot_fun< rot_256_map<MOSAIC> >(gpu,X,Y,PA,PB,PC,PD,LG, gpu->BG_bmp_ram[num], 0, pal);
|
||||
apply_rot_fun< rot_256_map<MOSAIC> >(gpu, param, LG, gpu->BG_bmp_ram[num], 0, pal);
|
||||
break;
|
||||
|
||||
case BGType_AffineExt_Direct:
|
||||
// direct colors / BMP
|
||||
apply_rot_fun< rot_BMP_map<MOSAIC> >(gpu,X,Y,PA,PB,PC,PD,LG, gpu->BG_bmp_ram[num], 0, NULL);
|
||||
apply_rot_fun< rot_BMP_map<MOSAIC> >(gpu, param, LG, gpu->BG_bmp_ram[num], 0, NULL);
|
||||
break;
|
||||
|
||||
case BGType_Large8bpp:
|
||||
// large screen 256 colors
|
||||
pal = (u16 *)(MMU.ARM9_VMEM + gpu->core * ADDRESS_STEP_1KB);
|
||||
apply_rot_fun< rot_256_map<MOSAIC> >(gpu,X,Y,PA,PB,PC,PD,LG, gpu->BG_bmp_large_ram[num], 0, pal);
|
||||
apply_rot_fun< rot_256_map<MOSAIC> >(gpu, param, LG, gpu->BG_bmp_large_ram[num], 0, pal);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -1358,65 +1365,37 @@ template<bool MOSAIC> void lineText(GPU *gpu)
|
|||
|
||||
// Renders the current scanline of the GPU's current background via rotBG2.
//
// In debug mode (gpu->debug) a fixed parameter block is used whose last field
// is derived from gpu->currLine, and the background width is passed as LG.
// Otherwise the BG2 or BG3 parameter block is used (BG2 when currBgNum == 2),
// and after rendering BGxX / BGxY are stepped by BGxPB / BGxPD.
template<bool MOSAIC> void lineRot(GPU *gpu)
{
	if (gpu->debug)
	{
		// This must NOT be 'static': the initializer reads gpu->currLine, and a
		// block-scope static is initialized only the first time control passes
		// through it, which would freeze the value from the first call.
		const BGxPARMS debugParams = {256, 0, 0, -77, 0, (s16)gpu->currLine*GPU_FRAMEBUFFER_NATIVE_WIDTH};
		const s32 wh = gpu->BGSize[gpu->currBgNum][0];
		rotBG2<MOSAIC>(gpu, debugParams, wh);
	}
	else
	{
		BGxPARMS &params = (gpu->currBgNum == 2) ? (gpu->dispx_st)->dispx_BG2PARMS : (gpu->dispx_st)->dispx_BG3PARMS;

		rotBG2<MOSAIC>(gpu, params, 256);

		// Step the stored reference point; presumably this advances it to the
		// next scanline -- confirm against the register documentation.
		params.BGxX += params.BGxPB;
		params.BGxY += params.BGxPD;
	}
}
|
||||
|
||||
// Renders the current scanline of the GPU's current background via extRotBG2.
//
// In debug mode (gpu->debug) a fixed parameter block is used whose last field
// is derived from gpu->currLine, and the background width is passed as LG.
// Otherwise the BG2 or BG3 parameter block is used (BG2 when currBgNum == 2),
// and after rendering BGxX / BGxY are stepped by BGxPB / BGxPD.
template<bool MOSAIC> void lineExtRot(GPU *gpu)
{
	if (gpu->debug)
	{
		// This must NOT be 'static': the initializer reads gpu->currLine, and a
		// block-scope static is initialized only the first time control passes
		// through it, which would freeze the value from the first call.
		const BGxPARMS debugParams = {256, 0, 0, -77, 0, (s16)gpu->currLine*GPU_FRAMEBUFFER_NATIVE_WIDTH};
		const s32 wh = gpu->BGSize[gpu->currBgNum][0];
		extRotBG2<MOSAIC>(gpu, debugParams, wh);
	}
	else
	{
		BGxPARMS &params = (gpu->currBgNum == 2) ? (gpu->dispx_st)->dispx_BG2PARMS : (gpu->dispx_st)->dispx_BG3PARMS;

		extRotBG2<MOSAIC>(gpu, params, 256);

		// Step the stored reference point; presumably this advances it to the
		// next scanline -- confirm against the register documentation.
		params.BGxX += params.BGxPB;
		params.BGxY += params.BGxPD;
	}
}
|
||||
|
||||
|
@ -1942,6 +1921,7 @@ int Screen_Init()
|
|||
{
|
||||
MainScreen.gpu = GPU_Init(GPUCOREID_MAIN);
|
||||
SubScreen.gpu = GPU_Init(GPUCOREID_SUB);
|
||||
gfx3d_init();
|
||||
|
||||
disp_fifo.head = disp_fifo.tail = 0;
|
||||
|
||||
|
@ -1949,7 +1929,6 @@ int Screen_Init()
|
|||
osd = new OSDCLASS(-1);
|
||||
delete previousOSD;
|
||||
|
||||
gfx3d_init();
|
||||
GPU_SetFramebufferSize(GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT);
|
||||
|
||||
return 0;
|
||||
|
@ -1957,11 +1936,15 @@ int Screen_Init()
|
|||
|
||||
void Screen_Reset(void)
|
||||
{
|
||||
gfx3d_reset();
|
||||
GPU_Reset(MainScreen.gpu);
|
||||
GPU_Reset(SubScreen.gpu);
|
||||
MainScreen.offset = 0;
|
||||
SubScreen.offset = _gpuFramebufferHeight;
|
||||
memset_u16(GPU_screen, 0x7FFF, _gpuFramebufferWidth * _gpuFramebufferHeight * 2);
|
||||
|
||||
memset(GPU_screen, 0xFF, _gpuFramebufferWidth * _gpuFramebufferHeight * 2 * sizeof(u16));
|
||||
memset(gfx3d_colorRGBA6665, 0, _gpuFramebufferWidth * _gpuFramebufferHeight * sizeof(FragmentColor));
|
||||
memset(gfx3d_colorRGBA5551, 0, _gpuFramebufferWidth * _gpuFramebufferHeight * sizeof(u16));
|
||||
|
||||
disp_fifo.head = disp_fifo.tail = 0;
|
||||
osd->clear();
|
||||
|
@ -1979,11 +1962,10 @@ void Screen_DeInit(void)
|
|||
delete osd;
|
||||
osd = NULL;
|
||||
|
||||
free(GPU_screen_raw);
|
||||
GPU_screen_raw = NULL;
|
||||
free_aligned(GPU_screen);
|
||||
GPU_screen = NULL;
|
||||
|
||||
free(win_empty);
|
||||
free_aligned(win_empty);
|
||||
win_empty = NULL;
|
||||
}
|
||||
|
||||
|
@ -2004,20 +1986,35 @@ void GPU_SetFramebufferSize(size_t w, size_t h)
|
|||
return;
|
||||
}
|
||||
|
||||
_gpuFramebufferWidth = w;
|
||||
_gpuFramebufferHeight = h;
|
||||
_gpuWidthScale = (float)w / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
_gpuHeightScale = (float)h / (float)GPU_FRAMEBUFFER_NATIVE_HEIGHT;
|
||||
_gpuLargestDstLineCount = (size_t)ceilf(_gpuHeightScale);
|
||||
// Check if we're calling this function from initialization.
|
||||
// If we're not initializing, we need to finish rendering first.
|
||||
if (gfx3d_colorRGBA6665 != NULL && gfx3d_colorRGBA5551 != NULL)
|
||||
{
|
||||
CurrentRenderer->RenderFinish();
|
||||
}
|
||||
|
||||
GPU_screen_raw = (u16 *)realloc(GPU_screen_raw, w * h * sizeof(u16) * 2 + 32);
|
||||
GPU_screen = (u16*)(((uintptr_t)GPU_screen_raw+32) & ~31);
|
||||
MainScreen.offset = (MainScreen.offset == 0) ? 0 : _gpuFramebufferHeight;
|
||||
SubScreen.offset = (SubScreen.offset == 0) ? 0 : _gpuFramebufferHeight;
|
||||
const float newGpuWidthScale = (float)w / (float)GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
const float newGpuHeightScale = (float)h / (float)GPU_FRAMEBUFFER_NATIVE_HEIGHT;
|
||||
const float newGpuLargestDstLineCount = (size_t)ceilf(newGpuHeightScale);
|
||||
const size_t windowBufferSize = w * sizeof(u8);
|
||||
|
||||
u16 *oldGPUScreenPtr = GPU_screen;
|
||||
FragmentColor *oldColorRGBA6665Buffer = gfx3d_colorRGBA6665;
|
||||
u16 *oldColorRGBA5551Buffer = gfx3d_colorRGBA5551;
|
||||
u16 *oldMainScreenTempScanlineBuffer = MainScreen.gpu->tempScanlineBuffer;
|
||||
u16 *oldSubScreenTempScanlineBuffer = SubScreen.gpu->tempScanlineBuffer;
|
||||
u8 *oldMainScreenBGPixels = MainScreen.gpu->bgPixels;
|
||||
u8 *oldSubScreenBGPixels = SubScreen.gpu->bgPixels;
|
||||
|
||||
u8 *oldWinEmptyPtr = win_empty;
|
||||
u8 *oldMainScreenHWin0 = MainScreen.gpu->h_win[0];
|
||||
u8 *oldMainScreenHWin1 = MainScreen.gpu->h_win[1];
|
||||
u8 *oldSubScreenHWin0 = SubScreen.gpu->h_win[0];
|
||||
u8 *oldSubScreenHWin1 = SubScreen.gpu->h_win[1];
|
||||
|
||||
for (size_t srcX = 0, currentPitchCount = 0; srcX < GPU_FRAMEBUFFER_NATIVE_WIDTH; srcX++)
|
||||
{
|
||||
const size_t pitch = (size_t)ceilf((srcX+1) * _gpuWidthScale) - currentPitchCount;
|
||||
const size_t pitch = (size_t)ceilf((srcX+1) * newGpuWidthScale) - currentPitchCount;
|
||||
_gpuDstPitchCount[srcX] = pitch;
|
||||
_gpuDstPitchIndex[srcX] = currentPitchCount;
|
||||
currentPitchCount += pitch;
|
||||
|
@ -2025,36 +2022,72 @@ void GPU_SetFramebufferSize(size_t w, size_t h)
|
|||
|
||||
for (size_t srcY = 0, currentLineCount = 0; srcY < GPU_FRAMEBUFFER_NATIVE_HEIGHT; srcY++)
|
||||
{
|
||||
const size_t lineCount = (size_t)ceilf((srcY+1) * _gpuHeightScale) - currentLineCount;
|
||||
const size_t lineCount = (size_t)ceilf((srcY+1) * newGpuHeightScale) - currentLineCount;
|
||||
_gpuDstLineCount[srcY] = lineCount;
|
||||
_gpuDstLineIndex[srcY] = currentLineCount;
|
||||
currentLineCount += lineCount;
|
||||
}
|
||||
|
||||
MainScreen.gpu->tempScanlineBufferRaw = (u16 *)realloc(MainScreen.gpu->tempScanlineBufferRaw, w * _gpuLargestDstLineCount * sizeof(u16) + 32);
|
||||
SubScreen.gpu->tempScanlineBufferRaw = (u16 *)realloc(SubScreen.gpu->tempScanlineBufferRaw, w * _gpuLargestDstLineCount * sizeof(u16) + 32);
|
||||
MainScreen.gpu->tempScanlineBuffer = (u16*)(((uintptr_t)MainScreen.gpu->tempScanlineBufferRaw+32) & ~31);
|
||||
SubScreen.gpu->tempScanlineBuffer = (u16*)(((uintptr_t)SubScreen.gpu->tempScanlineBufferRaw+32) & ~31);
|
||||
MainScreen.gpu->bgPixels = (u8 *)realloc(MainScreen.gpu->bgPixels, w * _gpuLargestDstLineCount * 4 * sizeof(u8)); // yes indeed, this is oversized. map debug tools try to write to it
|
||||
SubScreen.gpu->bgPixels = (u8 *)realloc(SubScreen.gpu->bgPixels, w * _gpuLargestDstLineCount * 4 * sizeof(u8)); // yes indeed, this is oversized. map debug tools try to write to it
|
||||
u16 *newGPUScreenPtr = (u16 *)malloc_alignedCacheLine(w * h * sizeof(u16) * 2);
|
||||
memset_u16(newGPUScreenPtr, 0x7FFF, w * h * 2);
|
||||
|
||||
const size_t windowBufferSize = w * sizeof(u8);
|
||||
const u8 *oldWinEmptyPtr = win_empty;
|
||||
win_empty = (u8 *)realloc(win_empty, windowBufferSize);
|
||||
memset(win_empty, 0, windowBufferSize);
|
||||
FragmentColor *newColorRGBA6665Buffer = (FragmentColor *)malloc_alignedCacheLine(w * h * sizeof(FragmentColor));
|
||||
u16 *newColorRGBA5551 = (u16 *)malloc_alignedCacheLine(w * h * sizeof(u16));
|
||||
|
||||
MainScreen.gpu->h_win[0] = (u8 *)realloc(MainScreen.gpu->h_win[0], windowBufferSize);
|
||||
MainScreen.gpu->h_win[1] = (u8 *)realloc(MainScreen.gpu->h_win[1], windowBufferSize);
|
||||
SubScreen.gpu->h_win[0] = (u8 *)realloc(SubScreen.gpu->h_win[0], windowBufferSize);
|
||||
SubScreen.gpu->h_win[1] = (u8 *)realloc(SubScreen.gpu->h_win[1], windowBufferSize);
|
||||
u16 *newMainScreenTempScanlineBuffer = (u16 *)malloc_alignedCacheLine(w * newGpuLargestDstLineCount * sizeof(u16));
|
||||
u16 *newSubScreenTempScanlineBuffer = (u16 *)malloc_alignedCacheLine(w * newGpuLargestDstLineCount * sizeof(u16));
|
||||
u8 *newMainScreenBGPixels = (u8 *)malloc_alignedCacheLine(w * newGpuLargestDstLineCount * 4 * sizeof(u8)); // yes indeed, this is oversized. map debug tools try to write to it
|
||||
u8 *newSubScreenBGPixels = (u8 *)malloc_alignedCacheLine(w * newGpuLargestDstLineCount * 4 * sizeof(u8)); // yes indeed, this is oversized. map debug tools try to write to it
|
||||
|
||||
MainScreen.gpu->curr_win[0] = (MainScreen.gpu->curr_win[0] == NULL || MainScreen.gpu->curr_win[0] == oldWinEmptyPtr) ? win_empty : MainScreen.gpu->h_win[0];
|
||||
MainScreen.gpu->curr_win[1] = (MainScreen.gpu->curr_win[1] == NULL || MainScreen.gpu->curr_win[1] == oldWinEmptyPtr) ? win_empty : MainScreen.gpu->h_win[1];
|
||||
SubScreen.gpu->curr_win[0] = (SubScreen.gpu->curr_win[0] == NULL || SubScreen.gpu->curr_win[0] == oldWinEmptyPtr) ? win_empty : SubScreen.gpu->h_win[0];
|
||||
SubScreen.gpu->curr_win[1] = (SubScreen.gpu->curr_win[1] == NULL || SubScreen.gpu->curr_win[1] == oldWinEmptyPtr) ? win_empty : SubScreen.gpu->h_win[1];
|
||||
u8 *newWinEmptyPtr = (u8 *)malloc_alignedCacheLine(windowBufferSize);
|
||||
u8 *newMainScreenHWin0 = (u8 *)malloc_alignedCacheLine(windowBufferSize);
|
||||
u8 *newMainScreenHWin1 = (u8 *)malloc_alignedCacheLine(windowBufferSize);
|
||||
u8 *newSubScreenHWin0 = (u8 *)malloc_alignedCacheLine(windowBufferSize);
|
||||
u8 *newSubScreenHWin1 = (u8 *)malloc_alignedCacheLine(windowBufferSize);
|
||||
memset(newWinEmptyPtr, 0, windowBufferSize);
|
||||
|
||||
memset_u16(GPU_screen, 0x7FFF, w * h * 2);
|
||||
gfx3d_setFramebufferSize(_gpuFramebufferWidth, _gpuFramebufferHeight);
|
||||
_gpuFramebufferWidth = w;
|
||||
_gpuFramebufferHeight = h;
|
||||
_gpuWidthScale = newGpuWidthScale;
|
||||
_gpuHeightScale = newGpuHeightScale;
|
||||
_gpuLargestDstLineCount = newGpuLargestDstLineCount;
|
||||
|
||||
MainScreen.gpu->curr_win[0] = (MainScreen.gpu->curr_win[0] == NULL || MainScreen.gpu->curr_win[0] == oldWinEmptyPtr) ? newWinEmptyPtr : newMainScreenHWin0;
|
||||
MainScreen.gpu->curr_win[1] = (MainScreen.gpu->curr_win[1] == NULL || MainScreen.gpu->curr_win[1] == oldWinEmptyPtr) ? newWinEmptyPtr : newMainScreenHWin1;
|
||||
SubScreen.gpu->curr_win[0] = (SubScreen.gpu->curr_win[0] == NULL || SubScreen.gpu->curr_win[0] == oldWinEmptyPtr) ? newWinEmptyPtr : newSubScreenHWin0;
|
||||
SubScreen.gpu->curr_win[1] = (SubScreen.gpu->curr_win[1] == NULL || SubScreen.gpu->curr_win[1] == oldWinEmptyPtr) ? newWinEmptyPtr : newSubScreenHWin1;
|
||||
|
||||
win_empty = newWinEmptyPtr;
|
||||
MainScreen.gpu->h_win[0] = newMainScreenHWin0;
|
||||
MainScreen.gpu->h_win[1] = newMainScreenHWin1;
|
||||
SubScreen.gpu->h_win[0] = newSubScreenHWin0;
|
||||
SubScreen.gpu->h_win[1] = newSubScreenHWin1;
|
||||
|
||||
MainScreen.gpu->tempScanlineBuffer = newMainScreenTempScanlineBuffer;
|
||||
SubScreen.gpu->tempScanlineBuffer = newSubScreenTempScanlineBuffer;
|
||||
MainScreen.gpu->bgPixels = newMainScreenBGPixels;
|
||||
SubScreen.gpu->bgPixels = newSubScreenBGPixels;
|
||||
GPU_screen = newGPUScreenPtr;
|
||||
gfx3d_colorRGBA6665 = newColorRGBA6665Buffer;
|
||||
gfx3d_colorRGBA5551 = newColorRGBA5551;
|
||||
|
||||
MainScreen.offset = (MainScreen.offset == 0) ? 0 : h;
|
||||
SubScreen.offset = (SubScreen.offset == 0) ? 0 : h;
|
||||
|
||||
CurrentRenderer->SetFramebufferSize(w, h);
|
||||
|
||||
free_aligned(oldGPUScreenPtr);
|
||||
free_aligned(oldColorRGBA6665Buffer);
|
||||
free_aligned(oldColorRGBA5551Buffer);
|
||||
free_aligned(oldWinEmptyPtr);
|
||||
free_aligned(oldMainScreenHWin0);
|
||||
free_aligned(oldMainScreenHWin1);
|
||||
free_aligned(oldSubScreenHWin0);
|
||||
free_aligned(oldSubScreenHWin1);
|
||||
free_aligned(oldMainScreenTempScanlineBuffer);
|
||||
free_aligned(oldSubScreenTempScanlineBuffer);
|
||||
free_aligned(oldMainScreenBGPixels);
|
||||
free_aligned(oldSubScreenBGPixels);
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
|
@ -2112,17 +2145,11 @@ void GPU_set_DISPCAPCNT(u32 val)
|
|||
|
||||
static void GPU_RenderLine_layer(GPU *gpu, const u16 l)
|
||||
{
|
||||
CACHE_ALIGN u16 spr[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
CACHE_ALIGN u8 sprAlpha[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
CACHE_ALIGN u8 sprType[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
CACHE_ALIGN u8 sprPrio[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
|
||||
const size_t pixCount = _gpuFramebufferWidth * _gpuDstLineCount[l];
|
||||
|
||||
u16 *dstLine = gpu->currDst;
|
||||
struct _DISPCNT *dispCnt = &(gpu->dispx_st)->dispx_DISPCNT.bits;
|
||||
itemsForPriority_t *item;
|
||||
bool BG_enabled = true;
|
||||
|
||||
gpu->currentFadeInColors = &fadeInColors[gpu->BLDY_EVY][0];
|
||||
gpu->currentFadeOutColors = &fadeOutColors[gpu->BLDY_EVY][0];
|
||||
|
@ -2163,9 +2190,9 @@ PLAIN_CLEAR:
|
|||
memset(gpu->bgPixels, 5, pixCount);
|
||||
|
||||
// init background color & priorities
|
||||
memset(sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
memset(sprType, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
memset(sprPrio, 0xFF, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
memset(gpu->sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
memset(gpu->sprType, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
memset(gpu->sprPrio, 0xFF, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
memset(sprWin, 0, _gpuFramebufferWidth);
|
||||
|
||||
// init pixels priorities
|
||||
|
@ -2179,18 +2206,18 @@ PLAIN_CLEAR:
|
|||
if (gpu->LayersEnable[4])
|
||||
{
|
||||
//n.b. - this is clearing the sprite line buffer to the background color,
|
||||
memset_u16(spr, backdrop_color, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
memset_u16(gpu->sprColor, backdrop_color, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||
|
||||
//zero 06-may-09: I properly supported window color effects for backdrop, but I am not sure
|
||||
//how it interacts with this. I wish we knew why we needed this
|
||||
|
||||
gpu->spriteRender(spr, sprAlpha, sprType, sprPrio);
|
||||
mosaicSpriteLine(gpu, l, spr, sprAlpha, sprType, sprPrio);
|
||||
gpu->spriteRender(gpu->sprColor, gpu->sprAlpha, gpu->sprType, gpu->sprPrio);
|
||||
mosaicSpriteLine(gpu, l, gpu->sprColor, gpu->sprAlpha, gpu->sprType, gpu->sprPrio);
|
||||
|
||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i++)
|
||||
{
|
||||
// assign them to the good priority item
|
||||
const size_t prio = sprPrio[i];
|
||||
const size_t prio = gpu->sprPrio[i];
|
||||
if (prio >= 4) continue;
|
||||
|
||||
item = &(gpu->itemsForPriority[prio]);
|
||||
|
@ -2199,12 +2226,11 @@ PLAIN_CLEAR:
|
|||
}
|
||||
}
|
||||
|
||||
if (!gpu->LayersEnable[0] && !gpu->LayersEnable[1] && !gpu->LayersEnable[2] && !gpu->LayersEnable[3])
|
||||
BG_enabled = false;
|
||||
|
||||
for (size_t j = 0; j < 8; j++)
|
||||
gpu->blend2[j] = (gpu->BLDCNT & (0x100 << j)) != 0;
|
||||
|
||||
const bool BG_enabled = gpu->LayersEnable[0] || gpu->LayersEnable[1] || gpu->LayersEnable[2] || gpu->LayersEnable[3];
|
||||
|
||||
// paint lower priorities first
|
||||
// then higher priorities on top
|
||||
for (size_t prio = NB_PRIORITIES; prio > 0; )
|
||||
|
@ -2225,42 +2251,36 @@ PLAIN_CLEAR:
|
|||
struct _BGxCNT *bgCnt = &(gpu->dispx_st)->dispx_BGxCNT[layerNum].bits;
|
||||
gpu->curr_mosaic_enabled = bgCnt->Mosaic_Enable;
|
||||
|
||||
if (gpu->core == GPUCOREID_MAIN)
|
||||
{
|
||||
if (layerNum == 0 && dispCnt->BG0_3D)
|
||||
if (gpu->core == GPUCOREID_MAIN && layerNum == 0 && dispCnt->BG0_3D)
|
||||
{
|
||||
gpu->currBgNum = 0;
|
||||
|
||||
const u16 hofs = (u16)( ((float)gpu->getHOFS(layerNum) * _gpuWidthScale) + 0.5f );
|
||||
u16 *oldDstLine = dstLine;
|
||||
|
||||
for (size_t lineIndex = 0; lineIndex < _gpuDstLineCount[l]; lineIndex++, dstLine += _gpuFramebufferWidth)
|
||||
for (size_t line = 0; line < _gpuDstLineCount[l]; line++)
|
||||
{
|
||||
const FragmentColor *colorLine = gfx3d_GetLineDataRGBA6665(_gpuDstLineIndex[l] + lineIndex);
|
||||
const FragmentColor *srcLine = gfx3d_GetLineDataRGBA6665(_gpuDstLineIndex[l] + line);
|
||||
|
||||
for (size_t k = 0; k < _gpuFramebufferWidth; k++)
|
||||
for (size_t dstX = 0; dstX < _gpuFramebufferWidth; dstX++)
|
||||
{
|
||||
size_t q = k + hofs;
|
||||
if (q >= _gpuFramebufferWidth * 2)
|
||||
size_t srcX = dstX + hofs;
|
||||
if (srcX >= _gpuFramebufferWidth * 2)
|
||||
{
|
||||
q -= _gpuFramebufferWidth * 2;
|
||||
srcX -= _gpuFramebufferWidth * 2;
|
||||
}
|
||||
|
||||
if (q >= _gpuFramebufferWidth || colorLine[q].a == 0)
|
||||
if (srcX >= _gpuFramebufferWidth || srcLine[srcX].a == 0)
|
||||
continue;
|
||||
|
||||
gpu->setFinalColor3d(k,
|
||||
dstLine[k],
|
||||
gpu->bgPixels + (lineIndex * _gpuFramebufferWidth),
|
||||
colorLine[q]);
|
||||
gpu->setFinalColor3d(dstX,
|
||||
dstLine + (line * _gpuFramebufferWidth),
|
||||
gpu->bgPixels + (line * _gpuFramebufferWidth),
|
||||
srcLine[srcX]);
|
||||
}
|
||||
}
|
||||
|
||||
dstLine = oldDstLine;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
//useful for debugging individual layers
|
||||
//if(gpu->core == GPUCOREID_SUB || layerNum != 2) continue;
|
||||
|
@ -2288,13 +2308,12 @@ PLAIN_CLEAR:
|
|||
{
|
||||
for (size_t p = 0; p < _gpuDstPitchCount[x]; p++)
|
||||
{
|
||||
setFinalColorSpr(gpu,
|
||||
gpu->setFinalColorSpr(_gpuDstPitchIndex[x] + p,
|
||||
gpu->currDst + (line * _gpuFramebufferWidth),
|
||||
gpu->bgPixels + (line * _gpuFramebufferWidth),
|
||||
spr[x],
|
||||
sprAlpha[x],
|
||||
sprType[x],
|
||||
_gpuDstPitchIndex[x] + p);
|
||||
gpu->sprColor[x],
|
||||
gpu->sprAlpha[x],
|
||||
gpu->sprType[x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2302,7 +2321,7 @@ PLAIN_CLEAR:
|
|||
}
|
||||
}
|
||||
|
||||
template<bool SKIP> static void GPU_RenderLine_DispCapture(u16 l)
|
||||
template<bool SKIP> static void GPU_RenderLine_DispCapture(const u16 l)
|
||||
{
|
||||
//this macro takes advantage of the fact that there are only two possible values for capx
|
||||
#define CAPCOPY(SRC, DST, SETALPHABIT) \
|
||||
|
@ -2373,7 +2392,7 @@ template<bool SKIP> static void GPU_RenderLine_DispCapture(u16 l)
|
|||
case 0: // Capture screen (BG + OBJ + 3D)
|
||||
{
|
||||
//INFO("Capture screen (BG + OBJ + 3D)\n");
|
||||
const u16 *src = gpu->tempScanline;
|
||||
const u16 *src = gpu->currDst;
|
||||
CAPCOPY(src, cap_dst, true);
|
||||
}
|
||||
break;
|
||||
|
@ -2381,8 +2400,8 @@ template<bool SKIP> static void GPU_RenderLine_DispCapture(u16 l)
|
|||
case 1: // Capture 3D
|
||||
{
|
||||
//INFO("Capture 3D\n");
|
||||
const u16 *colorLine = gfx3d_GetLineDataRGBA5551(l);
|
||||
CAPCOPY(colorLine, cap_dst, false);
|
||||
const u16 *src = gfx3d_GetLineDataRGBA5551(l);
|
||||
CAPCOPY(src, cap_dst, false);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2418,7 +2437,7 @@ template<bool SKIP> static void GPU_RenderLine_DispCapture(u16 l)
|
|||
if (gpu->dispCapCnt.srcA == 0)
|
||||
{
|
||||
// Capture screen (BG + OBJ + 3D)
|
||||
srcA = gpu->tempScanline;
|
||||
srcA = gpu->currDst;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2512,10 +2531,35 @@ static INLINE void GPU_RenderLine_MasterBrightness(const GPUMasterBrightMode mod
|
|||
{
|
||||
if (factor < 16)
|
||||
{
|
||||
for (size_t i = 0; i < pixCount; ++i)
|
||||
#ifdef ENABLE_SSE2
|
||||
static size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
static const __m128i colorMask = _mm_set1_epi16(0x7FFF);
|
||||
|
||||
for (size_t i = 0; i < ssePixCount; i += 8)
|
||||
{
|
||||
__m128i dstColor_vec128 = _mm_load_si128((__m128i *)(dstLine + i));
|
||||
dstColor_vec128 = _mm_and_si128(dstColor_vec128, colorMask);
|
||||
|
||||
dstLine[i+7] = fadeInColors[factor][ _mm_extract_epi16(dstColor_vec128, 7) ];
|
||||
dstLine[i+6] = fadeInColors[factor][ _mm_extract_epi16(dstColor_vec128, 6) ];
|
||||
dstLine[i+5] = fadeInColors[factor][ _mm_extract_epi16(dstColor_vec128, 5) ];
|
||||
dstLine[i+4] = fadeInColors[factor][ _mm_extract_epi16(dstColor_vec128, 4) ];
|
||||
dstLine[i+3] = fadeInColors[factor][ _mm_extract_epi16(dstColor_vec128, 3) ];
|
||||
dstLine[i+2] = fadeInColors[factor][ _mm_extract_epi16(dstColor_vec128, 2) ];
|
||||
dstLine[i+1] = fadeInColors[factor][ _mm_extract_epi16(dstColor_vec128, 1) ];
|
||||
dstLine[i+0] = fadeInColors[factor][ _mm_extract_epi16(dstColor_vec128, 0) ];
|
||||
}
|
||||
|
||||
for (size_t i = ssePixCount; i < pixCount; i++)
|
||||
{
|
||||
dstLine[i] = fadeInColors[factor][ dstLine[i] & 0x7FFF ];
|
||||
}
|
||||
#else
|
||||
for (size_t i = 0; i < pixCount; i++)
|
||||
{
|
||||
dstLine[i] = fadeInColors[factor][ dstLine[i] & 0x7FFF ];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2529,10 +2573,35 @@ static INLINE void GPU_RenderLine_MasterBrightness(const GPUMasterBrightMode mod
|
|||
{
|
||||
if (factor < 16)
|
||||
{
|
||||
for (size_t i = 0; i < pixCount; ++i)
|
||||
#ifdef ENABLE_SSE2
|
||||
static size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
static const __m128i colorMask = _mm_set1_epi16(0x7FFF);
|
||||
|
||||
for (size_t i = 0; i < ssePixCount; i += 8)
|
||||
{
|
||||
__m128i dstColor_vec128 = _mm_load_si128((__m128i *)(dstLine + i));
|
||||
dstColor_vec128 = _mm_and_si128(dstColor_vec128, colorMask);
|
||||
|
||||
dstLine[i+7] = fadeOutColors[factor][ _mm_extract_epi16(dstColor_vec128, 7) ];
|
||||
dstLine[i+6] = fadeOutColors[factor][ _mm_extract_epi16(dstColor_vec128, 6) ];
|
||||
dstLine[i+5] = fadeOutColors[factor][ _mm_extract_epi16(dstColor_vec128, 5) ];
|
||||
dstLine[i+4] = fadeOutColors[factor][ _mm_extract_epi16(dstColor_vec128, 4) ];
|
||||
dstLine[i+3] = fadeOutColors[factor][ _mm_extract_epi16(dstColor_vec128, 3) ];
|
||||
dstLine[i+2] = fadeOutColors[factor][ _mm_extract_epi16(dstColor_vec128, 2) ];
|
||||
dstLine[i+1] = fadeOutColors[factor][ _mm_extract_epi16(dstColor_vec128, 1) ];
|
||||
dstLine[i+0] = fadeOutColors[factor][ _mm_extract_epi16(dstColor_vec128, 0) ];
|
||||
}
|
||||
|
||||
for (size_t i = ssePixCount; i < pixCount; i++)
|
||||
{
|
||||
dstLine[i] = fadeOutColors[factor][ dstLine[i] & 0x7FFF ];
|
||||
}
|
||||
#else
|
||||
for (size_t i = 0; i < pixCount; i++)
|
||||
{
|
||||
dstLine[i] = fadeOutColors[factor][ dstLine[i] & 0x7FFF ];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2695,12 +2764,12 @@ void GPU_RenderLine(NDS_Screen *screen, const u16 l, bool skip)
|
|||
if (gpu->dispMode == GPUDisplayMode_Normal)
|
||||
{
|
||||
//optimization: render straight to the output buffer when that's what we are going to end up displaying anyway
|
||||
gpu->tempScanline = gpu->currDst = dstLine;
|
||||
gpu->currDst = dstLine;
|
||||
}
|
||||
else
|
||||
{
|
||||
//otherwise, we need to go to a temp buffer
|
||||
gpu->tempScanline = gpu->currDst = gpu->tempScanlineBuffer;
|
||||
gpu->currDst = gpu->tempScanlineBuffer;
|
||||
}
|
||||
|
||||
GPU_RenderLine_layer(gpu, l);
|
||||
|
@ -2731,13 +2800,15 @@ void GPU_RenderLine(NDS_Screen *screen, const u16 l, bool skip)
|
|||
{
|
||||
const u16 color = LE_TO_LOCAL_16(src[x]);
|
||||
|
||||
for (size_t line = 0; line < _gpuDstLineCount[l]; line++)
|
||||
{
|
||||
for (size_t p = 0; p < _gpuDstPitchCount[x]; p++)
|
||||
{
|
||||
dstLine[(line * _gpuFramebufferWidth) + (_gpuDstPitchIndex[x] + p)] = color;
|
||||
dstLine[_gpuDstPitchIndex[x] + p] = color;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t line = 1; line < dstLineCount; line++)
|
||||
{
|
||||
memcpy(dstLine + (line * _gpuFramebufferWidth), dstLine, _gpuFramebufferWidth * sizeof(u16));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2751,6 +2822,22 @@ void GPU_RenderLine(NDS_Screen *screen, const u16 l, bool skip)
|
|||
{
|
||||
((u32 *)dstLine)[i] = DISP_FIFOrecv() & 0x7FFF7FFF;
|
||||
}
|
||||
|
||||
if (_gpuFramebufferWidth != GPU_FRAMEBUFFER_NATIVE_WIDTH)
|
||||
{
|
||||
for (size_t i = GPU_FRAMEBUFFER_NATIVE_WIDTH - 1; i < GPU_FRAMEBUFFER_NATIVE_WIDTH; i--)
|
||||
{
|
||||
for (size_t p = _gpuDstPitchCount[i] - 1; p < _gpuDstPitchCount[i]; p--)
|
||||
{
|
||||
dstLine[_gpuDstPitchIndex[i] + p] = dstLine[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t line = 1; line < dstLineCount; line++)
|
||||
{
|
||||
memcpy(dstLine + (line * _gpuFramebufferWidth), dstLine, _gpuFramebufferWidth * sizeof(u16));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2875,12 +2962,12 @@ void GPU::refreshAffineStartRegs(const int num, const int xy)
|
|||
return;
|
||||
}
|
||||
|
||||
BGxPARMS *parms = (num == 2) ? &(dispx_st)->dispx_BG2PARMS : &(dispx_st)->dispx_BG3PARMS;
|
||||
BGxPARMS *params = (num == 2) ? &(dispx_st)->dispx_BG2PARMS : &(dispx_st)->dispx_BG3PARMS;
|
||||
|
||||
if (xy == 0)
|
||||
parms->BGxX = affineInfo[num-2].x;
|
||||
params->BGxX = affineInfo[num-2].x;
|
||||
else
|
||||
parms->BGxY = affineInfo[num-2].y;
|
||||
params->BGxY = affineInfo[num-2].y;
|
||||
}
|
||||
|
||||
template<bool MOSAIC> void GPU::modeRender(const size_t layer)
|
||||
|
|
|
@ -621,6 +621,11 @@ struct GPU
|
|||
//this indicates whether this gpu is handling debug tools
|
||||
bool debug;
|
||||
|
||||
CACHE_ALIGN u16 sprColor[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
CACHE_ALIGN u8 sprAlpha[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
CACHE_ALIGN u8 sprType[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
CACHE_ALIGN u8 sprPrio[GPU_FRAMEBUFFER_NATIVE_WIDTH];
|
||||
|
||||
_BGxCNT & bgcnt(int num) { return (dispx_st)->dispx_BGxCNT[num].bits; }
|
||||
_DISPCNT & dispCnt() { return dispx_st->dispx_DISPCNT.bits; }
|
||||
template<bool MOSAIC> void modeRender(const size_t layer);
|
||||
|
@ -706,13 +711,7 @@ struct GPU
|
|||
u16 *currentFadeInColors, *currentFadeOutColors;
|
||||
bool blend2[8];
|
||||
|
||||
//this should be suitably aligned for SSE2 (32bytes)
|
||||
u16 *tempScanlineBuffer;
|
||||
//this is the raw unadjusted pointer
|
||||
u16 *tempScanlineBufferRaw;
|
||||
|
||||
u16 *tempScanline;
|
||||
|
||||
GPUMasterBrightMode MasterBrightMode;
|
||||
u32 MasterBrightFactor;
|
||||
|
||||
|
@ -748,10 +747,13 @@ struct GPU
|
|||
u16 blend(const u16 colA, const u16 colB);
|
||||
|
||||
template<bool BACKDROP, BlendFunc FUNC, bool WINDOW>
|
||||
FORCEINLINE FASTCALL bool _master_setFinalBGColor(const u16 *dstLine, const u8 *bgPixelsLine, u16 &outColor, const size_t dstX);
|
||||
FORCEINLINE FASTCALL bool _master_setFinalBGColor(const size_t dstX, const u16 *dstLine, const u8 *bgPixelsLine, u16 &outColor);
|
||||
|
||||
template<BlendFunc FUNC, bool WINDOW>
|
||||
FORCEINLINE FASTCALL void _master_setFinal3dColor(const size_t dstX, u16 &outDst, u8 *bgPixelsLine, const FragmentColor src);
|
||||
FORCEINLINE FASTCALL void _master_setFinal3dColor(const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, const FragmentColor src);
|
||||
|
||||
template<BlendFunc FUNC, bool WINDOW>
|
||||
FORCEINLINE FASTCALL void _master_setFinalOBJColor(const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, const u16 src, const u8 alpha, const u8 type);
|
||||
|
||||
int setFinalColorBck_funcNum;
|
||||
int bgFunc;
|
||||
|
@ -774,9 +776,10 @@ struct GPU
|
|||
}
|
||||
|
||||
|
||||
void setFinalColor3d(const size_t dstX, u16 &outDst, u8 *bgPixelsLine, const FragmentColor src);
|
||||
void setFinalColor3d(const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, const FragmentColor src);
|
||||
void setFinalColorSpr(const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, const u16 src, const u8 alpha, const u8 type);
|
||||
|
||||
template<bool BACKDROP, int FUNCNUM> void setFinalColorBG(u16 *dstLine, u8 *bgPixelsLine, u16 color, const size_t dstX);
|
||||
template<bool BACKDROP, int FUNCNUM> void setFinalColorBG(const size_t dstX, u16 *dstLine, u8 *bgPixelsLine, u16 src);
|
||||
template<bool MOSAIC, bool BACKDROP> FORCEINLINE void __setFinalColorBck(u16 color, const size_t srcX, const bool opaque);
|
||||
template<bool MOSAIC, bool BACKDROP, int FUNCNUM> FORCEINLINE void ___setFinalColorBck(u16 color, const size_t srcX, const bool opaque);
|
||||
|
||||
|
|
|
@ -2504,7 +2504,6 @@ void NDS_Reset()
|
|||
memcpy(&TSCal, firmware->getTouchCalibrate(), sizeof(TSCalInfo));
|
||||
|
||||
Screen_Reset();
|
||||
gfx3d_reset();
|
||||
|
||||
WIFI_Reset();
|
||||
memcpy(FW_Mac, (MMU.fw.data + 0x36), 6);
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "debug.h"
|
||||
#include "gfx3d.h"
|
||||
#include "NDSSystem.h"
|
||||
|
@ -892,64 +893,60 @@ void OpenGLRenderer::SetVersion(unsigned int major, unsigned int minor, unsigned
|
|||
this->versionRevision = revision;
|
||||
}
|
||||
|
||||
#if defined(ENABLE_SSE2) && defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||
#if defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||
Render3DError OpenGLRenderer::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
|
||||
{
|
||||
// Convert from 32-bit BGRA8888 format to 32-bit RGBA6665 reversed format. OpenGL
|
||||
// stores pixels using a flipped Y-coordinate, so this needs to be flipped back
|
||||
// to the DS Y-coordinate.
|
||||
|
||||
if ((this->_framebufferWidth % 4) == 0)
|
||||
{
|
||||
const size_t pixCount = this->_framebufferWidth;
|
||||
const size_t ssePixCount = pixCount - (pixCount % 4);
|
||||
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
for (size_t x = 0; x < this->_framebufferWidth; x+=4, ir+=4, iw+=4)
|
||||
for (size_t x = 0; x < ssePixCount; x+=4, ir+=4, iw+=4)
|
||||
{
|
||||
// Convert to RGBA6665
|
||||
__m128i v = _mm_load_si128((__m128i *)(this->_framebufferColor + ir));
|
||||
v = _mm_srli_epi32(v, 2);
|
||||
__m128i color = _mm_load_si128((__m128i *)(this->_framebufferColor + ir));
|
||||
color = _mm_srli_epi32(color, 2);
|
||||
|
||||
__m128i a = _mm_srli_epi32(v, 1); // Special handling for 5-bit alpha
|
||||
__m128i a = _mm_srli_epi32(color, 1); // Special handling for 5-bit alpha
|
||||
a = _mm_and_si128(a, _mm_set1_epi32(0x1F000000));
|
||||
|
||||
v = _mm_and_si128(v, _mm_set1_epi32(0x003F3F3F));
|
||||
v = _mm_or_si128(v, a);
|
||||
v = _mm_shuffle_epi8(v, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2)); // Swizzle RGBA to BGRA
|
||||
_mm_store_si128((__m128i *)(dstRGBA6665 + iw), v);
|
||||
color = _mm_and_si128(color, _mm_set1_epi32(0x003F3F3F));
|
||||
color = _mm_or_si128(color, a);
|
||||
color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2)); // Swizzle RGBA to BGRA
|
||||
_mm_store_si128((__m128i *)(dstRGBA6665 + iw), color);
|
||||
|
||||
// Convert to RGBA5551
|
||||
v = _mm_load_si128((__m128i *)(this->_framebufferColor + ir));
|
||||
color = _mm_load_si128((__m128i *)(this->_framebufferColor + ir));
|
||||
|
||||
__m128i b = _mm_and_si128(v, _mm_set1_epi32(0x000000F8)); // Read from R
|
||||
__m128i b = _mm_and_si128(color, _mm_set1_epi32(0x000000F8)); // Read from R
|
||||
b = _mm_slli_epi32(b, 7); // Shift to B
|
||||
|
||||
__m128i g = _mm_and_si128(v, _mm_set1_epi32(0x0000F800)); // Read from G
|
||||
__m128i g = _mm_and_si128(color, _mm_set1_epi32(0x0000F800)); // Read from G
|
||||
g = _mm_srli_epi32(g, 6); // Shift in G
|
||||
|
||||
__m128i r = _mm_and_si128(v, _mm_set1_epi32(0x00F80000)); // Read from B
|
||||
__m128i r = _mm_and_si128(color, _mm_set1_epi32(0x00F80000)); // Read from B
|
||||
r = _mm_srli_epi32(r, 19); // Shift to R
|
||||
|
||||
a = _mm_and_si128(v, _mm_set1_epi32(0xFF000000)); // Read from A
|
||||
a = _mm_and_si128(color, _mm_set1_epi32(0xFF000000)); // Read from A
|
||||
// The RGBA5551 alpha bit is set whenever the source alpha is non-zero, matching the scalar
// path ((a == 0) ? 0x0000 : 0x8000). A signed greater-than-zero compare cannot be used here:
// alpha sits in the top byte, so any alpha >= 0x80 sets bit 31 and reads as a negative number.
a = _mm_cmpeq_epi32(a, _mm_setzero_si128()); // All ones in lanes where A == 0
a = _mm_andnot_si128(a, _mm_set1_epi32(0x00008000)); // Alpha bit in lanes where A != 0
|
||||
|
||||
v = b;
|
||||
v = _mm_add_epi32(v, g);
|
||||
v = _mm_add_epi32(v, r);
|
||||
v = _mm_add_epi32(v, a);
|
||||
color = b;
|
||||
color = _mm_or_si128(color, g);
|
||||
color = _mm_or_si128(color, r);
|
||||
color = _mm_or_si128(color, a);
|
||||
|
||||
// All the colors are currently placed every other 16 bits, so we need to swizzle them
|
||||
// to the lower 64 bits of our vector before we store them back to memory.
|
||||
v = _mm_shuffle_epi8(v, _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + iw), v);
|
||||
color = _mm_shuffle_epi8(color, _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0));
|
||||
_mm_storel_epi64((__m128i *)(dstRGBA5551 + iw), color);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t y = 0, ir = 0, iw = ((this->_framebufferHeight - 1) * this->_framebufferWidth); y < this->_framebufferHeight; y++, iw -= (this->_framebufferWidth * 2))
|
||||
{
|
||||
for (size_t x = 0; x < this->_framebufferWidth; x++, ir++, iw++)
|
||||
|
||||
for (size_t x = ssePixCount; x < pixCount; x++, ir++, iw++)
|
||||
{
|
||||
dstRGBA6665[iw].color = BGRA8888_32Rev_To_RGBA6665_32Rev(this->_framebufferColor[ir].color);
|
||||
dstRGBA5551[iw] = R5G5B5TORGB15((this->_framebufferColor[ir].b >> 3) & 0x1F,
|
||||
|
@ -958,12 +955,11 @@ Render3DError OpenGLRenderer::FlushFramebuffer(FragmentColor *__restrict dstRGBA
|
|||
((this->_framebufferColor[ir].a == 0) ? 0x0000 : 0x8000);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
#else // Code path where SSE2, SSSE3, or little-endian is not supported
|
||||
#else // Code path where SSSE3 or little-endian is not supported
|
||||
|
||||
Render3DError OpenGLRenderer::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
|
||||
{
|
||||
|
@ -995,7 +991,7 @@ Render3DError OpenGLRenderer::FlushFramebuffer(FragmentColor *__restrict dstRGBA
|
|||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
#endif // defined(ENABLE_SSE2) && defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||
#endif // defined(ENABLE_SSSE3) && defined(LOCAL_LE)
|
||||
|
||||
OpenGLRenderer_1_2::~OpenGLRenderer_1_2()
|
||||
{
|
||||
|
@ -1487,7 +1483,7 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs()
|
|||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthStencilID);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
|
@ -1495,28 +1491,28 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs()
|
|||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8_EXT, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, NULL);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8_EXT, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthID);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIPolyID);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIFogAttrID);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
|
||||
|
@ -1891,13 +1887,13 @@ Render3DError OpenGLRenderer_1_2::DestroyToonTable()
|
|||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3DError OpenGLRenderer_1_2::UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
Render3DError OpenGLRenderer_1_2::UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
{
|
||||
OGLRenderRef &OGLRef = *this->ref;
|
||||
|
||||
if (this->isShaderSupported)
|
||||
{
|
||||
for (size_t i = 0; i < GFX3D_FRAMEBUFFER_WIDTH * GFX3D_FRAMEBUFFER_HEIGHT; i++)
|
||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT; i++)
|
||||
{
|
||||
OGLRef.workingCIDepthStencilBuffer[i] = depthBuffer[i] << 8;
|
||||
OGLRef.workingCIDepthBuffer[i] = depthBuffer[i] | 0xFF000000;
|
||||
|
@ -1916,20 +1912,20 @@ Render3DError OpenGLRenderer_1_2::UploadClearImage(const u16 *__restrict colorBu
|
|||
glActiveTextureARB(GL_TEXTURE0_ARB);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIColorID);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, colorBuffer);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, colorBuffer);
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthStencilID);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, OGLRef.workingCIDepthStencilBuffer);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, OGLRef.workingCIDepthStencilBuffer);
|
||||
|
||||
if (this->isShaderSupported)
|
||||
{
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthID);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIDepthBuffer);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIDepthBuffer);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIFogAttrID);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIFogAttributesBuffer);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIFogAttributesBuffer);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIPolyID);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIPolyIDBuffer);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIPolyIDBuffer);
|
||||
}
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
|
@ -2371,7 +2367,7 @@ Render3DError OpenGLRenderer_1_2::UpdateToonTable(const u16 *toonTableBuffer)
|
|||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
{
|
||||
if (!this->isFBOSupported)
|
||||
{
|
||||
|
@ -2397,22 +2393,22 @@ Render3DError OpenGLRenderer_1_2::ClearUsingImage(const u16 *__restrict colorBuf
|
|||
// Blit the working depth buffer
|
||||
glReadBuffer(GL_COLOR_ATTACHMENT1_EXT);
|
||||
glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
|
||||
glBlitFramebufferEXT(0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
glBlitFramebufferEXT(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
|
||||
// Blit the polygon ID buffer
|
||||
glReadBuffer(GL_COLOR_ATTACHMENT2_EXT);
|
||||
glDrawBuffer(GL_COLOR_ATTACHMENT2_EXT);
|
||||
glBlitFramebufferEXT(0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
glBlitFramebufferEXT(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
|
||||
// Blit the fog buffer
|
||||
glReadBuffer(GL_COLOR_ATTACHMENT3_EXT);
|
||||
glDrawBuffer(GL_COLOR_ATTACHMENT3_EXT);
|
||||
glBlitFramebufferEXT(0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
glBlitFramebufferEXT(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
|
||||
// Blit the color buffer. Do this last so that color attachment 0 is set to the read FBO.
|
||||
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
|
||||
glBlitFramebufferEXT(0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST);
|
||||
glBlitFramebufferEXT(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST);
|
||||
|
||||
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, OGLRef.fboRenderID);
|
||||
glDrawBuffers(4, RenderDrawList);
|
||||
|
@ -2485,7 +2481,7 @@ Render3DError OpenGLRenderer_1_2::ClearUsingValues(const FragmentColor &clearCol
|
|||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
|
||||
glDrawBuffer(GL_COLOR_ATTACHMENT3_EXT); // texGFogAttrID
|
||||
glClearColor((clearAttributes.isFogged) ? 1.0 : 0.0, 0.0, 0.0, 1.0);
|
||||
glClearColor(clearAttributes.isFogged, 0.0, 0.0, 1.0);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
|
||||
glDrawBuffers(4, RenderDrawList);
|
||||
|
@ -2682,8 +2678,8 @@ Render3DError OpenGLRenderer_1_2::SetupTexture(const POLY &thePoly, bool enableT
|
|||
|
||||
Render3DError OpenGLRenderer_1_2::SetupViewport(const u32 viewportValue)
|
||||
{
|
||||
const GLfloat wScalar = this->_framebufferWidth / GFX3D_FRAMEBUFFER_WIDTH;
|
||||
const GLfloat hScalar = this->_framebufferHeight / GFX3D_FRAMEBUFFER_HEIGHT;
|
||||
const GLfloat wScalar = this->_framebufferWidth / GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
const GLfloat hScalar = this->_framebufferHeight / GPU_FRAMEBUFFER_NATIVE_HEIGHT;
|
||||
|
||||
VIEWPORT viewport;
|
||||
viewport.decode(viewportValue);
|
||||
|
@ -2782,21 +2778,11 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h)
|
|||
{
|
||||
OGLRenderRef &OGLRef = *this->ref;
|
||||
|
||||
if (w < GFX3D_FRAMEBUFFER_WIDTH || h < GFX3D_FRAMEBUFFER_HEIGHT)
|
||||
if (w < GPU_FRAMEBUFFER_NATIVE_WIDTH || h < GPU_FRAMEBUFFER_NATIVE_HEIGHT)
|
||||
{
|
||||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
this->_framebufferWidth = w;
|
||||
this->_framebufferHeight = h;
|
||||
this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
||||
this->_framebufferColor = (FragmentColor *)realloc(this->_framebufferColor, this->_framebufferColorSizeBytes);
|
||||
|
||||
if (oglrender_framebufferDidResizeCallback != NULL)
|
||||
{
|
||||
oglrender_framebufferDidResizeCallback(w, h);
|
||||
}
|
||||
|
||||
if (this->isFBOSupported)
|
||||
{
|
||||
glActiveTextureARB(GL_TEXTURE0_ARB + OGLTextureUnitID_GColor);
|
||||
|
@ -2836,11 +2822,28 @@ Render3DError OpenGLRenderer_1_2::SetFramebufferSize(size_t w, size_t h)
|
|||
glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, maxSamples, GL_DEPTH24_STENCIL8_EXT, w, h);
|
||||
}
|
||||
|
||||
const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
||||
FragmentColor *oldFramebufferColor = this->_framebufferColor;
|
||||
FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes);
|
||||
memset(newFramebufferColor, 0, newFramebufferColorSizeBytes);
|
||||
|
||||
if (this->isPBOSupported)
|
||||
{
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER_ARB, this->_framebufferColorSizeBytes, NULL, GL_STREAM_READ);
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER_ARB, newFramebufferColorSizeBytes, newFramebufferColor, GL_STREAM_READ);
|
||||
}
|
||||
|
||||
this->_framebufferWidth = w;
|
||||
this->_framebufferHeight = h;
|
||||
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
||||
this->_framebufferColor = newFramebufferColor;
|
||||
|
||||
if (oglrender_framebufferDidResizeCallback != NULL)
|
||||
{
|
||||
oglrender_framebufferDidResizeCallback(w, h);
|
||||
}
|
||||
|
||||
free_aligned(oldFramebufferColor);
|
||||
|
||||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
|
@ -2874,13 +2877,13 @@ Render3DError OpenGLRenderer_1_3::UpdateToonTable(const u16 *toonTableBuffer)
|
|||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3DError OpenGLRenderer_1_3::UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
Render3DError OpenGLRenderer_1_3::UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
{
|
||||
OGLRenderRef &OGLRef = *this->ref;
|
||||
|
||||
if (this->isShaderSupported)
|
||||
{
|
||||
for (size_t i = 0; i < GFX3D_FRAMEBUFFER_WIDTH * GFX3D_FRAMEBUFFER_HEIGHT; i++)
|
||||
for (size_t i = 0; i < GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT; i++)
|
||||
{
|
||||
OGLRef.workingCIDepthStencilBuffer[i] = depthBuffer[i] << 8;
|
||||
OGLRef.workingCIDepthBuffer[i] = depthBuffer[i] | 0xFF000000;
|
||||
|
@ -2899,20 +2902,20 @@ Render3DError OpenGLRenderer_1_3::UploadClearImage(const u16 *__restrict colorBu
|
|||
glActiveTexture(GL_TEXTURE0);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIColorID);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, colorBuffer);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, colorBuffer);
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthStencilID);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, OGLRef.workingCIDepthStencilBuffer);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, OGLRef.workingCIDepthStencilBuffer);
|
||||
|
||||
if (this->isShaderSupported)
|
||||
{
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthID);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIDepthBuffer);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIDepthBuffer);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIFogAttrID);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIFogAttributesBuffer);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIFogAttributesBuffer);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIPolyID);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIPolyIDBuffer);
|
||||
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, OGLRef.workingCIPolyIDBuffer);
|
||||
}
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
|
@ -2924,21 +2927,11 @@ Render3DError OpenGLRenderer_1_3::SetFramebufferSize(size_t w, size_t h)
|
|||
{
|
||||
OGLRenderRef &OGLRef = *this->ref;
|
||||
|
||||
if (w < GFX3D_FRAMEBUFFER_WIDTH || h < GFX3D_FRAMEBUFFER_HEIGHT)
|
||||
if (w < GPU_FRAMEBUFFER_NATIVE_WIDTH || h < GPU_FRAMEBUFFER_NATIVE_HEIGHT)
|
||||
{
|
||||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
this->_framebufferWidth = w;
|
||||
this->_framebufferHeight = h;
|
||||
this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
||||
this->_framebufferColor = (FragmentColor *)realloc(this->_framebufferColor, this->_framebufferColorSizeBytes);
|
||||
|
||||
if (oglrender_framebufferDidResizeCallback != NULL)
|
||||
{
|
||||
oglrender_framebufferDidResizeCallback(w, h);
|
||||
}
|
||||
|
||||
if (this->isFBOSupported)
|
||||
{
|
||||
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_GColor);
|
||||
|
@ -2978,11 +2971,28 @@ Render3DError OpenGLRenderer_1_3::SetFramebufferSize(size_t w, size_t h)
|
|||
glRenderbufferStorageMultisampleEXT(GL_RENDERBUFFER_EXT, maxSamples, GL_DEPTH24_STENCIL8_EXT, w, h);
|
||||
}
|
||||
|
||||
const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
||||
FragmentColor *oldFramebufferColor = this->_framebufferColor;
|
||||
FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes);
|
||||
memset(newFramebufferColor, 0, newFramebufferColorSizeBytes);
|
||||
|
||||
if (this->isPBOSupported)
|
||||
{
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER_ARB, this->_framebufferColorSizeBytes, NULL, GL_STREAM_READ);
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER_ARB, newFramebufferColorSizeBytes, newFramebufferColor, GL_STREAM_READ);
|
||||
}
|
||||
|
||||
this->_framebufferWidth = w;
|
||||
this->_framebufferHeight = h;
|
||||
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
||||
this->_framebufferColor = newFramebufferColor;
|
||||
|
||||
if (oglrender_framebufferDidResizeCallback != NULL)
|
||||
{
|
||||
oglrender_framebufferDidResizeCallback(w, h);
|
||||
}
|
||||
|
||||
free_aligned(oldFramebufferColor);
|
||||
|
||||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
|
|
|
@ -489,10 +489,10 @@ struct OGLRenderRef
|
|||
// Client-side Buffers
|
||||
GLfloat *color4fBuffer;
|
||||
GLushort *vertIndexBuffer;
|
||||
CACHE_ALIGN GLuint workingCIDepthBuffer[GFX3D_FRAMEBUFFER_WIDTH * GFX3D_FRAMEBUFFER_HEIGHT];
|
||||
CACHE_ALIGN GLuint workingCIDepthStencilBuffer[GFX3D_FRAMEBUFFER_WIDTH * GFX3D_FRAMEBUFFER_HEIGHT];
|
||||
CACHE_ALIGN GLuint workingCIFogAttributesBuffer[GFX3D_FRAMEBUFFER_WIDTH * GFX3D_FRAMEBUFFER_HEIGHT];
|
||||
CACHE_ALIGN GLuint workingCIPolyIDBuffer[GFX3D_FRAMEBUFFER_WIDTH * GFX3D_FRAMEBUFFER_HEIGHT];
|
||||
CACHE_ALIGN GLuint workingCIDepthBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN GLuint workingCIDepthStencilBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN GLuint workingCIFogAttributesBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN GLuint workingCIPolyIDBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
|
||||
// Vertex Attributes Pointers
|
||||
GLvoid *vtxPtrPosition;
|
||||
|
@ -552,7 +552,13 @@ FORCEINLINE u32 BGRA8888_32_To_RGBA6665_32(const u32 srcPix);
|
|||
FORCEINLINE u32 BGRA8888_32Rev_To_RGBA6665_32Rev(const u32 srcPix);
|
||||
bool IsVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision);
|
||||
|
||||
#if defined(ENABLE_SSSE3)
|
||||
class OpenGLRenderer : public Render3D_SSSE3
|
||||
#elif defined(ENABLE_SSE2)
|
||||
class OpenGLRenderer : public Render3D_SSE2
|
||||
#else
|
||||
class OpenGLRenderer : public Render3D
|
||||
#endif
|
||||
{
|
||||
private:
|
||||
// Driver's OpenGL Version
|
||||
|
@ -608,7 +614,7 @@ protected:
|
|||
virtual Render3DError InitGeometryProgramShaderLocations() = 0;
|
||||
virtual Render3DError CreateToonTable() = 0;
|
||||
virtual Render3DError DestroyToonTable() = 0;
|
||||
virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer) = 0;
|
||||
virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer) = 0;
|
||||
|
||||
virtual void GetExtensionSet(std::set<std::string> *oglExtensionSet) = 0;
|
||||
virtual Render3DError ExpandFreeTextures() = 0;
|
||||
|
@ -665,7 +671,7 @@ protected:
|
|||
|
||||
virtual Render3DError CreateToonTable();
|
||||
virtual Render3DError DestroyToonTable();
|
||||
virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
|
||||
virtual void GetExtensionSet(std::set<std::string> *oglExtensionSet);
|
||||
virtual Render3DError ExpandFreeTextures();
|
||||
|
@ -679,7 +685,7 @@ protected:
|
|||
virtual Render3DError RenderGeometry(const GFX3D_State &renderState, const POLYLIST *polyList, const INDEXLIST *indexList);
|
||||
virtual Render3DError EndRender(const u64 frameCount);
|
||||
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const;
|
||||
|
||||
virtual void SetPolygonIndex(const size_t index);
|
||||
|
@ -703,7 +709,7 @@ class OpenGLRenderer_1_3 : public OpenGLRenderer_1_2
|
|||
{
|
||||
protected:
|
||||
virtual Render3DError CreateToonTable();
|
||||
virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
|
||||
public:
|
||||
virtual Render3DError UpdateToonTable(const u16 *toonTableBuffer);
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "debug.h"
|
||||
#include "gfx3d.h"
|
||||
#include "NDSSystem.h"
|
||||
|
@ -660,7 +661,7 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs()
|
|||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthStencilID);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
|
@ -668,28 +669,28 @@ Render3DError OpenGLRenderer_3_2::CreateFBOs()
|
|||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthID);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIPolyID);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texCIFogAttrID);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, NULL);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
|
||||
|
@ -1332,7 +1333,7 @@ Render3DError OpenGLRenderer_3_2::UpdateToonTable(const u16 *toonTableBuffer)
|
|||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3DError OpenGLRenderer_3_2::ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
Render3DError OpenGLRenderer_3_2::ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
{
|
||||
OGLRenderRef &OGLRef = *this->ref;
|
||||
|
||||
|
@ -1344,22 +1345,22 @@ Render3DError OpenGLRenderer_3_2::ClearUsingImage(const u16 *__restrict colorBuf
|
|||
// Blit the working depth buffer
|
||||
glReadBuffer(GL_COLOR_ATTACHMENT1);
|
||||
glDrawBuffer(GL_COLOR_ATTACHMENT1);
|
||||
glBlitFramebuffer(0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
glBlitFramebuffer(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
|
||||
// Blit the polygon ID buffer
|
||||
glReadBuffer(GL_COLOR_ATTACHMENT2);
|
||||
glDrawBuffer(GL_COLOR_ATTACHMENT2);
|
||||
glBlitFramebuffer(0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
glBlitFramebuffer(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
|
||||
// Blit the fog buffer
|
||||
glReadBuffer(GL_COLOR_ATTACHMENT3);
|
||||
glDrawBuffer(GL_COLOR_ATTACHMENT3);
|
||||
glBlitFramebuffer(0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
glBlitFramebuffer(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT, GL_NEAREST);
|
||||
|
||||
// Blit the color buffer. Do this last so that color attachment 0 is set to the read FBO.
|
||||
glReadBuffer(GL_COLOR_ATTACHMENT0);
|
||||
glDrawBuffer(GL_COLOR_ATTACHMENT0);
|
||||
glBlitFramebuffer(0, 0, GFX3D_FRAMEBUFFER_WIDTH, GFX3D_FRAMEBUFFER_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, GL_NEAREST);
|
||||
glBlitFramebuffer(0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, 0, this->_framebufferWidth, this->_framebufferHeight, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, GL_NEAREST);
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, OGLRef.fboRenderID);
|
||||
glDrawBuffers(4, RenderDrawList);
|
||||
|
@ -1408,7 +1409,7 @@ Render3DError OpenGLRenderer_3_2::ClearUsingValues(const FragmentColor &clearCol
|
|||
const GLfloat oglColor[4] = {divide5bitBy31_LUT[clearColor.r], divide5bitBy31_LUT[clearColor.g], divide5bitBy31_LUT[clearColor.b], divide5bitBy31_LUT[clearColor.a]};
|
||||
const GLfloat oglDepth[4] = {(GLfloat)(clearAttributes.depth & 0x000000FF)/255.0f, (GLfloat)((clearAttributes.depth >> 8) & 0x000000FF)/255.0f, (GLfloat)((clearAttributes.depth >> 16) & 0x000000FF)/255.0f, 1.0};
|
||||
const GLfloat oglPolyID[4] = {(GLfloat)clearAttributes.opaquePolyID/63.0f, 0.0, 0.0, 1.0};
|
||||
const GLfloat oglFogAttr[4] = {(clearAttributes.isFogged) ? 1.0 : 0.0, 0.0, 0.0, 1.0};
|
||||
const GLfloat oglFogAttr[4] = {clearAttributes.isFogged, 0.0, 0.0, 1.0};
|
||||
|
||||
glClearBufferfi(GL_DEPTH_STENCIL, 0, (GLfloat)clearAttributes.depth / (GLfloat)0x00FFFFFF, 0);
|
||||
glClearBufferfv(GL_COLOR, 0, oglColor); // texGColorID
|
||||
|
@ -1554,21 +1555,11 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
|
|||
{
|
||||
OGLRenderRef &OGLRef = *this->ref;
|
||||
|
||||
if (w < GFX3D_FRAMEBUFFER_WIDTH || h < GFX3D_FRAMEBUFFER_HEIGHT)
|
||||
if (w < GPU_FRAMEBUFFER_NATIVE_WIDTH || h < GPU_FRAMEBUFFER_NATIVE_HEIGHT)
|
||||
{
|
||||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
||||
this->_framebufferWidth = w;
|
||||
this->_framebufferHeight = h;
|
||||
this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
||||
this->_framebufferColor = (FragmentColor *)realloc(this->_framebufferColor, this->_framebufferColorSizeBytes);
|
||||
|
||||
if (oglrender_framebufferDidResizeCallback != NULL)
|
||||
{
|
||||
oglrender_framebufferDidResizeCallback(w, h);
|
||||
}
|
||||
|
||||
glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_GColor);
|
||||
glBindTexture(GL_TEXTURE_2D, OGLRef.texGDepthStencilID);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, w, h, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL);
|
||||
|
@ -1605,7 +1596,24 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h)
|
|||
glRenderbufferStorageMultisample(GL_RENDERBUFFER, maxSamples, GL_DEPTH24_STENCIL8, w, h);
|
||||
}
|
||||
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, this->_framebufferColorSizeBytes, NULL, GL_STREAM_READ);
|
||||
const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
||||
FragmentColor *oldFramebufferColor = this->_framebufferColor;
|
||||
FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes);
|
||||
memset(newFramebufferColor, 0, newFramebufferColorSizeBytes);
|
||||
|
||||
glBufferData(GL_PIXEL_PACK_BUFFER, newFramebufferColorSizeBytes, newFramebufferColor, GL_STREAM_READ);
|
||||
|
||||
this->_framebufferWidth = w;
|
||||
this->_framebufferHeight = h;
|
||||
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
||||
this->_framebufferColor = newFramebufferColor;
|
||||
|
||||
if (oglrender_framebufferDidResizeCallback != NULL)
|
||||
{
|
||||
oglrender_framebufferDidResizeCallback(w, h);
|
||||
}
|
||||
|
||||
free_aligned(oldFramebufferColor);
|
||||
|
||||
return OGLERROR_NOERR;
|
||||
}
|
||||
|
|
|
@ -88,7 +88,7 @@ protected:
|
|||
virtual Render3DError CreateToonTable();
|
||||
virtual Render3DError DestroyToonTable();
|
||||
virtual Render3DError UpdateToonTable(const u16 *toonTableBuffer);
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const;
|
||||
|
||||
virtual void SetPolygonIndex(const size_t index);
|
||||
|
|
|
@ -24,6 +24,9 @@
|
|||
#include <stdarg.h>
|
||||
#include <zlib.h>
|
||||
#include <stdlib.h>
|
||||
#include <map>
|
||||
|
||||
static std::map<void *, void *> _alignedPtrList; // Key: Aligned pointer / Value: Original pointer
|
||||
|
||||
char *trim(char *s, int len)
|
||||
{
|
||||
|
@ -677,3 +680,66 @@ msgBoxInterface msgBoxFake = {
|
|||
};
|
||||
|
||||
msgBoxInterface *msgbox = &msgBoxFake;
|
||||
|
||||
void* malloc_aligned(size_t length, size_t alignment)
|
||||
{
|
||||
const uintptr_t ptrOffset = alignment; // This value must be a power of 2, or this function will fail.
|
||||
const uintptr_t ptrOffsetMask = ~(ptrOffset - 1);
|
||||
|
||||
void *originalPtr = malloc(length + ptrOffset);
|
||||
if (originalPtr == NULL)
|
||||
{
|
||||
return originalPtr;
|
||||
}
|
||||
|
||||
void *alignedPtr = (void *)(((uintptr_t)originalPtr + ptrOffset) & ptrOffsetMask);
|
||||
_alignedPtrList[alignedPtr] = originalPtr;
|
||||
|
||||
return alignedPtr;
|
||||
}
|
||||
|
||||
void* malloc_aligned16(size_t length)
|
||||
{
|
||||
return malloc_aligned(length, 16);
|
||||
}
|
||||
|
||||
void* malloc_aligned32(size_t length)
|
||||
{
|
||||
return malloc_aligned(length, 32);
|
||||
}
|
||||
|
||||
void* malloc_aligned64(size_t length)
|
||||
{
|
||||
return malloc_aligned(length, 64);
|
||||
}
|
||||
|
||||
void* malloc_alignedCacheLine(size_t length)
|
||||
{
|
||||
#if defined(HOST_32)
|
||||
return malloc_aligned32(length);
|
||||
#elif defined(HOST_64)
|
||||
return malloc_aligned64(length);
|
||||
#else
|
||||
return malloc_aligned16(length);
|
||||
#endif
|
||||
}
|
||||
|
||||
void free_aligned(void *ptr)
|
||||
{
|
||||
if (ptr == NULL)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// If the input pointer is aligned through malloc_aligned(),
|
||||
// then retrieve the original pointer first. Otherwise, this
|
||||
// function behaves like the usual free().
|
||||
void *originalPtr = ptr;
|
||||
if (_alignedPtrList.find(ptr) != _alignedPtrList.end())
|
||||
{
|
||||
originalPtr = _alignedPtrList[ptr];
|
||||
_alignedPtrList.erase(ptr);
|
||||
}
|
||||
|
||||
free(originalPtr);
|
||||
}
|
||||
|
|
|
@ -91,4 +91,11 @@ extern int NDS_WritePNG(const char *fname, u16 *data);
|
|||
extern int NDS_WriteBMP(const char *filename, u16 *data);
|
||||
extern int NDS_WriteBMP_32bppBuffer(int width, int height, const void* buf, const char *filename);
|
||||
|
||||
void* malloc_aligned(size_t length, size_t alignment);
|
||||
void* malloc_aligned16(size_t length);
|
||||
void* malloc_aligned32(size_t length);
|
||||
void* malloc_aligned64(size_t length);
|
||||
void* malloc_alignedCacheLine(size_t length);
|
||||
void free_aligned(void *ptr);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include <queue>
|
||||
|
||||
#include "armcpu.h"
|
||||
#include "common.h"
|
||||
#include "debug.h"
|
||||
#include "driver.h"
|
||||
#include "emufile.h"
|
||||
|
@ -312,8 +313,6 @@ static float normalTable[1024];
|
|||
// Color buffer that is filled by the 3D renderer and is read by the GPU engine.
|
||||
FragmentColor *gfx3d_colorRGBA6665 = NULL;
|
||||
u16 *gfx3d_colorRGBA5551 = NULL;
|
||||
static size_t gfx3d_framebufferWidth = GFX3D_FRAMEBUFFER_WIDTH;
|
||||
static size_t gfx3d_framebufferHeight = GFX3D_FRAMEBUFFER_HEIGHT;
|
||||
|
||||
// Matrix stack handling
|
||||
CACHE_ALIGN MatrixStack mtxStack[4] = {
|
||||
|
@ -550,7 +549,6 @@ void gfx3d_init()
|
|||
|
||||
makeTables();
|
||||
Render3D_Init();
|
||||
gfx3d_setFramebufferSize(gfx3d_framebufferWidth, gfx3d_framebufferHeight);
|
||||
gfx3d_reset();
|
||||
}
|
||||
|
||||
|
@ -566,10 +564,10 @@ void gfx3d_deinit()
|
|||
vertlists = NULL;
|
||||
vertlist = NULL;
|
||||
|
||||
free(gfx3d_colorRGBA6665);
|
||||
free_aligned(gfx3d_colorRGBA6665);
|
||||
gfx3d_colorRGBA6665 = NULL;
|
||||
|
||||
free(gfx3d_colorRGBA5551);
|
||||
free_aligned(gfx3d_colorRGBA5551);
|
||||
gfx3d_colorRGBA5551 = NULL;
|
||||
}
|
||||
|
||||
|
@ -645,9 +643,6 @@ void gfx3d_reset()
|
|||
last_s = 0;
|
||||
viewport = 0xBFFF0000;
|
||||
|
||||
memset(gfx3d_colorRGBA6665, 0, gfx3d_framebufferWidth * gfx3d_framebufferHeight * sizeof(FragmentColor));
|
||||
memset(gfx3d_colorRGBA5551, 0, gfx3d_framebufferWidth * gfx3d_framebufferHeight * sizeof(u16));
|
||||
|
||||
gfx3d.state.clearDepth = DS_DEPTH15TO24(0x7FFF);
|
||||
|
||||
clInd2 = 0;
|
||||
|
@ -659,38 +654,6 @@ void gfx3d_reset()
|
|||
CurrentRenderer->Reset();
|
||||
}
|
||||
|
||||
size_t gfx3d_getFramebufferWidth()
|
||||
{
|
||||
return gfx3d_framebufferWidth;
|
||||
}
|
||||
|
||||
size_t gfx3d_getFramebufferHeight()
|
||||
{
|
||||
return gfx3d_framebufferHeight;
|
||||
}
|
||||
|
||||
void gfx3d_setFramebufferSize(size_t w, size_t h)
|
||||
{
|
||||
if (w < GFX3D_FRAMEBUFFER_WIDTH || h < GFX3D_FRAMEBUFFER_HEIGHT)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if we're calling this function from initialization.
|
||||
// If we're not initializing, we need to finish rendering first.
|
||||
if (gfx3d_colorRGBA6665 != NULL && gfx3d_colorRGBA5551 != NULL)
|
||||
{
|
||||
CurrentRenderer->RenderFinish();
|
||||
}
|
||||
|
||||
gfx3d_framebufferWidth = w;
|
||||
gfx3d_framebufferHeight = h;
|
||||
gfx3d_colorRGBA6665 = (FragmentColor *)realloc(gfx3d_colorRGBA6665, w * h * sizeof(FragmentColor));
|
||||
gfx3d_colorRGBA5551 = (u16 *)realloc(gfx3d_colorRGBA5551, w * h * sizeof(u16));
|
||||
|
||||
CurrentRenderer->SetFramebufferSize(w, h);
|
||||
}
|
||||
|
||||
//================================================================================= Geometry Engine
|
||||
//=================================================================================
|
||||
//=================================================================================
|
||||
|
@ -2351,7 +2314,7 @@ void gfx3d_VBlankEndSignal(bool skipFrame)
|
|||
|
||||
if (!CommonSettings.showGpu.main)
|
||||
{
|
||||
memset(gfx3d_colorRGBA6665, 0, sizeof(gfx3d_framebufferWidth * gfx3d_framebufferHeight * sizeof(FragmentColor)));
|
||||
memset(gfx3d_colorRGBA6665, 0, sizeof(GPU_GetFramebufferWidth() * GPU_GetFramebufferHeight() * sizeof(FragmentColor)));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2466,13 +2429,13 @@ void gfx3d_glGetLightColor(const size_t index, u32 &dst)
|
|||
const FragmentColor* gfx3d_GetLineDataRGBA6665(const size_t line)
|
||||
{
|
||||
CurrentRenderer->RenderFinish();
|
||||
return (gfx3d_colorRGBA6665 + (line * gfx3d_framebufferWidth));
|
||||
return (gfx3d_colorRGBA6665 + (line * GPU_GetFramebufferWidth()));
|
||||
}
|
||||
|
||||
const u16* gfx3d_GetLineDataRGBA5551(const size_t line)
|
||||
{
|
||||
CurrentRenderer->RenderFinish();
|
||||
return (gfx3d_colorRGBA5551 + (line * gfx3d_framebufferWidth));
|
||||
return (gfx3d_colorRGBA5551 + (line * GPU_GetFramebufferWidth()));
|
||||
}
|
||||
|
||||
|
||||
|
@ -2562,7 +2525,7 @@ SFORMAT SF_GFX3D[]={
|
|||
{ "GTVC", 4, 1, &tempVertInfo.count},
|
||||
{ "GTVM", 4, 4, tempVertInfo.map},
|
||||
{ "GTVF", 4, 1, &tempVertInfo.first},
|
||||
{ "G3CX", 1, 4*GFX3D_FRAMEBUFFER_WIDTH*GFX3D_FRAMEBUFFER_HEIGHT, gfx3d_colorRGBA6665},
|
||||
{ "G3CX", 1, 4*GPU_FRAMEBUFFER_NATIVE_WIDTH*GPU_FRAMEBUFFER_NATIVE_HEIGHT, gfx3d_colorRGBA6665},
|
||||
{ 0 }
|
||||
};
|
||||
|
||||
|
|
|
@ -28,10 +28,6 @@
|
|||
|
||||
class EMUFILE;
|
||||
|
||||
// Pixel dimensions of the NDS 3D framebuffer
|
||||
#define GFX3D_FRAMEBUFFER_WIDTH 256
|
||||
#define GFX3D_FRAMEBUFFER_HEIGHT 192
|
||||
|
||||
//geometry engine command numbers
|
||||
#define GFX3D_NOP 0x00
|
||||
#define GFX3D_MTX_MODE 0x10
|
||||
|
@ -249,10 +245,6 @@ void gfx3d_init();
|
|||
void gfx3d_deinit();
|
||||
void gfx3d_reset();
|
||||
|
||||
size_t gfx3d_getFramebufferWidth();
|
||||
size_t gfx3d_getFramebufferHeight();
|
||||
void gfx3d_setFramebufferSize(size_t w, size_t h);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
u8 enableLightFlags;
|
||||
|
|
|
@ -122,39 +122,24 @@ FORCEINLINE s32 s32floor(double d)
|
|||
|
||||
static void memset_u16(void *dst, const u16 val, const size_t length)
|
||||
{
|
||||
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
|
||||
__m128i *dst_vec128 = (__m128i *)dst;
|
||||
const __m128i val_vec128 = _mm_set1_epi16(val);
|
||||
const size_t length_vec128 = length / (sizeof(val_vec128) / sizeof(val));
|
||||
//MACRODO_N(length_vec128, (dst_vec128[X] = val_vec128));
|
||||
|
||||
for (size_t i = 0; i < length_vec128; i++)
|
||||
dst_vec128[i] = val_vec128;
|
||||
#else
|
||||
const u32 val_u32 = ((u32)val << 16) | (u32)val;
|
||||
__m128 val_vec128; val_vec128.m128_i32[0] = val_u32;
|
||||
const size_t length_vec128 = length / (sizeof(val_vec128) / sizeof(val));
|
||||
//MACRODO_N(length_vec128,_mm_store_si128((__m128i*)((u8*)dst+(X)*16), val_vec128));
|
||||
MACRODO_N(length_vec128, _mm_store_ps1((float*)((u8*)dst+(X)*16), val_vec128));
|
||||
#endif
|
||||
_mm_stream_si128(dst_vec128 + i, val_vec128);
|
||||
}
|
||||
|
||||
static void memset_u32(void *dst, const u32 val, const size_t length)
|
||||
{
|
||||
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
|
||||
__m128i *dst_vec128 = (__m128i *)dst;
|
||||
const __m128i val_vec128 = _mm_set1_epi32(val);
|
||||
const size_t length_vec128 = length / (sizeof(val_vec128) / sizeof(val));
|
||||
//MACRODO_N(length_vec128, (dst_vec128[X] = val_vec128));
|
||||
|
||||
for (size_t i = 0; i < length_vec128; i++)
|
||||
dst_vec128[i] = val_vec128;
|
||||
#else
|
||||
__m128 val_vec128; val_vec128.m128_i32[0] = val;
|
||||
const size_t length_vec128 = length / (sizeof(val_vec128) / sizeof(val));
|
||||
//MACRODO_N(length_vec128,_mm_store_si128((__m128i*)((u8*)dst+(X)*16), val_vec128));
|
||||
MACRODO_N(length_vec128, _mm_store_ps1((float*)((u8*)dst+(X)*16), val_vec128));
|
||||
#endif
|
||||
_mm_stream_si128(dst_vec128 + i, val_vec128);
|
||||
}
|
||||
|
||||
#else //no sse2
|
||||
|
|
|
@ -45,6 +45,14 @@
|
|||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include "bits.h"
|
||||
#include "common.h"
|
||||
#include "matrix.h"
|
||||
|
@ -563,30 +571,38 @@ public:
|
|||
}
|
||||
|
||||
template<bool isShadowPolygon>
|
||||
FORCEINLINE void pixel(const PolygonAttributes &polyAttr, FragmentAttributes &dstAttributes, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float w, float z)
|
||||
FORCEINLINE void pixel(const PolygonAttributes &polyAttr, const size_t fragmentIndex, FragmentColor &dstColor, float r, float g, float b, float invu, float invv, float w, float z)
|
||||
{
|
||||
FragmentColor srcColor;
|
||||
FragmentColor shaderOutput;
|
||||
bool isOpaquePixel;
|
||||
|
||||
//FragmentColor &dstColor = this->_softRender->GetFramebuffer()[fragmentIndex];
|
||||
u32 &dstAttributeDepth = this->_softRender->_framebufferAttributes->depth[fragmentIndex];
|
||||
u8 &dstAttributeOpaquePolyID = this->_softRender->_framebufferAttributes->opaquePolyID[fragmentIndex];
|
||||
u8 &dstAttributeTranslucentPolyID = this->_softRender->_framebufferAttributes->translucentPolyID[fragmentIndex];
|
||||
u8 &dstAttributeStencil = this->_softRender->_framebufferAttributes->stencil[fragmentIndex];
|
||||
u8 &dstAttributeIsFogged = this->_softRender->_framebufferAttributes->isFogged[fragmentIndex];
|
||||
u8 &dstAttributeIsTranslucentPoly = this->_softRender->_framebufferAttributes->isTranslucentPoly[fragmentIndex];
|
||||
|
||||
// not sure about the w-buffer depth value: this value was chosen to make the skybox, castle window decals, and water level render correctly in SM64
|
||||
// hack: when using z-depth, drop some LSBs so that the overworld map in Dragon Quest IV shows up correctly
|
||||
const u32 depth = (gfx3d.renderState.wbuffer) ? u32floor(4096*w) : DS_DEPTH15TO24( u32floor(z*0x7FFF) ) & 0x00FFFFFC;
|
||||
const u32 newDepth = (gfx3d.renderState.wbuffer) ? u32floor(4096*w) : DS_DEPTH15TO24( u32floor(z*0x7FFF) ) & 0x00FFFFFC;
|
||||
|
||||
// run the depth test
|
||||
if (polyAttr.enableDepthEqualTest)
|
||||
{
|
||||
const u32 minDepth = max<u32>(0x00000000, dstAttributes.depth - SOFTRASTERIZER_DEPTH_EQUAL_TEST_TOLERANCE);
|
||||
const u32 maxDepth = min<u32>(0x00FFFFFF, dstAttributes.depth + SOFTRASTERIZER_DEPTH_EQUAL_TEST_TOLERANCE);
|
||||
const u32 minDepth = max<u32>(0x00000000, dstAttributeDepth - SOFTRASTERIZER_DEPTH_EQUAL_TEST_TOLERANCE);
|
||||
const u32 maxDepth = min<u32>(0x00FFFFFF, dstAttributeDepth + SOFTRASTERIZER_DEPTH_EQUAL_TEST_TOLERANCE);
|
||||
|
||||
if (depth < minDepth || depth > maxDepth)
|
||||
if (newDepth < minDepth || newDepth > maxDepth)
|
||||
{
|
||||
goto depth_fail;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (depth >= dstAttributes.depth)
|
||||
if (newDepth >= dstAttributeDepth)
|
||||
{
|
||||
goto depth_fail;
|
||||
}
|
||||
|
@ -601,7 +617,7 @@ public:
|
|||
}
|
||||
else
|
||||
{
|
||||
if (dstAttributes.stencil == 0)
|
||||
if (dstAttributeStencil == 0)
|
||||
{
|
||||
goto rejected_fragment;
|
||||
}
|
||||
|
@ -609,7 +625,7 @@ public:
|
|||
//shadow polys have a special check here to keep from self-shadowing when user
|
||||
//has tried to prevent it from happening
|
||||
//if this isnt here, then the vehicle select in mariokart will look terrible
|
||||
if (dstAttributes.opaquePolyID == polyAttr.polygonID)
|
||||
if (dstAttributeOpaquePolyID == polyAttr.polygonID)
|
||||
{
|
||||
goto rejected_fragment;
|
||||
}
|
||||
|
@ -643,31 +659,31 @@ public:
|
|||
isOpaquePixel = (shaderOutput.a == 0x1F);
|
||||
if (isOpaquePixel)
|
||||
{
|
||||
dstAttributes.opaquePolyID = polyAttr.polygonID;
|
||||
dstAttributes.isTranslucentPoly = polyAttr.isTranslucent;
|
||||
dstAttributes.isFogged = polyAttr.enableRenderFog;
|
||||
dstAttributeOpaquePolyID = polyAttr.polygonID;
|
||||
dstAttributeIsTranslucentPoly = polyAttr.isTranslucent;
|
||||
dstAttributeIsFogged = polyAttr.enableRenderFog;
|
||||
dstColor = shaderOutput;
|
||||
}
|
||||
else
|
||||
{
|
||||
//dont overwrite pixels on translucent polys with the same polyids
|
||||
if (dstAttributes.translucentPolyID == polyAttr.polygonID)
|
||||
if (dstAttributeTranslucentPolyID == polyAttr.polygonID)
|
||||
goto rejected_fragment;
|
||||
|
||||
//originally we were using a test case of shadows-behind-trees in sm64ds
|
||||
//but, it looks bad in that game. this is actually correct
|
||||
//if this isnt correct, then complex shape cart shadows in mario kart don't work right
|
||||
dstAttributes.translucentPolyID = polyAttr.polygonID;
|
||||
dstAttributeTranslucentPolyID = polyAttr.polygonID;
|
||||
|
||||
//alpha blending and write color
|
||||
alphaBlend(dstColor, shaderOutput);
|
||||
|
||||
dstAttributes.isFogged = (dstAttributes.isFogged && polyAttr.enableRenderFog);
|
||||
dstAttributeIsFogged = (dstAttributeIsFogged && polyAttr.enableRenderFog);
|
||||
}
|
||||
|
||||
//depth writing
|
||||
if (isOpaquePixel || polyAttr.enableAlphaDepthWrite)
|
||||
dstAttributes.depth = depth;
|
||||
dstAttributeDepth = newDepth;
|
||||
|
||||
//shadow cases: (need multi-bit stencil buffer to cope with all of these, especially the mariokart complex shadows)
|
||||
//1. sm64 (standing near signs and blocks)
|
||||
|
@ -678,14 +694,14 @@ public:
|
|||
goto done;
|
||||
depth_fail:
|
||||
if (isShadowPolygon && polyAttr.polygonID == 0)
|
||||
dstAttributes.stencil++;
|
||||
dstAttributeStencil++;
|
||||
|
||||
rejected_fragment:
|
||||
done:
|
||||
;
|
||||
|
||||
if (isShadowPolygon && polyAttr.polygonID != 0 && dstAttributes.stencil)
|
||||
dstAttributes.stencil--;
|
||||
if (isShadowPolygon && polyAttr.polygonID != 0 && dstAttributeStencil)
|
||||
dstAttributeStencil--;
|
||||
}
|
||||
|
||||
//draws a single scanline
|
||||
|
@ -729,7 +745,7 @@ public:
|
|||
(pRight->color[1].curr - color[1]) * invWidth,
|
||||
(pRight->color[2].curr - color[2]) * invWidth };
|
||||
|
||||
int adr = (pLeft->Y*framebufferWidth)+XStart;
|
||||
size_t adr = (pLeft->Y*framebufferWidth)+XStart;
|
||||
|
||||
//CONSIDER: in case some other math is wrong (shouldve been clipped OK), we might go out of bounds here.
|
||||
//better check the Y value.
|
||||
|
@ -766,7 +782,7 @@ public:
|
|||
}
|
||||
if (x+width > framebufferWidth)
|
||||
{
|
||||
if (RENDERER && !lineHack)
|
||||
if (RENDERER && !lineHack && framebufferWidth == GPU_FRAMEBUFFER_NATIVE_WIDTH)
|
||||
{
|
||||
printf("rasterizer rendering at x=%d! oops!\n",x+width-1);
|
||||
return;
|
||||
|
@ -776,7 +792,7 @@ public:
|
|||
|
||||
while (width-- > 0)
|
||||
{
|
||||
pixel<isShadowPolygon>(polyAttr, this->_softRender->_framebufferAttributes[adr], dstColor[adr], color[0], color[1], color[2], u, v, 1.0f/invw, z);
|
||||
pixel<isShadowPolygon>(polyAttr, adr, dstColor[adr], color[0], color[1], color[2], u, v, 1.0f/invw, z);
|
||||
adr++;
|
||||
x++;
|
||||
|
||||
|
@ -1123,14 +1139,26 @@ void _HACK_Viewer_ExecUnit()
|
|||
|
||||
// Factory for the software rasterizer. The concrete class is chosen at
// compile time: the SSSE3 variant when ENABLE_SSSE3 is defined, otherwise
// the SSE2 variant when ENABLE_SSE2 is defined, otherwise the plain
// SoftRasterizerRenderer. The matching teardown is
// SoftRasterizerRendererDestroy().
static Render3D* SoftRasterizerRendererCreate()
{
#if defined(ENABLE_SSSE3)
	Render3D *newRenderer = new SoftRasterizerRenderer_SSSE3;
#elif defined(ENABLE_SSE2)
	Render3D *newRenderer = new SoftRasterizerRenderer_SSE2;
#else
	Render3D *newRenderer = new SoftRasterizerRenderer;
#endif

	return newRenderer;
}
|
||||
|
||||
static void SoftRasterizerRendererDestroy()
|
||||
{
|
||||
if (CurrentRenderer != BaseRenderer)
|
||||
{
|
||||
#if defined(ENABLE_SSSE3)
|
||||
delete (SoftRasterizerRenderer_SSSE3 *)CurrentRenderer;
|
||||
#elif defined(ENABLE_SSE2)
|
||||
delete (SoftRasterizerRenderer_SSE2 *)CurrentRenderer;
|
||||
#else
|
||||
delete (SoftRasterizerRenderer *)CurrentRenderer;
|
||||
#endif
|
||||
CurrentRenderer = BaseRenderer;
|
||||
}
|
||||
}
|
||||
|
@ -1226,7 +1254,8 @@ SoftRasterizerRenderer::~SoftRasterizerRenderer()
|
|||
delete[] postprocessParam;
|
||||
postprocessParam = NULL;
|
||||
|
||||
free(_framebufferAttributes);
|
||||
delete _framebufferAttributes;
|
||||
_framebufferAttributes = NULL;
|
||||
}
|
||||
|
||||
Render3DError SoftRasterizerRenderer::InitTables()
|
||||
|
@ -1291,8 +1320,8 @@ size_t SoftRasterizerRenderer::performClipping(const VERTLIST *vertList, const P
|
|||
|
||||
template<bool CUSTOM> void SoftRasterizerRenderer::performViewportTransforms()
|
||||
{
|
||||
const float xfactor = (float)this->_framebufferWidth/(float)GFX3D_FRAMEBUFFER_WIDTH;
|
||||
const float yfactor = (float)this->_framebufferHeight/(float)GFX3D_FRAMEBUFFER_HEIGHT;
|
||||
const float xfactor = (float)this->_framebufferWidth/(float)GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
const float yfactor = (float)this->_framebufferHeight/(float)GPU_FRAMEBUFFER_NATIVE_HEIGHT;
|
||||
const float xmax = (float)this->_framebufferWidth-(CUSTOM?0.001f:0); //fudge factor to keep from overrunning render buffers
|
||||
const float ymax = (float)this->_framebufferHeight-(CUSTOM?0.001f:0);
|
||||
|
||||
|
@ -1561,11 +1590,10 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarking(const u16 *colorTable, c
|
|||
{
|
||||
for (size_t x = 0; x < this->_framebufferWidth; x++, i++)
|
||||
{
|
||||
const FragmentAttributes dstAttributes = this->_framebufferAttributes[i];
|
||||
const u8 polyID = dstAttributes.opaquePolyID;
|
||||
const u8 polyID = this->_framebufferAttributes->opaquePolyID[i];
|
||||
|
||||
if (this->edgeMarkDisabled[polyID>>3]) continue;
|
||||
if (dstAttributes.isTranslucentPoly) continue;
|
||||
if (this->_framebufferAttributes->isTranslucentPoly[i] != 0) continue;
|
||||
|
||||
// > is used instead of != to prevent double edges
|
||||
// between overlapping polys of different IDs.
|
||||
|
@ -1575,7 +1603,7 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarking(const u16 *colorTable, c
|
|||
const FragmentColor edgeColor = this->edgeMarkTable[polyID>>3];
|
||||
|
||||
#define PIXOFFSET(dx,dy) ((dx)+(this->_framebufferWidth*(dy)))
|
||||
#define ISEDGE(dx,dy) ((x+(dx) < this->_framebufferWidth) && (y+(dy) < this->_framebufferHeight) && polyID > this->_framebufferAttributes[i+PIXOFFSET(dx,dy)].opaquePolyID)
|
||||
#define ISEDGE(dx,dy) ((x+(dx) < this->_framebufferWidth) && (y+(dy) < this->_framebufferHeight) && polyID > this->_framebufferAttributes->opaquePolyID[i+PIXOFFSET(dx,dy)])
|
||||
#define DRAWEDGE(dx,dy) alphaBlend(_framebufferColor[i+PIXOFFSET(dx,dy)], edgeColor)
|
||||
|
||||
bool upleft = ISEDGE(-1,-1);
|
||||
|
@ -1717,10 +1745,9 @@ Render3DError SoftRasterizerRenderer::RenderFog(const u8 *densityTable, const u3
|
|||
{
|
||||
for (size_t i = 0; i < framebufferFragmentCount; i++)
|
||||
{
|
||||
const FragmentAttributes &destFragment = this->_framebufferAttributes[i];
|
||||
const size_t fogIndex = destFragment.depth >> 9;
|
||||
const size_t fogIndex = this->_framebufferAttributes->depth[i] >> 9;
|
||||
assert(fogIndex < 32768);
|
||||
const u8 fog = (destFragment.isFogged) ? this->fogTable[fogIndex] : 0;
|
||||
const u8 fog = (this->_framebufferAttributes->isFogged[i] != 0) ? this->fogTable[fogIndex] : 0;
|
||||
|
||||
FragmentColor &destFragmentColor = this->_framebufferColor[i];
|
||||
destFragmentColor.r = ((128-fog)*destFragmentColor.r + r*fog)>>7;
|
||||
|
@ -1733,10 +1760,9 @@ Render3DError SoftRasterizerRenderer::RenderFog(const u8 *densityTable, const u3
|
|||
{
|
||||
for (size_t i = 0; i < framebufferFragmentCount; i++)
|
||||
{
|
||||
const FragmentAttributes &destFragment = this->_framebufferAttributes[i];
|
||||
const size_t fogIndex = destFragment.depth >> 9;
|
||||
const size_t fogIndex = this->_framebufferAttributes->depth[i] >> 9;
|
||||
assert(fogIndex < 32768);
|
||||
const u8 fog = (destFragment.isFogged) ? this->fogTable[fogIndex] : 0;
|
||||
const u8 fog = (this->_framebufferAttributes->isFogged[i] != 0) ? this->fogTable[fogIndex] : 0;
|
||||
|
||||
FragmentColor &destFragmentColor = this->_framebufferColor[i];
|
||||
destFragmentColor.a = ((128-fog)*destFragmentColor.a + a*fog)>>7;
|
||||
|
@ -1753,9 +1779,8 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz
|
|||
for (size_t x = 0; x < this->_framebufferWidth; x++, i++)
|
||||
{
|
||||
FragmentColor &dstColor = this->_framebufferColor[i];
|
||||
const FragmentAttributes dstAttributes = this->_framebufferAttributes[i];
|
||||
const u32 depth = dstAttributes.depth;
|
||||
const u8 polyID = dstAttributes.opaquePolyID;
|
||||
const u32 depth = this->_framebufferAttributes->depth[i];
|
||||
const u8 polyID = this->_framebufferAttributes->opaquePolyID[i];
|
||||
|
||||
// TODO: New edge marking algorithm which tests both polyID and depth, but only checks 4 surrounding pixels. Can we keep this one?
|
||||
if (param.enableEdgeMarking)
|
||||
|
@ -1769,15 +1794,19 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz
|
|||
// - the character edges in-level are clearly transparent, and also show well through shield powerups.
|
||||
|
||||
FragmentColor edgeColor = this->edgeMarkTable[polyID>>3];
|
||||
bool right = false;
|
||||
bool down = false;
|
||||
bool left = false;
|
||||
bool upleft = false;
|
||||
bool up = false;
|
||||
bool upright = false;
|
||||
bool left = false;
|
||||
bool right = false;
|
||||
bool downleft = false;
|
||||
bool down = false;
|
||||
bool downright = false;
|
||||
|
||||
#define PIXOFFSET(dx,dy) ((dx)+(this->_framebufferWidth*(dy)))
|
||||
#define ISEDGE(dx,dy) ((x+(dx) < this->_framebufferWidth) && (y+(dy) < this->_framebufferHeight) && polyID != this->_framebufferAttributes[i+PIXOFFSET(dx,dy)].opaquePolyID && depth >= this->_framebufferAttributes[i+PIXOFFSET(dx,dy)].depth)
|
||||
#define ISEDGE(dx,dy) ((x+(dx) < this->_framebufferWidth) && (y+(dy) < this->_framebufferHeight) && polyID != this->_framebufferAttributes->opaquePolyID[i+PIXOFFSET(dx,dy)] && depth >= this->_framebufferAttributes->depth[i+PIXOFFSET(dx,dy)])
|
||||
|
||||
if (this->edgeMarkDisabled[polyID>>3] || dstAttributes.isTranslucentPoly)
|
||||
if (this->edgeMarkDisabled[polyID>>3] || this->_framebufferAttributes->isTranslucentPoly[i] != 0)
|
||||
goto END_EDGE_MARK;
|
||||
|
||||
up = ISEDGE( 0,-1);
|
||||
|
@ -1787,22 +1816,22 @@ Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasteriz
|
|||
|
||||
if (right)
|
||||
{
|
||||
edgeColor = this->edgeMarkTable[this->_framebufferAttributes[i+PIXOFFSET( 1, 0)].opaquePolyID >> 3];
|
||||
edgeColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i+PIXOFFSET( 1, 0)] >> 3];
|
||||
alphaBlend(dstColor, edgeColor);
|
||||
}
|
||||
else if (down)
|
||||
{
|
||||
edgeColor = this->edgeMarkTable[this->_framebufferAttributes[i+PIXOFFSET( 0, 1)].opaquePolyID >> 3];
|
||||
edgeColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i+PIXOFFSET( 0, 1)] >> 3];
|
||||
alphaBlend(dstColor, edgeColor);
|
||||
}
|
||||
else if (left)
|
||||
{
|
||||
edgeColor = this->edgeMarkTable[this->_framebufferAttributes[i+PIXOFFSET(-1, 0)].opaquePolyID >> 3];
|
||||
edgeColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i+PIXOFFSET(-1, 0)] >> 3];
|
||||
alphaBlend(dstColor, edgeColor);
|
||||
}
|
||||
else if (up)
|
||||
{
|
||||
edgeColor = this->edgeMarkTable[this->_framebufferAttributes[i+PIXOFFSET( 0,-1)].opaquePolyID >> 3];
|
||||
edgeColor = this->edgeMarkTable[this->_framebufferAttributes->opaquePolyID[i+PIXOFFSET( 0,-1)] >> 3];
|
||||
alphaBlend(dstColor, edgeColor);
|
||||
}
|
||||
|
||||
|
@ -1822,7 +1851,7 @@ END_EDGE_MARK: ;
|
|||
|
||||
const size_t fogIndex = depth >> 9;
|
||||
assert(fogIndex < 32768);
|
||||
const u8 fog = (dstAttributes.isFogged) ? this->fogTable[fogIndex] : 0;
|
||||
const u8 fog = (this->_framebufferAttributes->isFogged[i] != 0) ? this->fogTable[fogIndex] : 0;
|
||||
|
||||
if (!param.fogAlphaOnly)
|
||||
{
|
||||
|
@ -1858,26 +1887,26 @@ Render3DError SoftRasterizerRenderer::UpdateToonTable(const u16 *toonTableBuffer
|
|||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
{
|
||||
const float lineDecrement = ((float)GFX3D_FRAMEBUFFER_HEIGHT / (float)this->_framebufferHeight) + 0.000001;
|
||||
const float readIncrement = ((float)GFX3D_FRAMEBUFFER_WIDTH / (float)this->_framebufferWidth) + 0.000001;
|
||||
float line = GFX3D_FRAMEBUFFER_HEIGHT - 1.0 + lineDecrement;
|
||||
float readLocation = (GFX3D_FRAMEBUFFER_HEIGHT - 1) * GFX3D_FRAMEBUFFER_WIDTH;
|
||||
const float lineDecrement = ((float)GPU_FRAMEBUFFER_NATIVE_HEIGHT / (float)this->_framebufferHeight) + 0.000001;
|
||||
const float readIncrement = ((float)GPU_FRAMEBUFFER_NATIVE_WIDTH / (float)this->_framebufferWidth) + 0.000001;
|
||||
float line = GPU_FRAMEBUFFER_NATIVE_HEIGHT - 1.0 + lineDecrement;
|
||||
float readLocation = (GPU_FRAMEBUFFER_NATIVE_HEIGHT - 1) * GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
|
||||
// The clear image buffer is y-flipped, so we need to flip it back to normal here.
|
||||
for (size_t y = 0, iw = 0; y < this->_framebufferHeight; y++, readLocation = ((size_t)line * GFX3D_FRAMEBUFFER_WIDTH))
|
||||
for (size_t y = 0, iw = 0; y < this->_framebufferHeight; y++, readLocation = ((size_t)line * GPU_FRAMEBUFFER_NATIVE_WIDTH))
|
||||
{
|
||||
for (size_t x = 0; x < this->_framebufferWidth; x++, iw++, readLocation += readIncrement)
|
||||
{
|
||||
const size_t ir = (size_t)readLocation;
|
||||
this->_framebufferColor[iw].color = RGB15TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F);
|
||||
this->_framebufferAttributes[iw].isFogged = fogBuffer[ir];
|
||||
this->_framebufferAttributes[iw].depth = depthBuffer[ir];
|
||||
this->_framebufferAttributes[iw].opaquePolyID = polyIDBuffer[ir];
|
||||
this->_framebufferAttributes[iw].translucentPolyID = kUnsetTranslucentPolyID;
|
||||
this->_framebufferAttributes[iw].isTranslucentPoly = false;
|
||||
this->_framebufferAttributes[iw].stencil = 0;
|
||||
this->_framebufferAttributes->isFogged[iw] = fogBuffer[ir];
|
||||
this->_framebufferAttributes->depth[iw] = depthBuffer[ir];
|
||||
this->_framebufferAttributes->opaquePolyID[iw] = polyIDBuffer[ir];
|
||||
this->_framebufferAttributes->translucentPolyID[iw] = kUnsetTranslucentPolyID;
|
||||
this->_framebufferAttributes->isTranslucentPoly[iw] = 0;
|
||||
this->_framebufferAttributes->stencil[iw] = 0;
|
||||
}
|
||||
|
||||
line -= lineDecrement;
|
||||
|
@ -1888,15 +1917,14 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo
|
|||
|
||||
Render3DError SoftRasterizerRenderer::ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const
|
||||
{
|
||||
FragmentColor convertedClearColor;
|
||||
FragmentColor convertedClearColor = clearColor;
|
||||
convertedClearColor.r = GFX3D_5TO6(clearColor.r);
|
||||
convertedClearColor.g = GFX3D_5TO6(clearColor.g);
|
||||
convertedClearColor.b = GFX3D_5TO6(clearColor.b);
|
||||
convertedClearColor.a = clearColor.a;
|
||||
|
||||
for (size_t i = 0; i < (this->_framebufferWidth * this->_framebufferHeight); i++)
|
||||
{
|
||||
this->_framebufferAttributes[i] = clearAttributes;
|
||||
this->_framebufferAttributes->SetAtIndex(i, clearAttributes);
|
||||
this->_framebufferColor[i] = convertedClearColor;
|
||||
}
|
||||
|
||||
|
@ -2009,16 +2037,75 @@ Render3DError SoftRasterizerRenderer::RenderFinish()
|
|||
|
||||
Render3DError SoftRasterizerRenderer::SetFramebufferSize(size_t w, size_t h)
|
||||
{
|
||||
if (w < GFX3D_FRAMEBUFFER_WIDTH || h < GFX3D_FRAMEBUFFER_HEIGHT)
|
||||
if (w < GPU_FRAMEBUFFER_NATIVE_WIDTH || h < GPU_FRAMEBUFFER_NATIVE_HEIGHT)
|
||||
{
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
||||
FragmentColor *oldFramebufferColor = this->_framebufferColor;
|
||||
FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes);
|
||||
FragmentAttributesBuffer *oldFramebufferAttributes = this->_framebufferAttributes;
|
||||
FragmentAttributesBuffer *newFramebufferAttributes = new FragmentAttributesBuffer(w * h);
|
||||
|
||||
this->_framebufferWidth = w;
|
||||
this->_framebufferHeight = h;
|
||||
this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
||||
this->_framebufferColor = (FragmentColor *)realloc(this->_framebufferColor, this->_framebufferColorSizeBytes);
|
||||
this->_framebufferAttributes = (FragmentAttributes *)realloc(this->_framebufferAttributes, w * h * sizeof(FragmentAttributes));
|
||||
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
||||
this->_framebufferColor = newFramebufferColor;
|
||||
this->_framebufferAttributes = newFramebufferAttributes;
|
||||
|
||||
free_aligned(oldFramebufferColor);
|
||||
delete oldFramebufferAttributes;
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
// SSE2 version of the framebuffer clear: fills the color buffer and every
// per-pixel attribute array with the supplied clear values.
//
// clearColor      - source color; r/g/b are passed through GFX3D_5TO6()
//                   before being written, alpha is copied unchanged.
// clearAttributes - attribute values replicated to every pixel.
//
// Always returns RENDER3DERROR_NOERR.
//
// NOTE(review): _mm_stream_si128 requires 16-byte aligned destinations, so
// this assumes the color and attribute buffers are allocated with at least
// 16-byte alignment -- confirm against the allocation sites.
Render3DError SoftRasterizerRenderer_SSE2::ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const
{
	FragmentColor fillColor = clearColor;
	fillColor.r = GFX3D_5TO6(clearColor.r);
	fillColor.g = GFX3D_5TO6(clearColor.g);
	fillColor.b = GFX3D_5TO6(clearColor.b);

	// Pixels are processed 16 at a time; whatever does not fill a whole
	// group of 16 is handled by the scalar loop at the end.
	const size_t totalPixels = this->_framebufferWidth * this->_framebufferHeight;
	const size_t vectorizedPixels = totalPixels - (totalPixels % 16);

	// Pre-broadcast each clear value across a full 128-bit register.
	const __m128i fillColorVec = _mm_set1_epi32(fillColor.color);
	const __m128i fillDepthVec = _mm_set1_epi32(clearAttributes.depth);
	const __m128i fillOpaquePolyIDVec = _mm_set1_epi8(clearAttributes.opaquePolyID);
	const __m128i fillTranslucentPolyIDVec = _mm_set1_epi8(clearAttributes.translucentPolyID);
	const __m128i fillStencilVec = _mm_set1_epi8(clearAttributes.stencil);
	const __m128i fillIsFoggedVec = _mm_set1_epi8(clearAttributes.isFogged);
	const __m128i fillIsTranslucentPolyVec = _mm_set1_epi8(clearAttributes.isTranslucentPoly);

	FragmentColor *colorBuffer = this->_framebufferColor;
	FragmentAttributesBuffer *attrBuffer = this->_framebufferAttributes;

	size_t pix = 0;
	while (pix < vectorizedPixels)
	{
		// Color and depth are stored four pixels per 128-bit store here,
		// so 16 pixels take four stores each.
		_mm_stream_si128((__m128i *)(colorBuffer + pix +  0), fillColorVec);
		_mm_stream_si128((__m128i *)(colorBuffer + pix +  4), fillColorVec);
		_mm_stream_si128((__m128i *)(colorBuffer + pix +  8), fillColorVec);
		_mm_stream_si128((__m128i *)(colorBuffer + pix + 12), fillColorVec);

		_mm_stream_si128((__m128i *)(attrBuffer->depth + pix +  0), fillDepthVec);
		_mm_stream_si128((__m128i *)(attrBuffer->depth + pix +  4), fillDepthVec);
		_mm_stream_si128((__m128i *)(attrBuffer->depth + pix +  8), fillDepthVec);
		_mm_stream_si128((__m128i *)(attrBuffer->depth + pix + 12), fillDepthVec);

		// The remaining attributes take one store per 16 pixels.
		_mm_stream_si128((__m128i *)(attrBuffer->opaquePolyID + pix), fillOpaquePolyIDVec);
		_mm_stream_si128((__m128i *)(attrBuffer->translucentPolyID + pix), fillTranslucentPolyIDVec);
		_mm_stream_si128((__m128i *)(attrBuffer->stencil + pix), fillStencilVec);
		_mm_stream_si128((__m128i *)(attrBuffer->isFogged + pix), fillIsFoggedVec);
		_mm_stream_si128((__m128i *)(attrBuffer->isTranslucentPoly + pix), fillIsTranslucentPolyVec);

		pix += 16;
	}

	// Scalar tail for the pixels left over after the 16-wide groups.
	for (size_t tail = vectorizedPixels; tail < totalPixels; tail++)
	{
		colorBuffer[tail] = fillColor;
		attrBuffer->SetAtIndex(tail, clearAttributes);
	}

	return RENDER3DERROR_NOERR;
}
|
||||
|
||||
#endif // ENABLE_SSE2
|
||||
|
|
|
@ -39,7 +39,13 @@ struct SoftRasterizerPostProcessParams
|
|||
bool fogAlphaOnly;
|
||||
};
|
||||
|
||||
#if defined(ENABLE_SSSE3)
|
||||
class SoftRasterizerRenderer : public Render3D_SSSE3
|
||||
#elif defined(ENABLE_SSE2)
|
||||
class SoftRasterizerRenderer : public Render3D_SSE2
|
||||
#else
|
||||
class SoftRasterizerRenderer : public Render3D
|
||||
#endif
|
||||
{
|
||||
protected:
|
||||
GFX3D_Clipper clipper;
|
||||
|
@ -62,7 +68,7 @@ protected:
|
|||
virtual Render3DError RenderFog(const u8 *densityTable, const u32 color, const u32 offset, const u8 shift, const bool alphaOnly);
|
||||
virtual Render3DError EndRender(const u64 frameCount);
|
||||
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const;
|
||||
|
||||
public:
|
||||
|
@ -70,7 +76,7 @@ public:
|
|||
size_t _clippedPolyCount;
|
||||
FragmentColor toonColor32LUT[32];
|
||||
GFX3D_Clipper::TClippedPoly *clippedPolys;
|
||||
FragmentAttributes *_framebufferAttributes;
|
||||
FragmentAttributesBuffer *_framebufferAttributes;
|
||||
TexCacheItem *polyTexKeys[POLYLIST_SIZE];
|
||||
bool polyVisible[POLYLIST_SIZE];
|
||||
bool polyBackfacing[POLYLIST_SIZE];
|
||||
|
@ -96,4 +102,22 @@ public:
|
|||
virtual Render3DError SetFramebufferSize(size_t w, size_t h);
|
||||
};
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
class SoftRasterizerRenderer_SSE2 : public SoftRasterizerRenderer
|
||||
{
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
|
||||
class SoftRasterizerRenderer_SSSE3 : public SoftRasterizerRenderer_SSE2
|
||||
{
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif // _RASTERIZE_H_
|
||||
|
|
|
@ -20,11 +20,21 @@
|
|||
|
||||
#include <string.h>
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include "bits.h"
|
||||
#include "common.h"
|
||||
#include "gfx3d.h"
|
||||
#include "MMU.h"
|
||||
#include "texcache.h"
|
||||
|
||||
|
||||
static CACHE_ALIGN u32 dsDepthToD24_LUT[32768] = {0};
|
||||
int cur3DCore = GPU3D_NULL;
|
||||
|
||||
|
@ -85,7 +95,7 @@ bool NDS_3D_ChangeCore(int newCore)
|
|||
return result;
|
||||
}
|
||||
|
||||
Render3DError error = newRenderer->SetFramebufferSize(gfx3d_getFramebufferWidth(), gfx3d_getFramebufferHeight());
|
||||
Render3DError error = newRenderer->SetFramebufferSize(GPU_GetFramebufferWidth(), GPU_GetFramebufferHeight());
|
||||
if (error != RENDER3DERROR_NOERR)
|
||||
{
|
||||
return result;
|
||||
|
@ -114,6 +124,76 @@ void Render3DBaseDestroy()
|
|||
}
|
||||
}
|
||||
|
||||
// Creates a structure-of-arrays attribute buffer holding newCount fragments.
// Each attribute gets its own array obtained from malloc_alignedCacheLine().
// NOTE(review): the allocation results are not checked here - confirm how
// malloc_alignedCacheLine() reports failure before relying on the pointers.
FragmentAttributesBuffer::FragmentAttributesBuffer(size_t newCount)
	: count(newCount),
	  depth((u32 *)malloc_alignedCacheLine(newCount * sizeof(u32))),
	  opaquePolyID((u8 *)malloc_alignedCacheLine(newCount * sizeof(u8))),
	  translucentPolyID((u8 *)malloc_alignedCacheLine(newCount * sizeof(u8))),
	  stencil((u8 *)malloc_alignedCacheLine(newCount * sizeof(u8))),
	  isFogged((u8 *)malloc_alignedCacheLine(newCount * sizeof(u8))),
	  isTranslucentPoly((u8 *)malloc_alignedCacheLine(newCount * sizeof(u8)))
{
	// Everything is set up by the initializer list, which follows the member
	// declaration order of the struct.
}
|
||||
|
||||
// Releases every per-attribute array that the constructor allocated.
FragmentAttributesBuffer::~FragmentAttributesBuffer()
{
	free_aligned(this->isTranslucentPoly);
	free_aligned(this->isFogged);
	free_aligned(this->stencil);
	free_aligned(this->translucentPolyID);
	free_aligned(this->opaquePolyID);
	free_aligned(this->depth);
}
|
||||
|
||||
void FragmentAttributesBuffer::SetAtIndex(const size_t index, const FragmentAttributes &attr)
|
||||
{
|
||||
this->depth[index] = attr.depth;
|
||||
this->opaquePolyID[index] = attr.opaquePolyID;
|
||||
this->translucentPolyID[index] = attr.translucentPolyID;
|
||||
this->stencil[index] = attr.stencil;
|
||||
this->isFogged[index] = attr.isFogged;
|
||||
this->isTranslucentPoly[index] = attr.isTranslucentPoly;
|
||||
}
|
||||
|
||||
void FragmentAttributesBuffer::SetAll(const FragmentAttributes &attr)
|
||||
{
|
||||
#ifdef ENABLE_SSE2
|
||||
const size_t sseCount = count - (count % 16);
|
||||
|
||||
const __m128i attrDepth_vec128 = _mm_set1_epi32(attr.depth);
|
||||
const __m128i attrOpaquePolyID_vec128 = _mm_set1_epi8(attr.opaquePolyID);
|
||||
const __m128i attrTranslucentPolyID_vec128 = _mm_set1_epi8(attr.translucentPolyID);
|
||||
const __m128i attrStencil_vec128 = _mm_set1_epi8(attr.stencil);
|
||||
const __m128i attrIsFogged_vec128 = _mm_set1_epi8(attr.isFogged);
|
||||
const __m128i attrIsTranslucentPoly_vec128 = _mm_set1_epi8(attr.isTranslucentPoly);
|
||||
|
||||
for (size_t i = 0; i < sseCount; i += 16)
|
||||
{
|
||||
_mm_stream_si128((__m128i *)(this->depth + 0), attrDepth_vec128);
|
||||
_mm_stream_si128((__m128i *)(this->depth + 4), attrDepth_vec128);
|
||||
_mm_stream_si128((__m128i *)(this->depth + 8), attrDepth_vec128);
|
||||
_mm_stream_si128((__m128i *)(this->depth + 12), attrDepth_vec128);
|
||||
|
||||
_mm_stream_si128((__m128i *)this->opaquePolyID, attrOpaquePolyID_vec128);
|
||||
_mm_stream_si128((__m128i *)this->translucentPolyID, attrTranslucentPolyID_vec128);
|
||||
_mm_stream_si128((__m128i *)this->stencil, attrStencil_vec128);
|
||||
_mm_stream_si128((__m128i *)this->isFogged, attrIsFogged_vec128);
|
||||
_mm_stream_si128((__m128i *)this->isTranslucentPoly, attrIsTranslucentPoly_vec128);
|
||||
}
|
||||
|
||||
for (size_t i = sseCount; i < count; i++)
|
||||
{
|
||||
this->SetAtIndex(i, attr);
|
||||
}
|
||||
#else
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
this->SetAtIndex(i, attr);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
Render3D::Render3D()
|
||||
{
|
||||
_renderID = RENDERID_NULL;
|
||||
|
@ -131,8 +211,8 @@ Render3D::Render3D()
|
|||
needTableInit = false;
|
||||
}
|
||||
|
||||
_framebufferWidth = GFX3D_FRAMEBUFFER_WIDTH;
|
||||
_framebufferHeight = GFX3D_FRAMEBUFFER_HEIGHT;
|
||||
_framebufferWidth = GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
_framebufferHeight = GPU_FRAMEBUFFER_NATIVE_HEIGHT;
|
||||
_framebufferColorSizeBytes = 0;
|
||||
_framebufferColor = NULL;
|
||||
|
||||
|
@ -141,7 +221,7 @@ Render3D::Render3D()
|
|||
|
||||
Render3D::~Render3D()
|
||||
{
|
||||
free(_framebufferColor);
|
||||
free_aligned(_framebufferColor);
|
||||
TexCache_Reset();
|
||||
}
|
||||
|
||||
|
@ -172,15 +252,21 @@ size_t Render3D::GetFramebufferHeight()
|
|||
|
||||
Render3DError Render3D::SetFramebufferSize(size_t w, size_t h)
|
||||
{
|
||||
if (w < GFX3D_FRAMEBUFFER_WIDTH || h < GFX3D_FRAMEBUFFER_HEIGHT)
|
||||
if (w < GPU_FRAMEBUFFER_NATIVE_WIDTH || h < GPU_FRAMEBUFFER_NATIVE_HEIGHT)
|
||||
{
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
const size_t newFramebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
||||
FragmentColor *oldFramebufferColor = this->_framebufferColor;
|
||||
FragmentColor *newFramebufferColor = (FragmentColor *)malloc_alignedCacheLine(newFramebufferColorSizeBytes);
|
||||
|
||||
this->_framebufferWidth = w;
|
||||
this->_framebufferHeight = h;
|
||||
this->_framebufferColorSizeBytes = w * h * sizeof(FragmentColor);
|
||||
this->_framebufferColor = (FragmentColor *)realloc(this->_framebufferColor, this->_framebufferColorSizeBytes);
|
||||
this->_framebufferColorSizeBytes = newFramebufferColorSizeBytes;
|
||||
this->_framebufferColor = newFramebufferColor;
|
||||
|
||||
free_aligned(oldFramebufferColor);
|
||||
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
@ -214,6 +300,7 @@ Render3DError Render3D::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665,
|
|||
{
|
||||
memcpy(dstRGBA6665, this->_framebufferColor, this->_framebufferColorSizeBytes);
|
||||
|
||||
// Convert to RGBA5551
|
||||
for (size_t i = 0; i < (this->_framebufferWidth * this->_framebufferHeight); i++)
|
||||
{
|
||||
dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
|
||||
|
@ -245,7 +332,7 @@ Render3DError Render3D::ClearFramebuffer(const GFX3D_State &renderState)
|
|||
clearFragment.translucentPolyID = kUnsetTranslucentPolyID;
|
||||
clearFragment.depth = renderState.clearDepth;
|
||||
clearFragment.stencil = 0;
|
||||
clearFragment.isTranslucentPoly = false;
|
||||
clearFragment.isTranslucentPoly = 0;
|
||||
clearFragment.isFogged = BIT15(renderState.clearColor);
|
||||
|
||||
if (renderState.enableClearImage)
|
||||
|
@ -258,33 +345,33 @@ Render3DError Render3D::ClearFramebuffer(const GFX3D_State &renderState)
|
|||
const u8 xScroll = scrollBits & 0xFF;
|
||||
const u8 yScroll = (scrollBits >> 8) & 0xFF;
|
||||
|
||||
size_t dd = (GFX3D_FRAMEBUFFER_WIDTH * GFX3D_FRAMEBUFFER_HEIGHT) - GFX3D_FRAMEBUFFER_WIDTH;
|
||||
size_t dstIndex = (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT) - GPU_FRAMEBUFFER_NATIVE_WIDTH;
|
||||
|
||||
for (size_t iy = 0; iy < GFX3D_FRAMEBUFFER_HEIGHT; iy++)
|
||||
for (size_t iy = 0; iy < GPU_FRAMEBUFFER_NATIVE_HEIGHT; iy++)
|
||||
{
|
||||
const size_t y = ((iy + yScroll) & 0xFF) << 8;
|
||||
|
||||
for (size_t ix = 0; ix < GFX3D_FRAMEBUFFER_WIDTH; ix++)
|
||||
for (size_t ix = 0; ix < GPU_FRAMEBUFFER_NATIVE_WIDTH; ix++)
|
||||
{
|
||||
const size_t x = (ix + xScroll) & 0xFF;
|
||||
const size_t adr = y + x;
|
||||
const size_t srcIndex = y | x;
|
||||
|
||||
//this is tested by harry potter and the order of the phoenix.
|
||||
//TODO (optimization) dont do this if we are mapped to blank memory (such as in sonic chronicles)
|
||||
//(or use a special zero fill in the bulk clearing above)
|
||||
this->clearImageColor16Buffer[dd] = clearColorBuffer[adr];
|
||||
this->clearImageColor16Buffer[dstIndex] = clearColorBuffer[srcIndex];
|
||||
|
||||
//this is tested quite well in the sonic chronicles main map mode
|
||||
//where depth values are used for trees etc you can walk behind
|
||||
this->clearImageDepthBuffer[dd] = dsDepthToD24_LUT[clearDepthBuffer[adr] & 0x7FFF];
|
||||
this->clearImageDepthBuffer[dstIndex] = dsDepthToD24_LUT[clearDepthBuffer[srcIndex] & 0x7FFF];
|
||||
|
||||
this->clearImageFogBuffer[dd] = BIT15(clearDepthBuffer[adr]);
|
||||
this->clearImagePolyIDBuffer[dd] = clearFragment.opaquePolyID;
|
||||
this->clearImageFogBuffer[dstIndex] = BIT15(clearDepthBuffer[srcIndex]);
|
||||
this->clearImagePolyIDBuffer[dstIndex] = clearFragment.opaquePolyID;
|
||||
|
||||
dd++;
|
||||
dstIndex++;
|
||||
}
|
||||
|
||||
dd -= GFX3D_FRAMEBUFFER_WIDTH * 2;
|
||||
dstIndex -= GPU_FRAMEBUFFER_NATIVE_WIDTH * 2;
|
||||
}
|
||||
|
||||
error = this->ClearUsingImage(this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
|
||||
|
@ -301,7 +388,7 @@ Render3DError Render3D::ClearFramebuffer(const GFX3D_State &renderState)
|
|||
return error;
|
||||
}
|
||||
|
||||
Render3DError Render3D::ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
Render3DError Render3D::ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer)
|
||||
{
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
@ -384,3 +471,311 @@ Render3DError Render3D::VramReconfigureSignal()
|
|||
TexCache_Invalidate();
|
||||
return RENDER3DERROR_NOERR;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
// SSE2 variant of ClearFramebuffer().
//
// Builds the clear colour and clear attributes from renderState. When the clear
// image is enabled, the scrolled clear image is gathered into the clearImage*
// buffers and handed to ClearUsingImage(); if that reports an error, or if the
// clear image is disabled, the framebuffer is cleared with ClearUsingValues().
Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
{
	FragmentColor clearColor;
	clearColor.r =  renderState.clearColor        & 0x1F;
	clearColor.g = (renderState.clearColor >>  5) & 0x1F;
	clearColor.b = (renderState.clearColor >> 10) & 0x1F;
	clearColor.a = (renderState.clearColor >> 16) & 0x1F;
	
	FragmentAttributes clearFragment;
	clearFragment.depth = renderState.clearDepth;
	clearFragment.opaquePolyID = (renderState.clearColor >> 24) & 0x3F;
	// Special value for an uninitialized translucent poly ID. Without it, fires in
	// spiderman2 don't display. It is not certain that this is right: it used to be
	// cleared to 0 as a guess, but in spiderman2 some fires with poly ID 0 try to
	// render on top of the background.
	clearFragment.translucentPolyID = kUnsetTranslucentPolyID;
	clearFragment.stencil = 0;
	clearFragment.isFogged = BIT15(renderState.clearColor);
	clearFragment.isTranslucentPoly = 0;
	
	if (!renderState.enableClearImage)
	{
		return this->ClearUsingValues(clearColor, clearFragment);
	}
	
	// The Lion, the Witch and the Wardrobe uses the scroll registers in its main
	// game engine.
	const u16 *__restrict clearColorBuffer = (u16 *__restrict)MMU.texInfo.textureSlotAddr[2];
	const u16 *__restrict clearDepthBuffer = (u16 *__restrict)MMU.texInfo.textureSlotAddr[3];
	const u16 scrollBits = T1ReadWord(MMU.ARM9_REG, 0x356); //CLRIMAGE_OFFSET
	const u8 xScroll = scrollBits & 0xFF;
	const u8 yScroll = (scrollBits >> 8) & 0xFF;
	
	const __m128i laneOffset_vec128 = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
	const __m128i wrapMask_vec128 = _mm_set1_epi16(0x00FF);
	const __m128i polyID_vec128 = _mm_set1_epi8(clearFragment.opaquePolyID);
	
	// The destination starts on the last native line and moves up one line per
	// source line.
	size_t dstIndex = (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT) - GPU_FRAMEBUFFER_NATIVE_WIDTH;
	
	for (size_t iy = 0; iy < GPU_FRAMEBUFFER_NATIVE_HEIGHT; iy++)
	{
		const size_t y = ((iy + yScroll) & 0xFF) << 8;
		const __m128i rowBase_vec128 = _mm_set1_epi16(y);
		
		for (size_t ix = 0; ix < GPU_FRAMEBUFFER_NATIVE_WIDTH; ix += 8)
		{
			// Source indices of 8 consecutive pixels: ((ix + xScroll + lane) & 0xFF) | y
			__m128i addr_vec128 = _mm_add_epi16(_mm_set1_epi16(ix + xScroll), laneOffset_vec128);
			addr_vec128 = _mm_or_si128(_mm_and_si128(addr_vec128, wrapMask_vec128), rowBase_vec128);
			
			u16 srcIndex[8];
			_mm_storeu_si128((__m128i *)srcIndex, addr_vec128);
			
			for (size_t lane = 0; lane < 8; lane++)
			{
				const u16 depthWord = clearDepthBuffer[srcIndex[lane]];
				
				this->clearImageColor16Buffer[dstIndex + lane] = clearColorBuffer[srcIndex[lane]];
				this->clearImageDepthBuffer[dstIndex + lane] = dsDepthToD24_LUT[depthWord & 0x7FFF];
				this->clearImageFogBuffer[dstIndex + lane] = BIT15(depthWord);
			}
			
			// 8 poly IDs = the low 64 bits of the splatted vector.
			_mm_storel_epi64((__m128i *)(this->clearImagePolyIDBuffer + dstIndex), polyID_vec128);
			
			dstIndex += 8;
		}
		
		// Rewind over the line just written and step back one more line.
		dstIndex -= GPU_FRAMEBUFFER_NATIVE_WIDTH * 2;
	}
	
	Render3DError error = this->ClearUsingImage(this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
	if (error != RENDER3DERROR_NOERR)
	{
		error = this->ClearUsingValues(clearColor, clearFragment);
	}
	
	return error;
}
|
||||
|
||||
#endif // ENABLE_SSE2
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
|
||||
// SSSE3 variant of FlushFramebuffer().
//
// Copies the internal colour framebuffer to dstRGBA6665 and, in the same pass,
// writes a 16-bit RGBA5551 conversion of each pixel to dstRGBA5551.
//
// The vector loop uses aligned 16-byte loads and stores, so _framebufferColor
// and dstRGBA6665 have to be 16-byte aligned.
// NOTE(review): alignment of the caller-supplied dstRGBA6665 is assumed, not
// checked - confirm against the callers.
Render3DError Render3D_SSSE3::FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551)
{
	const size_t pixCount = this->_framebufferWidth * this->_framebufferHeight;
	const size_t ssePixCount = pixCount - (pixCount % 4);
	
	// Loop-invariant constants.
	const __m128i maskR_vec128 = _mm_set1_epi32(0x0000003E);
	const __m128i maskG_vec128 = _mm_set1_epi32(0x00003E00);
	const __m128i maskB_vec128 = _mm_set1_epi32(0x003E0000);
	const __m128i maskA_vec128 = _mm_set1_epi32((int)0xFF000000);
	const __m128i alphaBit_vec128 = _mm_set1_epi32(0x00008000);
	const __m128i zero_vec128 = _mm_setzero_si128();
	// Gathers the low 16 bits of each 32-bit lane into the low 64 bits.
	const __m128i packLow16_vec128 = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0);
	
	for (size_t i = 0; i < ssePixCount; i += 4)
	{
		const __m128i color = _mm_load_si128((const __m128i *)(this->_framebufferColor + i));
		_mm_store_si128((__m128i *)(dstRGBA6665 + i), color);
		
		// Keep the upper 5 bits of each 6-bit channel and move them to their
		// RGBA5551 positions (R: bits 0-4, G: bits 5-9, B: bits 10-14).
		const __m128i r = _mm_srli_epi32(_mm_and_si128(color, maskR_vec128), 1);
		const __m128i g = _mm_srli_epi32(_mm_and_si128(color, maskG_vec128), 4);
		const __m128i b = _mm_srli_epi32(_mm_and_si128(color, maskB_vec128), 7);
		
		// Bit 15 is set when alpha is non-zero. Testing for equality with zero
		// (instead of a signed greater-than) matches the scalar "a == 0" test for
		// every possible alpha byte, including values >= 0x80.
		const __m128i alphaIsZero = _mm_cmpeq_epi32(_mm_and_si128(color, maskA_vec128), zero_vec128);
		const __m128i a = _mm_andnot_si128(alphaIsZero, alphaBit_vec128);
		
		__m128i packed = _mm_or_si128(_mm_or_si128(r, g), _mm_or_si128(b, a));
		
		// The four 16-bit results sit in every other 16-bit slot; move them to the
		// low 64 bits before storing them.
		packed = _mm_shuffle_epi8(packed, packLow16_vec128);
		_mm_storel_epi64((__m128i *)(dstRGBA5551 + i), packed);
	}
	
	// Remaining 0-3 pixels. The RGBA6665 copy has to be done here as well,
	// otherwise those pixels would never reach dstRGBA6665.
	for (size_t i = ssePixCount; i < pixCount; i++)
	{
		dstRGBA6665[i] = this->_framebufferColor[i];
		dstRGBA5551[i] = R6G6B6TORGB15(this->_framebufferColor[i].r, this->_framebufferColor[i].g, this->_framebufferColor[i].b) | ((this->_framebufferColor[i].a == 0) ? 0x0000 : 0x8000);
	}
	
	return RENDER3DERROR_NOERR;
}
|
||||
|
||||
// SSSE3 variant of ClearFramebuffer().
//
// Only the case "clear image enabled and both scroll values are zero" has a
// dedicated fast path here, processing 16 pixels per iteration. Every other
// case (clear image disabled, or any scrolling) is handled by the inherited
// Render3D_SSE2::ClearFramebuffer().
//
// The fast path uses aligned 16-byte loads from the clear image colour/depth
// sources.
// NOTE(review): 16-byte alignment of MMU.texInfo.textureSlotAddr[2] and [3] is
// assumed, not checked - confirm against the MMU setup.
Render3DError Render3D_SSSE3::ClearFramebuffer(const GFX3D_State &renderState)
{
	if (!renderState.enableClearImage)
	{
		return Render3D_SSE2::ClearFramebuffer(renderState);
	}
	
	const u16 scrollBits = T1ReadWord(MMU.ARM9_REG, 0x356); //CLRIMAGE_OFFSET
	const u8 xScroll = scrollBits & 0xFF;
	const u8 yScroll = (scrollBits >> 8) & 0xFF;
	
	if (xScroll != 0 || yScroll != 0)
	{
		// Scrolled sources wrap per pixel and cannot be read as contiguous
		// vectors; the SSE2 implementation handles that case.
		return Render3D_SSE2::ClearFramebuffer(renderState);
	}
	
	FragmentColor clearColor;
	clearColor.r = renderState.clearColor & 0x1F;
	clearColor.g = (renderState.clearColor >> 5) & 0x1F;
	clearColor.b = (renderState.clearColor >> 10) & 0x1F;
	clearColor.a = (renderState.clearColor >> 16) & 0x1F;
	
	FragmentAttributes clearFragment;
	clearFragment.opaquePolyID = (renderState.clearColor >> 24) & 0x3F;
	// Special value for an uninitialized translucent poly ID. Without it, fires in
	// spiderman2 don't display. It is not certain that this is right: it used to be
	// cleared to 0 as a guess, but in spiderman2 some fires with poly ID 0 try to
	// render on top of the background.
	clearFragment.translucentPolyID = kUnsetTranslucentPolyID;
	clearFragment.depth = renderState.clearDepth;
	clearFragment.stencil = 0;
	clearFragment.isTranslucentPoly = 0;
	clearFragment.isFogged = BIT15(renderState.clearColor);
	
	const u16 *__restrict clearColorBuffer = (u16 *__restrict)MMU.texInfo.textureSlotAddr[2];
	const u16 *__restrict clearDepthBuffer = (u16 *__restrict)MMU.texInfo.textureSlotAddr[3];
	
	const __m128i depthBitMask_vec128 = _mm_set1_epi16(0x7FFF);
	const __m128i opaquePolyID_vec128 = _mm_set1_epi8(clearFragment.opaquePolyID);
	
	// The destination starts on the last native line and moves up one line per
	// source line.
	size_t dstIndex = (GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT) - GPU_FRAMEBUFFER_NATIVE_WIDTH;
	
	for (size_t iy = 0; iy < GPU_FRAMEBUFFER_NATIVE_HEIGHT; iy++)
	{
		for (size_t ix = 0; ix < GPU_FRAMEBUFFER_NATIVE_WIDTH; ix += 16)
		{
			const size_t addr = (iy << 8) | ix;
			
			// Colour: straight copy of 16 pixels.
			_mm_store_si128((__m128i *)(this->clearImageColor16Buffer + dstIndex), _mm_load_si128((const __m128i *)(clearColorBuffer + addr)));
			_mm_store_si128((__m128i *)(this->clearImageColor16Buffer + dstIndex + 8), _mm_load_si128((const __m128i *)(clearColorBuffer + addr + 8)));
			
			// Each 16-bit source word carries the depth in bits 0-14 and the fog
			// flag in bit 15. "Lo" is pixels 0-7, "Hi" is pixels 8-15.
			const __m128i depthWordLo_vec128 = _mm_load_si128((const __m128i *)(clearDepthBuffer + addr));
			const __m128i depthWordHi_vec128 = _mm_load_si128((const __m128i *)(clearDepthBuffer + addr + 8));
			
			const __m128i depthLo_vec128 = _mm_and_si128(depthWordLo_vec128, depthBitMask_vec128);
			const __m128i depthHi_vec128 = _mm_and_si128(depthWordHi_vec128, depthBitMask_vec128);
			
			this->clearImageDepthBuffer[dstIndex+ 0] = dsDepthToD24_LUT[_mm_extract_epi16(depthLo_vec128, 0)];
			this->clearImageDepthBuffer[dstIndex+ 1] = dsDepthToD24_LUT[_mm_extract_epi16(depthLo_vec128, 1)];
			this->clearImageDepthBuffer[dstIndex+ 2] = dsDepthToD24_LUT[_mm_extract_epi16(depthLo_vec128, 2)];
			this->clearImageDepthBuffer[dstIndex+ 3] = dsDepthToD24_LUT[_mm_extract_epi16(depthLo_vec128, 3)];
			this->clearImageDepthBuffer[dstIndex+ 4] = dsDepthToD24_LUT[_mm_extract_epi16(depthLo_vec128, 4)];
			this->clearImageDepthBuffer[dstIndex+ 5] = dsDepthToD24_LUT[_mm_extract_epi16(depthLo_vec128, 5)];
			this->clearImageDepthBuffer[dstIndex+ 6] = dsDepthToD24_LUT[_mm_extract_epi16(depthLo_vec128, 6)];
			this->clearImageDepthBuffer[dstIndex+ 7] = dsDepthToD24_LUT[_mm_extract_epi16(depthLo_vec128, 7)];
			this->clearImageDepthBuffer[dstIndex+ 8] = dsDepthToD24_LUT[_mm_extract_epi16(depthHi_vec128, 0)];
			this->clearImageDepthBuffer[dstIndex+ 9] = dsDepthToD24_LUT[_mm_extract_epi16(depthHi_vec128, 1)];
			this->clearImageDepthBuffer[dstIndex+10] = dsDepthToD24_LUT[_mm_extract_epi16(depthHi_vec128, 2)];
			this->clearImageDepthBuffer[dstIndex+11] = dsDepthToD24_LUT[_mm_extract_epi16(depthHi_vec128, 3)];
			this->clearImageDepthBuffer[dstIndex+12] = dsDepthToD24_LUT[_mm_extract_epi16(depthHi_vec128, 4)];
			this->clearImageDepthBuffer[dstIndex+13] = dsDepthToD24_LUT[_mm_extract_epi16(depthHi_vec128, 5)];
			this->clearImageDepthBuffer[dstIndex+14] = dsDepthToD24_LUT[_mm_extract_epi16(depthHi_vec128, 6)];
			this->clearImageDepthBuffer[dstIndex+15] = dsDepthToD24_LUT[_mm_extract_epi16(depthHi_vec128, 7)];
			
			// Fog: a logical shift by 15 leaves 0 or 1 in each 16-bit lane. Packing
			// narrows them to bytes with pixels 0-7 in bytes 0-7 and pixels 8-15 in
			// bytes 8-15, i.e. in pixel order. (The earlier pair of byte shuffles
			// put the two halves in each other's place.)
			const __m128i fogLo_vec128 = _mm_srli_epi16(depthWordLo_vec128, 15);
			const __m128i fogHi_vec128 = _mm_srli_epi16(depthWordHi_vec128, 15);
			
			_mm_store_si128((__m128i *)(this->clearImageFogBuffer + dstIndex), _mm_packus_epi16(fogLo_vec128, fogHi_vec128));
			_mm_store_si128((__m128i *)(this->clearImagePolyIDBuffer + dstIndex), opaquePolyID_vec128);
			
			dstIndex += 16;
		}
		
		// Rewind over the line just written and step back one more line.
		dstIndex -= GPU_FRAMEBUFFER_NATIVE_WIDTH * 2;
	}
	
	Render3DError error = this->ClearUsingImage(this->clearImageColor16Buffer, this->clearImageDepthBuffer, this->clearImageFogBuffer, this->clearImagePolyIDBuffer);
	if (error != RENDER3DERROR_NOERR)
	{
		error = this->ClearUsingValues(clearColor, clearFragment);
	}
	
	return error;
}
|
||||
|
||||
#endif // ENABLE_SSSE3
|
||||
|
|
|
@ -77,8 +77,25 @@ struct FragmentAttributes
|
|||
u8 opaquePolyID;
|
||||
u8 translucentPolyID;
|
||||
u8 stencil;
|
||||
bool isFogged;
|
||||
bool isTranslucentPoly;
|
||||
u8 isFogged;
|
||||
u8 isTranslucentPoly;
|
||||
};
|
||||
|
||||
struct FragmentAttributesBuffer
|
||||
{
|
||||
size_t count;
|
||||
u32 *depth;
|
||||
u8 *opaquePolyID;
|
||||
u8 *translucentPolyID;
|
||||
u8 *stencil;
|
||||
u8 *isFogged;
|
||||
u8 *isTranslucentPoly;
|
||||
|
||||
FragmentAttributesBuffer(size_t newCount);
|
||||
~FragmentAttributesBuffer();
|
||||
|
||||
void SetAtIndex(const size_t index, const FragmentAttributes &attr);
|
||||
void SetAll(const FragmentAttributes &attr);
|
||||
};
|
||||
|
||||
class Render3D
|
||||
|
@ -92,10 +109,10 @@ protected:
|
|||
size_t _framebufferColorSizeBytes;
|
||||
FragmentColor *_framebufferColor;
|
||||
|
||||
CACHE_ALIGN u16 clearImageColor16Buffer[GFX3D_FRAMEBUFFER_WIDTH * GFX3D_FRAMEBUFFER_HEIGHT];
|
||||
CACHE_ALIGN u32 clearImageDepthBuffer[GFX3D_FRAMEBUFFER_WIDTH * GFX3D_FRAMEBUFFER_HEIGHT];
|
||||
CACHE_ALIGN bool clearImageFogBuffer[GFX3D_FRAMEBUFFER_WIDTH * GFX3D_FRAMEBUFFER_HEIGHT];
|
||||
CACHE_ALIGN u8 clearImagePolyIDBuffer[GFX3D_FRAMEBUFFER_WIDTH * GFX3D_FRAMEBUFFER_HEIGHT];
|
||||
CACHE_ALIGN u16 clearImageColor16Buffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN u32 clearImageDepthBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN u8 clearImageFogBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
CACHE_ALIGN u8 clearImagePolyIDBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT];
|
||||
|
||||
virtual Render3DError BeginRender(const GFX3D &engine);
|
||||
virtual Render3DError RenderGeometry(const GFX3D_State &renderState, const POLYLIST *polyList, const INDEXLIST *indexList);
|
||||
|
@ -104,7 +121,7 @@ protected:
|
|||
virtual Render3DError EndRender(const u64 frameCount);
|
||||
virtual Render3DError FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551);
|
||||
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const bool *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 *__restrict polyIDBuffer);
|
||||
virtual Render3DError ClearUsingValues(const FragmentColor &clearColor, const FragmentAttributes &clearAttributes) const;
|
||||
|
||||
virtual Render3DError SetupPolygon(const POLY &thePoly);
|
||||
|
@ -138,4 +155,27 @@ public:
|
|||
virtual Render3DError SetFramebufferSize(size_t w, size_t h); // Called whenever the output framebuffer size changes.
|
||||
};
|
||||
|
||||
#ifdef ENABLE_SSE2
|
||||
|
||||
class Render3D_SSE2 : public Render3D
|
||||
{
|
||||
public:
|
||||
virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_SSSE3
|
||||
|
||||
class Render3D_SSSE3 : public Render3D_SSE2
|
||||
{
|
||||
protected:
|
||||
virtual Render3DError FlushFramebuffer(FragmentColor *__restrict dstRGBA6665, u16 *__restrict dstRGBA5551);
|
||||
|
||||
public:
|
||||
virtual Render3DError ClearFramebuffer(const GFX3D_State &renderState);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif // RENDER3D_H
|
||||
|
|
|
@ -86,6 +86,14 @@
|
|||
#undef ENABLE_SSE2
|
||||
#endif
|
||||
|
||||
#ifndef ENABLE_SSE2
|
||||
#undef ENABLE_SSE3
|
||||
#endif
|
||||
|
||||
#ifndef ENABLE_SSE3
|
||||
#undef ENABLE_SSSE3
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define strcasecmp(x,y) _stricmp(x,y)
|
||||
#define strncasecmp(x, y, l) strnicmp(x, y, l)
|
||||
|
@ -119,7 +127,12 @@
|
|||
#else
|
||||
#define DS_ALIGN(X)
|
||||
#endif
|
||||
|
||||
#ifdef HOST_64
|
||||
#define CACHE_ALIGN DS_ALIGN(64)
|
||||
#else
|
||||
#define CACHE_ALIGN DS_ALIGN(32)
|
||||
#endif
|
||||
//use this for example when you want a byte value to be better-aligned
|
||||
#define FAST_ALIGN DS_ALIGN(4)
|
||||
//---------------------------------------------
|
||||
|
|
|
@ -4045,7 +4045,7 @@ void CloseRom()
|
|||
|
||||
// clear screen so the last frame we rendered doesn't stick around
|
||||
// (TODO: maybe NDS_Reset should do this?)
|
||||
memset(GPU_screen, 0xFF, sizeof(GPU_screen));
|
||||
memset(GPU_screen, 0xFF, GPU_GetFramebufferWidth() * GPU_GetFramebufferHeight() * 2 * sizeof(u16));
|
||||
|
||||
InvalidateRect(MainWindow->getHWnd(), NULL, TRUE); // make sure the window refreshes with the cleared screen
|
||||
|
||||
|
|
Loading…
Reference in New Issue