GPU / MMU:
- Do SSE2 optimization for direct-color sprite renders.
- Make ARM9_LCD cache-aligned. This allows SSE2 to perform aligned loads/stores on certain operations, improving performance.
- Further templatize some methods.
- Do some misc. code cleanup.
This commit is contained in:
parent
ced0d3986d
commit
7e3f1d85ae
|
@ -159,63 +159,6 @@ FORCEINLINE void rot_BMP_map(GPUEngineBase *gpu, const s32 auxX, const s32 auxY,
|
||||||
gpu->___setFinalColorBck<LAYERID, MOSAIC, false, 0, ISCUSTOMRENDERINGNEEDED, USECUSTOMVRAM>(color, i, ((color & 0x8000) != 0));
|
gpu->___setFinalColorBck<LAYERID, MOSAIC, false, 0, ISCUSTOMRENDERINGNEEDED, USECUSTOMVRAM>(color, i, ((color & 0x8000) != 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef void (*rot_fun)(GPUEngineBase *gpu, const s32 auxX, const s32 auxY, const int lg, const u32 map, const u32 tile, const u16 *pal, const size_t i);
|
|
||||||
|
|
||||||
template<rot_fun fun, bool WRAP>
|
|
||||||
void rot_scale_op(GPUEngineBase *gpu, const BGxPARMS &param, const u16 LG, const s32 wh, const s32 ht, const u32 map, const u32 tile, const u16 *pal)
|
|
||||||
{
|
|
||||||
ROTOCOORD x, y;
|
|
||||||
x.val = param.BGxX;
|
|
||||||
y.val = param.BGxY;
|
|
||||||
|
|
||||||
const s32 dx = (s32)param.BGxPA;
|
|
||||||
const s32 dy = (s32)param.BGxPC;
|
|
||||||
|
|
||||||
// as an optimization, specially handle the fairly common case of
|
|
||||||
// "unrotated + unscaled + no boundary checking required"
|
|
||||||
if (dx == GPU_FRAMEBUFFER_NATIVE_WIDTH && dy == 0)
|
|
||||||
{
|
|
||||||
s32 auxX = (WRAP) ? x.bits.Integer & (wh-1) : x.bits.Integer;
|
|
||||||
const s32 auxY = (WRAP) ? y.bits.Integer & (ht-1) : y.bits.Integer;
|
|
||||||
|
|
||||||
if (WRAP || (auxX + LG < wh && auxX >= 0 && auxY < ht && auxY >= 0))
|
|
||||||
{
|
|
||||||
for (size_t i = 0; i < LG; i++)
|
|
||||||
{
|
|
||||||
fun(gpu, auxX, auxY, wh, map, tile, pal, i);
|
|
||||||
auxX++;
|
|
||||||
|
|
||||||
if (WRAP)
|
|
||||||
auxX = auxX & (wh-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0; i < LG; i++, x.val += dx, y.val += dy)
|
|
||||||
{
|
|
||||||
const s32 auxX = (WRAP) ? x.bits.Integer & (wh-1) : x.bits.Integer;
|
|
||||||
const s32 auxY = (WRAP) ? y.bits.Integer & (ht-1) : y.bits.Integer;
|
|
||||||
|
|
||||||
if (WRAP || ((auxX >= 0) && (auxX < wh) && (auxY >= 0) && (auxY < ht)))
|
|
||||||
fun(gpu, auxX, auxY, wh, map, tile, pal, i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<GPULayerID LAYERID, rot_fun fun>
|
|
||||||
void apply_rot_fun(GPUEngineBase *gpu, const BGxPARMS &param, const u16 LG, const u32 map, const u32 tile, const u16 *pal)
|
|
||||||
{
|
|
||||||
struct _BGxCNT *bgCnt = &(gpu->dispx_st)->dispx_BGxCNT[LAYERID].bits;
|
|
||||||
s32 wh = gpu->BGSize[LAYERID][0];
|
|
||||||
s32 ht = gpu->BGSize[LAYERID][1];
|
|
||||||
|
|
||||||
if (bgCnt->PaletteSet_Wrap)
|
|
||||||
rot_scale_op<fun,true>(gpu, param, LG, wh, ht, map, tile, pal);
|
|
||||||
else
|
|
||||||
rot_scale_op<fun,false>(gpu, param, LG, wh, ht, map, tile, pal);
|
|
||||||
}
|
|
||||||
|
|
||||||
void gpu_savestate(EMUFILE* os)
|
void gpu_savestate(EMUFILE* os)
|
||||||
{
|
{
|
||||||
const GPUEngineA *mainEngine = GPU->GetEngineMain();
|
const GPUEngineA *mainEngine = GPU->GetEngineMain();
|
||||||
|
@ -351,6 +294,9 @@ void GPUEngineBase::_InitLUTs()
|
||||||
|
|
||||||
GPUEngineBase::GPUEngineBase()
|
GPUEngineBase::GPUEngineBase()
|
||||||
{
|
{
|
||||||
|
_paletteBG = NULL;
|
||||||
|
_paletteOBJ = NULL;
|
||||||
|
|
||||||
debug = false;
|
debug = false;
|
||||||
_InitLUTs();
|
_InitLUTs();
|
||||||
workingScanline = NULL;
|
workingScanline = NULL;
|
||||||
|
@ -419,7 +365,7 @@ void GPUEngineBase::_Reset_Base()
|
||||||
this->_bgPrio[1] = 0;
|
this->_bgPrio[1] = 0;
|
||||||
this->_bgPrio[2] = 0;
|
this->_bgPrio[2] = 0;
|
||||||
this->_bgPrio[3] = 0;
|
this->_bgPrio[3] = 0;
|
||||||
this->_bgPrio[4] = 0xFF;
|
this->_bgPrio[4] = 0x7F;
|
||||||
|
|
||||||
this->_bg0HasHighestPrio = true;
|
this->_bg0HasHighestPrio = true;
|
||||||
|
|
||||||
|
@ -677,54 +623,55 @@ void GPUEngineBase::SetVideoProp(const u32 ctrlBits)
|
||||||
|
|
||||||
this->_sprEnable = cnt->OBJ_Enable;
|
this->_sprEnable = cnt->OBJ_Enable;
|
||||||
|
|
||||||
this->SetBGProp(3, T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 14));
|
this->SetBGProp<GPULayerID_BG3>( T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 14) );
|
||||||
this->SetBGProp(2, T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 12));
|
this->SetBGProp<GPULayerID_BG2>( T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 12) );
|
||||||
this->SetBGProp(1, T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 10));
|
this->SetBGProp<GPULayerID_BG1>( T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 10) );
|
||||||
this->SetBGProp(0, T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 8));
|
this->SetBGProp<GPULayerID_BG0>( T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 8) );
|
||||||
}
|
}
|
||||||
|
|
||||||
//this handles writing in BGxCNT
|
//this handles writing in BGxCNT
|
||||||
void GPUEngineBase::SetBGProp(const size_t num, const u16 ctrlBits)
|
template <GPULayerID LAYERID>
|
||||||
|
void GPUEngineBase::SetBGProp(const u16 ctrlBits)
|
||||||
{
|
{
|
||||||
struct _BGxCNT *cnt = &((this->dispx_st)->dispx_BGxCNT[num].bits);
|
struct _BGxCNT *cnt = &((this->dispx_st)->dispx_BGxCNT[LAYERID].bits);
|
||||||
struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits;
|
struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits;
|
||||||
|
|
||||||
this->dispx_st->dispx_BGxCNT[num].val = LE_TO_LOCAL_16(ctrlBits);
|
this->dispx_st->dispx_BGxCNT[LAYERID].val = LE_TO_LOCAL_16(ctrlBits);
|
||||||
|
|
||||||
this->ResortBGLayers();
|
this->ResortBGLayers();
|
||||||
|
|
||||||
if (this->_engineID == GPUEngineID_Sub)
|
if (this->_engineID == GPUEngineID_Sub)
|
||||||
{
|
{
|
||||||
this->_BG_tile_ram[num] = MMU_BBG;
|
this->_BG_tile_ram[LAYERID] = MMU_BBG;
|
||||||
this->_BG_bmp_ram[num] = MMU_BBG;
|
this->_BG_bmp_ram[LAYERID] = MMU_BBG;
|
||||||
this->_BG_bmp_large_ram[num] = MMU_BBG;
|
this->_BG_bmp_large_ram[LAYERID] = MMU_BBG;
|
||||||
this->_BG_map_ram[num] = MMU_BBG;
|
this->_BG_map_ram[LAYERID] = MMU_BBG;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
this->_BG_tile_ram[num] = MMU_ABG + dispCnt->CharacBase_Block * ADDRESS_STEP_64KB;
|
this->_BG_tile_ram[LAYERID] = MMU_ABG + dispCnt->CharacBase_Block * ADDRESS_STEP_64KB;
|
||||||
this->_BG_bmp_ram[num] = MMU_ABG;
|
this->_BG_bmp_ram[LAYERID] = MMU_ABG;
|
||||||
this->_BG_bmp_large_ram[num] = MMU_ABG;
|
this->_BG_bmp_large_ram[LAYERID] = MMU_ABG;
|
||||||
this->_BG_map_ram[num] = MMU_ABG + dispCnt->ScreenBase_Block * ADDRESS_STEP_64KB;
|
this->_BG_map_ram[LAYERID] = MMU_ABG + dispCnt->ScreenBase_Block * ADDRESS_STEP_64KB;
|
||||||
}
|
}
|
||||||
|
|
||||||
this->_BG_tile_ram[num] += (cnt->CharacBase_Block * ADDRESS_STEP_16KB);
|
this->_BG_tile_ram[LAYERID] += (cnt->CharacBase_Block * ADDRESS_STEP_16KB);
|
||||||
this->_BG_bmp_ram[num] += (cnt->ScreenBase_Block * ADDRESS_STEP_16KB);
|
this->_BG_bmp_ram[LAYERID] += (cnt->ScreenBase_Block * ADDRESS_STEP_16KB);
|
||||||
this->_BG_map_ram[num] += (cnt->ScreenBase_Block * ADDRESS_STEP_2KB);
|
this->_BG_map_ram[LAYERID] += (cnt->ScreenBase_Block * ADDRESS_STEP_2KB);
|
||||||
|
|
||||||
switch (num)
|
switch (LAYERID)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
case 1:
|
case 1:
|
||||||
this->BGExtPalSlot[num] = cnt->PaletteSet_Wrap * 2 + num;
|
this->BGExtPalSlot[LAYERID] = cnt->PaletteSet_Wrap * 2 + LAYERID;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
this->BGExtPalSlot[num] = (u8)num;
|
this->BGExtPalSlot[LAYERID] = (u8)LAYERID;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
BGType mode = GPUEngineBase::_mode2type[dispCnt->BG_Mode][num];
|
BGType mode = GPUEngineBase::_mode2type[dispCnt->BG_Mode][LAYERID];
|
||||||
|
|
||||||
//clarify affine ext modes
|
//clarify affine ext modes
|
||||||
if (mode == BGType_AffineExt)
|
if (mode == BGType_AffineExt)
|
||||||
|
@ -746,12 +693,12 @@ void GPUEngineBase::SetBGProp(const size_t num, const u16 ctrlBits)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this->_BGTypes[num] = mode;
|
this->_BGTypes[LAYERID] = mode;
|
||||||
|
|
||||||
this->BGSize[num][0] = GPUEngineBase::_sizeTab[mode][cnt->ScreenSize][0];
|
this->BGSize[LAYERID][0] = GPUEngineBase::_sizeTab[mode][cnt->ScreenSize][0];
|
||||||
this->BGSize[num][1] = GPUEngineBase::_sizeTab[mode][cnt->ScreenSize][1];
|
this->BGSize[LAYERID][1] = GPUEngineBase::_sizeTab[mode][cnt->ScreenSize][1];
|
||||||
|
|
||||||
this->_bgPrio[num] = (ctrlBits & 0x3);
|
this->_bgPrio[LAYERID] = (ctrlBits & 0x3);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool ISCUSTOMRENDERINGNEEDED>
|
template<bool ISCUSTOMRENDERINGNEEDED>
|
||||||
|
@ -789,6 +736,7 @@ void GPUEngineBase::SetLayerEnableState(const size_t layerIndex, bool theState)
|
||||||
// ROUTINES FOR INSIDE / OUTSIDE WINDOW CHECKS
|
// ROUTINES FOR INSIDE / OUTSIDE WINDOW CHECKS
|
||||||
/*****************************************************************************/
|
/*****************************************************************************/
|
||||||
|
|
||||||
|
// check whether (x,y) is within the rectangle (including wraparounds)
|
||||||
template<int WIN_NUM>
|
template<int WIN_NUM>
|
||||||
u8 GPUEngineBase::_WithinRect(const size_t x) const
|
u8 GPUEngineBase::_WithinRect(const size_t x) const
|
||||||
{
|
{
|
||||||
|
@ -1100,11 +1048,11 @@ FORCEINLINE void GPUEngineBase::_SetFinalColorSprite(const size_t srcX, const si
|
||||||
template<GPULayerID LAYERID, bool BACKDROP, int FUNCNUM, bool ISCUSTOMRENDERINGNEEDED, bool USECUSTOMVRAM>
|
template<GPULayerID LAYERID, bool BACKDROP, int FUNCNUM, bool ISCUSTOMRENDERINGNEEDED, bool USECUSTOMVRAM>
|
||||||
FORCEINLINE void GPUEngineBase::____setFinalColorBck(const u16 color, const size_t srcX)
|
FORCEINLINE void GPUEngineBase::____setFinalColorBck(const u16 color, const size_t srcX)
|
||||||
{
|
{
|
||||||
u16 *dstLine = this->currDst;
|
|
||||||
u8 *bgLine = this->_bgPixels;
|
|
||||||
|
|
||||||
if (ISCUSTOMRENDERINGNEEDED)
|
if (ISCUSTOMRENDERINGNEEDED)
|
||||||
{
|
{
|
||||||
|
u16 *dstLine = this->currDst;
|
||||||
|
u8 *bgLine = this->_bgPixels;
|
||||||
|
|
||||||
const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo();
|
const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo();
|
||||||
|
|
||||||
for (size_t line = 0; line < _gpuDstLineCount[this->currLine]; line++)
|
for (size_t line = 0; line < _gpuDstLineCount[this->currLine]; line++)
|
||||||
|
@ -1130,8 +1078,8 @@ FORCEINLINE void GPUEngineBase::____setFinalColorBck(const u16 color, const size
|
||||||
{
|
{
|
||||||
this->_SetFinalColorBG<LAYERID, BACKDROP, FUNCNUM>(srcX,
|
this->_SetFinalColorBG<LAYERID, BACKDROP, FUNCNUM>(srcX,
|
||||||
srcX,
|
srcX,
|
||||||
dstLine,
|
this->currDst,
|
||||||
bgLine,
|
this->_bgPixels,
|
||||||
color);
|
color);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1217,7 +1165,7 @@ void GPUEngineBase::_MosaicSpriteLinePixel(const size_t x, u16 l, u16 *dst, u8 *
|
||||||
|
|
||||||
dst[x] = LE_TO_LOCAL_16(objColor.color);
|
dst[x] = LE_TO_LOCAL_16(objColor.color);
|
||||||
dst_alpha[x] = objColor.alpha;
|
dst_alpha[x] = objColor.alpha;
|
||||||
if (!objColor.opaque) prioTab[x] = 0xFF;
|
if (!objColor.opaque) prioTab[x] = 0x7F;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUEngineBase::_MosaicSpriteLine(u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab)
|
void GPUEngineBase::_MosaicSpriteLine(u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab)
|
||||||
|
@ -1228,6 +1176,61 @@ void GPUEngineBase::_MosaicSpriteLine(u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTa
|
||||||
this->_MosaicSpriteLinePixel(i, l, dst, dst_alpha, typeTab, prioTab);
|
this->_MosaicSpriteLinePixel(i, l, dst, dst_alpha, typeTab, prioTab);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<rot_fun fun, bool WRAP>
|
||||||
|
void GPUEngineBase::_rot_scale_op(const BGxPARMS &param, const u16 LG, const s32 wh, const s32 ht, const u32 map, const u32 tile, const u16 *pal)
|
||||||
|
{
|
||||||
|
ROTOCOORD x, y;
|
||||||
|
x.val = param.BGxX;
|
||||||
|
y.val = param.BGxY;
|
||||||
|
|
||||||
|
const s32 dx = (s32)param.BGxPA;
|
||||||
|
const s32 dy = (s32)param.BGxPC;
|
||||||
|
|
||||||
|
// as an optimization, specially handle the fairly common case of
|
||||||
|
// "unrotated + unscaled + no boundary checking required"
|
||||||
|
if (dx == GPU_FRAMEBUFFER_NATIVE_WIDTH && dy == 0)
|
||||||
|
{
|
||||||
|
s32 auxX = (WRAP) ? x.bits.Integer & (wh-1) : x.bits.Integer;
|
||||||
|
const s32 auxY = (WRAP) ? y.bits.Integer & (ht-1) : y.bits.Integer;
|
||||||
|
|
||||||
|
if (WRAP || (auxX + LG < wh && auxX >= 0 && auxY < ht && auxY >= 0))
|
||||||
|
{
|
||||||
|
for (size_t i = 0; i < LG; i++)
|
||||||
|
{
|
||||||
|
fun(this, auxX, auxY, wh, map, tile, pal, i);
|
||||||
|
auxX++;
|
||||||
|
|
||||||
|
if (WRAP)
|
||||||
|
auxX = auxX & (wh-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < LG; i++, x.val += dx, y.val += dy)
|
||||||
|
{
|
||||||
|
const s32 auxX = (WRAP) ? x.bits.Integer & (wh-1) : x.bits.Integer;
|
||||||
|
const s32 auxY = (WRAP) ? y.bits.Integer & (ht-1) : y.bits.Integer;
|
||||||
|
|
||||||
|
if (WRAP || ((auxX >= 0) && (auxX < wh) && (auxY >= 0) && (auxY < ht)))
|
||||||
|
fun(this, auxX, auxY, wh, map, tile, pal, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<GPULayerID LAYERID, rot_fun fun>
|
||||||
|
void GPUEngineBase::_apply_rot_fun(const BGxPARMS &param, const u16 LG, const u32 map, const u32 tile, const u16 *pal)
|
||||||
|
{
|
||||||
|
struct _BGxCNT *bgCnt = &(this->dispx_st)->dispx_BGxCNT[LAYERID].bits;
|
||||||
|
s32 wh = this->BGSize[LAYERID][0];
|
||||||
|
s32 ht = this->BGSize[LAYERID][1];
|
||||||
|
|
||||||
|
if (bgCnt->PaletteSet_Wrap)
|
||||||
|
this->_rot_scale_op<fun,true>(param, LG, wh, ht, map, tile, pal);
|
||||||
|
else
|
||||||
|
this->_rot_scale_op<fun,false>(param, LG, wh, ht, map, tile, pal);
|
||||||
|
}
|
||||||
|
|
||||||
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
|
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
|
||||||
void GPUEngineBase::_LineLarge8bpp()
|
void GPUEngineBase::_LineLarge8bpp()
|
||||||
{
|
{
|
||||||
|
@ -1250,12 +1253,10 @@ void GPUEngineBase::_LineLarge8bpp()
|
||||||
u32 tmp_map = this->_BG_bmp_large_ram[LAYERID] + lg * YBG;
|
u32 tmp_map = this->_BG_bmp_large_ram[LAYERID] + lg * YBG;
|
||||||
u8 *map = (u8 *)MMU_gpu_map(tmp_map);
|
u8 *map = (u8 *)MMU_gpu_map(tmp_map);
|
||||||
|
|
||||||
const u16 *pal = (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB);
|
|
||||||
|
|
||||||
for (size_t x = 0; x < lg; ++x, ++XBG)
|
for (size_t x = 0; x < lg; ++x, ++XBG)
|
||||||
{
|
{
|
||||||
XBG &= wmask;
|
XBG &= wmask;
|
||||||
const u16 color = LE_TO_LOCAL_16( pal[map[XBG]] );
|
const u16 color = LE_TO_LOCAL_16( this->_paletteBG[map[XBG]] );
|
||||||
this->__setFinalColorBck<MOSAIC,false,ISCUSTOMRENDERINGNEEDED>(color,x,(color!=0));
|
this->__setFinalColorBck<MOSAIC,false,ISCUSTOMRENDERINGNEEDED>(color,x,(color!=0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1295,7 +1296,7 @@ void GPUEngineBase::_RenderLine_TextBG(u16 XBG, u16 YBG, u16 LG)
|
||||||
|
|
||||||
if (!bgCnt->Palette_256) // color: 16 palette entries
|
if (!bgCnt->Palette_256) // color: 16 palette entries
|
||||||
{
|
{
|
||||||
const u16 *pal = (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB);
|
const u16 *pal = this->_paletteBG;
|
||||||
|
|
||||||
yoff = ((YBG&7)<<2);
|
yoff = ((YBG&7)<<2);
|
||||||
xfin = 8 - (xoff&7);
|
xfin = 8 - (xoff&7);
|
||||||
|
@ -1360,11 +1361,7 @@ void GPUEngineBase::_RenderLine_TextBG(u16 XBG, u16 YBG, u16 LG)
|
||||||
}
|
}
|
||||||
else //256-color BG
|
else //256-color BG
|
||||||
{
|
{
|
||||||
const u16 *pal = (dispCnt->ExBGxPalette_Enable) ? (u16 *)MMU.ExtPal[this->_engineID][this->BGExtPalSlot[LAYERID]] : (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB);
|
const u16 *pal = (dispCnt->ExBGxPalette_Enable) ? (u16 *)MMU.ExtPal[this->_engineID][this->BGExtPalSlot[LAYERID]] : this->_paletteBG;
|
||||||
if (pal == NULL)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
yoff = ((YBG&7)<<3);
|
yoff = ((YBG&7)<<3);
|
||||||
xfin = 8 - (xoff&7);
|
xfin = 8 - (xoff&7);
|
||||||
|
@ -1407,9 +1404,8 @@ void GPUEngineBase::_RenderLine_TextBG(u16 XBG, u16 YBG, u16 LG)
|
||||||
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
|
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
|
||||||
void GPUEngineBase::_RotBG2(const BGxPARMS &param, const u16 LG)
|
void GPUEngineBase::_RotBG2(const BGxPARMS &param, const u16 LG)
|
||||||
{
|
{
|
||||||
const u16 *pal = (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB);
|
|
||||||
// printf("rot mode\n");
|
// printf("rot mode\n");
|
||||||
apply_rot_fun< LAYERID, rot_tiled_8bit_entry<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(this, param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], pal);
|
this->_apply_rot_fun< LAYERID, rot_tiled_8bit_entry<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], this->_paletteBG);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
|
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
|
||||||
|
@ -1417,41 +1413,43 @@ void GPUEngineBase::_ExtRotBG2(const BGxPARMS &param, const u16 LG)
|
||||||
{
|
{
|
||||||
struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits;
|
struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits;
|
||||||
|
|
||||||
u16 *pal = NULL;
|
u16 *pal = this->_paletteBG;
|
||||||
|
|
||||||
switch (this->_BGTypes[LAYERID])
|
switch (this->_BGTypes[LAYERID])
|
||||||
{
|
{
|
||||||
case BGType_AffineExt_256x16: // 16 bit bgmap entries
|
case BGType_AffineExt_256x16: // 16 bit bgmap entries
|
||||||
pal = (dispCnt->ExBGxPalette_Enable) ? (u16 *)(MMU.ExtPal[this->_engineID][this->BGExtPalSlot[LAYERID]]) : (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB);
|
{
|
||||||
if (pal == NULL) return;
|
if (dispCnt->ExBGxPalette_Enable)
|
||||||
|
{
|
||||||
if(dispCnt->ExBGxPalette_Enable)
|
pal = (u16 *)(MMU.ExtPal[this->_engineID][this->BGExtPalSlot[LAYERID]]);
|
||||||
apply_rot_fun< LAYERID, rot_tiled_16bit_entry<LAYERID, MOSAIC, true, ISCUSTOMRENDERINGNEEDED> >(this, param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], pal);
|
this->_apply_rot_fun< LAYERID, rot_tiled_16bit_entry<LAYERID, MOSAIC, true, ISCUSTOMRENDERINGNEEDED> >(param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], pal);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
apply_rot_fun< LAYERID, rot_tiled_16bit_entry<LAYERID, MOSAIC, false, ISCUSTOMRENDERINGNEEDED> >(this, param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], pal);
|
{
|
||||||
|
this->_apply_rot_fun< LAYERID, rot_tiled_16bit_entry<LAYERID, MOSAIC, false, ISCUSTOMRENDERINGNEEDED> >(param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], pal);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case BGType_AffineExt_256x1: // 256 colors
|
case BGType_AffineExt_256x1: // 256 colors
|
||||||
pal = (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB);
|
this->_apply_rot_fun< LAYERID, rot_256_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(param, LG, this->_BG_bmp_ram[LAYERID], 0, pal);
|
||||||
apply_rot_fun< LAYERID, rot_256_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(this, param, LG, this->_BG_bmp_ram[LAYERID], 0, pal);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case BGType_AffineExt_Direct: // direct colors / BMP
|
case BGType_AffineExt_Direct: // direct colors / BMP
|
||||||
{
|
{
|
||||||
if (ISCUSTOMRENDERINGNEEDED && (LAYERID == this->vramBGLayer))
|
if (ISCUSTOMRENDERINGNEEDED && (LAYERID == this->vramBGLayer))
|
||||||
{
|
{
|
||||||
apply_rot_fun< LAYERID, rot_BMP_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED, true> >(this, param, LG, this->_BG_bmp_ram[LAYERID], 0, NULL);
|
this->_apply_rot_fun< LAYERID, rot_BMP_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED, true> >(param, LG, this->_BG_bmp_ram[LAYERID], 0, pal);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
apply_rot_fun< LAYERID, rot_BMP_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED, false> >(this, param, LG, this->_BG_bmp_ram[LAYERID], 0, NULL);
|
this->_apply_rot_fun< LAYERID, rot_BMP_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED, false> >(param, LG, this->_BG_bmp_ram[LAYERID], 0, pal);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case BGType_Large8bpp: // large screen 256 colors
|
case BGType_Large8bpp: // large screen 256 colors
|
||||||
pal = (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB);
|
this->_apply_rot_fun< LAYERID, rot_256_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(param, LG, this->_BG_bmp_large_ram[LAYERID], 0, pal);
|
||||||
apply_rot_fun< LAYERID, rot_256_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(this, param, LG, this->_BG_bmp_large_ram[LAYERID], 0, pal);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -1526,9 +1524,54 @@ void GPUEngineBase::_LineExtRot()
|
||||||
/* http://nocash.emubase.de/gbatek.htm#dsvideoobjs */
|
/* http://nocash.emubase.de/gbatek.htm#dsvideoobjs */
|
||||||
void GPUEngineBase::_RenderSpriteBMP(const u8 spriteNum, const u16 l, u16 *dst, const u32 srcadr, u8 *dst_alpha, u8 *typeTab, u8 *prioTab, const u8 prio, const size_t lg, size_t sprX, size_t x, const s32 xdir, const u8 alpha)
|
void GPUEngineBase::_RenderSpriteBMP(const u8 spriteNum, const u16 l, u16 *dst, const u32 srcadr, u8 *dst_alpha, u8 *typeTab, u8 *prioTab, const u8 prio, const size_t lg, size_t sprX, size_t x, const s32 xdir, const u8 alpha)
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < lg; i++, ++sprX, x += xdir)
|
const u16 *bmpBuffer = (u16 *)MMU_gpu_map(srcadr);
|
||||||
|
size_t i = 0;
|
||||||
|
|
||||||
|
#ifdef ENABLE_SSE2
|
||||||
|
if (xdir == 1)
|
||||||
{
|
{
|
||||||
const u16 color = LE_TO_LOCAL_16( *(u16 *)MMU_gpu_map(srcadr + (x << 1)) );
|
const __m128i prio_vec128 = _mm_set1_epi8(prio);
|
||||||
|
|
||||||
|
const size_t ssePixCount = lg - (lg % 16);
|
||||||
|
for (; i < ssePixCount; i += 16, x += 16, sprX += 16)
|
||||||
|
{
|
||||||
|
__m128i prioTab_vec128 = _mm_load_si128((__m128i *)(prioTab + sprX));
|
||||||
|
const __m128i prioCompare = _mm_cmplt_epi8(prio_vec128, prioTab_vec128);
|
||||||
|
|
||||||
|
__m128i colorLo_vec128 = _mm_load_si128((__m128i *)(bmpBuffer + x));
|
||||||
|
__m128i colorHi_vec128 = _mm_load_si128((__m128i *)(bmpBuffer + x + 8));
|
||||||
|
|
||||||
|
const __m128i colorAlphaLo_vec128 = _mm_and_si128(colorLo_vec128, _mm_set1_epi16(0x8000));
|
||||||
|
const __m128i colorAlphaHi_vec128 = _mm_and_si128(colorHi_vec128, _mm_set1_epi16(0x8000));
|
||||||
|
|
||||||
|
const __m128i colorAlphaLoCompare = _mm_cmpeq_epi16(colorAlphaLo_vec128, _mm_set1_epi16(0x8000));
|
||||||
|
const __m128i colorAlphaHiCompare = _mm_cmpeq_epi16(colorAlphaHi_vec128, _mm_set1_epi16(0x8000));
|
||||||
|
const __m128i colorAlphaPackedCompare = _mm_cmpeq_epi8( _mm_packs_epi16(colorAlphaLoCompare, colorAlphaHiCompare), _mm_set1_epi8(0xFF) );
|
||||||
|
|
||||||
|
const __m128i combinedPackedCompare = _mm_and_si128(prioCompare, colorAlphaPackedCompare);
|
||||||
|
const __m128i combinedLoCompare = _mm_cmpeq_epi16( _mm_unpacklo_epi8(combinedPackedCompare, _mm_setzero_si128()), _mm_set1_epi16(0x00FF) );
|
||||||
|
const __m128i combinedHiCompare = _mm_cmpeq_epi16( _mm_unpackhi_epi8(combinedPackedCompare, _mm_setzero_si128()), _mm_set1_epi16(0x00FF) );
|
||||||
|
|
||||||
|
colorLo_vec128 = _mm_or_si128( _mm_and_si128(combinedLoCompare, colorLo_vec128), _mm_andnot_si128(combinedLoCompare, _mm_load_si128((__m128i *)(dst + sprX))) );
|
||||||
|
colorHi_vec128 = _mm_or_si128( _mm_and_si128(combinedHiCompare, colorHi_vec128), _mm_andnot_si128(combinedHiCompare, _mm_load_si128((__m128i *)(dst + sprX + 8))) );
|
||||||
|
const __m128i dstAlpha_vec128 = _mm_or_si128( _mm_and_si128(combinedPackedCompare, _mm_set1_epi8(alpha + 1)), _mm_andnot_si128(combinedPackedCompare, _mm_load_si128((__m128i *)(dst_alpha + sprX))) );
|
||||||
|
const __m128i dstTypeTab_vec128 = _mm_or_si128( _mm_and_si128(combinedPackedCompare, _mm_set1_epi8(3)), _mm_andnot_si128(combinedPackedCompare, _mm_load_si128((__m128i *)(typeTab + sprX))) );
|
||||||
|
prioTab_vec128 = _mm_or_si128( _mm_and_si128(combinedPackedCompare, prio_vec128), _mm_andnot_si128(combinedPackedCompare, prioTab_vec128) );
|
||||||
|
const __m128i sprNum_vec128 = _mm_or_si128( _mm_and_si128(combinedPackedCompare, _mm_set1_epi8(spriteNum)), _mm_andnot_si128(combinedPackedCompare, _mm_load_si128((__m128i *)(this->_sprNum + sprX))) );
|
||||||
|
|
||||||
|
_mm_store_si128((__m128i *)(dst + sprX), colorLo_vec128);
|
||||||
|
_mm_store_si128((__m128i *)(dst + sprX + 8), colorHi_vec128);
|
||||||
|
_mm_store_si128((__m128i *)(dst_alpha + sprX), dstAlpha_vec128);
|
||||||
|
_mm_store_si128((__m128i *)(typeTab + sprX), dstTypeTab_vec128);
|
||||||
|
_mm_store_si128((__m128i *)(prioTab + sprX), prioTab_vec128);
|
||||||
|
_mm_store_si128((__m128i *)(this->_sprNum + sprX), sprNum_vec128);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (; i < lg; i++, sprX++, x += xdir)
|
||||||
|
{
|
||||||
|
const u16 color = LE_TO_LOCAL_16(bmpBuffer[x]);
|
||||||
|
|
||||||
//a cleared alpha bit suppresses the pixel from processing entirely; it doesnt exist
|
//a cleared alpha bit suppresses the pixel from processing entirely; it doesnt exist
|
||||||
if ((color & 0x8000) && (prio < prioTab[sprX]))
|
if ((color & 0x8000) && (prio < prioTab[sprX]))
|
||||||
|
@ -1708,12 +1751,11 @@ void GPUEngineBase::SpriteRender(u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioT
|
||||||
template<SpriteRenderMode MODE>
|
template<SpriteRenderMode MODE>
|
||||||
void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab)
|
void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab)
|
||||||
{
|
{
|
||||||
u16 l = currLine;
|
u16 l = this->currLine;
|
||||||
size_t cost = 0;
|
size_t cost = 0;
|
||||||
|
|
||||||
struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits;
|
struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits;
|
||||||
u8 block = this->_sprBoundary;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < 128; i++)
|
for (size_t i = 0; i < 128; i++)
|
||||||
{
|
{
|
||||||
const OAMAttributes &spriteInfo = this->_oamList[i];
|
const OAMAttributes &spriteInfo = this->_oamList[i];
|
||||||
|
@ -1733,6 +1775,7 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
|
||||||
s32 sprX, sprY, x, y, lg;
|
s32 sprX, sprY, x, y, lg;
|
||||||
s32 xdir;
|
s32 xdir;
|
||||||
u8 prio;
|
u8 prio;
|
||||||
|
u16 *pal;
|
||||||
u8 *src;
|
u8 *src;
|
||||||
u32 srcadr;
|
u32 srcadr;
|
||||||
|
|
||||||
|
@ -1746,7 +1789,6 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
|
||||||
{
|
{
|
||||||
s32 fieldX, fieldY, auxX, auxY, realX, realY, offset;
|
s32 fieldX, fieldY, auxX, auxY, realX, realY, offset;
|
||||||
u8 blockparameter;
|
u8 blockparameter;
|
||||||
u16 *pal;
|
|
||||||
s16 dx, dmx, dy, dmy;
|
s16 dx, dmx, dy, dmy;
|
||||||
u16 colour;
|
u16 colour;
|
||||||
|
|
||||||
|
@ -1818,13 +1860,10 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
|
||||||
// If we are using 1 palette of 256 colours
|
// If we are using 1 palette of 256 colours
|
||||||
if (spriteInfo.Depth)
|
if (spriteInfo.Depth)
|
||||||
{
|
{
|
||||||
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << block));
|
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << this->_sprBoundary));
|
||||||
|
|
||||||
// If extended palettes are set, use them
|
// If extended palettes are set, use them
|
||||||
if (dispCnt->ExOBJPalette_Enable)
|
pal = (dispCnt->ExOBJPalette_Enable) ? (u16 *)(MMU.ObjExtPal[this->_engineID][0]+(spriteInfo.PaletteIndex*0x200)) : this->_paletteOBJ;
|
||||||
pal = (u16 *)(MMU.ObjExtPal[this->_engineID][0]+(spriteInfo.PaletteIndex*0x200));
|
|
||||||
else
|
|
||||||
pal = (u16 *)(MMU.ARM9_VMEM + 0x200 + this->_engineID * ADDRESS_STEP_1KB);
|
|
||||||
|
|
||||||
for (size_t j = 0; j < lg; ++j, ++sprX)
|
for (size_t j = 0; j < lg; ++j, ++sprX)
|
||||||
{
|
{
|
||||||
|
@ -1908,13 +1947,13 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
|
||||||
if (MODE == SpriteRenderMode_Sprite2D)
|
if (MODE == SpriteRenderMode_Sprite2D)
|
||||||
{
|
{
|
||||||
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << 5));
|
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << 5));
|
||||||
pal = (u16 *)(MMU.ARM9_VMEM + 0x200 + (this->_engineID * ADDRESS_STEP_1KB) + (spriteInfo.PaletteIndex * 32));
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << this->_sprBoundary));
|
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << this->_sprBoundary));
|
||||||
pal = (u16 *)(MMU.ARM9_VMEM + 0x200 + (this->_engineID * ADDRESS_STEP_1KB) + (spriteInfo.PaletteIndex * 32));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pal = this->_paletteOBJ + (spriteInfo.PaletteIndex << 4);
|
||||||
|
|
||||||
for (size_t j = 0; j < lg; ++j, ++sprX)
|
for (size_t j = 0; j < lg; ++j, ++sprX)
|
||||||
{
|
{
|
||||||
|
@ -1977,9 +2016,9 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (spriteInfo.Depth)
|
if (spriteInfo.Depth)
|
||||||
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex<<block) + ((y>>3)*sprSize.x*8) + ((y&0x7)*8));
|
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex<<this->_sprBoundary) + ((y>>3)*sprSize.x*8) + ((y&0x7)*8));
|
||||||
else
|
else
|
||||||
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex<<block) + ((y>>3)*sprSize.x*4) + ((y&0x7)*4));
|
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex<<this->_sprBoundary) + ((y>>3)*sprSize.x*4) + ((y&0x7)*4));
|
||||||
}
|
}
|
||||||
|
|
||||||
this->_RenderSpriteWin(src, (spriteInfo.Depth != 0), lg, sprX, x, xdir);
|
this->_RenderSpriteWin(src, (spriteInfo.Depth != 0), lg, sprX, x, xdir);
|
||||||
|
@ -1999,9 +2038,9 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
|
||||||
if (MODE == SpriteRenderMode_Sprite2D)
|
if (MODE == SpriteRenderMode_Sprite2D)
|
||||||
srcadr = this->_sprMem + ((spriteInfo.TileIndex)<<5) + ((y>>3)<<10) + ((y&0x7)*8);
|
srcadr = this->_sprMem + ((spriteInfo.TileIndex)<<5) + ((y>>3)<<10) + ((y&0x7)*8);
|
||||||
else
|
else
|
||||||
srcadr = this->_sprMem + (spriteInfo.TileIndex<<block) + ((y>>3)*sprSize.x*8) + ((y&0x7)*8);
|
srcadr = this->_sprMem + (spriteInfo.TileIndex<<this->_sprBoundary) + ((y>>3)*sprSize.x*8) + ((y&0x7)*8);
|
||||||
|
|
||||||
const u16 *pal = (dispCnt->ExOBJPalette_Enable) ? (u16 *)(MMU.ObjExtPal[this->_engineID][0]+(spriteInfo.PaletteIndex*0x200)) : (u16 *)(MMU.ARM9_VMEM + 0x200 + this->_engineID * ADDRESS_STEP_1KB);
|
pal = (dispCnt->ExOBJPalette_Enable) ? (u16 *)(MMU.ObjExtPal[this->_engineID][0]+(spriteInfo.PaletteIndex*0x200)) : this->_paletteOBJ;
|
||||||
this->_RenderSprite256(i, l, dst, srcadr, pal, dst_alpha, typeTab, prioTab, prio, lg, sprX, x, xdir, spriteInfo.Mode == 1);
|
this->_RenderSprite256(i, l, dst, srcadr, pal, dst_alpha, typeTab, prioTab, prio, lg, sprX, x, xdir, spriteInfo.Mode == 1);
|
||||||
}
|
}
|
||||||
else // 16 colors
|
else // 16 colors
|
||||||
|
@ -2012,10 +2051,10 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
srcadr = this->_sprMem + (spriteInfo.TileIndex<<block) + ((y>>3)*sprSize.x*4) + ((y&0x7)*4);
|
srcadr = this->_sprMem + (spriteInfo.TileIndex<<this->_sprBoundary) + ((y>>3)*sprSize.x*4) + ((y&0x7)*4);
|
||||||
}
|
}
|
||||||
|
|
||||||
const u16 *pal = (u16 *)(MMU.ARM9_VMEM + 0x200 + this->_engineID * ADDRESS_STEP_1KB) + (spriteInfo.PaletteIndex << 4);
|
pal = this->_paletteOBJ + (spriteInfo.PaletteIndex << 4);
|
||||||
this->_RenderSprite16(l, dst, srcadr, pal, dst_alpha, typeTab, prioTab, prio, lg, sprX, x, xdir, spriteInfo.Mode == 1);
|
this->_RenderSprite16(l, dst, srcadr, pal, dst_alpha, typeTab, prioTab, prio, lg, sprX, x, xdir, spriteInfo.Mode == 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2259,7 +2298,7 @@ void GPUEngineBase::UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex,
|
||||||
|
|
||||||
if ( (spriteInfo.RotScale != 2) && ((spriteInfo.RotScale & 1) == 0) && (spriteInfo.Mode == 3) && (spriteInfo.PaletteIndex != 0) )
|
if ( (spriteInfo.RotScale != 2) && ((spriteInfo.RotScale & 1) == 0) && (spriteInfo.Mode == 3) && (spriteInfo.PaletteIndex != 0) )
|
||||||
{
|
{
|
||||||
const u32 vramAddress = ( (spriteInfo.TileIndex & 0x1F) * 0x10 ) + ( (spriteInfo.TileIndex & ~0x1F) * 0x80 );
|
const u32 vramAddress = ((spriteInfo.TileIndex & 0x1F) << 5) + ((spriteInfo.TileIndex & ~0x1F) << 7);
|
||||||
const SpriteSize sprSize = GPUEngineBase::_sprSizeTab[spriteInfo.Size][spriteInfo.Shape];
|
const SpriteSize sprSize = GPUEngineBase::_sprSizeTab[spriteInfo.Size][spriteInfo.Shape];
|
||||||
|
|
||||||
if( (vramAddress == (mainEngine->dispCapCnt.writeOffset * ADDRESS_STEP_32KB)) && (sprSize.x == 64) && (sprSize.y == 64) )
|
if( (vramAddress == (mainEngine->dispCapCnt.writeOffset * ADDRESS_STEP_32KB)) && (sprSize.x == 64) && (sprSize.y == 64) )
|
||||||
|
@ -2272,58 +2311,62 @@ void GPUEngineBase::UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 GPUEngineBase::getAffineStart(const size_t layer, int xy)
|
template<GPULayerID LAYERID, int SET_XY>
|
||||||
|
u32 GPUEngineBase::getAffineStart()
|
||||||
{
|
{
|
||||||
if (xy == 0)
|
if (SET_XY == 0)
|
||||||
return affineInfo[layer-2].x;
|
return this->affineInfo[LAYERID-2].x;
|
||||||
else
|
else
|
||||||
return affineInfo[layer-2].y;
|
return this->affineInfo[LAYERID-2].y;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUEngineBase::setAffineStartWord(const size_t layer, int xy, u16 val, int word)
|
template<GPULayerID LAYERID, int SET_XY, bool HIWORD>
|
||||||
|
void GPUEngineBase::setAffineStartWord(u16 val)
|
||||||
{
|
{
|
||||||
u32 curr = getAffineStart(layer, xy);
|
u32 curr = this->getAffineStart<LAYERID, SET_XY>();
|
||||||
|
|
||||||
if (word == 0)
|
if (!HIWORD)
|
||||||
curr = (curr & 0xFFFF0000) | val;
|
curr = (curr & 0xFFFF0000) | val;
|
||||||
else
|
else
|
||||||
curr = (curr & 0x0000FFFF) | (((u32)val) << 16);
|
curr = (curr & 0x0000FFFF) | (((u32)val) << 16);
|
||||||
|
|
||||||
setAffineStart(layer, xy, curr);
|
this->setAffineStart<LAYERID, SET_XY>(curr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUEngineBase::setAffineStart(const size_t layer, int xy, u32 val)
|
template<GPULayerID LAYERID, int SET_XY>
|
||||||
|
void GPUEngineBase::setAffineStart(u32 val)
|
||||||
{
|
{
|
||||||
if (xy == 0)
|
if (SET_XY == 0)
|
||||||
affineInfo[layer-2].x = val;
|
this->affineInfo[LAYERID-2].x = val;
|
||||||
else
|
else
|
||||||
affineInfo[layer-2].y = val;
|
this->affineInfo[LAYERID-2].y = val;
|
||||||
|
|
||||||
refreshAffineStartRegs(layer, xy);
|
this->refreshAffineStartRegs<LAYERID, SET_XY>();
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUEngineBase::refreshAffineStartRegs(const int num, const int xy)
|
template<GPULayerID LAYERID, int SET_XY>
|
||||||
|
void GPUEngineBase::refreshAffineStartRegs()
|
||||||
{
|
{
|
||||||
if (num == -1)
|
if (LAYERID == -1)
|
||||||
{
|
{
|
||||||
refreshAffineStartRegs(2, xy);
|
this->refreshAffineStartRegs<GPULayerID_BG2, SET_XY>();
|
||||||
refreshAffineStartRegs(3, xy);
|
this->refreshAffineStartRegs<GPULayerID_BG3, SET_XY>();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (xy == -1)
|
if (SET_XY == -1)
|
||||||
{
|
{
|
||||||
refreshAffineStartRegs(num, 0);
|
this->refreshAffineStartRegs<LAYERID, 0>();
|
||||||
refreshAffineStartRegs(num, 1);
|
this->refreshAffineStartRegs<LAYERID, 1>();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
BGxPARMS *params = (num == 2) ? &(dispx_st)->dispx_BG2PARMS : &(dispx_st)->dispx_BG3PARMS;
|
BGxPARMS *params = (LAYERID == GPULayerID_BG2) ? &(dispx_st)->dispx_BG2PARMS : &(dispx_st)->dispx_BG3PARMS;
|
||||||
|
|
||||||
if (xy == 0)
|
if (SET_XY == 0)
|
||||||
params->BGxX = affineInfo[num-2].x;
|
params->BGxX = this->affineInfo[LAYERID-2].x;
|
||||||
else
|
else
|
||||||
params->BGxY = affineInfo[num-2].y;
|
params->BGxY = this->affineInfo[LAYERID-2].y;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
|
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
|
||||||
|
@ -2737,6 +2780,8 @@ void GPUEngineBase::REG_DISPx_pack_test()
|
||||||
GPUEngineA::GPUEngineA()
|
GPUEngineA::GPUEngineA()
|
||||||
{
|
{
|
||||||
_engineID = GPUEngineID_Main;
|
_engineID = GPUEngineID_Main;
|
||||||
|
_paletteBG = (u16 *)MMU.ARM9_VMEM;
|
||||||
|
_paletteOBJ = (u16 *)(MMU.ARM9_VMEM + 0x200);
|
||||||
_oamList = (OAMAttributes *)(MMU.ARM9_OAM);
|
_oamList = (OAMAttributes *)(MMU.ARM9_OAM);
|
||||||
_sprMem = MMU_AOBJ;
|
_sprMem = MMU_AOBJ;
|
||||||
dispx_st = (REG_DISPx *)MMU.ARM9_REG;
|
dispx_st = (REG_DISPx *)MMU.ARM9_REG;
|
||||||
|
@ -2888,7 +2933,7 @@ void GPUEngineA::RenderLine(const u16 l, bool skip)
|
||||||
//bubble bobble revolution classic mode
|
//bubble bobble revolution classic mode
|
||||||
//NOTE:
|
//NOTE:
|
||||||
//I am REALLY unsatisfied with this logic now. But it seems to be working..
|
//I am REALLY unsatisfied with this logic now. But it seems to be working..
|
||||||
this->refreshAffineStartRegs(-1,-1);
|
this->refreshAffineStartRegs<(GPULayerID)-1, -1>();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (skip)
|
if (skip)
|
||||||
|
@ -3019,7 +3064,7 @@ void GPUEngineA::_RenderLine_Layer(const u16 l, u16 *dstLine, const size_t dstLi
|
||||||
this->_currentFadeInColors = &GPUEngineBase::_fadeInColors[this->_BLDY_EVY][0];
|
this->_currentFadeInColors = &GPUEngineBase::_fadeInColors[this->_BLDY_EVY][0];
|
||||||
this->_currentFadeOutColors = &GPUEngineBase::_fadeOutColors[this->_BLDY_EVY][0];
|
this->_currentFadeOutColors = &GPUEngineBase::_fadeOutColors[this->_BLDY_EVY][0];
|
||||||
|
|
||||||
const u16 backdrop_color = T1ReadWord(MMU.ARM9_VMEM, 0) & 0x7FFF;
|
const u16 backdrop_color = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF;
|
||||||
|
|
||||||
//we need to write backdrop colors in the same way as we do BG pixels in order to do correct window processing
|
//we need to write backdrop colors in the same way as we do BG pixels in order to do correct window processing
|
||||||
//this is currently eating up 2fps or so. it is a reasonable candidate for optimization.
|
//this is currently eating up 2fps or so. it is a reasonable candidate for optimization.
|
||||||
|
@ -3056,7 +3101,7 @@ void GPUEngineA::_RenderLine_Layer(const u16 l, u16 *dstLine, const size_t dstLi
|
||||||
// init background color & priorities
|
// init background color & priorities
|
||||||
memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||||
memset(this->_sprType, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
memset(this->_sprType, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||||
memset(this->_sprPrio, 0xFF, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
memset(this->_sprPrio, 0x7F, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||||
memset(this->_sprWin, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
memset(this->_sprWin, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||||
|
|
||||||
// init pixels priorities
|
// init pixels priorities
|
||||||
|
@ -3275,7 +3320,6 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
|
||||||
cap_dst_adr &= 0x1FFFF;
|
cap_dst_adr &= 0x1FFFF;
|
||||||
cap_dst_adr += vramWriteBlock * GPU_VRAM_BLOCK_LINES * GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16);
|
cap_dst_adr += vramWriteBlock * GPU_VRAM_BLOCK_LINES * GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16);
|
||||||
|
|
||||||
// TODO: Make MMU.blank_memory and MMU.ARM9_LCD 16-byte aligned so that we can use aligned load/store for better performance.
|
|
||||||
const u16 *cap_src = (u16 *)MMU.blank_memory;
|
const u16 *cap_src = (u16 *)MMU.blank_memory;
|
||||||
u16 *cap_dst = (u16 *)(MMU.ARM9_LCD + cap_dst_adr);
|
u16 *cap_dst = (u16 *)(MMU.ARM9_LCD + cap_dst_adr);
|
||||||
|
|
||||||
|
@ -3507,7 +3551,7 @@ void GPUEngineA::_RenderLine_DispCapture_Copy(const u16 *__restrict src, u16 *__
|
||||||
if (CAPTUREFROMNATIVESRC)
|
if (CAPTUREFROMNATIVESRC)
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_SSE2
|
#ifdef ENABLE_SSE2
|
||||||
MACRODO_N(CAPTURELENGTH / (sizeof(__m128i) / sizeof(u16)), _mm_storeu_si128((__m128i *)dst + X, _mm_or_si128( _mm_loadu_si128( (__m128i *)src + X), alpha_vec128 ) ));
|
MACRODO_N(CAPTURELENGTH / (sizeof(__m128i) / sizeof(u16)), _mm_store_si128((__m128i *)dst + X, _mm_or_si128( _mm_load_si128( (__m128i *)src + X), alpha_vec128 ) ));
|
||||||
#else
|
#else
|
||||||
for (size_t i = 0; i < CAPTURELENGTH; i++)
|
for (size_t i = 0; i < CAPTURELENGTH; i++)
|
||||||
{
|
{
|
||||||
|
@ -3742,7 +3786,7 @@ void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *__restrict srcA, const
|
||||||
srcA[_gpuDstPitchIndex[i+1]],
|
srcA[_gpuDstPitchIndex[i+1]],
|
||||||
srcA[_gpuDstPitchIndex[i+0]]);
|
srcA[_gpuDstPitchIndex[i+0]]);
|
||||||
|
|
||||||
__m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? _mm_loadu_si128((__m128i *)(srcB + i)) : _mm_set_epi16(srcB[_gpuDstPitchIndex[i+7]],
|
__m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB + i)) : _mm_set_epi16(srcB[_gpuDstPitchIndex[i+7]],
|
||||||
srcB[_gpuDstPitchIndex[i+6]],
|
srcB[_gpuDstPitchIndex[i+6]],
|
||||||
srcB[_gpuDstPitchIndex[i+5]],
|
srcB[_gpuDstPitchIndex[i+5]],
|
||||||
srcB[_gpuDstPitchIndex[i+4]],
|
srcB[_gpuDstPitchIndex[i+4]],
|
||||||
|
@ -3751,7 +3795,7 @@ void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *__restrict srcA, const
|
||||||
srcB[_gpuDstPitchIndex[i+1]],
|
srcB[_gpuDstPitchIndex[i+1]],
|
||||||
srcB[_gpuDstPitchIndex[i+0]]);
|
srcB[_gpuDstPitchIndex[i+0]]);
|
||||||
|
|
||||||
_mm_storeu_si128( (__m128i *)(dst + i), this->_RenderLine_DispCapture_BlendFunc_SSE2(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
|
_mm_store_si128( (__m128i *)(dst + i), this->_RenderLine_DispCapture_BlendFunc_SSE2(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
for (size_t i = 0; i < CAPTURELENGTH; i++)
|
for (size_t i = 0; i < CAPTURELENGTH; i++)
|
||||||
|
@ -3788,6 +3832,8 @@ void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *__restrict srcA, const
|
||||||
GPUEngineB::GPUEngineB()
|
GPUEngineB::GPUEngineB()
|
||||||
{
|
{
|
||||||
_engineID = GPUEngineID_Sub;
|
_engineID = GPUEngineID_Sub;
|
||||||
|
_paletteBG = (u16 *)(MMU.ARM9_VMEM + ADDRESS_STEP_1KB);
|
||||||
|
_paletteOBJ = (u16 *)(MMU.ARM9_VMEM + ADDRESS_STEP_1KB + 0x200);
|
||||||
_oamList = (OAMAttributes *)(MMU.ARM9_OAM + ADDRESS_STEP_1KB);
|
_oamList = (OAMAttributes *)(MMU.ARM9_OAM + ADDRESS_STEP_1KB);
|
||||||
_sprMem = MMU_BOBJ;
|
_sprMem = MMU_BOBJ;
|
||||||
dispx_st = (REG_DISPx *)(&MMU.ARM9_REG[REG_DISPB]);
|
dispx_st = (REG_DISPx *)(&MMU.ARM9_REG[REG_DISPB]);
|
||||||
|
@ -3837,7 +3883,7 @@ void GPUEngineB::RenderLine(const u16 l, bool skip)
|
||||||
//bubble bobble revolution classic mode
|
//bubble bobble revolution classic mode
|
||||||
//NOTE:
|
//NOTE:
|
||||||
//I am REALLY unsatisfied with this logic now. But it seems to be working..
|
//I am REALLY unsatisfied with this logic now. But it seems to be working..
|
||||||
this->refreshAffineStartRegs(-1,-1);
|
this->refreshAffineStartRegs<(GPULayerID)-1, -1>();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (skip)
|
if (skip)
|
||||||
|
@ -3934,7 +3980,7 @@ void GPUEngineB::_RenderLine_Layer(const u16 l, u16 *dstLine, const size_t dstLi
|
||||||
this->_currentFadeInColors = &GPUEngineBase::_fadeInColors[this->_BLDY_EVY][0];
|
this->_currentFadeInColors = &GPUEngineBase::_fadeInColors[this->_BLDY_EVY][0];
|
||||||
this->_currentFadeOutColors = &GPUEngineBase::_fadeOutColors[this->_BLDY_EVY][0];
|
this->_currentFadeOutColors = &GPUEngineBase::_fadeOutColors[this->_BLDY_EVY][0];
|
||||||
|
|
||||||
const u16 backdrop_color = T1ReadWord(MMU.ARM9_VMEM, ADDRESS_STEP_1KB) & 0x7FFF;
|
const u16 backdrop_color = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF;
|
||||||
|
|
||||||
//we need to write backdrop colors in the same way as we do BG pixels in order to do correct window processing
|
//we need to write backdrop colors in the same way as we do BG pixels in order to do correct window processing
|
||||||
//this is currently eating up 2fps or so. it is a reasonable candidate for optimization.
|
//this is currently eating up 2fps or so. it is a reasonable candidate for optimization.
|
||||||
|
@ -3971,7 +4017,7 @@ void GPUEngineB::_RenderLine_Layer(const u16 l, u16 *dstLine, const size_t dstLi
|
||||||
// init background color & priorities
|
// init background color & priorities
|
||||||
memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||||
memset(this->_sprType, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
memset(this->_sprType, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||||
memset(this->_sprPrio, 0xFF, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
memset(this->_sprPrio, 0x7F, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||||
memset(this->_sprWin, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
memset(this->_sprWin, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
|
||||||
|
|
||||||
// init pixels priorities
|
// init pixels priorities
|
||||||
|
@ -4029,9 +4075,6 @@ void GPUEngineB::_RenderLine_Layer(const u16 l, u16 *dstLine, const size_t dstLi
|
||||||
struct _BGxCNT *bgCnt = &(this->dispx_st)->dispx_BGxCNT[layerID].bits;
|
struct _BGxCNT *bgCnt = &(this->dispx_st)->dispx_BGxCNT[layerID].bits;
|
||||||
this->_curr_mosaic_enabled = bgCnt->Mosaic_Enable;
|
this->_curr_mosaic_enabled = bgCnt->Mosaic_Enable;
|
||||||
|
|
||||||
//useful for debugging individual layers
|
|
||||||
//if(this->core == GPUEngineID_Sub || layerNum != 2) continue;
|
|
||||||
|
|
||||||
#ifndef DISABLE_MOSAIC
|
#ifndef DISABLE_MOSAIC
|
||||||
if (this->_curr_mosaic_enabled)
|
if (this->_curr_mosaic_enabled)
|
||||||
{
|
{
|
||||||
|
@ -4531,10 +4574,6 @@ void GPUSubsystem::RenderLine(const u16 l, bool skip)
|
||||||
this->_engineSub->RenderLine<false>(l, skip);
|
this->_engineSub->RenderLine<false>(l, skip);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (l == 191)
|
|
||||||
{
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551)
|
void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551)
|
||||||
|
@ -4581,3 +4620,18 @@ void NDSDisplay::SetEngineByID(const GPUEngineID theID)
|
||||||
this->_gpu = (theID == GPUEngineID_Main) ? (GPUEngineBase *)GPU->GetEngineMain() : (GPUEngineBase *)GPU->GetEngineSub();
|
this->_gpu = (theID == GPUEngineID_Main) ? (GPUEngineBase *)GPU->GetEngineMain() : (GPUEngineBase *)GPU->GetEngineSub();
|
||||||
this->_gpu->SetDisplayByID(this->_ID);
|
this->_gpu->SetDisplayByID(this->_ID);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template void GPUEngineBase::setAffineStart<GPULayerID_BG2, 0>(u32 val);
|
||||||
|
template void GPUEngineBase::setAffineStart<GPULayerID_BG2, 1>(u32 val);
|
||||||
|
template void GPUEngineBase::setAffineStart<GPULayerID_BG3, 0>(u32 val);
|
||||||
|
template void GPUEngineBase::setAffineStart<GPULayerID_BG3, 1>(u32 val);
|
||||||
|
|
||||||
|
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG2, 0, false>(u16 val);
|
||||||
|
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG2, 0, true>(u16 val);
|
||||||
|
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG2, 1, false>(u16 val);
|
||||||
|
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG2, 1, true>(u16 val);
|
||||||
|
|
||||||
|
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG3, 0, false>(u16 val);
|
||||||
|
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG3, 0, true>(u16 val);
|
||||||
|
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG3, 1, false>(u16 val);
|
||||||
|
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG3, 1, true>(u16 val);
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
class GPUEngineBase;
|
||||||
class EMUFILE;
|
class EMUFILE;
|
||||||
struct MMU_struct;
|
struct MMU_struct;
|
||||||
|
|
||||||
|
@ -44,6 +45,8 @@ struct MMU_struct;
|
||||||
void gpu_savestate(EMUFILE* os);
|
void gpu_savestate(EMUFILE* os);
|
||||||
bool gpu_loadstate(EMUFILE* is, int size);
|
bool gpu_loadstate(EMUFILE* is, int size);
|
||||||
|
|
||||||
|
typedef void (*rot_fun)(GPUEngineBase *gpu, const s32 auxX, const s32 auxY, const int lg, const u32 map, const u32 tile, const u16 *pal, const size_t i);
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
this structure is for display control,
|
this structure is for display control,
|
||||||
it holds flags for general display
|
it holds flags for general display
|
||||||
|
@ -751,12 +754,14 @@ protected:
|
||||||
} _mosaicColors;
|
} _mosaicColors;
|
||||||
|
|
||||||
GPUEngineID _engineID;
|
GPUEngineID _engineID;
|
||||||
|
u16 *_paletteBG;
|
||||||
|
u16 *_paletteOBJ;
|
||||||
|
OAMAttributes *_oamList;
|
||||||
|
u32 _sprMem;
|
||||||
|
|
||||||
u8 _bgPrio[5];
|
u8 _bgPrio[5];
|
||||||
bool _bg0HasHighestPrio;
|
bool _bg0HasHighestPrio;
|
||||||
|
|
||||||
OAMAttributes *_oamList;
|
|
||||||
u32 _sprMem;
|
|
||||||
u8 _sprBoundary;
|
u8 _sprBoundary;
|
||||||
u8 _sprBMPBoundary;
|
u8 _sprBMPBoundary;
|
||||||
u8 _sprBMPMode;
|
u8 _sprBMPMode;
|
||||||
|
@ -833,6 +838,9 @@ protected:
|
||||||
void _MosaicSpriteLinePixel(const size_t x, u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab);
|
void _MosaicSpriteLinePixel(const size_t x, u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab);
|
||||||
void _MosaicSpriteLine(u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab);
|
void _MosaicSpriteLine(u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab);
|
||||||
|
|
||||||
|
template<rot_fun fun, bool WRAP> void _rot_scale_op(const BGxPARMS &param, const u16 LG, const s32 wh, const s32 ht, const u32 map, const u32 tile, const u16 *pal);
|
||||||
|
template<GPULayerID LAYERID, rot_fun fun> void _apply_rot_fun(const BGxPARMS &param, const u16 LG, const u32 map, const u32 tile, const u16 *pal);
|
||||||
|
|
||||||
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineLarge8bpp();
|
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineLarge8bpp();
|
||||||
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_TextBG(u16 XBG, u16 YBG, u16 LG);
|
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_TextBG(u16 XBG, u16 YBG, u16 LG);
|
||||||
|
|
||||||
|
@ -843,7 +851,6 @@ protected:
|
||||||
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineRot();
|
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineRot();
|
||||||
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineExtRot();
|
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineExtRot();
|
||||||
|
|
||||||
// check whether (x,y) is within the rectangle (including wraparounds)
|
|
||||||
template<int WIN_NUM> u8 _WithinRect(const size_t x) const;
|
template<int WIN_NUM> u8 _WithinRect(const size_t x) const;
|
||||||
template <GPULayerID LAYERID> void _RenderLine_CheckWindows(const size_t srcX, bool &draw, bool &effect) const;
|
template <GPULayerID LAYERID> void _RenderLine_CheckWindows(const size_t srcX, bool &draw, bool &effect) const;
|
||||||
|
|
||||||
|
@ -890,7 +897,7 @@ public:
|
||||||
void SetupFinalPixelBlitter();
|
void SetupFinalPixelBlitter();
|
||||||
|
|
||||||
void SetVideoProp(const u32 ctrlBits);
|
void SetVideoProp(const u32 ctrlBits);
|
||||||
void SetBGProp(const size_t num, const u16 ctrlBits);
|
template<GPULayerID LAYERID> void SetBGProp(const u16 ctrlBits);
|
||||||
|
|
||||||
template<bool ISCUSTOMRENDERINGNEEDED> void RenderLine(const u16 l, bool skip);
|
template<bool ISCUSTOMRENDERINGNEEDED> void RenderLine(const u16 l, bool skip);
|
||||||
|
|
||||||
|
@ -945,10 +952,10 @@ public:
|
||||||
void UpdateVRAM3DUsageProperties_BGLayer(const size_t bankIndex, VRAM3DUsageProperties &outProperty);
|
void UpdateVRAM3DUsageProperties_BGLayer(const size_t bankIndex, VRAM3DUsageProperties &outProperty);
|
||||||
void UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex, VRAM3DUsageProperties &outProperty);
|
void UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex, VRAM3DUsageProperties &outProperty);
|
||||||
|
|
||||||
void setAffineStart(const size_t layer, int xy, u32 val);
|
template<GPULayerID LAYERID, int SET_XY> void setAffineStart(u32 val);
|
||||||
void setAffineStartWord(const size_t layer, int xy, u16 val, int word);
|
template<GPULayerID LAYERID, int SET_XY, bool HIWORD> void setAffineStartWord(u16 val);
|
||||||
u32 getAffineStart(const size_t layer, int xy);
|
template<GPULayerID LAYERID, int SET_XY> u32 getAffineStart();
|
||||||
void refreshAffineStartRegs(const int num, const int xy);
|
template<GPULayerID LAYERID, int SET_XY> void refreshAffineStartRegs();
|
||||||
|
|
||||||
void SpriteRender(u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab);
|
void SpriteRender(u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab);
|
||||||
void ModeRenderDebug(const GPULayerID layerID);
|
void ModeRenderDebug(const GPULayerID layerID);
|
||||||
|
|
|
@ -305,7 +305,7 @@ struct TVramBankInfo {
|
||||||
u8 page_addr, num_pages;
|
u8 page_addr, num_pages;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const TVramBankInfo vram_bank_info[VRAM_BANKS] = {
|
static const TVramBankInfo vram_bank_info[VRAM_BANK_COUNT] = {
|
||||||
{0,8},
|
{0,8},
|
||||||
{8,8},
|
{8,8},
|
||||||
{16,8},
|
{16,8},
|
||||||
|
@ -483,7 +483,7 @@ std::string VramConfiguration::describePurpose(Purpose p) {
|
||||||
|
|
||||||
std::string VramConfiguration::describe() {
|
std::string VramConfiguration::describe() {
|
||||||
std::stringstream ret;
|
std::stringstream ret;
|
||||||
for(int i=0;i<VRAM_BANKS;i++) {
|
for(int i=0;i<VRAM_BANK_COUNT;i++) {
|
||||||
ret << (char)(i+'A') << ": " << banks[i].ofs << " " << describePurpose(banks[i].purpose) << std::endl;
|
ret << (char)(i+'A') << ": " << banks[i].ofs << " " << describePurpose(banks[i].purpose) << std::endl;
|
||||||
}
|
}
|
||||||
return ret.str();
|
return ret.str();
|
||||||
|
@ -514,49 +514,46 @@ static inline u8* MMU_vram_physical(const int page)
|
||||||
return MMU.ARM9_LCD + (page*ADDRESS_STEP_16KB);
|
return MMU.ARM9_LCD + (page*ADDRESS_STEP_16KB);
|
||||||
}
|
}
|
||||||
|
|
||||||
//todo - templateize
|
template <VRAMBankID VRAMBANK>
|
||||||
static inline void MMU_VRAMmapRefreshBank(const int bank)
|
static inline void MMU_VRAMmapRefreshBank()
|
||||||
{
|
{
|
||||||
int block = bank;
|
const size_t block = (VRAMBANK >= VRAM_BANK_H) ? VRAMBANK + 1 : VRAMBANK;
|
||||||
if(bank >= VRAM_BANK_H) block++;
|
|
||||||
|
VRAMCNT VRAMBankCnt;
|
||||||
u8 VRAMBankCnt = T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x240 + block);
|
VRAMBankCnt.value = T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x240 + block);
|
||||||
|
|
||||||
//do nothing if the bank isnt enabled
|
//do nothing if the bank isnt enabled
|
||||||
u8 en = VRAMBankCnt & 0x80;
|
if(VRAMBankCnt.Enable == 0) return;
|
||||||
if(!en) return;
|
|
||||||
|
|
||||||
int mst,ofs=0;
|
switch(VRAMBANK) {
|
||||||
switch(bank) {
|
|
||||||
case VRAM_BANK_A:
|
case VRAM_BANK_A:
|
||||||
case VRAM_BANK_B:
|
case VRAM_BANK_B:
|
||||||
mst = VRAMBankCnt & 3;
|
assert(VRAMBankCnt.MST == VRAMBankCnt.MST_ABHI);
|
||||||
ofs = (VRAMBankCnt>>3) & 3;
|
switch(VRAMBankCnt.MST_ABHI)
|
||||||
switch(mst)
|
|
||||||
{
|
{
|
||||||
case 0: //LCDC
|
case 0: //LCDC
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
|
||||||
MMU_vram_lcdc(bank);
|
MMU_vram_lcdc(VRAMBANK);
|
||||||
if(ofs != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs);
|
if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST_ABHI, VRAMBankCnt.OFS);
|
||||||
break;
|
break;
|
||||||
case 1: //ABG
|
case 1: //ABG
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::ABG;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABG;
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_ABG+ofs*8);
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_ABG+VRAMBankCnt.OFS*8);
|
||||||
break;
|
break;
|
||||||
case 2: //AOBJ
|
case 2: //AOBJ
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::AOBJ;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::AOBJ;
|
||||||
switch(ofs) {
|
switch(VRAMBankCnt.OFS) {
|
||||||
case 0:
|
case 0:
|
||||||
case 1:
|
case 1:
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_AOBJ+ofs*8);
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_AOBJ+VRAMBankCnt.OFS*8);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
PROGINFO("Unsupported ofs setting %d for engine A OBJ vram bank %c\n", ofs, 'A'+bank);
|
PROGINFO("Unsupported ofs setting %d for engine A OBJ vram bank %c\n", VRAMBankCnt.OFS, 'A'+VRAMBANK);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 3: //texture
|
case 3: //texture
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::TEX;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::TEX;
|
||||||
MMU.texInfo.textureSlotAddr[ofs] = MMU_vram_physical(vram_bank_info[bank].page_addr);
|
MMU.texInfo.textureSlotAddr[VRAMBankCnt.OFS] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
|
||||||
break;
|
break;
|
||||||
default: goto unsupported_mst;
|
default: goto unsupported_mst;
|
||||||
}
|
}
|
||||||
|
@ -564,78 +561,75 @@ static inline void MMU_VRAMmapRefreshBank(const int bank)
|
||||||
|
|
||||||
case VRAM_BANK_C:
|
case VRAM_BANK_C:
|
||||||
case VRAM_BANK_D:
|
case VRAM_BANK_D:
|
||||||
mst = VRAMBankCnt & 7;
|
switch(VRAMBankCnt.MST)
|
||||||
ofs = (VRAMBankCnt>>3) & 3;
|
|
||||||
switch(mst)
|
|
||||||
{
|
{
|
||||||
case 0: //LCDC
|
case 0: //LCDC
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
|
||||||
MMU_vram_lcdc(bank);
|
MMU_vram_lcdc(VRAMBANK);
|
||||||
if(ofs != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs);
|
if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST, VRAMBankCnt.OFS);
|
||||||
break;
|
break;
|
||||||
case 1: //ABG
|
case 1: //ABG
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::ABG;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABG;
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_ABG+ofs*8);
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_ABG+VRAMBankCnt.OFS*8);
|
||||||
break;
|
break;
|
||||||
case 2: //arm7
|
case 2: //arm7
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::ARM7;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ARM7;
|
||||||
if(bank == 2) T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240, T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240) | 1);
|
if(VRAMBANK == 2) T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240, T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240) | 1);
|
||||||
if(bank == 3) T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240, T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240) | 2);
|
if(VRAMBANK == 3) T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240, T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240) | 2);
|
||||||
//printf("DING!\n");
|
//printf("DING!\n");
|
||||||
switch(ofs) {
|
switch(VRAMBankCnt.OFS) {
|
||||||
case 0:
|
case 0:
|
||||||
case 1:
|
case 1:
|
||||||
vram_arm7_map[ofs] = vram_bank_info[bank].page_addr;
|
vram_arm7_map[VRAMBankCnt.OFS] = vram_bank_info[VRAMBANK].page_addr;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
PROGINFO("Unsupported ofs setting %d for arm7 vram bank %c\n", ofs, 'A'+bank);
|
PROGINFO("Unsupported ofs setting %d for arm7 vram bank %c\n", VRAMBankCnt.OFS, 'A'+VRAMBANK);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case 3: //texture
|
case 3: //texture
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::TEX;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::TEX;
|
||||||
MMU.texInfo.textureSlotAddr[ofs] = MMU_vram_physical(vram_bank_info[bank].page_addr);
|
MMU.texInfo.textureSlotAddr[VRAMBankCnt.OFS] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
|
||||||
break;
|
break;
|
||||||
case 4: //BGB or BOBJ
|
case 4: //BGB or BOBJ
|
||||||
if(bank == VRAM_BANK_C) {
|
if(VRAMBANK == VRAM_BANK_C) {
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::BBG;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BBG;
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_BBG); //BBG
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BBG); //BBG
|
||||||
} else {
|
} else {
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::BOBJ;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BOBJ;
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_BOBJ); //BOBJ
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BOBJ); //BOBJ
|
||||||
}
|
}
|
||||||
if(ofs != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs);
|
if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST, VRAMBankCnt.OFS);
|
||||||
break;
|
break;
|
||||||
default: goto unsupported_mst;
|
default: goto unsupported_mst;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VRAM_BANK_E:
|
case VRAM_BANK_E:
|
||||||
mst = VRAMBankCnt & 7;
|
if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST, VRAMBankCnt.OFS);
|
||||||
if(((VRAMBankCnt>>3)&3) != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs);
|
switch(VRAMBankCnt.MST) {
|
||||||
switch(mst) {
|
|
||||||
case 0: //LCDC
|
case 0: //LCDC
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
|
||||||
MMU_vram_lcdc(bank);
|
MMU_vram_lcdc(VRAMBANK);
|
||||||
break;
|
break;
|
||||||
case 1: //ABG
|
case 1: //ABG
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::ABG;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABG;
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_ABG);
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_ABG);
|
||||||
break;
|
break;
|
||||||
case 2: //AOBJ
|
case 2: //AOBJ
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::AOBJ;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::AOBJ;
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_AOBJ);
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_AOBJ);
|
||||||
break;
|
break;
|
||||||
case 3: //texture palette
|
case 3: //texture palette
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::TEXPAL;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::TEXPAL;
|
||||||
MMU.texInfo.texPalSlot[0] = MMU_vram_physical(vram_bank_info[bank].page_addr);
|
MMU.texInfo.texPalSlot[0] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
|
||||||
MMU.texInfo.texPalSlot[1] = MMU_vram_physical(vram_bank_info[bank].page_addr+1);
|
MMU.texInfo.texPalSlot[1] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr+1);
|
||||||
MMU.texInfo.texPalSlot[2] = MMU_vram_physical(vram_bank_info[bank].page_addr+2);
|
MMU.texInfo.texPalSlot[2] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr+2);
|
||||||
MMU.texInfo.texPalSlot[3] = MMU_vram_physical(vram_bank_info[bank].page_addr+3);
|
MMU.texInfo.texPalSlot[3] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr+3);
|
||||||
break;
|
break;
|
||||||
case 4: //A BG extended palette
|
case 4: //A BG extended palette
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::ABGEXTPAL;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABGEXTPAL;
|
||||||
MMU.ExtPal[0][0] = MMU_vram_physical(vram_bank_info[bank].page_addr);
|
MMU.ExtPal[0][0] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
|
||||||
MMU.ExtPal[0][1] = MMU.ExtPal[0][0] + ADDRESS_STEP_8KB;
|
MMU.ExtPal[0][1] = MMU.ExtPal[0][0] + ADDRESS_STEP_8KB;
|
||||||
MMU.ExtPal[0][2] = MMU.ExtPal[0][1] + ADDRESS_STEP_8KB;
|
MMU.ExtPal[0][2] = MMU.ExtPal[0][1] + ADDRESS_STEP_8KB;
|
||||||
MMU.ExtPal[0][3] = MMU.ExtPal[0][2] + ADDRESS_STEP_8KB;
|
MMU.ExtPal[0][3] = MMU.ExtPal[0][2] + ADDRESS_STEP_8KB;
|
||||||
|
@ -646,50 +640,48 @@ static inline void MMU_VRAMmapRefreshBank(const int bank)
|
||||||
|
|
||||||
case VRAM_BANK_F:
|
case VRAM_BANK_F:
|
||||||
case VRAM_BANK_G: {
|
case VRAM_BANK_G: {
|
||||||
mst = VRAMBankCnt & 7;
|
|
||||||
ofs = (VRAMBankCnt>>3) & 3;
|
|
||||||
const int pageofslut[] = {0,1,4,5};
|
const int pageofslut[] = {0,1,4,5};
|
||||||
const int pageofs = pageofslut[ofs];
|
const int pageofs = pageofslut[VRAMBankCnt.OFS];
|
||||||
switch(mst)
|
switch(VRAMBankCnt.MST)
|
||||||
{
|
{
|
||||||
case 0: //LCDC
|
case 0: //LCDC
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
|
||||||
MMU_vram_lcdc(bank);
|
MMU_vram_lcdc(VRAMBANK);
|
||||||
if(ofs != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs);
|
if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST, VRAMBankCnt.OFS);
|
||||||
break;
|
break;
|
||||||
case 1: //ABG
|
case 1: //ABG
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::ABG;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABG;
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_ABG+pageofs);
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_ABG+pageofs);
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_ABG+pageofs+2); //unexpected mirroring (required by spyro eternal night)
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_ABG+pageofs+2); //unexpected mirroring (required by spyro eternal night)
|
||||||
break;
|
break;
|
||||||
case 2: //AOBJ
|
case 2: //AOBJ
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::AOBJ;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::AOBJ;
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_AOBJ+pageofs);
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_AOBJ+pageofs);
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_AOBJ+pageofs+2); //unexpected mirroring - I have no proof, but it is inferred from the ABG above
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_AOBJ+pageofs+2); //unexpected mirroring - I have no proof, but it is inferred from the ABG above
|
||||||
break;
|
break;
|
||||||
case 3: //texture palette
|
case 3: //texture palette
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::TEXPAL;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::TEXPAL;
|
||||||
MMU.texInfo.texPalSlot[pageofs] = MMU_vram_physical(vram_bank_info[bank].page_addr);
|
MMU.texInfo.texPalSlot[pageofs] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
|
||||||
break;
|
break;
|
||||||
case 4: //A BG extended palette
|
case 4: //A BG extended palette
|
||||||
switch(ofs) {
|
switch(VRAMBankCnt.OFS) {
|
||||||
case 0:
|
case 0:
|
||||||
case 1:
|
case 1:
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::ABGEXTPAL;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABGEXTPAL;
|
||||||
MMU.ExtPal[0][ofs*2] = MMU_vram_physical(vram_bank_info[bank].page_addr);
|
MMU.ExtPal[0][VRAMBankCnt.OFS*2] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
|
||||||
MMU.ExtPal[0][ofs*2+1] = MMU.ExtPal[0][ofs*2] + ADDRESS_STEP_8KB;
|
MMU.ExtPal[0][VRAMBankCnt.OFS*2+1] = MMU.ExtPal[0][VRAMBankCnt.OFS*2] + ADDRESS_STEP_8KB;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::INVALID;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::INVALID;
|
||||||
PROGINFO("Unsupported ofs setting %d for engine A bgextpal vram bank %c\n", ofs, 'A'+bank);
|
PROGINFO("Unsupported ofs setting %d for engine A bgextpal vram bank %c\n", VRAMBankCnt.OFS, 'A'+VRAMBANK);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 5: //A OBJ extended palette
|
case 5: //A OBJ extended palette
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::AOBJEXTPAL;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::AOBJEXTPAL;
|
||||||
MMU.ObjExtPal[0][0] = MMU_vram_physical(vram_bank_info[bank].page_addr);
|
MMU.ObjExtPal[0][0] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
|
||||||
MMU.ObjExtPal[0][1] = MMU.ObjExtPal[0][1] + ADDRESS_STEP_8KB;
|
MMU.ObjExtPal[0][1] = MMU.ObjExtPal[0][1] + ADDRESS_STEP_8KB;
|
||||||
if(ofs != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs);
|
if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST, VRAMBankCnt.OFS);
|
||||||
break;
|
break;
|
||||||
default: goto unsupported_mst;
|
default: goto unsupported_mst;
|
||||||
}
|
}
|
||||||
|
@ -697,22 +689,22 @@ static inline void MMU_VRAMmapRefreshBank(const int bank)
|
||||||
}
|
}
|
||||||
|
|
||||||
case VRAM_BANK_H:
|
case VRAM_BANK_H:
|
||||||
mst = VRAMBankCnt & 3;
|
assert(VRAMBankCnt.MST == VRAMBankCnt.MST_ABHI);
|
||||||
if(((VRAMBankCnt>>3)&3) != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs);
|
if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST_ABHI, VRAMBankCnt.OFS);
|
||||||
switch(mst)
|
switch(VRAMBankCnt.MST_ABHI)
|
||||||
{
|
{
|
||||||
case 0: //LCDC
|
case 0: //LCDC
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
|
||||||
MMU_vram_lcdc(bank);
|
MMU_vram_lcdc(VRAMBANK);
|
||||||
break;
|
break;
|
||||||
case 1: //BBG
|
case 1: //BBG
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::BBG;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BBG;
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_BBG);
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BBG);
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_BBG + 4); //unexpected mirroring
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BBG + 4); //unexpected mirroring
|
||||||
break;
|
break;
|
||||||
case 2: //B BG extended palette
|
case 2: //B BG extended palette
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::BBGEXTPAL;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BBGEXTPAL;
|
||||||
MMU.ExtPal[1][0] = MMU_vram_physical(vram_bank_info[bank].page_addr);
|
MMU.ExtPal[1][0] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
|
||||||
MMU.ExtPal[1][1] = MMU.ExtPal[1][0] + ADDRESS_STEP_8KB;
|
MMU.ExtPal[1][1] = MMU.ExtPal[1][0] + ADDRESS_STEP_8KB;
|
||||||
MMU.ExtPal[1][2] = MMU.ExtPal[1][1] + ADDRESS_STEP_8KB;
|
MMU.ExtPal[1][2] = MMU.ExtPal[1][1] + ADDRESS_STEP_8KB;
|
||||||
MMU.ExtPal[1][3] = MMU.ExtPal[1][2] + ADDRESS_STEP_8KB;
|
MMU.ExtPal[1][3] = MMU.ExtPal[1][2] + ADDRESS_STEP_8KB;
|
||||||
|
@ -722,27 +714,27 @@ static inline void MMU_VRAMmapRefreshBank(const int bank)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VRAM_BANK_I:
|
case VRAM_BANK_I:
|
||||||
mst = VRAMBankCnt & 3;
|
assert(VRAMBankCnt.MST == VRAMBankCnt.MST_ABHI);
|
||||||
if(((VRAMBankCnt>>3)&3) != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs);
|
if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST_ABHI, VRAMBankCnt.OFS);
|
||||||
switch(mst)
|
switch(VRAMBankCnt.MST_ABHI)
|
||||||
{
|
{
|
||||||
case 0: //LCDC
|
case 0: //LCDC
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
|
||||||
MMU_vram_lcdc(bank);
|
MMU_vram_lcdc(VRAMBANK);
|
||||||
break;
|
break;
|
||||||
case 1: //BBG
|
case 1: //BBG
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::BBG;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BBG;
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_BBG+2);
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BBG+2);
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_BBG+3); //unexpected mirroring
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BBG+3); //unexpected mirroring
|
||||||
break;
|
break;
|
||||||
case 2: //BOBJ
|
case 2: //BOBJ
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::BOBJ;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BOBJ;
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_BOBJ);
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BOBJ);
|
||||||
MMU_vram_arm9(bank,VRAM_PAGE_BOBJ+1); //FF3 end scene (lens flare sprite) needs this as it renders a sprite off the end of the 16KB and back around
|
MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BOBJ+1); //FF3 end scene (lens flare sprite) needs this as it renders a sprite off the end of the 16KB and back around
|
||||||
break;
|
break;
|
||||||
case 3: //B OBJ extended palette
|
case 3: //B OBJ extended palette
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::BOBJEXTPAL;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BOBJEXTPAL;
|
||||||
MMU.ObjExtPal[1][0] = MMU_vram_physical(vram_bank_info[bank].page_addr);
|
MMU.ObjExtPal[1][0] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
|
||||||
MMU.ObjExtPal[1][1] = MMU.ObjExtPal[1][1] + ADDRESS_STEP_8KB;
|
MMU.ObjExtPal[1][1] = MMU.ObjExtPal[1][1] + ADDRESS_STEP_8KB;
|
||||||
break;
|
break;
|
||||||
default: goto unsupported_mst;
|
default: goto unsupported_mst;
|
||||||
|
@ -750,15 +742,15 @@ static inline void MMU_VRAMmapRefreshBank(const int bank)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
||||||
} //switch(bank)
|
} //switch(VRAMBANK)
|
||||||
|
|
||||||
vramConfiguration.banks[bank].ofs = ofs;
|
vramConfiguration.banks[VRAMBANK].ofs = VRAMBankCnt.OFS;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
unsupported_mst:
|
unsupported_mst:
|
||||||
vramConfiguration.banks[bank].purpose = VramConfiguration::INVALID;
|
vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::INVALID;
|
||||||
PROGINFO("Unsupported mst setting %d for vram bank %c\n", mst, 'A'+bank);
|
PROGINFO("Unsupported mst setting %d for vram bank %c\n", VRAMBankCnt.MST, 'A'+VRAMBANK);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MMU_VRAM_unmap_all()
|
void MMU_VRAM_unmap_all()
|
||||||
|
@ -821,19 +813,19 @@ static inline void MMU_VRAMmapControl(u8 block, u8 VRAMBankCnt)
|
||||||
//goblet of fire "care of magical creatures" maps I and D to BOBJ (the I is an accident)
|
//goblet of fire "care of magical creatures" maps I and D to BOBJ (the I is an accident)
|
||||||
//and requires A to override it.
|
//and requires A to override it.
|
||||||
//This may create other bugs....
|
//This may create other bugs....
|
||||||
MMU_VRAMmapRefreshBank(VRAM_BANK_I);
|
MMU_VRAMmapRefreshBank<VRAM_BANK_I>();
|
||||||
MMU_VRAMmapRefreshBank(VRAM_BANK_H);
|
MMU_VRAMmapRefreshBank<VRAM_BANK_H>();
|
||||||
MMU_VRAMmapRefreshBank(VRAM_BANK_G);
|
MMU_VRAMmapRefreshBank<VRAM_BANK_G>();
|
||||||
MMU_VRAMmapRefreshBank(VRAM_BANK_F);
|
MMU_VRAMmapRefreshBank<VRAM_BANK_F>();
|
||||||
MMU_VRAMmapRefreshBank(VRAM_BANK_E);
|
MMU_VRAMmapRefreshBank<VRAM_BANK_E>();
|
||||||
//zero 21-jun-2012
|
//zero 21-jun-2012
|
||||||
//tomwi's streaming music demo sets A and D to ABG (the A is an accident).
|
//tomwi's streaming music demo sets A and D to ABG (the A is an accident).
|
||||||
//in this case, D should get priority.
|
//in this case, D should get priority.
|
||||||
//this is somewhat risky. will it break other things?
|
//this is somewhat risky. will it break other things?
|
||||||
MMU_VRAMmapRefreshBank(VRAM_BANK_A);
|
MMU_VRAMmapRefreshBank<VRAM_BANK_A>();
|
||||||
MMU_VRAMmapRefreshBank(VRAM_BANK_B);
|
MMU_VRAMmapRefreshBank<VRAM_BANK_B>();
|
||||||
MMU_VRAMmapRefreshBank(VRAM_BANK_C);
|
MMU_VRAMmapRefreshBank<VRAM_BANK_C>();
|
||||||
MMU_VRAMmapRefreshBank(VRAM_BANK_D);
|
MMU_VRAMmapRefreshBank<VRAM_BANK_D>();
|
||||||
|
|
||||||
//printf(vramConfiguration.describe().c_str());
|
//printf(vramConfiguration.describe().c_str());
|
||||||
//printf("vram remapped at vcount=%d\n",nds.VCount);
|
//printf("vram remapped at vcount=%d\n",nds.VCount);
|
||||||
|
@ -912,6 +904,8 @@ void MMU_Init(void)
|
||||||
LOG("MMU init\n");
|
LOG("MMU init\n");
|
||||||
|
|
||||||
memset(&MMU, 0, sizeof(MMU_struct));
|
memset(&MMU, 0, sizeof(MMU_struct));
|
||||||
|
|
||||||
|
MMU.blank_memory = &MMU.ARM9_LCD[0xA4000];
|
||||||
|
|
||||||
//MMU.DTCMRegion = 0x027C0000;
|
//MMU.DTCMRegion = 0x027C0000;
|
||||||
//even though apps may change dtcm immediately upon startup, this is the correct hardware starting value:
|
//even though apps may change dtcm immediately upon startup, this is the correct hardware starting value:
|
||||||
|
@ -961,7 +955,6 @@ void MMU_Reset()
|
||||||
memset(MMU.ARM9_VMEM, 0, sizeof(MMU.ARM9_VMEM));
|
memset(MMU.ARM9_VMEM, 0, sizeof(MMU.ARM9_VMEM));
|
||||||
memset(MMU.MAIN_MEM, 0, sizeof(MMU.MAIN_MEM));
|
memset(MMU.MAIN_MEM, 0, sizeof(MMU.MAIN_MEM));
|
||||||
|
|
||||||
memset(MMU.blank_memory, 0, sizeof(MMU.blank_memory));
|
|
||||||
memset(MMU.UNUSED_RAM, 0, sizeof(MMU.UNUSED_RAM));
|
memset(MMU.UNUSED_RAM, 0, sizeof(MMU.UNUSED_RAM));
|
||||||
memset(MMU.MORE_UNUSED_RAM, 0, sizeof(MMU.UNUSED_RAM));
|
memset(MMU.MORE_UNUSED_RAM, 0, sizeof(MMU.UNUSED_RAM));
|
||||||
|
|
||||||
|
@ -3591,22 +3584,22 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val)
|
||||||
val &= 0x7F7F;
|
val &= 0x7F7F;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case REG_DISPA_BG2XL: mainEngine->setAffineStartWord(2,0,val,0); break;
|
case REG_DISPA_BG2XL: mainEngine->setAffineStartWord<GPULayerID_BG2, 0, false>(val); break;
|
||||||
case REG_DISPA_BG2XH: mainEngine->setAffineStartWord(2,0,val,1); break;
|
case REG_DISPA_BG2XH: mainEngine->setAffineStartWord<GPULayerID_BG2, 0, true>(val); break;
|
||||||
case REG_DISPA_BG2YL: mainEngine->setAffineStartWord(2,1,val,0); break;
|
case REG_DISPA_BG2YL: mainEngine->setAffineStartWord<GPULayerID_BG2, 1, false>(val); break;
|
||||||
case REG_DISPA_BG2YH: mainEngine->setAffineStartWord(2,1,val,1); break;
|
case REG_DISPA_BG2YH: mainEngine->setAffineStartWord<GPULayerID_BG2, 1, true>(val); break;
|
||||||
case REG_DISPA_BG3XL: mainEngine->setAffineStartWord(3,0,val,0); break;
|
case REG_DISPA_BG3XL: mainEngine->setAffineStartWord<GPULayerID_BG3, 0, false>(val); break;
|
||||||
case REG_DISPA_BG3XH: mainEngine->setAffineStartWord(3,0,val,1); break;
|
case REG_DISPA_BG3XH: mainEngine->setAffineStartWord<GPULayerID_BG3, 0, true>(val); break;
|
||||||
case REG_DISPA_BG3YL: mainEngine->setAffineStartWord(3,1,val,0); break;
|
case REG_DISPA_BG3YL: mainEngine->setAffineStartWord<GPULayerID_BG3, 1, false>(val); break;
|
||||||
case REG_DISPA_BG3YH: mainEngine->setAffineStartWord(3,1,val,1); break;
|
case REG_DISPA_BG3YH: mainEngine->setAffineStartWord<GPULayerID_BG3, 1, true>(val); break;
|
||||||
case REG_DISPB_BG2XL: subEngine->setAffineStartWord(2,0,val,0); break;
|
case REG_DISPB_BG2XL: subEngine->setAffineStartWord<GPULayerID_BG2, 0, false>(val); break;
|
||||||
case REG_DISPB_BG2XH: subEngine->setAffineStartWord(2,0,val,1); break;
|
case REG_DISPB_BG2XH: subEngine->setAffineStartWord<GPULayerID_BG2, 0, true>(val); break;
|
||||||
case REG_DISPB_BG2YL: subEngine->setAffineStartWord(2,1,val,0); break;
|
case REG_DISPB_BG2YL: subEngine->setAffineStartWord<GPULayerID_BG2, 1, false>(val); break;
|
||||||
case REG_DISPB_BG2YH: subEngine->setAffineStartWord(2,1,val,1); break;
|
case REG_DISPB_BG2YH: subEngine->setAffineStartWord<GPULayerID_BG2, 1, true>(val); break;
|
||||||
case REG_DISPB_BG3XL: subEngine->setAffineStartWord(3,0,val,0); break;
|
case REG_DISPB_BG3XL: subEngine->setAffineStartWord<GPULayerID_BG3, 0, false>(val); break;
|
||||||
case REG_DISPB_BG3XH: subEngine->setAffineStartWord(3,0,val,1); break;
|
case REG_DISPB_BG3XH: subEngine->setAffineStartWord<GPULayerID_BG3, 0, true>(val); break;
|
||||||
case REG_DISPB_BG3YL: subEngine->setAffineStartWord(3,1,val,0); break;
|
case REG_DISPB_BG3YL: subEngine->setAffineStartWord<GPULayerID_BG3, 1, false>(val); break;
|
||||||
case REG_DISPB_BG3YH: subEngine->setAffineStartWord(3,1,val,1); break;
|
case REG_DISPB_BG3YH: subEngine->setAffineStartWord<GPULayerID_BG3, 1, true>(val); break;
|
||||||
|
|
||||||
case REG_DISPA_DISP3DCNT: writereg_DISP3DCNT(16,adr,val); return;
|
case REG_DISPA_DISP3DCNT: writereg_DISP3DCNT(16,adr,val); return;
|
||||||
|
|
||||||
|
@ -3815,42 +3808,42 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val)
|
||||||
|
|
||||||
case REG_DISPA_BG0CNT :
|
case REG_DISPA_BG0CNT :
|
||||||
//GPULOG("MAIN BG0 SETPROP 16B %08X\r\n", val);
|
//GPULOG("MAIN BG0 SETPROP 16B %08X\r\n", val);
|
||||||
mainEngine->SetBGProp(0, val);
|
mainEngine->SetBGProp<GPULayerID_BG0>(val);
|
||||||
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x8, val);
|
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x8, val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPA_BG1CNT :
|
case REG_DISPA_BG1CNT :
|
||||||
//GPULOG("MAIN BG1 SETPROP 16B %08X\r\n", val);
|
//GPULOG("MAIN BG1 SETPROP 16B %08X\r\n", val);
|
||||||
mainEngine->SetBGProp(1, val);
|
mainEngine->SetBGProp<GPULayerID_BG1>(val);
|
||||||
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xA, val);
|
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xA, val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPA_BG2CNT :
|
case REG_DISPA_BG2CNT :
|
||||||
//GPULOG("MAIN BG2 SETPROP 16B %08X\r\n", val);
|
//GPULOG("MAIN BG2 SETPROP 16B %08X\r\n", val);
|
||||||
mainEngine->SetBGProp(2, val);
|
mainEngine->SetBGProp<GPULayerID_BG2>(val);
|
||||||
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xC, val);
|
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xC, val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPA_BG3CNT :
|
case REG_DISPA_BG3CNT :
|
||||||
//GPULOG("MAIN BG3 SETPROP 16B %08X\r\n", val);
|
//GPULOG("MAIN BG3 SETPROP 16B %08X\r\n", val);
|
||||||
mainEngine->SetBGProp(3, val);
|
mainEngine->SetBGProp<GPULayerID_BG3>(val);
|
||||||
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xE, val);
|
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xE, val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPB_BG0CNT :
|
case REG_DISPB_BG0CNT :
|
||||||
//GPULOG("SUB BG0 SETPROP 16B %08X\r\n", val);
|
//GPULOG("SUB BG0 SETPROP 16B %08X\r\n", val);
|
||||||
subEngine->SetBGProp(0, val);
|
subEngine->SetBGProp<GPULayerID_BG0>(val);
|
||||||
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x1008, val);
|
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x1008, val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPB_BG1CNT :
|
case REG_DISPB_BG1CNT :
|
||||||
//GPULOG("SUB BG1 SETPROP 16B %08X\r\n", val);
|
//GPULOG("SUB BG1 SETPROP 16B %08X\r\n", val);
|
||||||
subEngine->SetBGProp(1, val);
|
subEngine->SetBGProp<GPULayerID_BG1>(val);
|
||||||
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100A, val);
|
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100A, val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPB_BG2CNT :
|
case REG_DISPB_BG2CNT :
|
||||||
//GPULOG("SUB BG2 SETPROP 16B %08X\r\n", val);
|
//GPULOG("SUB BG2 SETPROP 16B %08X\r\n", val);
|
||||||
subEngine->SetBGProp(2, val);
|
subEngine->SetBGProp<GPULayerID_BG2>(val);
|
||||||
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100C, val);
|
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100C, val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPB_BG3CNT :
|
case REG_DISPB_BG3CNT :
|
||||||
//GPULOG("SUB BG3 SETPROP 16B %08X\r\n", val);
|
//GPULOG("SUB BG3 SETPROP 16B %08X\r\n", val);
|
||||||
subEngine->SetBGProp(3, val);
|
subEngine->SetBGProp<GPULayerID_BG3>(val);
|
||||||
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100E, val);
|
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100E, val);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -4133,28 +4126,28 @@ void FASTCALL _MMU_ARM9_write32(u32 adr, u32 val)
|
||||||
MMU_new.gxstat.write32(val);
|
MMU_new.gxstat.write32(val);
|
||||||
break;
|
break;
|
||||||
case REG_DISPA_BG2XL:
|
case REG_DISPA_BG2XL:
|
||||||
mainEngine->setAffineStart(2,0,val);
|
mainEngine->setAffineStart<GPULayerID_BG2, 0>(val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPA_BG2YL:
|
case REG_DISPA_BG2YL:
|
||||||
mainEngine->setAffineStart(2,1,val);
|
mainEngine->setAffineStart<GPULayerID_BG2, 1>(val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPB_BG2XL:
|
case REG_DISPB_BG2XL:
|
||||||
subEngine->setAffineStart(2,0,val);
|
subEngine->setAffineStart<GPULayerID_BG2, 0>(val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPB_BG2YL:
|
case REG_DISPB_BG2YL:
|
||||||
subEngine->setAffineStart(2,1,val);
|
subEngine->setAffineStart<GPULayerID_BG2, 1>(val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPA_BG3XL:
|
case REG_DISPA_BG3XL:
|
||||||
mainEngine->setAffineStart(3,0,val);
|
mainEngine->setAffineStart<GPULayerID_BG3, 0>(val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPA_BG3YL:
|
case REG_DISPA_BG3YL:
|
||||||
mainEngine->setAffineStart(3,1,val);
|
mainEngine->setAffineStart<GPULayerID_BG3, 1>(val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPB_BG3XL:
|
case REG_DISPB_BG3XL:
|
||||||
subEngine->setAffineStart(3,0,val);
|
subEngine->setAffineStart<GPULayerID_BG3, 0>(val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPB_BG3YL:
|
case REG_DISPB_BG3YL:
|
||||||
subEngine->setAffineStart(3,1,val);
|
subEngine->setAffineStart<GPULayerID_BG3, 1>(val);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Alpha test reference value - Parameters:1
|
// Alpha test reference value - Parameters:1
|
||||||
|
@ -4363,24 +4356,24 @@ void FASTCALL _MMU_ARM9_write32(u32 adr, u32 val)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case REG_DISPA_BG0CNT :
|
case REG_DISPA_BG0CNT :
|
||||||
mainEngine->SetBGProp(0, (val & 0xFFFF));
|
mainEngine->SetBGProp<GPULayerID_BG0>(val & 0xFFFF);
|
||||||
mainEngine->SetBGProp(1, (val >> 16));
|
mainEngine->SetBGProp<GPULayerID_BG1>(val >> 16);
|
||||||
//if((val>>16)==0x400) emu_halt();
|
//if((val>>16)==0x400) emu_halt();
|
||||||
T1WriteLong(MMU.ARM9_REG, 8, val);
|
T1WriteLong(MMU.ARM9_REG, 8, val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPA_BG2CNT :
|
case REG_DISPA_BG2CNT :
|
||||||
mainEngine->SetBGProp(2, (val & 0xFFFF));
|
mainEngine->SetBGProp<GPULayerID_BG2>(val & 0xFFFF);
|
||||||
mainEngine->SetBGProp(3, (val >> 16));
|
mainEngine->SetBGProp<GPULayerID_BG3>(val >> 16);
|
||||||
T1WriteLong(MMU.ARM9_REG, 0xC, val);
|
T1WriteLong(MMU.ARM9_REG, 0xC, val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPB_BG0CNT :
|
case REG_DISPB_BG0CNT :
|
||||||
subEngine->SetBGProp(0, (val & 0xFFFF));
|
subEngine->SetBGProp<GPULayerID_BG0>(val & 0xFFFF);
|
||||||
subEngine->SetBGProp(1, (val >> 16));
|
subEngine->SetBGProp<GPULayerID_BG1>(val >> 16);
|
||||||
T1WriteLong(MMU.ARM9_REG, 0x1008, val);
|
T1WriteLong(MMU.ARM9_REG, 0x1008, val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPB_BG2CNT :
|
case REG_DISPB_BG2CNT :
|
||||||
subEngine->SetBGProp(2, (val & 0xFFFF));
|
subEngine->SetBGProp<GPULayerID_BG2>(val & 0xFFFF);
|
||||||
subEngine->SetBGProp(3, (val >> 16));
|
subEngine->SetBGProp<GPULayerID_BG3>(val >> 16);
|
||||||
T1WriteLong(MMU.ARM9_REG, 0x100C, val);
|
T1WriteLong(MMU.ARM9_REG, 0x100C, val);
|
||||||
return;
|
return;
|
||||||
case REG_DISPA_DISPMMEMFIFO:
|
case REG_DISPA_DISPMMEMFIFO:
|
||||||
|
|
|
@ -312,6 +312,28 @@ struct GCBUS_Controller
|
||||||
eCardMode mode; //probably only one of these
|
eCardMode mode; //probably only one of these
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// VRAMCNT: one 8-bit VRAM bank control value, readable either as a raw byte
// (`value`) or through one of two overlapping bit-field views.
// NOTE(review): which physical bits each field occupies depends on the compiler's
// bit-field allocation order. The code this type replaces extracted MST with
// `& 7` and OFS with `(x >> 3) & 3`, so LSB-first allocation is assumed here —
// confirm on every supported compiler/target.
typedef union
|
||||||
|
{
|
||||||
|
u8 value; // raw register byte
|
||||||
|
|
||||||
|
struct // general view: 3-bit MST
|
||||||
|
{
|
||||||
|
unsigned MST:3; // 3-bit field; switched on to select the bank's mapping
|
||||||
|
unsigned OFS:2; // 2-bit offset field; stored into VramConfiguration::banks[].ofs
|
||||||
|
unsigned :2; // unnamed padding bits
|
||||||
|
unsigned Enable:1; // presumably the bank-enable flag — not read in the code shown here
|
||||||
|
};
|
||||||
|
|
||||||
|
struct // alternate view with a 2-bit MST; used in this file for banks H and I
|
||||||
|
{
|
||||||
|
unsigned MST_ABHI:2; // 2-bit MST; callers assert it equals MST before using it
|
||||||
|
unsigned :1; // unnamed padding where the general view has the third MST bit
|
||||||
|
unsigned OFS_ABHI:2; // same width and position in the declaration order as OFS
|
||||||
|
unsigned :2; // unnamed padding bits
|
||||||
|
unsigned Enable_ABHI:1; // same width and position in the declaration order as Enable
|
||||||
|
};
|
||||||
|
} VRAMCNT;
|
||||||
|
|
||||||
#define DUP2(x) x, x
|
#define DUP2(x) x, x
|
||||||
#define DUP4(x) x, x, x, x
|
#define DUP4(x) x, x, x, x
|
||||||
#define DUP8(x) x, x, x, x, x, x, x, x
|
#define DUP8(x) x, x, x, x, x, x, x, x
|
||||||
|
@ -328,18 +350,14 @@ struct MMU_struct
|
||||||
u8 MAIN_MEM[16*1024*1024]; //expanded from 8MB to 16MB to support dsi
|
u8 MAIN_MEM[16*1024*1024]; //expanded from 8MB to 16MB to support dsi
|
||||||
u8 ARM9_REG[0x1000000]; //this variable is evil and should be removed by correctly emulating all registers.
|
u8 ARM9_REG[0x1000000]; //this variable is evil and should be removed by correctly emulating all registers.
|
||||||
u8 ARM9_BIOS[0x8000];
|
u8 ARM9_BIOS[0x8000];
|
||||||
u8 ARM9_VMEM[0x800];
|
CACHE_ALIGN u8 ARM9_VMEM[0x800];
|
||||||
|
|
||||||
|
//an extra 128KB for blank memory, directly after arm9_lcd, so that
|
||||||
|
//we can easily map things to the end of arm9_lcd to represent
|
||||||
|
//an unmapped state
|
||||||
|
CACHE_ALIGN u8 ARM9_LCD[0xA4000 + 0x20000];
|
||||||
|
u8 *blank_memory;
|
||||||
|
|
||||||
#include "PACKED.h"
|
|
||||||
struct {
|
|
||||||
u8 ARM9_LCD[0xA4000];
|
|
||||||
//an extra 128KB for blank memory, directly after arm9_lcd, so that
|
|
||||||
//we can easily map things to the end of arm9_lcd to represent
|
|
||||||
//an unmapped state
|
|
||||||
u8 blank_memory[0x20000];
|
|
||||||
};
|
|
||||||
#include "PACKED_END.h"
|
|
||||||
|
|
||||||
u8 ARM9_OAM[0x800];
|
u8 ARM9_OAM[0x800];
|
||||||
|
|
||||||
u8* ExtPal[2][4];
|
u8* ExtPal[2][4];
|
||||||
|
@ -519,16 +537,20 @@ extern const armcpu_memory_iface arm9_base_memory_iface;
|
||||||
extern const armcpu_memory_iface arm7_base_memory_iface;
|
extern const armcpu_memory_iface arm7_base_memory_iface;
|
||||||
extern const armcpu_memory_iface arm9_direct_memory_iface;
|
extern const armcpu_memory_iface arm9_direct_memory_iface;
|
||||||
|
|
||||||
#define VRAM_BANKS 9
|
// VRAMBankID: identifiers for the nine VRAM banks, A through I, numbered 0-8.
// Replaces the former VRAM_BANK_* / VRAM_BANKS #define constants (old column of
// this diff) with an enum so the values can be used as template arguments,
// e.g. MMU_VRAMmapRefreshBank<VRAM_BANK_I>().
enum VRAMBankID
|
||||||
#define VRAM_BANK_A 0
|
{
|
||||||
#define VRAM_BANK_B 1
|
VRAM_BANK_A = 0,
|
||||||
#define VRAM_BANK_C 2
|
VRAM_BANK_B = 1,
|
||||||
#define VRAM_BANK_D 3
|
VRAM_BANK_C = 2,
|
||||||
#define VRAM_BANK_E 4
|
VRAM_BANK_D = 3,
|
||||||
#define VRAM_BANK_F 5
|
VRAM_BANK_E = 4,
|
||||||
#define VRAM_BANK_G 6
|
VRAM_BANK_F = 5,
|
||||||
#define VRAM_BANK_H 7
|
VRAM_BANK_G = 6,
|
||||||
#define VRAM_BANK_I 8
|
VRAM_BANK_H = 7,
|
||||||
|
VRAM_BANK_I = 8,
|
||||||
|
|
||||||
|
VRAM_BANK_COUNT = 9 // number of banks; takes over from the old VRAM_BANKS macro for array sizing and loops
|
||||||
|
};
|
||||||
|
|
||||||
#define VRAM_PAGE_ABG 0
|
#define VRAM_PAGE_ABG 0
|
||||||
#define VRAM_PAGE_BBG 128
|
#define VRAM_PAGE_BBG 128
|
||||||
|
@ -545,10 +567,10 @@ struct VramConfiguration {
|
||||||
// BankInfo: per-bank record of the current VRAM mapping, one entry per VRAM bank.
struct BankInfo {
|
struct BankInfo {
|
||||||
Purpose purpose;
|
Purpose purpose; // what the bank is currently mapped as (e.g. LCDC, ABG, INVALID); OFF after clear()
|
||||||
int ofs;
|
int ofs; // offset value recorded for the bank; reset to 0 by clear()
|
||||||
} banks[VRAM_BANKS];
|
} banks[VRAM_BANK_COUNT];
|
||||||
|
|
||||||
inline void clear() {
|
inline void clear() {
|
||||||
for(int i=0;i<VRAM_BANKS;i++) {
|
for(int i=0;i<VRAM_BANK_COUNT;i++) {
|
||||||
banks[i].ofs = 0;
|
banks[i].ofs = 0;
|
||||||
banks[i].purpose = OFF;
|
banks[i].purpose = OFF;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1448,7 +1448,7 @@ static void execHardware_hstart()
|
||||||
//when the vcount hits 263 it rolls over to 0
|
//when the vcount hits 263 it rolls over to 0
|
||||||
nds.VCount=0;
|
nds.VCount=0;
|
||||||
}
|
}
|
||||||
if(nds.VCount==262)
|
else if(nds.VCount==262)
|
||||||
{
|
{
|
||||||
//when the vcount hits 262, vblank ends (oam pre-renders by one scanline)
|
//when the vcount hits 262, vblank ends (oam pre-renders by one scanline)
|
||||||
execHardware_hstart_vblankEnd();
|
execHardware_hstart_vblankEnd();
|
||||||
|
|
|
@ -575,12 +575,12 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
|
||||||
{
|
{
|
||||||
// Copy the colors to the color buffer. Since we can only copy 8 elements at once,
|
// Copy the colors to the color buffer. Since we can only copy 8 elements at once,
|
||||||
// we need to load-store twice.
|
// we need to load-store twice.
|
||||||
_mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i + 8), _mm_loadu_si128((__m128i *)(clearColorBuffer + i + 8)) );
|
_mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i + 8), _mm_load_si128((__m128i *)(clearColorBuffer + i + 8)) );
|
||||||
_mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i), _mm_loadu_si128((__m128i *)(clearColorBuffer + i)) );
|
_mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i), _mm_load_si128((__m128i *)(clearColorBuffer + i)) );
|
||||||
|
|
||||||
// Write the depth values to the depth buffer.
|
// Write the depth values to the depth buffer.
|
||||||
__m128i clearDepthHi_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i + 8));
|
__m128i clearDepthHi_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i + 8));
|
||||||
__m128i clearDepthLo_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i));
|
__m128i clearDepthLo_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i));
|
||||||
clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, depthBitMask_vec128);
|
clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, depthBitMask_vec128);
|
||||||
clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, depthBitMask_vec128);
|
clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, depthBitMask_vec128);
|
||||||
|
|
||||||
|
@ -602,8 +602,8 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
|
||||||
this->clearImageDepthBuffer[i+ 0] = dsDepthToD24_LUT[_mm_extract_epi16(clearDepthLo_vec128, 0)];
|
this->clearImageDepthBuffer[i+ 0] = dsDepthToD24_LUT[_mm_extract_epi16(clearDepthLo_vec128, 0)];
|
||||||
|
|
||||||
// Write the fog flags to the fog flag buffer.
|
// Write the fog flags to the fog flag buffer.
|
||||||
clearDepthHi_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i + 8));
|
clearDepthHi_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i + 8));
|
||||||
clearDepthLo_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i));
|
clearDepthLo_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i));
|
||||||
clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, fogBufferBitMask_vec128);
|
clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, fogBufferBitMask_vec128);
|
||||||
clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, fogBufferBitMask_vec128);
|
clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, fogBufferBitMask_vec128);
|
||||||
clearDepthHi_vec128 = _mm_srli_epi16(clearDepthHi_vec128, 15);
|
clearDepthHi_vec128 = _mm_srli_epi16(clearDepthHi_vec128, 15);
|
||||||
|
|
Loading…
Reference in New Issue