GPU / MMU:

- Do SSE2 optimization for direct-color sprite renders.
- Make ARM9_LCD cache-aligned. This allows SSE2 to perform aligned loads/stores on certain operations, improving performance.
- Further templatize some methods.
- Do some misc. code cleanup.
This commit is contained in:
rogerman 2015-09-05 22:35:34 +00:00
parent ced0d3986d
commit 7e3f1d85ae
6 changed files with 454 additions and 378 deletions

View File

@ -159,63 +159,6 @@ FORCEINLINE void rot_BMP_map(GPUEngineBase *gpu, const s32 auxX, const s32 auxY,
gpu->___setFinalColorBck<LAYERID, MOSAIC, false, 0, ISCUSTOMRENDERINGNEEDED, USECUSTOMVRAM>(color, i, ((color & 0x8000) != 0)); gpu->___setFinalColorBck<LAYERID, MOSAIC, false, 0, ISCUSTOMRENDERINGNEEDED, USECUSTOMVRAM>(color, i, ((color & 0x8000) != 0));
} }
typedef void (*rot_fun)(GPUEngineBase *gpu, const s32 auxX, const s32 auxY, const int lg, const u32 map, const u32 tile, const u16 *pal, const size_t i);
// Walks LG destination pixels of one affine (rotated/scaled) background line and
// calls the pixel routine `fun` for each one whose source coordinate is usable.
//
// Template parameters:
//   fun  - per-pixel routine, invoked as fun(gpu, auxX, auxY, wh, map, tile, pal, i)
//   WRAP - true: source coordinates are masked with (wh-1)/(ht-1) and never
//          bounds-checked; false: pixels whose source falls outside
//          [0,wh) x [0,ht) are skipped
//
// Parameters:
//   gpu            - engine handed to `fun`
//   param          - BGxX/BGxY give the starting source coordinate, BGxPA/BGxPC
//                    the per-pixel X/Y increments
//   LG             - number of destination pixels to process
//   wh, ht         - source width/height used for masking and bounds checks
//   map, tile, pal - forwarded unchanged to `fun`
//
// NOTE(review): the (wh-1)/(ht-1) masks only wrap correctly when wh and ht are
// powers of two -- presumably guaranteed by the caller; confirm.
template<rot_fun fun, bool WRAP>
void rot_scale_op(GPUEngineBase *gpu, const BGxPARMS &param, const u16 LG, const s32 wh, const s32 ht, const u32 map, const u32 tile, const u16 *pal)
{
ROTOCOORD x, y;
x.val = param.BGxX;
y.val = param.BGxY;
const s32 dx = (s32)param.BGxPA;
const s32 dy = (s32)param.BGxPC;
// as an optimization, specially handle the fairly common case of
// "unrotated + unscaled + no boundary checking required"
// NOTE(review): dx is compared against GPU_FRAMEBUFFER_NATIVE_WIDTH; presumably
// that constant happens to equal the fixed-point encoding of 1.0 for BGxPA --
// confirm, since the two quantities are unrelated in meaning.
if (dx == GPU_FRAMEBUFFER_NATIVE_WIDTH && dy == 0)
{
s32 auxX = (WRAP) ? x.bits.Integer & (wh-1) : x.bits.Integer;
const s32 auxY = (WRAP) ? y.bits.Integer & (ht-1) : y.bits.Integer;
// Without wrapping, the fast path is only taken when the whole span is inside
// the source; otherwise control falls through to the per-pixel loop below.
if (WRAP || (auxX + LG < wh && auxX >= 0 && auxY < ht && auxY >= 0))
{
for (size_t i = 0; i < LG; i++)
{
fun(gpu, auxX, auxY, wh, map, tile, pal, i);
auxX++;
if (WRAP)
auxX = auxX & (wh-1);
}
return;
}
}
// General path: advance the fixed-point coordinates by (dx, dy) per pixel and
// test (or mask) every source coordinate individually.
for (size_t i = 0; i < LG; i++, x.val += dx, y.val += dy)
{
const s32 auxX = (WRAP) ? x.bits.Integer & (wh-1) : x.bits.Integer;
const s32 auxY = (WRAP) ? y.bits.Integer & (ht-1) : y.bits.Integer;
if (WRAP || ((auxX >= 0) && (auxX < wh) && (auxY >= 0) && (auxY < ht)))
fun(gpu, auxX, auxY, wh, map, tile, pal, i);
}
}
// Dispatches to the wrapping or non-wrapping instantiation of rot_scale_op for
// layer LAYERID. The choice is made from the PaletteSet_Wrap bit of that layer's
// BGxCNT register; the layer width/height come from gpu->BGSize[LAYERID].
// All other arguments are forwarded unchanged.
template<GPULayerID LAYERID, rot_fun fun>
void apply_rot_fun(GPUEngineBase *gpu, const BGxPARMS &param, const u16 LG, const u32 map, const u32 tile, const u16 *pal)
{
struct _BGxCNT *bgCnt = &(gpu->dispx_st)->dispx_BGxCNT[LAYERID].bits;
s32 wh = gpu->BGSize[LAYERID][0];
s32 ht = gpu->BGSize[LAYERID][1];
if (bgCnt->PaletteSet_Wrap)
rot_scale_op<fun,true>(gpu, param, LG, wh, ht, map, tile, pal);
else
rot_scale_op<fun,false>(gpu, param, LG, wh, ht, map, tile, pal);
}
void gpu_savestate(EMUFILE* os) void gpu_savestate(EMUFILE* os)
{ {
const GPUEngineA *mainEngine = GPU->GetEngineMain(); const GPUEngineA *mainEngine = GPU->GetEngineMain();
@ -351,6 +294,9 @@ void GPUEngineBase::_InitLUTs()
GPUEngineBase::GPUEngineBase() GPUEngineBase::GPUEngineBase()
{ {
_paletteBG = NULL;
_paletteOBJ = NULL;
debug = false; debug = false;
_InitLUTs(); _InitLUTs();
workingScanline = NULL; workingScanline = NULL;
@ -419,7 +365,7 @@ void GPUEngineBase::_Reset_Base()
this->_bgPrio[1] = 0; this->_bgPrio[1] = 0;
this->_bgPrio[2] = 0; this->_bgPrio[2] = 0;
this->_bgPrio[3] = 0; this->_bgPrio[3] = 0;
this->_bgPrio[4] = 0xFF; this->_bgPrio[4] = 0x7F;
this->_bg0HasHighestPrio = true; this->_bg0HasHighestPrio = true;
@ -677,54 +623,55 @@ void GPUEngineBase::SetVideoProp(const u32 ctrlBits)
this->_sprEnable = cnt->OBJ_Enable; this->_sprEnable = cnt->OBJ_Enable;
this->SetBGProp(3, T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 14)); this->SetBGProp<GPULayerID_BG3>( T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 14) );
this->SetBGProp(2, T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 12)); this->SetBGProp<GPULayerID_BG2>( T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 12) );
this->SetBGProp(1, T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 10)); this->SetBGProp<GPULayerID_BG1>( T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 10) );
this->SetBGProp(0, T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 8)); this->SetBGProp<GPULayerID_BG0>( T1ReadWord(MMU.ARM9_REG, this->_engineID * ADDRESS_STEP_4KB + 8) );
} }
//this handles writing in BGxCNT //this handles writing in BGxCNT
void GPUEngineBase::SetBGProp(const size_t num, const u16 ctrlBits) template <GPULayerID LAYERID>
void GPUEngineBase::SetBGProp(const u16 ctrlBits)
{ {
struct _BGxCNT *cnt = &((this->dispx_st)->dispx_BGxCNT[num].bits); struct _BGxCNT *cnt = &((this->dispx_st)->dispx_BGxCNT[LAYERID].bits);
struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits; struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits;
this->dispx_st->dispx_BGxCNT[num].val = LE_TO_LOCAL_16(ctrlBits); this->dispx_st->dispx_BGxCNT[LAYERID].val = LE_TO_LOCAL_16(ctrlBits);
this->ResortBGLayers(); this->ResortBGLayers();
if (this->_engineID == GPUEngineID_Sub) if (this->_engineID == GPUEngineID_Sub)
{ {
this->_BG_tile_ram[num] = MMU_BBG; this->_BG_tile_ram[LAYERID] = MMU_BBG;
this->_BG_bmp_ram[num] = MMU_BBG; this->_BG_bmp_ram[LAYERID] = MMU_BBG;
this->_BG_bmp_large_ram[num] = MMU_BBG; this->_BG_bmp_large_ram[LAYERID] = MMU_BBG;
this->_BG_map_ram[num] = MMU_BBG; this->_BG_map_ram[LAYERID] = MMU_BBG;
} }
else else
{ {
this->_BG_tile_ram[num] = MMU_ABG + dispCnt->CharacBase_Block * ADDRESS_STEP_64KB; this->_BG_tile_ram[LAYERID] = MMU_ABG + dispCnt->CharacBase_Block * ADDRESS_STEP_64KB;
this->_BG_bmp_ram[num] = MMU_ABG; this->_BG_bmp_ram[LAYERID] = MMU_ABG;
this->_BG_bmp_large_ram[num] = MMU_ABG; this->_BG_bmp_large_ram[LAYERID] = MMU_ABG;
this->_BG_map_ram[num] = MMU_ABG + dispCnt->ScreenBase_Block * ADDRESS_STEP_64KB; this->_BG_map_ram[LAYERID] = MMU_ABG + dispCnt->ScreenBase_Block * ADDRESS_STEP_64KB;
} }
this->_BG_tile_ram[num] += (cnt->CharacBase_Block * ADDRESS_STEP_16KB); this->_BG_tile_ram[LAYERID] += (cnt->CharacBase_Block * ADDRESS_STEP_16KB);
this->_BG_bmp_ram[num] += (cnt->ScreenBase_Block * ADDRESS_STEP_16KB); this->_BG_bmp_ram[LAYERID] += (cnt->ScreenBase_Block * ADDRESS_STEP_16KB);
this->_BG_map_ram[num] += (cnt->ScreenBase_Block * ADDRESS_STEP_2KB); this->_BG_map_ram[LAYERID] += (cnt->ScreenBase_Block * ADDRESS_STEP_2KB);
switch (num) switch (LAYERID)
{ {
case 0: case 0:
case 1: case 1:
this->BGExtPalSlot[num] = cnt->PaletteSet_Wrap * 2 + num; this->BGExtPalSlot[LAYERID] = cnt->PaletteSet_Wrap * 2 + LAYERID;
break; break;
default: default:
this->BGExtPalSlot[num] = (u8)num; this->BGExtPalSlot[LAYERID] = (u8)LAYERID;
break; break;
} }
BGType mode = GPUEngineBase::_mode2type[dispCnt->BG_Mode][num]; BGType mode = GPUEngineBase::_mode2type[dispCnt->BG_Mode][LAYERID];
//clarify affine ext modes //clarify affine ext modes
if (mode == BGType_AffineExt) if (mode == BGType_AffineExt)
@ -746,12 +693,12 @@ void GPUEngineBase::SetBGProp(const size_t num, const u16 ctrlBits)
} }
} }
this->_BGTypes[num] = mode; this->_BGTypes[LAYERID] = mode;
this->BGSize[num][0] = GPUEngineBase::_sizeTab[mode][cnt->ScreenSize][0]; this->BGSize[LAYERID][0] = GPUEngineBase::_sizeTab[mode][cnt->ScreenSize][0];
this->BGSize[num][1] = GPUEngineBase::_sizeTab[mode][cnt->ScreenSize][1]; this->BGSize[LAYERID][1] = GPUEngineBase::_sizeTab[mode][cnt->ScreenSize][1];
this->_bgPrio[num] = (ctrlBits & 0x3); this->_bgPrio[LAYERID] = (ctrlBits & 0x3);
} }
template<bool ISCUSTOMRENDERINGNEEDED> template<bool ISCUSTOMRENDERINGNEEDED>
@ -789,6 +736,7 @@ void GPUEngineBase::SetLayerEnableState(const size_t layerIndex, bool theState)
// ROUTINES FOR INSIDE / OUTSIDE WINDOW CHECKS // ROUTINES FOR INSIDE / OUTSIDE WINDOW CHECKS
/*****************************************************************************/ /*****************************************************************************/
// check whether (x,y) is within the rectangle (including wraparounds)
template<int WIN_NUM> template<int WIN_NUM>
u8 GPUEngineBase::_WithinRect(const size_t x) const u8 GPUEngineBase::_WithinRect(const size_t x) const
{ {
@ -1100,11 +1048,11 @@ FORCEINLINE void GPUEngineBase::_SetFinalColorSprite(const size_t srcX, const si
template<GPULayerID LAYERID, bool BACKDROP, int FUNCNUM, bool ISCUSTOMRENDERINGNEEDED, bool USECUSTOMVRAM> template<GPULayerID LAYERID, bool BACKDROP, int FUNCNUM, bool ISCUSTOMRENDERINGNEEDED, bool USECUSTOMVRAM>
FORCEINLINE void GPUEngineBase::____setFinalColorBck(const u16 color, const size_t srcX) FORCEINLINE void GPUEngineBase::____setFinalColorBck(const u16 color, const size_t srcX)
{ {
u16 *dstLine = this->currDst;
u8 *bgLine = this->_bgPixels;
if (ISCUSTOMRENDERINGNEEDED) if (ISCUSTOMRENDERINGNEEDED)
{ {
u16 *dstLine = this->currDst;
u8 *bgLine = this->_bgPixels;
const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo(); const NDSDisplayInfo &dispInfo = GPU->GetDisplayInfo();
for (size_t line = 0; line < _gpuDstLineCount[this->currLine]; line++) for (size_t line = 0; line < _gpuDstLineCount[this->currLine]; line++)
@ -1130,8 +1078,8 @@ FORCEINLINE void GPUEngineBase::____setFinalColorBck(const u16 color, const size
{ {
this->_SetFinalColorBG<LAYERID, BACKDROP, FUNCNUM>(srcX, this->_SetFinalColorBG<LAYERID, BACKDROP, FUNCNUM>(srcX,
srcX, srcX,
dstLine, this->currDst,
bgLine, this->_bgPixels,
color); color);
} }
} }
@ -1217,7 +1165,7 @@ void GPUEngineBase::_MosaicSpriteLinePixel(const size_t x, u16 l, u16 *dst, u8 *
dst[x] = LE_TO_LOCAL_16(objColor.color); dst[x] = LE_TO_LOCAL_16(objColor.color);
dst_alpha[x] = objColor.alpha; dst_alpha[x] = objColor.alpha;
if (!objColor.opaque) prioTab[x] = 0xFF; if (!objColor.opaque) prioTab[x] = 0x7F;
} }
void GPUEngineBase::_MosaicSpriteLine(u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab) void GPUEngineBase::_MosaicSpriteLine(u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab)
@ -1228,6 +1176,61 @@ void GPUEngineBase::_MosaicSpriteLine(u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTa
this->_MosaicSpriteLinePixel(i, l, dst, dst_alpha, typeTab, prioTab); this->_MosaicSpriteLinePixel(i, l, dst, dst_alpha, typeTab, prioTab);
} }
template<rot_fun fun, bool WRAP>
void GPUEngineBase::_rot_scale_op(const BGxPARMS &param, const u16 LG, const s32 wh, const s32 ht, const u32 map, const u32 tile, const u16 *pal)
{
ROTOCOORD x, y;
x.val = param.BGxX;
y.val = param.BGxY;
const s32 dx = (s32)param.BGxPA;
const s32 dy = (s32)param.BGxPC;
// as an optimization, specially handle the fairly common case of
// "unrotated + unscaled + no boundary checking required"
if (dx == GPU_FRAMEBUFFER_NATIVE_WIDTH && dy == 0)
{
s32 auxX = (WRAP) ? x.bits.Integer & (wh-1) : x.bits.Integer;
const s32 auxY = (WRAP) ? y.bits.Integer & (ht-1) : y.bits.Integer;
if (WRAP || (auxX + LG < wh && auxX >= 0 && auxY < ht && auxY >= 0))
{
for (size_t i = 0; i < LG; i++)
{
fun(this, auxX, auxY, wh, map, tile, pal, i);
auxX++;
if (WRAP)
auxX = auxX & (wh-1);
}
return;
}
}
for (size_t i = 0; i < LG; i++, x.val += dx, y.val += dy)
{
const s32 auxX = (WRAP) ? x.bits.Integer & (wh-1) : x.bits.Integer;
const s32 auxY = (WRAP) ? y.bits.Integer & (ht-1) : y.bits.Integer;
if (WRAP || ((auxX >= 0) && (auxX < wh) && (auxY >= 0) && (auxY < ht)))
fun(this, auxX, auxY, wh, map, tile, pal, i);
}
}
template<GPULayerID LAYERID, rot_fun fun>
void GPUEngineBase::_apply_rot_fun(const BGxPARMS &param, const u16 LG, const u32 map, const u32 tile, const u16 *pal)
{
struct _BGxCNT *bgCnt = &(this->dispx_st)->dispx_BGxCNT[LAYERID].bits;
s32 wh = this->BGSize[LAYERID][0];
s32 ht = this->BGSize[LAYERID][1];
if (bgCnt->PaletteSet_Wrap)
this->_rot_scale_op<fun,true>(param, LG, wh, ht, map, tile, pal);
else
this->_rot_scale_op<fun,false>(param, LG, wh, ht, map, tile, pal);
}
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_LineLarge8bpp() void GPUEngineBase::_LineLarge8bpp()
{ {
@ -1250,12 +1253,10 @@ void GPUEngineBase::_LineLarge8bpp()
u32 tmp_map = this->_BG_bmp_large_ram[LAYERID] + lg * YBG; u32 tmp_map = this->_BG_bmp_large_ram[LAYERID] + lg * YBG;
u8 *map = (u8 *)MMU_gpu_map(tmp_map); u8 *map = (u8 *)MMU_gpu_map(tmp_map);
const u16 *pal = (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB);
for (size_t x = 0; x < lg; ++x, ++XBG) for (size_t x = 0; x < lg; ++x, ++XBG)
{ {
XBG &= wmask; XBG &= wmask;
const u16 color = LE_TO_LOCAL_16( pal[map[XBG]] ); const u16 color = LE_TO_LOCAL_16( this->_paletteBG[map[XBG]] );
this->__setFinalColorBck<MOSAIC,false,ISCUSTOMRENDERINGNEEDED>(color,x,(color!=0)); this->__setFinalColorBck<MOSAIC,false,ISCUSTOMRENDERINGNEEDED>(color,x,(color!=0));
} }
} }
@ -1295,7 +1296,7 @@ void GPUEngineBase::_RenderLine_TextBG(u16 XBG, u16 YBG, u16 LG)
if (!bgCnt->Palette_256) // color: 16 palette entries if (!bgCnt->Palette_256) // color: 16 palette entries
{ {
const u16 *pal = (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB); const u16 *pal = this->_paletteBG;
yoff = ((YBG&7)<<2); yoff = ((YBG&7)<<2);
xfin = 8 - (xoff&7); xfin = 8 - (xoff&7);
@ -1360,11 +1361,7 @@ void GPUEngineBase::_RenderLine_TextBG(u16 XBG, u16 YBG, u16 LG)
} }
else //256-color BG else //256-color BG
{ {
const u16 *pal = (dispCnt->ExBGxPalette_Enable) ? (u16 *)MMU.ExtPal[this->_engineID][this->BGExtPalSlot[LAYERID]] : (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB); const u16 *pal = (dispCnt->ExBGxPalette_Enable) ? (u16 *)MMU.ExtPal[this->_engineID][this->BGExtPalSlot[LAYERID]] : this->_paletteBG;
if (pal == NULL)
{
return;
}
yoff = ((YBG&7)<<3); yoff = ((YBG&7)<<3);
xfin = 8 - (xoff&7); xfin = 8 - (xoff&7);
@ -1407,9 +1404,8 @@ void GPUEngineBase::_RenderLine_TextBG(u16 XBG, u16 YBG, u16 LG)
// Renders LG pixels of an affine background line for layer LAYERID using the
// rot_tiled_8bit_entry pixel routine. The layer's map and tile RAM addresses
// and the engine's BG palette pointer are handed to the rot-function dispatcher
// together with the affine parameters in `param`.
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
void GPUEngineBase::_RotBG2(const BGxPARMS &param, const u16 LG) void GPUEngineBase::_RotBG2(const BGxPARMS &param, const u16 LG)
{ {
const u16 *pal = (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB);
// printf("rot mode\n"); // printf("rot mode\n");
apply_rot_fun< LAYERID, rot_tiled_8bit_entry<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(this, param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], pal); this->_apply_rot_fun< LAYERID, rot_tiled_8bit_entry<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], this->_paletteBG);
} }
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
@ -1417,41 +1413,43 @@ void GPUEngineBase::_ExtRotBG2(const BGxPARMS &param, const u16 LG)
{ {
struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits; struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits;
u16 *pal = NULL; u16 *pal = this->_paletteBG;
switch (this->_BGTypes[LAYERID]) switch (this->_BGTypes[LAYERID])
{ {
case BGType_AffineExt_256x16: // 16 bit bgmap entries case BGType_AffineExt_256x16: // 16 bit bgmap entries
pal = (dispCnt->ExBGxPalette_Enable) ? (u16 *)(MMU.ExtPal[this->_engineID][this->BGExtPalSlot[LAYERID]]) : (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB); {
if (pal == NULL) return; if (dispCnt->ExBGxPalette_Enable)
{
if(dispCnt->ExBGxPalette_Enable) pal = (u16 *)(MMU.ExtPal[this->_engineID][this->BGExtPalSlot[LAYERID]]);
apply_rot_fun< LAYERID, rot_tiled_16bit_entry<LAYERID, MOSAIC, true, ISCUSTOMRENDERINGNEEDED> >(this, param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], pal); this->_apply_rot_fun< LAYERID, rot_tiled_16bit_entry<LAYERID, MOSAIC, true, ISCUSTOMRENDERINGNEEDED> >(param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], pal);
}
else else
apply_rot_fun< LAYERID, rot_tiled_16bit_entry<LAYERID, MOSAIC, false, ISCUSTOMRENDERINGNEEDED> >(this, param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], pal); {
this->_apply_rot_fun< LAYERID, rot_tiled_16bit_entry<LAYERID, MOSAIC, false, ISCUSTOMRENDERINGNEEDED> >(param, LG, this->_BG_map_ram[LAYERID], this->_BG_tile_ram[LAYERID], pal);
}
break; break;
}
case BGType_AffineExt_256x1: // 256 colors case BGType_AffineExt_256x1: // 256 colors
pal = (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB); this->_apply_rot_fun< LAYERID, rot_256_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(param, LG, this->_BG_bmp_ram[LAYERID], 0, pal);
apply_rot_fun< LAYERID, rot_256_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(this, param, LG, this->_BG_bmp_ram[LAYERID], 0, pal);
break; break;
case BGType_AffineExt_Direct: // direct colors / BMP case BGType_AffineExt_Direct: // direct colors / BMP
{ {
if (ISCUSTOMRENDERINGNEEDED && (LAYERID == this->vramBGLayer)) if (ISCUSTOMRENDERINGNEEDED && (LAYERID == this->vramBGLayer))
{ {
apply_rot_fun< LAYERID, rot_BMP_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED, true> >(this, param, LG, this->_BG_bmp_ram[LAYERID], 0, NULL); this->_apply_rot_fun< LAYERID, rot_BMP_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED, true> >(param, LG, this->_BG_bmp_ram[LAYERID], 0, pal);
} }
else else
{ {
apply_rot_fun< LAYERID, rot_BMP_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED, false> >(this, param, LG, this->_BG_bmp_ram[LAYERID], 0, NULL); this->_apply_rot_fun< LAYERID, rot_BMP_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED, false> >(param, LG, this->_BG_bmp_ram[LAYERID], 0, pal);
} }
break; break;
} }
case BGType_Large8bpp: // large screen 256 colors case BGType_Large8bpp: // large screen 256 colors
pal = (u16 *)(MMU.ARM9_VMEM + this->_engineID * ADDRESS_STEP_1KB); this->_apply_rot_fun< LAYERID, rot_256_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(param, LG, this->_BG_bmp_large_ram[LAYERID], 0, pal);
apply_rot_fun< LAYERID, rot_256_map<LAYERID, MOSAIC, ISCUSTOMRENDERINGNEEDED> >(this, param, LG, this->_BG_bmp_large_ram[LAYERID], 0, pal);
break; break;
default: default:
@ -1526,9 +1524,54 @@ void GPUEngineBase::_LineExtRot()
/* http://nocash.emubase.de/gbatek.htm#dsvideoobjs */ /* http://nocash.emubase.de/gbatek.htm#dsvideoobjs */
void GPUEngineBase::_RenderSpriteBMP(const u8 spriteNum, const u16 l, u16 *dst, const u32 srcadr, u8 *dst_alpha, u8 *typeTab, u8 *prioTab, const u8 prio, const size_t lg, size_t sprX, size_t x, const s32 xdir, const u8 alpha) void GPUEngineBase::_RenderSpriteBMP(const u8 spriteNum, const u16 l, u16 *dst, const u32 srcadr, u8 *dst_alpha, u8 *typeTab, u8 *prioTab, const u8 prio, const size_t lg, size_t sprX, size_t x, const s32 xdir, const u8 alpha)
{ {
for (size_t i = 0; i < lg; i++, ++sprX, x += xdir) const u16 *bmpBuffer = (u16 *)MMU_gpu_map(srcadr);
size_t i = 0;
#ifdef ENABLE_SSE2
if (xdir == 1)
{ {
const u16 color = LE_TO_LOCAL_16( *(u16 *)MMU_gpu_map(srcadr + (x << 1)) ); const __m128i prio_vec128 = _mm_set1_epi8(prio);
const size_t ssePixCount = lg - (lg % 16);
for (; i < ssePixCount; i += 16, x += 16, sprX += 16)
{
__m128i prioTab_vec128 = _mm_load_si128((__m128i *)(prioTab + sprX));
const __m128i prioCompare = _mm_cmplt_epi8(prio_vec128, prioTab_vec128);
__m128i colorLo_vec128 = _mm_load_si128((__m128i *)(bmpBuffer + x));
__m128i colorHi_vec128 = _mm_load_si128((__m128i *)(bmpBuffer + x + 8));
const __m128i colorAlphaLo_vec128 = _mm_and_si128(colorLo_vec128, _mm_set1_epi16(0x8000));
const __m128i colorAlphaHi_vec128 = _mm_and_si128(colorHi_vec128, _mm_set1_epi16(0x8000));
const __m128i colorAlphaLoCompare = _mm_cmpeq_epi16(colorAlphaLo_vec128, _mm_set1_epi16(0x8000));
const __m128i colorAlphaHiCompare = _mm_cmpeq_epi16(colorAlphaHi_vec128, _mm_set1_epi16(0x8000));
const __m128i colorAlphaPackedCompare = _mm_cmpeq_epi8( _mm_packs_epi16(colorAlphaLoCompare, colorAlphaHiCompare), _mm_set1_epi8(0xFF) );
const __m128i combinedPackedCompare = _mm_and_si128(prioCompare, colorAlphaPackedCompare);
const __m128i combinedLoCompare = _mm_cmpeq_epi16( _mm_unpacklo_epi8(combinedPackedCompare, _mm_setzero_si128()), _mm_set1_epi16(0x00FF) );
const __m128i combinedHiCompare = _mm_cmpeq_epi16( _mm_unpackhi_epi8(combinedPackedCompare, _mm_setzero_si128()), _mm_set1_epi16(0x00FF) );
colorLo_vec128 = _mm_or_si128( _mm_and_si128(combinedLoCompare, colorLo_vec128), _mm_andnot_si128(combinedLoCompare, _mm_load_si128((__m128i *)(dst + sprX))) );
colorHi_vec128 = _mm_or_si128( _mm_and_si128(combinedHiCompare, colorHi_vec128), _mm_andnot_si128(combinedHiCompare, _mm_load_si128((__m128i *)(dst + sprX + 8))) );
const __m128i dstAlpha_vec128 = _mm_or_si128( _mm_and_si128(combinedPackedCompare, _mm_set1_epi8(alpha + 1)), _mm_andnot_si128(combinedPackedCompare, _mm_load_si128((__m128i *)(dst_alpha + sprX))) );
const __m128i dstTypeTab_vec128 = _mm_or_si128( _mm_and_si128(combinedPackedCompare, _mm_set1_epi8(3)), _mm_andnot_si128(combinedPackedCompare, _mm_load_si128((__m128i *)(typeTab + sprX))) );
prioTab_vec128 = _mm_or_si128( _mm_and_si128(combinedPackedCompare, prio_vec128), _mm_andnot_si128(combinedPackedCompare, prioTab_vec128) );
const __m128i sprNum_vec128 = _mm_or_si128( _mm_and_si128(combinedPackedCompare, _mm_set1_epi8(spriteNum)), _mm_andnot_si128(combinedPackedCompare, _mm_load_si128((__m128i *)(this->_sprNum + sprX))) );
_mm_store_si128((__m128i *)(dst + sprX), colorLo_vec128);
_mm_store_si128((__m128i *)(dst + sprX + 8), colorHi_vec128);
_mm_store_si128((__m128i *)(dst_alpha + sprX), dstAlpha_vec128);
_mm_store_si128((__m128i *)(typeTab + sprX), dstTypeTab_vec128);
_mm_store_si128((__m128i *)(prioTab + sprX), prioTab_vec128);
_mm_store_si128((__m128i *)(this->_sprNum + sprX), sprNum_vec128);
}
}
#endif
for (; i < lg; i++, sprX++, x += xdir)
{
const u16 color = LE_TO_LOCAL_16(bmpBuffer[x]);
//a cleared alpha bit suppresses the pixel from processing entirely; it doesnt exist //a cleared alpha bit suppresses the pixel from processing entirely; it doesnt exist
if ((color & 0x8000) && (prio < prioTab[sprX])) if ((color & 0x8000) && (prio < prioTab[sprX]))
@ -1708,12 +1751,11 @@ void GPUEngineBase::SpriteRender(u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioT
template<SpriteRenderMode MODE> template<SpriteRenderMode MODE>
void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab) void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab)
{ {
u16 l = currLine; u16 l = this->currLine;
size_t cost = 0; size_t cost = 0;
struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits; struct _DISPCNT *dispCnt = &(this->dispx_st)->dispx_DISPCNT.bits;
u8 block = this->_sprBoundary;
for (size_t i = 0; i < 128; i++) for (size_t i = 0; i < 128; i++)
{ {
const OAMAttributes &spriteInfo = this->_oamList[i]; const OAMAttributes &spriteInfo = this->_oamList[i];
@ -1733,6 +1775,7 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
s32 sprX, sprY, x, y, lg; s32 sprX, sprY, x, y, lg;
s32 xdir; s32 xdir;
u8 prio; u8 prio;
u16 *pal;
u8 *src; u8 *src;
u32 srcadr; u32 srcadr;
@ -1746,7 +1789,6 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
{ {
s32 fieldX, fieldY, auxX, auxY, realX, realY, offset; s32 fieldX, fieldY, auxX, auxY, realX, realY, offset;
u8 blockparameter; u8 blockparameter;
u16 *pal;
s16 dx, dmx, dy, dmy; s16 dx, dmx, dy, dmy;
u16 colour; u16 colour;
@ -1818,13 +1860,10 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
// If we are using 1 palette of 256 colours // If we are using 1 palette of 256 colours
if (spriteInfo.Depth) if (spriteInfo.Depth)
{ {
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << block)); src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << this->_sprBoundary));
// If extended palettes are set, use them // If extended palettes are set, use them
if (dispCnt->ExOBJPalette_Enable) pal = (dispCnt->ExOBJPalette_Enable) ? (u16 *)(MMU.ObjExtPal[this->_engineID][0]+(spriteInfo.PaletteIndex*0x200)) : this->_paletteOBJ;
pal = (u16 *)(MMU.ObjExtPal[this->_engineID][0]+(spriteInfo.PaletteIndex*0x200));
else
pal = (u16 *)(MMU.ARM9_VMEM + 0x200 + this->_engineID * ADDRESS_STEP_1KB);
for (size_t j = 0; j < lg; ++j, ++sprX) for (size_t j = 0; j < lg; ++j, ++sprX)
{ {
@ -1908,13 +1947,13 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
if (MODE == SpriteRenderMode_Sprite2D) if (MODE == SpriteRenderMode_Sprite2D)
{ {
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << 5)); src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << 5));
pal = (u16 *)(MMU.ARM9_VMEM + 0x200 + (this->_engineID * ADDRESS_STEP_1KB) + (spriteInfo.PaletteIndex * 32));
} }
else else
{ {
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << this->_sprBoundary)); src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex << this->_sprBoundary));
pal = (u16 *)(MMU.ARM9_VMEM + 0x200 + (this->_engineID * ADDRESS_STEP_1KB) + (spriteInfo.PaletteIndex * 32));
} }
pal = this->_paletteOBJ + (spriteInfo.PaletteIndex << 4);
for (size_t j = 0; j < lg; ++j, ++sprX) for (size_t j = 0; j < lg; ++j, ++sprX)
{ {
@ -1977,9 +2016,9 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
else else
{ {
if (spriteInfo.Depth) if (spriteInfo.Depth)
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex<<block) + ((y>>3)*sprSize.x*8) + ((y&0x7)*8)); src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex<<this->_sprBoundary) + ((y>>3)*sprSize.x*8) + ((y&0x7)*8));
else else
src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex<<block) + ((y>>3)*sprSize.x*4) + ((y&0x7)*4)); src = (u8 *)MMU_gpu_map(this->_sprMem + (spriteInfo.TileIndex<<this->_sprBoundary) + ((y>>3)*sprSize.x*4) + ((y&0x7)*4));
} }
this->_RenderSpriteWin(src, (spriteInfo.Depth != 0), lg, sprX, x, xdir); this->_RenderSpriteWin(src, (spriteInfo.Depth != 0), lg, sprX, x, xdir);
@ -1999,9 +2038,9 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
if (MODE == SpriteRenderMode_Sprite2D) if (MODE == SpriteRenderMode_Sprite2D)
srcadr = this->_sprMem + ((spriteInfo.TileIndex)<<5) + ((y>>3)<<10) + ((y&0x7)*8); srcadr = this->_sprMem + ((spriteInfo.TileIndex)<<5) + ((y>>3)<<10) + ((y&0x7)*8);
else else
srcadr = this->_sprMem + (spriteInfo.TileIndex<<block) + ((y>>3)*sprSize.x*8) + ((y&0x7)*8); srcadr = this->_sprMem + (spriteInfo.TileIndex<<this->_sprBoundary) + ((y>>3)*sprSize.x*8) + ((y&0x7)*8);
const u16 *pal = (dispCnt->ExOBJPalette_Enable) ? (u16 *)(MMU.ObjExtPal[this->_engineID][0]+(spriteInfo.PaletteIndex*0x200)) : (u16 *)(MMU.ARM9_VMEM + 0x200 + this->_engineID * ADDRESS_STEP_1KB); pal = (dispCnt->ExOBJPalette_Enable) ? (u16 *)(MMU.ObjExtPal[this->_engineID][0]+(spriteInfo.PaletteIndex*0x200)) : this->_paletteOBJ;
this->_RenderSprite256(i, l, dst, srcadr, pal, dst_alpha, typeTab, prioTab, prio, lg, sprX, x, xdir, spriteInfo.Mode == 1); this->_RenderSprite256(i, l, dst, srcadr, pal, dst_alpha, typeTab, prioTab, prio, lg, sprX, x, xdir, spriteInfo.Mode == 1);
} }
else // 16 colors else // 16 colors
@ -2012,10 +2051,10 @@ void GPUEngineBase::_SpriteRenderPerform(u16 *dst, u8 *dst_alpha, u8 *typeTab, u
} }
else else
{ {
srcadr = this->_sprMem + (spriteInfo.TileIndex<<block) + ((y>>3)*sprSize.x*4) + ((y&0x7)*4); srcadr = this->_sprMem + (spriteInfo.TileIndex<<this->_sprBoundary) + ((y>>3)*sprSize.x*4) + ((y&0x7)*4);
} }
const u16 *pal = (u16 *)(MMU.ARM9_VMEM + 0x200 + this->_engineID * ADDRESS_STEP_1KB) + (spriteInfo.PaletteIndex << 4); pal = this->_paletteOBJ + (spriteInfo.PaletteIndex << 4);
this->_RenderSprite16(l, dst, srcadr, pal, dst_alpha, typeTab, prioTab, prio, lg, sprX, x, xdir, spriteInfo.Mode == 1); this->_RenderSprite16(l, dst, srcadr, pal, dst_alpha, typeTab, prioTab, prio, lg, sprX, x, xdir, spriteInfo.Mode == 1);
} }
} }
@ -2259,7 +2298,7 @@ void GPUEngineBase::UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex,
if ( (spriteInfo.RotScale != 2) && ((spriteInfo.RotScale & 1) == 0) && (spriteInfo.Mode == 3) && (spriteInfo.PaletteIndex != 0) ) if ( (spriteInfo.RotScale != 2) && ((spriteInfo.RotScale & 1) == 0) && (spriteInfo.Mode == 3) && (spriteInfo.PaletteIndex != 0) )
{ {
const u32 vramAddress = ( (spriteInfo.TileIndex & 0x1F) * 0x10 ) + ( (spriteInfo.TileIndex & ~0x1F) * 0x80 ); const u32 vramAddress = ((spriteInfo.TileIndex & 0x1F) << 5) + ((spriteInfo.TileIndex & ~0x1F) << 7);
const SpriteSize sprSize = GPUEngineBase::_sprSizeTab[spriteInfo.Size][spriteInfo.Shape]; const SpriteSize sprSize = GPUEngineBase::_sprSizeTab[spriteInfo.Size][spriteInfo.Shape];
if( (vramAddress == (mainEngine->dispCapCnt.writeOffset * ADDRESS_STEP_32KB)) && (sprSize.x == 64) && (sprSize.y == 64) ) if( (vramAddress == (mainEngine->dispCapCnt.writeOffset * ADDRESS_STEP_32KB)) && (sprSize.x == 64) && (sprSize.y == 64) )
@ -2272,58 +2311,62 @@ void GPUEngineBase::UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex,
} }
} }
// Returns the stored affine start value for a background layer: the x component
// when the axis selector is 0, otherwise the y component. The layer number is
// offset by 2 to index affineInfo, so only layers 2 and 3 map to valid entries.
u32 GPUEngineBase::getAffineStart(const size_t layer, int xy) template<GPULayerID LAYERID, int SET_XY>
u32 GPUEngineBase::getAffineStart()
{ {
if (xy == 0) if (SET_XY == 0)
return affineInfo[layer-2].x; return this->affineInfo[LAYERID-2].x;
else else
return affineInfo[layer-2].y; return this->affineInfo[LAYERID-2].y;
} }
// Updates one 16-bit half of a layer's 32-bit affine start value. The current
// value is read through getAffineStart, the low half (word selector 0 / HIWORD
// false) or the high half (otherwise) is replaced with `val`, and the result is
// written back through setAffineStart.
void GPUEngineBase::setAffineStartWord(const size_t layer, int xy, u16 val, int word) template<GPULayerID LAYERID, int SET_XY, bool HIWORD>
void GPUEngineBase::setAffineStartWord(u16 val)
{ {
u32 curr = getAffineStart(layer, xy); u32 curr = this->getAffineStart<LAYERID, SET_XY>();
if (word == 0) if (!HIWORD)
curr = (curr & 0xFFFF0000) | val; curr = (curr & 0xFFFF0000) | val;
else else
curr = (curr & 0x0000FFFF) | (((u32)val) << 16); curr = (curr & 0x0000FFFF) | (((u32)val) << 16);
setAffineStart(layer, xy, curr); this->setAffineStart<LAYERID, SET_XY>(curr);
} }
// Stores `val` as the affine start value of a layer (x component when the axis
// selector is 0, otherwise y; the layer number is offset by 2 to index
// affineInfo) and then calls refreshAffineStartRegs for the same layer and axis
// so the register copy is updated to match.
void GPUEngineBase::setAffineStart(const size_t layer, int xy, u32 val) template<GPULayerID LAYERID, int SET_XY>
void GPUEngineBase::setAffineStart(u32 val)
{ {
if (xy == 0) if (SET_XY == 0)
affineInfo[layer-2].x = val; this->affineInfo[LAYERID-2].x = val;
else else
affineInfo[layer-2].y = val; this->affineInfo[LAYERID-2].y = val;
refreshAffineStartRegs(layer, xy); this->refreshAffineStartRegs<LAYERID, SET_XY>();
} }
// Copies the stored affine start value(s) from affineInfo into the BG2PARMS /
// BG3PARMS register block (BGxX for axis 0, BGxY otherwise).
// A layer selector of -1 means "both layers 2 and 3"; an axis selector of -1
// means "both x and y". Each sentinel is expanded by calling this function
// again with the concrete values.
void GPUEngineBase::refreshAffineStartRegs(const int num, const int xy) template<GPULayerID LAYERID, int SET_XY>
void GPUEngineBase::refreshAffineStartRegs()
{ {
// Layer sentinel: refresh layer 2 and layer 3, keeping the axis selector.
if (num == -1) if (LAYERID == -1)
{ {
refreshAffineStartRegs(2, xy); this->refreshAffineStartRegs<GPULayerID_BG2, SET_XY>();
refreshAffineStartRegs(3, xy); this->refreshAffineStartRegs<GPULayerID_BG3, SET_XY>();
return; return;
} }
// Axis sentinel: refresh x and then y for the given layer.
if (xy == -1) if (SET_XY == -1)
{ {
refreshAffineStartRegs(num, 0); this->refreshAffineStartRegs<LAYERID, 0>();
refreshAffineStartRegs(num, 1); this->refreshAffineStartRegs<LAYERID, 1>();
return; return;
} }
// Concrete layer and axis: any layer other than 2 selects the BG3 parameter block.
BGxPARMS *params = (num == 2) ? &(dispx_st)->dispx_BG2PARMS : &(dispx_st)->dispx_BG3PARMS; BGxPARMS *params = (LAYERID == GPULayerID_BG2) ? &(dispx_st)->dispx_BG2PARMS : &(dispx_st)->dispx_BG3PARMS;
if (xy == 0) if (SET_XY == 0)
params->BGxX = affineInfo[num-2].x; params->BGxX = this->affineInfo[LAYERID-2].x;
else else
params->BGxY = affineInfo[num-2].y; params->BGxY = this->affineInfo[LAYERID-2].y;
} }
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED>
@ -2737,6 +2780,8 @@ void GPUEngineBase::REG_DISPx_pack_test()
GPUEngineA::GPUEngineA() GPUEngineA::GPUEngineA()
{ {
_engineID = GPUEngineID_Main; _engineID = GPUEngineID_Main;
_paletteBG = (u16 *)MMU.ARM9_VMEM;
_paletteOBJ = (u16 *)(MMU.ARM9_VMEM + 0x200);
_oamList = (OAMAttributes *)(MMU.ARM9_OAM); _oamList = (OAMAttributes *)(MMU.ARM9_OAM);
_sprMem = MMU_AOBJ; _sprMem = MMU_AOBJ;
dispx_st = (REG_DISPx *)MMU.ARM9_REG; dispx_st = (REG_DISPx *)MMU.ARM9_REG;
@ -2888,7 +2933,7 @@ void GPUEngineA::RenderLine(const u16 l, bool skip)
//bubble bobble revolution classic mode //bubble bobble revolution classic mode
//NOTE: //NOTE:
//I am REALLY unsatisfied with this logic now. But it seems to be working.. //I am REALLY unsatisfied with this logic now. But it seems to be working..
this->refreshAffineStartRegs(-1,-1); this->refreshAffineStartRegs<(GPULayerID)-1, -1>();
} }
if (skip) if (skip)
@ -3019,7 +3064,7 @@ void GPUEngineA::_RenderLine_Layer(const u16 l, u16 *dstLine, const size_t dstLi
this->_currentFadeInColors = &GPUEngineBase::_fadeInColors[this->_BLDY_EVY][0]; this->_currentFadeInColors = &GPUEngineBase::_fadeInColors[this->_BLDY_EVY][0];
this->_currentFadeOutColors = &GPUEngineBase::_fadeOutColors[this->_BLDY_EVY][0]; this->_currentFadeOutColors = &GPUEngineBase::_fadeOutColors[this->_BLDY_EVY][0];
const u16 backdrop_color = T1ReadWord(MMU.ARM9_VMEM, 0) & 0x7FFF; const u16 backdrop_color = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF;
//we need to write backdrop colors in the same way as we do BG pixels in order to do correct window processing //we need to write backdrop colors in the same way as we do BG pixels in order to do correct window processing
//this is currently eating up 2fps or so. it is a reasonable candidate for optimization. //this is currently eating up 2fps or so. it is a reasonable candidate for optimization.
@ -3056,7 +3101,7 @@ void GPUEngineA::_RenderLine_Layer(const u16 l, u16 *dstLine, const size_t dstLi
// init background color & priorities // init background color & priorities
memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH); memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
memset(this->_sprType, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH); memset(this->_sprType, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
memset(this->_sprPrio, 0xFF, GPU_FRAMEBUFFER_NATIVE_WIDTH); memset(this->_sprPrio, 0x7F, GPU_FRAMEBUFFER_NATIVE_WIDTH);
memset(this->_sprWin, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH); memset(this->_sprWin, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
// init pixels priorities // init pixels priorities
@ -3275,7 +3320,6 @@ void GPUEngineA::_RenderLine_DisplayCapture(const u16 l)
cap_dst_adr &= 0x1FFFF; cap_dst_adr &= 0x1FFFF;
cap_dst_adr += vramWriteBlock * GPU_VRAM_BLOCK_LINES * GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16); cap_dst_adr += vramWriteBlock * GPU_VRAM_BLOCK_LINES * GPU_FRAMEBUFFER_NATIVE_WIDTH * sizeof(u16);
// TODO: Make MMU.blank_memory and MMU.ARM9_LCD 16-byte aligned so that we can use aligned load/store for better performance.
const u16 *cap_src = (u16 *)MMU.blank_memory; const u16 *cap_src = (u16 *)MMU.blank_memory;
u16 *cap_dst = (u16 *)(MMU.ARM9_LCD + cap_dst_adr); u16 *cap_dst = (u16 *)(MMU.ARM9_LCD + cap_dst_adr);
@ -3507,7 +3551,7 @@ void GPUEngineA::_RenderLine_DispCapture_Copy(const u16 *__restrict src, u16 *__
if (CAPTUREFROMNATIVESRC) if (CAPTUREFROMNATIVESRC)
{ {
#ifdef ENABLE_SSE2 #ifdef ENABLE_SSE2
MACRODO_N(CAPTURELENGTH / (sizeof(__m128i) / sizeof(u16)), _mm_storeu_si128((__m128i *)dst + X, _mm_or_si128( _mm_loadu_si128( (__m128i *)src + X), alpha_vec128 ) )); MACRODO_N(CAPTURELENGTH / (sizeof(__m128i) / sizeof(u16)), _mm_store_si128((__m128i *)dst + X, _mm_or_si128( _mm_load_si128( (__m128i *)src + X), alpha_vec128 ) ));
#else #else
for (size_t i = 0; i < CAPTURELENGTH; i++) for (size_t i = 0; i < CAPTURELENGTH; i++)
{ {
@ -3742,7 +3786,7 @@ void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *__restrict srcA, const
srcA[_gpuDstPitchIndex[i+1]], srcA[_gpuDstPitchIndex[i+1]],
srcA[_gpuDstPitchIndex[i+0]]); srcA[_gpuDstPitchIndex[i+0]]);
__m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? _mm_loadu_si128((__m128i *)(srcB + i)) : _mm_set_epi16(srcB[_gpuDstPitchIndex[i+7]], __m128i srcB_vec128 = (CAPTUREFROMNATIVESRCB) ? _mm_load_si128((__m128i *)(srcB + i)) : _mm_set_epi16(srcB[_gpuDstPitchIndex[i+7]],
srcB[_gpuDstPitchIndex[i+6]], srcB[_gpuDstPitchIndex[i+6]],
srcB[_gpuDstPitchIndex[i+5]], srcB[_gpuDstPitchIndex[i+5]],
srcB[_gpuDstPitchIndex[i+4]], srcB[_gpuDstPitchIndex[i+4]],
@ -3751,7 +3795,7 @@ void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *__restrict srcA, const
srcB[_gpuDstPitchIndex[i+1]], srcB[_gpuDstPitchIndex[i+1]],
srcB[_gpuDstPitchIndex[i+0]]); srcB[_gpuDstPitchIndex[i+0]]);
_mm_storeu_si128( (__m128i *)(dst + i), this->_RenderLine_DispCapture_BlendFunc_SSE2(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) ); _mm_store_si128( (__m128i *)(dst + i), this->_RenderLine_DispCapture_BlendFunc_SSE2(srcA_vec128, srcB_vec128, blendEVA_vec128, blendEVB_vec128) );
} }
#else #else
for (size_t i = 0; i < CAPTURELENGTH; i++) for (size_t i = 0; i < CAPTURELENGTH; i++)
@ -3788,6 +3832,8 @@ void GPUEngineA::_RenderLine_DispCapture_Blend(const u16 *__restrict srcA, const
GPUEngineB::GPUEngineB() GPUEngineB::GPUEngineB()
{ {
_engineID = GPUEngineID_Sub; _engineID = GPUEngineID_Sub;
_paletteBG = (u16 *)(MMU.ARM9_VMEM + ADDRESS_STEP_1KB);
_paletteOBJ = (u16 *)(MMU.ARM9_VMEM + ADDRESS_STEP_1KB + 0x200);
_oamList = (OAMAttributes *)(MMU.ARM9_OAM + ADDRESS_STEP_1KB); _oamList = (OAMAttributes *)(MMU.ARM9_OAM + ADDRESS_STEP_1KB);
_sprMem = MMU_BOBJ; _sprMem = MMU_BOBJ;
dispx_st = (REG_DISPx *)(&MMU.ARM9_REG[REG_DISPB]); dispx_st = (REG_DISPx *)(&MMU.ARM9_REG[REG_DISPB]);
@ -3837,7 +3883,7 @@ void GPUEngineB::RenderLine(const u16 l, bool skip)
//bubble bobble revolution classic mode //bubble bobble revolution classic mode
//NOTE: //NOTE:
//I am REALLY unsatisfied with this logic now. But it seems to be working.. //I am REALLY unsatisfied with this logic now. But it seems to be working..
this->refreshAffineStartRegs(-1,-1); this->refreshAffineStartRegs<(GPULayerID)-1, -1>();
} }
if (skip) if (skip)
@ -3934,7 +3980,7 @@ void GPUEngineB::_RenderLine_Layer(const u16 l, u16 *dstLine, const size_t dstLi
this->_currentFadeInColors = &GPUEngineBase::_fadeInColors[this->_BLDY_EVY][0]; this->_currentFadeInColors = &GPUEngineBase::_fadeInColors[this->_BLDY_EVY][0];
this->_currentFadeOutColors = &GPUEngineBase::_fadeOutColors[this->_BLDY_EVY][0]; this->_currentFadeOutColors = &GPUEngineBase::_fadeOutColors[this->_BLDY_EVY][0];
const u16 backdrop_color = T1ReadWord(MMU.ARM9_VMEM, ADDRESS_STEP_1KB) & 0x7FFF; const u16 backdrop_color = LE_TO_LOCAL_16(this->_paletteBG[0]) & 0x7FFF;
//we need to write backdrop colors in the same way as we do BG pixels in order to do correct window processing //we need to write backdrop colors in the same way as we do BG pixels in order to do correct window processing
//this is currently eating up 2fps or so. it is a reasonable candidate for optimization. //this is currently eating up 2fps or so. it is a reasonable candidate for optimization.
@ -3971,7 +4017,7 @@ void GPUEngineB::_RenderLine_Layer(const u16 l, u16 *dstLine, const size_t dstLi
// init background color & priorities // init background color & priorities
memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH); memset(this->_sprAlpha, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
memset(this->_sprType, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH); memset(this->_sprType, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
memset(this->_sprPrio, 0xFF, GPU_FRAMEBUFFER_NATIVE_WIDTH); memset(this->_sprPrio, 0x7F, GPU_FRAMEBUFFER_NATIVE_WIDTH);
memset(this->_sprWin, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH); memset(this->_sprWin, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH);
// init pixels priorities // init pixels priorities
@ -4029,9 +4075,6 @@ void GPUEngineB::_RenderLine_Layer(const u16 l, u16 *dstLine, const size_t dstLi
struct _BGxCNT *bgCnt = &(this->dispx_st)->dispx_BGxCNT[layerID].bits; struct _BGxCNT *bgCnt = &(this->dispx_st)->dispx_BGxCNT[layerID].bits;
this->_curr_mosaic_enabled = bgCnt->Mosaic_Enable; this->_curr_mosaic_enabled = bgCnt->Mosaic_Enable;
//useful for debugging individual layers
//if(this->core == GPUEngineID_Sub || layerNum != 2) continue;
#ifndef DISABLE_MOSAIC #ifndef DISABLE_MOSAIC
if (this->_curr_mosaic_enabled) if (this->_curr_mosaic_enabled)
{ {
@ -4531,10 +4574,6 @@ void GPUSubsystem::RenderLine(const u16 l, bool skip)
this->_engineSub->RenderLine<false>(l, skip); this->_engineSub->RenderLine<false>(l, skip);
} }
if (l == 191)
{
}
} }
void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551) void GPUSubsystem::ClearWithColor(const u16 colorBGRA5551)
@ -4581,3 +4620,18 @@ void NDSDisplay::SetEngineByID(const GPUEngineID theID)
this->_gpu = (theID == GPUEngineID_Main) ? (GPUEngineBase *)GPU->GetEngineMain() : (GPUEngineBase *)GPU->GetEngineSub(); this->_gpu = (theID == GPUEngineID_Main) ? (GPUEngineBase *)GPU->GetEngineMain() : (GPUEngineBase *)GPU->GetEngineSub();
this->_gpu->SetDisplayByID(this->_ID); this->_gpu->SetDisplayByID(this->_ID);
} }
template void GPUEngineBase::setAffineStart<GPULayerID_BG2, 0>(u32 val);
template void GPUEngineBase::setAffineStart<GPULayerID_BG2, 1>(u32 val);
template void GPUEngineBase::setAffineStart<GPULayerID_BG3, 0>(u32 val);
template void GPUEngineBase::setAffineStart<GPULayerID_BG3, 1>(u32 val);
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG2, 0, false>(u16 val);
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG2, 0, true>(u16 val);
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG2, 1, false>(u16 val);
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG2, 1, true>(u16 val);
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG3, 0, false>(u16 val);
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG3, 0, true>(u16 val);
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG3, 1, false>(u16 val);
template void GPUEngineBase::setAffineStartWord<GPULayerID_BG3, 1, true>(u16 val);

View File

@ -30,6 +30,7 @@
#include <emmintrin.h> #include <emmintrin.h>
#endif #endif
class GPUEngineBase;
class EMUFILE; class EMUFILE;
struct MMU_struct; struct MMU_struct;
@ -44,6 +45,8 @@ struct MMU_struct;
void gpu_savestate(EMUFILE* os); void gpu_savestate(EMUFILE* os);
bool gpu_loadstate(EMUFILE* is, int size); bool gpu_loadstate(EMUFILE* is, int size);
typedef void (*rot_fun)(GPUEngineBase *gpu, const s32 auxX, const s32 auxY, const int lg, const u32 map, const u32 tile, const u16 *pal, const size_t i);
/******************************************************************************* /*******************************************************************************
this structure is for display control, this structure is for display control,
it holds flags for general display it holds flags for general display
@ -751,12 +754,14 @@ protected:
} _mosaicColors; } _mosaicColors;
GPUEngineID _engineID; GPUEngineID _engineID;
u16 *_paletteBG;
u16 *_paletteOBJ;
OAMAttributes *_oamList;
u32 _sprMem;
u8 _bgPrio[5]; u8 _bgPrio[5];
bool _bg0HasHighestPrio; bool _bg0HasHighestPrio;
OAMAttributes *_oamList;
u32 _sprMem;
u8 _sprBoundary; u8 _sprBoundary;
u8 _sprBMPBoundary; u8 _sprBMPBoundary;
u8 _sprBMPMode; u8 _sprBMPMode;
@ -833,6 +838,9 @@ protected:
void _MosaicSpriteLinePixel(const size_t x, u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab); void _MosaicSpriteLinePixel(const size_t x, u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab);
void _MosaicSpriteLine(u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab); void _MosaicSpriteLine(u16 l, u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab);
template<rot_fun fun, bool WRAP> void _rot_scale_op(const BGxPARMS &param, const u16 LG, const s32 wh, const s32 ht, const u32 map, const u32 tile, const u16 *pal);
template<GPULayerID LAYERID, rot_fun fun> void _apply_rot_fun(const BGxPARMS &param, const u16 LG, const u32 map, const u32 tile, const u16 *pal);
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineLarge8bpp(); template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineLarge8bpp();
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_TextBG(u16 XBG, u16 YBG, u16 LG); template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _RenderLine_TextBG(u16 XBG, u16 YBG, u16 LG);
@ -843,7 +851,6 @@ protected:
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineRot(); template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineRot();
template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineExtRot(); template<GPULayerID LAYERID, bool MOSAIC, bool ISCUSTOMRENDERINGNEEDED> void _LineExtRot();
// check whether (x,y) is within the rectangle (including wraparounds)
template<int WIN_NUM> u8 _WithinRect(const size_t x) const; template<int WIN_NUM> u8 _WithinRect(const size_t x) const;
template <GPULayerID LAYERID> void _RenderLine_CheckWindows(const size_t srcX, bool &draw, bool &effect) const; template <GPULayerID LAYERID> void _RenderLine_CheckWindows(const size_t srcX, bool &draw, bool &effect) const;
@ -890,7 +897,7 @@ public:
void SetupFinalPixelBlitter(); void SetupFinalPixelBlitter();
void SetVideoProp(const u32 ctrlBits); void SetVideoProp(const u32 ctrlBits);
void SetBGProp(const size_t num, const u16 ctrlBits); template<GPULayerID LAYERID> void SetBGProp(const u16 ctrlBits);
template<bool ISCUSTOMRENDERINGNEEDED> void RenderLine(const u16 l, bool skip); template<bool ISCUSTOMRENDERINGNEEDED> void RenderLine(const u16 l, bool skip);
@ -945,10 +952,10 @@ public:
void UpdateVRAM3DUsageProperties_BGLayer(const size_t bankIndex, VRAM3DUsageProperties &outProperty); void UpdateVRAM3DUsageProperties_BGLayer(const size_t bankIndex, VRAM3DUsageProperties &outProperty);
void UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex, VRAM3DUsageProperties &outProperty); void UpdateVRAM3DUsageProperties_OBJLayer(const size_t bankIndex, VRAM3DUsageProperties &outProperty);
void setAffineStart(const size_t layer, int xy, u32 val); template<GPULayerID LAYERID, int SET_XY> void setAffineStart(u32 val);
void setAffineStartWord(const size_t layer, int xy, u16 val, int word); template<GPULayerID LAYERID, int SET_XY, bool HIWORD> void setAffineStartWord(u16 val);
u32 getAffineStart(const size_t layer, int xy); template<GPULayerID LAYERID, int SET_XY> u32 getAffineStart();
void refreshAffineStartRegs(const int num, const int xy); template<GPULayerID LAYERID, int SET_XY> void refreshAffineStartRegs();
void SpriteRender(u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab); void SpriteRender(u16 *dst, u8 *dst_alpha, u8 *typeTab, u8 *prioTab);
void ModeRenderDebug(const GPULayerID layerID); void ModeRenderDebug(const GPULayerID layerID);

View File

@ -305,7 +305,7 @@ struct TVramBankInfo {
u8 page_addr, num_pages; u8 page_addr, num_pages;
}; };
static const TVramBankInfo vram_bank_info[VRAM_BANKS] = { static const TVramBankInfo vram_bank_info[VRAM_BANK_COUNT] = {
{0,8}, {0,8},
{8,8}, {8,8},
{16,8}, {16,8},
@ -483,7 +483,7 @@ std::string VramConfiguration::describePurpose(Purpose p) {
std::string VramConfiguration::describe() { std::string VramConfiguration::describe() {
std::stringstream ret; std::stringstream ret;
for(int i=0;i<VRAM_BANKS;i++) { for(int i=0;i<VRAM_BANK_COUNT;i++) {
ret << (char)(i+'A') << ": " << banks[i].ofs << " " << describePurpose(banks[i].purpose) << std::endl; ret << (char)(i+'A') << ": " << banks[i].ofs << " " << describePurpose(banks[i].purpose) << std::endl;
} }
return ret.str(); return ret.str();
@ -514,49 +514,46 @@ static inline u8* MMU_vram_physical(const int page)
return MMU.ARM9_LCD + (page*ADDRESS_STEP_16KB); return MMU.ARM9_LCD + (page*ADDRESS_STEP_16KB);
} }
//todo - templateize template <VRAMBankID VRAMBANK>
static inline void MMU_VRAMmapRefreshBank(const int bank) static inline void MMU_VRAMmapRefreshBank()
{ {
int block = bank; const size_t block = (VRAMBANK >= VRAM_BANK_H) ? VRAMBANK + 1 : VRAMBANK;
if(bank >= VRAM_BANK_H) block++;
VRAMCNT VRAMBankCnt;
u8 VRAMBankCnt = T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x240 + block); VRAMBankCnt.value = T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x240 + block);
//do nothing if the bank isnt enabled //do nothing if the bank isnt enabled
u8 en = VRAMBankCnt & 0x80; if(VRAMBankCnt.Enable == 0) return;
if(!en) return;
int mst,ofs=0; switch(VRAMBANK) {
switch(bank) {
case VRAM_BANK_A: case VRAM_BANK_A:
case VRAM_BANK_B: case VRAM_BANK_B:
mst = VRAMBankCnt & 3; assert(VRAMBankCnt.MST == VRAMBankCnt.MST_ABHI);
ofs = (VRAMBankCnt>>3) & 3; switch(VRAMBankCnt.MST_ABHI)
switch(mst)
{ {
case 0: //LCDC case 0: //LCDC
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
MMU_vram_lcdc(bank); MMU_vram_lcdc(VRAMBANK);
if(ofs != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs); if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST_ABHI, VRAMBankCnt.OFS);
break; break;
case 1: //ABG case 1: //ABG
vramConfiguration.banks[bank].purpose = VramConfiguration::ABG; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABG;
MMU_vram_arm9(bank,VRAM_PAGE_ABG+ofs*8); MMU_vram_arm9(VRAMBANK,VRAM_PAGE_ABG+VRAMBankCnt.OFS*8);
break; break;
case 2: //AOBJ case 2: //AOBJ
vramConfiguration.banks[bank].purpose = VramConfiguration::AOBJ; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::AOBJ;
switch(ofs) { switch(VRAMBankCnt.OFS) {
case 0: case 0:
case 1: case 1:
MMU_vram_arm9(bank,VRAM_PAGE_AOBJ+ofs*8); MMU_vram_arm9(VRAMBANK,VRAM_PAGE_AOBJ+VRAMBankCnt.OFS*8);
break; break;
default: default:
PROGINFO("Unsupported ofs setting %d for engine A OBJ vram bank %c\n", ofs, 'A'+bank); PROGINFO("Unsupported ofs setting %d for engine A OBJ vram bank %c\n", VRAMBankCnt.OFS, 'A'+VRAMBANK);
} }
break; break;
case 3: //texture case 3: //texture
vramConfiguration.banks[bank].purpose = VramConfiguration::TEX; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::TEX;
MMU.texInfo.textureSlotAddr[ofs] = MMU_vram_physical(vram_bank_info[bank].page_addr); MMU.texInfo.textureSlotAddr[VRAMBankCnt.OFS] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
break; break;
default: goto unsupported_mst; default: goto unsupported_mst;
} }
@ -564,78 +561,75 @@ static inline void MMU_VRAMmapRefreshBank(const int bank)
case VRAM_BANK_C: case VRAM_BANK_C:
case VRAM_BANK_D: case VRAM_BANK_D:
mst = VRAMBankCnt & 7; switch(VRAMBankCnt.MST)
ofs = (VRAMBankCnt>>3) & 3;
switch(mst)
{ {
case 0: //LCDC case 0: //LCDC
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
MMU_vram_lcdc(bank); MMU_vram_lcdc(VRAMBANK);
if(ofs != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs); if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST, VRAMBankCnt.OFS);
break; break;
case 1: //ABG case 1: //ABG
vramConfiguration.banks[bank].purpose = VramConfiguration::ABG; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABG;
MMU_vram_arm9(bank,VRAM_PAGE_ABG+ofs*8); MMU_vram_arm9(VRAMBANK,VRAM_PAGE_ABG+VRAMBankCnt.OFS*8);
break; break;
case 2: //arm7 case 2: //arm7
vramConfiguration.banks[bank].purpose = VramConfiguration::ARM7; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ARM7;
if(bank == 2) T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240, T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240) | 1); if(VRAMBANK == 2) T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240, T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240) | 1);
if(bank == 3) T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240, T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240) | 2); if(VRAMBANK == 3) T1WriteByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240, T1ReadByte(MMU.MMU_MEM[ARMCPU_ARM7][0x40], 0x240) | 2);
//printf("DING!\n"); //printf("DING!\n");
switch(ofs) { switch(VRAMBankCnt.OFS) {
case 0: case 0:
case 1: case 1:
vram_arm7_map[ofs] = vram_bank_info[bank].page_addr; vram_arm7_map[VRAMBankCnt.OFS] = vram_bank_info[VRAMBANK].page_addr;
break; break;
default: default:
PROGINFO("Unsupported ofs setting %d for arm7 vram bank %c\n", ofs, 'A'+bank); PROGINFO("Unsupported ofs setting %d for arm7 vram bank %c\n", VRAMBankCnt.OFS, 'A'+VRAMBANK);
} }
break; break;
case 3: //texture case 3: //texture
vramConfiguration.banks[bank].purpose = VramConfiguration::TEX; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::TEX;
MMU.texInfo.textureSlotAddr[ofs] = MMU_vram_physical(vram_bank_info[bank].page_addr); MMU.texInfo.textureSlotAddr[VRAMBankCnt.OFS] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
break; break;
case 4: //BGB or BOBJ case 4: //BGB or BOBJ
if(bank == VRAM_BANK_C) { if(VRAMBANK == VRAM_BANK_C) {
vramConfiguration.banks[bank].purpose = VramConfiguration::BBG; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BBG;
MMU_vram_arm9(bank,VRAM_PAGE_BBG); //BBG MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BBG); //BBG
} else { } else {
vramConfiguration.banks[bank].purpose = VramConfiguration::BOBJ; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BOBJ;
MMU_vram_arm9(bank,VRAM_PAGE_BOBJ); //BOBJ MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BOBJ); //BOBJ
} }
if(ofs != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs); if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST, VRAMBankCnt.OFS);
break; break;
default: goto unsupported_mst; default: goto unsupported_mst;
} }
break; break;
case VRAM_BANK_E: case VRAM_BANK_E:
mst = VRAMBankCnt & 7; if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST, VRAMBankCnt.OFS);
if(((VRAMBankCnt>>3)&3) != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs); switch(VRAMBankCnt.MST) {
switch(mst) {
case 0: //LCDC case 0: //LCDC
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
MMU_vram_lcdc(bank); MMU_vram_lcdc(VRAMBANK);
break; break;
case 1: //ABG case 1: //ABG
vramConfiguration.banks[bank].purpose = VramConfiguration::ABG; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABG;
MMU_vram_arm9(bank,VRAM_PAGE_ABG); MMU_vram_arm9(VRAMBANK,VRAM_PAGE_ABG);
break; break;
case 2: //AOBJ case 2: //AOBJ
vramConfiguration.banks[bank].purpose = VramConfiguration::AOBJ; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::AOBJ;
MMU_vram_arm9(bank,VRAM_PAGE_AOBJ); MMU_vram_arm9(VRAMBANK,VRAM_PAGE_AOBJ);
break; break;
case 3: //texture palette case 3: //texture palette
vramConfiguration.banks[bank].purpose = VramConfiguration::TEXPAL; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::TEXPAL;
MMU.texInfo.texPalSlot[0] = MMU_vram_physical(vram_bank_info[bank].page_addr); MMU.texInfo.texPalSlot[0] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
MMU.texInfo.texPalSlot[1] = MMU_vram_physical(vram_bank_info[bank].page_addr+1); MMU.texInfo.texPalSlot[1] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr+1);
MMU.texInfo.texPalSlot[2] = MMU_vram_physical(vram_bank_info[bank].page_addr+2); MMU.texInfo.texPalSlot[2] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr+2);
MMU.texInfo.texPalSlot[3] = MMU_vram_physical(vram_bank_info[bank].page_addr+3); MMU.texInfo.texPalSlot[3] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr+3);
break; break;
case 4: //A BG extended palette case 4: //A BG extended palette
vramConfiguration.banks[bank].purpose = VramConfiguration::ABGEXTPAL; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABGEXTPAL;
MMU.ExtPal[0][0] = MMU_vram_physical(vram_bank_info[bank].page_addr); MMU.ExtPal[0][0] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
MMU.ExtPal[0][1] = MMU.ExtPal[0][0] + ADDRESS_STEP_8KB; MMU.ExtPal[0][1] = MMU.ExtPal[0][0] + ADDRESS_STEP_8KB;
MMU.ExtPal[0][2] = MMU.ExtPal[0][1] + ADDRESS_STEP_8KB; MMU.ExtPal[0][2] = MMU.ExtPal[0][1] + ADDRESS_STEP_8KB;
MMU.ExtPal[0][3] = MMU.ExtPal[0][2] + ADDRESS_STEP_8KB; MMU.ExtPal[0][3] = MMU.ExtPal[0][2] + ADDRESS_STEP_8KB;
@ -646,50 +640,48 @@ static inline void MMU_VRAMmapRefreshBank(const int bank)
case VRAM_BANK_F: case VRAM_BANK_F:
case VRAM_BANK_G: { case VRAM_BANK_G: {
mst = VRAMBankCnt & 7;
ofs = (VRAMBankCnt>>3) & 3;
const int pageofslut[] = {0,1,4,5}; const int pageofslut[] = {0,1,4,5};
const int pageofs = pageofslut[ofs]; const int pageofs = pageofslut[VRAMBankCnt.OFS];
switch(mst) switch(VRAMBankCnt.MST)
{ {
case 0: //LCDC case 0: //LCDC
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
MMU_vram_lcdc(bank); MMU_vram_lcdc(VRAMBANK);
if(ofs != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs); if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST, VRAMBankCnt.OFS);
break; break;
case 1: //ABG case 1: //ABG
vramConfiguration.banks[bank].purpose = VramConfiguration::ABG; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABG;
MMU_vram_arm9(bank,VRAM_PAGE_ABG+pageofs); MMU_vram_arm9(VRAMBANK,VRAM_PAGE_ABG+pageofs);
MMU_vram_arm9(bank,VRAM_PAGE_ABG+pageofs+2); //unexpected mirroring (required by spyro eternal night) MMU_vram_arm9(VRAMBANK,VRAM_PAGE_ABG+pageofs+2); //unexpected mirroring (required by spyro eternal night)
break; break;
case 2: //AOBJ case 2: //AOBJ
vramConfiguration.banks[bank].purpose = VramConfiguration::AOBJ; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::AOBJ;
MMU_vram_arm9(bank,VRAM_PAGE_AOBJ+pageofs); MMU_vram_arm9(VRAMBANK,VRAM_PAGE_AOBJ+pageofs);
MMU_vram_arm9(bank,VRAM_PAGE_AOBJ+pageofs+2); //unexpected mirroring - I have no proof, but it is inferred from the ABG above MMU_vram_arm9(VRAMBANK,VRAM_PAGE_AOBJ+pageofs+2); //unexpected mirroring - I have no proof, but it is inferred from the ABG above
break; break;
case 3: //texture palette case 3: //texture palette
vramConfiguration.banks[bank].purpose = VramConfiguration::TEXPAL; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::TEXPAL;
MMU.texInfo.texPalSlot[pageofs] = MMU_vram_physical(vram_bank_info[bank].page_addr); MMU.texInfo.texPalSlot[pageofs] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
break; break;
case 4: //A BG extended palette case 4: //A BG extended palette
switch(ofs) { switch(VRAMBankCnt.OFS) {
case 0: case 0:
case 1: case 1:
vramConfiguration.banks[bank].purpose = VramConfiguration::ABGEXTPAL; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::ABGEXTPAL;
MMU.ExtPal[0][ofs*2] = MMU_vram_physical(vram_bank_info[bank].page_addr); MMU.ExtPal[0][VRAMBankCnt.OFS*2] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
MMU.ExtPal[0][ofs*2+1] = MMU.ExtPal[0][ofs*2] + ADDRESS_STEP_8KB; MMU.ExtPal[0][VRAMBankCnt.OFS*2+1] = MMU.ExtPal[0][VRAMBankCnt.OFS*2] + ADDRESS_STEP_8KB;
break; break;
default: default:
vramConfiguration.banks[bank].purpose = VramConfiguration::INVALID; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::INVALID;
PROGINFO("Unsupported ofs setting %d for engine A bgextpal vram bank %c\n", ofs, 'A'+bank); PROGINFO("Unsupported ofs setting %d for engine A bgextpal vram bank %c\n", VRAMBankCnt.OFS, 'A'+VRAMBANK);
break; break;
} }
break; break;
case 5: //A OBJ extended palette case 5: //A OBJ extended palette
vramConfiguration.banks[bank].purpose = VramConfiguration::AOBJEXTPAL; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::AOBJEXTPAL;
MMU.ObjExtPal[0][0] = MMU_vram_physical(vram_bank_info[bank].page_addr); MMU.ObjExtPal[0][0] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
MMU.ObjExtPal[0][1] = MMU.ObjExtPal[0][1] + ADDRESS_STEP_8KB; MMU.ObjExtPal[0][1] = MMU.ObjExtPal[0][1] + ADDRESS_STEP_8KB;
if(ofs != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs); if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST, VRAMBankCnt.OFS);
break; break;
default: goto unsupported_mst; default: goto unsupported_mst;
} }
@ -697,22 +689,22 @@ static inline void MMU_VRAMmapRefreshBank(const int bank)
} }
case VRAM_BANK_H: case VRAM_BANK_H:
mst = VRAMBankCnt & 3; assert(VRAMBankCnt.MST == VRAMBankCnt.MST_ABHI);
if(((VRAMBankCnt>>3)&3) != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs); if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST_ABHI, VRAMBankCnt.OFS);
switch(mst) switch(VRAMBankCnt.MST_ABHI)
{ {
case 0: //LCDC case 0: //LCDC
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
MMU_vram_lcdc(bank); MMU_vram_lcdc(VRAMBANK);
break; break;
case 1: //BBG case 1: //BBG
vramConfiguration.banks[bank].purpose = VramConfiguration::BBG; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BBG;
MMU_vram_arm9(bank,VRAM_PAGE_BBG); MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BBG);
MMU_vram_arm9(bank,VRAM_PAGE_BBG + 4); //unexpected mirroring MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BBG + 4); //unexpected mirroring
break; break;
case 2: //B BG extended palette case 2: //B BG extended palette
vramConfiguration.banks[bank].purpose = VramConfiguration::BBGEXTPAL; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BBGEXTPAL;
MMU.ExtPal[1][0] = MMU_vram_physical(vram_bank_info[bank].page_addr); MMU.ExtPal[1][0] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
MMU.ExtPal[1][1] = MMU.ExtPal[1][0] + ADDRESS_STEP_8KB; MMU.ExtPal[1][1] = MMU.ExtPal[1][0] + ADDRESS_STEP_8KB;
MMU.ExtPal[1][2] = MMU.ExtPal[1][1] + ADDRESS_STEP_8KB; MMU.ExtPal[1][2] = MMU.ExtPal[1][1] + ADDRESS_STEP_8KB;
MMU.ExtPal[1][3] = MMU.ExtPal[1][2] + ADDRESS_STEP_8KB; MMU.ExtPal[1][3] = MMU.ExtPal[1][2] + ADDRESS_STEP_8KB;
@ -722,27 +714,27 @@ static inline void MMU_VRAMmapRefreshBank(const int bank)
break; break;
case VRAM_BANK_I: case VRAM_BANK_I:
mst = VRAMBankCnt & 3; assert(VRAMBankCnt.MST == VRAMBankCnt.MST_ABHI);
if(((VRAMBankCnt>>3)&3) != 0) PROGINFO("Bank %i: MST %i OFS %i\n", mst, ofs); if(VRAMBankCnt.OFS != 0) PROGINFO("Bank %i: MST %i OFS %i\n", VRAMBankCnt.MST_ABHI, VRAMBankCnt.OFS);
switch(mst) switch(VRAMBankCnt.MST_ABHI)
{ {
case 0: //LCDC case 0: //LCDC
vramConfiguration.banks[bank].purpose = VramConfiguration::LCDC; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::LCDC;
MMU_vram_lcdc(bank); MMU_vram_lcdc(VRAMBANK);
break; break;
case 1: //BBG case 1: //BBG
vramConfiguration.banks[bank].purpose = VramConfiguration::BBG; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BBG;
MMU_vram_arm9(bank,VRAM_PAGE_BBG+2); MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BBG+2);
MMU_vram_arm9(bank,VRAM_PAGE_BBG+3); //unexpected mirroring MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BBG+3); //unexpected mirroring
break; break;
case 2: //BOBJ case 2: //BOBJ
vramConfiguration.banks[bank].purpose = VramConfiguration::BOBJ; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BOBJ;
MMU_vram_arm9(bank,VRAM_PAGE_BOBJ); MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BOBJ);
MMU_vram_arm9(bank,VRAM_PAGE_BOBJ+1); //FF3 end scene (lens flare sprite) needs this as it renders a sprite off the end of the 16KB and back around MMU_vram_arm9(VRAMBANK,VRAM_PAGE_BOBJ+1); //FF3 end scene (lens flare sprite) needs this as it renders a sprite off the end of the 16KB and back around
break; break;
case 3: //B OBJ extended palette case 3: //B OBJ extended palette
vramConfiguration.banks[bank].purpose = VramConfiguration::BOBJEXTPAL; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::BOBJEXTPAL;
MMU.ObjExtPal[1][0] = MMU_vram_physical(vram_bank_info[bank].page_addr); MMU.ObjExtPal[1][0] = MMU_vram_physical(vram_bank_info[VRAMBANK].page_addr);
MMU.ObjExtPal[1][1] = MMU.ObjExtPal[1][1] + ADDRESS_STEP_8KB; MMU.ObjExtPal[1][1] = MMU.ObjExtPal[1][1] + ADDRESS_STEP_8KB;
break; break;
default: goto unsupported_mst; default: goto unsupported_mst;
@ -750,15 +742,15 @@ static inline void MMU_VRAMmapRefreshBank(const int bank)
break; break;
} //switch(bank) } //switch(VRAMBANK)
vramConfiguration.banks[bank].ofs = ofs; vramConfiguration.banks[VRAMBANK].ofs = VRAMBankCnt.OFS;
return; return;
unsupported_mst: unsupported_mst:
vramConfiguration.banks[bank].purpose = VramConfiguration::INVALID; vramConfiguration.banks[VRAMBANK].purpose = VramConfiguration::INVALID;
PROGINFO("Unsupported mst setting %d for vram bank %c\n", mst, 'A'+bank); PROGINFO("Unsupported mst setting %d for vram bank %c\n", VRAMBankCnt.MST, 'A'+VRAMBANK);
} }
void MMU_VRAM_unmap_all() void MMU_VRAM_unmap_all()
@ -821,19 +813,19 @@ static inline void MMU_VRAMmapControl(u8 block, u8 VRAMBankCnt)
//goblet of fire "care of magical creatures" maps I and D to BOBJ (the I is an accident) //goblet of fire "care of magical creatures" maps I and D to BOBJ (the I is an accident)
//and requires A to override it. //and requires A to override it.
//This may create other bugs.... //This may create other bugs....
MMU_VRAMmapRefreshBank(VRAM_BANK_I); MMU_VRAMmapRefreshBank<VRAM_BANK_I>();
MMU_VRAMmapRefreshBank(VRAM_BANK_H); MMU_VRAMmapRefreshBank<VRAM_BANK_H>();
MMU_VRAMmapRefreshBank(VRAM_BANK_G); MMU_VRAMmapRefreshBank<VRAM_BANK_G>();
MMU_VRAMmapRefreshBank(VRAM_BANK_F); MMU_VRAMmapRefreshBank<VRAM_BANK_F>();
MMU_VRAMmapRefreshBank(VRAM_BANK_E); MMU_VRAMmapRefreshBank<VRAM_BANK_E>();
//zero 21-jun-2012 //zero 21-jun-2012
//tomwi's streaming music demo sets A and D to ABG (the A is an accident). //tomwi's streaming music demo sets A and D to ABG (the A is an accident).
//in this case, D should get priority. //in this case, D should get priority.
//this is somewhat risky. will it break other things? //this is somewhat risky. will it break other things?
MMU_VRAMmapRefreshBank(VRAM_BANK_A); MMU_VRAMmapRefreshBank<VRAM_BANK_A>();
MMU_VRAMmapRefreshBank(VRAM_BANK_B); MMU_VRAMmapRefreshBank<VRAM_BANK_B>();
MMU_VRAMmapRefreshBank(VRAM_BANK_C); MMU_VRAMmapRefreshBank<VRAM_BANK_C>();
MMU_VRAMmapRefreshBank(VRAM_BANK_D); MMU_VRAMmapRefreshBank<VRAM_BANK_D>();
//printf(vramConfiguration.describe().c_str()); //printf(vramConfiguration.describe().c_str());
//printf("vram remapped at vcount=%d\n",nds.VCount); //printf("vram remapped at vcount=%d\n",nds.VCount);
@ -912,6 +904,8 @@ void MMU_Init(void)
LOG("MMU init\n"); LOG("MMU init\n");
memset(&MMU, 0, sizeof(MMU_struct)); memset(&MMU, 0, sizeof(MMU_struct));
MMU.blank_memory = &MMU.ARM9_LCD[0xA4000];
//MMU.DTCMRegion = 0x027C0000; //MMU.DTCMRegion = 0x027C0000;
//even though apps may change dtcm immediately upon startup, this is the correct hardware starting value: //even though apps may change dtcm immediately upon startup, this is the correct hardware starting value:
@ -961,7 +955,6 @@ void MMU_Reset()
memset(MMU.ARM9_VMEM, 0, sizeof(MMU.ARM9_VMEM)); memset(MMU.ARM9_VMEM, 0, sizeof(MMU.ARM9_VMEM));
memset(MMU.MAIN_MEM, 0, sizeof(MMU.MAIN_MEM)); memset(MMU.MAIN_MEM, 0, sizeof(MMU.MAIN_MEM));
memset(MMU.blank_memory, 0, sizeof(MMU.blank_memory));
memset(MMU.UNUSED_RAM, 0, sizeof(MMU.UNUSED_RAM)); memset(MMU.UNUSED_RAM, 0, sizeof(MMU.UNUSED_RAM));
memset(MMU.MORE_UNUSED_RAM, 0, sizeof(MMU.UNUSED_RAM)); memset(MMU.MORE_UNUSED_RAM, 0, sizeof(MMU.UNUSED_RAM));
@ -3591,22 +3584,22 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val)
val &= 0x7F7F; val &= 0x7F7F;
break; break;
case REG_DISPA_BG2XL: mainEngine->setAffineStartWord(2,0,val,0); break; case REG_DISPA_BG2XL: mainEngine->setAffineStartWord<GPULayerID_BG2, 0, false>(val); break;
case REG_DISPA_BG2XH: mainEngine->setAffineStartWord(2,0,val,1); break; case REG_DISPA_BG2XH: mainEngine->setAffineStartWord<GPULayerID_BG2, 0, true>(val); break;
case REG_DISPA_BG2YL: mainEngine->setAffineStartWord(2,1,val,0); break; case REG_DISPA_BG2YL: mainEngine->setAffineStartWord<GPULayerID_BG2, 1, false>(val); break;
case REG_DISPA_BG2YH: mainEngine->setAffineStartWord(2,1,val,1); break; case REG_DISPA_BG2YH: mainEngine->setAffineStartWord<GPULayerID_BG2, 1, true>(val); break;
case REG_DISPA_BG3XL: mainEngine->setAffineStartWord(3,0,val,0); break; case REG_DISPA_BG3XL: mainEngine->setAffineStartWord<GPULayerID_BG3, 0, false>(val); break;
case REG_DISPA_BG3XH: mainEngine->setAffineStartWord(3,0,val,1); break; case REG_DISPA_BG3XH: mainEngine->setAffineStartWord<GPULayerID_BG3, 0, true>(val); break;
case REG_DISPA_BG3YL: mainEngine->setAffineStartWord(3,1,val,0); break; case REG_DISPA_BG3YL: mainEngine->setAffineStartWord<GPULayerID_BG3, 1, false>(val); break;
case REG_DISPA_BG3YH: mainEngine->setAffineStartWord(3,1,val,1); break; case REG_DISPA_BG3YH: mainEngine->setAffineStartWord<GPULayerID_BG3, 1, true>(val); break;
case REG_DISPB_BG2XL: subEngine->setAffineStartWord(2,0,val,0); break; case REG_DISPB_BG2XL: subEngine->setAffineStartWord<GPULayerID_BG2, 0, false>(val); break;
case REG_DISPB_BG2XH: subEngine->setAffineStartWord(2,0,val,1); break; case REG_DISPB_BG2XH: subEngine->setAffineStartWord<GPULayerID_BG2, 0, true>(val); break;
case REG_DISPB_BG2YL: subEngine->setAffineStartWord(2,1,val,0); break; case REG_DISPB_BG2YL: subEngine->setAffineStartWord<GPULayerID_BG2, 1, false>(val); break;
case REG_DISPB_BG2YH: subEngine->setAffineStartWord(2,1,val,1); break; case REG_DISPB_BG2YH: subEngine->setAffineStartWord<GPULayerID_BG2, 1, true>(val); break;
case REG_DISPB_BG3XL: subEngine->setAffineStartWord(3,0,val,0); break; case REG_DISPB_BG3XL: subEngine->setAffineStartWord<GPULayerID_BG3, 0, false>(val); break;
case REG_DISPB_BG3XH: subEngine->setAffineStartWord(3,0,val,1); break; case REG_DISPB_BG3XH: subEngine->setAffineStartWord<GPULayerID_BG3, 0, true>(val); break;
case REG_DISPB_BG3YL: subEngine->setAffineStartWord(3,1,val,0); break; case REG_DISPB_BG3YL: subEngine->setAffineStartWord<GPULayerID_BG3, 1, false>(val); break;
case REG_DISPB_BG3YH: subEngine->setAffineStartWord(3,1,val,1); break; case REG_DISPB_BG3YH: subEngine->setAffineStartWord<GPULayerID_BG3, 1, true>(val); break;
case REG_DISPA_DISP3DCNT: writereg_DISP3DCNT(16,adr,val); return; case REG_DISPA_DISP3DCNT: writereg_DISP3DCNT(16,adr,val); return;
@ -3815,42 +3808,42 @@ void FASTCALL _MMU_ARM9_write16(u32 adr, u16 val)
case REG_DISPA_BG0CNT : case REG_DISPA_BG0CNT :
//GPULOG("MAIN BG0 SETPROP 16B %08X\r\n", val); //GPULOG("MAIN BG0 SETPROP 16B %08X\r\n", val);
mainEngine->SetBGProp(0, val); mainEngine->SetBGProp<GPULayerID_BG0>(val);
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x8, val); T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x8, val);
return; return;
case REG_DISPA_BG1CNT : case REG_DISPA_BG1CNT :
//GPULOG("MAIN BG1 SETPROP 16B %08X\r\n", val); //GPULOG("MAIN BG1 SETPROP 16B %08X\r\n", val);
mainEngine->SetBGProp(1, val); mainEngine->SetBGProp<GPULayerID_BG1>(val);
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xA, val); T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xA, val);
return; return;
case REG_DISPA_BG2CNT : case REG_DISPA_BG2CNT :
//GPULOG("MAIN BG2 SETPROP 16B %08X\r\n", val); //GPULOG("MAIN BG2 SETPROP 16B %08X\r\n", val);
mainEngine->SetBGProp(2, val); mainEngine->SetBGProp<GPULayerID_BG2>(val);
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xC, val); T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xC, val);
return; return;
case REG_DISPA_BG3CNT : case REG_DISPA_BG3CNT :
//GPULOG("MAIN BG3 SETPROP 16B %08X\r\n", val); //GPULOG("MAIN BG3 SETPROP 16B %08X\r\n", val);
mainEngine->SetBGProp(3, val); mainEngine->SetBGProp<GPULayerID_BG3>(val);
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xE, val); T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0xE, val);
return; return;
case REG_DISPB_BG0CNT : case REG_DISPB_BG0CNT :
//GPULOG("SUB BG0 SETPROP 16B %08X\r\n", val); //GPULOG("SUB BG0 SETPROP 16B %08X\r\n", val);
subEngine->SetBGProp(0, val); subEngine->SetBGProp<GPULayerID_BG0>(val);
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x1008, val); T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x1008, val);
return; return;
case REG_DISPB_BG1CNT : case REG_DISPB_BG1CNT :
//GPULOG("SUB BG1 SETPROP 16B %08X\r\n", val); //GPULOG("SUB BG1 SETPROP 16B %08X\r\n", val);
subEngine->SetBGProp(1, val); subEngine->SetBGProp<GPULayerID_BG1>(val);
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100A, val); T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100A, val);
return; return;
case REG_DISPB_BG2CNT : case REG_DISPB_BG2CNT :
//GPULOG("SUB BG2 SETPROP 16B %08X\r\n", val); //GPULOG("SUB BG2 SETPROP 16B %08X\r\n", val);
subEngine->SetBGProp(2, val); subEngine->SetBGProp<GPULayerID_BG2>(val);
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100C, val); T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100C, val);
return; return;
case REG_DISPB_BG3CNT : case REG_DISPB_BG3CNT :
//GPULOG("SUB BG3 SETPROP 16B %08X\r\n", val); //GPULOG("SUB BG3 SETPROP 16B %08X\r\n", val);
subEngine->SetBGProp(3, val); subEngine->SetBGProp<GPULayerID_BG3>(val);
T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100E, val); T1WriteWord(MMU.MMU_MEM[ARMCPU_ARM9][0x40], 0x100E, val);
return; return;
@ -4133,28 +4126,28 @@ void FASTCALL _MMU_ARM9_write32(u32 adr, u32 val)
MMU_new.gxstat.write32(val); MMU_new.gxstat.write32(val);
break; break;
case REG_DISPA_BG2XL: case REG_DISPA_BG2XL:
mainEngine->setAffineStart(2,0,val); mainEngine->setAffineStart<GPULayerID_BG2, 0>(val);
return; return;
case REG_DISPA_BG2YL: case REG_DISPA_BG2YL:
mainEngine->setAffineStart(2,1,val); mainEngine->setAffineStart<GPULayerID_BG2, 1>(val);
return; return;
case REG_DISPB_BG2XL: case REG_DISPB_BG2XL:
subEngine->setAffineStart(2,0,val); subEngine->setAffineStart<GPULayerID_BG2, 0>(val);
return; return;
case REG_DISPB_BG2YL: case REG_DISPB_BG2YL:
subEngine->setAffineStart(2,1,val); subEngine->setAffineStart<GPULayerID_BG2, 1>(val);
return; return;
case REG_DISPA_BG3XL: case REG_DISPA_BG3XL:
mainEngine->setAffineStart(3,0,val); mainEngine->setAffineStart<GPULayerID_BG3, 0>(val);
return; return;
case REG_DISPA_BG3YL: case REG_DISPA_BG3YL:
mainEngine->setAffineStart(3,1,val); mainEngine->setAffineStart<GPULayerID_BG3, 1>(val);
return; return;
case REG_DISPB_BG3XL: case REG_DISPB_BG3XL:
subEngine->setAffineStart(3,0,val); subEngine->setAffineStart<GPULayerID_BG3, 0>(val);
return; return;
case REG_DISPB_BG3YL: case REG_DISPB_BG3YL:
subEngine->setAffineStart(3,1,val); subEngine->setAffineStart<GPULayerID_BG3, 1>(val);
return; return;
// Alpha test reference value - Parameters:1 // Alpha test reference value - Parameters:1
@ -4363,24 +4356,24 @@ void FASTCALL _MMU_ARM9_write32(u32 adr, u32 val)
return; return;
case REG_DISPA_BG0CNT : case REG_DISPA_BG0CNT :
mainEngine->SetBGProp(0, (val & 0xFFFF)); mainEngine->SetBGProp<GPULayerID_BG0>(val & 0xFFFF);
mainEngine->SetBGProp(1, (val >> 16)); mainEngine->SetBGProp<GPULayerID_BG1>(val >> 16);
//if((val>>16)==0x400) emu_halt(); //if((val>>16)==0x400) emu_halt();
T1WriteLong(MMU.ARM9_REG, 8, val); T1WriteLong(MMU.ARM9_REG, 8, val);
return; return;
case REG_DISPA_BG2CNT : case REG_DISPA_BG2CNT :
mainEngine->SetBGProp(2, (val & 0xFFFF)); mainEngine->SetBGProp<GPULayerID_BG2>(val & 0xFFFF);
mainEngine->SetBGProp(3, (val >> 16)); mainEngine->SetBGProp<GPULayerID_BG3>(val >> 16);
T1WriteLong(MMU.ARM9_REG, 0xC, val); T1WriteLong(MMU.ARM9_REG, 0xC, val);
return; return;
case REG_DISPB_BG0CNT : case REG_DISPB_BG0CNT :
subEngine->SetBGProp(0, (val & 0xFFFF)); subEngine->SetBGProp<GPULayerID_BG0>(val & 0xFFFF);
subEngine->SetBGProp(1, (val >> 16)); subEngine->SetBGProp<GPULayerID_BG1>(val >> 16);
T1WriteLong(MMU.ARM9_REG, 0x1008, val); T1WriteLong(MMU.ARM9_REG, 0x1008, val);
return; return;
case REG_DISPB_BG2CNT : case REG_DISPB_BG2CNT :
subEngine->SetBGProp(2, (val & 0xFFFF)); subEngine->SetBGProp<GPULayerID_BG2>(val & 0xFFFF);
subEngine->SetBGProp(3, (val >> 16)); subEngine->SetBGProp<GPULayerID_BG3>(val >> 16);
T1WriteLong(MMU.ARM9_REG, 0x100C, val); T1WriteLong(MMU.ARM9_REG, 0x100C, val);
return; return;
case REG_DISPA_DISPMMEMFIFO: case REG_DISPA_DISPMMEMFIFO:

View File

@ -312,6 +312,28 @@ struct GCBUS_Controller
eCardMode mode; //probably only one of these eCardMode mode; //probably only one of these
}; };
// VRAMCNT: a one-byte value that can be read either as a raw byte (`value`)
// or through one of two bitfield views sharing the same storage.
// Each view declares exactly 8 bits in total (3+2+2+1 and 2+1+2+2+1).
// NOTE(review): the bit position of each field depends on the compiler's
// bitfield allocation order, and anonymous structs inside a union are a
// compiler extension in C++ -- confirm on every supported toolchain.
typedef union
{
// Raw byte covering all fields below.
u8 value;
// View with a 3-bit MST field.
struct
{
unsigned MST:3; // 3-bit MST setting
unsigned OFS:2; // 2-bit OFS value (presumably the bank offset -- TODO confirm)
unsigned :2; // 2 unnamed padding bits
unsigned Enable:1; // 1-bit enable flag
};
// Alternate view with a 2-bit MST field followed by one padding bit.
// NOTE(review): the _ABHI suffix presumably refers to VRAM banks A, B, H and I -- confirm.
struct
{
unsigned MST_ABHI:2; // 2-bit MST setting
unsigned :1; // 1 unnamed padding bit, keeps OFS_ABHI aligned with OFS above
unsigned OFS_ABHI:2; // 2-bit OFS value
unsigned :2; // 2 unnamed padding bits
unsigned Enable_ABHI:1; // 1-bit enable flag
};
} VRAMCNT;
#define DUP2(x) x, x #define DUP2(x) x, x
#define DUP4(x) x, x, x, x #define DUP4(x) x, x, x, x
#define DUP8(x) x, x, x, x, x, x, x, x #define DUP8(x) x, x, x, x, x, x, x, x
@ -328,18 +350,14 @@ struct MMU_struct
u8 MAIN_MEM[16*1024*1024]; //expanded from 8MB to 16MB to support dsi u8 MAIN_MEM[16*1024*1024]; //expanded from 8MB to 16MB to support dsi
u8 ARM9_REG[0x1000000]; //this variable is evil and should be removed by correctly emulating all registers. u8 ARM9_REG[0x1000000]; //this variable is evil and should be removed by correctly emulating all registers.
u8 ARM9_BIOS[0x8000]; u8 ARM9_BIOS[0x8000];
u8 ARM9_VMEM[0x800]; CACHE_ALIGN u8 ARM9_VMEM[0x800];
//an extra 128KB for blank memory, directly after arm9_lcd, so that
//we can easily map things to the end of arm9_lcd to represent
//an unmapped state
CACHE_ALIGN u8 ARM9_LCD[0xA4000 + 0x20000];
u8 *blank_memory;
#include "PACKED.h"
struct {
u8 ARM9_LCD[0xA4000];
//an extra 128KB for blank memory, directly after arm9_lcd, so that
//we can easily map things to the end of arm9_lcd to represent
//an unmapped state
u8 blank_memory[0x20000];
};
#include "PACKED_END.h"
u8 ARM9_OAM[0x800]; u8 ARM9_OAM[0x800];
u8* ExtPal[2][4]; u8* ExtPal[2][4];
@ -519,16 +537,20 @@ extern const armcpu_memory_iface arm9_base_memory_iface;
extern const armcpu_memory_iface arm7_base_memory_iface; extern const armcpu_memory_iface arm7_base_memory_iface;
extern const armcpu_memory_iface arm9_direct_memory_iface; extern const armcpu_memory_iface arm9_direct_memory_iface;
#define VRAM_BANKS 9 enum VRAMBankID
#define VRAM_BANK_A 0 {
#define VRAM_BANK_B 1 VRAM_BANK_A = 0,
#define VRAM_BANK_C 2 VRAM_BANK_B = 1,
#define VRAM_BANK_D 3 VRAM_BANK_C = 2,
#define VRAM_BANK_E 4 VRAM_BANK_D = 3,
#define VRAM_BANK_F 5 VRAM_BANK_E = 4,
#define VRAM_BANK_G 6 VRAM_BANK_F = 5,
#define VRAM_BANK_H 7 VRAM_BANK_G = 6,
#define VRAM_BANK_I 8 VRAM_BANK_H = 7,
VRAM_BANK_I = 8,
VRAM_BANK_COUNT = 9
};
#define VRAM_PAGE_ABG 0 #define VRAM_PAGE_ABG 0
#define VRAM_PAGE_BBG 128 #define VRAM_PAGE_BBG 128
@ -545,10 +567,10 @@ struct VramConfiguration {
struct BankInfo { struct BankInfo {
Purpose purpose; Purpose purpose;
int ofs; int ofs;
} banks[VRAM_BANKS]; } banks[VRAM_BANK_COUNT];
inline void clear() { inline void clear() {
for(int i=0;i<VRAM_BANKS;i++) { for(int i=0;i<VRAM_BANK_COUNT;i++) {
banks[i].ofs = 0; banks[i].ofs = 0;
banks[i].purpose = OFF; banks[i].purpose = OFF;
} }

View File

@ -1448,7 +1448,7 @@ static void execHardware_hstart()
//when the vcount hits 263 it rolls over to 0 //when the vcount hits 263 it rolls over to 0
nds.VCount=0; nds.VCount=0;
} }
if(nds.VCount==262) else if(nds.VCount==262)
{ {
//when the vcount hits 262, vblank ends (oam pre-renders by one scanline) //when the vcount hits 262, vblank ends (oam pre-renders by one scanline)
execHardware_hstart_vblankEnd(); execHardware_hstart_vblankEnd();

View File

@ -575,12 +575,12 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
{ {
// Copy the colors to the color buffer. Since we can only copy 8 elements at once, // Copy the colors to the color buffer. Since we can only copy 8 elements at once,
// we need to load-store twice. // we need to load-store twice.
_mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i + 8), _mm_loadu_si128((__m128i *)(clearColorBuffer + i + 8)) ); _mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i + 8), _mm_load_si128((__m128i *)(clearColorBuffer + i + 8)) );
_mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i), _mm_loadu_si128((__m128i *)(clearColorBuffer + i)) ); _mm_store_si128( (__m128i *)(this->clearImageColor16Buffer + i), _mm_load_si128((__m128i *)(clearColorBuffer + i)) );
// Write the depth values to the depth buffer. // Write the depth values to the depth buffer.
__m128i clearDepthHi_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i + 8)); __m128i clearDepthHi_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i + 8));
__m128i clearDepthLo_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i)); __m128i clearDepthLo_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i));
clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, depthBitMask_vec128); clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, depthBitMask_vec128);
clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, depthBitMask_vec128); clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, depthBitMask_vec128);
@ -602,8 +602,8 @@ Render3DError Render3D_SSE2::ClearFramebuffer(const GFX3D_State &renderState)
this->clearImageDepthBuffer[i+ 0] = dsDepthToD24_LUT[_mm_extract_epi16(clearDepthLo_vec128, 0)]; this->clearImageDepthBuffer[i+ 0] = dsDepthToD24_LUT[_mm_extract_epi16(clearDepthLo_vec128, 0)];
// Write the fog flags to the fog flag buffer. // Write the fog flags to the fog flag buffer.
clearDepthHi_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i + 8)); clearDepthHi_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i + 8));
clearDepthLo_vec128 = _mm_loadu_si128((__m128i *)(clearDepthBuffer + i)); clearDepthLo_vec128 = _mm_load_si128((__m128i *)(clearDepthBuffer + i));
clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, fogBufferBitMask_vec128); clearDepthHi_vec128 = _mm_and_si128(clearDepthHi_vec128, fogBufferBitMask_vec128);
clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, fogBufferBitMask_vec128); clearDepthLo_vec128 = _mm_and_si128(clearDepthLo_vec128, fogBufferBitMask_vec128);
clearDepthHi_vec128 = _mm_srli_epi16(clearDepthHi_vec128, 15); clearDepthHi_vec128 = _mm_srli_epi16(clearDepthHi_vec128, 15);