/* Copyright 2016-2017 StapleButter This file is part of melonDS. melonDS is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. melonDS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with melonDS. If not, see http://www.gnu.org/licenses/. */ #include #include #include "NDS.h" #include "GPU.h" GPU2D::GPU2D(u32 num) { Num = num; } GPU2D::~GPU2D() { } void GPU2D::Reset() { DispCnt = 0; memset(BGCnt, 0, 4*2); memset(BGXPos, 0, 4*2); memset(BGYPos, 0, 4*2); memset(BGXCenter, 0, 2*4); memset(BGYCenter, 0, 2*4); memset(BGRotA, 0, 2*2); memset(BGRotB, 0, 2*2); memset(BGRotC, 0, 2*2); memset(BGRotD, 0, 2*2); } void GPU2D::SetFramebuffer(u16* buf) { // framebuffer is 256x192 16bit. // might eventually support other framebuffer types/sizes Framebuffer = buf; } u8 GPU2D::Read8(u32 addr) { printf("!! GPU2D READ8 %08X\n", addr); return 0; } u16 GPU2D::Read16(u32 addr) { switch (addr & 0x00000FFF) { case 0x000: return DispCnt&0xFFFF; case 0x002: return DispCnt>>16; case 0x008: return BGCnt[0]; case 0x00A: return BGCnt[1]; case 0x00C: return BGCnt[2]; case 0x00E: return BGCnt[3]; } printf("unknown GPU read16 %08X\n", addr); return 0; } u32 GPU2D::Read32(u32 addr) { switch (addr & 0x00000FFF) { case 0x000: return DispCnt; } return Read16(addr) | (Read16(addr+2) << 16); } void GPU2D::Write8(u32 addr, u8 val) { printf("!! GPU2D WRITE8 %08X %02X\n", addr, val); } void GPU2D::Write16(u32 addr, u16 val) { switch (addr & 0x00000FFF) { case 0x000: DispCnt = (DispCnt & 0xFFFF0000) | val; //printf("[L] DISPCNT=%08X\n", DispCnt); return; case 0x002: DispCnt = (DispCnt & 0x0000FFFF) | (val << 16); //printf("[H] DISPCNT=%08X\n", DispCnt); return; case 0x008: BGCnt[0] = val; return; case 0x00A: BGCnt[1] = val; return; case 0x00C: BGCnt[2] = val; return; case 0x00E: BGCnt[3] = val; return; case 0x010: BGXPos[0] = val; return; case 0x012: BGYPos[0] = val; return; case 0x014: BGXPos[1] = val; return; case 0x016: BGYPos[1] = val; return; case 0x018: BGXPos[2] = val; return; case 0x01A: BGYPos[2] = val; return; case 0x01C: BGXPos[3] = val; return; case 0x01E: BGYPos[3] = val; return; case 0x020: BGRotA[0] = val; return; case 0x022: BGRotB[0] = val; return; case 0x024: BGRotC[0] = val; return; case 0x026: BGRotD[0] = val; return; case 0x030: BGRotA[1] = val; return; case 0x032: BGRotB[1] = val; return; case 0x034: BGRotC[1] = val; return; case 0x036: BGRotD[1] = val; return; } //printf("unknown GPU write16 %08X %04X\n", addr, val); } void GPU2D::Write32(u32 addr, u32 val) { switch (addr & 0x00000FFF) { case 0x000: //printf("DISPCNT=%08X\n", val); DispCnt = val; return; case 0x028: if (val & 0x08000000) val |= 0xF0000000; BGXCenter[0] = val; return; case 0x02C: if (val & 0x08000000) val |= 0xF0000000; BGYCenter[0] = val; return; case 0x038: if (val & 0x08000000) val |= 0xF0000000; BGXCenter[1] = val; return; case 0x03C: if (val & 0x08000000) val |= 0xF0000000; BGYCenter[1] = val; return; } Write16(addr, val&0xFFFF); Write16(addr+2, val>>16); } void GPU2D::DrawScanline(u32 line) { u16* dst = &Framebuffer[256*line]; u32 dispmode = DispCnt >> 16; dispmode &= (Num ? 0x1 : 0x3); switch (dispmode) { case 0: // screen off { for (int i = 0; i < 256>>1; i++) ((u32*)dst)[i] = 0x7FFF7FFF; } break; case 1: // regular display { DrawScanline_Mode1(line, dst); } break; case 2: // VRAM display { u32* vram = (u32*)GPU::VRAM[(DispCnt >> 18) & 0x3]; vram = &vram[line << 7]; for (int i = 0; i < 256>>1; i++) ((u32*)dst)[i] = vram[i]; } break; case 3: // FIFO display { // uh, is there even anything that uses this? } break; } } template void GPU2D::DrawScanlineBGMode(u32 line, u32* spritebuf, u16* dst) { for (int i = 3; i >= 0; i--) { if ((BGCnt[3] & 0x3) == i) { if (DispCnt & 0x0800) { if (bgmode >= 3) DrawBG_Extended(line, dst, 3); else if (bgmode >= 1) {} // todo: rotscale else DrawBG_Text(line, dst, 3); } } if ((BGCnt[2] & 0x3) == i) { if (DispCnt & 0x0400) { if (bgmode == 5) DrawBG_Extended(line, dst, 2); else if (bgmode == 4 || bgmode == 2) {} // todo: rotscale else DrawBG_Text(line, dst, 2); } } if ((BGCnt[1] & 0x3) == i) { if (DispCnt & 0x0200) { DrawBG_Text(line, dst, 1); } } if ((BGCnt[0] & 0x3) == i) { if (DispCnt & 0x0100) { if ((!Num) && (DispCnt & 0x8)) {} // TODO else DrawBG_Text(line, dst, 0); } } if (DispCnt & 0x1000) InterleaveSprites(spritebuf, 0x8000 | (i<<16), dst); } } void GPU2D::DrawScanline_Mode1(u32 line, u16* dst) { u32 backdrop; if (Num) backdrop = *(u16*)&GPU::Palette[0x400]; else backdrop = *(u16*)&GPU::Palette[0]; // TODO: color effect for backdrop backdrop |= (backdrop<<16); for (int i = 0; i < 256>>1; i++) ((u32*)dst)[i] = backdrop; // prerender sprites u32 spritebuf[256]; memset(spritebuf, 0, 256*4); if (DispCnt & 0x1000) DrawSprites(line, spritebuf); switch (DispCnt & 0x7) { case 0: DrawScanlineBGMode<0>(line, spritebuf, dst); break; case 1: DrawScanlineBGMode<1>(line, spritebuf, dst); break; case 2: DrawScanlineBGMode<2>(line, spritebuf, dst); break; case 3: DrawScanlineBGMode<3>(line, spritebuf, dst); break; case 4: DrawScanlineBGMode<4>(line, spritebuf, dst); break; case 5: DrawScanlineBGMode<5>(line, spritebuf, dst); break; } // debug crap //for (int i = 0; i < 256; i++) // dst[i] = *(u16*)&GPU::Palette[Num*0x400 + (i>>4)*2 + (line>>4)*32]; } void GPU2D::DrawBG_Text(u32 line, u16* dst, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; u8* tileset; u16* tilemap; u16* pal; u32 extpal; u16 xoff = BGXPos[bgnum]; u16 yoff = BGYPos[bgnum] + line; u32 widexmask = (bgcnt & 0x4000) ? 0x100 : 0; extpal = (bgcnt & 0x0080) && (DispCnt & 0x40000000); if (Num) { tileset = (u8*)GPU::VRAM_BBG[((bgcnt & 0x003C) >> 2)]; tilemap = (u16*)GPU::VRAM_BBG[((bgcnt & 0x1800) >> 11)]; if (!tileset || !tilemap) return; tilemap += ((bgcnt & 0x0700) << 2); if (extpal) { if ((bgnum<2) && (bgcnt&0x2000)) pal = (u16*)GPU::VRAM_BBGExtPal[2+bgnum]; else pal = (u16*)GPU::VRAM_BBGExtPal[bgnum]; // derp if (!pal) pal = (u16*)&GPU::Palette[0x400]; } else pal = (u16*)&GPU::Palette[0x400]; } else { tileset = (u8*)GPU::VRAM_ABG[((DispCnt & 0x07000000) >> 22) + ((bgcnt & 0x003C) >> 2)]; tilemap = (u16*)GPU::VRAM_ABG[((DispCnt & 0x38000000) >> 25) + ((bgcnt & 0x1800) >> 11)]; if (!tileset || !tilemap) return; tilemap += ((bgcnt & 0x0700) << 2); if (extpal) { if ((bgnum<2) && (bgcnt&0x2000)) pal = (u16*)GPU::VRAM_ABGExtPal[2+bgnum]; else pal = (u16*)GPU::VRAM_ABGExtPal[bgnum]; // derp if (!pal) pal = (u16*)&GPU::Palette[0]; } else pal = (u16*)&GPU::Palette[0]; } // adjust Y position in tilemap if (bgcnt & 0x8000) { tilemap += ((yoff & 0x1F8) << 2); if (bgcnt & 0x4000) tilemap += ((yoff & 0x100) << 2); } else tilemap += ((yoff & 0xF8) << 2); u16 curtile; u16* curpal; u8* pixels; if (bgcnt & 0x0080) { // 256-color // preload shit as needed if (xoff & 0x7) { // load a new tile curtile = tilemap[((xoff & 0xFF) >> 3) + ((xoff & widexmask) << 2)]; curpal = pal; if (extpal) curpal += ((curtile & 0xF000) >> 4); pixels = tileset + ((curtile & 0x03FF) << 6); pixels += (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3); } for (int i = 0; i < 256; i++) { if (!(xoff & 0x7)) { // load a new tile curtile = tilemap[((xoff & 0xFF) >> 3) + ((xoff & widexmask) << 2)]; curpal = pal; if (extpal) curpal += ((curtile & 0xF000) >> 4); pixels = tileset + ((curtile & 0x03FF) << 6); pixels += (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3); } // draw pixel u8 color; u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7); color = pixels[tilexoff]; if (color) dst[i] = curpal[color]; xoff++; } } else { // 16-color // preload shit as needed if (xoff & 0x7) { // load a new tile curtile = tilemap[((xoff & 0xFF) >> 3) + ((xoff & widexmask) << 2)]; curpal = pal + ((curtile & 0xF000) >> 8); pixels = tileset + ((curtile & 0x03FF) << 5); pixels += (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); } for (int i = 0; i < 256; i++) { if (!(xoff & 0x7)) { // load a new tile curtile = tilemap[((xoff & 0xFF) >> 3) + ((xoff & widexmask) << 2)]; curpal = pal + ((curtile & 0xF000) >> 8); pixels = tileset + ((curtile & 0x03FF) << 5); pixels += (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); } // draw pixel u8 color; u32 tilexoff = (curtile & 0x0400) ? (7-(xoff&0x7)) : (xoff&0x7); if (tilexoff & 0x1) { color = pixels[tilexoff>>1] >> 4; } else { color = pixels[tilexoff>>1] & 0x0F; } if (color) dst[i] = curpal[color]; xoff++; } } } void GPU2D::DrawBG_Extended(u32 line, u16* dst, u32 bgnum) { u16 bgcnt = BGCnt[bgnum]; u8* tileset; u16* tilemap; u16* pal; u32 extpal; u32 coordmask; u32 yshift; switch (bgcnt & 0xC000) { case 0x0000: coordmask = 0x07800; yshift = 7; break; case 0x4000: coordmask = 0x0F800; yshift = 8; break; case 0x8000: coordmask = 0x1F800; yshift = 9; break; case 0xC000: coordmask = 0x3F800; yshift = 10; break; } u32 overflowmask; if (bgcnt & 0x2000) overflowmask = 0; else overflowmask = ~(coordmask | 0x7FF); extpal = (DispCnt & 0x40000000); s16 rotA = BGRotA[bgnum-2]; s16 rotB = BGRotB[bgnum-2]; s16 rotC = BGRotC[bgnum-2]; s16 rotD = BGRotD[bgnum-2]; s32 rotX = BGXCenter[bgnum-2]; s32 rotY = BGYCenter[bgnum-2]; // hax rotX += line*rotB; rotY += line*rotD; if (bgcnt & 0x0080) { // bitmap modes if (Num) tileset = (u8*)GPU::VRAM_BBG[((bgcnt & 0x003C) >> 2)]; else tileset = (u8*)GPU::VRAM_ABG[((bgcnt & 0x003C) >> 2)]; if (!tileset) return; coordmask |= 0x7FF; if (bgcnt & 0x0004) { // direct color bitmap u16* bitmap = (u16*)tileset; for (int i = 0; i < 256; i++) { if (!((rotX|rotY) & overflowmask)) { u16 color = bitmap[(((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)]; if (color & 0x8000) dst[i] = color; } rotX += rotA; rotY += rotC; } } else { // 256-color bitmap if (Num) pal = (u16*)&GPU::Palette[0x400]; else pal = (u16*)&GPU::Palette[0]; for (int i = 0; i < 256; i++) { if (!((rotX|rotY) & overflowmask)) { u8 color = tileset[(((rotY & coordmask) >> 8) << yshift) + ((rotX & coordmask) >> 8)]; if (color) dst[i] = pal[color]; } rotX += rotA; rotY += rotC; } } } else { // shitty mode if (Num) { tileset = (u8*)GPU::VRAM_BBG[((bgcnt & 0x003C) >> 2)]; tilemap = (u16*)GPU::VRAM_BBG[((bgcnt & 0x1800) >> 11)]; if (!tileset || !tilemap) return; tilemap += ((bgcnt & 0x0700) << 2); if (extpal) { pal = (u16*)GPU::VRAM_BBGExtPal[bgnum]; // derp if (!pal) pal = (u16*)&GPU::Palette[0x400]; } else pal = (u16*)&GPU::Palette[0x400]; } else { tileset = (u8*)GPU::VRAM_ABG[((DispCnt & 0x07000000) >> 22) + ((bgcnt & 0x003C) >> 2)]; tilemap = (u16*)GPU::VRAM_ABG[((DispCnt & 0x38000000) >> 25) + ((bgcnt & 0x1800) >> 11)]; if (!tileset || !tilemap) return; tilemap += ((bgcnt & 0x0700) << 2); if (extpal) { pal = (u16*)GPU::VRAM_ABGExtPal[bgnum]; // derp if (!pal) pal = (u16*)&GPU::Palette[0]; } else pal = (u16*)&GPU::Palette[0]; } u16 curtile; u16* curpal; u8* pixels; yshift -= 3; for (int i = 0; i < 256; i++) { if (!((rotX|rotY) & overflowmask)) { curtile = tilemap[(((rotY & coordmask) >> 11) << yshift) + ((rotX & coordmask) >> 11)]; curpal = pal; if (extpal) curpal += ((curtile & 0xF000) >> 4); pixels = tileset + ((curtile & 0x03FF) << 6); // draw pixel u8 color; u32 tilexoff = (rotX >> 8) & 0x7; u32 tileyoff = (rotY >> 8) & 0x7; if (curtile & 0x0400) tilexoff = 7-tilexoff; if (curtile & 0x0800) tileyoff = 7-tileyoff; color = pixels[(tileyoff << 3) + tilexoff]; if (color) dst[i] = curpal[color]; } rotX += rotA; rotY += rotC; } } //BGXCenter[bgnum-2] += rotB; //BGYCenter[bgnum-2] += rotD; } void GPU2D::InterleaveSprites(u32* buf, u32 prio, u16* dst) { for (u32 i = 0; i < 256; i++) { if ((buf[i] & 0xF8000) == prio) dst[i] = buf[i] & 0x7FFF; } } void GPU2D::DrawSprites(u32 line, u32* dst) { u16* oam = (u16*)&GPU::OAM[Num ? 0x400 : 0]; const s32 spritewidth[16] = { 8, 16, 8, 0, 16, 32, 8, 0, 32, 32, 16, 0, 64, 64, 32, 0 }; const s32 spriteheight[16] = { 8, 8, 16, 0, 16, 8, 32, 0, 32, 16, 32, 0, 64, 32, 64, 0 }; for (int bgnum = 0x0C00; bgnum >= 0x0000; bgnum -= 0x0400) { for (int sprnum = 127; sprnum >= 0; sprnum--) { u16* attrib = &oam[sprnum*4]; if ((attrib[2] & 0x0C00) != bgnum) continue; if (attrib[0] & 0x0100) { u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); s32 width = spritewidth[sizeparam]; s32 height = spriteheight[sizeparam]; s32 boundwidth = width; s32 boundheight = height; if (attrib[0] & 0x0200) { boundwidth <<= 1; boundheight <<= 1; } u32 ypos = attrib[0] & 0xFF; ypos = (line - ypos) & 0xFF; if (ypos >= (u32)boundheight) continue; s32 xpos = (s32)(attrib[1] << 23) >> 23; if (xpos <= -boundwidth) continue; u32 rotparamgroup = (attrib[1] >> 9) & 0x1F; DrawSprite_Rotscale(attrib, &oam[(rotparamgroup*16) + 3], boundwidth, boundheight, width, height, xpos, ypos, dst); } else { if (attrib[0] & 0x0200) continue; u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); s32 width = spritewidth[sizeparam]; s32 height = spriteheight[sizeparam]; u32 ypos = attrib[0] & 0xFF; ypos = (line - ypos) & 0xFF; if (ypos >= (u32)height) continue; s32 xpos = (s32)(attrib[1] << 23) >> 23; if (xpos <= -width) continue; // yflip if (attrib[1] & 0x2000) ypos = height-1 - ypos; DrawSprite_Normal(attrib, width, xpos, ypos, dst); } } } } void GPU2D::DrawSprite_Rotscale(u16* attrib, u16* rotparams, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, u32 ypos, u32* dst) { u32 prio = ((attrib[2] & 0x0C00) << 6) | 0x8000; u32 tilenum = attrib[2] & 0x03FF; u32 ytilefactor; if (DispCnt & 0x10) { tilenum <<= ((DispCnt >> 20) & 0x3); ytilefactor = (width >> 3); } else { ytilefactor = 0x20; } s32 centerX = boundwidth >> 1; s32 centerY = boundheight >> 1; u32 xoff; if (xpos >= 0) { xoff = 0; if ((xpos+boundwidth) > 256) boundwidth = 256-xpos; } else { xoff = -xpos; xpos = 0; } s16 rotA = (s16)rotparams[0]; s16 rotB = (s16)rotparams[4]; s16 rotC = (s16)rotparams[8]; s16 rotD = (s16)rotparams[12]; s32 rotX = ((xoff-centerX) * rotA) + ((ypos-centerY) * rotB) + (width << 7); s32 rotY = ((xoff-centerX) * rotC) + ((ypos-centerY) * rotD) + (height << 7); width <<= 8; height <<= 8; if (attrib[0] & 0x2000) { // 256-color } else { // 16-color tilenum <<= 5; ytilefactor <<= 5; u8* pixels = (Num ? GPU::VRAM_BOBJ : GPU::VRAM_AOBJ)[tilenum >> 14]; if (!pixels) return; pixels += (tilenum & 0x3FFF); u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; pal += (attrib[2] & 0xF000) >> 8; for (; xoff < boundwidth;) { if ((u32)rotX < width && (u32)rotY < height) { u8 color; // blaaaarg color = pixels[((rotY>>11)*ytilefactor) + ((rotY&0x700)>>6) + ((rotX>>11)*32) + ((rotX&0x700)>>9)]; if (rotX & 0x100) color >>= 4; else color &= 0x0F; if (color) dst[xpos] = pal[color] | prio; } rotX += rotA; rotY += rotC; xoff++; xpos++; } } } void GPU2D::DrawSprite_Normal(u16* attrib, u32 width, s32 xpos, u32 ypos, u32* dst) { u32 prio = ((attrib[2] & 0x0C00) << 6) | 0x8000; u32 tilenum = attrib[2] & 0x03FF; if (DispCnt & 0x10) { tilenum <<= ((DispCnt >> 20) & 0x3); tilenum += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); } else { tilenum += ((ypos >> 3) * 0x20) << ((attrib[0] & 0x2000) ? 1:0); } u32 wmask = width - 8; // really ((width - 1) & ~0x7) u32 xoff; if (xpos >= 0) { xoff = 0; if ((xpos+width) > 256) width = 256-xpos; } else { xoff = -xpos; xpos = 0; } if (attrib[0] & 0x2000) { // 256-color tilenum <<= 5; u8* pixels = (Num ? GPU::VRAM_BOBJ : GPU::VRAM_AOBJ)[tilenum >> 14]; if (!pixels) return; pixels += (tilenum & 0x3FFF); pixels += ((ypos & 0x7) << 3); u32 extpal = (DispCnt & 0x80000000); u16* pal; if (extpal) { pal = (u16*)(Num ? GPU::VRAM_BOBJExtPal : GPU::VRAM_AOBJExtPal); pal += (attrib[2] & 0xF000) >> 4; } else pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; if (!pal) pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; // derp if (attrib[1] & 0x1000) // xflip. TODO: do better? oh well for now this works { pixels += (((width-1 - xoff) & wmask) << 3); pixels += ((width-1 - xoff) & 0x7); for (; xoff < width;) { u8 color = *pixels; pixels--; if (color) dst[xpos] = pal[color] | prio; xoff++; xpos++; if (!(xoff & 0x7)) pixels -= 56; } } else { pixels += ((xoff & wmask) << 3); pixels += (xoff & 0x7); for (; xoff < width;) { u8 color = *pixels; pixels++; if (color) dst[xpos] = pal[color] | prio; xoff++; xpos++; if (!(xoff & 0x7)) pixels += 56; } } } else { // 16-color tilenum <<= 5; u8* pixels = (Num ? GPU::VRAM_BOBJ : GPU::VRAM_AOBJ)[tilenum >> 14]; if (!pixels) return; pixels += (tilenum & 0x3FFF); pixels += ((ypos & 0x7) << 2); u16* pal = (u16*)&GPU::Palette[Num ? 0x600 : 0x200]; pal += (attrib[2] & 0xF000) >> 8; if (attrib[1] & 0x1000) // xflip. TODO: do better? oh well for now this works { pixels += (((width-1 - xoff) & wmask) << 2); pixels += (((width-1 - xoff) & 0x7) >> 1); for (; xoff < width;) { u8 color; if (xoff & 0x1) { color = *pixels & 0x0F; pixels--; } else { color = *pixels >> 4; } if (color) dst[xpos] = pal[color] | prio; xoff++; xpos++; if (!(xoff & 0x7)) pixels -= 28; } } else { pixels += ((xoff & wmask) << 2); pixels += ((xoff & 0x7) >> 1); for (; xoff < width;) { u8 color; if (xoff & 0x1) { color = *pixels >> 4; pixels++; } else { color = *pixels & 0x0F; } if (color) dst[xpos] = pal[color] | prio; xoff++; xpos++; if (!(xoff & 0x7)) pixels += 28; } } } }