From 5e7fdbe2c0a8dc9918f2a8f8db17280a8dd79fee Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Tue, 29 May 2018 21:26:48 +1000 Subject: [PATCH] Update to v106r32 release. byuu says: Changelog: - sfc/ppu-fast: everything other than vertical mosaic and interlace support is in Games are quite playable now, and you're welcome to try things out, but please don't report bugs yet. It's still too early for that. --- higan/emulator/emulator.hpp | 2 +- higan/sfc/ppu-fast/background.cpp | 108 ++++++++++------- higan/sfc/ppu-fast/io.cpp | 59 +++++----- higan/sfc/ppu-fast/line.cpp | 95 ++++++++++----- higan/sfc/ppu-fast/mode7.cpp | 62 ++++++++++ higan/sfc/ppu-fast/object.cpp | 186 ++++++++++++++++++++++++------ higan/sfc/ppu-fast/ppu.cpp | 31 ++--- higan/sfc/ppu-fast/ppu.hpp | 65 +++++++---- higan/sfc/ppu-fast/window.cpp | 74 +++++++++++- higan/sfc/ppu/object.cpp | 39 +++---- higan/sfc/ppu/screen.cpp | 10 +- 11 files changed, 535 insertions(+), 196 deletions(-) create mode 100644 higan/sfc/ppu-fast/mode7.cpp diff --git a/higan/emulator/emulator.hpp b/higan/emulator/emulator.hpp index 447ee990..5b4f372d 100644 --- a/higan/emulator/emulator.hpp +++ b/higan/emulator/emulator.hpp @@ -12,7 +12,7 @@ using namespace nall; namespace Emulator { static const string Name = "higan"; - static const string Version = "106.31"; + static const string Version = "106.32"; static const string Author = "byuu"; static const string License = "GPLv3"; static const string Website = "https://byuu.org/"; diff --git a/higan/sfc/ppu-fast/background.cpp b/higan/sfc/ppu-fast/background.cpp index 6db0d8bc..25908e63 100644 --- a/higan/sfc/ppu-fast/background.cpp +++ b/higan/sfc/ppu-fast/background.cpp @@ -1,35 +1,31 @@ auto PPU::Line::renderBackground(PPU::IO::Background& self, uint source) -> void { if(io.displayDisable) return; - if(self.tileMode == TileMode::Inactive) return; - if(self.tileMode == TileMode::Mode7) return; //todo if(!self.aboveEnable && !self.belowEnable) return; + if(self.tileMode == TileMode::Mode7) return renderMode7(self, source); + if(self.tileMode == TileMode::Inactive) return; + + bool windowAbove[256]; + bool windowBelow[256]; + renderWindow(self.window, self.window.aboveEnable, windowAbove); + renderWindow(self.window, self.window.belowEnable, windowBelow); bool hires = io.bgMode == 5 || io.bgMode == 6; - bool offsetPerTile = io.bgMode == 2 || io.bgMode == 4 || io.bgMode == 6; - bool directColor = io.col.directColor && source == Source::BG1 && (io.bgMode == 3 || io.bgMode == 4); - uint width = !hires ? 256 : 512; + bool offsetPerTileMode = io.bgMode == 2 || io.bgMode == 4 || io.bgMode == 6; + bool directColorMode = io.col.directColor && source == Source::BG1 && (io.bgMode == 3 || io.bgMode == 4); + int width = !hires ? 256 : 512; uint tileHeight = self.tileSize ? 4 : 3; uint tileWidth = hires ? 4 : tileHeight; uint tileMask = 0x0fff >> self.tileMode; - uint tiledataIndex = self.tiledataAddress >> 4 + self.tileMode; - - uint maskX = width << (tileHeight == 4); - uint maskY = maskX; - if(self.screenSize.bit(0)) maskX <<= 1; - if(self.screenSize.bit(1)) maskY <<= 1; - maskX--; - maskY--; - - uint screenX = self.screenSize.bit(0) ? 32 << 5 : 0; - uint screenY = self.screenSize.bit(1) ? 32 << 5 : 0; - if(self.screenSize == 3) screenY <<= 1; + uint tiledataIndex = self.tiledataAddress >> 3 + self.tileMode; uint paletteBase = io.bgMode == 0 ? source << 5 : 0; uint paletteShift = 2 << self.tileMode; uint hscroll = self.hoffset; uint vscroll = self.voffset; + uint hmask = (width << (tileHeight == 4) << self.screenSize.bit(0)) - 1; + uint vmask = (width << (tileHeight == 4) << self.screenSize.bit(1)) - 1; uint y = this->y; //todo: vmosaic if(hires) { @@ -42,25 +38,38 @@ auto PPU::Line::renderBackground(PPU::IO::Background& self, uint source) -> void uint mosaicPriority = 0; uint mosaicColor = 0; - auto getTile = [&](uint hoffset, uint voffset) -> uint { - uint tileX = (hoffset & maskX) >> tileWidth; - uint tileY = (voffset & maskY) >> tileHeight; - uint tilePosition = (tileY & 0x1f) << 5 | (tileX & 0x1f); - if(tileX & 0x20) tilePosition += screenX; - if(tileY & 0x20) tilePosition += screenY; - uint15 tiledataAddress = self.screenAddress + tilePosition; - return ppu.vram[tiledataAddress]; - }; - int x = 0 - (hscroll & 7); while(x < width) { uint hoffset = x + hscroll; uint voffset = y + vscroll; - if(offsetPerTile); //todo - hoffset &= maskX; - voffset &= maskY; + if(offsetPerTileMode) { + uint validBit = source == Source::BG1 ? 0x2000 : source == Source::BG2 ? 0x4000 : 0x0000; + uint offsetX = x + (hscroll & 7); + if(offsetX >= 8) { //first column is exempt + uint hvalue = getTile(io.bg3, (offsetX - 8) + (io.bg3.hoffset & ~7), io.bg3.voffset + 0); + if(io.bgMode == 4) { + if(hvalue & validBit) { + if(!(hvalue & 0x8000)) { + hoffset = offsetX + (hvalue & ~7); + } else { + voffset = y + hvalue; + } + } + } else { + uint vvalue = getTile(io.bg3, (offsetX - 8) + (io.bg3.hoffset & ~7), io.bg3.voffset + 8); + if(hvalue & validBit) { + hoffset = offsetX + (hvalue & ~7); + } + if(vvalue & validBit) { + voffset = y + vvalue; + } + } + } + } + hoffset &= hmask; + voffset &= vmask; - uint tileNumber = getTile(hoffset, voffset); + uint tileNumber = getTile(self, hoffset, voffset); uint mirrorY = tileNumber & 0x8000 ? 7 : 0; uint mirrorX = tileNumber & 0x4000 ? 7 : 0; uint tilePriority = tileNumber & 0x2000 ? self.priority[1] : self.priority[0]; @@ -80,8 +89,8 @@ auto PPU::Line::renderBackground(PPU::IO::Background& self, uint source) -> void mosaicCounter = 1 + io.mosaicSize; mosaicPalette = tiledata[tileX ^ mirrorX]; mosaicPriority = tilePriority; - if(directColor) { - //todo + if(directColorMode) { + mosaicColor = directColor(paletteNumber, mosaicPalette); } else { mosaicColor = cgram[paletteIndex + mosaicPalette]; } @@ -89,15 +98,34 @@ auto PPU::Line::renderBackground(PPU::IO::Background& self, uint source) -> void if(!mosaicPalette) continue; if(!hires) { - if(self.aboveEnable) { //todo: window - plotAbove(x, source, mosaicPriority, mosaicColor); - } - if(self.belowEnable) { //todo: window - plotBelow(x, source, mosaicPriority, mosaicColor); - } + if(self.aboveEnable && !windowAbove[x]) plotAbove(x, source, mosaicPriority, mosaicColor); + if(self.belowEnable && !windowBelow[x]) plotBelow(x, source, mosaicPriority, mosaicColor); } else { - //todo + uint X = x >> 1; + if(x & 1) { + if(self.aboveEnable && !windowAbove[X]) plotAbove(X, source, mosaicPriority, mosaicColor); + } else { + if(self.belowEnable && !windowBelow[X]) plotBelow(X, source, mosaicPriority, mosaicColor); + } } } } } + +auto PPU::Line::getTile(PPU::IO::Background& self, uint hoffset, uint voffset) -> uint { + bool hires = io.bgMode == 5 || io.bgMode == 6; + uint width = !hires ? 256 : 512; + uint tileHeight = self.tileSize ? 4 : 3; + uint tileWidth = hires ? 4 : tileHeight; + uint hmask = (width << (tileHeight == 4) << self.screenSize.bit(0)) - 1; + uint vmask = (width << (tileHeight == 4) << self.screenSize.bit(1)) - 1; + uint screenX = self.screenSize.bit(0) ? 32 << 5 : 0; + uint screenY = self.screenSize.bit(1) ? screenX + (32 << 5) : 0; + uint tileX = (hoffset & hmask) >> tileWidth; + uint tileY = (voffset & vmask) >> tileHeight; + uint tilePosition = (tileY & 0x1f) << 5 | (tileX & 0x1f); + if(tileX & 0x20) tilePosition += screenX; + if(tileY & 0x20) tilePosition += screenY; + uint15 tiledataAddress = self.screenAddress + tilePosition; + return ppu.vram[tiledataAddress]; +} diff --git a/higan/sfc/ppu-fast/io.cpp b/higan/sfc/ppu-fast/io.cpp index 85e24177..52b215b6 100644 --- a/higan/sfc/ppu-fast/io.cpp +++ b/higan/sfc/ppu-fast/io.cpp @@ -1,7 +1,6 @@ auto PPU::latchCounters() -> void { - cpu.synchronize(ppu); - io.hcounter = hdot(); - io.vcounter = vcounter(); + io.hcounter = cpu.hdot(); + io.vcounter = cpu.vcounter(); latch.counters = 1; } @@ -17,20 +16,20 @@ auto PPU::vramAddress() const -> uint15 { //uint15 for 64K VRAM; uint16 for 128 } auto PPU::readVRAM() -> uint16 { - if(!io.displayDisable && vcounter() < vdisp()) return 0x0000; + if(!io.displayDisable && cpu.vcounter() < vdisp()) return 0x0000; auto address = vramAddress(); return vram[address]; } auto PPU::writeVRAM(uint1 byte, uint8 data) -> void { - if(!io.displayDisable && vcounter() < vdisp()) return; + if(!io.displayDisable && cpu.vcounter() < vdisp()) return; auto address = vramAddress(); vram[address].byte(byte) = data; auto word = vram[address]; - auto line2bpp = tilecache[0] + (address.bits(3,14) << 6) + (address.bits(0,2) << 3); - auto line4bpp = tilecache[1] + (address.bits(4,14) << 6) + (address.bits(0,2) << 3); - auto line8bpp = tilecache[2] + (address.bits(5,14) << 6) + (address.bits(0,2) << 3); + auto line2bpp = tilecache[TileMode::BPP2] + (address.bits(3,14) << 6) + (address.bits(0,2) << 3); + auto line4bpp = tilecache[TileMode::BPP4] + (address.bits(4,14) << 6) + (address.bits(0,2) << 3); + auto line8bpp = tilecache[TileMode::BPP8] + (address.bits(5,14) << 6) + (address.bits(0,2) << 3); uint plane4bpp = address.bit(3) << 1; uint plane8bpp = address.bit(3) << 1 | address.bit(4) << 2; for(uint x : range(8)) { @@ -44,27 +43,27 @@ auto PPU::writeVRAM(uint1 byte, uint8 data) -> void { } auto PPU::readOAM(uint10 address) -> uint8 { - if(!io.displayDisable && vcounter() < vdisp()) address = latch.oamAddress; + if(!io.displayDisable && cpu.vcounter() < vdisp()) address = latch.oamAddress; return readObject(address); } auto PPU::writeOAM(uint10 address, uint8 data) -> void { - if(!io.displayDisable && vcounter() < vdisp()) address = latch.oamAddress; + if(!io.displayDisable && cpu.vcounter() < vdisp()) address = latch.oamAddress; return writeObject(address, data); } auto PPU::readCGRAM(uint1 byte, uint8 address) -> uint8 { if(!io.displayDisable - && vcounter() > 0 && vcounter() < vdisp() - && hcounter() >= 88 && hcounter() < 1096 + && cpu.vcounter() > 0 && cpu.vcounter() < vdisp() + && cpu.hcounter() >= 88 && cpu.hcounter() < 1096 ) address = latch.cgramAddress; return cgram[address].byte(byte); } auto PPU::writeCGRAM(uint8 address, uint15 data) -> void { if(!io.displayDisable - && vcounter() > 0 && vcounter() < vdisp() - && hcounter() >= 88 && hcounter() < 1096 + && cpu.vcounter() > 0 && cpu.vcounter() < vdisp() + && cpu.hcounter() >= 88 && cpu.hcounter() < 1096 ) address = latch.cgramAddress; cgram[address] = data; } @@ -187,7 +186,7 @@ auto PPU::writeIO(uint24 address, uint8 data) -> void { switch((uint16)address) { case 0x2100: { //INIDISP - if(io.displayDisable && vcounter() == vdisp()) oamAddressReset(); + if(io.displayDisable && cpu.vcounter() == vdisp()) oamAddressReset(); io.displayBrightness = data.bits(0,3); io.displayDisable = data.bit (7); return; @@ -373,6 +372,13 @@ auto PPU::writeIO(uint24 address, uint8 data) -> void { return; } + case 0x211a: { //M7SEL + io.mode7.hflip = data.bit (0); + io.mode7.vflip = data.bit (1); + io.mode7.repeat = data.bits(6,7); + return; + } + case 0x211b: { //M7A io.mode7.a = data << 8 | latch.mode7; latch.mode7 = data; @@ -539,21 +545,22 @@ auto PPU::writeIO(uint24 address, uint8 data) -> void { } case 0x2131: { //CGADDSUB - io.bg1.colorEnable = data.bit(0); - io.bg2.colorEnable = data.bit(1); - io.bg3.colorEnable = data.bit(2); - io.bg4.colorEnable = data.bit(3); - io.obj.colorEnable = data.bit(4); - io.col.colorEnable = data.bit(5); - io.col.colorHalve = data.bit(6); - io.col.colorMode = data.bit(7); + io.col.enable[Source::BG1 ] = data.bit(0); + io.col.enable[Source::BG2 ] = data.bit(1); + io.col.enable[Source::BG3 ] = data.bit(2); + io.col.enable[Source::BG4 ] = data.bit(3); + io.col.enable[Source::OBJ1] = 0; + io.col.enable[Source::OBJ2] = data.bit(4); + io.col.enable[Source::COL ] = data.bit(5); + io.col.halve = data.bit(6); + io.col.mathMode = data.bit(7); return; } case 0x2132: { //COLDATA - if(data.bit(5)) io.col.colorRed = data.bits(0,4); - if(data.bit(6)) io.col.colorGreen = data.bits(0,4); - if(data.bit(7)) io.col.colorBlue = data.bits(0,4); + if(data.bit(5)) io.col.fixedColor.bits( 0, 4) = data.bits(0,4); + if(data.bit(6)) io.col.fixedColor.bits( 5, 9) = data.bits(0,4); + if(data.bit(7)) io.col.fixedColor.bits(10,14) = data.bits(0,4); return; } diff --git a/higan/sfc/ppu-fast/line.cpp b/higan/sfc/ppu-fast/line.cpp index 501e703d..028923cc 100644 --- a/higan/sfc/ppu-fast/line.cpp +++ b/higan/sfc/ppu-fast/line.cpp @@ -1,52 +1,83 @@ auto PPU::Line::render() -> void { - if(io.displayDisable) { - for(uint x : range(512)) { - outputLo[x] = 0; - outputHi[x] = 0; - } - } else { + bool hires = io.pseudoHires || io.bgMode == 5 || io.bgMode == 6; + + if(!io.displayDisable) { auto aboveColor = cgram[0]; - auto belowColor = 0; + auto belowColor = hires ? cgram[0] : io.col.fixedColor; for(uint x : range(256)) { - above[x].source = Source::COL; - above[x].priority = 0; - above[x].color = aboveColor; - below[x].source = Source::COL; - below[x].priority = 0; - below[x].color = belowColor; + above[x] = {Source::COL, 0, aboveColor}; + below[x] = {Source::COL, 0, belowColor}; } } - renderWindow(io.bg1.window); - renderWindow(io.bg2.window); - renderWindow(io.bg3.window); - renderWindow(io.bg4.window); - renderWindow(io.obj.window); - renderWindow(io.col.window); renderBackground(io.bg1, Source::BG1); renderBackground(io.bg2, Source::BG2); renderBackground(io.bg3, Source::BG3); renderBackground(io.bg4, Source::BG4); renderObject(io.obj); - for(uint x : range(512)) { - outputLo[x] = io.displayBrightness << 15 | above[x >> 1].color; - outputHi[x] = io.displayBrightness << 15 | above[x >> 1].color; + if(io.displayDisable) { + for(uint x : range(512)) { + outputLo[x] = 0; + outputHi[x] = 0; + } + return; } + + renderWindow(io.col.window, io.col.window.aboveMask, windowAbove); + renderWindow(io.col.window, io.col.window.belowMask, windowBelow); + + if(!hires) for(uint x : range(256)) { + outputLo[x << 1 | 0] = + outputHi[x << 1 | 0] = + outputLo[x << 1 | 1] = + outputHi[x << 1 | 1] = io.displayBrightness << 15 | pixel(x, above[x], below[x]); + } else for(uint x : range(256)) { + outputLo[x << 1 | 0] = + outputHi[x << 1 | 0] = io.displayBrightness << 15 | pixel(x, below[x], above[x]); + outputLo[x << 1 | 1] = + outputHi[x << 1 | 1] = io.displayBrightness << 15 | pixel(x, above[x], below[x]); + } +} + +auto PPU::Line::pixel(uint x, Pixel above, Pixel below) const -> uint15 { + if(!windowAbove[x]) above.color = 0x0000; + if(!windowBelow[x]) return above.color; + if(!io.col.enable[above.source]) return above.color; + if(!io.col.blendMode) return blend(above.color, io.col.fixedColor, io.col.halve && windowAbove[x]); + return blend(above.color, below.color, io.col.halve && windowAbove[x] && below.source != Source::COL); +} + +auto PPU::Line::blend(uint x, uint y, bool halve) const -> uint15 { + if(!io.col.mathMode) { //add + if(!halve) { + uint sum = x + y; + uint carry = (sum - ((x ^ y) & 0x0421)) & 0x8420; + return (sum - carry) | (carry - (carry >> 5)); + } else { + return (x + y - ((x ^ y) & 0x0421)) >> 1; + } + } else { //sub + uint diff = x - y + 0x8420; + uint borrow = (diff - ((x ^ y) & 0x8420)) & 0x8420; + if(!halve) { + return (diff - borrow) & (borrow - (borrow >> 5)); + } else { + return (((diff - borrow) & (borrow - (borrow >> 5))) & 0x7bde) >> 1; + } + } +} + +auto PPU::Line::directColor(uint palette, uint tile) const -> uint15 { + return (palette << 7 & 0x6000) + (tile >> 0 & 0x1000) + + (palette << 4 & 0x0380) + (tile >> 5 & 0x0040) + + (palette << 2 & 0x001c) + (tile >> 9 & 0x0002); } auto PPU::Line::plotAbove(uint x, uint source, uint priority, uint color) -> void { - if(priority > above[x].priority) { - above[x].source = source; - above[x].priority = priority; - above[x].color = color; - } + if(priority >= above[x].priority) above[x] = {source, priority, color}; } auto PPU::Line::plotBelow(uint x, uint source, uint priority, uint color) -> void { - if(priority > below[x].priority) { - below[x].source = source; - below[x].priority = priority; - below[x].color = color; - } + if(priority >= below[x].priority) below[x] = {source, priority, color}; } diff --git a/higan/sfc/ppu-fast/mode7.cpp b/higan/sfc/ppu-fast/mode7.cpp new file mode 100644 index 00000000..38bc5b3a --- /dev/null +++ b/higan/sfc/ppu-fast/mode7.cpp @@ -0,0 +1,62 @@ +auto PPU::Line::renderMode7(PPU::IO::Background& self, uint source) -> void { + int y = !io.mode7.vflip ? (int)this->y : 255 - y; + int a = (int16)io.mode7.a; + int b = (int16)io.mode7.b; + int c = (int16)io.mode7.c; + int d = (int16)io.mode7.d; + int hcenter = (int13)io.mode7.x; + int vcenter = (int13)io.mode7.y; + int hoffset = (int13)io.mode7.hoffset; + int voffset = (int13)io.mode7.voffset; + + uint mosaicCounter = 1; + uint mosaicPalette = 0; + uint mosaicPriority = 0; + uint mosaicColor = 0; + + auto clip = [](int n) -> int { return n & 0x2000 ? (n | ~1023) : (n & 1023); }; + int originX = (a * clip(hoffset - hcenter) & ~63) + (b * clip(voffset - vcenter) & ~63) + (b * y & ~63) + (hcenter << 8); + int originY = (c * clip(hoffset - hcenter) & ~63) + (d * clip(voffset - vcenter) & ~63) + (d * y & ~63) + (vcenter << 8); + + bool windowAbove[256]; + bool windowBelow[256]; + renderWindow(self.window, self.window.aboveEnable, windowAbove); + renderWindow(self.window, self.window.belowEnable, windowBelow); + + for(int x : range(256)) { + int pixelX = originX + a * x >> 8; + int pixelY = originY + c * x >> 8; + int tileX = pixelX >> 3 & 127; + int tileY = pixelY >> 3 & 127; + bool outOfBounds = (pixelX | pixelY) & ~1023; + uint15 tileAddress = tileY * 128 + tileX; + uint15 paletteAddress = ((pixelY & 7) << 3) + (pixelX & 7); + uint8 tile = ppu.vram[tileAddress].byte(0); + if(io.mode7.repeat == 3 && outOfBounds) tile = 0; + uint8 palette = ppu.vram[paletteAddress + (tile << 6)].byte(1); + if(io.mode7.repeat == 2 && outOfBounds) palette = 0; + + uint priority; + if(source == Source::BG1) { + priority = self.priority[0]; + } else if(source == Source::BG2) { + priority = self.priority[palette >> 7]; + palette &= 0x7f; + } + + if(--mosaicCounter == 0) { + mosaicCounter = 1 + io.mosaicSize; + mosaicPalette = palette; + mosaicPriority = priority; + if(io.col.directColor) { + mosaicColor = directColor(0, palette); + } else { + mosaicColor = ppu.cgram[palette]; + } + } + if(!mosaicPalette) continue; + + if(self.aboveEnable && !windowAbove[x]) plotAbove(x, source, mosaicPriority, mosaicColor); + if(self.belowEnable && !windowBelow[x]) plotBelow(x, source, mosaicPriority, mosaicColor); + } +} diff --git a/higan/sfc/ppu-fast/object.cpp b/higan/sfc/ppu-fast/object.cpp index 8ec9b702..1061292d 100644 --- a/higan/sfc/ppu-fast/object.cpp +++ b/higan/sfc/ppu-fast/object.cpp @@ -1,37 +1,156 @@ -auto PPU::Line::renderObject(PPU::IO::Object&) -> void { +auto PPU::Line::renderObject(PPU::IO::Object& self) -> void { + if(io.displayDisable) return; + if(!self.aboveEnable && !self.belowEnable) return; + + bool windowAbove[256]; + bool windowBelow[256]; + renderWindow(self.window, self.window.aboveEnable, windowAbove); + renderWindow(self.window, self.window.belowEnable, windowBelow); + + uint itemCount = 0; + uint tileCount = 0; + for(auto n : range(32)) items[n].valid = false; + for(auto n : range(34)) tiles[n].valid = false; + + for(auto n : range(128)) { + ObjectItem item{true, self.first + n}; + const auto& object = ppu.objects[item.index]; + + if(object.size == 0) { + static const uint widths[] = { 8, 8, 8, 16, 16, 32, 16, 16}; + static const uint heights[] = { 8, 8, 8, 16, 16, 32, 32, 32}; + item.width = widths [self.baseSize]; + item.height = heights[self.baseSize]; + if(self.interlace && self.baseSize >= 6) item.height = 16; //hardware quirk + } else { + static const uint widths[] = {16, 32, 64, 32, 64, 64, 32, 32}; + static const uint heights[] = {16, 32, 64, 32, 64, 64, 64, 32}; + item.width = widths [self.baseSize]; + item.height = heights[self.baseSize]; + } + + if(object.x > 256 && object.x + item.width - 1 < 512) continue; + uint height = item.height >> self.interlace; + if((y >= object.y && y < object.y + height) + || (object.y + height >= 256 && y < (object.y + height & 255)) + ) { + if(itemCount++ >= 32) break; + items[itemCount - 1] = item; + } + } + + for(int n = 31; n >= 0; n--) { + const auto& item = items[n]; + if(!item.valid) continue; + + const auto& object = ppu.objects[item.index]; + uint tileWidth = item.width >> 3; + int x = object.x; + int y = this->y - object.y & 0xff; + if(self.interlace) y <<= 1; + + if(object.vflip) { + if(item.width == item.height) { + y = item.height - 1 - y; + } else if(y < item.width) { + y = item.width - 1 - y; + } else { + y = item.width + (item.width - 1) - (y - item.width); + } + } + + if(self.interlace) { + y = !object.vflip ? y + ppu.field() : y - ppu.field(); + } + + x &= 511; + y &= 255; + + uint16 tiledataAddress = self.tiledataAddress; + if(object.nameselect) tiledataAddress += 1 + self.nameselect << 12; + uint16 characterX = object.character.bits(0,3); + uint16 characterY = (object.character.bits(4,7) + (y >> 3) & 15) << 4; + + for(uint tileX : range(tileWidth)) { + uint objectX = x + (tileX << 3) & 511; + if(x != 256 && objectX >= 256 && objectX + 7 < 512) continue; + + ObjectTile tile{true}; + tile.x = objectX; + tile.y = y; + tile.priority = object.priority; + tile.palette = 128 + (object.palette << 4); + tile.hflip = object.hflip; + + uint mirrorX = !object.hflip ? tileX : tileWidth - 1 - tileX; + uint address = tiledataAddress + ((characterY + (characterX + mirrorX & 15)) << 4); + tile.number = address >> 4; + + if(tileCount++ >= 34) break; + tiles[tileCount - 1] = tile; + } + } + + self.rangeOver |= itemCount > 32; + self.timeOver |= tileCount > 34; + + for(uint n : range(34)) { + const auto& tile = tiles[n]; + if(!tile.valid) continue; + + auto tiledata = ppu.tilecache[TileMode::BPP4] + (tile.number << 6) + ((tile.y & 7) << 3); + uint tileX = tile.x; + uint mirrorX = tile.hflip ? 7 : 0; + for(uint x : range(8)) { + tileX &= 511; + if(tileX < 256) { + if(uint color = tiledata[x ^ mirrorX]) { + uint source = tile.palette < 192 ? Source::OBJ1 : Source::OBJ2; + uint priority = self.priority[tile.priority]; + color = ppu.cgram[tile.palette + color]; + if(self.aboveEnable && !windowAbove[x]) plotAbove(tileX, source, priority, color); + if(self.belowEnable && !windowBelow[x]) plotBelow(tileX, source, priority, color); + } + } + tileX++; + } + } } auto PPU::oamAddressReset() -> void { + io.oamAddress = io.oamBaseAddress; + oamSetFirstObject(); } auto PPU::oamSetFirstObject() -> void { + io.obj.first = !io.oamPriority ? 0 : io.oamAddress >> 2; } auto PPU::readObject(uint10 address) -> uint8 { if(!address.bit(9)) { uint n = address >> 2; //object# address &= 3; - if(address == 0) return object[n].x.bits(0,7); - if(address == 1) return object[n].y; - if(address == 2) return object[n].character; + if(address == 0) return objects[n].x.bits(0,7); + if(address == 1) return objects[n].y - 1; + if(address == 2) return objects[n].character; return ( - object[n].nameselect << 0 - | object[n].palette << 1 - | object[n].priority << 4 - | object[n].hflip << 6 - | object[n].vflip << 7 + objects[n].nameselect << 0 + | objects[n].palette << 1 + | objects[n].priority << 4 + | objects[n].hflip << 6 + | objects[n].vflip << 7 ); } else { uint n = (address & 0x1f) << 2; //object# return ( - object[n + 0].x.bit(8) << 0 - | object[n + 0].size << 1 - | object[n + 1].x.bit(8) << 2 - | object[n + 1].size << 3 - | object[n + 2].x.bit(8) << 4 - | object[n + 2].size << 5 - | object[n + 3].x.bit(8) << 6 - | object[n + 3].size << 7 + objects[n + 0].x.bit(8) << 0 + | objects[n + 0].size << 1 + | objects[n + 1].x.bit(8) << 2 + | objects[n + 1].size << 3 + | objects[n + 2].x.bit(8) << 4 + | objects[n + 2].size << 5 + | objects[n + 3].x.bit(8) << 6 + | objects[n + 3].size << 7 ); } } @@ -39,23 +158,24 @@ auto PPU::readObject(uint10 address) -> uint8 { auto PPU::writeObject(uint10 address, uint8 data) -> void { if(!address.bit(9)) { uint n = address >> 2; //object# - if(address == 0) { object[n].x.bits(0,7) = data; return; } - if(address == 1) { object[n].y = data; return; } - if(address == 2) { object[n].character = data; return; } - object[n].nameselect = data.bit (0); - object[n].palette = data.bits(1,3); - object[n].priority = data.bits(4,5); - object[n].hflip = data.bit (6); - object[n].vflip = data.bit (7); + address &= 3; + if(address == 0) { objects[n].x.bits(0,7) = data; return; } + if(address == 1) { objects[n].y = data + 1; return; } //+1 => rendering happens one scanline late + if(address == 2) { objects[n].character = data; return; } + objects[n].nameselect = data.bit (0); + objects[n].palette = data.bits(1,3); + objects[n].priority = data.bits(4,5); + objects[n].hflip = data.bit (6); + objects[n].vflip = data.bit (7); } else { uint n = (address & 0x1f) << 2; //object# - object[n + 0].x.bit(8) = data.bit(0); - object[n + 0].size = data.bit(1); - object[n + 1].x.bit(8) = data.bit(2); - object[n + 1].size = data.bit(3); - object[n + 2].x.bit(8) = data.bit(4); - object[n + 2].size = data.bit(5); - object[n + 3].x.bit(8) = data.bit(6); - object[n + 3].size = data.bit(7); + objects[n + 0].x.bit(8) = data.bit(0); + objects[n + 0].size = data.bit(1); + objects[n + 1].x.bit(8) = data.bit(2); + objects[n + 1].size = data.bit(3); + objects[n + 2].x.bit(8) = data.bit(4); + objects[n + 2].size = data.bit(5); + objects[n + 3].x.bit(8) = data.bit(6); + objects[n + 3].size = data.bit(7); } } diff --git a/higan/sfc/ppu-fast/ppu.cpp b/higan/sfc/ppu-fast/ppu.cpp index 4dad4220..0f9b66fb 100644 --- a/higan/sfc/ppu-fast/ppu.cpp +++ b/higan/sfc/ppu-fast/ppu.cpp @@ -6,6 +6,7 @@ PPU ppu; #include "io.cpp" #include "line.cpp" #include "background.cpp" +#include "mode7.cpp" #include "object.cpp" #include "window.cpp" #include "serialization.cpp" @@ -18,9 +19,9 @@ PPU::PPU() { output = new uint32[512 * 512]; output += 16 * 512; //overscan offset - tilecache[0] = new uint8[4096 * 8 * 8]; - tilecache[1] = new uint8[2048 * 8 * 8]; - tilecache[2] = new uint8[1024 * 8 * 8]; + tilecache[TileMode::BPP2] = new uint8[4096 * 8 * 8]; + tilecache[TileMode::BPP4] = new uint8[2048 * 8 * 8]; + tilecache[TileMode::BPP8] = new uint8[1024 * 8 * 8]; for(uint y : range(240)) { lines[y].y = y; @@ -33,9 +34,9 @@ PPU::~PPU() { output -= 16 * 512; //overscan offset delete[] output; - delete[] tilecache[0]; - delete[] tilecache[1]; - delete[] tilecache[2]; + delete[] tilecache[TileMode::BPP2]; + delete[] tilecache[TileMode::BPP4]; + delete[] tilecache[TileMode::BPP8]; } auto PPU::Enter() -> void { @@ -50,24 +51,26 @@ auto PPU::step(uint clocks) -> void { auto PPU::main() -> void { scanline(); - uint y = PPUcounter::vcounter(); - + uint y = vcounter(); step(512); if(y >= 1 && y <= vdisp()) { memory::copy(&lines[y].cgram, &cgram, sizeof(cgram)); memory::copy(&lines[y].io, &io, sizeof(io)); + //lines[y].render(); } - - step(624); - step(PPUcounter::lineclocks() - PPUcounter::hcounter()); + step(lineclocks() - hcounter()); } auto PPU::scanline() -> void { - if(PPUcounter::vcounter() == 0) { + if(vcounter() == 0) { frame(); } - if(PPUcounter::vcounter() == 240) { + if(vcounter() == vdisp() && !io.displayDisable) { + oamAddressReset(); + } + + if(vcounter() == 240) { const uint limit = vdisp(); #pragma omp parallel for for(uint y = 1; y < limit; y++) { @@ -78,6 +81,8 @@ auto PPU::scanline() -> void { } auto PPU::frame() -> void { + io.obj.timeOver = false; + io.obj.rangeOver = false; } auto PPU::refresh() -> void { diff --git a/higan/sfc/ppu-fast/ppu.hpp b/higan/sfc/ppu-fast/ppu.hpp index afc91f17..bfe1c99c 100644 --- a/higan/sfc/ppu-fast/ppu.hpp +++ b/higan/sfc/ppu-fast/ppu.hpp @@ -5,13 +5,6 @@ //* mid-frame OAM changes not supported struct PPU : Thread, PPUcounter { - //as a scanline-based renderer, PPU::PPUcounter values are not cycle-accurate - alwaysinline auto field() const -> bool { return cpu.field(); } - alwaysinline auto vcounter() const -> uint16 { return cpu.vcounter(); } - alwaysinline auto hcounter() const -> uint16 { return cpu.hcounter(); } - alwaysinline auto hdot() const -> uint16 { return cpu.hdot(); } - alwaysinline auto lineclocks() const -> uint16 { return cpu.lineclocks(); } - alwaysinline auto interlace() const -> bool { return false; } alwaysinline auto overscan() const -> bool { return false; } alwaysinline auto vdisp() const -> uint { return !io.overscan ? 225 : 240; } @@ -125,6 +118,9 @@ public: } window; struct Mode7 { + uint1 hflip; + uint1 vflip; + uint2 repeat; uint16 a; uint16 b; uint16 c; @@ -139,7 +135,6 @@ public: WindowLayer window; uint1 aboveEnable; uint1 belowEnable; - uint1 colorEnable; uint1 mosaicEnable; uint15 tiledataAddress; uint15 screenAddress; @@ -155,12 +150,11 @@ public: WindowLayer window; uint1 aboveEnable; uint1 belowEnable; - uint1 colorEnable; uint1 interlace; uint3 baseSize; uint2 nameselect; uint15 tiledataAddress; - uint7 firstObject; + uint7 first; uint1 rangeOver; uint1 timeOver; uint4 priority[4]; @@ -168,14 +162,12 @@ public: struct Color { WindowColor window; - uint1 colorEnable; - uint1 directColor; - uint1 blendMode; - uint1 colorHalve; - uint1 colorMode; - uint5 colorRed; - uint5 colorGreen; - uint5 colorBlue; + uint1 enable[7]; + uint1 directColor; + uint1 blendMode; //0 = fixed; 1 = pixel + uint1 halve; + uint1 mathMode; //0 = add; 1 = sub + uint15 fixedColor; } col; } io; @@ -195,23 +187,32 @@ public: uint2 priority; uint3 palette; uint1 size; - } object[128]; + } objects[128]; struct Line { + struct Pixel; + //line.cpp auto render() -> void; + auto pixel(uint x, Pixel above, Pixel below) const -> uint15; + auto blend(uint x, uint y, bool halve) const -> uint15; + alwaysinline auto directColor(uint palette, uint tile) const -> uint15; alwaysinline auto plotAbove(uint x, uint source, uint priority, uint color) -> void; alwaysinline auto plotBelow(uint x, uint source, uint priority, uint color) -> void; //background.cpp auto renderBackground(PPU::IO::Background&, uint source) -> void; + auto getTile(PPU::IO::Background&, uint hoffset, uint voffset) -> uint; + + //mode7.cpp + auto renderMode7(PPU::IO::Background&, uint source) -> void; //object.cpp auto renderObject(PPU::IO::Object&) -> void; //window.cpp - auto renderWindow(PPU::IO::WindowLayer&) -> void; - auto renderWindow(PPU::IO::WindowColor&) -> void; + auto renderWindow(PPU::IO::WindowLayer&, bool, bool*) -> void; + auto renderWindow(PPU::IO::WindowColor&, uint, bool*) -> void; uint9 y; uint32* outputLo = nullptr; @@ -220,11 +221,31 @@ public: uint15 cgram[256]; IO io; - struct Screen { + struct ObjectItem { + uint1 valid; + uint7 index; + uint8 width; + uint8 height; + } items[32]; + + struct ObjectTile { + uint1 valid; + uint9 x; + uint8 y; + uint2 priority; + uint8 palette; + uint1 hflip; + uint11 number; + } tiles[34]; + + struct Pixel { uint source; uint priority; uint color; } above[256], below[256]; + + bool windowAbove[256]; + bool windowBelow[256]; } lines[240]; }; diff --git a/higan/sfc/ppu-fast/window.cpp b/higan/sfc/ppu-fast/window.cpp index a7cd7227..8b0ad4c2 100644 --- a/higan/sfc/ppu-fast/window.cpp +++ b/higan/sfc/ppu-fast/window.cpp @@ -1,5 +1,75 @@ -auto PPU::Line::renderWindow(PPU::IO::WindowLayer&) -> void { +auto PPU::Line::renderWindow(PPU::IO::WindowLayer& self, bool enable, bool* output) -> void { + if(!enable || (!self.oneEnable && !self.twoEnable)) { + memory::fill(output, 256, 0); + return; + } + + if(self.oneEnable && !self.twoEnable) { + bool set = 1 ^ self.oneInvert, clear = !set; + for(uint x : range(256)) { + output[x] = x >= io.window.oneLeft && x <= io.window.oneRight ? set : clear; + } + return; + } + + if(self.twoEnable && !self.oneEnable) { + bool set = 1 ^ self.twoInvert, clear = !set; + for(uint x : range(256)) { + output[x] = x >= io.window.twoLeft && x <= io.window.twoRight ? set : clear; + } + return; + } + + for(uint x : range(256)) { + bool oneMask = (x >= io.window.oneLeft && x <= io.window.oneRight) ^ self.oneInvert; + bool twoMask = (x >= io.window.twoLeft && x <= io.window.twoRight) ^ self.twoInvert; + switch(self.mask) { + case 0: output[x] = (oneMask | twoMask) == 1; break; + case 1: output[x] = (oneMask & twoMask) == 1; break; + case 2: output[x] = (oneMask ^ twoMask) == 1; break; + case 3: output[x] = (oneMask ^ twoMask) == 0; break; + } + } } -auto PPU::Line::renderWindow(PPU::IO::WindowColor&) -> void { +auto PPU::Line::renderWindow(PPU::IO::WindowColor& self, uint mask, bool* output) -> void { + bool set, clear; + switch(mask) { + case 0: memory::fill(output, 256, 1); return; //always + case 1: set = 1, clear = 0; break; //inside + case 2: set = 0, clear = 1; break; //outside + case 3: memory::fill(output, 256, 0); return; //never + } + + if(!self.oneEnable && !self.twoEnable) { + memory::fill(output, 256, clear); + return; + } + + if(self.oneEnable && !self.twoEnable) { + if(self.oneInvert) set ^= 1, clear ^= 1; + for(uint x : range(256)) { + output[x] = x >= io.window.oneLeft && x <= io.window.oneRight ? set : clear; + } + return; + } + + if(self.twoEnable && !self.oneEnable) { + if(self.twoInvert) set ^= 1, clear ^= 1; + for(uint x : range(256)) { + output[x] = x >= io.window.twoLeft && x <= io.window.twoRight ? set : clear; + } + return; + } + + for(uint x : range(256)) { + bool oneMask = (x >= io.window.oneLeft && x <= io.window.oneRight) ^ self.oneInvert; + bool twoMask = (x >= io.window.twoLeft && x <= io.window.twoRight) ^ self.twoInvert; + switch(self.mask) { + case 0: output[x] = (oneMask | twoMask) == 1 ? set : clear; break; + case 1: output[x] = (oneMask & twoMask) == 1 ? set : clear; break; + case 2: output[x] = (oneMask ^ twoMask) == 1 ? set : clear; break; + case 3: output[x] = (oneMask ^ twoMask) == 0 ? set : clear; break; + } + } } diff --git a/higan/sfc/ppu/object.cpp b/higan/sfc/ppu/object.cpp index c63a6864..a23a96aa 100644 --- a/higan/sfc/ppu/object.cpp +++ b/higan/sfc/ppu/object.cpp @@ -28,8 +28,8 @@ auto PPU::Object::scanline() -> void { if(t.y == ppu.vdisp() && !ppu.io.displayDisable) addressReset(); if(t.y >= ppu.vdisp() - 1) return; - for(auto n : range(32)) oamItem[n].valid = false; //default to invalid - for(auto n : range(34)) oamTile[n].valid = false; //default to invalid + for(auto n : range(32)) oamItem[n].valid = false; + for(auto n : range(34)) oamTile[n].valid = false; for(auto n : range(128)) { uint7 sprite = io.firstSprite + n; @@ -44,10 +44,10 @@ auto PPU::Object::scanline() -> void { } auto PPU::Object::onScanline(PPU::OAM::Object& sprite) -> bool { - if(sprite.x > 256 && (sprite.x + sprite.width() - 1) < 512) return false; - int height = sprite.height() >> io.interlace; - if(t.y >= sprite.y && t.y < (sprite.y + height)) return true; - if((sprite.y + height) >= 256 && t.y < ((sprite.y + height) & 255)) return true; + if(sprite.x > 256 && sprite.x + sprite.width() - 1 < 512) return false; + uint height = sprite.height() >> io.interlace; + if(t.y >= sprite.y && t.y < sprite.y + height) return true; + if(sprite.y + height >= 256 && t.y < (sprite.y + height & 255)) return true; return false; } @@ -95,16 +95,16 @@ auto PPU::Object::tilefetch() -> void { uint tileWidth = sprite.width() >> 3; int x = sprite.x; - int y = (t.y - sprite.y) & 0xff; + int y = t.y - sprite.y & 0xff; if(io.interlace) y <<= 1; if(sprite.vflip) { if(sprite.width() == sprite.height()) { - y = (sprite.height() - 1) - y; + y = sprite.height() - 1 - y; } else if(y < sprite.width()) { - y = (sprite.width() - 1) - y; + y = sprite.width() - 1 - y; } else { - y = sprite.width() + ((sprite.width() - 1) - (y - sprite.width())); + y = sprite.width() + (sprite.width() - 1) - (y - sprite.width()); } } @@ -116,18 +116,13 @@ auto PPU::Object::tilefetch() -> void { y &= 255; uint16 tiledataAddress = io.tiledataAddress; - uint16 chrx = (sprite.character >> 0) & 15; - uint16 chry = (sprite.character >> 4) & 15; - if(sprite.nameselect) { - tiledataAddress += (256 * 16) + (io.nameselect << 12); - } - chry += (y >> 3); - chry &= 15; - chry <<= 4; + if(sprite.nameselect) tiledataAddress += 1 + io.nameselect << 12; + uint16 chrx = sprite.character.bits(0,3); + uint16 chry = (sprite.character.bits(4,7) + (y >> 3) & 15) << 4; for(uint tx : range(tileWidth)) { - uint sx = (x + (tx << 3)) & 511; - if(x != 256 && sx >= 256 && (sx + 7) < 512) continue; + uint sx = x + (tx << 3) & 511; + if(x != 256 && sx >= 256 && sx + 7 < 512) continue; if(t.tileCount++ >= 34) break; uint n = t.tileCount - 1; @@ -137,8 +132,8 @@ auto PPU::Object::tilefetch() -> void { oamTile[n].palette = 128 + (sprite.palette << 4); oamTile[n].hflip = sprite.hflip; - uint mx = !sprite.hflip ? tx : (tileWidth - 1) - tx; - uint pos = tiledataAddress + ((chry + ((chrx + mx) & 15)) << 4); + uint mx = !sprite.hflip ? tx : tileWidth - 1 - tx; + uint pos = tiledataAddress + ((chry + (chrx + mx & 15)) << 4); uint16 addr = (pos & 0xfff0) + (y & 7); oamTile[n].data.bits( 0,15) = ppu.vram[addr + 0]; diff --git a/higan/sfc/ppu/screen.cpp b/higan/sfc/ppu/screen.cpp index 193b0c3b..5cfe37f9 100644 --- a/higan/sfc/ppu/screen.cpp +++ b/higan/sfc/ppu/screen.cpp @@ -123,7 +123,7 @@ auto PPU::Screen::above() -> uint16 { } auto PPU::Screen::blend(uint x, uint y) const -> uint15 { - if(!io.colorMode) { + if(!io.colorMode) { //add if(!math.colorHalve) { uint sum = x + y; uint carry = (sum - ((x ^ y) & 0x0421)) & 0x8420; @@ -131,7 +131,7 @@ auto PPU::Screen::blend(uint x, uint y) const -> uint15 { } else { return (x + y - ((x ^ y) & 0x0421)) >> 1; } - } else { + } else { //sub uint diff = x - y + 0x8420; uint borrow = (diff - ((x ^ y) & 0x8420)) & 0x8420; if(!math.colorHalve) { @@ -151,9 +151,9 @@ auto PPU::Screen::directColor(uint palette, uint tile) const -> uint15 { //palette = -------- BBGGGRRR //tile = ---bgr-- -------- //output = 0BBb00GG Gg0RRRr0 - return ((palette << 7) & 0x6000) + ((tile >> 0) & 0x1000) - + ((palette << 4) & 0x0380) + ((tile >> 5) & 0x0040) - + ((palette << 2) & 0x001c) + ((tile >> 9) & 0x0002); + return (palette << 7 & 0x6000) + (tile >> 0 & 0x1000) + + (palette << 4 & 0x0380) + (tile >> 5 & 0x0040) + + (palette << 2 & 0x001c) + (tile >> 9 & 0x0002); } auto PPU::Screen::fixedColor() const -> uint15 {