From 6c8e3c885d499b4479c2cf77bbec7908f1d6098f Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Sat, 26 May 2018 13:29:14 +1000 Subject: [PATCH] Update to v106r28 release. byuu says: Changelog: - SNES: started on skeleton of the new parallel PPU core To build the new PPU core, set profile=fast via GNU make. The old core is profile=accurate. The names of the profiles, and the name of the folder for the fast PPU are subject to change. The new PPU core doesn't do anything but demonstrate the proof of concept: every scanline, make a copy of all the PPU registers and CGRAM. Share the VRAM and OAM. Batch render all scanlines at once using OpenMP at the end of each frame and blit the result. With no PPU core at all, bsnes runs 91% faster than with the accuracy PPU (230fps vs 120fps.) That's the absolute theoretical best-case scenario. With the skeleton in place, we're already around 220fps. It'll go down more as the PPU line renderer starts to do real work. I don't know where things will end up yet. I suppose we'll find out in time. My own copy of TDM/GCC can't use OpenMP on Windows, so ... it won't parallelize if you build with that. I'm going to have to switch to a different MinGW distribution once this is complete, I suppose. --- higan/GNUmakefile | 2 + higan/emulator/emulator.hpp | 2 +- higan/sfc/GNUmakefile | 3 +- higan/sfc/ppu-fast/io.cpp | 76 +++++++++++++++++ higan/sfc/ppu-fast/line.cpp | 6 ++ higan/sfc/ppu-fast/object.cpp | 52 ++++++++++++ higan/sfc/ppu-fast/ppu.cpp | 92 +++++++++++++++++++++ higan/sfc/ppu-fast/ppu.hpp | 104 ++++++++++++++++++++++++ higan/sfc/ppu-fast/serialization.cpp | 4 + higan/sfc/ppu/counter/serialization.cpp | 11 +++ higan/sfc/ppu/object/oam.cpp | 32 ++++---- higan/sfc/ppu/object/object.hpp | 4 +- higan/sfc/ppu/ppu.cpp | 1 + higan/sfc/ppu/serialization.cpp | 12 --- higan/sfc/sfc.hpp | 4 + 15 files changed, 373 insertions(+), 32 deletions(-) create mode 100644 higan/sfc/ppu-fast/io.cpp create mode 100644 higan/sfc/ppu-fast/line.cpp create mode 100644 higan/sfc/ppu-fast/object.cpp create mode 100644 higan/sfc/ppu-fast/ppu.cpp create mode 100644 higan/sfc/ppu-fast/ppu.hpp create mode 100644 higan/sfc/ppu-fast/serialization.cpp create mode 100644 higan/sfc/ppu/counter/serialization.cpp diff --git a/higan/GNUmakefile b/higan/GNUmakefile index 47fd99b1..8f8e4e2c 100644 --- a/higan/GNUmakefile +++ b/higan/GNUmakefile @@ -3,9 +3,11 @@ include ../nall/GNUmakefile binary := application target := higan +profile := accurate objects := libco emulator audio video resource flags += -I. -I.. +flags += $(if $(call streq,$(profile),accurate),-DPROFILE_ACCURATE,-DPROFILE_FAST) ifeq ($(platform),windows) ifeq ($(binary),application) diff --git a/higan/emulator/emulator.hpp b/higan/emulator/emulator.hpp index 43a98c1d..72946255 100644 --- a/higan/emulator/emulator.hpp +++ b/higan/emulator/emulator.hpp @@ -12,7 +12,7 @@ using namespace nall; namespace Emulator { static const string Name = "higan"; - static const string Version = "106.27"; + static const string Version = "106.28"; static const string Author = "byuu"; static const string License = "GPLv3"; static const string Website = "https://byuu.org/"; diff --git a/higan/sfc/GNUmakefile b/higan/sfc/GNUmakefile index d197c579..e36a314a 100644 --- a/higan/sfc/GNUmakefile +++ b/higan/sfc/GNUmakefile @@ -2,7 +2,7 @@ processors += wdc65816 spc700 arm7tdmi gsu hg51b upd96050 objects += sfc-interface sfc-system sfc-controller objects += sfc-cartridge sfc-memory -objects += sfc-cpu sfc-smp sfc-dsp sfc-ppu +objects += sfc-cpu sfc-smp sfc-dsp $(if $(call streq,$(profile),accurate),sfc-ppu,sfc-ppu-fast) objects += sfc-expansion sfc-satellaview sfc-21fx objects += sfc-icd sfc-mcc sfc-dip sfc-event objects += sfc-sa1 sfc-superfx @@ -22,6 +22,7 @@ obj/sfc-cpu.o: sfc/cpu/cpu.cpp obj/sfc-smp.o: sfc/smp/smp.cpp obj/sfc-dsp.o: sfc/dsp/dsp.cpp obj/sfc-ppu.o: sfc/ppu/ppu.cpp +obj/sfc-ppu-fast.o: sfc/ppu-fast/ppu.cpp obj/sfc-expansion.o: sfc/expansion/expansion.cpp obj/sfc-satellaview.o: sfc/expansion/satellaview/satellaview.cpp diff --git a/higan/sfc/ppu-fast/io.cpp b/higan/sfc/ppu-fast/io.cpp new file mode 100644 index 00000000..b17ac029 --- /dev/null +++ b/higan/sfc/ppu-fast/io.cpp @@ -0,0 +1,76 @@ +auto PPU::readIO(uint24 address, uint8 data) -> uint8 { + cpu.synchronize(ppu); + + switch((uint16)address) { + + case 0x2104: case 0x2105: case 0x2106: case 0x2108: + case 0x2109: case 0x210a: case 0x2114: case 0x2115: + case 0x2116: case 0x2118: case 0x2119: case 0x211a: + case 0x2124: case 0x2125: case 0x2126: case 0x2128: + case 0x2129: case 0x212a: { + return ppu1.mdr; + } + + case 0x2134: { //MPYL + uint24 result = (int16)io.m7a * (int8)(io.m7b >> 8); + return ppu1.mdr = result.byte(0); + } + + case 0x2135: { //MPYM + uint24 result = (int16)io.m7a * (int8)(io.m7b >> 8); + return ppu1.mdr = result.byte(1); + } + + case 0x2136: { //MPYH + uint24 result = (int16)io.m7a * (int8)(io.m7b >> 8); + return ppu1.mdr = result.byte(2); + } + + } + + return data; +} + +auto PPU::writeIO(uint24 address, uint8 data) -> void { + cpu.synchronize(ppu); + + switch((uint16)address) { + + case 0x211b: { //M7A + io.m7a = data << 8 | latch.mode7; + latch.mode7 = data; + return; + } + + case 0x211c: { //M7B + io.m7b = data << 8 | latch.mode7; + latch.mode7 = data; + return; + } + + case 0x211d: { //M7C + io.m7c = data << 8 | latch.mode7; + latch.mode7 = data; + return; + } + + case 0x211e: { //M7D + io.m7d = data << 8 | latch.mode7; + latch.mode7 = data; + return; + } + + case 0x211f: { //M7X + io.m7x = data << 8 | latch.mode7; + latch.mode7 = data; + return; + } + + case 0x2120: { //M7Y + io.m7y = data << 8 | latch.mode7; + latch.mode7 = data; + return; + } + + } +} diff --git a/higan/sfc/ppu-fast/line.cpp b/higan/sfc/ppu-fast/line.cpp new file mode 100644 index 00000000..c2ec460d --- /dev/null +++ b/higan/sfc/ppu-fast/line.cpp @@ -0,0 +1,6 @@ +auto PPU::Line::render() -> void { + for(uint x : range(512)) { + outputLo[x] = 0x7ffff; + outputHi[x] = 0x7ffff; + } +} diff --git a/higan/sfc/ppu-fast/object.cpp b/higan/sfc/ppu-fast/object.cpp new file mode 100644 index 00000000..8f6bf2bd --- /dev/null +++ b/higan/sfc/ppu-fast/object.cpp @@ -0,0 +1,52 @@ +auto PPU::readOAM(uint10 address) -> uint8 { + if(!address.bit(9)) { + uint n = address >> 2; //object# + address &= 3; + if(address == 0) return object[n].x.bits(0,7); + if(address == 1) return object[n].y; + if(address == 2) return object[n].character; + return ( + object[n].nameselect << 0 + | object[n].palette << 1 + | object[n].priority << 4 + | object[n].hflip << 6 + | object[n].vflip << 7 + ); + } else { + uint n = (address & 0x1f) << 2; //object# + return ( + object[n + 0].x.bit(8) << 0 + | object[n + 0].size << 1 + | object[n + 1].x.bit(8) << 2 + | object[n + 1].size << 3 + | object[n + 2].x.bit(8) << 4 + | object[n + 2].size << 5 + | object[n + 3].x.bit(8) << 6 + | object[n + 3].size << 7 + ); + } +} + +auto PPU::writeOAM(uint10 address, uint8 data) -> void { + if(!address.bit(9)) { + uint n = address >> 2; //object# + if(address == 0) { object[n].x.bits(0,7) = data; return; } + if(address == 1) { object[n].y = data; return; } + if(address == 2) { object[n].character = data; return; } + object[n].nameselect = data.bit (0); + object[n].palette = data.bits(1,3); + object[n].priority = data.bits(4,5); + object[n].hflip = data.bit (6); + object[n].vflip = data.bit (7); + } else { + uint n = (address & 0x1f) << 2; //object# + object[n + 0].x.bit(8) = data.bit(0); + object[n + 0].size = data.bit(1); + object[n + 1].x.bit(8) = data.bit(2); + object[n + 1].size = data.bit(3); + object[n + 2].x.bit(8) = data.bit(4); + object[n + 2].size = data.bit(5); + object[n + 3].x.bit(8) = data.bit(6); + object[n + 3].size = data.bit(7); + } +} diff --git a/higan/sfc/ppu-fast/ppu.cpp b/higan/sfc/ppu-fast/ppu.cpp new file mode 100644 index 00000000..25eabab4 --- /dev/null +++ b/higan/sfc/ppu-fast/ppu.cpp @@ -0,0 +1,92 @@ +#include + +namespace SuperFamicom { + +PPU ppu; +#include "io.cpp" +#include "object.cpp" +#include "line.cpp" +#include "serialization.cpp" +#include + +PPU::PPU() { + output = new uint32[512 * 512]; + output += 16 * 512; //overscan offset + for(uint y : range(240)) { + lines[y].y = y; + lines[y].outputLo = output + (y * 2 + 0) * 512; + lines[y].outputHi = output + (y * 2 + 1) * 512; + } +} + +PPU::~PPU() { + output -= 16 * 512; //overscan offset + delete[] output; +} + +auto PPU::Enter() -> void { + while(true) scheduler.synchronize(), ppu.main(); +} + +auto PPU::step(uint clocks) -> void { + tick(clocks); + Thread::step(clocks); + synchronize(cpu); +} + +auto PPU::main() -> void { + scanline(); + uint y = vcounter(); + + step(512); + if(y >= 1 && y <= vdisp()) { + memory::copy(&lines[y].cgram, &cgram, sizeof(cgram)); + memory::copy(&lines[y].io, &io, sizeof(io)); + } + + step(624); + + step(lineclocks() - 512 - 624); +} + +auto PPU::scanline() -> void { + if(vcounter() == 0) { + frame(); + } + + if(vcounter() == 241) { + #pragma omp parallel for + for(uint y = 1; y < vdisp(); y++) { + lines[y].render(); + } + scheduler.exit(Scheduler::Event::Frame); + } +} + +auto PPU::frame() -> void { +} + +auto PPU::refresh() -> void { + auto output = this->output; + if(!overscan()) output -= 14 * 512; + auto pitch = 512; + auto width = 512; + auto height = 480; + Emulator::video.refresh(output, pitch * sizeof(uint32), width, height); +} + +auto PPU::load(Markup::Node node) -> bool { + return true; +} + +auto PPU::power(bool reset) -> void { + create(Enter, system.cpuFrequency()); + PPUcounter::reset(); + memory::fill(output, 512 * 480 * sizeof(uint32)); + + function uint8> reader{&PPU::readIO, this}; + function void> writer{&PPU::writeIO, this}; + bus.map(reader, writer, "00-3f,80-bf:2100-213f"); +} + +} diff --git a/higan/sfc/ppu-fast/ppu.hpp b/higan/sfc/ppu-fast/ppu.hpp new file mode 100644 index 00000000..f616d3f0 --- /dev/null +++ b/higan/sfc/ppu-fast/ppu.hpp @@ -0,0 +1,104 @@ +struct PPU : Thread, PPUcounter { + alwaysinline auto interlace() const -> bool { return false; } + alwaysinline auto overscan() const -> bool { return false; } + alwaysinline auto vdisp() const -> uint { return 225; } + + //ppu.cpp + PPU(); + ~PPU(); + + static auto Enter() -> void; + alwaysinline auto step(uint clocks) -> void; + auto main() -> void; + auto scanline() -> void; + auto frame() -> void; + auto refresh() -> void; + auto load(Markup::Node) -> bool; + auto power(bool reset) -> void; + + auto latchCounters() -> void {} + + //serialization.cpp + auto serialize(serializer&) -> void; + +public: + uint32* output = nullptr; + uint16 vram[32 * 1024]; + uint16 cgram[256]; + + struct { + uint4 version; + uint8 mdr; + } ppu1, ppu2; + + struct Latch { + uint8 mode7; + } latch; + + enum : uint { + INIDISP = 0x00, OBSEL = 0x01, OAMADDL = 0x02, OAMADDH = 0x03, + OAMDATA = 0x04, BGMODE = 0x05, MOSAIC = 0x06, BG1SC = 0x07, + BG2SC = 0x08, BG3SC = 0x09, BG4SC = 0x0a, BG12NBA = 0x0b, + BG34NBA = 0x0c, BG1HOFS = 0x0d, BG1VOFS = 0x0e, BG2HOFS = 0x0f, + BG2VOFS = 0x10, BG3HOFS = 0x11, BG3VOFS = 0x12, BG4HOFS = 0x13, + BG4VOFS = 0x14, VMAIN = 0x15, VMADDL = 0x16, VMADDH = 0x17, + VMDATAL = 0x18, VMDATAH = 0x19, M7SEL = 0x1a, M7A = 0x1b, + M7B = 0x1c, M7C = 0x1d, M7D = 0x1e, M7X = 0x1f, + M7Y = 0x20, CGADD = 0x21, CGDATA = 0x22, W12SEL = 0x23, + W34SEL = 0x24, WOBJSEL = 0x25, WH0 = 0x26, WH1 = 0x27, + WH2 = 0x28, WH3 = 0x29, WBGLOG = 0x2a, WOBJLOG = 0x2b, + TM = 0x2c, TS = 0x2d, TMW = 0x2e, TSW = 0x2f, + CGWSEL = 0x30, CGADDSUB = 0x31, COLDATA = 0x32, SETINI = 0x33, + MPYL = 0x34, MPYM = 0x35, MPYH = 0x36, SLHV = 0x37, + OAMDATAREAD = 0x38, VMDATALREAD = 0x39, VMDATAHREAD = 0x3a, CGDATAREAD = 0x3b, + OPHCT = 0x3c, OPVCT = 0x3d, STAT77 = 0x3e, STAT78 = 0x3f, + }; + + //io.cpp + auto readIO(uint24 address, uint8 data) -> uint8; + auto writeIO(uint24 address, uint8 data) -> void; + + struct IO { + uint16 m7a; + uint16 m7b; + uint16 m7c; + uint16 m7d; + uint16 m7x; + uint16 m7y; + } io; + + //object.cpp + auto readOAM(uint10 address) -> uint8; + auto writeOAM(uint10 address, uint8 data) -> void; + + struct Object { + uint9 x; + uint8 y; + uint8 character; + uint1 nameselect; + uint1 vflip; + uint1 hflip; + uint2 priority; + uint3 palette; + uint1 size; + } object[128]; + + //bitplane -> bitmap tile caches + uint8 vram2bpp[4096 * 64]; + uint8 vram4bpp[2048 * 64]; + uint8 vram8bpp[1024 * 64]; + + struct Line { + //line.cpp + auto render() -> void; + + uint y = 0; + uint32* outputLo = nullptr; + uint32* outputHi = nullptr; + + uint15 cgram[256]; + IO io; + } lines[240]; +}; + +extern PPU ppu; diff --git a/higan/sfc/ppu-fast/serialization.cpp b/higan/sfc/ppu-fast/serialization.cpp new file mode 100644 index 00000000..6abea464 --- /dev/null +++ b/higan/sfc/ppu-fast/serialization.cpp @@ -0,0 +1,4 @@ +auto PPU::serialize(serializer& s) -> void { + Thread::serialize(s); + PPUcounter::serialize(s); +} diff --git a/higan/sfc/ppu/counter/serialization.cpp b/higan/sfc/ppu/counter/serialization.cpp new file mode 100644 index 00000000..3dae5c70 --- /dev/null +++ b/higan/sfc/ppu/counter/serialization.cpp @@ -0,0 +1,11 @@ +auto PPUcounter::serialize(serializer& s) -> void { + s.integer(status.interlace); + s.integer(status.field); + s.integer(status.vcounter); + s.integer(status.hcounter); + + s.array(history.field); + s.array(history.vcounter); + s.array(history.hcounter); + s.integer(history.index); +} diff --git a/higan/sfc/ppu/object/oam.cpp b/higan/sfc/ppu/object/oam.cpp index c3e537b2..611bbf29 100644 --- a/higan/sfc/ppu/object/oam.cpp +++ b/higan/sfc/ppu/object/oam.cpp @@ -1,10 +1,10 @@ -auto PPU::OAM::read(uint10 addr) -> uint8 { - if(!addr.bit(9)) { - uint n = addr >> 2; //object# - addr &= 3; - if(addr == 0) return object[n].x.bits(0,7); - if(addr == 1) return object[n].y; - if(addr == 2) return object[n].character; +auto PPU::OAM::read(uint10 address) -> uint8 { + if(!address.bit(9)) { + uint n = address >> 2; //object# + address &= 3; + if(address == 0) return object[n].x.bits(0,7); + if(address == 1) return object[n].y; + if(address == 2) return object[n].character; return ( object[n].nameselect << 0 | object[n].palette << 1 @@ -13,7 +13,7 @@ auto PPU::OAM::read(uint10 addr) -> uint8 { | object[n].vflip << 7 ); } else { - uint n = (addr & 0x1f) << 2; //object# + uint n = (address & 0x1f) << 2; //object# return ( object[n + 0].x.bit(8) << 0 | object[n + 0].size << 1 @@ -27,20 +27,20 @@ auto PPU::OAM::read(uint10 addr) -> uint8 { } } -auto PPU::OAM::write(uint10 addr, uint8 data) -> void { - if(!addr.bit(9)) { - uint n = addr >> 2; //object# - addr &= 3; - if(addr == 0) { object[n].x.bits(0,7) = data; return; } - if(addr == 1) { object[n].y = data; return; } - if(addr == 2) { object[n].character = data; return; } +auto PPU::OAM::write(uint10 address, uint8 data) -> void { + if(!address.bit(9)) { + uint n = address >> 2; //object# + address &= 3; + if(address == 0) { object[n].x.bits(0,7) = data; return; } + if(address == 1) { object[n].y = data; return; } + if(address == 2) { object[n].character = data; return; } object[n].nameselect = data.bit (0); object[n].palette = data.bits(1,3); object[n].priority = data.bits(4,5); object[n].hflip = data.bit (6); object[n].vflip = data.bit (7); } else { - uint n = (addr & 0x1f) << 2; //object# + uint n = (address & 0x1f) << 2; //object# object[n + 0].x.bit(8) = data.bit(0); object[n + 0].size = data.bit(1); object[n + 1].x.bit(8) = data.bit(2); diff --git a/higan/sfc/ppu/object/object.hpp b/higan/sfc/ppu/object/object.hpp index f667f744..a8f2944a 100644 --- a/higan/sfc/ppu/object/object.hpp +++ b/higan/sfc/ppu/object/object.hpp @@ -1,6 +1,6 @@ struct OAM { - auto read(uint10 addr) -> uint8; - auto write(uint10 addr, uint8 data) -> void; + auto read(uint10 address) -> uint8; + auto write(uint10 address, uint8 data) -> void; struct Object { alwaysinline auto width() const -> uint; diff --git a/higan/sfc/ppu/ppu.cpp b/higan/sfc/ppu/ppu.cpp index fb66b014..82c3457f 100644 --- a/higan/sfc/ppu/ppu.cpp +++ b/higan/sfc/ppu/ppu.cpp @@ -10,6 +10,7 @@ PPU ppu; #include "window/window.cpp" #include "screen/screen.cpp" #include "serialization.cpp" +#include "counter/serialization.cpp" PPU::PPU() : bg1(Background::ID::BG1), diff --git a/higan/sfc/ppu/serialization.cpp b/higan/sfc/ppu/serialization.cpp index 10b4da0a..e1441f4a 100644 --- a/higan/sfc/ppu/serialization.cpp +++ b/higan/sfc/ppu/serialization.cpp @@ -1,15 +1,3 @@ -auto PPUcounter::serialize(serializer& s) -> void { - s.integer(status.interlace); - s.integer(status.field); - s.integer(status.vcounter); - s.integer(status.hcounter); - - s.array(history.field); - s.array(history.vcounter); - s.array(history.hcounter); - s.integer(history.index); -} - auto PPU::serialize(serializer& s) -> void { Thread::serialize(s); PPUcounter::serialize(s); diff --git a/higan/sfc/sfc.hpp b/higan/sfc/sfc.hpp index 77b3378c..0d2737b7 100644 --- a/higan/sfc/sfc.hpp +++ b/higan/sfc/sfc.hpp @@ -52,7 +52,11 @@ namespace SuperFamicom { #include #include #include +#if defined(PROFILE_ACCURATE) #include +#elif defined(PROFILE_FAST) + #include +#endif #include #include