mirror of https://github.com/bsnes-emu/bsnes.git
Update to v106r28 release.
byuu says: Changelog: - SNES: started on skeleton of the new parallel PPU core To build the new PPU core, set profile=fast via GNU make. The old core is profile=accurate. The names of the profiles, and the name of the folder for the fast PPU are subject to change. The new PPU core doesn't do anything but demonstrate the proof of concept: every scanline, make a copy of all the PPU registers and CGRAM. Share the VRAM and OAM. Batch render all scanlines at once using OpenMP at the end of each frame and blit the result. With no PPU core at all, bsnes runs 91% faster than with the accuracy PPU (230fps vs 120fps.) That's the absolute theoretical best-case scenario. With the skeleton in place, we're already around 220fps. It'll go down more as the PPU line renderer starts to do real work. I don't know where things will end up yet. I suppose we'll find out in time. My own copy of TDM/GCC can't use OpenMP on Windows, so ... it won't parallelize if you build with that. I'm going to have to switch to a different MinGW distribution once this is complete, I suppose.
This commit is contained in:
parent
8f5bc80f01
commit
6c8e3c885d
|
@ -3,9 +3,11 @@ include ../nall/GNUmakefile
|
|||
|
||||
binary := application
|
||||
target := higan
|
||||
profile := accurate
|
||||
objects := libco emulator audio video resource
|
||||
|
||||
flags += -I. -I..
|
||||
flags += $(if $(call streq,$(profile),accurate),-DPROFILE_ACCURATE,-DPROFILE_FAST)
|
||||
|
||||
ifeq ($(platform),windows)
|
||||
ifeq ($(binary),application)
|
||||
|
|
|
@ -12,7 +12,7 @@ using namespace nall;
|
|||
|
||||
namespace Emulator {
|
||||
static const string Name = "higan";
|
||||
static const string Version = "106.27";
|
||||
static const string Version = "106.28";
|
||||
static const string Author = "byuu";
|
||||
static const string License = "GPLv3";
|
||||
static const string Website = "https://byuu.org/";
|
||||
|
|
|
@ -2,7 +2,7 @@ processors += wdc65816 spc700 arm7tdmi gsu hg51b upd96050
|
|||
|
||||
objects += sfc-interface sfc-system sfc-controller
|
||||
objects += sfc-cartridge sfc-memory
|
||||
objects += sfc-cpu sfc-smp sfc-dsp sfc-ppu
|
||||
objects += sfc-cpu sfc-smp sfc-dsp $(if $(call streq,$(profile),accurate),sfc-ppu,sfc-ppu-fast)
|
||||
objects += sfc-expansion sfc-satellaview sfc-21fx
|
||||
objects += sfc-icd sfc-mcc sfc-dip sfc-event
|
||||
objects += sfc-sa1 sfc-superfx
|
||||
|
@ -22,6 +22,7 @@ obj/sfc-cpu.o: sfc/cpu/cpu.cpp
|
|||
obj/sfc-smp.o: sfc/smp/smp.cpp
|
||||
obj/sfc-dsp.o: sfc/dsp/dsp.cpp
|
||||
obj/sfc-ppu.o: sfc/ppu/ppu.cpp
|
||||
obj/sfc-ppu-fast.o: sfc/ppu-fast/ppu.cpp
|
||||
|
||||
obj/sfc-expansion.o: sfc/expansion/expansion.cpp
|
||||
obj/sfc-satellaview.o: sfc/expansion/satellaview/satellaview.cpp
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
auto PPU::readIO(uint24 address, uint8 data) -> uint8 {
|
||||
cpu.synchronize(ppu);
|
||||
|
||||
switch((uint16)address) {
|
||||
|
||||
case 0x2104: case 0x2105: case 0x2106: case 0x2108:
|
||||
case 0x2109: case 0x210a: case 0x2114: case 0x2115:
|
||||
case 0x2116: case 0x2118: case 0x2119: case 0x211a:
|
||||
case 0x2124: case 0x2125: case 0x2126: case 0x2128:
|
||||
case 0x2129: case 0x212a: {
|
||||
return ppu1.mdr;
|
||||
}
|
||||
|
||||
case 0x2134: { //MPYL
|
||||
uint24 result = (int16)io.m7a * (int8)(io.m7b >> 8);
|
||||
return ppu1.mdr = result.byte(0);
|
||||
}
|
||||
|
||||
case 0x2135: { //MPYM
|
||||
uint24 result = (int16)io.m7a * (int8)(io.m7b >> 8);
|
||||
return ppu1.mdr = result.byte(1);
|
||||
}
|
||||
|
||||
case 0x2136: { //MPYH
|
||||
uint24 result = (int16)io.m7a * (int8)(io.m7b >> 8);
|
||||
return ppu1.mdr = result.byte(2);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
auto PPU::writeIO(uint24 address, uint8 data) -> void {
|
||||
cpu.synchronize(ppu);
|
||||
|
||||
switch((uint16)address) {
|
||||
|
||||
case 0x211b: { //M7A
|
||||
io.m7a = data << 8 | latch.mode7;
|
||||
latch.mode7 = data;
|
||||
return;
|
||||
}
|
||||
|
||||
case 0x211c: { //M7B
|
||||
io.m7b = data << 8 | latch.mode7;
|
||||
latch.mode7 = data;
|
||||
return;
|
||||
}
|
||||
|
||||
case 0x211d: { //M7C
|
||||
io.m7c = data << 8 | latch.mode7;
|
||||
latch.mode7 = data;
|
||||
return;
|
||||
}
|
||||
|
||||
case 0x211e: { //M7D
|
||||
io.m7d = data << 8 | latch.mode7;
|
||||
latch.mode7 = data;
|
||||
return;
|
||||
}
|
||||
|
||||
case 0x211f: { //M7X
|
||||
io.m7x = data << 8 | latch.mode7;
|
||||
latch.mode7 = data;
|
||||
return;
|
||||
}
|
||||
|
||||
case 0x2120: { //M7Y
|
||||
io.m7y = data << 8 | latch.mode7;
|
||||
latch.mode7 = data;
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
auto PPU::Line::render() -> void {
|
||||
for(uint x : range(512)) {
|
||||
outputLo[x] = 0x7ffff;
|
||||
outputHi[x] = 0x7ffff;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
auto PPU::readOAM(uint10 address) -> uint8 {
|
||||
if(!address.bit(9)) {
|
||||
uint n = address >> 2; //object#
|
||||
address &= 3;
|
||||
if(address == 0) return object[n].x.bits(0,7);
|
||||
if(address == 1) return object[n].y;
|
||||
if(address == 2) return object[n].character;
|
||||
return (
|
||||
object[n].nameselect << 0
|
||||
| object[n].palette << 1
|
||||
| object[n].priority << 4
|
||||
| object[n].hflip << 6
|
||||
| object[n].vflip << 7
|
||||
);
|
||||
} else {
|
||||
uint n = (address & 0x1f) << 2; //object#
|
||||
return (
|
||||
object[n + 0].x.bit(8) << 0
|
||||
| object[n + 0].size << 1
|
||||
| object[n + 1].x.bit(8) << 2
|
||||
| object[n + 1].size << 3
|
||||
| object[n + 2].x.bit(8) << 4
|
||||
| object[n + 2].size << 5
|
||||
| object[n + 3].x.bit(8) << 6
|
||||
| object[n + 3].size << 7
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
auto PPU::writeOAM(uint10 address, uint8 data) -> void {
|
||||
if(!address.bit(9)) {
|
||||
uint n = address >> 2; //object#
|
||||
if(address == 0) { object[n].x.bits(0,7) = data; return; }
|
||||
if(address == 1) { object[n].y = data; return; }
|
||||
if(address == 2) { object[n].character = data; return; }
|
||||
object[n].nameselect = data.bit (0);
|
||||
object[n].palette = data.bits(1,3);
|
||||
object[n].priority = data.bits(4,5);
|
||||
object[n].hflip = data.bit (6);
|
||||
object[n].vflip = data.bit (7);
|
||||
} else {
|
||||
uint n = (address & 0x1f) << 2; //object#
|
||||
object[n + 0].x.bit(8) = data.bit(0);
|
||||
object[n + 0].size = data.bit(1);
|
||||
object[n + 1].x.bit(8) = data.bit(2);
|
||||
object[n + 1].size = data.bit(3);
|
||||
object[n + 2].x.bit(8) = data.bit(4);
|
||||
object[n + 2].size = data.bit(5);
|
||||
object[n + 3].x.bit(8) = data.bit(6);
|
||||
object[n + 3].size = data.bit(7);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
#include <sfc/sfc.hpp>
|
||||
|
||||
namespace SuperFamicom {
|
||||
|
||||
PPU ppu;
|
||||
#include "io.cpp"
|
||||
#include "object.cpp"
|
||||
#include "line.cpp"
|
||||
#include "serialization.cpp"
|
||||
#include <sfc/ppu/counter/serialization.cpp>
|
||||
|
||||
PPU::PPU() {
|
||||
output = new uint32[512 * 512];
|
||||
output += 16 * 512; //overscan offset
|
||||
for(uint y : range(240)) {
|
||||
lines[y].y = y;
|
||||
lines[y].outputLo = output + (y * 2 + 0) * 512;
|
||||
lines[y].outputHi = output + (y * 2 + 1) * 512;
|
||||
}
|
||||
}
|
||||
|
||||
PPU::~PPU() {
|
||||
output -= 16 * 512; //overscan offset
|
||||
delete[] output;
|
||||
}
|
||||
|
||||
auto PPU::Enter() -> void {
|
||||
while(true) scheduler.synchronize(), ppu.main();
|
||||
}
|
||||
|
||||
auto PPU::step(uint clocks) -> void {
|
||||
tick(clocks);
|
||||
Thread::step(clocks);
|
||||
synchronize(cpu);
|
||||
}
|
||||
|
||||
auto PPU::main() -> void {
|
||||
scanline();
|
||||
uint y = vcounter();
|
||||
|
||||
step(512);
|
||||
if(y >= 1 && y <= vdisp()) {
|
||||
memory::copy(&lines[y].cgram, &cgram, sizeof(cgram));
|
||||
memory::copy(&lines[y].io, &io, sizeof(io));
|
||||
}
|
||||
|
||||
step(624);
|
||||
|
||||
step(lineclocks() - 512 - 624);
|
||||
}
|
||||
|
||||
auto PPU::scanline() -> void {
|
||||
if(vcounter() == 0) {
|
||||
frame();
|
||||
}
|
||||
|
||||
if(vcounter() == 241) {
|
||||
#pragma omp parallel for
|
||||
for(uint y = 1; y < vdisp(); y++) {
|
||||
lines[y].render();
|
||||
}
|
||||
scheduler.exit(Scheduler::Event::Frame);
|
||||
}
|
||||
}
|
||||
|
||||
auto PPU::frame() -> void {
|
||||
}
|
||||
|
||||
auto PPU::refresh() -> void {
|
||||
auto output = this->output;
|
||||
if(!overscan()) output -= 14 * 512;
|
||||
auto pitch = 512;
|
||||
auto width = 512;
|
||||
auto height = 480;
|
||||
Emulator::video.refresh(output, pitch * sizeof(uint32), width, height);
|
||||
}
|
||||
|
||||
auto PPU::load(Markup::Node node) -> bool {
|
||||
return true;
|
||||
}
|
||||
|
||||
auto PPU::power(bool reset) -> void {
|
||||
create(Enter, system.cpuFrequency());
|
||||
PPUcounter::reset();
|
||||
memory::fill(output, 512 * 480 * sizeof(uint32));
|
||||
|
||||
function<auto (uint24, uint8) -> uint8> reader{&PPU::readIO, this};
|
||||
function<auto (uint24, uint8) -> void> writer{&PPU::writeIO, this};
|
||||
bus.map(reader, writer, "00-3f,80-bf:2100-213f");
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
struct PPU : Thread, PPUcounter {
|
||||
alwaysinline auto interlace() const -> bool { return false; }
|
||||
alwaysinline auto overscan() const -> bool { return false; }
|
||||
alwaysinline auto vdisp() const -> uint { return 225; }
|
||||
|
||||
//ppu.cpp
|
||||
PPU();
|
||||
~PPU();
|
||||
|
||||
static auto Enter() -> void;
|
||||
alwaysinline auto step(uint clocks) -> void;
|
||||
auto main() -> void;
|
||||
auto scanline() -> void;
|
||||
auto frame() -> void;
|
||||
auto refresh() -> void;
|
||||
auto load(Markup::Node) -> bool;
|
||||
auto power(bool reset) -> void;
|
||||
|
||||
auto latchCounters() -> void {}
|
||||
|
||||
//serialization.cpp
|
||||
auto serialize(serializer&) -> void;
|
||||
|
||||
public:
|
||||
uint32* output = nullptr;
|
||||
uint16 vram[32 * 1024];
|
||||
uint16 cgram[256];
|
||||
|
||||
struct {
|
||||
uint4 version;
|
||||
uint8 mdr;
|
||||
} ppu1, ppu2;
|
||||
|
||||
struct Latch {
|
||||
uint8 mode7;
|
||||
} latch;
|
||||
|
||||
enum : uint {
|
||||
INIDISP = 0x00, OBSEL = 0x01, OAMADDL = 0x02, OAMADDH = 0x03,
|
||||
OAMDATA = 0x04, BGMODE = 0x05, MOSAIC = 0x06, BG1SC = 0x07,
|
||||
BG2SC = 0x08, BG3SC = 0x09, BG4SC = 0x0a, BG12NBA = 0x0b,
|
||||
BG34NBA = 0x0c, BG1HOFS = 0x0d, BG1VOFS = 0x0e, BG2HOFS = 0x0f,
|
||||
BG2VOFS = 0x10, BG3HOFS = 0x11, BG3VOFS = 0x12, BG4HOFS = 0x13,
|
||||
BG4VOFS = 0x14, VMAIN = 0x15, VMADDL = 0x16, VMADDH = 0x17,
|
||||
VMDATAL = 0x18, VMDATAH = 0x19, M7SEL = 0x1a, M7A = 0x1b,
|
||||
M7B = 0x1c, M7C = 0x1d, M7D = 0x1e, M7X = 0x1f,
|
||||
M7Y = 0x20, CGADD = 0x21, CGDATA = 0x22, W12SEL = 0x23,
|
||||
W34SEL = 0x24, WOBJSEL = 0x25, WH0 = 0x26, WH1 = 0x27,
|
||||
WH2 = 0x28, WH3 = 0x29, WBGLOG = 0x2a, WOBJLOG = 0x2b,
|
||||
TM = 0x2c, TS = 0x2d, TMW = 0x2e, TSW = 0x2f,
|
||||
CGWSEL = 0x30, CGADDSUB = 0x31, COLDATA = 0x32, SETINI = 0x33,
|
||||
MPYL = 0x34, MPYM = 0x35, MPYH = 0x36, SLHV = 0x37,
|
||||
OAMDATAREAD = 0x38, VMDATALREAD = 0x39, VMDATAHREAD = 0x3a, CGDATAREAD = 0x3b,
|
||||
OPHCT = 0x3c, OPVCT = 0x3d, STAT77 = 0x3e, STAT78 = 0x3f,
|
||||
};
|
||||
|
||||
//io.cpp
|
||||
auto readIO(uint24 address, uint8 data) -> uint8;
|
||||
auto writeIO(uint24 address, uint8 data) -> void;
|
||||
|
||||
struct IO {
|
||||
uint16 m7a;
|
||||
uint16 m7b;
|
||||
uint16 m7c;
|
||||
uint16 m7d;
|
||||
uint16 m7x;
|
||||
uint16 m7y;
|
||||
} io;
|
||||
|
||||
//object.cpp
|
||||
auto readOAM(uint10 address) -> uint8;
|
||||
auto writeOAM(uint10 address, uint8 data) -> void;
|
||||
|
||||
struct Object {
|
||||
uint9 x;
|
||||
uint8 y;
|
||||
uint8 character;
|
||||
uint1 nameselect;
|
||||
uint1 vflip;
|
||||
uint1 hflip;
|
||||
uint2 priority;
|
||||
uint3 palette;
|
||||
uint1 size;
|
||||
} object[128];
|
||||
|
||||
//bitplane -> bitmap tile caches
|
||||
uint8 vram2bpp[4096 * 64];
|
||||
uint8 vram4bpp[2048 * 64];
|
||||
uint8 vram8bpp[1024 * 64];
|
||||
|
||||
struct Line {
|
||||
//line.cpp
|
||||
auto render() -> void;
|
||||
|
||||
uint y = 0;
|
||||
uint32* outputLo = nullptr;
|
||||
uint32* outputHi = nullptr;
|
||||
|
||||
uint15 cgram[256];
|
||||
IO io;
|
||||
} lines[240];
|
||||
};
|
||||
|
||||
extern PPU ppu;
|
|
@ -0,0 +1,4 @@
|
|||
auto PPU::serialize(serializer& s) -> void {
|
||||
Thread::serialize(s);
|
||||
PPUcounter::serialize(s);
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
auto PPUcounter::serialize(serializer& s) -> void {
|
||||
s.integer(status.interlace);
|
||||
s.integer(status.field);
|
||||
s.integer(status.vcounter);
|
||||
s.integer(status.hcounter);
|
||||
|
||||
s.array(history.field);
|
||||
s.array(history.vcounter);
|
||||
s.array(history.hcounter);
|
||||
s.integer(history.index);
|
||||
}
|
|
@ -1,10 +1,10 @@
|
|||
auto PPU::OAM::read(uint10 addr) -> uint8 {
|
||||
if(!addr.bit(9)) {
|
||||
uint n = addr >> 2; //object#
|
||||
addr &= 3;
|
||||
if(addr == 0) return object[n].x.bits(0,7);
|
||||
if(addr == 1) return object[n].y;
|
||||
if(addr == 2) return object[n].character;
|
||||
auto PPU::OAM::read(uint10 address) -> uint8 {
|
||||
if(!address.bit(9)) {
|
||||
uint n = address >> 2; //object#
|
||||
address &= 3;
|
||||
if(address == 0) return object[n].x.bits(0,7);
|
||||
if(address == 1) return object[n].y;
|
||||
if(address == 2) return object[n].character;
|
||||
return (
|
||||
object[n].nameselect << 0
|
||||
| object[n].palette << 1
|
||||
|
@ -13,7 +13,7 @@ auto PPU::OAM::read(uint10 addr) -> uint8 {
|
|||
| object[n].vflip << 7
|
||||
);
|
||||
} else {
|
||||
uint n = (addr & 0x1f) << 2; //object#
|
||||
uint n = (address & 0x1f) << 2; //object#
|
||||
return (
|
||||
object[n + 0].x.bit(8) << 0
|
||||
| object[n + 0].size << 1
|
||||
|
@ -27,20 +27,20 @@ auto PPU::OAM::read(uint10 addr) -> uint8 {
|
|||
}
|
||||
}
|
||||
|
||||
auto PPU::OAM::write(uint10 addr, uint8 data) -> void {
|
||||
if(!addr.bit(9)) {
|
||||
uint n = addr >> 2; //object#
|
||||
addr &= 3;
|
||||
if(addr == 0) { object[n].x.bits(0,7) = data; return; }
|
||||
if(addr == 1) { object[n].y = data; return; }
|
||||
if(addr == 2) { object[n].character = data; return; }
|
||||
auto PPU::OAM::write(uint10 address, uint8 data) -> void {
|
||||
if(!address.bit(9)) {
|
||||
uint n = address >> 2; //object#
|
||||
address &= 3;
|
||||
if(address == 0) { object[n].x.bits(0,7) = data; return; }
|
||||
if(address == 1) { object[n].y = data; return; }
|
||||
if(address == 2) { object[n].character = data; return; }
|
||||
object[n].nameselect = data.bit (0);
|
||||
object[n].palette = data.bits(1,3);
|
||||
object[n].priority = data.bits(4,5);
|
||||
object[n].hflip = data.bit (6);
|
||||
object[n].vflip = data.bit (7);
|
||||
} else {
|
||||
uint n = (addr & 0x1f) << 2; //object#
|
||||
uint n = (address & 0x1f) << 2; //object#
|
||||
object[n + 0].x.bit(8) = data.bit(0);
|
||||
object[n + 0].size = data.bit(1);
|
||||
object[n + 1].x.bit(8) = data.bit(2);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
struct OAM {
|
||||
auto read(uint10 addr) -> uint8;
|
||||
auto write(uint10 addr, uint8 data) -> void;
|
||||
auto read(uint10 address) -> uint8;
|
||||
auto write(uint10 address, uint8 data) -> void;
|
||||
|
||||
struct Object {
|
||||
alwaysinline auto width() const -> uint;
|
||||
|
|
|
@ -10,6 +10,7 @@ PPU ppu;
|
|||
#include "window/window.cpp"
|
||||
#include "screen/screen.cpp"
|
||||
#include "serialization.cpp"
|
||||
#include "counter/serialization.cpp"
|
||||
|
||||
PPU::PPU() :
|
||||
bg1(Background::ID::BG1),
|
||||
|
|
|
@ -1,15 +1,3 @@
|
|||
auto PPUcounter::serialize(serializer& s) -> void {
|
||||
s.integer(status.interlace);
|
||||
s.integer(status.field);
|
||||
s.integer(status.vcounter);
|
||||
s.integer(status.hcounter);
|
||||
|
||||
s.array(history.field);
|
||||
s.array(history.vcounter);
|
||||
s.array(history.hcounter);
|
||||
s.integer(history.index);
|
||||
}
|
||||
|
||||
auto PPU::serialize(serializer& s) -> void {
|
||||
Thread::serialize(s);
|
||||
PPUcounter::serialize(s);
|
||||
|
|
|
@ -52,7 +52,11 @@ namespace SuperFamicom {
|
|||
#include <sfc/cpu/cpu.hpp>
|
||||
#include <sfc/smp/smp.hpp>
|
||||
#include <sfc/dsp/dsp.hpp>
|
||||
#if defined(PROFILE_ACCURATE)
|
||||
#include <sfc/ppu/ppu.hpp>
|
||||
#elif defined(PROFILE_FAST)
|
||||
#include <sfc/ppu-fast/ppu.hpp>
|
||||
#endif
|
||||
|
||||
#include <sfc/controller/controller.hpp>
|
||||
#include <sfc/expansion/expansion.hpp>
|
||||
|
|
Loading…
Reference in New Issue