Update to v068r04 release.

(there was no r03 release posted to the WIP thread)

byuu says:

This should provide hardware-accurate mosaic support in the accurate
renderer, with the exception that I'm still not sure what mid-frame
vertical mosaic or mid-scanline horizontal mosaic writes do. Either the
code I have is correct, or it bypasses the mosaic adjust and gives the
exact H/V positions.

I've also renamed the fast folder to alternative (thinking about naming
it simply alt instead), and started on a brand new PPU renderer. So far
it's just a barebones setup with some MMIO support and VRAM/OAM/CGRAM
writing. I'm not even confident that I can get this to be faster than
the current scanline renderer, but it's the only avenue that we have
left for any kind of significant bsnes speedup, so I have to try. I'm
going to finish up the MMIO stuff first, that way we have a clean slate
with no actual rendering. And then from here we can try various
different approaches.
This commit is contained in:
Tim Allen 2010-08-30 21:43:07 +10:00
parent 39b1acb177
commit c434e8a0d5
51 changed files with 573 additions and 21 deletions

View File

@ -87,6 +87,6 @@ clean: ui_clean
-@$(call delete,*.manifest)
archive-all:
tar -cjf bsnes-`date +%Y%m%d`.tar.bz2 launcher libco nall obj out qt ruby snes Makefile sync.sh cc.bat clean.bat
tar -cjf bsnes.tar.bz2 launcher libco nall obj out qt ruby snes Makefile sync.sh cc.bat clean.bat
help:;

View File

@ -20,14 +20,14 @@ else ifeq ($(profile),compatibility)
flags += -DPROFILE_COMPATIBILITY
snescpu := $(snes)/cpu
snessmp := $(snes)/smp
snesdsp := $(snes)/fast/dsp
snesppu := $(snes)/fast/ppu
snesdsp := $(snes)/alternative/dsp
snesppu := $(snes)/alternative/ppu
else ifeq ($(profile),performance)
flags += -DPROFILE_PERFORMANCE
snescpu := $(snes)/fast/cpu
snescpu := $(snes)/alternative/cpu
snessmp := $(snes)/smp
snesdsp := $(snes)/fast/dsp
snesppu := $(snes)/fast/ppu
snesdsp := $(snes)/alternative/dsp
snesppu := $(snes)/alternative/ppu-fast
endif
obj/libco.o : libco/libco.c libco/*

View File

@ -0,0 +1,3 @@
#ifdef PPU_CPP
#endif

View File

@ -0,0 +1,7 @@
#ifdef PPU_CPP
//Debugger property lookup. No properties are exposed by this PPU yet, so every
//id is reported as unknown and name/value are left untouched.
bool PPUDebugger::property(unsigned id, string &name, string &value) {
  const bool found = false;
  return found;
}
#endif

View File

@ -0,0 +1,10 @@
//Debugger-enabled PPU: combines the PPU with the ChipDebugger interface.
class PPUDebugger : public PPU, public ChipDebugger {
public:
//property query by id; the accompanying definition currently always returns false
bool property(unsigned id, string &name, string &value);
//NOTE(review): presumably per-layer enable toggles (two entries per background,
//four for sprites, likely one per priority level) -- nothing in this file reads
//or initializes them yet; confirm against the UI code that uses them
bool bg1_enabled[2];
bool bg2_enabled[2];
bool bg3_enabled[2];
bool bg4_enabled[2];
bool oam_enabled[4];
};

View File

@ -0,0 +1,48 @@
#ifdef PPU_CPP
//Counter latch hook. Currently a no-op: this renderer skeleton latches nothing.
void PPU::latch_counters() {
}
//Translates regs.vram_addr (a word address) into a byte address.
//Mapping modes 1-3 move the low 5/6/7 address bits up by three and drop the
//three bits above them into the bottom; mode 0 (or any other value) leaves the
//address as-is. The result is doubled and truncated to 16 bits.
uint16 PPU::get_vram_addr() {
  uint16 word = regs.vram_addr;

  if(regs.vram_mapping >= 1 && regs.vram_mapping <= 3) {
    const unsigned bits = 4 + regs.vram_mapping;              //5, 6 or 7
    const unsigned keep = 0xffff & ~((1u << (bits + 3)) - 1); //0xff00, 0xfe00, 0xfc00
    const unsigned low  = (1u << bits) - 1;                   //0x001f, 0x003f, 0x007f
    word = (word & keep) | ((word & low) << 3) | ((word >> bits) & 7);
  }

  return (word << 1);
}
//Reads a VRAM byte. The real contents are only returned while the display is
//disabled or the CPU's vertical counter is at/after regs.height; otherwise the
//read yields zero.
uint8 PPU::vram_read(unsigned addr) {
  const bool accessible = regs.display_disabled == true || cpu.vcounter() >= regs.height;
  if(accessible) return memory::vram[addr];
  return 0x00;
}
//Writes a VRAM byte. The write only lands while the display is disabled or the
//CPU's vertical counter is at/after regs.height; otherwise it is dropped.
void PPU::vram_write(unsigned addr, uint8 data) {
  const bool accessible = regs.display_disabled == true || cpu.vcounter() >= regs.height;
  if(!accessible) return;
  memory::vram[addr] = data;
}
//Reads an OAM byte.
//  addr: OAM byte address; only the low 10 bits are used, and $200-$3ff is
//        folded onto the 32-byte high table at $200-$21f.
//While the display is disabled or the CPU's vertical counter is at/after
//regs.height the addressed byte is returned; otherwise the byte at $0218 is
//returned regardless of addr.
uint8 PPU::oam_read(unsigned addr) {
  //without this fold, addresses above $21f index past the end of OAM
  //(assumes memory::oam holds the 512 + 32 byte OAM -- confirm against its allocation)
  addr &= 0x03ff;
  if(addr & 0x0200) addr &= 0x021f;

  if(regs.display_disabled == true) return memory::oam[addr];
  if(cpu.vcounter() >= regs.height) return memory::oam[addr];
  return memory::oam[0x0218];
}
//Writes an OAM byte.
//  addr: OAM byte address; only the low 10 bits are used, and $200-$3ff is
//        folded onto the 32-byte high table at $200-$21f.
//  data: value to store.
//While the display is disabled or the CPU's vertical counter is at/after
//regs.height the addressed byte is written; otherwise the write lands at $0218
//regardless of addr.
void PPU::oam_write(unsigned addr, uint8 data) {
  //the $2104 handler passes addresses up to $3ff; without this fold those
  //writes land past the end of OAM
  //(assumes memory::oam holds the 512 + 32 byte OAM -- confirm against its allocation)
  addr &= 0x03ff;
  if(addr & 0x0200) addr &= 0x021f;

  if(regs.display_disabled == true) { memory::oam[addr] = data; return; }
  if(cpu.vcounter() >= regs.height) { memory::oam[addr] = data; return; }
  memory::oam[0x0218] = data;
}
//Returns the CGRAM byte at addr; no access restrictions are applied.
uint8 PPU::cgram_read(unsigned addr) {
  const uint8 value = memory::cgram[addr];
  return value;
}
//Stores data into the CGRAM byte at addr; no access restrictions are applied.
void PPU::cgram_write(unsigned addr, uint8 data)
{
  memory::cgram[addr] = data;
}
#endif

View File

@ -0,0 +1,227 @@
#ifdef PPU_CPP
//MMIO read dispatcher. No readable registers are implemented yet, so every
//address currently reads back as zero.
uint8 PPU::mmio_read(unsigned addr) {
  const unsigned reg = addr & 0xffff;
  switch(reg) {
    //register cases go here
  }
  return 0x00;
}
//MMIO write dispatcher: decodes a write to one of the implemented PPU registers
//($2100-$210c, $2115-$2119, $2121-$2122) into the fields of `regs`, or forwards
//it to OAM/VRAM/CGRAM. Only the low 16 bits of addr select the register; writes
//to any other address are ignored.
void PPU::mmio_write(unsigned addr, uint8 data) {
switch(addr & 0xffff) {
//$2100: bit 7 = display disable, bits 0-3 = brightness
case 0x2100: {
regs.display_disabled = data & 0x80;
regs.display_brightness = data & 0x0f;
return;
}
//$2101: bits 5-7 = oam_basesize, bits 3-4 = oam_nameselect, bits 0-1 = oam_tdaddr (stored shifted left by 14)
case 0x2101: {
regs.oam_basesize = (data >> 5) & 7;
regs.oam_nameselect = (data >> 3) & 3;
regs.oam_tdaddr = (data & 3) << 14;
return;
}
//$2102: low byte of the 9-bit OAM base address; also reloads oam_addr (base * 2)
//and recomputes oam_firstsprite
case 0x2102: {
regs.oam_baseaddr = (regs.oam_baseaddr & 0xff00) | (data << 0);
regs.oam_baseaddr &= 0x01ff;
regs.oam_addr = regs.oam_baseaddr << 1;
regs.oam_firstsprite = (regs.oam_priority == false ? 0 : (regs.oam_addr >> 2) & 127);
return;
}
//$2103: bit 7 = oam_priority, bit 0 = high bit of the OAM base address (after the
//9-bit mask); reloads oam_addr and oam_firstsprite the same way as $2102
case 0x2103: {
regs.oam_priority = data & 0x80;
regs.oam_baseaddr = (data << 8) | (regs.oam_baseaddr & 0x00ff);
regs.oam_baseaddr &= 0x01ff;
regs.oam_addr = regs.oam_baseaddr << 1;
regs.oam_firstsprite = (regs.oam_priority == false ? 0 : (regs.oam_addr >> 2) & 127);
return;
}
//$2104: OAM data write. With address bit 9 set the byte is written directly.
//Otherwise an even address only latches the byte, and the following odd address
//writes the latched byte and the new byte as a pair. The address then advances
//by one (wrapping at 10 bits) and oam_firstsprite is recomputed.
case 0x2104: {
if(regs.oam_addr & 0x0200) {
oam_write(regs.oam_addr, data);
} else if((regs.oam_addr & 1) == 0) {
regs.oam_latchdata = data;
} else {
oam_write((regs.oam_addr & ~1) + 0, regs.oam_latchdata);
oam_write((regs.oam_addr & ~1) + 1, data);
}
regs.oam_addr = (regs.oam_addr + 1) & 0x03ff;
regs.oam_firstsprite = (regs.oam_priority == false ? 0 : (regs.oam_addr >> 2) & 127);
return;
}
//$2105: bits 4-7 = tile size flag for BG1-BG4, bit 3 = bg3_priority, bits 0-2 = bg_mode
case 0x2105: {
regs.bg_tilesize[BG4] = data & 0x80;
regs.bg_tilesize[BG3] = data & 0x40;
regs.bg_tilesize[BG2] = data & 0x20;
regs.bg_tilesize[BG1] = data & 0x10;
regs.bg3_priority = data & 0x08;
regs.bg_mode = data & 0x07;
return;
}
//$2106: bits 4-7 = mosaic_size, bits 0-3 = mosaic enable for BG1-BG4
case 0x2106: {
regs.mosaic_size = (data >> 4) & 15;
regs.mosaic_enabled[BG4] = data & 0x08;
regs.mosaic_enabled[BG3] = data & 0x04;
regs.mosaic_enabled[BG2] = data & 0x02;
regs.mosaic_enabled[BG1] = data & 0x01;
return;
}
//$2107-$210a: per-background bg_scaddr (bits 2-6, stored shifted left by 9)
//and bg_scsize (bits 0-1)
case 0x2107: {
regs.bg_scaddr[BG1] = (data & 0x7c) << 9;
regs.bg_scsize[BG1] = (data & 0x03);
return;
}
case 0x2108: {
regs.bg_scaddr[BG2] = (data & 0x7c) << 9;
regs.bg_scsize[BG2] = (data & 0x03);
return;
}
case 0x2109: {
regs.bg_scaddr[BG3] = (data & 0x7c) << 9;
regs.bg_scsize[BG3] = (data & 0x03);
return;
}
case 0x210a: {
regs.bg_scaddr[BG4] = (data & 0x7c) << 9;
regs.bg_scsize[BG4] = (data & 0x03);
return;
}
//$210b-$210c: bg_tdaddr for two backgrounds per register; the low nibble's
//three bits and the high nibble's three bits both end up in bits 13-15
case 0x210b: {
regs.bg_tdaddr[BG1] = (data & 0x07) << 13;
regs.bg_tdaddr[BG2] = (data & 0x70) << 9;
return;
}
case 0x210c: {
regs.bg_tdaddr[BG3] = (data & 0x07) << 13;
regs.bg_tdaddr[BG4] = (data & 0x70) << 9;
return;
}
//$2115: bit 7 = vram_incmode (which of $2118/$2119 advances the address),
//bits 2-3 = vram_mapping, bits 0-1 select the address step (1, 32, 128, 128)
case 0x2115: {
regs.vram_incmode = data & 0x80;
regs.vram_mapping = (data >> 2) & 3;
switch(data & 3) {
case 0: regs.vram_incsize = 1; break;
case 1: regs.vram_incsize = 32; break;
case 2: regs.vram_incsize = 128; break;
case 3: regs.vram_incsize = 128; break;
}
return;
}
//$2116: low byte of vram_addr; the read buffer is refilled from the new address
case 0x2116: {
regs.vram_addr = (regs.vram_addr & 0xff00) | (data << 0);
uint16 addr = get_vram_addr();
regs.vram_readbuffer = vram_read(addr + 0) << 0;
regs.vram_readbuffer |= vram_read(addr + 1) << 8;
return;
}
//$2117: high byte of vram_addr; the read buffer is refilled from the new address
case 0x2117: {
regs.vram_addr = (data << 8) | (regs.vram_addr & 0x00ff);
uint16 addr = get_vram_addr();
regs.vram_readbuffer = vram_read(addr + 0) << 0;
regs.vram_readbuffer |= vram_read(addr + 1) << 8;
return;
}
//$2118: writes the low byte of the current VRAM word; advances vram_addr when vram_incmode is 0
case 0x2118: {
uint16 addr = get_vram_addr() + 0;
vram_write(addr, data);
if(regs.vram_incmode == 0) regs.vram_addr += regs.vram_incsize;
return;
}
//$2119: writes the high byte of the current VRAM word; advances vram_addr when vram_incmode is 1
case 0x2119: {
uint16 addr = get_vram_addr() + 1;
vram_write(addr, data);
if(regs.vram_incmode == 1) regs.vram_addr += regs.vram_incsize;
return;
}
//$2121: sets the CGRAM byte address to twice the written value
case 0x2121: {
regs.cgram_addr = data << 1;
return;
}
//$2122: CGRAM data write. An even address only latches the byte; the following
//odd address writes the latched byte plus the new byte (top bit cleared) as a
//pair. The address then advances by one, wrapping at 9 bits.
case 0x2122: {
if((regs.cgram_addr & 1) == 0) {
regs.cgram_latchdata = data;
} else {
cgram_write((regs.cgram_addr & ~1) + 0, regs.cgram_latchdata);
cgram_write((regs.cgram_addr & ~1) + 1, data & 0x7f);
}
regs.cgram_addr = (regs.cgram_addr + 1) & 0x01ff;
return;
}
}
}
//Puts every field of `regs` into its reset state: display disabled, 256x225
//output, and all register-backed fields cleared. The one non-zero register
//field is vram_incsize, which is set to match what a $2115 write of zero decodes to.
void PPU::mmio_reset() {
  //internal
  regs.width = 256;
  regs.height = 225;

  //$2100
  regs.display_disabled = true;
  regs.display_brightness = 0;

  //$2101
  regs.oam_basesize = 0;
  regs.oam_nameselect = 0;
  regs.oam_tdaddr = 0;

  //$2102-$2103
  regs.oam_baseaddr = 0;
  regs.oam_addr = 0;
  regs.oam_priority = 0;
  regs.oam_firstsprite = 0;

  //$2104
  regs.oam_latchdata = 0;

  //$2105
  for(unsigned i = 0; i < 4; i++) regs.bg_tilesize[i] = 0;
  regs.bg3_priority = 0;
  regs.bg_mode = 0;

  //$2106
  regs.mosaic_size = 0;
  for(unsigned i = 0; i < 4; i++) regs.mosaic_enabled[i] = 0;

  //$2107-$210a
  for(unsigned i = 0; i < 4; i++) regs.bg_scaddr[i] = 0;
  for(unsigned i = 0; i < 4; i++) regs.bg_scsize[i] = 0;

  //$210b-$210c
  for(unsigned i = 0; i < 4; i++) regs.bg_tdaddr[i] = 0;

  //$2115
  regs.vram_incmode = 0;
  regs.vram_mapping = 0;
  //the $2115 decoder maps step selector 0 to an increment of 1; resetting this
  //to 0 would leave vram_addr stuck on $2118/$2119 writes until $2115 is written
  regs.vram_incsize = 1;

  //$2116-$2117
  regs.vram_addr = 0;

  //$2121
  regs.cgram_addr = 0;

  //$2122
  regs.cgram_latchdata = 0;

  //$2139-$213a
  regs.vram_readbuffer = 0;
}
#endif

View File

@ -0,0 +1,124 @@
#include <snes.hpp>
#define PPU_CPP
namespace SNES {
#if defined(DEBUGGER)
#include "debugger/debugger.cpp"
PPUDebugger ppu;
#else
PPU ppu;
#endif
#include "background.cpp"
#include "memory.cpp"
#include "mmio.cpp"
#include "screen.cpp"
//Advances this processor's clock counter by the given number of clocks.
void PPU::step(unsigned clocks) {
  clock = clock + clocks;
}
//Hands control to the CPU once this processor's clock is no longer negative.
//Non-threaded CPU: run cpu.enter() repeatedly until the clock goes negative.
//Threaded CPU: switch to the CPU thread, unless the scheduler is in
//SynchronizeMode::All.
void PPU::synchronize_cpu() {
  if(CPU::Threaded != true) {
    while(clock >= 0) cpu.enter();
    return;
  }

  if(clock < 0) return;
  if(scheduler.sync == Scheduler::SynchronizeMode::All) return;
  co_switch(cpu.thread);
}
//Static thread entry point: forwards to the global ppu instance.
void PPU::Enter() {
  ppu.enter();
}

//Main loop, one iteration per scanline: honour a pending SynchronizeMode::All
//request, run per-line setup, advance 512 clocks, render the line if it lies
//above regs.height, then consume the remainder of the line's clocks.
void PPU::enter() {
  for(;;) {
    const bool sync_all = (scheduler.sync == Scheduler::SynchronizeMode::All);
    if(sync_all) scheduler.exit(Scheduler::ExitReason::SynchronizeEvent);

    scanline();
    add_clocks(512);

    const bool visible = vcounter() < regs.height;
    if(visible) render_scanline();

    add_clocks(lineclocks() - 512);
  }
}
//Consumes `clocks` clocks: ticks the counters, advances this processor's clock,
//then gives the CPU a chance to catch up.
void PPU::add_clocks(unsigned clocks)
{
  tick(clocks);            //counter update
  step(clocks);            //clock bookkeeping
  synchronize_cpu();       //yield if the CPU is behind
}
//Per-scanline setup: signals a new frame on line 0 and refreshes the output
//dimensions from the current hires/overscan state.
void PPU::scanline() {
  const bool first_line = (vcounter() == 0);
  if(first_line) frame();

  regs.width  = hires()    ? 512 : 256;
  regs.height = overscan() ? 240 : 225;
}
//Start-of-frame hook: notifies the system object.
void PPU::frame()
{
  system.frame();
}

//Renders the current scanline; at present this only draws the screen layer.
void PPU::render_scanline()
{
  screen_render();
}
//Power-on: zero-fills VRAM, OAM and CGRAM, then performs a regular reset.
void PPU::power() {
  unsigned n;

  n = 0;
  while(n < memory::vram.size()) memory::vram[n++] = 0x00;

  n = 0;
  while(n < memory::oam.size()) memory::oam[n++] = 0x00;

  n = 0;
  while(n < memory::cgram.size()) memory::cgram[n++] = 0x00;

  reset();
}
//Reset: (re)creates the processor thread at the CPU frequency, resets the
//counters, clears the whole 512x512 output surface and resets the registers.
void PPU::reset() {
  create(Enter, system.cpu_frequency());
  PPUcounter::reset();

  const unsigned surface_bytes = 512 * 512 * sizeof(uint16);
  memset(surface, 0, surface_bytes);

  mmio_reset();
}
//Allocates the 512x512 output surface (output starts 16 rows in) and builds
//the 16 brightness lookup tables. Each table maps a 15-bit colour to the same
//colour scaled by level/15 (rounded), with the 5-bit fields at bits 10-14 and
//bits 0-4 exchanged in the result.
PPU::PPU() {
  surface = new uint16[512 * 512];
  output = surface + 16 * 512;

  light_table = new uint16*[16];
  for(unsigned level = 0; level < 16; level++) {
    uint16 *row = new uint16[32768];
    light_table[level] = row;

    const double luma = (double)level / 15.0;
    for(unsigned color = 0; color < 32768; color++) {
      const unsigned r = (color >> 10) & 31;
      const unsigned g = (color >>  5) & 31;
      const unsigned b = (color >>  0) & 31;

      const unsigned ar = (luma * r + 0.5);
      const unsigned ag = (luma * g + 0.5);
      const unsigned ab = (luma * b + 0.5);

      row[color] = (ab << 10) + (ag << 5) + (ar << 0);
    }
  }
}
//Releases everything the constructor allocated: the 16 brightness tables, the
//table-of-tables, and the output surface.
PPU::~PPU() {
  //the constructor allocates each row and the pointer array with new[];
  //previously only the surface was freed and these leaked
  for(unsigned l = 0; l < 16; l++) delete[] light_table[l];
  delete[] light_table;
  delete[] surface;
}
//Display-mode queries. None of these modes is implemented by this renderer
//yet, so each one reports false unconditionally.
bool PPU::interlace() const {
  return false;
}

bool PPU::overscan() const {
  return false;
}

bool PPU::hires() const {
  return false;
}
//Serializes the counter state through `s`: the live status fields first, then
//the history arrays and the history index. The call order defines the layout
//of the serialized data, so it must not be rearranged.
void PPUcounter::serialize(serializer &s) {
//current status
s.integer(status.interlace);
s.integer(status.field);
s.integer(status.vcounter);
s.integer(status.hcounter);
//history buffers and their current position
s.array(history.field);
s.array(history.vcounter);
s.array(history.hcounter);
s.integer(history.index);
}
//Serialization hook for the PPU. Currently empty: none of the state in `regs`
//or the PPU memories is written or restored by this function.
void PPU::serialize(serializer &s) {
}
}

View File

@ -0,0 +1,118 @@
//Skeleton PPU for the new renderer: a Processor with PPUcounter timing and an
//MMIO interface. So far it implements register writes for $2100-$210c,
//$2115-$2119 and $2121-$2122, VRAM/OAM/CGRAM access, and a screen pass that
//fills each line with a single colour.
class PPU : public Processor, public PPUcounter, public MMIO {
public:
enum : bool { Threaded = true };
//advance the clock counter / yield to the CPU when it has fallen behind
alwaysinline void step(unsigned clocks);
alwaysinline void synchronize_cpu();
void latch_counters();
//display-mode queries (all currently return false)
bool interlace() const;
bool overscan() const;
bool hires() const;
//MMIO interface
uint8 mmio_read(unsigned addr);
void mmio_write(unsigned addr, uint8 data);
//main loop and lifecycle
void enter();
void power();
void reset();
void serialize(serializer&);
//NOTE(review): the class owns raw new[] allocations (surface, light_table) but
//declares no copy operations; copying a PPU would alias those buffers
PPU();
~PPU();
private:
//layer indices; COL and BACK share the value 5
enum : unsigned { BG1 = 0, BG2 = 1, BG3 = 2, BG4 = 3, OAM = 4, COL = 5, BACK = 5 };
//surface: 512x512 uint16 buffer; output: surface offset by 16 rows of 512
uint16 *surface;
uint16 *output;
//background.cpp
//memory.cpp
uint16 get_vram_addr();
uint8 vram_read(unsigned addr);
void vram_write(unsigned addr, uint8 data);
uint8 oam_read(unsigned addr);
void oam_write(unsigned addr, uint8 data);
uint8 cgram_read(unsigned addr);
void cgram_write(unsigned addr, uint8 data);
//mmio.cpp
//decoded register state; each group is labelled with the register(s) that write it
struct Regs {
//internal
unsigned width;
unsigned height;
//$2100
bool display_disabled;
unsigned display_brightness;
//$2101
unsigned oam_basesize;
unsigned oam_nameselect;
uint16 oam_tdaddr;
//$2102-$2103
uint16 oam_baseaddr;
uint16 oam_addr;
bool oam_priority;
unsigned oam_firstsprite;
//$2104
uint8 oam_latchdata;
//$2105
bool bg_tilesize[4];
bool bg3_priority;
unsigned bg_mode;
//$2106
unsigned mosaic_size;
bool mosaic_enabled[4];
//$2107-$210a
uint16 bg_scaddr[4];
unsigned bg_scsize[4];
//$210b-$210c
uint16 bg_tdaddr[4];
//$2115
bool vram_incmode;
unsigned vram_mapping;
unsigned vram_incsize;
//$2116-$2117
uint16 vram_addr;
//$2121
uint16 cgram_addr;
//$2122
uint8 cgram_latchdata;
//$2139-$213a
uint16 vram_readbuffer;
} regs;
void mmio_reset();
//ppu.cpp
static void Enter();
void add_clocks(unsigned clocks);
void scanline();
void frame();
void render_scanline();
//screen.cpp
//light_table[brightness][15-bit colour] -> brightness-adjusted output colour
uint16 **light_table;
void screen_render();
friend class Video;
};
#if defined(DEBUGGER)
#include "debugger/debugger.hpp"
extern PPUDebugger ppu;
#else
extern PPU ppu;
#endif

View File

@ -0,0 +1,17 @@
#ifdef PPU_CPP
//Renders the current scanline into the output buffer (rows are 1024 uint16
//apart). With the display disabled the first regs.width pixels are cleared to
//zero; otherwise the line is filled with CGRAM colour 0 passed through the
//brightness table selected by regs.display_brightness.
void PPU::screen_render() {
  uint16 *data = output + vcounter() * 1024;

  if(regs.display_disabled) {
    memset(data, 0x00, regs.width << 1);
    return;
  }

  uint16 *table = light_table[regs.display_brightness];
  uint16 color = memory::cgram[0];
  color |= memory::cgram[1] << 8;
  //each brightness table has 32768 entries; mask so a stray top bit in CGRAM
  //cannot index past the end
  color = table[color & 0x7fff];

  //fill the same span the blanking path clears rather than a hard-coded 256
  for(unsigned i = 0; i < regs.width; i++) data[i] = color;
}
#endif

View File

@ -7,17 +7,15 @@ void PPU::Background::frame() {
void PPU::Background::scanline() {
if(self.vcounter() == 1) {
t.mosaic_y = regs.mosaic ? 1 : 0; //TODO: this is most likely incorrect
t.mosaic_vcounter = 0;
}
if(t.mosaic_vcounter++ == regs.mosaic) {
t.mosaic_vcounter = 0;
t.mosaic_vcounter = regs.mosaic + 1;
t.mosaic_y = 1;
} else if(--t.mosaic_vcounter == 0) {
t.mosaic_vcounter = regs.mosaic + 1;
t.mosaic_y += regs.mosaic + 1;
}
t.mosaic_x = 0;
t.mosaic_hcounter = 0;
t.mosaic_hcounter = regs.mosaic + 1;
}
void PPU::Background::run() {
@ -36,8 +34,8 @@ void PPU::Background::run() {
unsigned x = t.mosaic_x;
unsigned y = t.mosaic_y;
if(t.mosaic_hcounter++ == regs.mosaic) {
t.mosaic_hcounter = 0;
if(--t.mosaic_hcounter == 0) {
t.mosaic_hcounter = regs.mosaic + 1;
t.mosaic_x += regs.mosaic + 1;
}
if(regs.mode == Mode::Mode7) return run_mode7();

View File

@ -4,5 +4,5 @@ namespace Info {
#include <cpu/cpu.hpp>
#include <smp/smp.hpp>
#include <fast/dsp/dsp.hpp>
#include <fast/ppu/ppu.hpp>
#include <alternative/dsp/dsp.hpp>
#include <alternative/ppu/ppu.hpp>

View File

@ -2,7 +2,7 @@ namespace Info {
static const char Profile[] = "Performance";
}
#include <fast/cpu/cpu.hpp>
#include <alternative/cpu/cpu.hpp>
#include <smp/smp.hpp>
#include <fast/dsp/dsp.hpp>
#include <fast/ppu/ppu.hpp>
#include <alternative/dsp/dsp.hpp>
#include <alternative/ppu-fast/ppu.hpp>

View File

@ -1,7 +1,7 @@
namespace SNES {
namespace Info {
static const char Name[] = "bsnes";
static const char Version[] = "068.02";
static const char Version[] = "068.04";
static const unsigned SerializerVersion = 13;
}
}