Update to v068r10 release.

(there was no r09 release posted to the WIP thread)

byuu says:

It is feature-complete, but horizontal mosaic is less accurate. I have
an idea for a mosaic color ring buffer to get it equally accurate, but
I haven't implemented it yet. For now it's just a simple x & ~(mosaic >>
1) trick that is passable.

Hires blending was left out, as it's more processor intensive and
blargg's NTSC does a better job with that anyway.

There are some OPT vertical positioning issues in the SNES Test Program's
character test; Goodbye, Anthrox has some sort of fast CPU DMA issue;
etc.

Total speedup is a mere 13.5%. Not quite the 50% I wanted in the best
case, but I'll take what I can get.

254->289fps in Zelda 3 on my E8400 now. There's another 15% hiding with
blargg's SMP and 5-10% with blargg's fast DSP, but they lose too much
accuracy. It'd put me at or below Snes9X accuracy, while still being 50%
slower.

SSE2 was performing worse this time, both on x86 and amd64, so I left
that optimization off.

So, barring a miracle, this is about the best it's going to get.
This commit is contained in:
Tim Allen 2010-09-03 21:37:36 +10:00
parent f1009ec634
commit 7df9157abd
13 changed files with 342 additions and 71 deletions

View File

@ -1,71 +1,101 @@
#ifdef PPU_CPP
void PPU::Background::render() {
if(regs.mode == Mode::Inactive) return;
if(regs.mode == Mode::Mode7) return;
if(regs.main_enable == false && regs.sub_enable == false) return;
#include "mode7.cpp"
const unsigned opt_valid_bit = (id == ID::BG1 ? 0x2000 : id == ID::BG2 ? 0x4000 : 0x0000);
const unsigned bgpal_index = (self.regs.bgmode == 0 ? id << 5 : 0);
//Fetches one 16-bit tilemap entry from VRAM for a pixel position on this layer.
//hoffset, voffset: pixel coordinates; they are reduced to tile coordinates by
//shifting right by tile_width / tile_height.
//Returns the byte at the computed address in bits 0-7 and the following byte in bits 8-15.
unsigned PPU::Background::get_tile(unsigned hoffset, unsigned voffset) {
unsigned tile_x = hoffset >> tile_width;
unsigned tile_y = voffset >> tile_height;
//NOTE(review): pal_size, tile_mask and tiledata_index are not referenced again in this function - confirm they belong here
const unsigned pal_size = 2 << regs.mode;
const unsigned tile_mask = 0x0fff >> regs.mode;
const unsigned tiledata_index = regs.tiledata_addr >> (4 + regs.mode);
//low five bits of each tile coordinate index a 32-entry-wide grid
unsigned tile_pos = ((tile_y & 0x1f) << 5) + (tile_x & 0x1f);
//bit 5 of a tile coordinate adds the scy / scx displacement
if(tile_y & 0x20) tile_pos += scy;
if(tile_x & 0x20) tile_pos += scx;
//NOTE(review): hires and width are likewise unused below - confirm they belong here
const bool hires = (self.regs.bgmode == 5 || self.regs.bgmode == 6);
const signed width = !hires ? 256 : 512;
//each entry occupies two bytes, hence the shift
const unsigned tiledata_addr = regs.screen_addr + (tile_pos << 1);
return (memory::vram[tiledata_addr + 0] << 0) + (memory::vram[tiledata_addr + 1] << 8);
}
const unsigned tile_height = regs.tile_size ? 4 : 3;
const unsigned tile_width = hires ? 4 : tile_height;
//Replaces hoffset and/or voffset with values looked up through self.bg3.get_tile().
//x: position along the scanline being rendered; hoffset / voffset: in-out scroll
//offsets, only overwritten when the conditions below are met.
//Nothing is changed while opt_x (x plus the low three bits of hscroll) is below 8.
void PPU::Background::offset_per_tile(unsigned x, unsigned &hoffset, unsigned &voffset) {
unsigned opt_x = (x + (hscroll & 7)), hval, vval;
if(opt_x >= 8) {
//hval is read at BG3 voffset + 0; vval at voffset + 8, and only when bgmode is not 4
hval = self.bg3.get_tile((opt_x - 8) + (self.bg3.regs.hoffset & ~7), self.bg3.regs.voffset + 0);
if(self.regs.bgmode != 4)
vval = self.bg3.get_tile((opt_x - 8) + (self.bg3.regs.hoffset & ~7), self.bg3.regs.voffset + 8);
//NOTE(review): mask_x and mask_y are declared here but not used in the rest of this function - confirm they belong here
unsigned mask_x = (tile_height == 4 ? width << 1 : width);
unsigned mask_y = mask_x;
if(self.regs.bgmode == 4) {
//bgmode 4 uses the single hval entry: with this layer's valid bit set it
//replaces the horizontal offset, otherwise it replaces the vertical one
if(hval & opt_valid_bit) {
hoffset = opt_x + (hval & ~7);
} else {
voffset = y + hval;
}
} else {
//other modes test hval and vval independently against the valid bit
if(hval & opt_valid_bit) {
hoffset = opt_x + (hval & ~7);
}
if(vval & opt_valid_bit) {
voffset = y + vval;
}
}
}
}
//Recomputes the per-scanline state of this layer from the current registers:
//the row (y), hires flag and width, tile size shifts, coordinate masks, and the
//scx / scy displacements.
void PPU::Background::scanline() {
//row looked up through the mosaic table for this layer's mosaic setting, plus one when mosaic is nonzero
y = mosaic_table[regs.mosaic][self.vcounter()] + (regs.mosaic > 0);
hires = (self.regs.bgmode == 5 || self.regs.bgmode == 6);
width = !hires ? 256 : 512;
//tile dimensions are stored as shift counts (3 or 4); hires forces a horizontal shift of 4
tile_height = regs.tile_size ? 4 : 3;
tile_width = hires ? 4 : tile_height;
//start from width (doubled when tile_height is 4), double per screen_size bit,
//then subtract one to turn the sizes into bit masks
mask_x = (tile_height == 4 ? width << 1 : width);
mask_y = mask_x;
if(regs.screen_size & 1) mask_x <<= 1;
if(regs.screen_size & 2) mask_y <<= 1;
mask_x--;
mask_y--;
//NOTE(review): the next two lines declare locals named scy / scx, so the assignments
//that follow bind to these locals rather than to members - confirm whether these declarations should be present
unsigned scy = (regs.screen_size & 2 ? 32 << 5 : 0);
unsigned scx = (regs.screen_size & 1 ? 32 << 5 : 0);
scx = (regs.screen_size & 1 ? 32 << 5 : 0);
scy = (regs.screen_size & 2 ? 32 << 5 : 0);
//with both screen_size bits set, the vertical displacement is doubled
if(regs.screen_size == 3) scy <<= 1;
}
unsigned y = self.vcounter();
unsigned hscroll = regs.hoffset;
unsigned vscroll = regs.voffset;
void PPU::Background::render() {
if(regs.mode == Mode::Inactive) return;
if(regs.main_enable == false && regs.sub_enable == false) return;
if(regs.main_enable) window.render(0);
if(regs.sub_enable) window.render(1);
if(regs.mode == Mode::Mode7) return render_mode7();
const unsigned bgpal_index = (self.regs.bgmode == 0 ? id << 5 : 0);
const unsigned pal_size = 2 << regs.mode;
const unsigned tile_mask = 0x0fff >> regs.mode;
const unsigned tiledata_index = regs.tiledata_addr >> (4 + regs.mode);
hscroll = regs.hoffset;
vscroll = regs.voffset;
if(hires) {
hscroll <<= 1;
if(self.regs.interlace) y = (y << 1) + self.field();
}
unsigned hval, vval;
unsigned tile_pri, tile_num;
unsigned pal_index, pal_num;
unsigned hoffset, voffset, opt_x, col;
unsigned hoffset, voffset, col;
bool mirror_x, mirror_y;
const bool is_opt_mode = (self.regs.bgmode == 2 || self.regs.bgmode == 4 || self.regs.bgmode == 6);
const bool is_direct_color_mode = (self.screen.regs.direct_color == true && id == ID::BG1 && (self.regs.bgmode == 3 || self.regs.bgmode == 4));
window.render(0);
window.render(1);
signed x = 0 - (hscroll & 7);
while(x < width) {
hoffset = x + hscroll;
voffset = y + vscroll;
if(is_opt_mode) offset_per_tile(x, hoffset, voffset);
hoffset &= mask_x;
voffset &= mask_y;
unsigned tile_x = hoffset >> tile_width;
unsigned tile_y = voffset >> tile_height;
unsigned tile_pos = ((tile_y & 0x1f) << 5) + (tile_x & 0x1f);
if(tile_y & 0x20) tile_pos += scy;
if(tile_x & 0x20) tile_pos += scx;
const unsigned tiledata_addr = regs.screen_addr + (tile_pos << 1);
tile_num = (memory::vram[tiledata_addr + 0] << 0) + (memory::vram[tiledata_addr + 1] << 8);
tile_num = get_tile(hoffset, voffset);
mirror_y = tile_num & 0x8000;
mirror_x = tile_num & 0x4000;
tile_pri = tile_num & 0x2000 ? regs.priority1 : regs.priority0;
@ -92,11 +122,26 @@ void PPU::Background::render() {
tiledata += ((voffset & 7) * 8);
for(unsigned n = 0; n < 8; n++) {
unsigned col = *tiledata++;
if(col && !(plot_x & 256)) {
unsigned color = self.screen.get_palette(pal_index + col);
if(regs.main_enable && !window.main[plot_x]) self.screen.output.plot_main(plot_x, color, tile_pri, id);
if(regs.sub_enable && !window.sub[plot_x]) self.screen.output.plot_sub(plot_x, color, tile_pri, id);
unsigned col = tiledata[n & regs.mosaic_mask];
if(col && !(plot_x & width)) {
unsigned color;
if(is_direct_color_mode) {
color = self.screen.get_direct_color(pal_num, col);
} else {
color = self.screen.get_palette(pal_index + col);
}
if(hires == false) {
if(regs.main_enable && !window.main[plot_x]) self.screen.output.plot_main(plot_x, color, tile_pri, id);
if(regs.sub_enable && !window.sub[plot_x]) self.screen.output.plot_sub(plot_x, color, tile_pri, id);
} else {
signed half_x = plot_x >> 1;
if(plot_x & 1) {
if(regs.main_enable && !window.main[half_x]) self.screen.output.plot_main(half_x, color, tile_pri, id);
} else {
if(regs.sub_enable && !window.sub[half_x]) self.screen.output.plot_sub(half_x, color, tile_pri, id);
}
}
}
plot_x += step;
}
@ -106,6 +151,20 @@ void PPU::Background::render() {
}
//Sets up the data that depends only on which layer this is: the valid-bit mask
//(0x2000 for BG1, 0x4000 for BG2, zero otherwise) and sixteen 4096-entry mosaic
//tables, where table m maps a coordinate to the first coordinate of its (m + 1)-wide group.
PPU::Background::Background(PPU &self, unsigned id) : self(self), id(id) {
  if(id == ID::BG1) {
    opt_valid_bit = 0x2000;
  } else if(id == ID::BG2) {
    opt_valid_bit = 0x4000;
  } else {
    opt_valid_bit = 0x0000;
  }

  mosaic_table = new uint16*[16];
  for(unsigned size = 1; size <= 16; size++) {
    uint16 *table = new uint16[4096];
    mosaic_table[size - 1] = table;
    //rounding down to a multiple of size is the same as removing the remainder
    for(unsigned pos = 0; pos < 4096; pos++) {
      table[pos] = pos - (pos % size);
    }
  }
}
//Releases the sixteen mosaic tables and then the array that holds them.
PPU::Background::~Background() {
  uint16 **const last = mosaic_table + 16;
  for(uint16 **table = mosaic_table; table != last; ++table) {
    delete[] *table;
  }
  delete[] mosaic_table;
}
#endif

View File

@ -11,6 +11,7 @@ class Background {
bool tile_size;
unsigned mosaic;
unsigned mosaic_mask;
unsigned screen_addr;
unsigned screen_size;
@ -23,12 +24,37 @@ class Background {
bool sub_enable;
} regs;
LayerWindow window;
void render();
uint16 **mosaic_table;
const unsigned id;
unsigned opt_valid_bit;
unsigned y;
bool hires;
signed width;
unsigned tile_width;
unsigned tile_height;
unsigned mask_x;
unsigned mask_y;
unsigned scx;
unsigned scy;
unsigned hscroll;
unsigned vscroll;
LayerWindow window;
alwaysinline unsigned get_tile(unsigned hoffset, unsigned voffset);
void offset_per_tile(unsigned x, unsigned &hoffset, unsigned &voffset);
void scanline();
void render();
void render_mode7();
Background(PPU &self, unsigned id);
~Background();
PPU &self;
friend class PPU;

View File

@ -0,0 +1,101 @@
#ifdef PPU_CPP
#define Clip(x) (((x) & 0x2000) ? ((x) | ~0x03ff) : ((x) & 0x03ff))
//Renders one 256-pixel scanline of this layer using the mode 7 matrix registers.
//For each output x, a source coordinate (px, py) is derived from m7a-m7d, the
//centre (m7x, m7y) and the mode 7 scroll offsets; mode7_repeat selects what is
//fetched when that coordinate lies outside 0..1023. Non-zero results are plotted
//to the main and/or sub screen unless masked by the layer window.
void PPU::Background::render_mode7() {
  signed px, py;
  signed tile, palette;

  //matrix parameters are 16-bit signed; centre and scroll are 13-bit signed
  const signed a = sclip<16>(self.regs.m7a);
  const signed b = sclip<16>(self.regs.m7b);
  const signed c = sclip<16>(self.regs.m7c);
  const signed d = sclip<16>(self.regs.m7d);

  const signed cx = sclip<13>(self.regs.m7x);
  const signed cy = sclip<13>(self.regs.m7y);
  const signed hofs = sclip<13>(self.regs.mode7_hoffset);
  const signed vofs = sclip<13>(self.regs.mode7_voffset);

  signed y = self.vcounter();
  if(self.regs.mode7_vflip != false) y = 255 - self.vcounter();

  //the vertical table always follows BG1's mosaic setting; the horizontal one
  //follows BG1 for BG1 and BG2 for any other layer
  uint16 *mosaic_y = mosaic_table[self.bg1.regs.mosaic];
  uint16 *mosaic_x = (id == ID::BG1 ? mosaic_y : mosaic_table[self.bg2.regs.mosaic]);

  //per-scanline part of the transform; each product has its low six bits cleared
  const signed rel_x = Clip(hofs - cx);
  const signed rel_y = Clip(vofs - cy);
  const signed row = mosaic_y[y];
  const signed psx = ((a * rel_x) & ~63) + ((b * rel_y) & ~63) + ((b * row) & ~63) + (cx << 8);
  const signed psy = ((c * rel_x) & ~63) + ((d * rel_y) & ~63) + ((d * row) & ~63) + (cy << 8);

  for(signed x = 0; x < 256; x++) {
    const signed column = mosaic_x[x];
    px = (psx + (a * column)) >> 8;
    py = (psy + (c * column)) >> 8;

    const bool outside = (px < 0 || px > 1023 || py < 0 || py > 1023);

    //decide which of the two VRAM reads happen; any repeat value other than
    //0-3 leaves tile and palette exactly as they were
    bool read_tile = false;
    bool read_texel = false;
    switch(self.regs.mode7_repeat) {
      case 0: case 1: {
        //coordinates always wrap
        read_tile = true;
        read_texel = true;
        break;
      }
      case 2: {
        //outside the field: palette index zero, no VRAM access
        if(outside) {
          palette = 0;
        } else {
          read_tile = true;
          read_texel = true;
        }
        break;
      }
      case 3: {
        //outside the field: tile number zero, texel still fetched
        if(outside) {
          tile = 0;
        } else {
          read_tile = true;
        }
        read_texel = true;
        break;
      }
    }

    if(read_tile) {
      px &= 1023;
      py &= 1023;
      const signed tx = ((px >> 3) & 127);
      const signed ty = ((py >> 3) & 127);
      tile = memory::vram[(ty * 128 + tx) << 1];
    }
    if(read_texel) {
      palette = memory::vram[(((tile << 6) + ((py & 7) << 3) + (px & 7)) << 1) + 1];
    }

    //BG1 always uses priority0; other layers take bit 7 of the fetched value
    //as the priority select and keep only the low seven bits as the index
    unsigned priority = regs.priority0;
    if(id != ID::BG1) {
      if(palette & 0x80) priority = regs.priority1;
      palette &= 0x7f;
    }

    if(palette == 0) continue;
    unsigned plot_x = (self.regs.mode7_hflip == false ? x : 255 - x);

    const unsigned color = (self.screen.regs.direct_color && id == ID::BG1)
    ? self.screen.get_direct_color(0, palette)
    : self.screen.get_palette(palette);

    if(regs.main_enable && !window.main[plot_x]) self.screen.output.plot_main(plot_x, color, priority, id);
    if(regs.sub_enable && !window.sub[plot_x]) self.screen.output.plot_sub(plot_x, color, priority, id);
  }
}
#undef Clip
#endif

View File

@ -0,0 +1,7 @@
#ifdef PPU_CPP
//Property query stub: reports no properties.
//Returns false for every id and leaves name and value unmodified.
bool PPUDebugger::property(unsigned id, string &name, string &value) {
return false;
}
#endif

View File

@ -0,0 +1,10 @@
//PPU subclass that also implements ChipDebugger; it is the type of the global
//ppu object when DEBUGGER is defined.
class PPUDebugger : public PPU, public ChipDebugger {
public:
bool property(unsigned id, string &name, string &value);
//NOTE(review): presumably per-layer enable toggles (two entries per background,
//four for sprites); nothing in view reads or writes them - confirm the index meaning
bool bg1_enabled[2];
bool bg2_enabled[2];
bool bg3_enabled[2];
bool bg4_enabled[2];
bool oam_enabled[4];
};

View File

@ -8,7 +8,7 @@ void PPU::latch_counters() {
bool PPU::interlace() const { return regs.interlace; }
bool PPU::overscan() const { return regs.overscan; }
bool PPU::hires() const { return regs.pseudo_hires || (regs.bgmode == 5 || regs.bgmode == 6); }
bool PPU::hires() const { return regs.pseudo_hires || regs.bgmode == 5 || regs.bgmode == 6; }
uint16 PPU::get_vram_addr() {
uint16 addr = regs.vram_addr;
@ -341,6 +341,10 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
bg3.regs.mosaic = (data & 0x04 ? mosaic_size : 0);
bg2.regs.mosaic = (data & 0x02 ? mosaic_size : 0);
bg1.regs.mosaic = (data & 0x01 ? mosaic_size : 0);
bg4.regs.mosaic_mask = ~(bg4.regs.mosaic >> 1);
bg3.regs.mosaic_mask = ~(bg3.regs.mosaic >> 1);
bg2.regs.mosaic_mask = ~(bg2.regs.mosaic >> 1);
bg1.regs.mosaic_mask = ~(bg1.regs.mosaic >> 1);
return;
}
@ -527,7 +531,7 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
regs.cgram_latchdata = data;
} else {
cgram_write((regs.cgram_addr & ~1) + 0, regs.cgram_latchdata);
cgram_write((regs.cgram_addr & ~1) + 1, data);
cgram_write((regs.cgram_addr & ~1) + 1, data & 0x7f);
}
regs.cgram_addr = (regs.cgram_addr + 1) & 0x01ff;
return;
@ -650,7 +654,8 @@ void PPU::mmio_write(unsigned addr, uint8 data) {
case 0x2131: { //CGADDSUB
screen.regs.color_mode = data & 0x80;
screen.regs.color_halve = data & 0x40;
screen.regs.color_enable[5] = data & 0x20;
screen.regs.color_enable[6] = data & 0x20;
screen.regs.color_enable[5] = data & 0x10;
screen.regs.color_enable[4] = data & 0x10;
screen.regs.color_enable[3] = data & 0x08;
screen.regs.color_enable[2] = data & 0x04;
@ -860,6 +865,7 @@ void PPU::mmio_reset() {
//$2131
screen.regs.color_mode = 0;
screen.regs.color_halve = 0;
screen.regs.color_enable[6] = 0;
screen.regs.color_enable[5] = 0;
screen.regs.color_enable[4] = 0;
screen.regs.color_enable[3] = 0;

View File

@ -3,7 +3,12 @@
#define PPU_CPP
namespace SNES {
PPU ppu;
#if defined(DEBUGGER)
#include "debugger/debugger.cpp"
PPUDebugger ppu;
#else
PPU ppu;
#endif
#include "mmio/mmio.cpp"
#include "window/window.cpp"
@ -52,6 +57,10 @@ void PPU::add_clocks(unsigned clocks) {
void PPU::render_scanline() {
if(regs.display_disable) return screen.render_black();
bg1.scanline();
bg2.scanline();
bg3.scanline();
bg4.scanline();
screen.scanline();
bg1.render();
bg2.render();

View File

@ -54,4 +54,9 @@ private:
friend class Video;
};
extern PPU ppu;
#if defined(DEBUGGER)
#include "debugger/debugger.hpp"
extern PPUDebugger ppu;
#else
extern PPU ppu;
#endif

View File

@ -10,6 +10,12 @@ unsigned PPU::Screen::get_palette(unsigned color) {
#endif
}
//Builds a colour value directly from a palette number (p) and a pixel value (t)
//instead of looking it up. Each channel combines bits of t with one bit of p:
//  bits 1-4:   t bits 0-2 above p bit 0
//  bits 6-9:   t bits 3-5 above p bit 1
//  bits 12-14: t bits 6-7 above p bit 2
//The top channel is not masked, so higher bits of t or p pass through shifted.
unsigned PPU::Screen::get_direct_color(unsigned p, unsigned t) {
  const unsigned low = ((t & 7) << 2) | ((p & 1) << 1);
  const unsigned mid = (((t >> 3) & 7) << 7) | (((p >> 1) & 1) << 6);
  const unsigned high = ((t >> 6) << 13) | ((p >> 2) << 12);
  return low | mid | high;
}
uint16 PPU::Screen::addsub(unsigned x, unsigned y, bool halve) {
if(!regs.color_mode) {
if(!halve) {
@ -31,16 +37,18 @@ uint16 PPU::Screen::addsub(unsigned x, unsigned y, bool halve) {
}
void PPU::Screen::scanline() {
unsigned color = get_palette(0);
unsigned main_color = get_palette(0);
unsigned sub_color = (self.regs.pseudo_hires == false && self.regs.bgmode != 5 && self.regs.bgmode != 6)
? regs.color : main_color;
for(unsigned x = 0; x < 256; x++) {
output.main[x].color = color;
output.main[x].color = main_color;
output.main[x].priority = 0;
output.main[x].source = 5;
output.main[x].source = 6;
output.sub[x].color = regs.color;
output.sub[x].color = sub_color;
output.sub[x].priority = 0;
output.sub[x].source = 5;
output.sub[x].source = 6;
}
window.render(0);
@ -49,15 +57,16 @@ void PPU::Screen::scanline() {
void PPU::Screen::render_black() {
uint16 *data = self.output + self.vcounter() * 1024;
if(self.interlace() && self.field()) data += 512;
memset(data, 0, self.display.width << 1);
}
uint16 PPU::Screen::get_pixel_main(unsigned x) {
auto &main = output.main[x];
auto &sub = output.sub[x];
auto main = output.main[x];
auto sub = output.sub[x];
if(!regs.addsub_mode) {
sub.source = 5;
sub.source = 6;
sub.color = regs.color;
}
@ -68,10 +77,37 @@ uint16 PPU::Screen::get_pixel_main(unsigned x) {
main.color = 0x0000;
}
if(regs.color_enable[main.source] && window.sub[x]) {
if(main.source != 5 && regs.color_enable[main.source] && window.sub[x]) {
bool halve = false;
if(regs.color_halve && window.main[x]) {
if(!regs.addsub_mode || sub.source != 5) halve = true;
if(!regs.addsub_mode || sub.source != 6) halve = true;
}
return addsub(main.color, sub.color, halve);
}
return main.color;
}
uint16 PPU::Screen::get_pixel_sub(unsigned x) {
auto main = output.sub[x];
auto sub = output.main[x];
if(!regs.addsub_mode) {
sub.source = 6;
sub.color = regs.color;
}
if(!window.main[x]) {
if(!window.sub[x]) {
return 0x0000;
}
main.color = 0x0000;
}
if(main.source != 5 && regs.color_enable[main.source] && window.sub[x]) {
bool halve = false;
if(regs.color_halve && window.main[x]) {
if(!regs.addsub_mode || sub.source != 6) halve = true;
}
return addsub(main.color, sub.color, halve);
}
@ -81,9 +117,18 @@ uint16 PPU::Screen::get_pixel_main(unsigned x) {
void PPU::Screen::render() {
uint16 *data = self.output + self.vcounter() * 1024;
if(self.interlace() && self.field()) data += 512;
uint16 *light = light_table[self.regs.display_brightness];
for(unsigned i = 0; i < 256; i++) {
data[i] = light[get_pixel_main(i)];
if(!self.regs.pseudo_hires && self.regs.bgmode != 5 && self.regs.bgmode != 6) {
for(unsigned i = 0; i < 256; i++) {
data[i] = light[get_pixel_main(i)];
}
} else {
for(unsigned i = 0; i < 256; i++) {
*data++ = light[get_pixel_sub(i)];
*data++ = light[get_pixel_main(i)];
}
}
}

View File

@ -5,7 +5,7 @@ class Screen {
bool color_mode;
bool color_halve;
bool color_enable[6];
bool color_enable[7];
unsigned color_b;
unsigned color_g;
@ -20,18 +20,20 @@ class Screen {
unsigned source;
} main[256], sub[256];
void plot_main(unsigned x, unsigned color, unsigned priority, unsigned source);
void plot_sub(unsigned x, unsigned color, unsigned priority, unsigned source);
alwaysinline void plot_main(unsigned x, unsigned color, unsigned priority, unsigned source);
alwaysinline void plot_sub(unsigned x, unsigned color, unsigned priority, unsigned source);
} output;
ColorWindow window;
uint16 **light_table;
unsigned get_palette(unsigned color);
uint16 addsub(unsigned x, unsigned y, bool halve);
alwaysinline unsigned get_palette(unsigned color);
unsigned get_direct_color(unsigned palette, unsigned tile);
alwaysinline uint16 addsub(unsigned x, unsigned y, bool halve);
void scanline();
void render_black();
uint16 get_pixel_main(unsigned x);
alwaysinline uint16 get_pixel_main(unsigned x);
alwaysinline uint16 get_pixel_sub(unsigned x);
void render();
Screen(PPU &self);
~Screen();

View File

@ -160,16 +160,17 @@ void PPU::Sprite::render() {
}
}
window.render(0);
window.render(1);
if(regs.main_enable) window.render(0);
if(regs.sub_enable) window.render(1);
const unsigned priority_table[] = { regs.priority0, regs.priority1, regs.priority2, regs.priority3 };
for(unsigned x = 0; x < 256; x++) {
if(output.priority[x] == 0xff) continue;
unsigned priority = priority_table[output.priority[x]];
unsigned palette = output.palette[x];
unsigned color = self.screen.get_palette(output.palette[x]);
if(regs.main_enable && !window.main[x]) self.screen.output.plot_main(x, color, priority, 4);
if(regs.sub_enable && !window.sub[x]) self.screen.output.plot_sub(x, color, priority, 4);
if(regs.main_enable && !window.main[x]) self.screen.output.plot_main(x, color, priority, 4 + (palette < 192));
if(regs.sub_enable && !window.sub[x]) self.screen.output.plot_sub(x, color, priority, 4 + (palette < 192));
}
}

View File

@ -55,7 +55,7 @@ class Sprite {
void update_list(unsigned addr, uint8 data);
void address_reset();
void set_first();
bool on_scanline(unsigned sprite);
alwaysinline bool on_scanline(unsigned sprite);
void render();
Sprite(PPU &self);

View File

@ -1,7 +1,7 @@
namespace SNES {
namespace Info {
static const char Name[] = "bsnes";
static const char Version[] = "068.08";
static const char Version[] = "068.10";
static const unsigned SerializerVersion = 13;
}
}