Update to bsnes v067r03 release.

This substantially improves the S-PPU dot-renderer's sprite
processing. Instead of all sprites being rendered immediately at the
start of the scanline, each pixel is now rendered one at a time. This
eliminates the SpriteList caching, sprite width/height caching,
oam_palette caching and oam_priority caching.

I'll explain it in more detail in the public thread in a bit.

Most noticeable is that Winter Olympics is now perfect, with no known
regressions on any of the sprite-sensitive games.
This commit is contained in:
byuu 2010-08-05 14:37:02 +00:00
parent 431d5c8db7
commit dcc661cb28
8 changed files with 186 additions and 174 deletions

BIN
bsnes.exe Normal file

Binary file not shown.

View File

@ -1,4 +1,4 @@
snes_core = sMemory sCPU sSMP sDSP bPPU
snes_core = sMemory sCPU sSMP sDSP sPPU
snes_objects := libco
snes_objects += snes-system

View File

@ -41,6 +41,7 @@ void sPPU::oam_write(unsigned addr, uint8 data) {
if(!regs.display_disabled && vcounter() < (!regs.overscan ? 225 : 240)) addr = regs.ioamaddr;
if(addr & 0x0200) addr &= 0x021f;
memory::oam[addr] = data;
oam.update(addr, data);
}
uint8 sPPU::cgram_read(unsigned addr) {

View File

@ -104,8 +104,6 @@ void sPPU::Background::serialize(serializer &s) {
void sPPU::Sprite::serialize(serializer &s) {
for(unsigned i = 0; i < 128; i++) {
s.integer(list[i].width);
s.integer(list[i].height);
s.integer(list[i].x);
s.integer(list[i].y);
s.integer(list[i].character);
@ -114,24 +112,28 @@ void sPPU::Sprite::serialize(serializer &s) {
s.integer(list[i].hflip);
s.integer(list[i].priority);
s.integer(list[i].palette);
s.integer(list[i].size);
}
s.integer(t.x);
s.integer(t.y);
s.integer(t.active_sprite);
s.integer(t.item_count);
s.integer(t.tile_count);
s.array(t.output_palette);
s.array(t.output_priority);
s.array(t.item_list);
for(unsigned i = 0; i < 34; i++) {
s.integer(t.tile_list[i].x);
s.integer(t.tile_list[i].y);
s.integer(t.tile_list[i].priority);
s.integer(t.tile_list[i].palette);
s.integer(t.tile_list[i].tile);
s.integer(t.tile_list[i].hflip);
s.integer(t.active);
for(unsigned n = 0; n < 2; n++) {
s.array(t.item[n]);
for(unsigned i = 0; i < 34; i++) {
s.integer(t.tile[n][i].x);
s.integer(t.tile[n][i].y);
s.integer(t.tile[n][i].priority);
s.integer(t.tile[n][i].palette);
s.integer(t.tile[n][i].tile);
s.integer(t.tile[n][i].hflip);
}
}
s.integer(t.active_sprite);
s.integer(regs.main_enabled);
s.integer(regs.sub_enabled);

View File

@ -0,0 +1,54 @@
#ifdef SPPU_CPP
void sPPU::Sprite::update(unsigned addr, uint8 data) {
if(addr < 0x0200) {
unsigned n = addr >> 2;
addr &= 3;
if(addr == 0) {
list[n].x = (list[n].x & 0x100) | data;
} else if(addr == 1) {
list[n].y = data;
} else if(addr == 2) {
list[n].character = data;
} else { //(addr == 3)
list[n].vflip = data & 0x80;
list[n].hflip = data & 0x40;
list[n].priority = (data >> 4) & 3;
list[n].palette = (data >> 1) & 7;
list[n].nameselect = data & 1;
}
} else {
unsigned n = (addr & 0x1f) << 2;
list[n + 0].x = ((data & 0x01) << 8) | (list[n + 0].x & 0xff);
list[n + 0].size = data & 0x02;
list[n + 1].x = ((data & 0x04) << 6) | (list[n + 1].x & 0xff);
list[n + 1].size = data & 0x08;
list[n + 2].x = ((data & 0x10) << 4) | (list[n + 2].x & 0xff);
list[n + 2].size = data & 0x20;
list[n + 3].x = ((data & 0x40) << 2) | (list[n + 3].x & 0xff);
list[n + 3].size = data & 0x80;
}
}
//width of this sprite in pixels
//the size flag picks one of two tables; ppu.oam.regs.base_size indexes it
//(each table has eight entries; presumably base_size is 0-7, confirm at the register write)
unsigned sPPU::Sprite::SpriteItem::width() const {
  static const unsigned small_width[] = {  8,  8,  8, 16, 16, 32, 16, 16 };
  static const unsigned large_width[] = { 16, 32, 64, 32, 64, 64, 32, 32 };
  const unsigned *table = size ? large_width : small_width;
  return table[ppu.oam.regs.base_size];
}
//height of this sprite in pixels
//the size flag picks one of two tables; ppu.oam.regs.base_size indexes it
//(each table has eight entries; presumably base_size is 0-7, confirm at the register write)
unsigned sPPU::Sprite::SpriteItem::height() const {
  static const unsigned small_height[] = {  8,  8,  8, 16, 16, 32, 32, 32 };
  static const unsigned large_height[] = { 16, 32, 64, 32, 64, 64, 64, 32 };

  if(size) return large_height[ppu.oam.regs.base_size];

  //small sprites in base_size modes 6 and 7 are 16 high while interlace is set
  if(ppu.oam.regs.interlace && ppu.oam.regs.base_size >= 6) return 16;
  return small_height[ppu.oam.regs.base_size];
}
#endif

View File

@ -1,5 +1,7 @@
#ifdef SPPU_CPP
#include "list.cpp"
void sPPU::Sprite::address_reset() {
self.regs.oam_addr = self.regs.oam_baseaddr << 1;
regs.first_sprite = (self.regs.oam_priority == false ? 0 : (self.regs.oam_addr >> 2) & 127);
@ -15,114 +17,44 @@ void sPPU::Sprite::scanline() {
t.y = self.vcounter();
if(t.y == (!self.regs.overscan ? 225 : 240) && self.regs.display_disabled == false) address_reset();
if(t.y < 1 || t.y > (!self.regs.overscan ? 224 : 239)) return;
const uint8 *tableA = memory::oam.data();
const uint8 *tableB = memory::oam.data() + 512;
for(unsigned i = 0; i < 128; i++) {
bool x = *tableB & (1 << ((i & 3) << 1));
bool size = *tableB & (2 << ((i & 3) << 1));
switch(regs.base_size) {
case 0: list[i].width = (!size ? 8 : 16);
list[i].height = (!size ? 8 : 16);
break;
case 1: list[i].width = (!size ? 8 : 32);
list[i].height = (!size ? 8 : 32);
break;
case 2: list[i].width = (!size ? 8 : 64);
list[i].height = (!size ? 8 : 64);
break;
case 3: list[i].width = (!size ? 16 : 32);
list[i].height = (!size ? 16 : 32);
break;
case 4: list[i].width = (!size ? 16 : 64);
list[i].height = (!size ? 16 : 64);
break;
case 5: list[i].width = (!size ? 32 : 64);
list[i].height = (!size ? 32 : 64);
break;
case 6: list[i].width = (!size ? 16 : 32);
list[i].height = (!size ? 32 : 64);
if(regs.interlace && !size) list[i].height = 16;
break;
case 7: list[i].width = (!size ? 16 : 32);
list[i].height = (!size ? 32 : 32);
if(regs.interlace && !size) list[i].height = 16;
break;
}
list[i].x = (x << 8) + tableA[0];
list[i].y = (tableA[1] + 1) & 0xff;
list[i].character = tableA[2];
list[i].vflip = tableA[3] & 0x80;
list[i].hflip = tableA[3] & 0x40;
list[i].priority = (tableA[3] >> 4) & 3;
list[i].palette = (tableA[3] >> 1) & 7;
list[i].nameselect = tableA[3] & 1;
tableA += 4;
if((i & 3) == 3) tableB++;
}
if(t.y > (!self.regs.overscan ? 224 : 239)) return;
t.item_count = 0;
t.tile_count = 0;
memset(t.output_priority, 0xff, 256);
memset(t.item_list, 0xff, 32);
for(unsigned i = 0; i < 34; i++) t.tile_list[i].tile = 0xffff;
t.active = !t.active;
auto oam_item = t.item[t.active];
auto oam_tile = t.tile[t.active];
memset(oam_item, 0xff, 32);
for(unsigned i = 0; i < 34; i++) oam_tile[i].tile = 0xffff;
for(unsigned i = 0; i < 128; i++) {
t.active_sprite = (i + regs.first_sprite) & 127;
if(on_scanline() == false) continue;
if(t.item_count++ >= 32) break;
t.item_list[t.item_count - 1] = (i + regs.first_sprite) & 127;
oam_item[t.item_count - 1] = (i + regs.first_sprite) & 127;
}
if(t.item_count > 0 && t.item_list[t.item_count - 1] != 0xff) {
ppu.regs.ioamaddr = 0x0200 + (t.item_list[t.item_count - 1] >> 2);
if(t.item_count > 0 && oam_item[t.item_count - 1] != 0xff) {
ppu.regs.ioamaddr = 0x0200 + (oam_item[t.item_count - 1] >> 2);
}
for(signed i = 31; i >= 0; i--) {
if(t.item_list[i] == 0xff) continue;
t.active_sprite = t.item_list[i];
if(oam_item[i] == 0xff) continue;
t.active_sprite = oam_item[i];
load_tiles();
}
regs.time_over |= (t.tile_count > 34);
regs.range_over |= (t.item_count > 32);
for(unsigned i = 0; i < 34; i++) {
if(t.tile_list[i].tile == 0xffff) continue;
render_tile(i);
}
}
void sPPU::Sprite::run() {
output.main.priority = 0;
output.sub.priority = 0;
unsigned x = t.x++;
if(t.output_priority[x] != 0xff) {
unsigned priority_table[] = { regs.priority0, regs.priority1, regs.priority2, regs.priority3 };
if(regs.main_enabled) {
output.main.palette = t.output_palette[x];
output.main.priority = priority_table[t.output_priority[x]];
}
if(regs.sub_enabled) {
output.sub.palette = t.output_palette[x];
output.sub.priority = priority_table[t.output_priority[x]];
}
}
}
bool sPPU::Sprite::on_scanline() {
SpriteItem &sprite = list[t.active_sprite];
if(sprite.x > 256 && (sprite.x + sprite.width - 1) < 512) return false;
if(sprite.x > 256 && (sprite.x + sprite.width() - 1) < 512) return false;
signed height = (regs.interlace == false ? sprite.height : (sprite.height >> 1));
signed height = (regs.interlace == false ? sprite.height() : (sprite.height() >> 1));
if(t.y >= sprite.y && t.y < (sprite.y + height)) return true;
if((sprite.y + height) >= 256 && t.y < ((sprite.y + height) & 255)) return true;
return false;
@ -130,16 +62,16 @@ bool sPPU::Sprite::on_scanline() {
void sPPU::Sprite::load_tiles() {
SpriteItem &sprite = list[t.active_sprite];
unsigned tile_width = sprite.width >> 3;
unsigned tile_width = sprite.width() >> 3;
signed x = sprite.x;
signed y = (t.y - sprite.y) & 0xff;
if(regs.interlace) y <<= 1;
if(sprite.vflip) {
if(sprite.width == sprite.height) {
y = (sprite.height - 1) - y;
if(sprite.width() == sprite.height()) {
y = (sprite.height() - 1) - y;
} else {
y = (y < sprite.width) ? ((sprite.width - 1) - y) : (sprite.width + ((sprite.width - 1) - (y - sprite.width)));
y = (y < sprite.width()) ? ((sprite.width() - 1) - y) : (sprite.width() + ((sprite.width() - 1) - (y - sprite.width())));
}
}
@ -160,98 +92,117 @@ void sPPU::Sprite::load_tiles() {
chry &= 15;
chry <<= 4;
auto oam_tile = t.tile[t.active];
for(unsigned tx = 0; tx < tile_width; tx++) {
unsigned sx = (x + (tx << 3)) & 511;
if(x != 256 && sx >= 256 && (sx + 7) < 512) continue;
if(t.tile_count++ >= 34) break;
unsigned n = t.tile_count - 1;
t.tile_list[n].x = sx;
t.tile_list[n].y = y;
t.tile_list[n].priority = sprite.priority;
t.tile_list[n].palette = 128 + (sprite.palette << 4);
t.tile_list[n].hflip = sprite.hflip;
oam_tile[n].x = sx;
oam_tile[n].y = y;
oam_tile[n].priority = sprite.priority;
oam_tile[n].palette = 128 + (sprite.palette << 4);
oam_tile[n].hflip = sprite.hflip;
unsigned mx = (sprite.hflip == false) ? tx : ((tile_width - 1) - tx);
unsigned pos = tiledata_addr + ((chry + ((chrx + mx) & 15)) << 5);
t.tile_list[n].tile = (pos >> 5) & 0x07ff;
oam_tile[n].tile = (pos >> 5) & 0x07ff;
}
}
void sPPU::Sprite::render_tile(unsigned tile) {
TileItem &item = t.tile_list[tile];
void sPPU::Sprite::run() {
output.main.priority = 0;
output.sub.priority = 0;
unsigned sx = item.x;
uint16 addr = (item.tile << 5) + ((item.y & 7) * 2);
for(unsigned x = 0; x < 8; x++) {
sx &= 511;
if(sx < 256) {
unsigned px = (item.hflip == false ? x : (7 - x));
unsigned mask = 0x80 >> (px & 7);
unsigned priority_table[] = { regs.priority0, regs.priority1, regs.priority2, regs.priority3 };
unsigned x = t.x++;
uint8 d0 = memory::vram[addr + 0];
uint8 d1 = memory::vram[addr + 1];
uint8 d2 = memory::vram[addr + 16];
uint8 d3 = memory::vram[addr + 17];
for(unsigned n = 0; n < 34; n++) {
TileItem &item = t.tile[!t.active][n];
if(item.tile == 0xffff) break;
unsigned color;
color = ((bool)(d0 & mask)) << 0;
color |= ((bool)(d1 & mask)) << 1;
color |= ((bool)(d2 & mask)) << 2;
color |= ((bool)(d3 & mask)) << 3;
int px = x - sclip<9>(item.x);
if(px & ~7) continue;
if(color) {
color += item.palette;
t.output_palette[sx] = color;
t.output_priority[sx] = item.priority;
uint16 addr = (item.tile << 5) + ((item.y & 7) * 2);
unsigned mask = 0x80 >> (item.hflip == false ? px : 7 - px);
uint8 d0 = memory::vram[addr + 0];
uint8 d1 = memory::vram[addr + 1];
uint8 d2 = memory::vram[addr + 16];
uint8 d3 = memory::vram[addr + 17];
unsigned color;
color = ((bool)(d0 & mask)) << 0;
color |= ((bool)(d1 & mask)) << 1;
color |= ((bool)(d2 & mask)) << 2;
color |= ((bool)(d3 & mask)) << 3;
if(color) {
color += item.palette;
if(regs.main_enabled) {
output.main.palette = color;
output.main.priority = priority_table[item.priority];
}
if(regs.sub_enabled) {
output.sub.palette = color;
output.sub.priority = priority_table[item.priority];
}
}
sx++;
}
}
void sPPU::Sprite::reset() {
t.x = 0;
t.y = 0;
t.item_count = 0;
t.tile_count = 0;
memset(t.output_palette, 0, 256);
memset(t.output_priority, 0, 256);
memset(t.item_list, 0, 32);
for(unsigned i = 0; i < 34; i++) {
t.tile_list[i].x = 0;
t.tile_list[i].y = 0;
t.tile_list[i].priority = 0;
t.tile_list[i].palette = 0;
t.tile_list[i].tile = 0;
t.tile_list[i].hflip = 0;
}
t.active_sprite = 0;
for(unsigned i = 0; i < 128; i++) {
list[i].width = 0;
list[i].height = 0;
list[i].x = 0;
list[i].y = 0;
list[i].character = 0;
list[i].nameselect = 0;
list[i].vflip = 0;
list[i].hflip = 0;
list[i].palette = 0;
list[i].priority = 0;
list[i].palette = 0;
list[i].size = 0;
}
t.x = 0;
t.y = 0;
t.active_sprite = 0;
t.item_count = 0;
t.tile_count = 0;
t.active = 0;
for(unsigned n = 0; n < 2; n++) {
memset(t.item[n], 0, 32);
for(unsigned i = 0; i < 34; i++) {
t.tile[n][i].x = 0;
t.tile[n][i].y = 0;
t.tile[n][i].priority = 0;
t.tile[n][i].palette = 0;
t.tile[n][i].tile = 0;
t.tile[n][i].hflip = 0;
}
}
regs.main_enabled = 0;
regs.sub_enabled = 0;
regs.interlace = 0;
regs.base_size = 0;
regs.nameselect = 0;
regs.tiledata_addr = 0;
regs.first_sprite = 0;
regs.priority0 = 0;
regs.priority1 = 0;
regs.priority2 = 0;
regs.priority3 = 0;
regs.time_over = 0;
regs.range_over = 0;

View File

@ -2,6 +2,20 @@ class Sprite {
public:
sPPU &self;
//decoded form of one OAM entry; list[] holds all 128 sprites
//fields are written by Sprite::update() and cleared by Sprite::reset()
struct SpriteItem {
uint16 x;  //low 8 bits from entry byte 0, bit 8 from the high table
uint16 y;  //entry byte 1
uint8 character;  //entry byte 2
bool nameselect;  //entry byte 3, bit 0
bool vflip;  //entry byte 3, bit 7
bool hflip;  //entry byte 3, bit 6
uint8 priority;  //entry byte 3, bits 4-5; indexes regs.priority0-3 when rendering
uint8 palette;  //entry byte 3, bits 1-3
bool size;  //high-table size bit; selects which dimension table width()/height() use
//dimensions are looked up from ppu.oam.regs.base_size on each call rather than cached
unsigned width() const;
unsigned height() const;
} list[128];
struct TileItem {
uint16 x;
uint16 y;
@ -11,30 +25,17 @@ public:
bool hflip;
};
struct SpriteItem {
uint8 width;
uint8 height;
uint16 x;
uint16 y;
uint8 character;
bool nameselect;
bool vflip;
bool hflip;
uint8 priority;
uint8 palette;
} list[128];
struct State {
unsigned x;
unsigned y;
unsigned active_sprite;
unsigned item_count;
unsigned tile_count;
uint8 output_palette[256];
uint8 output_priority[256];
uint8 item_list[32];
TileItem tile_list[34];
unsigned active_sprite;
bool active;
uint8 item[2][32];
TileItem tile[2][34];
} t;
struct {
@ -63,6 +64,10 @@ public:
} main, sub;
} output;
//list.cpp
void update(unsigned addr, uint8 data);
//sprite.cpp
void address_reset();
void frame();
void scanline();
@ -75,5 +80,4 @@ public:
private:
bool on_scanline();
void load_tiles();
void render_tile(unsigned tile);
};

View File

@ -1,12 +1,12 @@
static const char bsnesVersion[] = "067.01";
static const char bsnesVersion[] = "067.03";
static const char bsnesTitle[] = "bsnes";
static const unsigned bsnesSerializerVersion = 11;
static const unsigned bsnesSerializerVersion = 12;
#define CORE_SMEMORY
#define CORE_SCPU
#define CORE_SSMP
#define CORE_SDSP
#define CORE_BPPU
#define CORE_SPPU
//S-DSP can be encapsulated into a state machine using #define magic
//this avoids ~2.048m co_switch() calls per second (~5% speedup)