Update to v068r10 release.
(there was no r09 release posted to the WIP thread)
byuu says:
It is feature-complete, but horizontal mosaic is less accurate. I have
an idea for a mosaic color ring buffer to get it equally accurate, but
I haven't implemented it yet. For now it's just a simple x & ~(mosaic >>
1) trick that is passable.
Hires blending was left out, as it's more processor intensive and
blargg's NTSC does a better job with that anyway.
There's some OPT vertical positioning issues in the SNES Test Program's
character test; Goodbye, Anthrox has some sort of fast CPU DMA issue;
etc.
Total speedup is a mere 13.5%. Not quite the 50% I wanted in the best
case, but I'll take what I can get.
254->289fps in Zelda 3 on my E8400 now. There's another 15% hiding with
blargg's SMP and 5-10% with blargg's fast DSP, but they lose too much
accuracy. It'd put me at or below Snes9X accuracy, while still being 50%
slower.
SSE2 was performing worse this time, both on x86 and amd64, so I left
that optimization off.
So, barring a miracle, this is about the best it's going to get.
2010-09-03 11:37:36 +00:00
|
|
|
#ifdef PPU_CPP
|
|
|
|
|
|
|
|
#define Clip(x) (((x) & 0x2000) ? ((x) | ~0x03ff) : ((x) & 0x03ff))
|
|
|
|
|
|
|
|
void PPU::Background::render_mode7() {
|
|
|
|
signed px, py;
|
|
|
|
signed tx, ty, tile, palette;
|
|
|
|
|
|
|
|
signed a = sclip<16>(self.regs.m7a);
|
|
|
|
signed b = sclip<16>(self.regs.m7b);
|
|
|
|
signed c = sclip<16>(self.regs.m7c);
|
|
|
|
signed d = sclip<16>(self.regs.m7d);
|
|
|
|
|
|
|
|
signed cx = sclip<13>(self.regs.m7x);
|
|
|
|
signed cy = sclip<13>(self.regs.m7y);
|
|
|
|
signed hofs = sclip<13>(self.regs.mode7_hoffset);
|
|
|
|
signed vofs = sclip<13>(self.regs.mode7_voffset);
|
|
|
|
|
|
|
|
signed y = (self.regs.mode7_vflip == false ? self.vcounter() : 255 - self.vcounter());
|
|
|
|
|
2013-05-05 09:21:30 +00:00
|
|
|
uint16* mosaic_x;
|
|
|
|
uint16* mosaic_y;
|
Update to v068r10 release.
(there was no r09 release posted to the WIP thread)
byuu says:
It is feature-complete, but horizontal mosaic is less accurate. I have
an idea for a mosaic color ring buffer to get it equally accurate, but
I haven't implemented it yet. For now it's just a simple x & ~(mosaic >>
1) trick that is passable.
Hires blending was left out, as it's more processor intensive and
blargg's NTSC does a better job with that anyway.
There's some OPT vertical positioning issues in the SNES Test Program's
character test; Goodbye, Anthrox has some sort of fast CPU DMA issue;
etc.
Total speedup is a mere 13.5%. Not quite the 50% I wanted in the best
case, but I'll take what I can get.
254->289fps in Zelda 3 on my E8400 now. There's another 15% hiding with
blargg's SMP and 5-10% with blargg's fast DSP, but they lose too much
accuracy. It'd put me at or below Snes9X accuracy, while still being 50%
slower.
SSE2 was performing worse this time, both on x86 and amd64, so I left
that optimization off.
So, barring a miracle, this is about the best it's going to get.
2010-09-03 11:37:36 +00:00
|
|
|
if(id == ID::BG1) {
|
|
|
|
mosaic_x = mosaic_table[self.bg1.regs.mosaic];
|
|
|
|
mosaic_y = mosaic_table[self.bg1.regs.mosaic];
|
|
|
|
} else {
|
|
|
|
mosaic_x = mosaic_table[self.bg2.regs.mosaic];
|
|
|
|
mosaic_y = mosaic_table[self.bg1.regs.mosaic];
|
|
|
|
}
|
|
|
|
|
2010-09-06 11:13:51 +00:00
|
|
|
unsigned priority0 = (priority0_enable ? regs.priority0 : 0);
|
|
|
|
unsigned priority1 = (priority1_enable ? regs.priority1 : 0);
|
|
|
|
if(priority0 + priority1 == 0) return;
|
|
|
|
|
Update to v068r10 release.
(there was no r09 release posted to the WIP thread)
byuu says:
It is feature-complete, but horizontal mosaic is less accurate. I have
an idea for a mosaic color ring buffer to get it equally accurate, but
I haven't implemented it yet. For now it's just a simple x & ~(mosaic >>
1) trick that is passable.
Hires blending was left out, as it's more processor intensive and
blargg's NTSC does a better job with that anyway.
There's some OPT vertical positioning issues in the SNES Test Program's
character test; Goodbye, Anthrox has some sort of fast CPU DMA issue;
etc.
Total speedup is a mere 13.5%. Not quite the 50% I wanted in the best
case, but I'll take what I can get.
254->289fps in Zelda 3 on my E8400 now. There's another 15% hiding with
blargg's SMP and 5-10% with blargg's fast DSP, but they lose too much
accuracy. It'd put me at or below Snes9X accuracy, while still being 50%
slower.
SSE2 was performing worse this time, both on x86 and amd64, so I left
that optimization off.
So, barring a miracle, this is about the best it's going to get.
2010-09-03 11:37:36 +00:00
|
|
|
signed psx = ((a * Clip(hofs - cx)) & ~63) + ((b * Clip(vofs - cy)) & ~63) + ((b * mosaic_y[y]) & ~63) + (cx << 8);
|
|
|
|
signed psy = ((c * Clip(hofs - cx)) & ~63) + ((d * Clip(vofs - cy)) & ~63) + ((d * mosaic_y[y]) & ~63) + (cy << 8);
|
|
|
|
for(signed x = 0; x < 256; x++) {
|
|
|
|
px = (psx + (a * mosaic_x[x])) >> 8;
|
|
|
|
py = (psy + (c * mosaic_x[x])) >> 8;
|
|
|
|
|
|
|
|
switch(self.regs.mode7_repeat) {
|
2013-05-05 09:21:30 +00:00
|
|
|
case 0: case 1: {
|
|
|
|
px &= 1023;
|
|
|
|
py &= 1023;
|
|
|
|
tx = ((px >> 3) & 127);
|
|
|
|
ty = ((py >> 3) & 127);
|
|
|
|
tile = ppu.vram[(ty * 128 + tx) << 1];
|
|
|
|
palette = ppu.vram[(((tile << 6) + ((py & 7) << 3) + (px & 7)) << 1) + 1];
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
case 2: {
|
|
|
|
if((px | py) & ~1023) {
|
|
|
|
palette = 0;
|
|
|
|
} else {
|
Update to v068r10 release.
(there was no r09 release posted to the WIP thread)
byuu says:
It is feature-complete, but horizontal mosaic is less accurate. I have
an idea for a mosaic color ring buffer to get it equally accurate, but
I haven't implemented it yet. For now it's just a simple x & ~(mosaic >>
1) trick that is passable.
Hires blending was left out, as it's more processor intensive and
blargg's NTSC does a better job with that anyway.
There's some OPT vertical positioning issues in the SNES Test Program's
character test; Goodbye, Anthrox has some sort of fast CPU DMA issue;
etc.
Total speedup is a mere 13.5%. Not quite the 50% I wanted in the best
case, but I'll take what I can get.
254->289fps in Zelda 3 on my E8400 now. There's another 15% hiding with
blargg's SMP and 5-10% with blargg's fast DSP, but they lose too much
accuracy. It'd put me at or below Snes9X accuracy, while still being 50%
slower.
SSE2 was performing worse this time, both on x86 and amd64, so I left
that optimization off.
So, barring a miracle, this is about the best it's going to get.
2010-09-03 11:37:36 +00:00
|
|
|
px &= 1023;
|
|
|
|
py &= 1023;
|
|
|
|
tx = ((px >> 3) & 127);
|
|
|
|
ty = ((py >> 3) & 127);
|
Update to v074r11 release.
byuu says:
Changelog:
- debugger compiles on all three profiles
- libsnes compiles on all three platforms (no API changes to libsnes)
- memory.cpp : namespace memory removed (wram -> cpu, apuram -> smp,
vram, oam, cgram -> ppu)
- sa1.cpp : namespace memory removed (SA-1 specific functions merged
inline to SA1::bus_read,write)
- GameBoy: added serial link support with interrupts and proper 8192hz
timing, but obviously it acts as if no other GB is connected to it
- GameBoy: added STAT OAM interrupt, and better STAT d1,d0 mode values
- UI: since Qt is dead, I've renamed the config files back to bsnes.cfg
and bsnes-geometry.cfg
- SA1: IRAM was not syncing to CPU on SA-1 side
- PPU/Accuracy and PPU/Performance needed Sprite oam renamed to Sprite
sprite; so that I could add uint8 oam[544]
- makes more sense anyway, OAM = object attribute memory, obj or
sprite are better names for Sprite rendering class
- more cleanup
2011-01-24 09:03:17 +00:00
|
|
|
tile = ppu.vram[(ty * 128 + tx) << 1];
|
|
|
|
palette = ppu.vram[(((tile << 6) + ((py & 7) << 3) + (px & 7)) << 1) + 1];
|
Update to v068r10 release.
(there was no r09 release posted to the WIP thread)
byuu says:
It is feature-complete, but horizontal mosaic is less accurate. I have
an idea for a mosaic color ring buffer to get it equally accurate, but
I haven't implemented it yet. For now it's just a simple x & ~(mosaic >>
1) trick that is passable.
Hires blending was left out, as it's more processor intensive and
blargg's NTSC does a better job with that anyway.
There's some OPT vertical positioning issues in the SNES Test Program's
character test; Goodbye, Anthrox has some sort of fast CPU DMA issue;
etc.
Total speedup is a mere 13.5%. Not quite the 50% I wanted in the best
case, but I'll take what I can get.
254->289fps in Zelda 3 on my E8400 now. There's another 15% hiding with
blargg's SMP and 5-10% with blargg's fast DSP, but they lose too much
accuracy. It'd put me at or below Snes9X accuracy, while still being 50%
slower.
SSE2 was performing worse this time, both on x86 and amd64, so I left
that optimization off.
So, barring a miracle, this is about the best it's going to get.
2010-09-03 11:37:36 +00:00
|
|
|
}
|
2013-05-05 09:21:30 +00:00
|
|
|
break;
|
|
|
|
}
|
Update to v068r10 release.
(there was no r09 release posted to the WIP thread)
byuu says:
It is feature-complete, but horizontal mosaic is less accurate. I have
an idea for a mosaic color ring buffer to get it equally accurate, but
I haven't implemented it yet. For now it's just a simple x & ~(mosaic >>
1) trick that is passable.
Hires blending was left out, as it's more processor intensive and
blargg's NTSC does a better job with that anyway.
There's some OPT vertical positioning issues in the SNES Test Program's
character test; Goodbye, Anthrox has some sort of fast CPU DMA issue;
etc.
Total speedup is a mere 13.5%. Not quite the 50% I wanted in the best
case, but I'll take what I can get.
254->289fps in Zelda 3 on my E8400 now. There's another 15% hiding with
blargg's SMP and 5-10% with blargg's fast DSP, but they lose too much
accuracy. It'd put me at or below Snes9X accuracy, while still being 50%
slower.
SSE2 was performing worse this time, both on x86 and amd64, so I left
that optimization off.
So, barring a miracle, this is about the best it's going to get.
2010-09-03 11:37:36 +00:00
|
|
|
|
2013-05-05 09:21:30 +00:00
|
|
|
case 3: {
|
|
|
|
if((px | py) & ~1023) {
|
|
|
|
tile = 0;
|
|
|
|
} else {
|
|
|
|
px &= 1023;
|
|
|
|
py &= 1023;
|
|
|
|
tx = ((px >> 3) & 127);
|
|
|
|
ty = ((py >> 3) & 127);
|
|
|
|
tile = ppu.vram[(ty * 128 + tx) << 1];
|
Update to v068r10 release.
(there was no r09 release posted to the WIP thread)
byuu says:
It is feature-complete, but horizontal mosaic is less accurate. I have
an idea for a mosaic color ring buffer to get it equally accurate, but
I haven't implemented it yet. For now it's just a simple x & ~(mosaic >>
1) trick that is passable.
Hires blending was left out, as it's more processor intensive and
blargg's NTSC does a better job with that anyway.
There's some OPT vertical positioning issues in the SNES Test Program's
character test; Goodbye, Anthrox has some sort of fast CPU DMA issue;
etc.
Total speedup is a mere 13.5%. Not quite the 50% I wanted in the best
case, but I'll take what I can get.
254->289fps in Zelda 3 on my E8400 now. There's another 15% hiding with
blargg's SMP and 5-10% with blargg's fast DSP, but they lose too much
accuracy. It'd put me at or below Snes9X accuracy, while still being 50%
slower.
SSE2 was performing worse this time, both on x86 and amd64, so I left
that optimization off.
So, barring a miracle, this is about the best it's going to get.
2010-09-03 11:37:36 +00:00
|
|
|
}
|
2013-05-05 09:21:30 +00:00
|
|
|
palette = ppu.vram[(((tile << 6) + ((py & 7) << 3) + (px & 7)) << 1) + 1];
|
|
|
|
break;
|
|
|
|
}
|
Update to v068r10 release.
(there was no r09 release posted to the WIP thread)
byuu says:
It is feature-complete, but horizontal mosaic is less accurate. I have
an idea for a mosaic color ring buffer to get it equally accurate, but
I haven't implemented it yet. For now it's just a simple x & ~(mosaic >>
1) trick that is passable.
Hires blending was left out, as it's more processor intensive and
blargg's NTSC does a better job with that anyway.
There's some OPT vertical positioning issues in the SNES Test Program's
character test; Goodbye, Anthrox has some sort of fast CPU DMA issue;
etc.
Total speedup is a mere 13.5%. Not quite the 50% I wanted in the best
case, but I'll take what I can get.
254->289fps in Zelda 3 on my E8400 now. There's another 15% hiding with
blargg's SMP and 5-10% with blargg's fast DSP, but they lose too much
accuracy. It'd put me at or below Snes9X accuracy, while still being 50%
slower.
SSE2 was performing worse this time, both on x86 and amd64, so I left
that optimization off.
So, barring a miracle, this is about the best it's going to get.
2010-09-03 11:37:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
unsigned priority;
|
|
|
|
if(id == ID::BG1) {
|
2010-09-06 11:13:51 +00:00
|
|
|
priority = priority0;
|
Update to v068r10 release.
(there was no r09 release posted to the WIP thread)
byuu says:
It is feature-complete, but horizontal mosaic is less accurate. I have
an idea for a mosaic color ring buffer to get it equally accurate, but
I haven't implemented it yet. For now it's just a simple x & ~(mosaic >>
1) trick that is passable.
Hires blending was left out, as it's more processor intensive and
blargg's NTSC does a better job with that anyway.
There's some OPT vertical positioning issues in the SNES Test Program's
character test; Goodbye, Anthrox has some sort of fast CPU DMA issue;
etc.
Total speedup is a mere 13.5%. Not quite the 50% I wanted in the best
case, but I'll take what I can get.
254->289fps in Zelda 3 on my E8400 now. There's another 15% hiding with
blargg's SMP and 5-10% with blargg's fast DSP, but they lose too much
accuracy. It'd put me at or below Snes9X accuracy, while still being 50%
slower.
SSE2 was performing worse this time, both on x86 and amd64, so I left
that optimization off.
So, barring a miracle, this is about the best it's going to get.
2010-09-03 11:37:36 +00:00
|
|
|
} else {
|
2010-09-06 11:13:51 +00:00
|
|
|
priority = (palette & 0x80 ? priority1 : priority0);
|
Update to v068r10 release.
(there was no r09 release posted to the WIP thread)
byuu says:
It is feature-complete, but horizontal mosaic is less accurate. I have
an idea for a mosaic color ring buffer to get it equally accurate, but
I haven't implemented it yet. For now it's just a simple x & ~(mosaic >>
1) trick that is passable.
Hires blending was left out, as it's more processor intensive and
blargg's NTSC does a better job with that anyway.
There's some OPT vertical positioning issues in the SNES Test Program's
character test; Goodbye, Anthrox has some sort of fast CPU DMA issue;
etc.
Total speedup is a mere 13.5%. Not quite the 50% I wanted in the best
case, but I'll take what I can get.
254->289fps in Zelda 3 on my E8400 now. There's another 15% hiding with
blargg's SMP and 5-10% with blargg's fast DSP, but they lose too much
accuracy. It'd put me at or below Snes9X accuracy, while still being 50%
slower.
SSE2 was performing worse this time, both on x86 and amd64, so I left
that optimization off.
So, barring a miracle, this is about the best it's going to get.
2010-09-03 11:37:36 +00:00
|
|
|
palette &= 0x7f;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(palette == 0) continue;
|
|
|
|
unsigned plot_x = (self.regs.mode7_hflip == false ? x : 255 - x);
|
|
|
|
|
|
|
|
unsigned color;
|
|
|
|
if(self.screen.regs.direct_color && id == ID::BG1) {
|
|
|
|
color = self.screen.get_direct_color(0, palette);
|
|
|
|
} else {
|
|
|
|
color = self.screen.get_palette(palette);
|
|
|
|
}
|
|
|
|
|
|
|
|
if(regs.main_enable && !window.main[plot_x]) self.screen.output.plot_main(plot_x, color, priority, id);
|
|
|
|
if(regs.sub_enable && !window.sub[plot_x]) self.screen.output.plot_sub(plot_x, color, priority, id);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#undef Clip
|
|
|
|
|
|
|
|
#endif
|