* ~4.6% speedup (with the fast PPU)
* fix out-of-bounds DSP memory access [Sour]
This commit is contained in:
byuu 2019-08-02 04:45:06 +09:00
parent 9e8913cea0
commit f6303518d5
17 changed files with 138 additions and 117 deletions

View File

@ -31,13 +31,13 @@ using namespace nall;
namespace Emulator { namespace Emulator {
static const string Name = "bsnes"; static const string Name = "bsnes";
static const string Version = "108.3"; static const string Version = "108.4";
static const string Author = "byuu"; static const string Author = "byuu";
static const string License = "GPLv3"; static const string License = "GPLv3";
static const string Website = "https://byuu.org"; static const string Website = "https://byuu.org";
//incremented only when serialization format changes //incremented only when serialization format changes
static const string SerializerVersion = "108.2"; static const string SerializerVersion = "108.4";
namespace Constants { namespace Constants {
namespace Colorburst { namespace Colorburst {

View File

@ -45,7 +45,7 @@ auto WDC65816::instructionLongRead8(alu8 op, r16 I) -> void {
V.l = fetch(); V.l = fetch();
V.h = fetch(); V.h = fetch();
V.b = fetch(); V.b = fetch();
L W.l = read(V.d + I.w + 0); L W.l = readLong(V.d + I.w + 0);
alu(W.l); alu(W.l);
} }
@ -53,8 +53,8 @@ auto WDC65816::instructionLongRead16(alu16 op, r16 I) -> void {
V.l = fetch(); V.l = fetch();
V.h = fetch(); V.h = fetch();
V.b = fetch(); V.b = fetch();
W.l = read(V.d + I.w + 0); W.l = readLong(V.d + I.w + 0);
L W.h = read(V.d + I.w + 1); L W.h = readLong(V.d + I.w + 1);
alu(W.w); alu(W.w);
} }
@ -157,7 +157,7 @@ auto WDC65816::instructionIndirectLongRead8(alu8 op, r16 I) -> void {
V.l = readDirectN(U.l + 0); V.l = readDirectN(U.l + 0);
V.h = readDirectN(U.l + 1); V.h = readDirectN(U.l + 1);
V.b = readDirectN(U.l + 2); V.b = readDirectN(U.l + 2);
L W.l = read(V.d + I.w + 0); L W.l = readLong(V.d + I.w + 0);
alu(W.l); alu(W.l);
} }
@ -167,8 +167,8 @@ auto WDC65816::instructionIndirectLongRead16(alu16 op, r16 I) -> void {
V.l = readDirectN(U.l + 0); V.l = readDirectN(U.l + 0);
V.h = readDirectN(U.l + 1); V.h = readDirectN(U.l + 1);
V.b = readDirectN(U.l + 2); V.b = readDirectN(U.l + 2);
W.l = read(V.d + I.w + 0); W.l = readLong(V.d + I.w + 0);
L W.h = read(V.d + I.w + 1); L W.h = readLong(V.d + I.w + 1);
alu(W.w); alu(W.w);
} }

View File

@ -30,15 +30,15 @@ auto WDC65816::instructionLongWrite8(r16 I) -> void {
V.l = fetch(); V.l = fetch();
V.h = fetch(); V.h = fetch();
V.b = fetch(); V.b = fetch();
L write(V.d + I.w + 0, A.l); L writeLong(V.d + I.w + 0, A.l);
} }
auto WDC65816::instructionLongWrite16(r16 I) -> void { auto WDC65816::instructionLongWrite16(r16 I) -> void {
V.l = fetch(); V.l = fetch();
V.h = fetch(); V.h = fetch();
V.b = fetch(); V.b = fetch();
write(V.d + I.w + 0, A.l); writeLong(V.d + I.w + 0, A.l);
L write(V.d + I.w + 1, A.h); L writeLong(V.d + I.w + 1, A.h);
} }
auto WDC65816::instructionDirectWrite8(r16 F) -> void { auto WDC65816::instructionDirectWrite8(r16 F) -> void {
@ -130,7 +130,7 @@ auto WDC65816::instructionIndirectLongWrite8(r16 I) -> void {
V.l = readDirectN(U.l + 0); V.l = readDirectN(U.l + 0);
V.h = readDirectN(U.l + 1); V.h = readDirectN(U.l + 1);
V.b = readDirectN(U.l + 2); V.b = readDirectN(U.l + 2);
L write(V.d + I.w + 0, A.l); L writeLong(V.d + I.w + 0, A.l);
} }
auto WDC65816::instructionIndirectLongWrite16(r16 I) -> void { auto WDC65816::instructionIndirectLongWrite16(r16 I) -> void {
@ -139,8 +139,8 @@ auto WDC65816::instructionIndirectLongWrite16(r16 I) -> void {
V.l = readDirectN(U.l + 0); V.l = readDirectN(U.l + 0);
V.h = readDirectN(U.l + 1); V.h = readDirectN(U.l + 1);
V.b = readDirectN(U.l + 2); V.b = readDirectN(U.l + 2);
write(V.d + I.w + 0, A.l); writeLong(V.d + I.w + 0, A.l);
L write(V.d + I.w + 1, A.h); L writeLong(V.d + I.w + 1, A.h);
} }
auto WDC65816::instructionStackWrite8() -> void { auto WDC65816::instructionStackWrite8() -> void {

View File

@ -50,31 +50,39 @@ auto WDC65816::pushN(uint8 data) -> void {
} }
auto WDC65816::readDirect(uint address) -> uint8 { auto WDC65816::readDirect(uint address) -> uint8 {
if(EF && !D.l) return read(D.w | uint8(address)); if(EF && !D.l) return read(D.w | address & 0xff);
return read(uint16(D.w + address)); return read(D.w + address & 0xffff);
} }
auto WDC65816::writeDirect(uint address, uint8 data) -> void { auto WDC65816::writeDirect(uint address, uint8 data) -> void {
if(EF && !D.l) return write(D.w | uint8(address), data); if(EF && !D.l) return write(D.w | address & 0xff, data);
write(uint16(D.w + address), data); write(D.w + address & 0xffff, data);
} }
auto WDC65816::readDirectN(uint address) -> uint8 { auto WDC65816::readDirectN(uint address) -> uint8 {
return read(uint16(D.w + address)); return read(D.w + address & 0xffff);
} }
auto WDC65816::readBank(uint address) -> uint8 { auto WDC65816::readBank(uint address) -> uint8 {
return read((B << 16) + address); return read((B << 16) + address & 0xffffff);
} }
auto WDC65816::writeBank(uint address, uint8 data) -> void { auto WDC65816::writeBank(uint address, uint8 data) -> void {
write((B << 16) + address, data); write((B << 16) + address & 0xffffff, data);
}
auto WDC65816::readLong(uint address) -> uint8 {
return read(address & 0xffffff);
}
auto WDC65816::writeLong(uint address, uint8 data) -> void {
write(address & 0xffffff, data);
} }
auto WDC65816::readStack(uint address) -> uint8 { auto WDC65816::readStack(uint address) -> uint8 {
return read(uint16(S.w + address)); return read(S.w + address & 0xffff);
} }
auto WDC65816::writeStack(uint address, uint8 data) -> void { auto WDC65816::writeStack(uint address, uint8 data) -> void {
write(uint16(S.w + address), data); write(S.w + address & 0xffff, data);
} }

View File

@ -10,8 +10,8 @@ struct WDC65816 {
virtual auto idle() -> void = 0; virtual auto idle() -> void = 0;
virtual auto idleBranch() -> void {} virtual auto idleBranch() -> void {}
virtual auto idleJump() -> void {} virtual auto idleJump() -> void {}
virtual auto read(uint24 addr) -> uint8 = 0; virtual auto read(uint addr) -> uint8 = 0;
virtual auto write(uint24 addr, uint8 data) -> void = 0; virtual auto write(uint addr, uint8 data) -> void = 0;
virtual auto lastCycle() -> void = 0; virtual auto lastCycle() -> void = 0;
virtual auto interruptPending() const -> bool = 0; virtual auto interruptPending() const -> bool = 0;
virtual auto interrupt() -> void; virtual auto interrupt() -> void;
@ -44,22 +44,24 @@ struct WDC65816 {
auto power() -> void; auto power() -> void;
//memory.cpp //memory.cpp
inline auto idleIRQ() -> void; alwaysinline auto idleIRQ() -> void;
inline auto idle2() -> void; alwaysinline auto idle2() -> void;
inline auto idle4(uint16 x, uint16 y) -> void; alwaysinline auto idle4(uint16 x, uint16 y) -> void;
inline auto idle6(uint16 address) -> void; alwaysinline auto idle6(uint16 address) -> void;
inline auto fetch() -> uint8; alwaysinline auto fetch() -> uint8;
inline auto pull() -> uint8; alwaysinline auto pull() -> uint8;
auto push(uint8 data) -> void; auto push(uint8 data) -> void;
inline auto pullN() -> uint8; alwaysinline auto pullN() -> uint8;
inline auto pushN(uint8 data) -> void; alwaysinline auto pushN(uint8 data) -> void;
inline auto readDirect(uint address) -> uint8; alwaysinline auto readDirect(uint address) -> uint8;
inline auto writeDirect(uint address, uint8 data) -> void; alwaysinline auto writeDirect(uint address, uint8 data) -> void;
inline auto readDirectN(uint address) -> uint8; alwaysinline auto readDirectN(uint address) -> uint8;
inline auto readBank(uint address) -> uint8; alwaysinline auto readBank(uint address) -> uint8;
inline auto writeBank(uint address, uint8 data) -> void; alwaysinline auto writeBank(uint address, uint8 data) -> void;
inline auto readStack(uint address) -> uint8; alwaysinline auto readLong(uint address) -> uint8;
inline auto writeStack(uint address, uint8 data) -> void; alwaysinline auto writeLong(uint address, uint8 data) -> void;
alwaysinline auto readStack(uint address) -> uint8;
alwaysinline auto writeStack(uint address, uint8 data) -> void;
//algorithms.cpp //algorithms.cpp
using alu8 = auto (WDC65816::*)( uint8) -> uint8; using alu8 = auto (WDC65816::*)( uint8) -> uint8;

View File

@ -18,7 +18,7 @@ auto SA1::idleBranch() -> void {
if(r.pc.d & 1) idleJump(); if(r.pc.d & 1) idleJump();
} }
auto SA1::read(uint24 address) -> uint8 { auto SA1::read(uint address) -> uint8 {
r.mar = address; r.mar = address;
uint8 data = r.mdr; uint8 data = r.mdr;
@ -62,7 +62,7 @@ auto SA1::read(uint24 address) -> uint8 {
return data; return data;
} }
auto SA1::write(uint24 address, uint8 data) -> void { auto SA1::write(uint address, uint8 data) -> void {
r.mar = address; r.mar = address;
r.mdr = data; r.mdr = data;

View File

@ -38,8 +38,8 @@ struct SA1 : Processor::WDC65816, Thread {
alwaysinline auto idle() -> void override; alwaysinline auto idle() -> void override;
alwaysinline auto idleJump() -> void override; alwaysinline auto idleJump() -> void override;
alwaysinline auto idleBranch() -> void override; alwaysinline auto idleBranch() -> void override;
alwaysinline auto read(uint24 address) -> uint8 override; alwaysinline auto read(uint address) -> uint8 override;
alwaysinline auto write(uint24 address, uint8 data) -> void override; alwaysinline auto write(uint address, uint8 data) -> void override;
auto readVBR(uint address, uint8 data = 0) -> uint8; auto readVBR(uint address, uint8 data = 0) -> uint8;
auto readDisassembler(uint address) -> uint8 override; auto readDisassembler(uint address) -> uint8 override;

View File

@ -109,7 +109,7 @@ auto CPU::power(bool reset) -> void {
alu = {}; alu = {};
status = {}; status = {};
status.lineClocks = lineclocks(); status.lineClocks = hperiod();
status.dramRefreshPosition = (version == 1 ? 530 : 538); status.dramRefreshPosition = (version == 1 ? 530 : 538);
status.hdmaSetupPosition = (version == 1 ? 12 + 8 - dmaCounter() : 12 + dmaCounter()); status.hdmaSetupPosition = (version == 1 ? 12 + 8 - dmaCounter() : 12 + dmaCounter());
status.hdmaPosition = 1104; status.hdmaPosition = 1104;

View File

@ -25,8 +25,8 @@ struct CPU : Processor::WDC65816, Thread, PPUcounter {
//memory.cpp //memory.cpp
auto idle() -> void override; auto idle() -> void override;
auto read(uint24 addr) -> uint8 override; auto read(uint addr) -> uint8 override;
auto write(uint24 addr, uint8 data) -> void override; auto write(uint addr, uint8 data) -> void override;
auto readDisassembler(uint addr) -> uint8 override; auto readDisassembler(uint addr) -> uint8 override;
//io.cpp //io.cpp

View File

@ -6,7 +6,7 @@ auto CPU::idle() -> void {
aluEdge(); aluEdge();
} }
auto CPU::read(uint24 address) -> uint8 { auto CPU::read(uint address) -> uint8 {
status.irqLock = false; status.irqLock = false;
if(address & 0x408000) { if(address & 0x408000) {
@ -46,7 +46,7 @@ auto CPU::read(uint24 address) -> uint8 {
return data; return data;
} }
auto CPU::write(uint24 address, uint8 data) -> void { auto CPU::write(uint address, uint8 data) -> void {
status.irqLock = false; status.irqLock = false;
aluEdge(); aluEdge();

View File

@ -82,7 +82,7 @@ auto CPU::step(uint clocks) -> void {
//called by ppu.tick() when Hcounter=0 //called by ppu.tick() when Hcounter=0
auto CPU::scanline() -> void { auto CPU::scanline() -> void {
status.lineClocks = lineclocks(); status.lineClocks = hperiod();
//forcefully sync S-CPU to other processors, in case chips are not communicating //forcefully sync S-CPU to other processors, in case chips are not communicating
synchronizeSMP(); synchronizeSMP();

View File

@ -415,7 +415,7 @@ MISC_CLOCK( 30 )
inline VOICE_CLOCK( V1 ) inline VOICE_CLOCK( V1 )
{ {
m.t_dir_addr = m.t_dir * 0x100 + m.t_srcn * 4; m.t_dir_addr = (m.t_dir * 0x100 + m.t_srcn * 4) & 0xffff;
m.t_srcn = VREG(v->regs,srcn); m.t_srcn = VREG(v->regs,srcn);
} }
inline VOICE_CLOCK( V2 ) inline VOICE_CLOCK( V2 )

View File

@ -100,7 +100,7 @@ auto PPU::main() -> void {
} }
} }
step(lineclocks() - hcounter()); step(hperiod() - hcounter());
} }
auto PPU::scanline() -> void { auto PPU::scanline() -> void {

99
bsnes/sfc/ppu/counter/counter-inline.hpp Normal file → Executable file
View File

@ -1,71 +1,84 @@
//this should only be called by CPU::PPUcounter::tick();
//keeps track of previous counter positions in history table
auto PPUcounter::tick() -> void { auto PPUcounter::tick() -> void {
status.hcounter += 2; //increment by smallest unit of time time.hcounter += 2; //increment by smallest unit of time.
if(status.hcounter == lineclocks()) { if(time.hcounter == hperiod()) {
status.hcounter = 0; last.hperiod = hperiod();
vcounterTick(); time.hcounter = 0;
tickScanline();
}
} }
history.index = (history.index + 1) & 2047;
history.field [history.index] = status.field;
history.vcounter[history.index] = status.vcounter;
history.hcounter[history.index] = status.hcounter;
}
//this should only be called by PPU::PPUcounter::tick(n);
//allows stepping by more than the smallest unit of time
auto PPUcounter::tick(uint clocks) -> void { auto PPUcounter::tick(uint clocks) -> void {
status.hcounter += clocks; time.hcounter += clocks;
if(status.hcounter >= lineclocks()) { if(time.hcounter >= hperiod()) {
status.hcounter -= lineclocks(); last.hperiod = hperiod();
vcounterTick(); time.hcounter -= hperiod();
tickScanline();
} }
} }
//internal auto PPUcounter::tickScanline() -> void {
auto PPUcounter::vcounterTick() -> void { if(++time.vcounter == 128) {
if(++status.vcounter == 128) status.interlace = ppu.interlace(); //it's not important when this is captured: it is only needed at V=240 or V=311.
if(vcounter() == (Region::NTSC() ? 262 : 312) + (interlace() && !field())) { time.interlace = ppu.interlace();
status.vcounter = 0; time.vperiod += interlace() && !field();
status.field ^= 1;
} }
status.lineclocks = 1364;
//NTSC and PAL scanlines rates would not match up with color clocks if every scanline were 1364 clocks if(vcounter() == vperiod()) {
//to offset for this error, NTSC has one short scanline, and PAL has one long scanline last.vperiod = vperiod();
if(Region::NTSC() && interlace() == 0 && field() == 1 && vcounter() == 240) status.lineclocks -= 4; //this may be off by one until V=128, hence why vperiod() is a private function.
if(Region::PAL() && interlace() == 1 && field() == 1 && vcounter() == 311) status.lineclocks += 4; time.vperiod = Region::NTSC() ? 262 : 312;
time.vcounter = 0;
time.field ^= 1;
}
time.hperiod = 1364;
//NTSC and PAL scanline rates would not match up with color clocks if every scanline were 1364 clocks.
//to offset for this error, NTSC has one short scanline, and PAL has one long scanline.
if(Region::NTSC() && interlace() == 0 && field() == 1 && vcounter() == 240) time.hperiod -= 4;
if(Region::PAL() && interlace() == 1 && field() == 1 && vcounter() == 311) time.hperiod += 4;
if(scanline) scanline(); if(scanline) scanline();
} }
auto PPUcounter::interlace() const -> bool { return status.interlace; } auto PPUcounter::interlace() const -> bool { return time.interlace; }
auto PPUcounter::field() const -> bool { return status.field; } auto PPUcounter::field() const -> bool { return time.field; }
auto PPUcounter::vcounter() const -> uint { return status.vcounter; } auto PPUcounter::vcounter() const -> uint { return time.vcounter; }
auto PPUcounter::hcounter() const -> uint { return status.hcounter; } auto PPUcounter::hcounter() const -> uint { return time.hcounter; }
auto PPUcounter::lineclocks() const -> uint { return status.lineclocks; } auto PPUcounter::vperiod() const -> uint { return time.vperiod; }
auto PPUcounter::hperiod() const -> uint { return time.hperiod; }
auto PPUcounter::field(uint offset) const -> bool { return history.field[(history.index - (offset >> 1)) & 2047]; } auto PPUcounter::vcounter(uint offset) const -> uint {
auto PPUcounter::vcounter(uint offset) const -> uint { return history.vcounter[(history.index - (offset >> 1)) & 2047]; } if(offset <= hcounter()) return vcounter();
auto PPUcounter::hcounter(uint offset) const -> uint { return history.hcounter[(history.index - (offset >> 1)) & 2047]; } if(vcounter() > 0) return vcounter() - 1;
return last.vperiod - 1;
}
//one PPU dot = 4 CPU clocks auto PPUcounter::hcounter(uint offset) const -> uint {
if(offset <= hcounter()) return hcounter() - offset;
return hcounter() + last.hperiod - offset;
}
//one PPU dot = 4 CPU clocks.
// //
//PPU dots 323 and 327 are 6 CPU clocks long. //PPU dots 323 and 327 are 6 CPU clocks long.
//this does not apply to NTSC non-interlace scanline 240 on odd fields. this is //this does not apply to NTSC non-interlace scanline 240 on odd fields. this is
//because the PPU skips one dot to alter the color burst phase of the video signal. //because the PPU skips one dot to alter the color burst phase of the video signal.
//it is not known what happens for PAL 1368 clock scanlines.
// //
//dot 323 range = {1292, 1294, 1296} //dot 323 range = {1292, 1294, 1296}
//dot 327 range = {1310, 1312, 1314} //dot 327 range = {1310, 1312, 1314}
auto PPUcounter::hdot() const -> uint { auto PPUcounter::hdot() const -> uint {
if(lineclocks() == 1360) { if(hperiod() == 1360) {
return (hcounter() >> 2); return hcounter() >> 2;
} else { } else {
return (hcounter() - ((hcounter() > 1292) << 1) - ((hcounter() > 1310) << 1)) >> 2; return hcounter() - ((hcounter() > 1292) << 1) - ((hcounter() > 1310) << 1) >> 2;
} }
} }
auto PPUcounter::reset() -> void { auto PPUcounter::reset() -> void {
status = {}; time = {};
history = {}; last = {};
time.vperiod = last.vperiod = Region::NTSC() ? 262 : 312;
time.hperiod = last.hperiod = 1364;
} }

24
bsnes/sfc/ppu/counter/counter.hpp Normal file → Executable file
View File

@ -12,16 +12,17 @@
struct PPUcounter { struct PPUcounter {
alwaysinline auto tick() -> void; alwaysinline auto tick() -> void;
alwaysinline auto tick(uint clocks) -> void; alwaysinline auto tick(uint clocks) -> void; private:
alwaysinline auto tickScanline() -> void; public:
alwaysinline auto interlace() const -> bool; alwaysinline auto interlace() const -> bool;
alwaysinline auto field() const -> bool; alwaysinline auto field() const -> bool;
alwaysinline auto vcounter() const -> uint; alwaysinline auto vcounter() const -> uint;
alwaysinline auto hcounter() const -> uint; alwaysinline auto hcounter() const -> uint;
alwaysinline auto hdot() const -> uint; alwaysinline auto hdot() const -> uint; private:
alwaysinline auto lineclocks() const -> uint; alwaysinline auto vperiod() const -> uint; public:
alwaysinline auto hperiod() const -> uint;
alwaysinline auto field(uint offset) const -> bool;
alwaysinline auto vcounter(uint offset) const -> uint; alwaysinline auto vcounter(uint offset) const -> uint;
alwaysinline auto hcounter(uint offset) const -> uint; alwaysinline auto hcounter(uint offset) const -> uint;
@ -31,20 +32,17 @@ struct PPUcounter {
function<void ()> scanline; function<void ()> scanline;
private: private:
alwaysinline auto vcounterTick() -> void;
struct { struct {
bool interlace = 0; bool interlace = 0;
bool field = 0; bool field = 0;
uint vperiod = 0;
uint hperiod = 0;
uint vcounter = 0; uint vcounter = 0;
uint hcounter = 0; uint hcounter = 0;
uint lineclocks = 1364; } time;
} status;
struct { struct {
uint index = 0; uint vperiod = 0;
bool field[2048] = {}; uint hperiod = 0;
uint vcounter[2048] = {}; } last;
uint hcounter[2048] = {};
} history;
}; };

View File

@ -1,11 +1,11 @@
auto PPUcounter::serialize(serializer& s) -> void { auto PPUcounter::serialize(serializer& s) -> void {
s.integer(status.interlace); s.integer(time.interlace);
s.integer(status.field); s.integer(time.field);
s.integer(status.vcounter); s.integer(time.vperiod);
s.integer(status.hcounter); s.integer(time.hperiod);
s.integer(time.vcounter);
s.integer(time.hcounter);
s.array(history.field); s.integer(last.vperiod);
s.array(history.vcounter); s.integer(last.hperiod);
s.array(history.hcounter);
s.integer(history.index);
} }

View File

@ -90,7 +90,7 @@ auto PPU::main() -> void {
obj.tilefetch(); obj.tilefetch();
} }
step(lineclocks() - hcounter()); step(hperiod() - hcounter());
} }
auto PPU::load() -> bool { auto PPU::load() -> bool {