* ~4.6% speedup (with the fast PPU)
* fix out-of-bounds DSP memory access [Sour]
This commit is contained in:
byuu 2019-08-02 04:45:06 +09:00
parent 9e8913cea0
commit f6303518d5
17 changed files with 138 additions and 117 deletions

View File

@ -31,13 +31,13 @@ using namespace nall;
namespace Emulator {
static const string Name = "bsnes";
static const string Version = "108.3";
static const string Version = "108.4";
static const string Author = "byuu";
static const string License = "GPLv3";
static const string Website = "https://byuu.org";
//incremented only when serialization format changes
static const string SerializerVersion = "108.2";
static const string SerializerVersion = "108.4";
namespace Constants {
namespace Colorburst {

View File

@ -45,7 +45,7 @@ auto WDC65816::instructionLongRead8(alu8 op, r16 I) -> void {
V.l = fetch();
V.h = fetch();
V.b = fetch();
L W.l = read(V.d + I.w + 0);
L W.l = readLong(V.d + I.w + 0);
alu(W.l);
}
@ -53,8 +53,8 @@ auto WDC65816::instructionLongRead16(alu16 op, r16 I) -> void {
V.l = fetch();
V.h = fetch();
V.b = fetch();
W.l = read(V.d + I.w + 0);
L W.h = read(V.d + I.w + 1);
W.l = readLong(V.d + I.w + 0);
L W.h = readLong(V.d + I.w + 1);
alu(W.w);
}
@ -157,7 +157,7 @@ auto WDC65816::instructionIndirectLongRead8(alu8 op, r16 I) -> void {
V.l = readDirectN(U.l + 0);
V.h = readDirectN(U.l + 1);
V.b = readDirectN(U.l + 2);
L W.l = read(V.d + I.w + 0);
L W.l = readLong(V.d + I.w + 0);
alu(W.l);
}
@ -167,8 +167,8 @@ auto WDC65816::instructionIndirectLongRead16(alu16 op, r16 I) -> void {
V.l = readDirectN(U.l + 0);
V.h = readDirectN(U.l + 1);
V.b = readDirectN(U.l + 2);
W.l = read(V.d + I.w + 0);
L W.h = read(V.d + I.w + 1);
W.l = readLong(V.d + I.w + 0);
L W.h = readLong(V.d + I.w + 1);
alu(W.w);
}

View File

@ -30,15 +30,15 @@ auto WDC65816::instructionLongWrite8(r16 I) -> void {
V.l = fetch();
V.h = fetch();
V.b = fetch();
L write(V.d + I.w + 0, A.l);
L writeLong(V.d + I.w + 0, A.l);
}
//fetches a three-byte operand into V (l, h, b), then stores A.l at V.d + I.w and A.h at the next address.
//NOTE(review): the two stores appear twice, once through write() and once through writeLong();
//they look like before/after forms of one change — confirm.
//NOTE(review): the leading "L" token is defined outside this view; presumably it marks the final cycle — confirm.
auto WDC65816::instructionLongWrite16(r16 I) -> void {
V.l = fetch();
V.h = fetch();
V.b = fetch();
write(V.d + I.w + 0, A.l);
L write(V.d + I.w + 1, A.h);
writeLong(V.d + I.w + 0, A.l);
L writeLong(V.d + I.w + 1, A.h);
}
auto WDC65816::instructionDirectWrite8(r16 F) -> void {
@ -130,7 +130,7 @@ auto WDC65816::instructionIndirectLongWrite8(r16 I) -> void {
V.l = readDirectN(U.l + 0);
V.h = readDirectN(U.l + 1);
V.b = readDirectN(U.l + 2);
L write(V.d + I.w + 0, A.l);
L writeLong(V.d + I.w + 0, A.l);
}
auto WDC65816::instructionIndirectLongWrite16(r16 I) -> void {
@ -139,8 +139,8 @@ auto WDC65816::instructionIndirectLongWrite16(r16 I) -> void {
V.l = readDirectN(U.l + 0);
V.h = readDirectN(U.l + 1);
V.b = readDirectN(U.l + 2);
write(V.d + I.w + 0, A.l);
L write(V.d + I.w + 1, A.h);
writeLong(V.d + I.w + 0, A.l);
L writeLong(V.d + I.w + 1, A.h);
}
auto WDC65816::instructionStackWrite8() -> void {

View File

@ -50,31 +50,39 @@ auto WDC65816::pushN(uint8 data) -> void {
}
//reads one byte at an address formed from D.
//when EF is set and D.l is zero, the address is D.w | (low 8 bits of address);
//otherwise it is D.w + address, truncated to 16 bits.
//NOTE(review): each statement appears twice, once with uint8()/uint16() casts and once with explicit masks;
//they look like before/after forms of one change — confirm.
auto WDC65816::readDirect(uint address) -> uint8 {
if(EF && !D.l) return read(D.w | uint8(address));
return read(uint16(D.w + address));
//& binds tighter than |, so this is D.w | (address & 0xff)
if(EF && !D.l) return read(D.w | address & 0xff);
//+ binds tighter than &, so the whole sum is masked to 16 bits
return read(D.w + address & 0xffff);
}
//writes one byte at an address formed from D.
//when EF is set and D.l is zero, the address is D.w | (low 8 bits of address);
//otherwise it is D.w + address, truncated to 16 bits.
//NOTE(review): each statement appears twice, once with uint8()/uint16() casts and once with explicit masks;
//they look like before/after forms of one change — confirm.
auto WDC65816::writeDirect(uint address, uint8 data) -> void {
if(EF && !D.l) return write(D.w | uint8(address), data);
write(uint16(D.w + address), data);
//& binds tighter than |, so this is D.w | (address & 0xff)
if(EF && !D.l) return write(D.w | address & 0xff, data);
//+ binds tighter than &, so the whole sum is masked to 16 bits
write(D.w + address & 0xffff, data);
}
//reads one byte at D.w + address, truncated to 16 bits; unlike readDirect(), there is no EF / D.l check.
//NOTE(review): the statement appears twice (uint16() cast, then & 0xffff mask);
//they look like before/after forms of one change — confirm.
auto WDC65816::readDirectN(uint address) -> uint8 {
return read(uint16(D.w + address));
//+ binds tighter than &, so the whole sum is masked to 16 bits
return read(D.w + address & 0xffff);
}
//reads one byte at (B << 16) + address.
//NOTE(review): the statement appears twice; the first passes the sum unmasked, the second masks it to 24 bits.
//they look like before/after forms of one change — confirm.
auto WDC65816::readBank(uint address) -> uint8 {
return read((B << 16) + address);
//+ binds tighter than &, so the whole sum is masked to 24 bits
return read((B << 16) + address & 0xffffff);
}
//writes one byte at (B << 16) + address.
//NOTE(review): the statement appears twice; the first passes the sum unmasked, the second masks it to 24 bits.
//they look like before/after forms of one change — confirm.
auto WDC65816::writeBank(uint address, uint8 data) -> void {
write((B << 16) + address, data);
//+ binds tighter than &, so the whole sum is masked to 24 bits
write((B << 16) + address & 0xffffff, data);
}
//reads one byte from address after masking it to 24 bits.
//callers pass sums such as V.d + I.w + 1, so the mask is what keeps the value within 24 bits.
auto WDC65816::readLong(uint address) -> uint8 {
return read(address & 0xffffff);
}
//writes one byte to address after masking it to 24 bits.
//callers pass sums such as V.d + I.w + 1, so the mask is what keeps the value within 24 bits.
auto WDC65816::writeLong(uint address, uint8 data) -> void {
write(address & 0xffffff, data);
}
//reads one byte at S.w + address, truncated to 16 bits.
//NOTE(review): the statement appears twice (uint16() cast, then & 0xffff mask);
//they look like before/after forms of one change — confirm.
auto WDC65816::readStack(uint address) -> uint8 {
return read(uint16(S.w + address));
//+ binds tighter than &, so the whole sum is masked to 16 bits
return read(S.w + address & 0xffff);
}
//writes one byte at S.w + address, truncated to 16 bits.
//NOTE(review): the statement appears twice (uint16() cast, then & 0xffff mask);
//they look like before/after forms of one change — confirm.
auto WDC65816::writeStack(uint address, uint8 data) -> void {
write(uint16(S.w + address), data);
//+ binds tighter than &, so the whole sum is masked to 16 bits
write(S.w + address & 0xffff, data);
}

View File

@ -10,8 +10,8 @@ struct WDC65816 {
virtual auto idle() -> void = 0;
virtual auto idleBranch() -> void {}
virtual auto idleJump() -> void {}
virtual auto read(uint24 addr) -> uint8 = 0;
virtual auto write(uint24 addr, uint8 data) -> void = 0;
virtual auto read(uint addr) -> uint8 = 0;
virtual auto write(uint addr, uint8 data) -> void = 0;
virtual auto lastCycle() -> void = 0;
virtual auto interruptPending() const -> bool = 0;
virtual auto interrupt() -> void;
@ -44,22 +44,24 @@ struct WDC65816 {
auto power() -> void;
//memory.cpp
inline auto idleIRQ() -> void;
inline auto idle2() -> void;
inline auto idle4(uint16 x, uint16 y) -> void;
inline auto idle6(uint16 address) -> void;
inline auto fetch() -> uint8;
inline auto pull() -> uint8;
auto push(uint8 data) -> void;
inline auto pullN() -> uint8;
inline auto pushN(uint8 data) -> void;
inline auto readDirect(uint address) -> uint8;
inline auto writeDirect(uint address, uint8 data) -> void;
inline auto readDirectN(uint address) -> uint8;
inline auto readBank(uint address) -> uint8;
inline auto writeBank(uint address, uint8 data) -> void;
inline auto readStack(uint address) -> uint8;
inline auto writeStack(uint address, uint8 data) -> void;
alwaysinline auto idleIRQ() -> void;
alwaysinline auto idle2() -> void;
alwaysinline auto idle4(uint16 x, uint16 y) -> void;
alwaysinline auto idle6(uint16 address) -> void;
alwaysinline auto fetch() -> uint8;
alwaysinline auto pull() -> uint8;
auto push(uint8 data) -> void;
alwaysinline auto pullN() -> uint8;
alwaysinline auto pushN(uint8 data) -> void;
alwaysinline auto readDirect(uint address) -> uint8;
alwaysinline auto writeDirect(uint address, uint8 data) -> void;
alwaysinline auto readDirectN(uint address) -> uint8;
alwaysinline auto readBank(uint address) -> uint8;
alwaysinline auto writeBank(uint address, uint8 data) -> void;
alwaysinline auto readLong(uint address) -> uint8;
alwaysinline auto writeLong(uint address, uint8 data) -> void;
alwaysinline auto readStack(uint address) -> uint8;
alwaysinline auto writeStack(uint address, uint8 data) -> void;
//algorithms.cpp
using alu8 = auto (WDC65816::*)( uint8) -> uint8;

View File

@ -18,7 +18,7 @@ auto SA1::idleBranch() -> void {
if(r.pc.d & 1) idleJump();
}
auto SA1::read(uint24 address) -> uint8 {
auto SA1::read(uint address) -> uint8 {
r.mar = address;
uint8 data = r.mdr;
@ -62,7 +62,7 @@ auto SA1::read(uint24 address) -> uint8 {
return data;
}
auto SA1::write(uint24 address, uint8 data) -> void {
auto SA1::write(uint address, uint8 data) -> void {
r.mar = address;
r.mdr = data;

View File

@ -38,8 +38,8 @@ struct SA1 : Processor::WDC65816, Thread {
alwaysinline auto idle() -> void override;
alwaysinline auto idleJump() -> void override;
alwaysinline auto idleBranch() -> void override;
alwaysinline auto read(uint24 address) -> uint8 override;
alwaysinline auto write(uint24 address, uint8 data) -> void override;
alwaysinline auto read(uint address) -> uint8 override;
alwaysinline auto write(uint address, uint8 data) -> void override;
auto readVBR(uint address, uint8 data = 0) -> uint8;
auto readDisassembler(uint address) -> uint8 override;

View File

@ -109,7 +109,7 @@ auto CPU::power(bool reset) -> void {
alu = {};
status = {};
status.lineClocks = lineclocks();
status.lineClocks = hperiod();
status.dramRefreshPosition = (version == 1 ? 530 : 538);
status.hdmaSetupPosition = (version == 1 ? 12 + 8 - dmaCounter() : 12 + dmaCounter());
status.hdmaPosition = 1104;

View File

@ -25,8 +25,8 @@ struct CPU : Processor::WDC65816, Thread, PPUcounter {
//memory.cpp
auto idle() -> void override;
auto read(uint24 addr) -> uint8 override;
auto write(uint24 addr, uint8 data) -> void override;
auto read(uint addr) -> uint8 override;
auto write(uint addr, uint8 data) -> void override;
auto readDisassembler(uint addr) -> uint8 override;
//io.cpp

View File

@ -6,7 +6,7 @@ auto CPU::idle() -> void {
aluEdge();
}
auto CPU::read(uint24 address) -> uint8 {
auto CPU::read(uint address) -> uint8 {
status.irqLock = false;
if(address & 0x408000) {
@ -46,7 +46,7 @@ auto CPU::read(uint24 address) -> uint8 {
return data;
}
auto CPU::write(uint24 address, uint8 data) -> void {
auto CPU::write(uint address, uint8 data) -> void {
status.irqLock = false;
aluEdge();

View File

@ -82,7 +82,7 @@ auto CPU::step(uint clocks) -> void {
//called by ppu.tick() when Hcounter=0
auto CPU::scanline() -> void {
status.lineClocks = lineclocks();
status.lineClocks = hperiod();
//forcefully sync S-CPU to other processors, in case chips are not communicating
synchronizeSMP();

View File

@ -415,7 +415,7 @@ MISC_CLOCK( 30 )
inline VOICE_CLOCK( V1 )
{
m.t_dir_addr = m.t_dir * 0x100 + m.t_srcn * 4;
m.t_dir_addr = (m.t_dir * 0x100 + m.t_srcn * 4) & 0xffff;
m.t_srcn = VREG(v->regs,srcn);
}
inline VOICE_CLOCK( V2 )

View File

@ -100,7 +100,7 @@ auto PPU::main() -> void {
}
}
step(lineclocks() - hcounter());
step(hperiod() - hcounter());
}
auto PPU::scanline() -> void {

97
bsnes/sfc/ppu/counter/counter-inline.hpp Normal file → Executable file
View File

@ -1,71 +1,84 @@
//this should only be called by CPU::PPUcounter::tick();
//keeps track of previous counter positions in history table
//advances the horizontal counter by 2 and starts a new scanline when it equals the line length.
//NOTE(review): two variants are listed back to back: one on status.* with lineclocks()/vcounterTick() and the
//history table, and one on time.* with hperiod()/tickScanline(). they look like before/after forms — confirm.
auto PPUcounter::tick() -> void {
status.hcounter += 2; //increment by smallest unit of time
if(status.hcounter == lineclocks()) {
status.hcounter = 0;
vcounterTick();
time.hcounter += 2; //increment by smallest unit of time.
if(time.hcounter == hperiod()) {
//remember the length of the line just finished before tickScanline() assigns a new time.hperiod
last.hperiod = hperiod();
time.hcounter = 0;
tickScanline();
}
//record the current position in the next slot of the 2048-entry ring buffer
history.index = (history.index + 1) & 2047;
history.field [history.index] = status.field;
history.vcounter[history.index] = status.vcounter;
history.hcounter[history.index] = status.hcounter;
}
//this should only be called by PPU::PPUcounter::tick(n);
//allows stepping by more than the smallest unit of time
//advances the horizontal counter by clocks; on reaching or passing the line length, subtracts one line
//length and starts a new scanline. only one line length is subtracted per call.
//NOTE(review): two variants (status.* / lineclocks() / vcounterTick() and time.* / hperiod() / tickScanline())
//are listed back to back; they look like before/after forms of one change — confirm.
auto PPUcounter::tick(uint clocks) -> void {
status.hcounter += clocks;
if(status.hcounter >= lineclocks()) {
status.hcounter -= lineclocks();
vcounterTick();
time.hcounter += clocks;
if(time.hcounter >= hperiod()) {
//remember the length of the line just finished before tickScanline() assigns a new time.hperiod
last.hperiod = hperiod();
time.hcounter -= hperiod();
tickScanline();
}
}
//internal
//starts a new scanline: increments the vertical counter, wraps it at the end of the frame (toggling field),
//chooses the length of the new line, and finally invokes the scanline callback if one is set.
//called from both tick() overloads.
//NOTE(review): two signatures and two bodies (vcounterTick on status.*, tickScanline on time.*) are listed
//back to back; they look like before/after forms of one change — confirm.
auto PPUcounter::vcounterTick() -> void {
//the interlace setting is sampled from the PPU when the counter reaches line 128
if(++status.vcounter == 128) status.interlace = ppu.interlace();
//frame length is 262 (NTSC) or 312 lines, plus one when interlace() is set and field() is 0
if(vcounter() == (Region::NTSC() ? 262 : 312) + (interlace() && !field())) {
status.vcounter = 0;
status.field ^= 1;
auto PPUcounter::tickScanline() -> void {
if(++time.vcounter == 128) {
//it's not important when this is captured: it is only needed at V=240 or V=311.
time.interlace = ppu.interlace();
//extend the current frame by one line when interlace() is set and field() is 0
time.vperiod += interlace() && !field();
}
status.lineclocks = 1364;
//NTSC and PAL scanlines rates would not match up with color clocks if every scanline were 1364 clocks
//to offset for this error, NTSC has one short scanline, and PAL has one long scanline
if(Region::NTSC() && interlace() == 0 && field() == 1 && vcounter() == 240) status.lineclocks -= 4;
if(Region::PAL() && interlace() == 1 && field() == 1 && vcounter() == 311) status.lineclocks += 4;
if(vcounter() == vperiod()) {
//keep the length of the frame just finished; vcounter(offset) reads it when looking back from line 0
last.vperiod = vperiod();
//this may be off by one until V=128, hence why vperiod() is a private function.
time.vperiod = Region::NTSC() ? 262 : 312;
time.vcounter = 0;
time.field ^= 1;
}
time.hperiod = 1364;
//NTSC and PAL scanline rates would not match up with color clocks if every scanline were 1364 clocks.
//to offset for this error, NTSC has one short scanline, and PAL has one long scanline.
if(Region::NTSC() && interlace() == 0 && field() == 1 && vcounter() == 240) time.hperiod -= 4;
if(Region::PAL() && interlace() == 1 && field() == 1 && vcounter() == 311) time.hperiod += 4;
if(scanline) scanline();
}
//accessors for the current counter state.
//NOTE(review): the accessors are listed twice, once reading status.* (with lineclocks()) and once reading
//time.* (with vperiod()/hperiod()); they look like before/after forms of one change — confirm.
auto PPUcounter::interlace() const -> bool { return status.interlace; }
auto PPUcounter::field() const -> bool { return status.field; }
auto PPUcounter::vcounter() const -> uint { return status.vcounter; }
auto PPUcounter::hcounter() const -> uint { return status.hcounter; }
auto PPUcounter::lineclocks() const -> uint { return status.lineclocks; }
auto PPUcounter::interlace() const -> bool { return time.interlace; }
auto PPUcounter::field() const -> bool { return time.field; }
auto PPUcounter::vcounter() const -> uint { return time.vcounter; }
auto PPUcounter::hcounter() const -> uint { return time.hcounter; }
auto PPUcounter::vperiod() const -> uint { return time.vperiod; }
auto PPUcounter::hperiod() const -> uint { return time.hperiod; }
//history-table lookups: offset is halved to index entries, since tick() records one entry per 2 clocks
auto PPUcounter::field(uint offset) const -> bool { return history.field[(history.index - (offset >> 1)) & 2047]; }
auto PPUcounter::vcounter(uint offset) const -> uint { return history.vcounter[(history.index - (offset >> 1)) & 2047]; }
auto PPUcounter::hcounter(uint offset) const -> uint { return history.hcounter[(history.index - (offset >> 1)) & 2047]; }
//returns the vertical counter as it was offset clocks ago, computed from the current position.
//looks back at most one scanline: if offset reaches before the start of the current line, the result is
//the previous line, which on line 0 is the final line of the previous frame (last.vperiod - 1).
auto PPUcounter::vcounter(uint offset) const -> uint {
if(offset <= hcounter()) return vcounter();
if(vcounter() > 0) return vcounter() - 1;
return last.vperiod - 1;
}
//one PPU dot = 4 CPU clocks
//returns the horizontal counter as it was offset clocks ago, computed from the current position.
//if offset reaches before the start of the current line, the position is taken within the previous line,
//whose length is last.hperiod.
//assumes offset does not exceed hcounter() + last.hperiod — TODO confirm against callers.
auto PPUcounter::hcounter(uint offset) const -> uint {
if(offset <= hcounter()) return hcounter() - offset;
return hcounter() + last.hperiod - offset;
}
//one PPU dot = 4 CPU clocks.
//
//PPU dots 323 and 327 are 6 CPU clocks long.
//this does not apply to NTSC non-interlace scanline 240 on odd fields. this is
//because the PPU skips one dot to alter the color burst phase of the video signal.
//it is not known what happens for PAL 1368 clock scanlines.
//
//dot 323 range = {1292, 1294, 1296}
//dot 327 range = {1310, 1312, 1314}
//converts the horizontal clock counter to a dot index.
//on a 1360-clock line this is simply hcounter() / 4; otherwise 2 clocks are subtracted once hcounter()
//is past 1292 and 2 more once it is past 1310, before dividing by 4.
//NOTE(review): the condition and both return statements appear twice (lineclocks() with extra parentheses,
//then hperiod() without); they look like before/after forms of one change — confirm.
auto PPUcounter::hdot() const -> uint {
if(lineclocks() == 1360) {
return (hcounter() >> 2);
if(hperiod() == 1360) {
return hcounter() >> 2;
} else {
return (hcounter() - ((hcounter() > 1292) << 1) - ((hcounter() > 1310) << 1)) >> 2;
//binary - binds tighter than >>, so the shift applies to the whole difference, as in the line above
return hcounter() - ((hcounter() > 1292) << 1) - ((hcounter() > 1310) << 1) >> 2;
}
}
//restores the counter state to its defaults.
//NOTE(review): two variants are listed back to back (status/history, then time/last);
//they look like before/after forms of one change — confirm.
auto PPUcounter::reset() -> void {
status = {};
history = {};
time = {};
last = {};
//the in-class defaults for vperiod/hperiod are 0, so seed them with a full frame and line length
time.vperiod = last.vperiod = Region::NTSC() ? 262 : 312;
time.hperiod = last.hperiod = 1364;
}

24
bsnes/sfc/ppu/counter/counter.hpp Normal file → Executable file
View File

@ -12,16 +12,17 @@
struct PPUcounter {
alwaysinline auto tick() -> void;
alwaysinline auto tick(uint clocks) -> void;
alwaysinline auto tick(uint clocks) -> void; private:
alwaysinline auto tickScanline() -> void; public:
alwaysinline auto interlace() const -> bool;
alwaysinline auto field() const -> bool;
alwaysinline auto vcounter() const -> uint;
alwaysinline auto hcounter() const -> uint;
alwaysinline auto hdot() const -> uint;
alwaysinline auto lineclocks() const -> uint;
alwaysinline auto hdot() const -> uint; private:
alwaysinline auto vperiod() const -> uint; public:
alwaysinline auto hperiod() const -> uint;
alwaysinline auto field(uint offset) const -> bool;
alwaysinline auto vcounter(uint offset) const -> uint;
alwaysinline auto hcounter(uint offset) const -> uint;
@ -31,20 +32,17 @@ struct PPUcounter {
function<void ()> scanline;
private:
alwaysinline auto vcounterTick() -> void;
struct {
bool interlace = 0;
bool field = 0;
uint vperiod = 0;
uint hperiod = 0;
uint vcounter = 0;
uint hcounter = 0;
uint lineclocks = 1364;
} status;
} time;
struct {
uint index = 0;
bool field[2048] = {};
uint vcounter[2048] = {};
uint hcounter[2048] = {};
} history;
uint vperiod = 0;
uint hperiod = 0;
} last;
};

View File

@ -1,11 +1,11 @@
//passes the counter state to the serializer, field by field.
//NOTE(review): two field lists are shown back to back (status.* plus the history arrays, then time.* plus
//last.*); they look like before/after forms of one change — confirm.
auto PPUcounter::serialize(serializer& s) -> void {
s.integer(status.interlace);
s.integer(status.field);
s.integer(status.vcounter);
s.integer(status.hcounter);
s.integer(time.interlace);
s.integer(time.field);
s.integer(time.vperiod);
s.integer(time.hperiod);
s.integer(time.vcounter);
s.integer(time.hcounter);
s.array(history.field);
s.array(history.vcounter);
s.array(history.hcounter);
s.integer(history.index);
s.integer(last.vperiod);
s.integer(last.hperiod);
}

View File

@ -90,7 +90,7 @@ auto PPU::main() -> void {
obj.tilefetch();
}
step(lineclocks() - hcounter());
step(hperiod() - hcounter());
}
auto PPU::load() -> bool {