From a3e0f6da251aef0da53e7b2b6b14ba9d12eb3bc2 Mon Sep 17 00:00:00 2001 From: Tim Allen Date: Mon, 3 Sep 2018 00:06:41 +1000 Subject: [PATCH] Update to v106r60 release. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit byuu says: I added (imperfect) memory conflict timing to the SA1. Before: - WRAM↔ROM ran 7% too fast - ROM↔ROM ran 100% too fast - WRAM↔IRAM ran 7% too fast - ROM↔IRAM ran 7% too fast - IRAM↔IRAM ran 287% too fast - BWRAM↔BWRAM ran 100% too fast - HDMA ROM↔ROM ran 15% too fast - HDMA WRAM↔ROM ran 15% too fast - DMA ROM↔ROM ran 100% too fast After: - ROM↔ROM runs 14% too fast - HDMA WRAM↔ROM runs 7% too fast - DMA ROM↔ROM runs 4% too fast If you enable this with the fast PPU + DSP, your framerate in SA1 games will drop by 51%. And even if you disable it, you'll still lose 9% speed in SA1 games, and 2% speed in non-SA1 games, because of changes needed to make this support possible. By default, I'm leaving this off. Compile with `-DACCURATE_SA1` (or uncomment the line in sfc/sfc.hpp) if you want to try it out. This'll almost certainly cause some SA1 regressions, so I guess we'll tackle those as they arise. 
--- higan/emulator/emulator.hpp | 2 +- higan/gb/apu/apu.cpp | 4 +- higan/processor/wdc65816/instructions-pc.cpp | 16 +- higan/processor/wdc65816/memory.cpp | 4 +- higan/processor/wdc65816/serialization.cpp | 2 + higan/processor/wdc65816/wdc65816.cpp | 4 +- higan/processor/wdc65816/wdc65816.hpp | 6 +- higan/sfc/cartridge/cartridge.hpp | 4 +- higan/sfc/cartridge/load.cpp | 8 +- higan/sfc/cartridge/save.cpp | 2 +- higan/sfc/coprocessor/sa1/bus.cpp | 30 -- higan/sfc/coprocessor/sa1/bwram.cpp | 120 +++++++ higan/sfc/coprocessor/sa1/dma.cpp | 90 ++--- higan/sfc/coprocessor/sa1/io.cpp | 16 +- higan/sfc/coprocessor/sa1/iram.cpp | 33 ++ higan/sfc/coprocessor/sa1/memory.cpp | 337 +++++++------------ higan/sfc/coprocessor/sa1/rom.cpp | 74 ++++ higan/sfc/coprocessor/sa1/sa1.cpp | 27 +- higan/sfc/coprocessor/sa1/sa1.hpp | 92 +++-- higan/sfc/coprocessor/sa1/serialization.cpp | 6 +- higan/sfc/coprocessor/superfx/bus.cpp | 8 + higan/sfc/coprocessor/superfx/superfx.hpp | 14 +- higan/sfc/cpu/cpu.hpp | 12 +- higan/sfc/cpu/dma.cpp | 63 ++-- higan/sfc/cpu/memory.cpp | 31 +- higan/sfc/cpu/timing.cpp | 10 +- higan/sfc/memory/memory-inline.hpp | 4 - higan/sfc/memory/memory.hpp | 13 +- higan/sfc/sfc.hpp | 2 + higan/sfc/slot/bsmemory/bsmemory.cpp | 4 + higan/sfc/slot/bsmemory/bsmemory.hpp | 7 +- nall/arithmetic.hpp | 9 + nall/arithmetic/unsigned.hpp | 22 +- nall/array-span.hpp | 84 +++++ nall/array-view.hpp | 78 ++++- nall/beat/archive/archive.hpp | 272 +++++++++++++++ nall/cipher/chacha20.hpp | 55 ++- nall/encode/base.hpp | 5 + nall/inode.hpp | 44 ++- nall/nall.hpp | 1 + nall/platform.hpp | 8 - nall/random.hpp | 139 +++++++- nall/string.hpp | 2 + nall/suffix-array.hpp | 255 ++++++++------ nall/vector.hpp | 6 +- nall/vector/core.hpp | 4 + nall/vector/specialization/uint8_t.hpp | 18 + ruby/input/joypad/directinput.cpp | 2 +- ruby/input/joypad/udev.cpp | 2 +- 49 files changed, 1448 insertions(+), 603 deletions(-) delete mode 100644 higan/sfc/coprocessor/sa1/bus.cpp create mode 100644 
higan/sfc/coprocessor/sa1/bwram.cpp create mode 100644 higan/sfc/coprocessor/sa1/iram.cpp create mode 100644 higan/sfc/coprocessor/sa1/rom.cpp create mode 100644 nall/array-span.hpp create mode 100644 nall/beat/archive/archive.hpp create mode 100644 nall/vector/specialization/uint8_t.hpp diff --git a/higan/emulator/emulator.hpp b/higan/emulator/emulator.hpp index 267d2bb3..14b45d1e 100644 --- a/higan/emulator/emulator.hpp +++ b/higan/emulator/emulator.hpp @@ -28,7 +28,7 @@ using namespace nall; namespace Emulator { static const string Name = "higan"; - static const string Version = "106.59"; + static const string Version = "106.60"; static const string Author = "byuu"; static const string License = "GPLv3"; static const string Website = "https://byuu.org/"; diff --git a/higan/gb/apu/apu.cpp b/higan/gb/apu/apu.cpp index a91afb1a..ec3e5170 100644 --- a/higan/gb/apu/apu.cpp +++ b/higan/gb/apu/apu.cpp @@ -67,8 +67,8 @@ auto APU::power() -> void { phase = 0; cycle = 0; - LinearFeedbackShiftRegisterGenerator r; - for(auto& n : wave.pattern) n = r(); + PRNG prng; + for(auto& n : wave.pattern) n = prng.random(); } auto APU::readIO(uint16 addr) -> uint8 { diff --git a/higan/processor/wdc65816/instructions-pc.cpp b/higan/processor/wdc65816/instructions-pc.cpp index 0d29c890..0603927c 100644 --- a/higan/processor/wdc65816/instructions-pc.cpp +++ b/higan/processor/wdc65816/instructions-pc.cpp @@ -7,20 +7,24 @@ L fetch(); idle6(absolute); L idle(); aa(PC) = absolute; + idleBranch(); } } auto WDC65816::instructionBranchLong() -> void { uint16 displacement = fetch(); hi(displacement) = fetch(); + uint16 absolute = PC + (int16)displacement; L idle(); - aa(PC) = PC + (int16)displacement; + aa(PC) = absolute; + idleBranch(); } auto WDC65816::instructionJumpShort() -> void { uint16 data = fetch(); L hi(data) = fetch(); aa(PC) = data; + idleJump(); } auto WDC65816::instructionJumpLong() -> void { @@ -28,6 +32,7 @@ auto WDC65816::instructionJumpLong() -> void { hi(data) = fetch(); L 
db(data) = fetch(); PC = data; + idleJump(); } auto WDC65816::instructionJumpIndirect() -> void { @@ -36,6 +41,7 @@ auto WDC65816::instructionJumpIndirect() -> void { uint16 data = read(uint16(absolute + 0)); L hi(data) = read(uint16(absolute + 1)); aa(PC) = data; + idleJump(); } auto WDC65816::instructionJumpIndexedIndirect() -> void { @@ -45,6 +51,7 @@ auto WDC65816::instructionJumpIndexedIndirect() -> void { uint16 data = read(db(PC) << 16 | uint16(absolute + X + 0)); L hi(data) = read(db(PC) << 16 | uint16(absolute + X + 1)); aa(PC) = data; + idleJump(); } auto WDC65816::instructionJumpIndirectLong() -> void { @@ -54,6 +61,7 @@ auto WDC65816::instructionJumpIndirectLong() -> void { hi(data) = read(uint16(absolute + 1)); L db(data) = read(uint16(absolute + 2)); PC = data; + idleJump(); } auto WDC65816::instructionCallShort() -> void { @@ -64,6 +72,7 @@ auto WDC65816::instructionCallShort() -> void { push(hi(PC)); L push(lo(PC)); aa(PC) = data; + idleJump(); } auto WDC65816::instructionCallLong() -> void { @@ -77,6 +86,7 @@ auto WDC65816::instructionCallLong() -> void { L pushN(lo(PC)); PC = data; E hi(S) = 0x01; + idleJump(); } auto WDC65816::instructionCallIndexedIndirect() -> void { @@ -89,6 +99,7 @@ auto WDC65816::instructionCallIndexedIndirect() -> void { L hi(data) = read(db(PC) << 16 | uint16(absolute + X + 1)); aa(PC) = data; E hi(S) = 0x01; + idleJump(); } auto WDC65816::instructionReturnInterrupt() -> void { @@ -104,6 +115,7 @@ E XF = 1, MF = 1; hi(PC) = pull(); L db(PC) = pull(); } + idleJump(); } auto WDC65816::instructionReturnShort() -> void { @@ -114,6 +126,7 @@ auto WDC65816::instructionReturnShort() -> void { L idle(); aa(PC) = data; aa(PC)++; + idleJump(); } auto WDC65816::instructionReturnLong() -> void { @@ -125,4 +138,5 @@ L db(data) = pullN(); PC = data; aa(PC)++; E hi(S) = 0x01; + idleJump(); } diff --git a/higan/processor/wdc65816/memory.cpp b/higan/processor/wdc65816/memory.cpp index b6b3fd7e..9c77bf70 100644 --- 
a/higan/processor/wdc65816/memory.cpp +++ b/higan/processor/wdc65816/memory.cpp @@ -23,8 +23,8 @@ auto WDC65816::idle4(uint16 x, uint16 y) -> void { if(!XF || hi(x) != hi(y)) idle(); } -auto WDC65816::idle6(uint16 addr) -> void { - if(EF && hi(PC) != hi(addr)) idle(); +auto WDC65816::idle6(uint16 address) -> void { + if(EF && hi(PC) != hi(address)) idle(); } auto WDC65816::fetch() -> uint8 { diff --git a/higan/processor/wdc65816/serialization.cpp b/higan/processor/wdc65816/serialization.cpp index 6ce48002..3b9bff3d 100644 --- a/higan/processor/wdc65816/serialization.cpp +++ b/higan/processor/wdc65816/serialization.cpp @@ -22,6 +22,8 @@ auto WDC65816::serialize(serializer& s) -> void { s.integer(r.irq); s.integer(r.wai); s.integer(r.stp); + s.integer(r.rwb); + s.integer(r.mar); s.integer(r.mdr); s.integer(r.vector); } diff --git a/higan/processor/wdc65816/wdc65816.cpp b/higan/processor/wdc65816/wdc65816.cpp index 1a6f25ce..51644adf 100644 --- a/higan/processor/wdc65816/wdc65816.cpp +++ b/higan/processor/wdc65816/wdc65816.cpp @@ -54,9 +54,11 @@ auto WDC65816::power() -> void { P = 0x34; EF = 1; - r.mdr = 0x00; r.wai = false; r.stp = false; + r.rwb = false; + r.mar = 0x000000; + r.mdr = 0x00; r.vector = 0xfffc; //reset vector address } diff --git a/higan/processor/wdc65816/wdc65816.hpp b/higan/processor/wdc65816/wdc65816.hpp index dbde218c..7a0c06dd 100644 --- a/higan/processor/wdc65816/wdc65816.hpp +++ b/higan/processor/wdc65816/wdc65816.hpp @@ -8,6 +8,8 @@ namespace Processor { struct WDC65816 { virtual auto idle() -> void = 0; + virtual auto idleBranch() -> void {} + virtual auto idleJump() -> void {} virtual auto read(uint24 addr) -> uint8 = 0; virtual auto write(uint24 addr, uint8 data) -> void = 0; virtual auto lastCycle() -> void = 0; @@ -24,7 +26,7 @@ struct WDC65816 { inline auto idleIRQ() -> void; inline auto idle2() -> void; inline auto idle4(uint16 x, uint16 y) -> void; - inline auto idle6(uint16 addr) -> void; + inline auto idle6(uint16 address) -> void; 
inline auto fetch() -> uint8; inline auto pull() -> uint8; auto push(uint8 data) -> void; @@ -250,6 +252,8 @@ struct WDC65816 { bool irq = false; //IRQ pin (0 = low, 1 = trigger) bool wai = false; //raised during wai, cleared after interrupt triggered bool stp = false; //raised during stp, never cleared + bool rwb = false; //read/write pin + uint24 mar; //memory address register uint8 mdr; //memory data register uint16 vector; //interrupt vector address } r; diff --git a/higan/sfc/cartridge/cartridge.hpp b/higan/sfc/cartridge/cartridge.hpp index 93c83f0e..5d937849 100644 --- a/higan/sfc/cartridge/cartridge.hpp +++ b/higan/sfc/cartridge/cartridge.hpp @@ -66,7 +66,7 @@ private: auto loadCartridgeSufamiTurboA(Markup::Node) -> void; auto loadCartridgeSufamiTurboB(Markup::Node) -> void; - auto loadMemory(MappedRAM&, Markup::Node, bool required) -> void; + auto loadMemory(Memory&, Markup::Node, bool required) -> void; auto loadMap(Markup::Node, SuperFamicom::Memory&) -> void; auto loadMap(Markup::Node, const function&, const function&) -> void; @@ -99,7 +99,7 @@ private: auto saveCartridgeSufamiTurboA(Markup::Node) -> void; auto saveCartridgeSufamiTurboB(Markup::Node) -> void; - auto saveMemory(MappedRAM&, Markup::Node) -> void; + auto saveMemory(Memory&, Markup::Node) -> void; auto saveRAM(Markup::Node) -> void; auto saveMCC(Markup::Node) -> void; diff --git a/higan/sfc/cartridge/load.cpp b/higan/sfc/cartridge/load.cpp index f94074ff..0765926e 100644 --- a/higan/sfc/cartridge/load.cpp +++ b/higan/sfc/cartridge/load.cpp @@ -119,7 +119,7 @@ auto Cartridge::loadCartridgeSufamiTurboB(Markup::Node node) -> void { // -auto Cartridge::loadMemory(MappedRAM& ram, Markup::Node node, bool required) -> void { +auto Cartridge::loadMemory(Memory& ram, Markup::Node node, bool required) -> void { if(auto memory = game.memory(node)) { ram.allocate(memory->size); if(memory->type == "RAM" && !memory->nonVolatile) return; @@ -306,7 +306,7 @@ auto Cartridge::loadSA1(Markup::Node node) -> 
void { if(auto mcu = node["mcu"]) { for(auto map : mcu.find("map")) { - loadMap(map, {&SA1::mmcromRead, &sa1}, {&SA1::mmcromWrite, &sa1}); + loadMap(map, {&SA1::ROM::readCPU, &sa1.rom}, {&SA1::ROM::writeCPU, &sa1.rom}); } if(auto memory = mcu["memory(type=ROM,content=Program)"]) { loadMemory(sa1.rom, memory, File::Required); @@ -319,14 +319,14 @@ auto Cartridge::loadSA1(Markup::Node node) -> void { if(auto memory = node["memory(type=RAM,content=Save)"]) { loadMemory(sa1.bwram, memory, File::Optional); for(auto map : memory.find("map")) { - loadMap(map, {&SA1::mmcbwramRead, &sa1}, {&SA1::mmcbwramWrite, &sa1}); + loadMap(map, {&SA1::BWRAM::readCPU, &sa1.bwram}, {&SA1::BWRAM::writeCPU, &sa1.bwram}); } } if(auto memory = node["memory(type=RAM,content=Internal)"]) { loadMemory(sa1.iram, memory, File::Optional); for(auto map : memory.find("map")) { - loadMap(map, sa1.cpuiram); + loadMap(map, {&SA1::IRAM::readCPU, &sa1.iram}, {&SA1::IRAM::writeCPU, &sa1.iram}); } } } diff --git a/higan/sfc/cartridge/save.cpp b/higan/sfc/cartridge/save.cpp index 7c35173d..bd547200 100644 --- a/higan/sfc/cartridge/save.cpp +++ b/higan/sfc/cartridge/save.cpp @@ -46,7 +46,7 @@ auto Cartridge::saveCartridgeSufamiTurboB(Markup::Node node) -> void { // -auto Cartridge::saveMemory(MappedRAM& ram, Markup::Node node) -> void { +auto Cartridge::saveMemory(Memory& ram, Markup::Node node) -> void { if(auto memory = game.memory(node)) { if(memory->type == "RAM" && !memory->nonVolatile) return; if(memory->type == "RTC" && !memory->nonVolatile) return; diff --git a/higan/sfc/coprocessor/sa1/bus.cpp b/higan/sfc/coprocessor/sa1/bus.cpp deleted file mode 100644 index 02861f30..00000000 --- a/higan/sfc/coprocessor/sa1/bus.cpp +++ /dev/null @@ -1,30 +0,0 @@ -//ROM / RAM access from the S-CPU - -auto SA1::CPUIRAM::size() const -> uint { - return sa1.iram.size(); -} - -auto SA1::CPUIRAM::read(uint24 addr, uint8) -> uint8 { - cpu.synchronize(sa1); - return sa1.iram.read(addr & 0x07ff); -} - -auto 
SA1::CPUIRAM::write(uint24 addr, uint8 data) -> void { - cpu.synchronize(sa1); - sa1.iram.write(addr & 0x07ff, data); -} - -auto SA1::CPUBWRAM::size() const -> uint { - return sa1.bwram.size(); -} - -auto SA1::CPUBWRAM::read(uint24 addr, uint8) -> uint8 { - cpu.synchronize(sa1); - if(dma) return sa1.dmaCC1Read(addr); - return sa1.bwram.read(addr); -} - -auto SA1::CPUBWRAM::write(uint24 addr, uint8 data) -> void { - cpu.synchronize(sa1); - sa1.bwram.write(addr, data); -} diff --git a/higan/sfc/coprocessor/sa1/bwram.cpp b/higan/sfc/coprocessor/sa1/bwram.cpp new file mode 100644 index 00000000..e4a23dd8 --- /dev/null +++ b/higan/sfc/coprocessor/sa1/bwram.cpp @@ -0,0 +1,120 @@ +auto SA1::BWRAM::conflict() const -> bool { + if(!cpu.r.rwb) return false; + if((cpu.r.mar & 0x40e000) == 0x006000) return true; //00-3f,80-bf:6000-7fff + if((cpu.r.mar & 0xf00000) == 0x400000) return true; //40-4f:0000-ffff + return false; +} + +auto SA1::BWRAM::read(uint24 address, uint8 data) -> uint8 { + if(!size()) return data; + address = bus.mirror(address, size()); + return _data[address]; +} + +auto SA1::BWRAM::write(uint24 address, uint8 data) -> void { + if(!size()) return; + address = bus.mirror(address, size()); + _data[address] = data; +} + +//note: addresses are translated prior to invoking this function: +//00-3f,80-bf:6000-7fff size=0x2000 => 00:0000-1fff +//40-4f:0000-ffff => untranslated +auto SA1::BWRAM::readCPU(uint24 address, uint8 data) -> uint8 { + cpu.synchronize(sa1); + + if(address < 0x2000) { //$00-3f,80-bf:6000-7fff + address = sa1.mmio.sbm * 0x2000 + (address & 0x1fff); + } + + if(dma) return sa1.dmaCC1Read(address); + return read(address, data); +} + +auto SA1::BWRAM::writeCPU(uint24 address, uint8 data) -> void { + cpu.synchronize(sa1); + + if(address < 0x2000) { //$00-3f,80-bf:6000-7fff + address = sa1.mmio.sbm * 0x2000 + (address & 0x1fff); + } + + return write(address, data); +} + +auto SA1::BWRAM::readSA1(uint24 address, uint8 data) -> uint8 { + 
if(sa1.mmio.sw46 == 0) { + //$40-43:0000-ffff x 32 projection + address = (sa1.mmio.cbm & 0x1f) * 0x2000 + (address & 0x1fff); + return readLinear(address, data); + } else { + //$60-6f:0000-ffff x 128 projection + address = sa1.mmio.cbm * 0x2000 + (address & 0x1fff); + return readBitmap(address, data); + } +} + +auto SA1::BWRAM::writeSA1(uint24 address, uint8 data) -> void { + if(sa1.mmio.sw46 == 0) { + //$40-43:0000-ffff x 32 projection + address = (sa1.mmio.cbm & 0x1f) * 0x2000 + (address & 0x1fff); + return writeLinear(address, data); + } else { + //$60-6f:0000-ffff x 128 projection + address = sa1.mmio.cbm * 0x2000 + (address & 0x1fff); + return writeBitmap(address, data); + } +} + +auto SA1::BWRAM::readLinear(uint24 address, uint8 data) -> uint8 { + return read(address, data); +} + +auto SA1::BWRAM::writeLinear(uint24 address, uint8 data) -> void { + return write(address, data); +} + +auto SA1::BWRAM::readBitmap(uint20 address, uint8 data) -> uint8 { + if(sa1.mmio.bbf == 0) { + //4bpp + uint shift = address & 1; + address >>= 1; + switch(shift) { + case 0: return read(address).bits(0,3); + case 1: return read(address).bits(4,7); + } + } else { + //2bpp + uint shift = address & 3; + address >>= 2; + switch(shift) { + case 0: return read(address).bits(0,1); + case 1: return read(address).bits(2,3); + case 2: return read(address).bits(4,5); + case 3: return read(address).bits(6,7); + } + } + unreachable; +} + +auto SA1::BWRAM::writeBitmap(uint20 address, uint8 data) -> void { + if(sa1.mmio.bbf == 0) { + //4bpp + uint shift = address & 1; + address >>= 1; + switch(shift) { + case 0: data = read(address) & 0xf0 | data.bits(0,3) << 0; break; + case 1: data = read(address) & 0x0f | data.bits(0,3) << 4; break; + } + } else { + //2bpp + uint shift = address & 3; + address >>= 2; + switch(shift) { + case 0: data = read(address) & 0xfc | data.bits(0,1) << 0; break; + case 1: data = read(address) & 0xf3 | data.bits(0,1) << 2; break; + case 2: data = read(address) & 0xcf | 
data.bits(0,1) << 4; break; + case 3: data = read(address) & 0x3f | data.bits(0,1) << 6; break; + } + } + write(address, data); +} diff --git a/higan/sfc/coprocessor/sa1/dma.cpp b/higan/sfc/coprocessor/sa1/dma.cpp index 944d9a3c..7252fba2 100644 --- a/higan/sfc/coprocessor/sa1/dma.cpp +++ b/higan/sfc/coprocessor/sa1/dma.cpp @@ -1,45 +1,32 @@ -//==================== //direct data transfer -//==================== - auto SA1::dmaNormal() -> void { while(mmio.dtc--) { uint8 data = r.mdr; - uint32 dsa = mmio.dsa++; - uint32 dda = mmio.dda++; + uint24 source = mmio.dsa++; + uint24 target = mmio.dda++; - //source and destination cannot be the same - if(mmio.sd == DMA::SourceBWRAM && mmio.dd == DMA::DestBWRAM) continue; - if(mmio.sd == DMA::SourceIRAM && mmio.dd == DMA::DestIRAM ) continue; - - switch(mmio.sd) { - case DMA::SourceROM: - if((dsa & 0x408000) == 0x008000 || (dsa & 0xc00000) == 0xc00000) { - data = busRead(dsa, data); - } - break; - - case DMA::SourceBWRAM: - if((dsa & 0x40e000) == 0x006000 || (dsa & 0xf00000) == 0x400000) { - data = busRead(dsa, data); - } - break; - - case DMA::SourceIRAM: - data = iram.read(dsa & 0x07ff); - break; + if(mmio.sd == DMA::SourceROM && mmio.dd == DMA::DestBWRAM) { + step(bwram.conflict() ? 8 : 4); + data = rom.readSA1(source, data); + bwram.write(target, data); } - switch(mmio.dd) { - case DMA::DestBWRAM: - if((dda & 0x40e000) == 0x006000 || (dda & 0xf00000) == 0x400000) { - busWrite(dda, data); - } - break; + if(mmio.sd == DMA::SourceROM && mmio.dd == DMA::DestIRAM) { + step(iram.conflict() ? 6 : 4); + data = rom.readSA1(source, data); + iram.write(target, data); + } - case DMA::DestIRAM: - iram.write(dda & 0x07ff, data); - break; + if(mmio.sd == DMA::SourceBWRAM && mmio.dd == DMA::DestIRAM) { + step(bwram.conflict() ? 8 : iram.conflict() ? 6 : 4); + data = bwram.read(source, data); + iram.write(target, data); + } + + if(mmio.sd == DMA::SourceIRAM && mmio.dd == DMA::DestBWRAM) { + step(bwram.conflict() ? 8 : iram.conflict() ? 
6 : 4); + data = iram.read(source, data); + bwram.write(target, data); } } @@ -47,17 +34,9 @@ auto SA1::dmaNormal() -> void { if(mmio.dma_irqen) mmio.dma_irqcl = 0; } -//((byte & 6) << 3) + (byte & 1) explanation: -//transforms a byte index (0-7) into a planar index: -//result[] = { 0, 1, 16, 17, 32, 33, 48, 49 }; -//works for 2bpp, 4bpp and 8bpp modes - -//=========================== //type-1 character conversion -//=========================== - auto SA1::dmaCC1() -> void { - cpubwram.dma = true; + bwram.dma = true; mmio.chdma_irqfl = true; if(mmio.chdma_irqen) { mmio.chdma_irqcl = 0; @@ -65,6 +44,12 @@ auto SA1::dmaCC1() -> void { } } +//((byte & 6) << 3) + (byte & 1) explanation: +//transforms a byte index (0-7) into a planar index: +//result[] = {0, 1, 16, 17, 32, 33, 48, 49}; +//works for 2bpp, 4bpp and 8bpp modes + +//type-1 character conversion auto SA1::dmaCC1Read(uint addr) -> uint8 { //16 bytes/char (2bpp); 32 bytes/char (4bpp); 64 bytes/char (8bpp) uint charmask = (1 << (6 - mmio.dmacb)) - 1; @@ -88,16 +73,16 @@ auto SA1::dmaCC1Read(uint addr) -> uint8 { uint8 out[] = {0, 0, 0, 0, 0, 0, 0, 0}; for(auto x : range(8)) { - out[0] |= (data & 1) << (7 - x); data >>= 1; - out[1] |= (data & 1) << (7 - x); data >>= 1; + out[0] |= (data & 1) << 7 - x; data >>= 1; + out[1] |= (data & 1) << 7 - x; data >>= 1; if(mmio.dmacb == 2) continue; - out[2] |= (data & 1) << (7 - x); data >>= 1; - out[3] |= (data & 1) << (7 - x); data >>= 1; + out[2] |= (data & 1) << 7 - x; data >>= 1; + out[3] |= (data & 1) << 7 - x; data >>= 1; if(mmio.dmacb == 1) continue; - out[4] |= (data & 1) << (7 - x); data >>= 1; - out[5] |= (data & 1) << (7 - x); data >>= 1; - out[6] |= (data & 1) << (7 - x); data >>= 1; - out[7] |= (data & 1) << (7 - x); data >>= 1; + out[4] |= (data & 1) << 7 - x; data >>= 1; + out[5] |= (data & 1) << 7 - x; data >>= 1; + out[6] |= (data & 1) << 7 - x; data >>= 1; + out[7] |= (data & 1) << 7 - x; data >>= 1; } for(auto byte : range(bpp)) { @@ -110,10 +95,7 @@ auto 
SA1::dmaCC1Read(uint addr) -> uint8 { return iram.read((mmio.dda + (addr & charmask)) & 0x07ff); } -//=========================== //type-2 character conversion -//=========================== - auto SA1::dmaCC2() -> void { //select register file index (0-7 or 8-15) const uint8* brf = &mmio.brf[(dma.line & 1) << 3]; diff --git a/higan/sfc/coprocessor/sa1/io.cpp b/higan/sfc/coprocessor/sa1/io.cpp index 37d58796..cb1f9138 100644 --- a/higan/sfc/coprocessor/sa1/io.cpp +++ b/higan/sfc/coprocessor/sa1/io.cpp @@ -54,9 +54,9 @@ auto SA1::readIO(uint24 addr, uint8) -> uint8 { //(VDPL) variable-length data read port low case 0x230c: { uint24 data; - data.byte(0) = vbrRead(mmio.va + 0); - data.byte(1) = vbrRead(mmio.va + 1); - data.byte(2) = vbrRead(mmio.va + 2); + data.byte(0) = readVBR(mmio.va + 0); + data.byte(1) = readVBR(mmio.va + 1); + data.byte(2) = readVBR(mmio.va + 2); data >>= mmio.vbit; return data >> 0; @@ -65,9 +65,9 @@ auto SA1::readIO(uint24 addr, uint8) -> uint8 { //(VDPH) variable-length data read port high case 0x230d: { uint24 data; - data.byte(0) = vbrRead(mmio.va + 0); - data.byte(1) = vbrRead(mmio.va + 1); - data.byte(2) = vbrRead(mmio.va + 2); + data.byte(0) = readVBR(mmio.va + 0); + data.byte(1) = readVBR(mmio.va + 1); + data.byte(2) = readVBR(mmio.va + 2); data >>= mmio.vbit; if(mmio.hl == 1) { @@ -82,7 +82,7 @@ auto SA1::readIO(uint24 addr, uint8) -> uint8 { //(VC) version code register case 0x230e: { - return 0x01; //true value unknown + return 0x23; //RF5A123 } } @@ -333,7 +333,7 @@ auto SA1::writeIO(uint24 addr, uint8 data) -> void { mmio.dmasize = (data >> 2) & 7; mmio.dmacb = (data & 0x03); - if(mmio.chdend) cpubwram.dma = false; + if(mmio.chdend) bwram.dma = false; if(mmio.dmasize > 5) mmio.dmasize = 5; if(mmio.dmacb > 2) mmio.dmacb = 2; return; diff --git a/higan/sfc/coprocessor/sa1/iram.cpp b/higan/sfc/coprocessor/sa1/iram.cpp new file mode 100644 index 00000000..fdeaeabc --- /dev/null +++ b/higan/sfc/coprocessor/sa1/iram.cpp @@ -0,0 +1,33 @@ 
+auto SA1::IRAM::conflict() const -> bool { + if(!cpu.r.rwb) return false; + if((cpu.r.mar & 0x40f800) == 0x003000) return true; //00-3f,80-bf:3000-37ff + return false; +} + +auto SA1::IRAM::read(uint24 address, uint8 data) -> uint8 { + if(!size()) return data; + return _data[address & size() - 1]; +} + +auto SA1::IRAM::write(uint24 address, uint8 data) -> void { + if(!size()) return; + _data[address & size() - 1] = data; +} + +auto SA1::IRAM::readCPU(uint24 address, uint8 data) -> uint8 { + cpu.synchronize(sa1); + return read(address, data); +} + +auto SA1::IRAM::writeCPU(uint24 address, uint8 data) -> void { + cpu.synchronize(sa1); + return write(address, data); +} + +auto SA1::IRAM::readSA1(uint24 address, uint8 data) -> uint8 { + return read(address, data); +} + +auto SA1::IRAM::writeSA1(uint24 address, uint8 data) -> void { + return write(address, data); +} diff --git a/higan/sfc/coprocessor/sa1/memory.cpp b/higan/sfc/coprocessor/sa1/memory.cpp index 5f282b1b..2d78e179 100644 --- a/higan/sfc/coprocessor/sa1/memory.cpp +++ b/higan/sfc/coprocessor/sa1/memory.cpp @@ -1,270 +1,169 @@ -auto SA1::busRead(uint24 addr, uint8 data) -> uint8 { - if((addr & 0x40fe00) == 0x002200) { //$00-3f,80-bf:2200-23ff - return readIO(addr, data); +auto SA1::idle() -> void { + r.rwb = 0; + step(2); +} + +//RTx, JMx, JSx +auto SA1::idleJump() -> void { + //ROM access penalty cycle: does not apply to BWRAM or IRAM + if((r.pc & 0x408000) == 0x008000 || (r.pc & 0xc00000) == 0xc00000) idle(); +} + +//Bxx +auto SA1::idleBranch() -> void { + if(r.pc & 1) idleJump(); +} + +auto SA1::read(uint24 address) -> uint8 { + r.rwb = 1; + r.mar = address; + uint8 data = r.mdr; + + //00-3f,80-bf:2200-23ff + if((address & 0x40fe00) == 0x002200) { + step(2); + return r.mdr = readIO(address, data); } - if((addr & 0x408000) == 0x008000) { //$00-3f,80-bf:8000-ffff - addr = ((addr & 0x800000) >> 2) | ((addr & 0x3f0000) >> 1) | (addr & 0x7fff); - return mmcromRead(addr, data); + //00-3f,80-bf:8000-ffff + 
if((address & 0x408000) == 0x008000) { + step(rom.conflict() ? 4 : 2); + return r.mdr = rom.readSA1(address, data); } - if((addr & 0xc00000) == 0xc00000) { //$c0-ff:0000-ffff - return mmcromRead(addr, data); + //c0-ff:0000-ffff + if((address & 0xc00000) == 0xc00000) { + step(rom.conflict() ? 4 : 2); + return r.mdr = rom.readSA1(address, data); } - if((addr & 0x40e000) == 0x006000) { //$00-3f,80-bf:6000-7fff - return mmcSA1Read(addr, data); + //00-3f,80-bf:6000-7fff + if((address & 0x40e000) == 0x006000) { + step(bwram.conflict() ? 8 : 4); + return r.mdr = bwram.readSA1(address, data); } - if((addr & 0x40f800) == 0x000000) { //$00-3f,80-bf:0000-07ff - synchronize(cpu); - return iram.read(addr & 2047, data); + //00-3f,80-bf:0000-07ff + if((address & 0x40f800) == 0x000000) { + step(iram.conflict() ? 6 : 2); + return r.mdr = iram.readSA1(address, data); } - if((addr & 0x40f800) == 0x003000) { //$00-3f,80-bf:3000-37ff - synchronize(cpu); - return iram.read(addr & 2047, data); + //00-3f,80-bf:3000-37ff + if((address & 0x40f800) == 0x003000) { + step(iram.conflict() ? 6 : 2); + return r.mdr = iram.readSA1(address, data); } - if((addr & 0xf00000) == 0x400000) { //$40-4f:0000-ffff - synchronize(cpu); - return bwram.read(addr & (bwram.size() - 1), data); + //40-4f:0000-ffff + if((address & 0xf00000) == 0x400000) { + step(bwram.conflict() ? 8 : 4); + return r.mdr = bwram.readLinear(address, data); } - if((addr & 0xf00000) == 0x600000) { //$60-6f:0000-ffff - synchronize(cpu); - return bitmapRead(addr & 0x0fffff, data); + //60-6f:0000-ffff + if((address & 0xf00000) == 0x600000) { + step(bwram.conflict() ? 
8 : 4); + return r.mdr = bwram.readBitmap(address, data); } //unmapped region + step(2); return data; } -auto SA1::busWrite(uint24 addr, uint8 data) -> void { - if((addr & 0x40fe00) == 0x002200) { //$00-3f,80-bf:2200-23ff - return writeIO(addr, data); +auto SA1::write(uint24 address, uint8 data) -> void { + r.rwb = 1; + r.mar = address; + r.mdr = data; + + //00-3f,80-bf:2200-23ff + if((address & 0x40fe00) == 0x002200) { + step(2); + return writeIO(address, data); } - if((addr & 0x40e000) == 0x006000) { //$00-3f,80-bf:6000-7fff - return mmcSA1Write(addr, data); + //00-3f,80-bf:8000-ffff + if((address & 0x408000) == 0x008000) { + step(rom.conflict() ? 4 : 2); + return rom.writeSA1(address, data); } - if((addr & 0x40f800) == 0x000000) { //$00-3f,80-bf:0000-07ff - synchronize(cpu); - return iram.write(addr & 2047, data); + //c0-ff:0000-ffff + if((address & 0xc00000) == 0xc00000) { + step(rom.conflict() ? 4 : 2); + return rom.writeSA1(address, data); } - if((addr & 0x40f800) == 0x003000) { //$00-3f,80-bf:3000-37ff - synchronize(cpu); - return iram.write(addr & 2047, data); + //00-3f,80-bf:6000-7fff + if((address & 0x40e000) == 0x006000) { + step(bwram.conflict() ? 8 : 4); + return bwram.writeSA1(address, data); } - if((addr & 0xf00000) == 0x400000) { //$40-4f:0000-ffff - synchronize(cpu); - return bwram.write(addr & (bwram.size() - 1), data); + //00-3f,80-bf:0000-07ff + if((address & 0x40f800) == 0x000000) { + step(iram.conflict() ? 6 : 2); + return iram.writeSA1(address, data); } - if((addr & 0xf00000) == 0x600000) { //$60-6f:0000-ffff - synchronize(cpu); - return bitmapWrite(addr & 0x0fffff, data); + //00-3f,80-bf:3000-37ff + if((address & 0x40f800) == 0x003000) { + step(iram.conflict() ? 6 : 2); + return iram.writeSA1(address, data); } + + //40-4f:0000-ffff + if((address & 0xf00000) == 0x400000) { + step(bwram.conflict() ? 8 : 4); + return bwram.writeLinear(address, data); + } + + //60-6f:0000-ffff + if((address & 0xf00000) == 0x600000) { + step(bwram.conflict() ? 
8 : 4); + return bwram.writeBitmap(address, data); + } + + //unmapped region + step(2); + return; } //$230c (VDPL), $230d (VDPH) use this bus to read variable-length data. //this is used both to keep VBR-reads from accessing MMIO registers, and //to avoid syncing the S-CPU and SA-1*; as both chips are able to access //these ports. -auto SA1::vbrRead(uint24 addr, uint8 data) -> uint8 { - if((addr & 0x408000) == 0x008000) { //$00-3f,80-bf:8000-ffff - addr = ((addr & 0x800000) >> 2) | ((addr & 0x3f0000) >> 1) | (addr & 0x7fff); - return mmcromRead(addr, data); +auto SA1::readVBR(uint24 address, uint8 data) -> uint8 { + //00-3f,80-bf:8000-ffff + if((address & 0x408000) == 0x008000) { + return rom.readSA1(address, data); } - if((addr & 0xc00000) == 0xc00000) { //$c0-ff:0000-ffff - return mmcromRead(addr, data); + //c0-ff:0000-ffff + if((address & 0xc00000) == 0xc00000) { + return rom.readSA1(address, data); } - if((addr & 0x40e000) == 0x006000) { //$00-3f,80-bf:6000-7fff - return bwram.read(addr & (bwram.size() - 1), data); + //00-3f,80-bf:6000-7fff + if((address & 0x40e000) == 0x006000) { + return bwram.read(address, data); } - if((addr & 0xf00000) == 0x400000) { //$40-4f:0000-ffff - return bwram.read(addr & (bwram.size() - 1), data); + //40-4f:0000-ffff + if((address & 0xf00000) == 0x400000) { + return bwram.read(address, data); } - if((addr & 0x40f800) == 0x000000) { //$00-3f,80-bf:0000-07ff - return iram.read(addr & 2047, data); + //00-3f,80-bf:0000-07ff + if((address & 0x40f800) == 0x000000) { + return iram.read(address, data); } - if((addr & 0x40f800) == 0x003000) { //$00-3f,80-bf:3000-37ff - return iram.read(addr & 2047, data); + //00-3f,80-bf:3000-37ff + if((address & 0x40f800) == 0x003000) { + return iram.read(address, data); } return 0x00; } - -//ROM, I-RAM and MMIO registers are accessed at ~10.74MHz (2 clock ticks) -//BW-RAM is accessed at ~5.37MHz (4 clock ticks) -//tick() == 2 clock ticks -//note: bus conflict delays are not emulated at this time - -auto 
SA1::idle() -> void { - tick(); -} - -auto SA1::read(uint24 addr) -> uint8 { - tick(); - if(((addr & 0x40e000) == 0x006000) || ((addr & 0xd00000) == 0x400000)) tick(); - return busRead(addr, r.mdr); -} - -auto SA1::write(uint24 addr, uint8 data) -> void { - tick(); - if(((addr & 0x40e000) == 0x006000) || ((addr & 0xd00000) == 0x400000)) tick(); - busWrite(addr, r.mdr = data); -} - -//note: addresses are translated prior to invoking this function: -//$00-3f,80-bf:8000-ffff mask=0x408000 => $00-3f:0000-ffff -//$c0-ff:0000-ffff mask=0 -auto SA1::mmcromRead(uint24 addr, uint8) -> uint8 { - //reset vector overrides - if((addr & 0xffffe0) == 0x007fe0) { //$00:ffe0-ffef - if(addr == 0x7fea && sa1.mmio.cpu_nvsw) return sa1.mmio.snv >> 0; - if(addr == 0x7feb && sa1.mmio.cpu_nvsw) return sa1.mmio.snv >> 8; - if(addr == 0x7fee && sa1.mmio.cpu_ivsw) return sa1.mmio.siv >> 0; - if(addr == 0x7fef && sa1.mmio.cpu_ivsw) return sa1.mmio.siv >> 8; - } - - static auto read = [](uint addr) { - if((addr & 0x400000) && bsmemory.size()) return bsmemory.read(addr, 0x00); - return sa1.rom.read(bus.mirror(addr, sa1.rom.size())); - }; - - bool lo = addr < 0x400000; //*bmode==0 only applies to $00-3f,80-bf:8000-ffff - addr &= 0x3fffff; - - if(addr < 0x100000) { //$00-1f,8000-ffff; $c0-cf:0000-ffff - if(lo && mmio.cbmode == 0) return read(addr); - return read((mmio.cb << 20) | (addr & 0x0fffff)); - } - - if(addr < 0x200000) { //$20-3f,8000-ffff; $d0-df:0000-ffff - if(lo && mmio.dbmode == 0) return read(addr); - return read((mmio.db << 20) | (addr & 0x0fffff)); - } - - if(addr < 0x300000) { //$80-9f,8000-ffff; $e0-ef:0000-ffff - if(lo && mmio.ebmode == 0) return read(addr); - return read((mmio.eb << 20) | (addr & 0x0fffff)); - } - - if(addr < 0x400000) { //$a0-bf,8000-ffff; $f0-ff:0000-ffff - if(lo && mmio.fbmode == 0) return read(addr); - return read((mmio.fb << 20) | (addr & 0x0fffff)); - } - - return 0x00; -} - -auto SA1::mmcromWrite(uint24 addr, uint8 data) -> void { -} - -auto 
SA1::mmcbwramRead(uint24 addr, uint8 data) -> uint8 { - if(addr < 0x2000) { //$00-3f,80-bf:6000-7fff - cpu.synchronize(sa1); - addr = bus.mirror(mmio.sbm * 0x2000 + (addr & 0x1fff), cpubwram.size()); - return cpubwram.read(addr); - } - - if((addr & 0xf00000) == 0x400000) { //$40-4f:0000-ffff - return cpubwram.read(addr & 0x0fffff); - } - - return data; -} - -auto SA1::mmcbwramWrite(uint24 addr, uint8 data) -> void { - if(addr < 0x2000) { //$00-3f,80-bf:6000-7fff - cpu.synchronize(sa1); - addr = bus.mirror(mmio.sbm * 0x2000 + (addr & 0x1fff), cpubwram.size()); - return cpubwram.write(addr, data); - } - - if((addr & 0xf00000) == 0x400000) { //$40-4f:0000-ffff - return cpubwram.write(addr & 0x0fffff, data); - } -} - -auto SA1::mmcSA1Read(uint addr, uint8 data) -> uint8 { - synchronize(cpu); - if(mmio.sw46 == 0) { - //$40-43:0000-ffff x 32 projection - addr = bus.mirror((mmio.cbm & 0x1f) * 0x2000 + (addr & 0x1fff), bwram.size()); - return bwram.read(addr, data); - } else { - //$60-6f:0000-ffff x 128 projection - addr = bus.mirror(mmio.cbm * 0x2000 + (addr & 0x1fff), 0x100000); - return bitmapRead(addr, data); - } -} - -auto SA1::mmcSA1Write(uint addr, uint8 data) -> void { - synchronize(cpu); - if(mmio.sw46 == 0) { - //$40-43:0000-ffff x 32 projection - addr = bus.mirror((mmio.cbm & 0x1f) * 0x2000 + (addr & 0x1fff), bwram.size()); - bwram.write(addr, data); - } else { - //$60-6f:0000-ffff x 128 projection - addr = bus.mirror(mmio.cbm * 0x2000 + (addr & 0x1fff), 0x100000); - bitmapWrite(addr, data); - } -} - -auto SA1::bitmapRead(uint addr, uint8 data) -> uint8 { - if(mmio.bbf == 0) { - //4bpp - uint shift = addr & 1; - addr = (addr >> 1) & (bwram.size() - 1); - switch(shift) { - case 0: return (bwram.read(addr) >> 0) & 15; - case 1: return (bwram.read(addr) >> 4) & 15; - } - } else { - //2bpp - uint shift = addr & 3; - addr = (addr >> 2) & (bwram.size() - 1); - switch(shift) { - case 0: return (bwram.read(addr) >> 0) & 3; - case 1: return (bwram.read(addr) >> 2) & 3; - 
case 2: return (bwram.read(addr) >> 4) & 3; - case 3: return (bwram.read(addr) >> 6) & 3; - } - } - unreachable; -} - -auto SA1::bitmapWrite(uint addr, uint8 data) -> void { - if(mmio.bbf == 0) { - //4bpp - uint shift = addr & 1; - addr = (addr >> 1) & (bwram.size() - 1); - switch(shift) { - case 0: data = (bwram.read(addr) & 0xf0) | ((data & 15) << 0); break; - case 1: data = (bwram.read(addr) & 0x0f) | ((data & 15) << 4); break; - } - } else { - //2bpp - uint shift = addr & 3; - addr = (addr >> 2) & (bwram.size() - 1); - switch(shift) { - case 0: data = (bwram.read(addr) & 0xfc) | ((data & 3) << 0); break; - case 1: data = (bwram.read(addr) & 0xf3) | ((data & 3) << 2); break; - case 2: data = (bwram.read(addr) & 0xcf) | ((data & 3) << 4); break; - case 3: data = (bwram.read(addr) & 0x3f) | ((data & 3) << 6); break; - } - } - - bwram.write(addr, data); -} diff --git a/higan/sfc/coprocessor/sa1/rom.cpp b/higan/sfc/coprocessor/sa1/rom.cpp new file mode 100644 index 00000000..b1b94950 --- /dev/null +++ b/higan/sfc/coprocessor/sa1/rom.cpp @@ -0,0 +1,74 @@ +auto SA1::ROM::conflict() const -> bool { + if(!cpu.r.rwb) return false; + if((cpu.r.mar & 0x408000) == 0x008000) return true; //00-3f,80-bf:8000-ffff + if((cpu.r.mar & 0xc00000) == 0xc00000) return true; //c0-ff:0000-ffff + return false; +} + +auto SA1::ROM::read(uint24 address, uint8 data) -> uint8 { + address = bus.mirror(address, size()); + return _data[address]; +} + +auto SA1::ROM::write(uint24 address, uint8 data) -> void { +} + +//note: addresses are translated prior to invoking this function: +//00-3f,80-bf:8000-ffff mask=0x408000 => 00-3f:0000-ffff +//c0-ff:0000-ffff => untranslated +auto SA1::ROM::readCPU(uint24 address, uint8 data) -> uint8 { + //reset vector overrides + if((address & 0xffffe0) == 0x007fe0) { //00:ffe0-ffef + if(address == 0x7fea && sa1.mmio.cpu_nvsw) return sa1.mmio.snv >> 0; + if(address == 0x7feb && sa1.mmio.cpu_nvsw) return sa1.mmio.snv >> 8; + if(address == 0x7fee && 
sa1.mmio.cpu_ivsw) return sa1.mmio.siv >> 0; + if(address == 0x7fef && sa1.mmio.cpu_ivsw) return sa1.mmio.siv >> 8; + } + + static auto read = [](uint address) { + if((address & 0x400000) && bsmemory.size()) return bsmemory.read(address, 0x00); + return sa1.rom.read(address); + }; + + bool lo = address < 0x400000; //*bmode==0 only applies to 00-3f,80-bf:8000-ffff + address &= 0x3fffff; + + if(address < 0x100000) { //00-1f,8000-ffff; c0-cf:0000-ffff + if(lo && sa1.mmio.cbmode == 0) return read(address); + return read(sa1.mmio.cb << 20 | address & 0x0fffff); + } + + if(address < 0x200000) { //20-3f,8000-ffff; d0-df:0000-ffff + if(lo && sa1.mmio.dbmode == 0) return read(address); + return read(sa1.mmio.db << 20 | address & 0x0fffff); + } + + if(address < 0x300000) { //80-9f,8000-ffff; e0-ef:0000-ffff + if(lo && sa1.mmio.ebmode == 0) return read(address); + return read(sa1.mmio.eb << 20 | address & 0x0fffff); + } + + if(address < 0x400000) { //a0-bf,8000-ffff; f0-ff:0000-ffff + if(lo && sa1.mmio.fbmode == 0) return read(address); + return read(sa1.mmio.fb << 20 | address & 0x0fffff); + } + + return 0x00; +} + +auto SA1::ROM::writeCPU(uint24 address, uint8 data) -> void { +} + +auto SA1::ROM::readSA1(uint24 address, uint8 data) -> uint8 { + if((address & 0x408000) == 0x008000) { + address = (address & 0x800000) >> 2 | (address & 0x3f0000) >> 1 | address & 0x007fff; + } + return readCPU(address, data); +} + +auto SA1::ROM::writeSA1(uint24 address, uint8 data) -> void { + if((address & 0x408000) == 0x008000) { + address = (address & 0x800000) >> 2 | (address & 0x3f0000) >> 1 | address & 0x007fff; + } + return writeCPU(address, data); +} diff --git a/higan/sfc/coprocessor/sa1/sa1.cpp b/higan/sfc/coprocessor/sa1/sa1.cpp index 1b9260ec..2bb2cf23 100644 --- a/higan/sfc/coprocessor/sa1/sa1.cpp +++ b/higan/sfc/coprocessor/sa1/sa1.cpp @@ -2,7 +2,9 @@ namespace SuperFamicom { -#include "bus.cpp" +#include "rom.cpp" +#include "bwram.cpp" +#include "iram.cpp" #include "dma.cpp" 
#include "memory.cpp" #include "io.cpp" @@ -19,8 +21,7 @@ auto SA1::main() -> void { if(mmio.sa1_rdyb || mmio.sa1_resb) { //SA-1 co-processor is asleep - tick(); - synchronize(cpu); + step(2); return; } @@ -81,23 +82,23 @@ auto SA1::synchronizing() const -> bool { return scheduler.synchronizing(); } -auto SA1::tick() -> void { - step(2); - if(++status.counter == 0) synchronize(cpu); +auto SA1::step(uint clocks) -> void { + Thread::step(clocks); + synchronize(cpu); //adjust counters: //note that internally, status counters are in clocks; //whereas MMIO register counters are in dots (4 clocks = 1 dot) if(mmio.hvselb == 0) { //HV timer - status.hcounter += 2; - if(status.hcounter >= 1364) { - status.hcounter = 0; + status.hcounter += clocks; + while(status.hcounter >= 1364) { + status.hcounter -= 1364; if(++status.vcounter >= status.scanlines) status.vcounter = 0; } } else { //linear timer - status.hcounter += 2; + status.hcounter += clocks; status.vcounter += (status.hcounter >> 11); status.hcounter &= 0x07ff; status.vcounter &= 0x01ff; @@ -106,9 +107,9 @@ auto SA1::tick() -> void { //test counters for timer IRQ switch((mmio.ven << 1) + (mmio.hen << 0)) { case 0: break; - case 1: if(status.hcounter == (mmio.hcnt << 2)) triggerIRQ(); break; + case 1: if(status.hcounter == mmio.hcnt << 2) triggerIRQ(); break; case 2: if(status.vcounter == mmio.vcnt && status.hcounter == 0) triggerIRQ(); break; - case 3: if(status.vcounter == mmio.vcnt && status.hcounter == (mmio.hcnt << 2)) triggerIRQ(); break; + case 3: if(status.vcounter == mmio.vcnt && status.hcounter == mmio.hcnt << 2) triggerIRQ(); break; } } @@ -131,7 +132,7 @@ auto SA1::power() -> void { bwram.writeProtect(false); iram.writeProtect(false); - cpubwram.dma = false; + bwram.dma = false; for(auto addr : range(iram.size())) { iram.write(addr, 0x00); } diff --git a/higan/sfc/coprocessor/sa1/sa1.hpp b/higan/sfc/coprocessor/sa1/sa1.hpp index c39303e5..04302d35 100644 --- a/higan/sfc/coprocessor/sa1/sa1.hpp +++ 
b/higan/sfc/coprocessor/sa1/sa1.hpp @@ -1,8 +1,10 @@ +//Super Accelerator 1 + struct SA1 : Processor::WDC65816, Thread { //sa1.cpp static auto Enter() -> void; auto main() -> void; - auto tick() -> void; + auto step(uint clocks) -> void; auto interrupt() -> void override; alwaysinline auto triggerIRQ() -> void; @@ -13,20 +15,6 @@ struct SA1 : Processor::WDC65816, Thread { auto unload() -> void; auto power() -> void; - //bus.cpp - struct CPUIRAM : Memory { - auto size() const -> uint; - alwaysinline auto read(uint24, uint8 = 0) -> uint8; - alwaysinline auto write(uint24, uint8) -> void; - }; - - struct CPUBWRAM : Memory { - auto size() const -> uint; - alwaysinline auto read(uint24, uint8 = 0) -> uint8; - alwaysinline auto write(uint24, uint8) -> void; - bool dma; - }; - //dma.cpp struct DMA { enum CDEN : uint { DmaNormal = 0, DmaCharConversion = 1 }; @@ -41,25 +29,16 @@ struct SA1 : Processor::WDC65816, Thread { auto dmaCC2() -> void; //memory.cpp - auto busRead(uint24 addr, uint8 data) -> uint8; - auto busWrite(uint24 addr, uint8 data) -> void; - auto vbrRead(uint24 addr, uint8 data = 0) -> uint8; + alwaysinline auto conflictROM() const -> bool; + alwaysinline auto conflictBWRAM() const -> bool; + alwaysinline auto conflictIRAM() const -> bool; alwaysinline auto idle() -> void override; + alwaysinline auto idleJump() -> void override; + alwaysinline auto idleBranch() -> void override; alwaysinline auto read(uint24 addr) -> uint8 override; alwaysinline auto write(uint24 addr, uint8 data) -> void override; - - auto mmcromRead(uint24 addr, uint8 data) -> uint8; - auto mmcromWrite(uint24 addr, uint8 data) -> void; - - auto mmcbwramRead(uint24 addr, uint8 data) -> uint8; - auto mmcbwramWrite(uint24 addr, uint8 data) -> void; - - auto mmcSA1Read(uint addr, uint8 data) -> uint8; - auto mmcSA1Write(uint addr, uint8 data) -> void; - - auto bitmapRead(uint addr, uint8 data) -> uint8; - auto bitmapWrite(uint addr, uint8 data) -> void; + auto readVBR(uint24 addr, uint8 data = 
0) -> uint8; //io.cpp auto readIO(uint24 addr, uint8 data) -> uint8; @@ -68,12 +47,55 @@ struct SA1 : Processor::WDC65816, Thread { //serialization.cpp auto serialize(serializer&) -> void; - MappedRAM rom; - MappedRAM iram; - MappedRAM bwram; + struct ROM : MappedRAM { + //rom.cpp + alwaysinline auto conflict() const -> bool; - CPUIRAM cpuiram; - CPUBWRAM cpubwram; + alwaysinline auto read(uint24 address, uint8 data = 0) -> uint8 override; + alwaysinline auto write(uint24 address, uint8 data) -> void override; + + auto readCPU(uint24 address, uint8 data = 0) -> uint8; + auto writeCPU(uint24 address, uint8 data) -> void; + + auto readSA1(uint24 address, uint8 data = 0) -> uint8; + auto writeSA1(uint24 address, uint8 data) -> void; + } rom; + + struct BWRAM : MappedRAM { + //bwram.cpp + alwaysinline auto conflict() const -> bool; + + alwaysinline auto read(uint24 address, uint8 data = 0) -> uint8 override; + alwaysinline auto write(uint24 address, uint8 data) -> void override; + + auto readCPU(uint24 address, uint8 data = 0) -> uint8; + auto writeCPU(uint24 address, uint8 data) -> void; + + auto readSA1(uint24 address, uint8 data = 0) -> uint8; + auto writeSA1(uint24 address, uint8 data) -> void; + + auto readLinear(uint24 address, uint8 data = 0) -> uint8; + auto writeLinear(uint24 address, uint8 data) -> void; + + auto readBitmap(uint20 address, uint8 data = 0) -> uint8; + auto writeBitmap(uint20 address, uint8 data) -> void; + + bool dma; + } bwram; + + struct IRAM : MappedRAM { + //iram.cpp + alwaysinline auto conflict() const -> bool; + + alwaysinline auto read(uint24 address, uint8 data = 0) -> uint8 override; + alwaysinline auto write(uint24 address, uint8 data) -> void override; + + auto readCPU(uint24 address, uint8 data) -> uint8; + auto writeCPU(uint24 address, uint8 data) -> void; + + auto readSA1(uint24 address, uint8 data = 0) -> uint8; + auto writeSA1(uint24 address, uint8 data) -> void; + } iram; private: DMA dma; diff --git 
a/higan/sfc/coprocessor/sa1/serialization.cpp b/higan/sfc/coprocessor/sa1/serialization.cpp index 5ab06884..d528563c 100644 --- a/higan/sfc/coprocessor/sa1/serialization.cpp +++ b/higan/sfc/coprocessor/sa1/serialization.cpp @@ -4,6 +4,7 @@ auto SA1::serialize(serializer& s) -> void { s.array(iram.data(), iram.size()); s.array(bwram.data(), bwram.size()); + s.integer(bwram.dma); //sa1.hpp s.integer(status.counter); @@ -14,11 +15,6 @@ auto SA1::serialize(serializer& s) -> void { s.integer(status.vcounter); s.integer(status.hcounter); - //bus/bus.hpp - s.array(iram.data(), iram.size()); - - s.integer(cpubwram.dma); - //dma/dma.hpp s.integer(dma.line); diff --git a/higan/sfc/coprocessor/superfx/bus.cpp b/higan/sfc/coprocessor/superfx/bus.cpp index d7d73069..40d971df 100644 --- a/higan/sfc/coprocessor/superfx/bus.cpp +++ b/higan/sfc/coprocessor/superfx/bus.cpp @@ -1,5 +1,9 @@ //ROM / RAM access from the S-CPU +auto SuperFX::CPUROM::data() -> uint8* { + return superfx.rom.data(); +} + auto SuperFX::CPUROM::size() const -> uint { return superfx.rom.size(); } @@ -19,6 +23,10 @@ auto SuperFX::CPUROM::write(uint24 addr, uint8 data) -> void { superfx.rom.write(addr, data); } +auto SuperFX::CPURAM::data() -> uint8* { + return superfx.ram.data(); +} + auto SuperFX::CPURAM::size() const -> uint { return superfx.ram.size(); } diff --git a/higan/sfc/coprocessor/superfx/superfx.hpp b/higan/sfc/coprocessor/superfx/superfx.hpp index e890543f..48da52a2 100644 --- a/higan/sfc/coprocessor/superfx/superfx.hpp +++ b/higan/sfc/coprocessor/superfx/superfx.hpp @@ -10,15 +10,17 @@ struct SuperFX : Processor::GSU, Thread { //bus.cpp struct CPUROM : Memory { - auto size() const -> uint; - auto read(uint24, uint8) -> uint8; - auto write(uint24, uint8) -> void; + auto data() -> uint8* override; + auto size() const -> uint override; + auto read(uint24, uint8) -> uint8 override; + auto write(uint24, uint8) -> void override; }; struct CPURAM : Memory { - auto size() const -> uint; - auto 
read(uint24, uint8) -> uint8; - auto write(uint24, uint8) -> void; + auto data() -> uint8* override; + auto size() const -> uint override; + auto read(uint24, uint8) -> uint8 override; + auto write(uint24, uint8) -> void override; }; //core.cpp diff --git a/higan/sfc/cpu/cpu.hpp b/higan/sfc/cpu/cpu.hpp index 45884573..13412d34 100644 --- a/higan/sfc/cpu/cpu.hpp +++ b/higan/sfc/cpu/cpu.hpp @@ -166,12 +166,14 @@ private: //dma.cpp inline auto step(uint clocks) -> void; inline auto edge() -> void; - inline auto valid(uint24 address) -> bool; - inline auto read(uint24 address, bool valid) -> uint8; - inline auto read(uint24 address) -> uint8; + inline auto validA(uint24 address) -> bool; + inline auto readA(uint24 address) -> uint8; + inline auto readA(uint24 address, bool valid) -> uint8; + inline auto readB(uint8 address, bool valid) -> uint8; inline auto flush() -> void; - inline auto write(uint24 address, uint8 data, bool valid) -> void; - inline auto write(uint24 address, uint8 data) -> void; + inline auto writeA(uint24 address, uint8 data) -> void; + inline auto writeA(uint24 address, uint8 data, bool valid) -> void; + inline auto writeB(uint8 address, uint8 data, bool valid) -> void; inline auto transfer(uint24 address, uint2 index) -> void; inline auto dmaRun() -> void; diff --git a/higan/sfc/cpu/dma.cpp b/higan/sfc/cpu/dma.cpp index e23a4829..62baa7fa 100644 --- a/higan/sfc/cpu/dma.cpp +++ b/higan/sfc/cpu/dma.cpp @@ -25,6 +25,7 @@ auto CPU::dmaFlush() -> void { } auto CPU::dmaRun() -> void { + r.rwb = 0; dmaStep(8); dmaFlush(); dmaEdge(); @@ -38,6 +39,7 @@ auto CPU::hdmaReset() -> void { } auto CPU::hdmaSetup() -> void { + r.rwb = 0; dmaStep(8); dmaFlush(); for(auto& channel : channels) channel.hdmaSetup(); @@ -46,6 +48,7 @@ auto CPU::hdmaSetup() -> void { } auto CPU::hdmaRun() -> void { + r.rwb = 0; dmaStep(8); dmaFlush(); for(auto& channel : channels) channel.hdmaTransfer(); @@ -64,7 +67,7 @@ auto CPU::Channel::edge() -> void { return cpu.dmaEdge(); } -auto 
CPU::Channel::valid(uint24 address) -> bool { +auto CPU::Channel::validA(uint24 address) -> bool { //A-bus cannot access the B-bus or CPU I/O registers if((address & 0x40ff00) == 0x2100) return false; //00-3f,80-bf:2100-21ff if((address & 0x40fe00) == 0x4000) return false; //00-3f,80-bf:4000-41ff @@ -73,7 +76,11 @@ auto CPU::Channel::valid(uint24 address) -> bool { return true; } -auto CPU::Channel::read(uint24 address, bool valid) -> uint8 { +auto CPU::Channel::readA(uint24 address) -> uint8 { + return readA(address, validA(address)); +} + +auto CPU::Channel::readA(uint24 address, bool valid) -> uint8 { step(4); cpu.r.mdr = valid ? bus.read(address, cpu.r.mdr) : (uint8)0x00; step(4); @@ -81,41 +88,53 @@ auto CPU::Channel::read(uint24 address, bool valid) -> uint8 { return cpu.r.mdr; } -auto CPU::Channel::read(uint24 address) -> uint8 { - return read(address, valid(address)); +auto CPU::Channel::readB(uint8 address, bool valid) -> uint8 { + step(4); + cpu.r.mdr = valid ? bus.read(0x2100 | address, cpu.r.mdr) : (uint8)0x00; + step(4); + flush(); + return cpu.r.mdr; } auto CPU::Channel::flush() -> void { return cpu.dmaFlush(); } -auto CPU::Channel::write(uint24 address, uint8 data, bool valid) -> void { +auto CPU::Channel::writeA(uint24 address, uint8 data) -> void { + return writeA(address, data, validA(address)); +} + +auto CPU::Channel::writeA(uint24 address, uint8 data, bool valid) -> void { cpu.pipe.valid = valid; cpu.pipe.address = address; cpu.pipe.data = data; } -auto CPU::Channel::write(uint24 address, uint8 data) -> void { - return write(address, data, valid(address)); +auto CPU::Channel::writeB(uint8 address, uint8 data, bool valid) -> void { + cpu.pipe.valid = valid; + cpu.pipe.address = 0x2100 | address; + cpu.pipe.data = data; } -auto CPU::Channel::transfer(uint24 aAddress, uint2 index) -> void { - uint24 bAddress = 0x2100 | targetAddress; +auto CPU::Channel::transfer(uint24 addressA, uint2 index) -> void { + uint8 addressB = targetAddress; 
switch(transferMode) { - case 1: case 5: bAddress += index.bit(0); break; - case 3: case 7: bAddress += index.bit(1); break; - case 4: bAddress += index; break; + case 1: case 5: addressB += index.bit(0); break; + case 3: case 7: addressB += index.bit(1); break; + case 4: addressB += index; break; } //transfers from WRAM to WRAM are invalid - bool valid = bAddress != 0x2180 || ((aAddress & 0xfe0000) != 0x7e0000 && (aAddress & 0x40e000) != 0x0000); + bool valid = addressB != 0x2180 || ((addressA & 0xfe0000) != 0x7e0000 && (addressA & 0x40e000) != 0x0000); + cpu.r.rwb = 1; + cpu.r.mar = addressA; if(direction == 0) { - auto data = read(aAddress); - write(bAddress, data, valid); + auto data = readA(addressA); + writeB(addressB, data, valid); } else { - auto data = read(bAddress, valid); - write(aAddress, data); + auto data = readB(addressB, valid); + writeA(addressA, data); } } @@ -129,6 +148,7 @@ auto CPU::Channel::dmaRun() -> void { edge(); } while(dmaEnable && --transferSize); + cpu.r.rwb = 0; step(8); flush(); edge(); @@ -164,7 +184,8 @@ auto CPU::Channel::hdmaSetup() -> void { } auto CPU::Channel::hdmaReload() -> void { - auto data = read(sourceBank << 16 | hdmaAddress); + cpu.r.rwb = 1; + auto data = readA(cpu.r.mar = sourceBank << 16 | hdmaAddress); if((uint7)lineCounter == 0) { lineCounter = data; @@ -174,11 +195,13 @@ auto CPU::Channel::hdmaReload() -> void { hdmaDoTransfer = !hdmaCompleted; if(indirect) { - data = read(sourceBank << 16 | hdmaAddress++); + cpu.r.rwb = 1; + data = readA(cpu.r.mar = sourceBank << 16 | hdmaAddress++); indirectAddress = data << 8 | 0x00; //todo: should 0x00 be indirectAddress >> 8 ? 
if(hdmaCompleted && hdmaFinished()) return; - data = read(sourceBank << 16 | hdmaAddress++); + cpu.r.rwb = 1; + data = readA(cpu.r.mar = sourceBank << 16 | hdmaAddress++); indirectAddress = data << 8 | indirectAddress >> 8; } } diff --git a/higan/sfc/cpu/memory.cpp b/higan/sfc/cpu/memory.cpp index 3e21c70e..5b2735ae 100644 --- a/higan/sfc/cpu/memory.cpp +++ b/higan/sfc/cpu/memory.cpp @@ -1,37 +1,42 @@ auto CPU::idle() -> void { status.clockCount = 6; dmaEdge(); + r.rwb = 0; step(6); aluEdge(); } -auto CPU::read(uint24 addr) -> uint8 { - status.clockCount = speed(addr); +auto CPU::read(uint24 address) -> uint8 { + status.clockCount = speed(address); dmaEdge(); + r.rwb = 1; + r.mar = address; step(status.clockCount - 4); - auto data = bus.read(addr, r.mdr); + auto data = bus.read(r.mar, r.mdr); step(4); aluEdge(); //$00-3f,80-bf:4000-43ff reads are internal to CPU, and do not update the MDR - if((addr & 0x40fc00) != 0x4000) r.mdr = data; + if((r.mar & 0x40fc00) != 0x4000) r.mdr = data; return data; } -auto CPU::write(uint24 addr, uint8 data) -> void { +auto CPU::write(uint24 address, uint8 data) -> void { aluEdge(); - status.clockCount = speed(addr); + status.clockCount = speed(address); dmaEdge(); + r.rwb = 1; + r.mar = address; step(status.clockCount); - bus.write(addr, r.mdr = data); + bus.write(r.mar, r.mdr = data); } -auto CPU::speed(uint24 addr) const -> uint { - if(addr & 0x408000) return addr & 0x800000 ? io.romSpeed : 8; - if(addr + 0x6000 & 0x4000) return 8; - if(addr - 0x4000 & 0x7e00) return 6; +auto CPU::speed(uint24 address) const -> uint { + if(address & 0x408000) return address & 0x800000 ? 
io.romSpeed : 8; + if(address + 0x6000 & 0x4000) return 8; + if(address - 0x4000 & 0x7e00) return 6; return 12; } -auto CPU::readDisassembler(uint24 addr) -> uint8 { - return bus.read(addr, r.mdr); +auto CPU::readDisassembler(uint24 address) -> uint8 { + return bus.read(address, r.mdr); } diff --git a/higan/sfc/cpu/timing.cpp b/higan/sfc/cpu/timing.cpp index e7e2d259..21a007c4 100644 --- a/higan/sfc/cpu/timing.cpp +++ b/higan/sfc/cpu/timing.cpp @@ -16,6 +16,7 @@ auto CPU::step(uint clocks) -> void { if(!status.dramRefreshed && hcounter() >= status.dramRefreshPosition) { status.dramRefreshed = true; + r.rwb = 0; for(auto _ : range(5)) { step(8); aluEdge(); @@ -23,9 +24,12 @@ auto CPU::step(uint clocks) -> void { } #if defined(DEBUGGER) - synchronizeSMP(); - synchronizePPU(); - synchronizeCoprocessors(); + synchronize(smp); + synchronize(ppu); + #endif + + #if defined(DEBUGGER) || defined(ACCURATE_SA1) + for(auto coprocessor : coprocessors) synchronize(*coprocessor); #endif } diff --git a/higan/sfc/memory/memory-inline.hpp b/higan/sfc/memory/memory-inline.hpp index dc92a4aa..c1eb8f60 100644 --- a/higan/sfc/memory/memory-inline.hpp +++ b/higan/sfc/memory/memory-inline.hpp @@ -1,7 +1,3 @@ -//Memory - -auto Memory::size() const -> uint { return 0; } - //StaticRAM StaticRAM::StaticRAM(uint size) : _size(size) { _data = new uint8[_size]; } diff --git a/higan/sfc/memory/memory.hpp b/higan/sfc/memory/memory.hpp index 6dadbd14..f65b3423 100644 --- a/higan/sfc/memory/memory.hpp +++ b/higan/sfc/memory/memory.hpp @@ -1,5 +1,12 @@ struct Memory { - virtual inline auto size() const -> uint; + inline explicit operator bool() const { return size() > 0; } + + virtual auto reset() -> void {} + virtual auto allocate(uint) -> void {} + + virtual auto data() -> uint8* = 0; + virtual auto size() const -> uint = 0; + virtual auto read(uint24 addr, uint8 data = 0) -> uint8 = 0; virtual auto write(uint24 addr, uint8 data) -> void = 0; }; @@ -16,7 +23,7 @@ struct StaticRAM : Memory { inline 
auto operator[](uint24 addr) -> uint8&; inline auto operator[](uint24 addr) const -> const uint8&; -private: +protected: uint8* _data = nullptr; uint _size = 0; }; @@ -33,7 +40,7 @@ struct MappedRAM : Memory { inline auto write(uint24 addr, uint8 data) -> void; inline auto operator[](uint24 addr) const -> const uint8&; -private: +protected: uint8* _data = nullptr; uint _size = 0; bool _writeProtect = false; diff --git a/higan/sfc/sfc.hpp b/higan/sfc/sfc.hpp index 07aea368..9c99458c 100644 --- a/higan/sfc/sfc.hpp +++ b/higan/sfc/sfc.hpp @@ -20,6 +20,8 @@ #include #endif +//#define ACCURATE_SA1 + namespace SuperFamicom { #define platform Emulator::platform namespace File = Emulator::File; diff --git a/higan/sfc/slot/bsmemory/bsmemory.cpp b/higan/sfc/slot/bsmemory/bsmemory.cpp index 69c0685e..c265f5a5 100644 --- a/higan/sfc/slot/bsmemory/bsmemory.cpp +++ b/higan/sfc/slot/bsmemory/bsmemory.cpp @@ -24,6 +24,10 @@ auto BSMemory::power() -> void { memory.writeProtect(!regs.writeEnable); } +auto BSMemory::data() -> uint8* { + return memory.data(); +} + auto BSMemory::size() const -> uint { return memory.size(); } diff --git a/higan/sfc/slot/bsmemory/bsmemory.hpp b/higan/sfc/slot/bsmemory/bsmemory.hpp index c2098737..28d23c5f 100644 --- a/higan/sfc/slot/bsmemory/bsmemory.hpp +++ b/higan/sfc/slot/bsmemory/bsmemory.hpp @@ -4,9 +4,10 @@ struct BSMemory : Memory { auto unload() -> void; auto power() -> void; - auto size() const -> uint; - auto read(uint24 addr, uint8) -> uint8; - auto write(uint24 addr, uint8 data) -> void; + auto data() -> uint8* override; + auto size() const -> uint override; + auto read(uint24 addr, uint8 data) -> uint8 override; + auto write(uint24 addr, uint8 data) -> void override; //serialization.cpp auto serialize(serializer&) -> void; diff --git a/nall/arithmetic.hpp b/nall/arithmetic.hpp index 180dac03..c19c9290 100644 --- a/nall/arithmetic.hpp +++ b/nall/arithmetic.hpp @@ -67,3 +67,12 @@ #undef PairBits #undef TypeBits #undef HalfBits + +namespace 
nall { + //TODO: these types are for expressing smaller bit ranges in class interfaces + //for instance, XChaCha20 taking a 192-bit nonce + //however, they still allow more bits than expressed ... + //some sort of wrapper needs to be devised to ensure these sizes are masked and wrap appropriately + + using uint192_t = uint256_t; +} diff --git a/nall/arithmetic/unsigned.hpp b/nall/arithmetic/unsigned.hpp index 0c8b9c46..f6ce86fd 100644 --- a/nall/arithmetic/unsigned.hpp +++ b/nall/arithmetic/unsigned.hpp @@ -26,11 +26,23 @@ template alwaysinline auto ror(const T& lhs, const U& rh #if INTMAX_BITS >= 128 inline auto operator"" _u128(const char* s) -> uint128_t { uint128_t p = 0; - while(*s) { - auto c = *s++; - if(c == '\'') continue; - if(c < '0' || c > '9') break; - p = (p << 3) + (p << 1) + (c - '0'); + if(s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) { + s += 2; + while(*s) { + auto c = *s++; + if(c == '\''); + else if(c >= '0' && c <= '9') p = (p << 4) + (c - '0'); + else if(c >= 'a' && c <= 'f') p = (p << 4) + (c - 'a' + 10); + else if(c >= 'A' && c <= 'F') p = (p << 4) + (c - 'A' + 10); + else break; + } + } else { + while(*s) { + auto c = *s++; + if(c == '\''); + else if(c >= '0' && c <= '9') p = (p << 3) + (p << 1) + (c - '0'); + else break; + } } return p; } diff --git a/nall/array-span.hpp b/nall/array-span.hpp new file mode 100644 index 00000000..117dfc4a --- /dev/null +++ b/nall/array-span.hpp @@ -0,0 +1,84 @@ +#pragma once + +#include + +namespace nall { + +template struct array_span : array_view { + using type = array_span; + using super = array_view; + + inline array_span() { + super::_data = nullptr; + super::_size = 0; + } + + inline array_span(nullptr_t) { + super::_data = nullptr; + super::_size = 0; + } + + inline array_span(void* data, uint64_t size) { + super::_data = (T*)data; + super::_size = (int)size; + } + + inline operator T*() { return (T*)super::operator const T*(); } + + inline auto operator[](uint index) -> T& { return 
(T&)super::operator[](index); } + + template inline auto data() -> U* { return (U*)super::_data; } + + inline auto begin() -> iterator { return {(T*)super::_data, (uint)0}; } + inline auto end() -> iterator { return {(T*)super::_data, (uint)super::_size}; } + + inline auto rbegin() -> reverse_iterator { return {(T*)super::_data, (uint)super::_size - 1}; } + inline auto rend() -> reverse_iterator { return {(T*)super::_data, (uint)-1}; } + + auto write(T value) -> void { + operator[](0) = value; + super::_data++; + super::_size--; + } + + //array_span specializations + template auto writel(U value, uint size) -> void; + template auto writem(U value, uint size) -> void; + template auto writevn(U value, uint size) -> void; + template auto writevi(U value, uint size) -> void; +}; + +//array_span + +template<> inline auto array_span::write(uint8_t value) -> void { + operator[](0) = value; + _data++; + _size--; +} + +template<> template inline auto array_span::writel(U value, uint size) -> void { + for(uint byte : range(size)) write(value >> byte * 8); +} + +template<> template inline auto array_span::writem(U value, uint size) -> void { + for(uint byte : reverse(range(size))) write(value >> byte * 8); +} + +template<> template inline auto array_span::writevn(U value, uint size) -> void { + while(true) { + auto byte = value & 0x7f; + value >>= 7; + if(value == 0) return write(0x80 | byte); + write(byte); + value--; + } +} + +template<> template inline auto array_span::writevi(U value, uint size) -> void { + bool negate = value < 0; + if(negate) value = ~value; + value = value << 1 | negate; + writevn(value); +} + +} diff --git a/nall/array-view.hpp b/nall/array-view.hpp index eb95600a..58b0839d 100644 --- a/nall/array-view.hpp +++ b/nall/array-view.hpp @@ -1,13 +1,11 @@ #pragma once #include +#include +#include namespace nall { -struct string; -template struct vector; -template struct array; - template struct array_view { using type = array_view; @@ -16,13 +14,25 @@ 
template struct array_view { _size = 0; } + inline array_view(nullptr_t) { + _data = nullptr; + _size = 0; + } + inline array_view(const void* data, uint64_t size) { _data = (const T*)data; - _size = (uint)size; + _size = (int)size; } inline explicit operator bool() const { return _data && _size > 0; } - inline operator const T*() const { return _data; } + + inline operator const T*() const { + #ifdef DEBUG + struct out_of_bounds {}; + if(_size <= 0) throw out_of_bounds{}; + #endif + return _data; + } inline auto operator++() -> type& { _data++; _size--; return *this; } inline auto operator--() -> type& { _data--; _size++; return *this; } @@ -30,6 +40,9 @@ template struct array_view { inline auto operator++(int) -> type { auto copy = *this; ++(*this); return copy; } inline auto operator--(int) -> type { auto copy = *this; --(*this); return copy; } + inline auto operator-=(int distance) -> type& { _data -= distance; _size += distance; return *this; } + inline auto operator+=(int distance) -> type& { _data += distance; _size -= distance; return *this; } + inline auto operator[](uint index) const -> const T& { #ifdef DEBUG struct out_of_bounds {}; @@ -52,9 +65,62 @@ template struct array_view { inline auto rbegin() const -> reverse_iterator_const { return {_data, (uint)_size - 1}; } inline auto rend() const -> reverse_iterator_const { return {_data, (uint)-1}; } + auto read() -> T { + auto value = operator[](0); + _data++; + _size--; + return value; + } + + //array_view specializations + template auto readl(U& value, uint size) -> U; + template auto readm(U& value, uint size) -> U; + template auto readvn(U& value, uint size) -> U; + template auto readvi(U& value, uint size) -> U; + + template auto readl(uint size) -> U { U value; return readl(value, size); } + template auto readm(uint size) -> U { U value; return readm(value, size); } + template auto readvn(uint size) -> U { U value; return readvn(value, size); } + template auto readvi(uint size) -> U { U value; 
return readvi(value, size); } + protected: const T* _data; int _size; }; +//array_view + +template<> template inline auto array_view::readl(U& value, uint size) -> U { + value = 0; + for(uint byte : range(size)) value |= read() << byte * 8; + return value; +} + +template<> template inline auto array_view::readm(U& value, uint size) -> U { + value = 0; + for(uint byte : reverse(range(size))) value |= read() << byte * 8; + return value; +} + +template<> template inline auto array_view::readvn(U& value, uint size) -> U { + value = 0; + uint shift = 1; + while(true) { + auto byte = read(); + value += (byte & 0x7f) * shift; + if(byte & 0x80) break; + shift <<= 7; + value += shift; + } + return value; +} + +template<> template inline auto array_view::readvi(U& value, uint size) -> U { + value = readvn(); + bool negate = value & 1; + value >>= 1; + if(negate) value = ~value; + return value; +} + } diff --git a/nall/beat/archive/archive.hpp b/nall/beat/archive/archive.hpp new file mode 100644 index 00000000..9b90fad6 --- /dev/null +++ b/nall/beat/archive/archive.hpp @@ -0,0 +1,272 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace nall { namespace Beat { + +struct Archive { + struct Encryption { + string type; + uint256_t key = 0; + uint192_t nonce = 0; + }; + + struct Signature { + string type; + uint256_t privateKey = 0; + uint256_t publicKey = 0; + uint512_t signature = 0; + }; + + struct Compression { + string type; + uint size = 0; + }; + + //timestamps are human-readable strings in ISO 8601 format; save for T=>space + //times are stored in UTC, rather than local times + struct Timestamps { + string created; + string modified; + string accessed; + }; + + struct Permissions { + string name; + bool readable = false; + bool writable = false; + bool executable = false; + }; + + struct Node { + string name; + + //paths and files + Timestamps timestamps; + struct { + Permissions owner; + Permissions group; + Permissions 
other; + } permissions; + + //files only + uint offset = 0; + uint size = 0; + + Compression compression; + + string filename; + vector filedata; + }; + + auto append(const Node& node) -> bool; + auto encryptionManifest() -> string; + auto manifest() -> string; + auto create() -> vector; + + //internal functions + auto encode() -> vector; + auto encode(Node& node, uint64_t offset) -> vector; + + Encryption encryption; + Signature signature; + Compression compression; //solid archiving + vector nodes; +}; + +auto Archive::append(const Node& node) -> bool { + //prevent multiple nodes with the same name + if(nodes.find([&](auto& item) { return item.name == node.name; })) return false; + + nodes.append(node); + return true; +} + +auto Archive::encryptionManifest() -> string { + string manifest; + manifest.append("encryption\n"); + manifest.append(" type: ", encryption.type, "\n"); + manifest.append(" nonce: ", Encode::Base<57>(encryption.nonce), "\n"); + return manifest; +} + +auto Archive::manifest() -> string { + string manifest; + manifest.append("archive\n"); + + for(auto& node : nodes) { + if(node.name.endsWith("/")) { + manifest.append(" path: ", string{node.name}.trimRight("/", 1L), "\n"); + } else { + manifest.append(" file: ", node.name, "\n"); + manifest.append(" offset: ", node.offset, "\n"); + manifest.append(" size: ", node.size, "\n"); + if(node.compression.type) { + manifest.append(" compression: ", node.compression.type, "\n"); + manifest.append(" size: ", node.compression.size, "\n"); + } + } + if(node.timestamps.created || node.timestamps.modified || node.timestamps.accessed) { + manifest.append(" timestamp\n"); + if(auto timestamp = node.timestamps.created ) manifest.append(" created: ", timestamp, "\n"); + if(auto timestamp = node.timestamps.modified) manifest.append(" modified: ", timestamp, "\n"); + if(auto timestamp = node.timestamps.accessed) manifest.append(" accessed: ", timestamp, "\n"); + } + if(node.permissions.owner.name || 
node.permissions.group.name || node.permissions.other.name) { + manifest.append(" permission\n"); + if(node.permissions.owner.name) { + manifest.append(" owner: ", node.permissions.owner.name, "\n"); + if(node.permissions.owner.readable ) manifest.append(" readable\n"); + if(node.permissions.owner.writable ) manifest.append(" writable\n"); + if(node.permissions.owner.executable) manifest.append(" executable\n"); + } + if(node.permissions.group.name) { + manifest.append(" group: ", node.permissions.group.name, "\n"); + if(node.permissions.group.readable ) manifest.append(" readable\n"); + if(node.permissions.group.writable ) manifest.append(" writable\n"); + if(node.permissions.group.executable) manifest.append(" executable\n"); + } + if(node.permissions.other.name) { + manifest.append(" other\n"); + if(node.permissions.other.readable ) manifest.append(" readable\n"); + if(node.permissions.other.writable ) manifest.append(" writable\n"); + if(node.permissions.other.executable) manifest.append(" executable\n"); + } + } + } + + if(compression.type) { + manifest.append(" compression: ", compression.type, "\n"); + manifest.append(" size: ", compression.size, "\n"); + } + + if(signature.type == "ed25519") { + manifest.append(" signature: ", signature.type, "\n"); + manifest.append(" publicKey: ", Encode::Base<57>(signature.publicKey), "\n"); + manifest.append(" signature: ", Encode::Base<57>(signature.signature), "\n"); + } + + return manifest; +} + +auto Archive::create() -> vector { + vector output; + + output.append('B'); + output.append('P'); + output.append('A'); + output.append('1'); + + nodes.sort([&](auto& lhs, auto& rhs) { + return string::compare(lhs.name, rhs.name) < 0; + }); + + auto content = encode(); + if(compression.type == "lzsa") { + content = Encode::LZSA(content); + compression.size = content.size(); + } + + if(signature.type == "ed25519") { + EllipticCurve::Ed25519 ed25519; + signature.publicKey = ed25519.publicKey(signature.privateKey); + 
signature.signature = ed25519.sign(content, signature.privateKey); + } + + if(encryption.type == "xchacha20") { + //a randomly generated nonce is preferred + if(!encryption.nonce) { + CSPRNG csprng; + encryption.nonce = csprng.random(); + } + Cipher::XChaCha20 xchacha20{encryption.key, encryption.nonce}; + content = xchacha20.encrypt(content); + + string manifest; + manifest.append("encryption\n"); + manifest.append(" type: ", encryption.type, "\n"); + manifest.append(" nonce: ", Encode::Base<57>(encryption.nonce), "\n"); + + output.append(content); + for(uint8_t byte : manifest) output.append(byte); + output.appendl(manifest.size(), 8); + } else { + encryption = {}; + output.append(content); + } + + auto sha256 = Hash::SHA256(output).value(); + output.appendl(sha256, 32); + + return output; +} + +// + +auto Archive::encode() -> vector { + vector output; + + for(auto& node : nodes) { + if(node.filename) { + node.timestamps.created = chrono::utc::datetime(inode::timestamp(node.filename, inode::time::create)); + node.timestamps.accessed = chrono::utc::datetime(inode::timestamp(node.filename, inode::time::access)); + node.timestamps.modified = chrono::utc::datetime(inode::timestamp(node.filename, inode::time::modify)); + + uint mode = inode::mode(node.filename); + node.permissions.owner.name = inode::user(node.filename); + node.permissions.owner.executable = mode & 0100; + node.permissions.owner.writable = mode & 0200; + node.permissions.owner.readable = mode & 0400; + node.permissions.group.name = inode::group(node.filename); + node.permissions.group.executable = mode & 0010; + node.permissions.group.writable = mode & 0020; + node.permissions.group.readable = mode & 0040; + node.permissions.other.name = " "; + node.permissions.other.executable = mode & 0001; + node.permissions.other.writable = mode & 0002; + node.permissions.other.readable = mode & 0004; + } + + if(node.name.endsWith("/")) continue; + + auto buffer = encode(node, output.size()); + 
output.append(buffer); + } + + auto manifest = this->manifest(); + for(auto byte : manifest) output.append(byte); + for(auto byte : range(8)) output.append((uint64_t)manifest.size() >> byte * 8); + + return output; +} + +auto Archive::encode(Node& node, uint64_t offset) -> vector { + node.offset = offset; + + vector output; + + if(node.filename) { + output = file::read(node.filename); + } else { + output = node.filedata; + } + + node.size = output.size(); + + if(node.compression.type == "lzsa") { + output = Encode::LZSA(output); + node.compression.size = output.size(); + } else { + node.compression = {}; + } + + return output; +} + +}} diff --git a/nall/cipher/chacha20.hpp b/nall/cipher/chacha20.hpp index 30d06ef9..3df44272 100644 --- a/nall/cipher/chacha20.hpp +++ b/nall/cipher/chacha20.hpp @@ -1,12 +1,14 @@ #pragma once #include +#include namespace nall { namespace Cipher { +//64-bit nonce; 64-bit x 64-byte (256GB) counter struct ChaCha20 { - auto initialize(uint256_t key, uint64_t nonce, uint64_t counter = 0) -> void { - static const uint256_t sigma = 0x6b20657479622d323320646e61707865_u256; //"expand 32-byte k" + ChaCha20(uint256_t key, uint64_t nonce, uint64_t counter = 0) { + static const uint128_t sigma = 0x6b20657479622d323320646e61707865_u128; //"expand 32-byte k" input[ 0] = sigma >> 0; input[ 1] = sigma >> 32; @@ -24,25 +26,31 @@ struct ChaCha20 { input[13] = counter >> 32; input[14] = nonce >> 0; input[15] = nonce >> 32; + offset = 0; } - auto encrypt(const uint8_t* input, uint8_t* output, uint64_t length) -> void { - while(length--) { - if(!offset) cipher(); + auto encrypt(array_view input) -> vector { + vector output; + while(input) { + if(!offset) { + cipher(); + increment(); + } auto byte = offset++; - *output++ = *input++ ^ (block[byte >> 2] >> (byte & 3) * 8); + output.append(*input++ ^ (block[byte >> 2] >> (byte & 3) * 8)); offset &= 63; } + return output; } - auto decrypt(const uint8_t* input, uint8_t* output, uint64_t length) -> void { - 
encrypt(input, output, length); //reciprocal cipher + auto decrypt(array_view input) -> vector { + return encrypt(input); //reciprocal cipher } -private: +//protected: inline auto rol(uint32_t value, uint bits) -> uint32_t { - return value << bits | value >> (32 - bits); + return value << bits | value >> 32 - bits; } auto quarterRound(uint32_t x[16], uint a, uint b, uint c, uint d) -> void { @@ -54,7 +62,7 @@ private: auto cipher() -> void { memory::copy(block, input, 64); - for(auto n : range(10)) { + for(uint n : range(10)) { quarterRound(block, 0, 4, 8, 12); quarterRound(block, 1, 5, 9, 13); quarterRound(block, 2, 6, 10, 14); @@ -64,7 +72,10 @@ private: quarterRound(block, 2, 7, 8, 13); quarterRound(block, 3, 4, 9, 14); } - for(auto n : range(16)) { + } + + auto increment() -> void { + for(uint n : range(16)) { block[n] += input[n]; } if(!++input[12]) ++input[13]; @@ -75,4 +86,24 @@ private: uint64_t offset; }; +struct HChaCha20 : protected ChaCha20 { + HChaCha20(uint256_t key, uint128_t nonce) : ChaCha20(key, nonce >> 64, nonce >> 0) { + cipher(); + } + + auto key() const -> uint256_t { + uint256_t key = 0; + for(uint n : range(4)) key |= (uint256_t)block[ 0 + n] << (n + 0) * 32; + for(uint n : range(4)) key |= (uint256_t)block[12 + n] << (n + 4) * 32; + return key; + } +}; + +//192-bit nonce; 64-bit x 64-byte (256GB) counter +struct XChaCha20 : ChaCha20 { + XChaCha20(uint256_t key, uint192_t nonce, uint64_t counter = 0): + ChaCha20(HChaCha20(key, nonce).key(), nonce >> 128, counter) { + } +}; + }} diff --git a/nall/encode/base.hpp b/nall/encode/base.hpp index ae517267..d1f7ad73 100644 --- a/nall/encode/base.hpp +++ b/nall/encode/base.hpp @@ -1,5 +1,10 @@ #pragma once +//required bytes: ceil(bits / log2(base)) +//base57 => 128=22, 256=44, 512=88 +//base62 => 128=22, 256=43, 512=86 +//base64 => 128=22, 256=43, 512=86 + #include namespace nall { namespace Encode { diff --git a/nall/inode.hpp b/nall/inode.hpp index 364985d5..5eeb59dc 100644 --- a/nall/inode.hpp 
+++ b/nall/inode.hpp @@ -9,7 +9,7 @@ namespace nall { struct inode { - enum class time : uint { access, modify }; + enum class time : uint { create, modify, access }; static auto exists(const string& name) -> bool { return access(name, F_OK) == 0; @@ -27,31 +27,55 @@ struct inode { return access(name, X_OK) == 0; } + static auto mode(const string& name) -> uint { + struct stat data{}; + stat(name, &data); + return data.st_mode; + } + static auto uid(const string& name) -> uint { - struct stat data{0}; + struct stat data{}; stat(name, &data); return data.st_uid; } static auto gid(const string& name) -> uint { - struct stat data{0}; + struct stat data{}; stat(name, &data); return data.st_gid; } - static auto mode(const string& name) -> uint { - struct stat data{0}; - stat(name, &data); - return data.st_mode; + #if !defined(PLATFORM_WINDOWS) + static auto user(const string& name) -> string { + struct passwd* pw = getpwuid(uid(name)); + if(pw && pw->pw_name) return pw->pw_name; + return {}; } + static auto group(const string& name) -> string { + struct group* gr = getgrgid(gid(name)); + if(gr && gr->gr_name) return gr->gr_name; + return {}; + } + #endif + static auto timestamp(const string& name, time mode = time::modify) -> uint64_t { - struct stat data = {0}; + struct stat data{}; stat(name, &data); - switch(mode) { default: - case time::access: return data.st_atime; + switch(mode) { + #if defined(PLATFORM_WINDOWS) + case time::create: return data.st_ctime; + #else + //st_birthtime may return -1 or st_atime if it is not supported + //the best that can be done in this case is to return st_mtime if it's older + case time::create: return min((uint)data.st_birthtime, (uint)data.st_mtime); + #endif case time::modify: return data.st_mtime; + //for performance reasons, last access time is usually not enabled on various filesystems + //ensure that the last access time is not older than the last modify time (eg for NTFS) + case time::access: return max((uint)data.st_atime, 
data.st_mtime); } + return 0; } //returns true if 'name' already exists diff --git a/nall/nall.hpp b/nall/nall.hpp index ea8021a1..72508f48 100644 --- a/nall/nall.hpp +++ b/nall/nall.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/nall/platform.hpp b/nall/platform.hpp index e944b986..493611d9 100644 --- a/nall/platform.hpp +++ b/nall/platform.hpp @@ -115,11 +115,3 @@ namespace Math { #else #define unreachable throw #endif - -#if defined(COMPILER_GCC) && __GNUC__ == 4 && __GNUC_MINOR__ <= 7 - //GCC 4.7.x has a bug (#54849) when specifying override with a trailing return type: - //auto function() -> return_type override; //this is the syntax that the C++11 standard requires - //auto function() override -> return_type; //this is the syntax that GCC 4.7.x requires - //in order to compile code correctly with both compilers, we disable the override keyword for GCC - #define override -#endif diff --git a/nall/random.hpp b/nall/random.hpp index 3f9d20bd..6af3d430 100644 --- a/nall/random.hpp +++ b/nall/random.hpp @@ -1,25 +1,64 @@ #pragma once +#include +#include +#include #include #include +#include + +#if defined(PLATFORM_LINUX) + #include +#elif defined(PLATFORM_WINDOWS) + #include +#endif namespace nall { -struct RandomNumberGenerator { - virtual auto seed(uint64_t) -> void = 0; - virtual auto operator()() -> uint64_t = 0; - virtual auto serialize(serializer&) -> void = 0; -}; - -//Galois LFSR using CRC64 polynomials -struct LinearFeedbackShiftRegisterGenerator : RandomNumberGenerator { - auto seed(uint64_t seed) -> void { - lfsr = seed; - for(uint n = 0; n < 8; n++) operator()(); +template struct RNG { + template auto random() -> T { + T value = 0; + for(uint n : range((sizeof(T) + 3) / 4)) { + value = value << 32 | (uint32_t)static_cast(this)->read(); + } + return value; } - auto operator()() -> uint64_t { - return lfsr = (lfsr >> 1) ^ (-(lfsr & 1) & crc64); + template auto bound(T range) -> T { + T threshold = 
-range % range; + while(true) { + T value = random(); + if(value >= threshold) return value % range; + } + } + +protected: + auto randomSeed() -> uint256_t { + uint256_t seed = 0; + #if defined(PLATFORM_BSD) || defined(PLATFORM_MACOS) + for(uint n : range(8)) seed = seed << 32 | (uint32_t)arc4random(); + #elif defined(PLATFORM_LINUX) + getrandom(&seed, 32, GRND_NONBLOCK); + #elif defined(PLATFORM_WINDOWS) + HCRYPTPROV provider; + if(CryptAcquireContext(&provider, nullptr, MS_STRONG_PROV, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) { + CryptGenRandom(provider, 32, (BYTE*)&seed); + CryptReleaseContext(provider, 0); + } + #else + //it's ... better than nothing ... + srand(time(nullptr)); + for(uint n : range(32)) seed = seed << 8 | (uint8_t)rand(); + #endif + return seed; + } +}; + +//Galois linear feedback shift register using CRC64 polynomials +struct PRNG_LFSR : RNG { + auto seed(maybe seed = {}) -> void { + lfsr = seed ? seed() : (uint64_t)randomSeed(); + for(uint n : range(8)) read(); //hide the CRC64 polynomial from initial output } auto serialize(serializer& s) -> void { @@ -27,13 +66,81 @@ struct LinearFeedbackShiftRegisterGenerator : RandomNumberGenerator { } private: + auto read() -> uint64_t { + return lfsr = (lfsr >> 1) ^ (-(lfsr & 1) & crc64); + } + static const uint64_t crc64 = 0xc96c'5795'd787'0f42; uint64_t lfsr = crc64; + + friend class RNG; }; -inline auto random() -> uint64_t { - static LinearFeedbackShiftRegisterGenerator lfsr; - return lfsr(); +struct PRNG_PCG : RNG { + auto seed(maybe seed = {}, maybe sequence = {}) -> void { + if(!seed) seed = (uint32_t)randomSeed(); + if(!sequence) sequence = 0; + + state = 0; + increment = sequence() << 1 | 1; + read(); + state += seed(); + read(); + } + + auto serialize(serializer& s) -> void { + s.integer(state); + s.integer(increment); + } + +private: + auto read() -> uint32_t { + uint64_t state = this->state; + this->state = state * 6'364'136'223'846'793'005ull + increment; + uint32_t xorshift = (state >> 18 ^ 
state) >> 27; + uint32_t rotate = state >> 59; + return xorshift >> rotate | xorshift << (-rotate & 31); + } + + uint64_t state = 0; + uint64_t increment = 0; + + friend class RNG; +}; + +//XChaCha20 cryptographically secure pseudo-random number generator +struct CSPRNG_XChaCha20 : RNG { + CSPRNG_XChaCha20() { seed(); } + + auto seed(maybe key = {}, maybe nonce = {}) -> void { + //the randomness comes from the key; the nonce just adds a bit of added entropy + if(!key) key = randomSeed(); + if(!nonce) nonce = (uint192_t)clock() << 64 | chrono::nanosecond(); + context = {key(), nonce()}; + } + +private: + auto read() -> uint32_t { + if(!counter) { context.cipher(); context.increment(); } + uint32_t value = context.block[counter++]; + if(counter == 16) counter = 0; //64-bytes per block; 4 bytes per read + return value; + } + + Cipher::XChaCha20 context{0, 0}; + uint counter = 0; + + friend class RNG; +}; + +// + +using PRNG = PRNG_PCG; +using CSPRNG = CSPRNG_XChaCha20; + +template inline auto random() -> T { + static PRNG_PCG pcg; //note: unseeded + return pcg.random(); } } diff --git a/nall/string.hpp b/nall/string.hpp index 7569356b..efacbfce 100644 --- a/nall/string.hpp +++ b/nall/string.hpp @@ -147,7 +147,9 @@ public: explicit operator bool() const { return _size; } operator const char*() const { return (const char*)data(); } + operator array_span() { return {(char*)get(), size()}; } operator array_view() const { return {(const char*)data(), size()}; } + operator array_span() { return {(uint8_t*)get(), size()}; } operator array_view() const { return {(const uint8_t*)data(), size()}; } auto operator==(const string& source) const -> bool { diff --git a/nall/suffix-array.hpp b/nall/suffix-array.hpp index afe3ce51..37bd750b 100644 --- a/nall/suffix-array.hpp +++ b/nall/suffix-array.hpp @@ -51,9 +51,12 @@ suffix_array_invert: 8 "t" 0 "" +suffix_array_phi: + phi = [2,5,9,0,1,7,8,3,4,0] + suffix_array_lcp: - prefixes = [0,1,3,1,2,0,2,0,1] => lcp[n] == lcp(n, n-1) - "" - 
+ prefixes = [0,0,1,3,1,2,0,2,0,1] => lcp[n] == lcp(n, n-1) + "" 0 "aacatat" 0 "acaacatat" 1 "a" "acatat" 3 "aca" @@ -64,13 +67,13 @@ suffix_array_lcp: "t" 0 "tat" 1 "t" +suffix_array_plcp: + plcp = [1,0,0,3,2,2,1,1,0,0] + suffix_array_lrcp: llcp = [0,0,0,3,1,0,0,0,0,1] => llcp[m] == lcp(l, m) rlcp = [0,1,1,1,2,0,2,0,0,0] => rlcp[m] == lcp(m, r) -suffix_array_phi: - phi = [2,5,9,0,1,7,8,3,4] - suffix_array_lpf: lengths = [0,0,1,3,2,1,0,2,1,0] offsets = [0,0,0,0,1,3,4,5,6,2] @@ -87,7 +90,7 @@ suffix_array_lpf: */ -// via induced sorting +// suffix array via induced sorting // O(n) inline auto suffix_array(array_view input) -> vector { return induced_sort(input.data(), input.size()); @@ -95,128 +98,169 @@ inline auto suffix_array(array_view input) -> vector { // inverse // O(n) -inline auto suffix_array_invert(array_view suffixes) -> vector { - vector inverted; - inverted.reset(), inverted.reallocate(suffixes.size()); - for(int n : range(suffixes.size())) inverted[suffixes[n]] = n; - return inverted; +inline auto suffix_array_invert(array_view sa) -> vector { + vector isa; + isa.reallocate(sa.size()); + for(int i : range(sa.size())) isa[sa[i]] = i; + return isa; +} + +// auxiliary data structure for plcp and lpf computation +// O(n) +inline auto suffix_array_phi(array_view sa) -> vector { + vector phi; + phi.reallocate(sa.size()); + phi[sa[0]] = 0; + for(int i : range(1, sa.size())) phi[sa[i]] = sa[i - 1]; + return phi; +} + +// longest common prefix: lcp(l, r) +// O(n) +inline auto suffix_array_lcp(int l, int r, array_view sa, array_view input) -> int { + int i = sa[l], j = sa[r], k = 0, size = input.size(); + while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++; + return k; +} + +// longest common prefix: lcp(i, j, k) +// O(n) +inline auto suffix_array_lcp(int i, int j, int k, array_view input) -> int { + int size = input.size(); + while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++; + return k; } // longest common prefix: 
lcp[n] == lcp(n, n-1) -// algorithm: kasai // O(n) -inline auto suffix_array_lcp(array_view suffixes, array_view inverted, array_view input) -> vector { - int size = input.size(); - vector prefixes; - prefixes.reset(), prefixes.reallocate(size); - for(int i = 0, l = 0; i < size; i++) { - if(inverted[i] == size) { l = 0; continue; } //the next substring is empty; ignore it - int j = suffixes[inverted[i] + 1]; - while(i + l < size && j + l < size && input[i + l] == input[j + l]) l++; - prefixes[inverted[i]] = l; - if(l) l--; +inline auto suffix_array_lcp(array_view sa, array_view isa, array_view input) -> vector { + int k = 0, size = input.size(); + vector lcp; + lcp.reallocate(size + 1); + for(int i : range(size)) { + if(isa[i] == size) { k = 0; continue; } //the next substring is empty; ignore it + int j = sa[isa[i] + 1]; + while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++; + lcp[1 + isa[i]] = k; + if(k) k--; } - return prefixes; + lcp[0] = 0; + return lcp; +} + +// longest common prefix (from permuted longest common prefix) +// O(n) +inline auto suffix_array_lcp(array_view plcp, array_view sa) -> vector { + vector lcp; + lcp.reallocate(plcp.size()); + for(int i : range(plcp.size())) lcp[i] = plcp[sa[i]]; + return lcp; +} + +// permuted longest common prefix +// O(n) +inline auto suffix_array_plcp(array_view phi, array_view input) -> vector { + vector plcp; + plcp.reallocate(phi.size()); + int k = 0, size = input.size(); + for(int i : range(size)) { + int j = phi[i]; + while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++; + plcp[i] = k; + if(k) k--; + } + return plcp; +} + +// permuted longest common prefix (from longest common prefix) +// O(n) +inline auto suffix_array_plcp(array_view lcp, array_view sa) -> vector { + vector plcp; + plcp.reallocate(lcp.size()); + for(int i : range(lcp.size())) plcp[sa[i]] = lcp[i]; + return plcp; } // longest common prefixes - left + right // llcp[m] == lcp(l, m) // rlcp[m] == lcp(m, r) // 
O(n) -inline auto suffix_array_lrcp(vector& llcp, vector& rlcp, array_view lcp, array_view suffixes, array_view input) -> void { - llcp.reset(), llcp.reallocate(lcp.size() + 1); - rlcp.reset(), rlcp.reallocate(lcp.size() + 1); +// requires: lcp -or- plcp+sa +inline auto suffix_array_lrcp(vector& llcp, vector& rlcp, array_view lcp, array_view plcp, array_view sa, array_view input) -> void { + int size = input.size(); + llcp.reset(), llcp.reallocate(size + 1); + rlcp.reset(), rlcp.reallocate(size + 1); function recurse = [&](int l, int r) -> int { - if(l == lcp.size()) return 0; - if(l == r - 1) return lcp[l]; + if(l == r - 1) { + if(r > size) return 0; + if(lcp) return lcp[r]; + return plcp[sa[r]]; + } int m = l + r >> 1; llcp[m] = recurse(l, m); rlcp[m] = recurse(m, r); return min(llcp[m], rlcp[m]); }; - recurse(0, lcp.size() + 1); + recurse(0, size + 1); + llcp[0] = 0; rlcp[0] = 0; } -// auxiliary data for suffix_array_lpf -// O(n) -inline auto suffix_array_phi(array_view suffixes) -> vector { - vector phi; - phi.reset(), phi.reallocate(suffixes.size() - 1); - for(int i : range(1, suffixes.size())) { - phi[suffixes[i]] = suffixes[i - 1]; - } - return phi; -} - // longest previous factor // O(n) -inline auto suffix_array_lpf(vector& lengths, vector& offsets, array_view phi, array_view input) -> void { - int l = 0, size = input.size(); +// optional: plcp +inline auto suffix_array_lpf(vector& lengths, vector& offsets, array_view phi, array_view plcp, array_view input) -> void { + int k = 0, size = input.size(); lengths.reset(), lengths.resize(size + 1, -1); offsets.reset(), offsets.resize(size + 1, -1); - function recurse = [&](int i, int l, int j) -> void { + function recurse = [&](int i, int j, int k) -> void { if(lengths[i] < 0) { - lengths[i] = l; + lengths[i] = k; + offsets[i] = j; + } else if(lengths[i] < k) { + if(offsets[i] > j) { + recurse(offsets[i], j, lengths[i]); + } else { + recurse(j, offsets[i], lengths[i]); + } + lengths[i] = k; offsets[i] = j; } 
else { - if(lengths[i] < l) { - if(offsets[i] > j) { - recurse(offsets[i], lengths[i], j); - } else { - recurse(j, lengths[i], offsets[i]); - } - lengths[i] = l; - offsets[i] = j; + if(offsets[i] > j) { + recurse(offsets[i], j, k); } else { - if(offsets[i] > j) { - recurse(offsets[i], l, j); - } else { - recurse(j, l, offsets[i]); - } + recurse(j, offsets[i], k); } } }; for(int i : range(size)) { int j = phi[i]; - while(i + l < size && j + l < size && input[i + l] == input[j + l]) l++; + if(plcp) k = plcp[i]; + else while(i + k < size && j + k < size && input[i + k] == input[j + k]) k++; if(i > j) { - recurse(i, l, j); + recurse(i, j, k); } else { - recurse(j, l, i); + recurse(j, i, k); } - if(l) l--; + if(k) k--; } - //there can be no previous factor for the start of input; clear these values from -1 to 0 lengths[0] = 0; offsets[0] = 0; } -// longest common prefix: lcp(l, r) -// O(n) -inline auto suffix_array_lcp(int l, int r, array_view suffixes, array_view input) -> int { - int k = 0, size = input.size(); - l = suffixes[l], r = suffixes[r]; - while(l + k < size && r + k < size) { - if(input[l + k] != input[r + k]) break; - k++; - } - return k; -} - // O(n log m) -inline auto suffix_array_find(int& length, int& offset, array_view suffixes, array_view input, array_view match) -> bool { +inline auto suffix_array_find(int& length, int& offset, array_view sa, array_view input, array_view match) -> bool { length = 0, offset = 0; int l = 0, r = input.size(); while(l < r - 1) { int m = l + r >> 1; - int s = suffixes[m]; + int s = sa[m]; int k = 0; while(k < match.size() && s + k < input.size()) { @@ -241,13 +285,13 @@ inline auto suffix_array_find(int& length, int& offset, array_view suffixes } // O(n + log m) -inline auto suffix_array_find(int& length, int& offset, array_view llcp, array_view rlcp, array_view suffixes, array_view input, array_view match) -> bool { +inline auto suffix_array_find(int& length, int& offset, array_view llcp, array_view rlcp, array_view sa, 
array_view input, array_view match) -> bool { length = 0, offset = 0; int l = 0, r = input.size(), k = 0; while(l < r - 1) { int m = l + r >> 1; - int s = suffixes[m]; + int s = sa[m]; while(k < match.size() && s + k < input.size()) { if(match[k] != input[s + k]) break; @@ -274,52 +318,47 @@ inline auto suffix_array_find(int& length, int& offset, array_view llcp, ar // +//there are multiple strategies for building the required auxiliary structures for suffix arrays + struct SuffixArray { using type = SuffixArray; //O(n) inline SuffixArray(array_view input) : input(input) { - suffixes = suffix_array(input); - } - - //O(n) - inline auto lcp() -> type& { - inverted = suffix_array_invert(suffixes); - prefixes = suffix_array_lcp(suffixes, inverted, input); - return *this; + sa = suffix_array(input); } //O(n) inline auto lrcp() -> type& { - lcp(); - suffix_array_lrcp(prefixesL, prefixesR, prefixes, suffixes, input); + //if(!isa) isa = suffix_array_invert(sa); + //if(!lcp) lcp = suffix_array_lcp(sa, isa, input); + if(!phi) phi = suffix_array_phi(sa); + if(!plcp) plcp = suffix_array_plcp(phi, input); + //if(!lcp) lcp = suffix_array_lcp(plcp, sa); + if(!llcp || !rlcp) suffix_array_lrcp(llcp, rlcp, lcp, plcp, sa, input); return *this; } //O(n) inline auto lpf() -> type& { - auto phi = suffix_array_phi(suffixes); - suffix_array_lpf(lengths, offsets, phi, input); + if(!phi) phi = suffix_array_phi(sa); + //if(!plcp) plcp = suffix_array_plcp(phi, input); + if(!lengths || !offsets) suffix_array_lpf(lengths, offsets, phi, plcp, input); return *this; } inline auto operator[](int offset) const -> int { - return suffixes[offset]; + return sa[offset]; } //O(n log m) - //inline auto find(int& length, int& offset, array_view match) -> bool { - // return suffix_array_find(length, offset, suffixes, input, match); - //} - - //requires: lrcp() - //O(n + log m) + //O(n + log m) with lrcp() inline auto find(int& length, int& offset, array_view match) -> bool { - return 
suffix_array_find(length, offset, prefixesL, prefixesR, suffixes, input, match); + if(!llcp || !rlcp) return suffix_array_find(length, offset, sa, input, match); //O(n log m) + return suffix_array_find(length, offset, llcp, rlcp, sa, input, match); //O(n + log m) } - //requires: lpf() - //O(n) + //O(n) with lpf() inline auto previous(int& length, int& offset, int address) -> void { length = lengths[address]; offset = offsets[address]; @@ -329,13 +368,15 @@ struct SuffixArray { array_view input; //suffix array and auxiliary data structures - vector suffixes; //suffix array - vector inverted; //inverted suffix array - vector prefixes; //longest common prefixes - lcp(n, n-1) - vector prefixesL; //longest common prefixes - lcp(l, m) - vector prefixesR; //longest common prefixes - lcp(m, r) - vector lengths; //longest previous factors - vector offsets; //longest previous factors + vector sa; //suffix array + vector isa; //inverted suffix array + vector phi; //phi + vector plcp; //permuted longest common prefixes + vector lcp; //longest common prefixes + vector llcp; //longest common prefixes - left + vector rlcp; //longest common prefixes - right + vector lengths; //longest previous factors + vector offsets; //longest previous factors }; } diff --git a/nall/vector.hpp b/nall/vector.hpp index 73f4e6ed..d8baa96c 100644 --- a/nall/vector.hpp +++ b/nall/vector.hpp @@ -2,6 +2,7 @@ #include +#include #include #include #include @@ -30,6 +31,7 @@ struct vector_base { ~vector_base(); explicit operator bool() const; + operator array_span(); operator array_view() const; template auto capacity() const -> uint; template auto size() const -> uint; @@ -125,7 +127,7 @@ struct vector_base { auto foreach(const function& callback) -> void; auto foreach(const function& callback) -> void; -private: +protected: T* _pool = nullptr; //pointer to first initialized element in pool uint _size = 0; //number of initialized elements in pool uint _left = 0; //number of allocated elements free on the 
left of pool @@ -150,3 +152,5 @@ namespace nall { using vector_base::vector_base; }; } + +#include diff --git a/nall/vector/core.hpp b/nall/vector/core.hpp index 9cfcc9e4..143a405c 100644 --- a/nall/vector/core.hpp +++ b/nall/vector/core.hpp @@ -31,6 +31,10 @@ template vector::operator bool() const { return _size; } +template vector::operator array_span() { + return {data(), size()}; +} + template vector::operator array_view() const { return {data(), size()}; } diff --git a/nall/vector/specialization/uint8_t.hpp b/nall/vector/specialization/uint8_t.hpp new file mode 100644 index 00000000..56758266 --- /dev/null +++ b/nall/vector/specialization/uint8_t.hpp @@ -0,0 +1,18 @@ +#pragma once + +namespace nall { + +template<> struct vector : vector_base { + using type = vector; + using vector_base::vector_base; + + template auto appendl(U value, uint size) -> void { + for(uint byte : range(size)) append(uint8_t(value >> byte * 8)); + } + + template auto appendm(U value, uint size) -> void { + for(uint byte : reverse(range(size))) append(uint8_t(value >> byte * 8)); + } +}; + +} diff --git a/ruby/input/joypad/directinput.cpp b/ruby/input/joypad/directinput.cpp index bfc9e09b..ffab1229 100644 --- a/ruby/input/joypad/directinput.cpp +++ b/ruby/input/joypad/directinput.cpp @@ -138,7 +138,7 @@ struct InputJoypadDirectInput { property.diph.dwHow = DIPH_DEVICE; device->GetProperty(DIPROP_GUIDANDPATH, &property.diph); string devicePath = (const char*)utf8_t(property.wszPath); - jp.pathID = Hash::CRC32(devicePath.data(), devicePath.size()).value(); + jp.pathID = Hash::CRC32(devicePath).value(); jp.hid->setVendorID(jp.vendorID); jp.hid->setProductID(jp.productID); jp.hid->setPathID(jp.pathID); diff --git a/ruby/input/joypad/udev.cpp b/ruby/input/joypad/udev.cpp index 02433d54..89d15cb0 100644 --- a/ruby/input/joypad/udev.cpp +++ b/ruby/input/joypad/udev.cpp @@ -266,7 +266,7 @@ private: auto createJoypadHID(Joypad& jp) -> void { jp.hid->setVendorID(jp.vendorID.hex()); 
jp.hid->setProductID(jp.productID.hex()); - jp.hid->setPathID(Hash::CRC32(jp.deviceName.data(), jp.deviceName.size()).value()); + jp.hid->setPathID(Hash::CRC32(jp.deviceName).value()); for(uint n : range(jp.axes.size())) jp.hid->axes().append(n); for(uint n : range(jp.hats.size())) jp.hid->hats().append(n);