merge ares upstream, resolves #3710

This commit is contained in:
CasualPokePlayer 2023-08-10 17:14:21 -07:00
parent 8260a59bcf
commit 2e75645cbd
58 changed files with 1839 additions and 361 deletions

Binary file not shown.

View File

@ -184,7 +184,6 @@ static inline SaveType DetectSaveType(u8* rom)
if (id == "NCR") ret = EEPROM512;
if (id == "NEA") ret = EEPROM512;
if (id == "NPW") ret = EEPROM512;
if (id == "NPM") ret = EEPROM512;
if (id == "NPY") ret = EEPROM512;
if (id == "NPT") ret = EEPROM512;
if (id == "NRA") ret = EEPROM512;
@ -194,7 +193,6 @@ static inline SaveType DetectSaveType(u8* rom)
if (id == "NK2") ret = EEPROM512;
if (id == "NSV") ret = EEPROM512;
if (id == "NFX") ret = EEPROM512;
if (id == "NFP") ret = EEPROM512;
if (id == "NS6") ret = EEPROM512;
if (id == "NNA") ret = EEPROM512;
if (id == "NRS") ret = EEPROM512;
@ -202,7 +200,6 @@ static inline SaveType DetectSaveType(u8* rom)
if (id == "NSC") ret = EEPROM512;
if (id == "NSA") ret = EEPROM512;
if (id == "NB6") ret = EEPROM512;
if (id == "NSM") ret = EEPROM512;
if (id == "NSS") ret = EEPROM512;
if (id == "NTX") ret = EEPROM512;
if (id == "NT6") ret = EEPROM512;
@ -215,12 +212,13 @@ static inline SaveType DetectSaveType(u8* rom)
if (id == "NIR") ret = EEPROM512;
if (id == "NVL") ret = EEPROM512;
if (id == "NVY") ret = EEPROM512;
if (id == "NWR") ret = EEPROM512;
if (id == "NWC") ret = EEPROM512;
if (id == "NAD") ret = EEPROM512;
if (id == "NWU") ret = EEPROM512;
if (id == "NYK") ret = EEPROM512;
if (id == "NMZ") ret = EEPROM512;
if (id == "NSM") ret = EEPROM512;
if (id == "NWR") ret = EEPROM512;
if (id == "NDK" && region_code == 'J') ret = EEPROM512;
if (id == "NWT" && region_code == 'J') ret = EEPROM512;
@ -236,7 +234,6 @@ static inline SaveType DetectSaveType(u8* rom)
if (id == "NMX") ret = EEPROM2KB;
if (id == "NGC") ret = EEPROM2KB;
if (id == "NIM") ret = EEPROM2KB;
if (id == "NK4") ret = EEPROM2KB;
if (id == "NNB") ret = EEPROM2KB;
if (id == "NMV") ret = EEPROM2KB;
if (id == "NM8") ret = EEPROM2KB;
@ -248,16 +245,17 @@ static inline SaveType DetectSaveType(u8* rom)
if (id == "NR7") ret = EEPROM2KB;
if (id == "NEP") ret = EEPROM2KB;
if (id == "NYS") ret = EEPROM2KB;
if (id == "NK4") ret = EEPROM2KB;
if (id == "ND3" && region_code == 'J') ret = EEPROM2KB;
if (id == "ND4" && region_code == 'J') ret = EEPROM2KB;
if (id == "NTE") ret = SRAM32KB;
if (id == "NVB") ret = SRAM32KB;
if (id == "NB5") ret = SRAM32KB;
if (id == "CFZ") ret = SRAM32KB;
if (id == "NFZ") ret = SRAM32KB;
if (id == "NSI") ret = SRAM32KB;
if (id == "NG6") ret = SRAM32KB;
if (id == "N3H") ret = SRAM32KB;
if (id == "NGP") ret = SRAM32KB;
if (id == "NYW") ret = SRAM32KB;
if (id == "NHY") ret = SRAM32KB;
@ -278,7 +276,7 @@ static inline SaveType DetectSaveType(u8* rom)
if (id == "NUM") ret = SRAM32KB;
if (id == "NOB") ret = SRAM32KB;
if (id == "CPS") ret = SRAM32KB;
if (id == "NB5") ret = SRAM32KB;
if (id == "NPM") ret = SRAM32KB;
if (id == "NRE") ret = SRAM32KB;
if (id == "NAL") ret = SRAM32KB;
if (id == "NT3") ret = SRAM32KB;
@ -288,6 +286,7 @@ static inline SaveType DetectSaveType(u8* rom)
if (id == "NWL") ret = SRAM32KB;
if (id == "NW2") ret = SRAM32KB;
if (id == "NWX") ret = SRAM32KB;
if (id == "N3H" && region_code == 'J') ret = SRAM32KB;
if (id == "NK4" && region_code == 'J' && revision < 2) ret = SRAM32KB;
if (id == "CDZ") ret = SRAM96KB;
@ -697,7 +696,7 @@ static u8 PeekFunc(u64 address)
}
}
u32 unused = 0;
ares::Nintendo64::Thread unused;
return ares::Nintendo64::bus.read<ares::Nintendo64::Byte>(addr, unused);
}
@ -705,7 +704,7 @@ static void SysBusAccess(u8* buffer, u64 address, u64 count, bool write)
{
if (write)
{
u32 unused = 0;
ares::Nintendo64::Thread unused;
while (count--)
ares::Nintendo64::bus.write<ares::Nintendo64::Byte>(address++, *buffer++, unused);
}

View File

@ -43,7 +43,7 @@ using namespace nall::primitives;
namespace ares {
static const string Name = "ares";
static const string Version = "132";
static const string Version = "133";
static const string Copyright = "ares team, Near";
static const string License = "ISC";
static const string LicenseURI = "https://opensource.org/licenses/ISC";

View File

@ -2,12 +2,15 @@
namespace ares::Memory {
constexpr u32 fixedBufferSize = 8_MiB;
#if defined(PLATFORM_MACOS)
//stub for unsupported platforms
//dynamic allocation for unsupported platforms
FixedAllocator::FixedAllocator() {
_allocator.resize(fixedBufferSize, bump_allocator::executable);
}
#else
alignas(4096) u8 fixedBuffer[8_MiB];
alignas(4096) u8 fixedBuffer[fixedBufferSize];
FixedAllocator::FixedAllocator() {
_allocator.resize(sizeof(fixedBuffer), 0, fixedBuffer);

View File

@ -2,9 +2,15 @@ struct Rumble : Input {
DeclareClass(Rumble, "input.rumble")
using Input::Input;
auto enable() const -> bool { return _enable; }
auto setEnable(bool enable) -> void { _enable = enable; }
auto weakValue() const -> u16 { return _weak; }
auto strongValue() const -> u16 { return _strong; }
auto setValues(u16 weak, u16 strong) -> void { _weak = weak; _strong = strong; }
// For systems with binary motors
auto enable() const -> bool { return _weak > 0 || _strong > 0; }
auto setEnable(bool enable) -> void { _weak = enable ? 65535 : 0; _strong = enable ? 65535 : 0; }
protected:
bool _enable = 0;
u16 _weak = 0;
u16 _strong = 0;
};

View File

@ -25,10 +25,12 @@ auto AI::unload() -> void {
}
auto AI::main() -> void {
f64 left = 0, right = 0;
sample(left, right);
stream->frame(left, right);
step(dac.period);
while(Thread::clock < 0) {
f64 left = 0, right = 0;
sample(left, right);
stream->frame(left, right);
step(dac.period);
}
}
auto AI::sample(f64& left, f64& right) -> void {
@ -55,10 +57,6 @@ auto AI::sample(f64& left, f64& right) -> void {
}
}
auto AI::step(u32 clocks) -> void {
Thread::clock += clocks;
}
auto AI::power(bool reset) -> void {
Thread::reset();

View File

@ -19,12 +19,11 @@ struct AI : Thread, Memory::RCP<AI> {
auto unload() -> void;
auto main() -> void;
auto sample(f64& left, f64& right) -> void;
auto step(u32 clocks) -> void;
auto power(bool reset) -> void;
//io.cpp
auto readWord(u32 address, u32& cycles) -> u32;
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
auto readWord(u32 address, Thread& thread) -> u32;
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
//serialization.cpp
auto serialize(serializer&) -> void;

View File

@ -1,4 +1,4 @@
auto AI::readWord(u32 address, u32& cycles) -> u32 {
auto AI::readWord(u32 address, Thread& thread) -> u32 {
address = (address & 0xfffff) >> 2;
n32 data;
@ -21,7 +21,7 @@ auto AI::readWord(u32 address, u32& cycles) -> u32 {
return data;
}
auto AI::writeWord(u32 address, u32 data_, u32& cycles) -> void {
auto AI::writeWord(u32 address, u32 data_, Thread& thread) -> void {
address = (address & 0xfffff) >> 2;
n32 data = data_;
@ -56,7 +56,7 @@ auto AI::writeWord(u32 address, u32 data_, u32& cycles) -> void {
//AI_DACRATE
auto frequency = dac.frequency;
io.dacRate = data.bit(0,13);
dac.frequency = max(1, system.frequency() / 4 / (io.dacRate + 1)) * 1.037;
dac.frequency = max(1, system.videoFrequency() / (io.dacRate + 1));
dac.period = system.frequency() / dac.frequency;
if(frequency != dac.frequency) stream->setFrequency(dac.frequency);
}

View File

@ -51,7 +51,6 @@ auto CIC::cmdChallenge() -> void {
fifo.write(0); // write 0 bit
for (auto i : range(30)) fifo.writeNibble(data[i]);
state = Run;
printf("CIC challenge complete %d\n", fifo.size());
}
}

View File

@ -238,46 +238,62 @@ auto Gamepad::read() -> n32 {
platform->input(start);
#if false
//scale {-32768 ... +32767} to {-85 ... +85}
auto ax = x->value() * 85.0 / 32767.0;
auto ay = y->value() * 85.0 / 32767.0;
auto cardinalMax = 85.0;
auto diagonalMax = 69.0;
auto innerDeadzone = 7.0; // default should remain 7 (~8.2% of 85) as the deadzone is axial in nature and fights cardinalMax
auto outerDeadzoneRadiusMax = 2.0 / sqrt(2.0) * (diagonalMax / cardinalMax * (cardinalMax - innerDeadzone) + innerDeadzone); //from linear scaling equation, substitute outerDeadzoneRadiusMax*sqrt(2)/2 for lengthAbsoluteX and set diagonalMax as the result then solve for outerDeadzoneRadiusMax
//create inner axial dead-zone in range {-7 ... +7} and scale from it up to outer circular dead-zone of radius 85
//scale {-32768 ... +32767} to {-outerDeadzoneRadiusMax ... +outerDeadzoneRadiusMax}
auto ax = x->value() * outerDeadzoneRadiusMax / 32767.0;
auto ay = y->value() * outerDeadzoneRadiusMax / 32767.0;
//create inner axial dead-zone in range {-innerDeadzone ... +innerDeadzone} and scale from it up to outer circular dead-zone of radius outerDeadzoneRadiusMax
auto length = sqrt(ax * ax + ay * ay);
if(length <= 85.0) {
if(length <= outerDeadzoneRadiusMax) {
auto lengthAbsoluteX = abs(ax);
auto lengthAbsoluteY = abs(ay);
if(lengthAbsoluteX <= 7.0) {
if(lengthAbsoluteX <= innerDeadzone) {
lengthAbsoluteX = 0.0;
} else {
lengthAbsoluteX = (lengthAbsoluteX - 7.0) * 85.0 / (85.0 - 7.0) / lengthAbsoluteX;
lengthAbsoluteX = (lengthAbsoluteX - innerDeadzone) * cardinalMax / (cardinalMax - innerDeadzone) / lengthAbsoluteX;
}
ax *= lengthAbsoluteX;
if(lengthAbsoluteY <= 7.0) {
if(lengthAbsoluteY <= innerDeadzone) {
lengthAbsoluteY = 0.0;
} else {
lengthAbsoluteY = (lengthAbsoluteY - 7.0) * 85.0 / (85.0 - 7.0) / lengthAbsoluteY;
lengthAbsoluteY = (lengthAbsoluteY - innerDeadzone) * cardinalMax / (cardinalMax - innerDeadzone) / lengthAbsoluteY;
}
ay *= lengthAbsoluteY;
} else {
length = 85.0 / length;
length = outerDeadzoneRadiusMax / length;
ax *= length;
ay *= length;
}
//bound diagonals to an octagonal range {-69 ... +69}
//bound diagonals to an octagonal range {-diagonalMax ... +diagonalMax}
if(ax != 0.0 && ay != 0.0) {
auto slope = ay / ax;
auto edgex = copysign(85.0 / (abs(slope) + 16.0 / 69.0), ax);
auto edgey = copysign(min(abs(edgex * slope), 85.0 / (1.0 / abs(slope) + 16.0 / 69.0)), ay);
auto edgex = copysign(cardinalMax / (abs(slope) + (cardinalMax - diagonalMax) / diagonalMax), ax);
auto edgey = copysign(min(abs(edgex * slope), cardinalMax / (1.0 / abs(slope) + (cardinalMax - diagonalMax) / diagonalMax)), ay);
edgex = edgey / slope;
auto scale = sqrt(edgex * edgex + edgey * edgey) / 85.0;
ax *= scale;
ay *= scale;
length = sqrt(ax * ax + ay * ay);
auto distanceToEdge = sqrt(edgex * edgex + edgey * edgey);
if(length > distanceToEdge) {
ax = edgex;
ay = edgey;
}
}
#endif
//keep cardinal input within positive and negative bounds of cardinalMax
if(abs(ax) > cardinalMax) ax = copysign(cardinalMax, ax);
if(abs(ay) > cardinalMax) ay = copysign(cardinalMax, ay);
//add epsilon to counteract floating point precision error
ax = copysign(abs(ax) + 1e-09, ax);
ay = copysign(abs(ay) + 1e-09, ay);
#endif
n32 data;
data.byte(0) = y->value();
data.byte(1) = x->value();
@ -297,7 +313,7 @@ auto Gamepad::read() -> n32 {
data.bit(29) = z->value();
data.bit(30) = b->value();
data.bit(31) = a->value();
//when L+R+Start are pressed: the X/Y axes are zeroed, RST is set, and Start is cleared
if(l->value() && r->value() && start->value()) {
data.byte(0) = 0; //Y-Axis

View File

@ -30,16 +30,16 @@ auto CPU::unload() -> void {
}
auto CPU::main() -> void {
instruction();
synchronize();
}
while(!vi.refreshed) {
instruction();
synchronize();
}
auto CPU::step(u32 clocks) -> void {
Thread::clock += clocks;
vi.refreshed = false;
}
auto CPU::synchronize() -> void {
auto clocks = Thread::clock * 2;
auto clocks = Thread::clock;
Thread::clock = 0;
vi.clock -= clocks;
@ -47,11 +47,11 @@ auto CPU::synchronize() -> void {
rsp.clock -= clocks;
rdp.clock -= clocks;
pif.clock -= clocks;
while( vi.clock < 0) vi.main();
while( ai.clock < 0) ai.main();
while(rsp.clock < 0) rsp.main();
while(rdp.clock < 0) rdp.main();
while(pif.clock < 0) pif.main();
vi.main();
ai.main();
rsp.main();
rdp.main();
pif.main();
queue.step(clocks, [](u32 event) {
switch(event) {
@ -81,19 +81,30 @@ auto CPU::instruction() -> void {
if(auto interrupts = scc.cause.interruptPending & scc.status.interruptMask) {
if(scc.status.interruptEnable && !scc.status.exceptionLevel && !scc.status.errorLevel) {
debugger.interrupt(scc.cause.interruptPending);
step(1);
step(1 * 2);
return exception.interrupt();
}
}
if (scc.nmiPending) {
debugger.nmi();
step(1);
step(1 * 2);
return exception.nmi();
}
if constexpr(Accuracy::CPU::Recompiler) {
// Fast path: attempt to lookup previously compiled blocks with devirtualizeFast
// and fastFetchBlock, this skips exception handling, error checking, and
// code emitting pathways for maximum lookup performance.
// As memory writes cause recompiler block invalidation, this shouldn't be detectable.
if (auto address = devirtualizeFast(ipu.pc)) {
if(auto block = recompiler.fastFetchBlock(address)) {
block->execute(*this);
return;
}
}
if (auto address = devirtualize(ipu.pc)) {
auto block = recompiler.block(*address);
auto block = recompiler.block(ipu.pc, *address);
block->execute(*this);
}
}
@ -111,7 +122,8 @@ auto CPU::instruction() -> void {
auto CPU::instructionEpilogue() -> s32 {
if constexpr(Accuracy::CPU::Recompiler) {
icache.step(ipu.pc); //simulates timings without performing actual icache loads
//simulates timings without performing actual icache loads
icache.step(ipu.pc, devirtualizeFast(ipu.pc));
}
ipu.r[0].u64 = 0;
@ -156,7 +168,9 @@ auto CPU::power(bool reset) -> void {
if constexpr(Accuracy::CPU::Recompiler) {
auto buffer = ares::Memory::FixedAllocator::get().tryAcquire(4_MiB);
memory::jitprotect(false);
recompiler.allocator.resize(4_MiB, bump_allocator::executable | bump_allocator::zero_fill, buffer);
memory::jitprotect(true);
recompiler.reset();
}
}

View File

@ -33,7 +33,6 @@ struct CPU : Thread {
auto unload() -> void;
auto main() -> void;
auto step(u32 clocks) -> void;
auto synchronize() -> void;
auto instruction() -> void;
@ -106,27 +105,27 @@ struct CPU : Thread {
struct InstructionCache {
CPU& self;
struct Line;
auto line(u32 address) -> Line& { return lines[address >> 5 & 0x1ff]; }
auto line(u32 vaddr) -> Line& { return lines[vaddr >> 5 & 0x1ff]; }
//used by the recompiler to simulate instruction cache fetch timing
auto step(u32 address) -> void {
auto& line = this->line(address);
auto step(u32 vaddr, u32 address) -> void {
auto& line = this->line(vaddr);
if(!line.hit(address)) {
self.step(48);
self.step(48 * 2);
line.valid = 1;
line.tag = address & ~0x0000'0fff;
} else {
self.step(2);
self.step(1 * 2);
}
}
//used by the interpreter to fully emulate the instruction cache
auto fetch(u32 address, CPU& cpu) -> u32 {
auto& line = this->line(address);
auto fetch(u32 vaddr, u32 address, CPU& cpu) -> u32 {
auto& line = this->line(vaddr);
if(!line.hit(address)) {
line.fill(address, cpu);
} else {
cpu.step(2);
cpu.step(1 * 2);
}
return line.read(address);
}
@ -145,7 +144,7 @@ struct CPU : Thread {
struct Line {
auto hit(u32 address) const -> bool { return valid && tag == (address & ~0x0000'0fff); }
auto fill(u32 address, CPU& cpu) -> void {
cpu.step(48);
cpu.step(48 * 2);
valid = 1;
tag = address & ~0x0000'0fff;
words[0] = cpu.busRead<Word>(tag | index | 0x00);
@ -159,7 +158,7 @@ struct CPU : Thread {
}
auto writeBack(CPU& cpu) -> void {
cpu.step(48);
cpu.step(48 * 2);
cpu.busWrite<Word>(tag | index | 0x00, words[0]);
cpu.busWrite<Word>(tag | index | 0x04, words[1]);
cpu.busWrite<Word>(tag | index | 0x08, words[2]);
@ -182,9 +181,9 @@ struct CPU : Thread {
//dcache.cpp
struct DataCache {
struct Line;
auto line(u32 address) -> Line&;
template<u32 Size> auto read(u32 address) -> u64;
template<u32 Size> auto write(u32 address, u64 data) -> void;
auto line(u32 vaddr) -> Line&;
template<u32 Size> auto read(u32 vaddr, u32 address) -> u64;
template<u32 Size> auto write(u32 vaddr, u32 address, u64 data) -> void;
auto power(bool reset) -> void;
//8KB
@ -222,10 +221,6 @@ struct CPU : Thread {
u32 address;
};
//tlb.cpp
auto load(u64 vaddr) -> Match;
auto store(u64 vaddr) -> Match;
struct Entry {
//scc-tlb.cpp
auto synchronize() -> void;
@ -239,13 +234,49 @@ struct CPU : Thread {
n40 virtualAddress;
n8 addressSpaceID;
n2 region;
//internal:
//internal:
n1 globals;
n40 addressMaskHi;
n40 addressMaskLo;
n40 addressSelect;
} entry[TLB::Entries];
//tlb.cpp
auto load(u64 vaddr) -> Match;
auto load(u64 vaddr, const Entry& entry) -> Match;
auto loadFast(u64 vaddr) -> Match;
auto store(u64 vaddr) -> Match;
auto store(u64 vaddr, const Entry& entry) -> Match;
//Small fixed-size cache of pointers to TLB entries.
//When a new entry is inserted, the slot with the lowest frequency is evicted.
struct TlbCache {
  static constexpr int entries = 4;

  struct CachedTlbEntry {
    //cached TLB entry; nullptr while the slot is empty
    const Entry* entry = nullptr;
    //value compared by refresh() to choose a victim
    //NOTE(review): presumably incremented on lookup hits in tlb.cpp - confirm
    int frequency = 0;
  } entry[entries];

  //Stores the address of entry in the slot freed by refresh().
  //Only the pointer is kept, so entry must outlive its time in the cache.
  void insert(const Entry& entry) {
    this->entry[refresh()].entry = &entry;
  }

  //Clears the slot with the lowest frequency and returns its index.
  //On ties the lowest index wins, as only a strictly smaller frequency replaces the candidate.
  int refresh() {
    int index = 0;
    for(int n = 1; n < entries; n++) {
      if(entry[n].frequency < entry[index].frequency) index = n;
    }
    entry[index].entry = nullptr;
    entry[index].frequency = 0;
    return index;
  }
} tlbCache;
u32 physicalAddress;
} tlb{*this};
@ -260,11 +291,13 @@ struct CPU : Thread {
auto segment(u64 vaddr) -> Context::Segment;
auto devirtualize(u64 vaddr) -> maybe<u64>;
alwaysinline auto devirtualizeFast(u64 vaddr) -> u64;
auto fetch(u64 vaddr) -> maybe<u32>;
template<u32 Size> auto busWrite(u32 address, u64 data) -> void;
template<u32 Size> auto busRead(u32 address) -> u64;
template<u32 Size> auto read(u64 vaddr) -> maybe<u64>;
template<u32 Size> auto write(u64 vaddr, u64 data) -> bool;
template<u32 Size> auto write(u64 vaddr, u64 data, bool alignedError=true) -> bool;
template<u32 Size> auto vaddrAlignedError(u64 vaddr, bool write) -> bool;
auto addressException(u64 vaddr) -> void;
@ -861,9 +894,10 @@ struct CPU : Thread {
}
auto pool(u32 address) -> Pool*;
auto block(u32 address) -> Block*;
auto block(u32 vaddr, u32 address) -> Block*;
auto fastFetchBlock(u32 address) -> Block*;
auto emit(u32 address) -> Block*;
auto emit(u32 vaddr, u32 address) -> Block*;
auto emitEXECUTE(u32 instruction) -> bool;
auto emitSPECIAL(u32 instruction) -> bool;
auto emitREGIMM(u32 instruction) -> bool;
@ -906,6 +940,11 @@ struct CPU : Thread {
u32 address;
u32 instruction;
} disassembler{*this};
//Single-entry translation cache used by devirtualizeFast.
//A pbase of zero means "nothing cached", so both fields start out zeroed rather
//than relying on the enclosing object having been zero-initialized.
struct DevirtualizeCache {
  uint64_t vbase = 0;  //virtual page number (vaddr >> 12) of the cached translation
  uint64_t pbase = 0;  //physical address recorded for that page; 0 while empty
} devirtualizeCache;
};
extern CPU cpu;

View File

@ -3,7 +3,7 @@ auto CPU::DataCache::Line::hit(u32 address) const -> bool {
}
template<u32 Size> auto CPU::DataCache::Line::fill(u32 address, u64 data) -> void {
cpu.step(40);
cpu.step(40 * 2);
valid = 1;
dirty = 1;
tag = address & ~0x0000'0fff;
@ -31,7 +31,7 @@ template<u32 Size> auto CPU::DataCache::Line::fill(u32 address, u64 data) -> voi
}
auto CPU::DataCache::Line::fill(u32 address) -> void {
cpu.step(40);
cpu.step(40 * 2);
valid = 1;
dirty = 0;
tag = address & ~0x0000'0fff;
@ -53,7 +53,7 @@ auto CPU::DataCache::Line::fill(u32 address) -> void {
}
auto CPU::DataCache::Line::writeBack() -> void {
cpu.step(40);
cpu.step(40 * 2);
dirty = 0;
cpu.busWrite<Word>(tag | index | 0x0, words[0]);
cpu.busWrite<Word>(tag | index | 0x4, words[1]);
@ -61,8 +61,8 @@ auto CPU::DataCache::Line::writeBack() -> void {
cpu.busWrite<Word>(tag | index | 0xc, words[3]);
}
auto CPU::DataCache::line(u32 address) -> Line& {
return lines[address >> 4 & 0x1ff];
auto CPU::DataCache::line(u32 vaddr) -> Line& {
return lines[vaddr >> 4 & 0x1ff];
}
template<u32 Size>
@ -90,25 +90,25 @@ auto CPU::DataCache::Line::write(u32 address, u64 data) -> void {
}
template<u32 Size>
auto CPU::DataCache::read(u32 address) -> u64 {
auto& line = this->line(address);
auto CPU::DataCache::read(u32 vaddr, u32 address) -> u64 {
auto& line = this->line(vaddr);
if(!line.hit(address)) {
if(line.valid && line.dirty) line.writeBack();
line.fill(address);
} else {
cpu.step(1);
cpu.step(1 * 2);
}
return line.read<Size>(address);
}
template<u32 Size>
auto CPU::DataCache::write(u32 address, u64 data) -> void {
auto& line = this->line(address);
auto CPU::DataCache::write(u32 vaddr, u32 address, u64 data) -> void {
auto& line = this->line(vaddr);
if(!line.hit(address)) {
if(line.valid && line.dirty) line.writeBack();
return line.fill<Size>(address, data);
} else {
cpu.step(1);
cpu.step(1 * 2);
}
line.write<Size>(address, data);
}

View File

@ -421,6 +421,7 @@ auto CPU::FADD_S(u8 fd, u8 fs, u8 ft) -> void {
CHECK_FPE(f32, ffd, FS(f32) + FT(f32));
if(!fpuCheckOutput(ffd)) return;
FD(f32) = ffd;
step((3 - 1) * 2);
}
auto CPU::FADD_D(u8 fd, u8 fs, u8 ft) -> void {
@ -431,6 +432,7 @@ auto CPU::FADD_D(u8 fd, u8 fs, u8 ft) -> void {
CHECK_FPE(f64, ffd, ffs + fft);
if(!fpuCheckOutput(ffd)) return;
FD(f64) = ffd;
step((3 - 1) * 2);
}
auto CPU::FCEIL_L_S(u8 fd, u8 fs) -> void {
@ -439,6 +441,7 @@ auto CPU::FCEIL_L_S(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s64>(ffs)) return;
CHECK_FPE(s64, ffd, roundCeil<s64>(ffs));
FD(s64) = ffd;
step((5 - 1) * 2);
}
auto CPU::FCEIL_L_D(u8 fd, u8 fs) -> void {
@ -447,6 +450,7 @@ auto CPU::FCEIL_L_D(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s64>(ffs)) return;
CHECK_FPE(s64, ffd, roundCeil<s64>(ffs));
FD(s64) = ffd;
step((5 - 1) * 2);
}
auto CPU::FCEIL_W_S(u8 fd, u8 fs) -> void {
@ -455,6 +459,7 @@ auto CPU::FCEIL_W_S(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s32>(ffs)) return;
CHECK_FPE_CONV(s32, ffd, roundCeil<s32>(ffs));
FD(s32) = ffd;
step((5 - 1) * 2);
}
auto CPU::FCEIL_W_D(u8 fd, u8 fs) -> void {
@ -463,6 +468,7 @@ auto CPU::FCEIL_W_D(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s32>(ffs)) return;
CHECK_FPE_CONV(s32, ffd, roundCeil<s32>(ffs));
FD(s32) = ffd;
step((5 - 1) * 2);
}
#define XORDERED(type, value, quiet) \
@ -652,6 +658,7 @@ auto CPU::FCVT_S_D(u8 fd, u8 fs) -> void {
CHECK_FPE(f32, ffd, (f32)ffs);
if(!fpuCheckOutput(ffd)) return;
FD(f32) = ffd;
step((2 - 1) * 2);
}
auto CPU::FCVT_S_W(u8 fd, u8 fs) -> void {
@ -660,6 +667,7 @@ auto CPU::FCVT_S_W(u8 fd, u8 fs) -> void {
CHECK_FPE(f32, ffd, ffs);
if(!fpuCheckOutput(ffd)) return;
FD(f32) = ffd;
step((5 - 1) * 2);
}
auto CPU::FCVT_S_L(u8 fd, u8 fs) -> void {
@ -672,6 +680,7 @@ auto CPU::FCVT_S_L(u8 fd, u8 fs) -> void {
CHECK_FPE(f32, ffd, (f32)ffs);
if(!fpuCheckOutput(ffd)) return;
FD(f32) = ffd;
step((5 - 1) * 2);
}
auto CPU::FCVT_D_S(u8 fd, u8 fs) -> void {
@ -694,6 +703,7 @@ auto CPU::FCVT_D_W(u8 fd, u8 fs) -> void {
CHECK_FPE(f64, ffd, (f64)ffs);
if(!fpuCheckOutput(ffd)) return;
FD(f64) = ffd;
step((5 - 1) * 2);
}
auto CPU::FCVT_D_L(u8 fd, u8 fs) -> void {
@ -706,6 +716,7 @@ auto CPU::FCVT_D_L(u8 fd, u8 fs) -> void {
CHECK_FPE(f64, ffd, (f64)ffs);
if(!fpuCheckOutput(ffd)) return;
FD(f64) = ffs;
step((5 - 1) * 2);
}
auto CPU::FCVT_L_S(u8 fd, u8 fs) -> void {
@ -714,6 +725,7 @@ auto CPU::FCVT_L_S(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s64>(ffs)) return;
CHECK_FPE(s64, ffd, roundCurrent<s64>(ffs));
FD(s64) = ffd;
step((5 - 1) * 2);
}
auto CPU::FCVT_L_D(u8 fd, u8 fs) -> void {
@ -722,6 +734,7 @@ auto CPU::FCVT_L_D(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s64>(ffs)) return;
CHECK_FPE(s64, ffd, roundCurrent<s64>(ffs));
FD(s64) = ffd;
step((5 - 1) * 2);
}
auto CPU::FCVT_W_S(u8 fd, u8 fs) -> void {
@ -730,6 +743,7 @@ auto CPU::FCVT_W_S(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s32>(ffs)) return;
CHECK_FPE_CONV(s32, ffd, roundCurrent<s32>(ffs));
FD(s32) = ffd;
step((5 - 1) * 2);
}
auto CPU::FCVT_W_D(u8 fd, u8 fs) -> void {
@ -738,6 +752,7 @@ auto CPU::FCVT_W_D(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s32>(ffs)) return;
CHECK_FPE_CONV(s32, ffd, roundCurrent<s32>(ffs));
FD(s32) = ffd;
step((5 - 1) * 2);
}
auto CPU::FDIV_S(u8 fd, u8 fs, u8 ft) -> void {
@ -748,6 +763,7 @@ auto CPU::FDIV_S(u8 fd, u8 fs, u8 ft) -> void {
CHECK_FPE(f32, ffd, ffs / fft);
if(!fpuCheckOutput(ffd)) return;
FD(f32) = ffd;
step((29 - 1) * 2);
}
auto CPU::FDIV_D(u8 fd, u8 fs, u8 ft) -> void {
@ -758,6 +774,7 @@ auto CPU::FDIV_D(u8 fd, u8 fs, u8 ft) -> void {
CHECK_FPE(f64, ffd, ffs / fft);
if(!fpuCheckOutput(ffd)) return;
FD(f64) = ffd;
step((58 - 1) * 2);
}
auto CPU::FFLOOR_L_S(u8 fd, u8 fs) -> void {
@ -766,6 +783,7 @@ auto CPU::FFLOOR_L_S(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s64>(ffs)) return;
CHECK_FPE(s64, ffd, roundFloor<s64>(ffs));
FD(s64) = ffd;
step((5 - 1) * 2);
}
auto CPU::FFLOOR_L_D(u8 fd, u8 fs) -> void {
@ -774,6 +792,7 @@ auto CPU::FFLOOR_L_D(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s64>(ffs)) return;
CHECK_FPE(s64, ffd, roundFloor<s64>(ffs));
FD(s64) = ffd;
step((5 - 1) * 2);
}
auto CPU::FFLOOR_W_S(u8 fd, u8 fs) -> void {
@ -782,6 +801,7 @@ auto CPU::FFLOOR_W_S(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s32>(ffs)) return;
CHECK_FPE_CONV(s32, ffd, roundFloor<s32>(ffs));
FD(s32) = ffd;
step((5 - 1) * 2);
}
auto CPU::FFLOOR_W_D(u8 fd, u8 fs) -> void {
@ -790,6 +810,7 @@ auto CPU::FFLOOR_W_D(u8 fd, u8 fs) -> void {
if(!fpuCheckInputConv<s32>(ffs)) return;
CHECK_FPE_CONV(s32, ffd, roundFloor<s32>(ffs));
FD(s32) = ffd;
step((5 - 1) * 2);
}
auto CPU::FMOV_S(u8 fd, u8 fs) -> void {
@ -810,6 +831,7 @@ auto CPU::FMUL_S(u8 fd, u8 fs, u8 ft) -> void {
CHECK_FPE(f32, ffd, ffs * fft);
if(!fpuCheckOutput(ffd)) return;
FD(f32) = ffd;
step((5 - 1) * 2);
}
auto CPU::FMUL_D(u8 fd, u8 fs, u8 ft) -> void {
@ -820,6 +842,7 @@ auto CPU::FMUL_D(u8 fd, u8 fs, u8 ft) -> void {
CHECK_FPE(f64, ffd, ffs * fft);
if(!fpuCheckOutput(ffd)) return;
FD(f64) = ffd;
step((8 - 1) * 2);
}
auto CPU::FNEG_S(u8 fd, u8 fs) -> void {
@ -847,6 +870,7 @@ auto CPU::FROUND_L_S(u8 fd, u8 fs) -> void {
CHECK_FPE(s64, ffd, roundNearest<s64>(ffs));
if(ffd != ffs && fpeInexact()) return exception.floatingPoint();
FD(s64) = ffd;
step((5 - 1) * 2);
}
auto CPU::FROUND_L_D(u8 fd, u8 fs) -> void {
@ -856,6 +880,7 @@ auto CPU::FROUND_L_D(u8 fd, u8 fs) -> void {
CHECK_FPE(s64, ffd, roundNearest<s64>(ffs));
if(ffd != ffs && fpeInexact()) return exception.floatingPoint();
FD(s64) = ffd;
step((5 - 1) * 2);
}
auto CPU::FROUND_W_S(u8 fd, u8 fs) -> void {
@ -865,6 +890,7 @@ auto CPU::FROUND_W_S(u8 fd, u8 fs) -> void {
CHECK_FPE_CONV(s32, ffd, roundNearest<s32>(ffs));
if(ffd != ffs && fpeInexact()) return exception.floatingPoint();
FD(s32) = ffd;
step((5 - 1) * 2);
}
auto CPU::FROUND_W_D(u8 fd, u8 fs) -> void {
@ -874,6 +900,7 @@ auto CPU::FROUND_W_D(u8 fd, u8 fs) -> void {
CHECK_FPE_CONV(s32, ffd, roundNearest<s32>(ffs));
if(ffd != ffs && fpeInexact()) return exception.floatingPoint();
FD(s32) = ffd;
step((5 - 1) * 2);
}
auto CPU::FSQRT_S(u8 fd, u8 fs) -> void {
@ -883,6 +910,7 @@ auto CPU::FSQRT_S(u8 fd, u8 fs) -> void {
CHECK_FPE(f32, ffd, squareRoot(ffs));
if(!fpuCheckOutput(ffd)) return;
FD(f32) = ffd;
step((29 - 1) * 2);
}
auto CPU::FSQRT_D(u8 fd, u8 fs) -> void {
@ -892,6 +920,7 @@ auto CPU::FSQRT_D(u8 fd, u8 fs) -> void {
CHECK_FPE(f64, ffd, squareRoot(ffs));
if(!fpuCheckOutput(ffd)) return;
FD(f64) = ffd;
step((58 - 1) * 2);
}
auto CPU::FSUB_S(u8 fd, u8 fs, u8 ft) -> void {
@ -902,6 +931,7 @@ auto CPU::FSUB_S(u8 fd, u8 fs, u8 ft) -> void {
CHECK_FPE(f32, ffd, ffs - fft);
if(!fpuCheckOutput(ffd)) return;
FD(f32) = ffd;
step((3 - 1) * 2);
}
auto CPU::FSUB_D(u8 fd, u8 fs, u8 ft) -> void {
@ -912,6 +942,7 @@ auto CPU::FSUB_D(u8 fd, u8 fs, u8 ft) -> void {
CHECK_FPE(f64, ffd, ffs - fft);
if(!fpuCheckOutput(ffd)) return;
FD(f64) = ffd;
step((3 - 1) * 2);
}
auto CPU::FTRUNC_L_S(u8 fd, u8 fs) -> void {
@ -921,6 +952,7 @@ auto CPU::FTRUNC_L_S(u8 fd, u8 fs) -> void {
CHECK_FPE(s64, ffd, roundTrunc<s64>(ffs));
if((f32)ffd != ffs && fpeInexact()) return exception.floatingPoint();
FD(s64) = ffd;
step((5 - 1) * 2);
}
auto CPU::FTRUNC_L_D(u8 fd, u8 fs) -> void {
@ -930,6 +962,7 @@ auto CPU::FTRUNC_L_D(u8 fd, u8 fs) -> void {
CHECK_FPE(s64, ffd, roundTrunc<s64>(ffs));
if((f64)ffd != ffs && fpeInexact()) return exception.floatingPoint();
FD(s64) = ffd;
step((5 - 1) * 2);
}
auto CPU::FTRUNC_W_S(u8 fd, u8 fs) -> void {
@ -939,6 +972,7 @@ auto CPU::FTRUNC_W_S(u8 fd, u8 fs) -> void {
CHECK_FPE_CONV(s32, ffd, roundTrunc<s32>(ffs));
if((f32)ffd != ffs && fpeInexact()) return exception.floatingPoint();
FD(s32) = ffd;
step((5 - 1) * 2);
}
auto CPU::FTRUNC_W_D(u8 fd, u8 fs) -> void {
@ -948,6 +982,7 @@ auto CPU::FTRUNC_W_D(u8 fd, u8 fs) -> void {
CHECK_FPE_CONV(s32, ffd, roundTrunc<s32>(ffs));
if((f64)ffd != ffs && fpeInexact()) return exception.floatingPoint();
FD(s32) = ffd;
step((5 - 1) * 2);
}
auto CPU::LDC1(u8 ft, cr64& rs, s16 imm) -> void {

View File

@ -269,7 +269,7 @@ auto CPU::DDIV(cr64& rs, cr64& rt) -> void {
LO.u64 = rs.s64 < 0 ? +1 : -1;
HI.u64 = rs.s64;
}
step(69);
step((69 - 1) * 2);
}
auto CPU::DDIVU(cr64& rs, cr64& rt) -> void {
@ -281,7 +281,7 @@ auto CPU::DDIVU(cr64& rs, cr64& rt) -> void {
LO.u64 = -1;
HI.u64 = rs.u64;
}
step(69);
step((69 - 1) * 2);
}
auto CPU::DIV(cr64& rs, cr64& rt) -> void {
@ -294,7 +294,7 @@ auto CPU::DIV(cr64& rs, cr64& rt) -> void {
LO.u64 = rs.s32 < 0 ? +1 : -1;
HI.u64 = rs.s32;
}
step(37);
step((37 - 1) * 2);
}
auto CPU::DIVU(cr64& rs, cr64& rt) -> void {
@ -306,7 +306,7 @@ auto CPU::DIVU(cr64& rs, cr64& rt) -> void {
LO.u64 = -1;
HI.u64 = rs.s32;
}
step(37);
step((37 - 1) * 2);
}
auto CPU::DMULT(cr64& rs, cr64& rt) -> void {
@ -329,7 +329,7 @@ auto CPU::DMULT(cr64& rs, cr64& rt) -> void {
LO.u64 = result >> 0;
HI.u64 = result >> 64;
#endif
step(8);
step((8 - 1) * 2);
}
auto CPU::DMULTU(cr64& rs, cr64& rt) -> void {
@ -346,7 +346,7 @@ auto CPU::DMULTU(cr64& rs, cr64& rt) -> void {
LO.u64 = result >> 0;
HI.u64 = result >> 64;
#endif
step(8);
step((8 - 1) * 2);
}
auto CPU::DSLL(r64& rd, cr64& rt, u8 sa) -> void {
@ -786,14 +786,14 @@ auto CPU::MULT(cr64& rs, cr64& rt) -> void {
u64 result = s64(rs.s32) * s64(rt.s32);
LO.u64 = s32(result >> 0);
HI.u64 = s32(result >> 32);
step(5);
step((5 - 1) * 2);
}
auto CPU::MULTU(cr64& rs, cr64& rt) -> void {
u64 result = u64(rs.u32) * u64(rt.u32);
LO.u64 = s32(result >> 0);
HI.u64 = s32(result >> 32);
step(5);
step((5 - 1) * 2);
}
auto CPU::NOR(r64& rd, cr64& rs, cr64& rt) -> void {
@ -1061,17 +1061,17 @@ auto CPU::SWL(cr64& rt, cr64& rs, s16 imm) -> void {
if(context.bigEndian())
switch(vaddr & 3) {
case 0:
if(!write<Word>(vaddr & ~3 | 0, data >> 0)) return;
if(!write<Word>(vaddr + 0, data >> 0)) return;
break;
case 1:
if(!write<Byte>(vaddr & ~3 | 1, data >> 24)) return;
if(!write<Half>(vaddr & ~3 | 2, data >> 8)) return;
if(!write<Byte>(vaddr + 0, data >> 24)) return;
if(!write<Half>(vaddr + 1, data >> 8)) return;
break;
case 2:
if(!write<Half>(vaddr & ~3 | 2, data >> 16)) return;
if(!write<Half>(vaddr + 0, data >> 16)) return;
break;
case 3:
if(!write<Byte>(vaddr & ~3 | 3, data >> 24)) return;
if(!write<Byte>(vaddr + 0, data >> 24)) return;
break;
}
}
@ -1100,17 +1100,17 @@ auto CPU::SWR(cr64& rt, cr64& rs, s16 imm) -> void {
if(context.bigEndian())
switch(vaddr & 3) {
case 0:
if(!write<Byte>(vaddr & ~3 | 0, data >> 0)) return;
if(!write<Byte>(vaddr + 0, data >> 0, false)) return;
break;
case 1:
if(!write<Half>(vaddr & ~3 | 0, data >> 0)) return;
if(!write<Half>(vaddr + 0, data >> 0, false)) return;
break;
case 2:
if(!write<Half>(vaddr & ~3 | 0, data >> 8)) return;
if(!write<Byte>(vaddr & ~3 | 2, data >> 0)) return;
if(!write<Byte>(vaddr + 0, data >> 0, false)) return;
if(!write<Half>(vaddr - 2, data >> 8, false)) return;
break;
case 3:
if(!write<Word>(vaddr & ~3 | 0, data >> 0)) return;
if(!write<Word>(vaddr + 0, data >> 0, false)) return;
break;
}
}

View File

@ -347,6 +347,7 @@ auto CPU::TLBWI() -> void {
if(!scc.status.enable.coprocessor0) return exception.coprocessor0();
}
if(scc.index.tlbEntry >= TLB::Entries) return;
devirtualizeCache = {};
tlb.entry[scc.index.tlbEntry] = scc.tlb;
tlb.entry[scc.index.tlbEntry].synchronize();
debugger.tlbWrite(scc.index.tlbEntry);
@ -358,6 +359,7 @@ auto CPU::TLBWR() -> void {
}
u8 index = getControlRandom();
if(index >= TLB::Entries) return;
devirtualizeCache = {};
tlb.entry[index] = scc.tlb;
tlb.entry[index].synchronize();
debugger.tlbWrite(index);

View File

@ -109,46 +109,73 @@ auto CPU::devirtualize(u64 vaddr) -> maybe<u64> {
unreachable;
}
// Lightweight counterpart to devirtualize, intended for icache lookups.
// It performs no unmapped-region or exception handling (instruction fetch is
// expected to have dealt with those already) and ignores TLB match failure.
auto CPU::devirtualizeFast(u64 vaddr) -> u64 {
  // Translations are remembered per 4KiB page, on the assumption that the
  // address space is mapped in pages of that size. A pbase of zero marks the
  // cache as empty. TLB writes reset the cache, so a hit should not be stale.
  const auto page = vaddr >> 12;
  if(devirtualizeCache.pbase && devirtualizeCache.vbase == page) {
    return (devirtualizeCache.pbase & ~0xfff) + (vaddr & 0xfff);
  }

  // Miss: record the page first, then resolve and record the physical address.
  devirtualizeCache.vbase = page;
  u64 physical = 0;  // segments not listed below translate to zero
  switch(segment(vaddr)) {
  case Context::Segment::Mapped:
    physical = tlb.loadFast(vaddr).address & context.physMask;
    break;
  case Context::Segment::Cached:
  case Context::Segment::Direct:
    physical = vaddr & 0x1fff'ffff;
    break;
  case Context::Segment::Cached32:
  case Context::Segment::Direct32:
    physical = vaddr & 0xffff'ffff;
    break;
  default:
    break;
  }
  devirtualizeCache.pbase = physical;
  return physical;
}
// Forwards a write of `data` (access width given by the Size template argument)
// at `address` to the global bus object.
// NOTE(review): two forms of the bus call are present here -- one that collects a
// cycle count and then calls step(cycles), and one that hands *this to the bus.
// Presumably these are the before/after lines of the change; confirm which is live.
template<u32 Size>
inline auto CPU::busWrite(u32 address, u64 data) -> void {
u32 cycles = 0;
bus.write<Size>(address, data, cycles);
step(cycles);
bus.write<Size>(address, data, *this);
}
// Reads a value (access width given by the Size template argument) at `address`
// from the global bus object and returns it.
// NOTE(review): two forms of the bus call are present here -- one that collects a
// cycle count and calls step(cycles) before returning the data, and one that hands
// *this to the bus. Presumably these are the before/after lines of the change;
// confirm which is live.
template<u32 Size>
inline auto CPU::busRead(u32 address) -> u64 {
u32 cycles = 0; u64 data;
data = bus.read<Size>(address, cycles);
return step(cycles), data;
return bus.read<Size>(address, *this);
}
auto CPU::fetch(u64 vaddr) -> maybe<u32> {
if(vaddrAlignedError<Word>(vaddr, false)) return nothing;
switch(segment(vaddr)) {
case Context::Segment::Unused:
step(1);
step(1 * 2);
addressException(vaddr);
exception.addressLoad();
return nothing;
case Context::Segment::Mapped:
if(auto match = tlb.load(vaddr)) {
if(match.cache) return icache.fetch(match.address & context.physMask, cpu);
step(1);
if(match.cache) return icache.fetch(vaddr, match.address & context.physMask, cpu);
step(1 * 2);
return busRead<Word>(match.address & context.physMask);
}
step(1);
step(1 * 2);
addressException(vaddr);
return nothing;
case Context::Segment::Cached:
return icache.fetch(vaddr & 0x1fff'ffff, cpu);
return icache.fetch(vaddr, vaddr & 0x1fff'ffff, cpu);
case Context::Segment::Cached32:
return icache.fetch(vaddr & 0xffff'ffff, cpu);
return icache.fetch(vaddr, vaddr & 0xffff'ffff, cpu);
case Context::Segment::Direct:
step(1);
step(1 * 2);
return busRead<Word>(vaddr & 0x1fff'ffff);
case Context::Segment::Direct32:
step(1);
step(1 * 2);
return busRead<Word>(vaddr & 0xffff'ffff);
}
@ -160,28 +187,28 @@ auto CPU::read(u64 vaddr) -> maybe<u64> {
if(vaddrAlignedError<Size>(vaddr, false)) return nothing;
switch(segment(vaddr)) {
case Context::Segment::Unused:
step(1);
step(1 * 2);
addressException(vaddr);
exception.addressLoad();
return nothing;
case Context::Segment::Mapped:
if(auto match = tlb.load(vaddr)) {
if(match.cache) return dcache.read<Size>(match.address & context.physMask);
step(1);
if(match.cache) return dcache.read<Size>(vaddr, match.address & context.physMask);
step(1 * 2);
return busRead<Size>(match.address & context.physMask);
}
step(1);
step(1 * 2);
addressException(vaddr);
return nothing;
case Context::Segment::Cached:
return dcache.read<Size>(vaddr & 0x1fff'ffff);
return dcache.read<Size>(vaddr, vaddr & 0x1fff'ffff);
case Context::Segment::Cached32:
return dcache.read<Size>(vaddr & 0xffff'ffff);
return dcache.read<Size>(vaddr, vaddr & 0xffff'ffff);
case Context::Segment::Direct:
step(1);
step(1 * 2);
return busRead<Size>(vaddr & 0x1fff'ffff);
case Context::Segment::Direct32:
step(1);
step(1 * 2);
return busRead<Size>(vaddr & 0xffff'ffff);
}
@ -189,32 +216,33 @@ auto CPU::read(u64 vaddr) -> maybe<u64> {
}
template<u32 Size>
auto CPU::write(u64 vaddr, u64 data) -> bool {
if(vaddrAlignedError<Size>(vaddr, true)) return false;
auto CPU::write(u64 vaddr0, u64 data, bool alignedError) -> bool {
if(alignedError && vaddrAlignedError<Size>(vaddr0, true)) return false;
u64 vaddr = vaddr0 & ~((u64)Size - 1);
switch(segment(vaddr)) {
case Context::Segment::Unused:
step(1);
addressException(vaddr);
step(1 * 2);
addressException(vaddr0);
exception.addressStore();
return false;
case Context::Segment::Mapped:
if(auto match = tlb.store(vaddr)) {
if(match.cache) return dcache.write<Size>(match.address & context.physMask, data), true;
step(1);
if(match.cache) return dcache.write<Size>(vaddr, match.address & context.physMask, data), true;
step(1 * 2);
return busWrite<Size>(match.address & context.physMask, data), true;
}
step(1);
addressException(vaddr);
step(1 * 2);
addressException(vaddr0);
return false;
case Context::Segment::Cached:
return dcache.write<Size>(vaddr & 0x1fff'ffff, data), true;
return dcache.write<Size>(vaddr, vaddr & 0x1fff'ffff, data), true;
case Context::Segment::Cached32:
return dcache.write<Size>(vaddr & 0xffff'ffff, data), true;
return dcache.write<Size>(vaddr, vaddr & 0xffff'ffff, data), true;
case Context::Segment::Direct:
step(1);
step(1 * 2);
return busWrite<Size>(vaddr & 0x1fff'ffff, data), true;
case Context::Segment::Direct32:
step(1);
step(1 * 2);
return busWrite<Size>(vaddr & 0xffff'ffff, data), true;
}
@ -225,14 +253,14 @@ template<u32 Size>
auto CPU::vaddrAlignedError(u64 vaddr, bool write) -> bool {
if constexpr(Accuracy::CPU::AddressErrors) {
if(unlikely(vaddr & Size - 1)) {
step(1);
step(1 * 2);
addressException(vaddr);
if(write) exception.addressStore();
else exception.addressLoad();
return true;
}
if (context.bits == 32 && unlikely((s32)vaddr != vaddr)) {
step(1);
step(1 * 2);
addressException(vaddr);
if(write) exception.addressStore();
else exception.addressLoad();

View File

@ -4,15 +4,21 @@ auto CPU::Recompiler::pool(u32 address) -> Pool* {
return pool;
}
// Returns the recompiled block stored for `address`, emitting one on demand.
// The slot inside the pool is selected by bits 2..7 of `address`
// (address >> 2 & 0x3f). If that slot is already populated it is returned as-is;
// otherwise emit() produces a block, which is stored in the slot and returned.
// NOTE(review): both a one-argument and a two-argument (vaddr, address) signature
// and emit() call appear here -- presumably the before/after lines of the change;
// confirm which is live.
auto CPU::Recompiler::block(u32 address) -> Block* {
auto CPU::Recompiler::block(u32 vaddr, u32 address) -> Block* {
if(auto block = pool(address)->blocks[address >> 2 & 0x3f]) return block;
auto block = emit(address);
auto block = emit(vaddr, address);
pool(address)->blocks[address >> 2 & 0x3f] = block;
//called after every emit; presumably re-applies protection to the JIT memory -- TODO confirm
memory::jitprotect(true);
return block;
}
auto CPU::Recompiler::emit(u32 address) -> Block* {
// Looks up an already-emitted block for `address` without emitting anything.
// Returns nullptr when no pool exists for the address; otherwise returns the
// contents of the pool slot for the address.
auto CPU::Recompiler::fastFetchBlock(u32 address) -> Block* {
  const auto poolIndex  = address >> 8 & 0x1fffff;
  const auto blockIndex = address >> 2 & 0x3f;
  auto& slot = pools[poolIndex];
  if(!slot) return nullptr;
  return slot->blocks[blockIndex];
}
auto CPU::Recompiler::emit(u32 vaddr, u32 address) -> Block* {
if(unlikely(allocator.available() < 1_MiB)) {
print("CPU allocator flush\n");
memory::jitprotect(false);
@ -24,17 +30,19 @@ auto CPU::Recompiler::emit(u32 address) -> Block* {
auto block = (Block*)allocator.acquire(sizeof(Block));
beginFunction(3);
u32 memCycles;
Thread thread;
bool hasBranched = 0;
while(true) {
u32 instruction = bus.read<Word>(address, memCycles);
u32 instruction = bus.read<Word>(address, thread);
bool branched = emitEXECUTE(instruction);
if(unlikely(instruction == 0x1000'ffff)) {
if(unlikely(instruction == 0x1000'ffff //beq 0,0,<pc>
|| instruction == (2 << 26 | vaddr >> 2 & 0x3ff'ffff))) { //j <pc>
//accelerate idle loops
mov32(reg(1), imm(64));
mov32(reg(1), imm(64 * 2));
call(&CPU::step);
}
call(&CPU::instructionEpilogue);
vaddr += 4;
address += 4;
if(hasBranched || (address & 0xfc) == 0) break; //block boundary
hasBranched = branched;

View File

@ -1,48 +1,104 @@
// Resolves a load of `vaddr` against a single TLB entry chosen by the caller.
// On success the result carries: found = true, a flag that is true unless the
// selected half's cacheAlgorithm equals 2, and the physical address.
// If the selected half is not valid, a TLB-load-invalid exception is raised
// and a not-found result is returned.
auto CPU::TLB::load(u64 vaddr, const Entry& entry) -> Match {
  // addressSelect decides which of the entry's two halves applies to vaddr
  const bool half = vaddr & entry.addressSelect;

  if(entry.valid[half]) {
    physicalAddress = entry.physicalAddress[half] + (vaddr & entry.addressMaskLo);
    self.debugger.tlbLoad(vaddr, physicalAddress);
    const bool cacheable = entry.cacheAlgorithm[half] != 2;
    return {true, cacheable, physicalAddress};
  }

  self.addressException(vaddr);
  self.debugger.tlbLoadInvalid(vaddr);
  self.exception.tlbLoadInvalid();
  return {false};
}
// Translates `vaddr` for a load by searching the TLB.
// An entry is a candidate when it is global or its address space ID equals
// scc.tlb.addressSpaceID, its virtualAddress equals vaddr masked by
// addressMaskHi, and its region equals the top two bits of vaddr.
// If no candidate yields a match, a TLB-load-miss exception is raised and a
// not-found result is returned.
// NOTE(review): this span carries both an inline validity check/translation and a
// call to the load(vaddr, entry) helper inside the second loop -- presumably the
// before/after lines of the change; confirm which is live.
// NOTE(review): when load(vaddr, entry) reports failure the loops keep scanning
// instead of returning, although the helper has already raised an exception;
// confirm that continuing (and possibly raising again) is intended.
auto CPU::TLB::load(u64 vaddr) -> Match {
//first pass: entries referenced from tlbCache; empty cache slots are skipped
for(auto& entry : this->tlbCache.entry) {
if(!entry.entry) continue;
if(!entry.entry->globals && entry.entry->addressSpaceID != self.scc.tlb.addressSpaceID) continue;
if((vaddr & entry.entry->addressMaskHi) != entry.entry->virtualAddress) continue;
if(vaddr >> 62 != entry.entry->region) continue;
if(auto match = load(vaddr, *entry.entry)) {
//count the hit on this cache slot
entry.frequency++;
return match;
}
}
//second pass: every TLB entry
for(auto& entry : this->entry) {
if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue;
if((vaddr & entry.addressMaskHi) != entry.virtualAddress) continue;
if(vaddr >> 62 != entry.region) continue;
bool lo = vaddr & entry.addressSelect;
if(!entry.valid[lo]) {
self.addressException(vaddr);
self.debugger.tlbLoadInvalid(vaddr);
self.exception.tlbLoadInvalid();
return {false};
if(auto match = load(vaddr, entry)) {
//hand the matching entry to tlbCache before returning it
this->tlbCache.insert(entry);
return match;
}
physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo);
self.debugger.tlbLoad(vaddr, physicalAddress);
return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress};
}
//no entry produced a translation
self.addressException(vaddr);
self.debugger.tlbLoadMiss(vaddr);
self.exception.tlbLoadMiss();
return {false};
}
auto CPU::TLB::store(u64 vaddr) -> Match {
// Fast(er) version of load for recompiler icache lookups;
// avoids exceptions/debug checks.
// Scans every TLB entry with the same candidate test as load() (global or equal
// address space ID, matching masked virtual address, matching region) and
// returns {false, 0, 0} when the selected half is invalid or nothing matches.
// NOTE(review): lines that raise store-invalid / modification exceptions and call
// debugger.tlbStore also appear in this span; they look like removed lines of the
// former store() body left in by the diff rendering -- confirm they are not live.
auto CPU::TLB::loadFast(u64 vaddr) -> Match {
for(auto& entry : this->entry) {
if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue;
if((vaddr & entry.addressMaskHi) != entry.virtualAddress) continue;
if(vaddr >> 62 != entry.region) continue;
//addressSelect decides which of the entry's two halves applies to vaddr
bool lo = vaddr & entry.addressSelect;
if(!entry.valid[lo]) {
self.addressException(vaddr);
self.debugger.tlbStoreInvalid(vaddr);
self.exception.tlbStoreInvalid();
return {false};
}
if(!entry.dirty[lo]) {
self.addressException(vaddr);
self.debugger.tlbModification(vaddr);
self.exception.tlbModification();
return {false};
}
//invalid half: report not-found without raising anything
if(!entry.valid[lo]) return { false, 0, 0 };
physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo);
self.debugger.tlbStore(vaddr, physicalAddress);
return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress};
}
return {false, 0, 0};
}
// Resolves a store to `vaddr` against a single TLB entry chosen by the caller.
// The selected half must be valid (otherwise a TLB-store-invalid exception is
// raised) and dirty (otherwise a TLB-modification exception is raised); both
// failures return a not-found result.
// On success the result carries: found = true, a flag that is true unless the
// selected half's cacheAlgorithm equals 2, and the physical address.
auto CPU::TLB::store(u64 vaddr, const Entry& entry) -> Match {
  // addressSelect decides which of the entry's two halves applies to vaddr
  const bool half = vaddr & entry.addressSelect;

  if(entry.valid[half]) {
    if(entry.dirty[half]) {
      physicalAddress = entry.physicalAddress[half] + (vaddr & entry.addressMaskLo);
      self.debugger.tlbStore(vaddr, physicalAddress);
      const bool cacheable = entry.cacheAlgorithm[half] != 2;
      return {true, cacheable, physicalAddress};
    }

    // valid but not dirty
    self.addressException(vaddr);
    self.debugger.tlbModification(vaddr);
    self.exception.tlbModification();
    return {false};
  }

  // not valid
  self.addressException(vaddr);
  self.debugger.tlbStoreInvalid(vaddr);
  self.exception.tlbStoreInvalid();
  return {false};
}
auto CPU::TLB::store(u64 vaddr) -> Match {
for(auto& entry : this->tlbCache.entry) {
if(!entry.entry) continue;
if(!entry.entry->globals && entry.entry->addressSpaceID != self.scc.tlb.addressSpaceID) continue;
if((vaddr & entry.entry->addressMaskHi) != entry.entry->virtualAddress) continue;
if(vaddr >> 62 != entry.entry->region) continue;
if(auto match = store(vaddr, *entry.entry)) {
entry.frequency++;
return match;
}
}
for(auto& entry : this->entry) {
if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue;
if((vaddr & entry.addressMaskHi) != entry.virtualAddress) continue;
if(vaddr >> 62 != entry.region) continue;
if(auto match = store(vaddr, entry)) {
this->tlbCache.insert(entry);
return match;
}
}
self.addressException(vaddr);
self.debugger.tlbStoreMiss(vaddr);
self.exception.tlbStoreMiss();

View File

@ -1,30 +1,30 @@
// Reads a value (access width given by the Size template argument) from
// `address`, dispatching on address range to the component that owns it.
// The address is first masked with 0x1fff'ffff - (Size - 1), which keeps the low
// 29 bits and clears the low bits covered by Size - 1.
// Ranges listed as `unmapped`, and anything past the last range, read as 0.
// NOTE(review): each device call appears twice, once passing a `cycles` counter
// and once passing a `thread` -- presumably the before/after lines of the change;
// confirm which is live.
template<u32 Size>
inline auto Bus::read(u32 address, u32& cycles) -> u64 {
inline auto Bus::read(u32 address, Thread& thread) -> u64 {
static constexpr u64 unmapped = 0;
address &= 0x1fff'ffff - (Size - 1);
//rdram.ram is read directly, with neither a cycles nor a thread argument
if(address <= 0x007f'ffff) return rdram.ram.read<Size>(address);
if(address <= 0x03ef'ffff) return unmapped;
if(address <= 0x03ff'ffff) return rdram.read<Size>(address, cycles);
if(address <= 0x0407'ffff) return rsp.read<Size>(address, cycles);
if(address <= 0x040f'ffff) return rsp.status.read<Size>(address, cycles);
if(address <= 0x041f'ffff) return rdp.read<Size>(address, cycles);
if(address <= 0x042f'ffff) return rdp.io.read<Size>(address, cycles);
if(address <= 0x043f'ffff) return mi.read<Size>(address, cycles);
if(address <= 0x044f'ffff) return vi.read<Size>(address, cycles);
if(address <= 0x045f'ffff) return ai.read<Size>(address, cycles);
if(address <= 0x046f'ffff) return pi.read<Size>(address, cycles);
if(address <= 0x047f'ffff) return ri.read<Size>(address, cycles);
if(address <= 0x048f'ffff) return si.read<Size>(address, cycles);
if(address <= 0x03ff'ffff) return rdram.read<Size>(address, thread);
if(address <= 0x0407'ffff) return rsp.read<Size>(address, thread);
if(address <= 0x040f'ffff) return rsp.status.read<Size>(address, thread);
if(address <= 0x041f'ffff) return rdp.read<Size>(address, thread);
if(address <= 0x042f'ffff) return rdp.io.read<Size>(address, thread);
if(address <= 0x043f'ffff) return mi.read<Size>(address, thread);
if(address <= 0x044f'ffff) return vi.read<Size>(address, thread);
if(address <= 0x045f'ffff) return ai.read<Size>(address, thread);
if(address <= 0x046f'ffff) return pi.read<Size>(address, thread);
if(address <= 0x047f'ffff) return ri.read<Size>(address, thread);
if(address <= 0x048f'ffff) return si.read<Size>(address, thread);
if(address <= 0x04ff'ffff) return unmapped;
if(address <= 0x1fbf'ffff) return pi.read<Size>(address, cycles);
if(address <= 0x1fcf'ffff) return si.read<Size>(address, cycles);
if(address <= 0x7fff'ffff) return pi.read<Size>(address, cycles);
if(address <= 0x1fbf'ffff) return pi.read<Size>(address, thread);
if(address <= 0x1fcf'ffff) return si.read<Size>(address, thread);
if(address <= 0x7fff'ffff) return pi.read<Size>(address, thread);
return unmapped;
}
template<u32 Size>
inline auto Bus::write(u32 address, u64 data, u32& cycles) -> void {
inline auto Bus::write(u32 address, u64 data, Thread& thread) -> void {
address &= 0x1fff'ffff - (Size - 1);
if constexpr(Accuracy::CPU::Recompiler) {
cpu.recompiler.invalidate(address + 0); if constexpr(Size == Dual)
@ -33,20 +33,20 @@ inline auto Bus::write(u32 address, u64 data, u32& cycles) -> void {
if(address <= 0x007f'ffff) return rdram.ram.write<Size>(address, data);
if(address <= 0x03ef'ffff) return;
if(address <= 0x03ff'ffff) return rdram.write<Size>(address, data, cycles);
if(address <= 0x0407'ffff) return rsp.write<Size>(address, data, cycles);
if(address <= 0x040f'ffff) return rsp.status.write<Size>(address, data, cycles);
if(address <= 0x041f'ffff) return rdp.write<Size>(address, data, cycles);
if(address <= 0x042f'ffff) return rdp.io.write<Size>(address, data, cycles);
if(address <= 0x043f'ffff) return mi.write<Size>(address, data, cycles);
if(address <= 0x044f'ffff) return vi.write<Size>(address, data, cycles);
if(address <= 0x045f'ffff) return ai.write<Size>(address, data, cycles);
if(address <= 0x046f'ffff) return pi.write<Size>(address, data, cycles);
if(address <= 0x047f'ffff) return ri.write<Size>(address, data, cycles);
if(address <= 0x048f'ffff) return si.write<Size>(address, data, cycles);
if(address <= 0x03ff'ffff) return rdram.write<Size>(address, data, thread);
if(address <= 0x0407'ffff) return rsp.write<Size>(address, data, thread);
if(address <= 0x040f'ffff) return rsp.status.write<Size>(address, data, thread);
if(address <= 0x041f'ffff) return rdp.write<Size>(address, data, thread);
if(address <= 0x042f'ffff) return rdp.io.write<Size>(address, data, thread);
if(address <= 0x043f'ffff) return mi.write<Size>(address, data, thread);
if(address <= 0x044f'ffff) return vi.write<Size>(address, data, thread);
if(address <= 0x045f'ffff) return ai.write<Size>(address, data, thread);
if(address <= 0x046f'ffff) return pi.write<Size>(address, data, thread);
if(address <= 0x047f'ffff) return ri.write<Size>(address, data, thread);
if(address <= 0x048f'ffff) return si.write<Size>(address, data, thread);
if(address <= 0x04ff'ffff) return;
if(address <= 0x1fbf'ffff) return pi.write<Size>(address, data, cycles);
if(address <= 0x1fcf'ffff) return si.write<Size>(address, data, cycles);
if(address <= 0x7fff'ffff) return pi.write<Size>(address, data, cycles);
if(address <= 0x1fbf'ffff) return pi.write<Size>(address, data, thread);
if(address <= 0x1fcf'ffff) return si.write<Size>(address, data, thread);
if(address <= 0x7fff'ffff) return pi.write<Size>(address, data, thread);
return;
}

View File

@ -4,10 +4,10 @@ struct RCP { //A device which is part of RCP
const u32 DefaultWriteCycles = 0; //not implemented until we implement the CPU write queue
template<u32 Size>
auto read(u32 address, u32& cycles) -> u64 {
cycles = DefaultReadCycles;
auto read(u32 address, Thread& thread) -> u64 {
thread.step(DefaultReadCycles * 2);
if constexpr(Size == Byte) {
auto data = ((T*)this)->readWord(address, cycles);
auto data = ((T*)this)->readWord(address, thread);
switch(address & 3) {
case 0: return data >> 24;
case 1: return data >> 16;
@ -16,44 +16,44 @@ struct RCP { //A device which is part of RCP
}
}
if constexpr(Size == Half) {
auto data = ((T*)this)->readWord(address, cycles);
auto data = ((T*)this)->readWord(address, thread);
switch(address & 2) {
case 0: return data >> 16;
case 2: return data >> 0;
}
}
if constexpr(Size == Word) {
return ((T*)this)->readWord(address, cycles);
return ((T*)this)->readWord(address, thread);
}
if constexpr(Size == Dual) {
u64 data = ((T*)this)->readWord(address, cycles);
return data << 32 | ((T*)this)->readWord(address + 4, cycles);
u64 data = ((T*)this)->readWord(address, thread);
return data << 32 | ((T*)this)->readWord(address + 4, thread);
}
unreachable;
}
template<u32 Size>
auto write(u32 address, u64 data, u32& cycles) -> void {
cycles = DefaultWriteCycles;
auto write(u32 address, u64 data, Thread& thread) -> void {
thread.step(DefaultWriteCycles * 2);
if constexpr(Size == Byte) {
switch(address & 3) {
case 0: return ((T*)this)->writeWord(address, data << 24, cycles);
case 1: return ((T*)this)->writeWord(address, data << 16, cycles);
case 2: return ((T*)this)->writeWord(address, data << 8, cycles);
case 3: return ((T*)this)->writeWord(address, data << 0, cycles);
case 0: return ((T*)this)->writeWord(address, data << 24, thread);
case 1: return ((T*)this)->writeWord(address, data << 16, thread);
case 2: return ((T*)this)->writeWord(address, data << 8, thread);
case 3: return ((T*)this)->writeWord(address, data << 0, thread);
}
}
if constexpr(Size == Half) {
switch(address & 2) {
case 0: return ((T*)this)->writeWord(address, data << 16, cycles);
case 2: return ((T*)this)->writeWord(address, data << 0, cycles);
case 0: return ((T*)this)->writeWord(address, data << 16, thread);
case 2: return ((T*)this)->writeWord(address, data << 0, thread);
}
}
if constexpr(Size == Word) {
((T*)this)->writeWord(address, data, cycles);
((T*)this)->writeWord(address, data, thread);
}
if constexpr(Size == Dual) {
((T*)this)->writeWord(address, data >> 32, cycles);
((T*)this)->writeWord(address, data >> 32, thread);
}
}
};

View File

@ -31,8 +31,8 @@ namespace Memory {
//Declares the templated read/write entry points of the bus; Size is the access width.
//NOTE(review): each function is declared twice, once with a `u32& cycles` last
//parameter and once with a `Thread& thread` last parameter -- presumably the
//before/after lines of the change; confirm which declarations are live.
struct Bus {
//bus.hpp
template<u32 Size> auto read(u32 address, u32& cycles) -> u64;
template<u32 Size> auto write(u32 address, u64 data, u32& cycles) -> void;
template<u32 Size> auto read(u32 address, Thread& thread) -> u64;
template<u32 Size> auto write(u32 address, u64 data, Thread& thread) -> void;
};
extern Bus bus;

View File

@ -1,4 +1,4 @@
auto MI::readWord(u32 address, u32& cycles) -> u32 {
auto MI::readWord(u32 address, Thread& thread) -> u32 {
address = (address & 0xfffff) >> 2;
n32 data;
@ -42,7 +42,7 @@ auto MI::readWord(u32 address, u32& cycles) -> u32 {
return data;
}
auto MI::writeWord(u32 address, u32 data_, u32& cycles) -> void {
auto MI::writeWord(u32 address, u32 data_, Thread& thread) -> void {
address = (address & 0xfffff) >> 2;
n32 data = data_;

View File

@ -27,8 +27,8 @@ struct MI : Memory::RCP<MI> {
auto power(bool reset) -> void;
//io.cpp
auto readWord(u32 address, u32& cycles) -> u32;
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
auto readWord(u32 address, Thread& thread) -> u32;
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
//serialization.cpp
auto serialize(serializer&) -> void;

View File

@ -19,6 +19,10 @@ using v128 = __m128i;
using v128 = __m128i;
#endif
#if defined(VULKAN)
#include <n64/vulkan/vulkan.hpp>
#endif
namespace ares::Nintendo64 {
auto enumerate() -> vector<string>;
auto load(Node::System& node, string name) -> bool;
@ -39,6 +43,10 @@ namespace ares::Nintendo64 {
clock = 0;
}
//adds `clocks` to this object's clock counter
auto step(u32 clocks) -> void { clock += clocks; }
//passes the clock counter to the serializer
auto serialize(serializer& s) -> void { s(clock); }

View File

@ -1,11 +1,11 @@
//Reads one word through the PI.
//Addresses up to 0x046f'ffff are served by ioRead(). For higher addresses: while
//io.ioBusy is set, the result of writeForceFinish() is charged as time and
//io.busLatch is returned; otherwise a fixed cost of 250 is charged and the word
//comes from busRead<Word>().
//NOTE(review): both a `cycles +=` form and a `thread.step(... * 2)` form of the
//time accounting (and both signatures) appear here -- presumably the before/after
//lines of the change; confirm which is live.
inline auto PI::readWord(u32 address, u32& cycles) -> u32 {
inline auto PI::readWord(u32 address, Thread& thread) -> u32 {
if(address <= 0x046f'ffff) return ioRead(address);
if (unlikely(io.ioBusy)) {
cycles += writeForceFinish();
thread.step(writeForceFinish() * 2);
return io.busLatch;
}
cycles += 250;
thread.step(250 * 2);
return busRead<Word>(address);
}
@ -51,7 +51,7 @@ inline auto PI::busRead(u32 address) -> u32 {
return unmapped; //accesses here actually lock out the RCP
}
inline auto PI::writeWord(u32 address, u32 data, u32& cycles) -> void {
inline auto PI::writeWord(u32 address, u32 data, Thread& thread) -> void {
if(address <= 0x046f'ffff) return ioWrite(address, data);
if(io.ioBusy) return;

View File

@ -52,3 +52,50 @@ auto PI::dmaFinished() -> void {
io.interrupt = 1;
mi.raise(MI::IRQ::PI);
}
//Computes a duration for the DMA described by the current PI registers.
//  read: true uses io.readLength, false uses io.writeLength.
//  returns: the accumulated cost multiplied by 3. The unit is not visible in this
//  function -- presumably the clock unit expected by queue.insert(); TODO confirm.
//The cost is the sum of a per-page term, a per-two-bytes term, a fixed term per
//full 128-byte buffer, and one unit per byte transferred outside full buffers.
auto PI::dmaDuration(bool read) -> u32 {
auto len = read ? io.readLength : io.writeLength;
//smallest even value greater than the register value (within the width of len's type)
len = (len | 1) + 1;
//pick the timing parameter set from bits 24..31 of the pbus address:
//0x05 and the range8(0x08, 0x0F) case use bsd2, everything else uses bsd1
//(range8 is defined elsewhere; presumably it expands to case labels 0x08 through 0x0F)
BSD bsd;
switch (io.pbusAddress.bit(24,31)) {
case 0x05: bsd = bsd2; break;
case range8(0x08, 0x0F): bsd = bsd2; break;
default: bsd = bsd1; break;
}
//page size in bytes is 2^(bsd.pageSize + 2)
auto pageShift = bsd.pageSize + 2;
auto pageSize = 1 << pageShift;
auto pageMask = pageSize - 1;
//pbusLast is the address of the final two bytes of the transfer
auto pbusFirst = io.pbusAddress;
auto pbusLast = io.pbusAddress + len - 2;
auto pbusFirstPage = pbusFirst >> pageShift;
auto pbusLastPage = pbusLast >> pageShift;
auto pbusPages = pbusLastPage - pbusFirstPage + 1;
//numBuffers counts full 128-byte buffers; partialBytes counts the remaining bytes
auto numBuffers = 0;
auto partialBytes = 0;
if (pbusFirstPage == pbusLastPage) {
//transfer stays within one page: a single full buffer only when it is exactly 128 bytes
if (len == 128) numBuffers = 1;
else partialBytes = len;
} else {
//transfer spans pages: the first and last page each count as one buffer when
//the transfer starts/ends on a page boundary, otherwise as partial bytes
bool fullFirst = (pbusFirst & pageMask) == 0;
bool fullLast = ((pbusLast + 2) & pageMask) == 0;
if (fullFirst) numBuffers++;
else partialBytes += pageSize - (pbusFirst & pageMask);
if (fullLast) numBuffers++;
else partialBytes += (pbusLast & pageMask) + 2;
//pages strictly between the first and last are counted in 128-byte buffers
if (pbusFirstPage + 1 < pbusLastPage)
numBuffers += (pbusPages - 2) * pageSize / 128;
}
u32 cycles = 0;
//per page touched
cycles += (14 + bsd.latency + 1) * pbusPages;
//per two bytes transferred
cycles += (bsd.pulseWidth + 1 + bsd.releaseDuration + 1) * len / 2;
//per full buffer, and per partial byte
cycles += numBuffers * 28;
cycles += partialBytes * 1;
return cycles * 3;
}

View File

@ -42,12 +42,12 @@ auto PI::ioRead(u32 address) -> u32 {
if(address == 7) {
//PI_BSD_DOM1_PGS
data.bit(0,7) = bsd1.pageSize;
data.bit(0,3) = bsd1.pageSize;
}
if(address == 8) {
//PI_BSD_DOM1_RLS
data.bit(0,7) = bsd1.releaseDuration;
data.bit(0,1) = bsd1.releaseDuration;
}
if(address == 9) {
@ -98,7 +98,7 @@ auto PI::ioWrite(u32 address, u32 data_) -> void {
//PI_READ_LENGTH
io.readLength = n24(data);
io.dmaBusy = 1;
queue.insert(Queue::PI_DMA_Read, io.readLength * 36);
queue.insert(Queue::PI_DMA_Read, dmaDuration(true));
dmaRead();
}
@ -106,7 +106,7 @@ auto PI::ioWrite(u32 address, u32 data_) -> void {
//PI_WRITE_LENGTH
io.writeLength = n24(data);
io.dmaBusy = 1;
queue.insert(Queue::PI_DMA_Write, io.writeLength * 36);
queue.insert(Queue::PI_DMA_Write, dmaDuration(false));
dmaWrite();
}
@ -136,12 +136,12 @@ auto PI::ioWrite(u32 address, u32 data_) -> void {
if(address == 7) {
//PI_BSD_DOM1_PGS
bsd1.pageSize = data.bit(0,7);
bsd1.pageSize = data.bit(0,3);
}
if(address == 8) {
//PI_BSD_DOM1_RLS
bsd1.releaseDuration = data.bit(0,7);
bsd1.releaseDuration = data.bit(0,1);
}
if(address == 9) {

View File

@ -22,14 +22,15 @@ struct PI : Memory::RCP<PI> {
auto dmaRead() -> void;
auto dmaWrite() -> void;
auto dmaFinished() -> void;
auto dmaDuration(bool read) -> u32;
//io.cpp
auto ioRead(u32 address) -> u32;
auto ioWrite(u32 address, u32 data) -> void;
//bus.hpp
auto readWord(u32 address, u32& cycles) -> u32;
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
auto readWord(u32 address, Thread& thread) -> u32;
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
auto writeFinished() -> void;
auto writeForceFinish() -> u32;
template <u32 Size>
@ -55,8 +56,8 @@ struct PI : Memory::RCP<PI> {
struct BSD {
n8 latency;
n8 pulseWidth;
n8 pageSize;
n8 releaseDuration;
n4 pageSize;
n2 releaseDuration;
} bsd1, bsd2;
};

View File

@ -30,11 +30,6 @@ auto PIF::descramble(n4 *buf, int size) -> void {
for(int i=size-1; i>0; i--) buf[i] -= buf[i-1] + 1;
}
//Advances the thread clock by `clocks` and, while intram.bootTimeout is positive,
//subtracts the same amount from it.
auto PIF::step(u32 clocks) -> void {
Thread::clock += clocks;
if(intram.bootTimeout > 0) intram.bootTimeout -= clocks;
}
//returns the byte stored at offset 0x3f of PIF RAM
auto PIF::ramReadCommand() -> u8 {
  const u8 command = ram.read<Byte>(0x3f);
  return command;
}
@ -264,7 +259,9 @@ auto PIF::challenge() -> void {
}
auto PIF::mainHLE() -> void {
step(10240*8);
constexpr u32 clocks = 10240 * 8;
step(clocks);
if(intram.bootTimeout > 0) intram.bootTimeout -= clocks;
if(likely(state == Run)) {
//cicCompare()

View File

@ -24,7 +24,9 @@ auto PIF::unload() -> void {
}
//runs mainHLE() repeatedly for as long as the thread clock is negative
auto PIF::main() -> void {
  for(;;) {
    if(!(Thread::clock < 0)) break;
    mainHLE();
  }
}
auto PIF::power(bool reset) -> void {

View File

@ -35,7 +35,6 @@ struct PIF : Thread, Memory::SI<PIF> {
} intram;
//pif.cpp
auto step(u32 clocks) -> void;
auto load(Node::Object) -> void;
auto unload() -> void;
auto main() -> void;

View File

@ -1,4 +1,4 @@
auto RDP::readWord(u32 address, u32& cycles) -> u32 {
auto RDP::readWord(u32 address, Thread& thread) -> u32 {
address = (address & 0xfffff) >> 2;
n32 data;
@ -34,7 +34,7 @@ auto RDP::readWord(u32 address, u32& cycles) -> u32 {
if(address == 4) {
//DPC_CLOCK
data.bit(0,23) = command.clock;
data.bit(0,23) = command.clock - (Thread::clock - thread.clock) / 3;
}
if(address == 5) {
@ -56,7 +56,7 @@ auto RDP::readWord(u32 address, u32& cycles) -> u32 {
return data;
}
auto RDP::writeWord(u32 address, u32 data_, u32& cycles) -> void {
auto RDP::writeWord(u32 address, u32 data_, Thread& thread) -> void {
address = (address & 0xfffff) >> 2;
n32 data = data_;
@ -91,7 +91,7 @@ auto RDP::writeWord(u32 address, u32 data_, u32& cycles) -> void {
if(data.bit(6) && !command.crashed) command.tmemBusy = 0;
if(data.bit(7) && !command.crashed) command.pipeBusy = 0;
if(data.bit(8) && !command.crashed) command.bufferBusy = 0;
if(data.bit(9)) command.clock = 0;
if(data.bit(9)) command.clock = (Thread::clock - thread.clock) / 3;
}
if(address == 4) {
@ -113,7 +113,7 @@ auto RDP::writeWord(u32 address, u32 data_, u32& cycles) -> void {
debugger.ioDPC(Write, address, data);
}
auto RDP::IO::readWord(u32 address, u32& cycles) -> u32 {
auto RDP::IO::readWord(u32 address, Thread& thread) -> u32 {
address = (address & 0xfffff) >> 2;
n32 data;
@ -144,7 +144,7 @@ auto RDP::IO::readWord(u32 address, u32& cycles) -> u32 {
return data;
}
auto RDP::IO::writeWord(u32 address, u32 data_, u32& cycles) -> void {
auto RDP::IO::writeWord(u32 address, u32 data_, Thread& thread) -> void {
address = (address & 0xfffff) >> 2;
n32 data = data_;

View File

@ -31,11 +31,11 @@ auto RDP::crash(const char *reason) -> void {
}
//RDP thread entry point.
//NOTE(review): this span mixes two shapes -- a main() that makes a single
//step(system.frequency()) call next to a separate RDP::step() that adds to
//Thread::clock, and a main() that loops while Thread::clock is negative, stepping
//by system.frequency() and adding a third of that to command.clock each pass.
//Presumably these are the before/after lines of the change; confirm which is live.
auto RDP::main() -> void {
step(system.frequency());
}
auto RDP::step(u32 clocks) -> void {
Thread::clock += clocks;
const u32 clocks = system.frequency();
while(Thread::clock < 0) {
step(clocks);
//command.clock advances at one third of the thread clock rate
command.clock += clocks / 3;
}
}
auto RDP::power(bool reset) -> void {

View File

@ -21,7 +21,6 @@ struct RDP : Thread, Memory::RCP<RDP> {
auto unload() -> void;
auto main() -> void;
auto step(u32 clocks) -> void;
auto power(bool reset) -> void;
auto crash(const char *reason) -> void;
@ -66,8 +65,8 @@ struct RDP : Thread, Memory::RCP<RDP> {
auto setColorImage() -> void;
//io.cpp
auto readWord(u32 address, u32& cycles) -> u32;
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
auto readWord(u32 address, Thread& thread) -> u32;
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
auto flushCommands() -> void;
//serialization.cpp
@ -338,8 +337,8 @@ struct RDP : Thread, Memory::RCP<RDP> {
IO(RDP& self) : self(self) {}
//io.cpp
auto readWord(u32 address, u32& cycles) -> u32;
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
auto readWord(u32 address, Thread& thread) -> u32;
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
struct BIST {
n1 check;

View File

@ -45,8 +45,513 @@ static const vector<string> commandNames = {
};
auto RDP::render() -> void {
#if defined(VULKAN)
if(vulkan.enable && vulkan.render()) {
const char *msg = vulkan.crashed();
if(msg) crash(msg);
return;
}
#endif
angrylion::ProcessRDPList();
command.current = command.end;
return;
auto& memory = !command.source ? rdram.ram : rsp.dmem;
auto fetch = [&]() -> u64 {
u64 op = memory.readUnaligned<Dual>(command.current);
command.current += 8;
return op;
};
auto fetchEdge = [&](u64 op) {
edge.lmajor = n1 (op >> 55);
edge.level = n3 (op >> 51);
edge.tile = n3 (op >> 48);
edge.y.lo = n14(op >> 32);
edge.y.md = n14(op >> 16);
edge.y.hi = n14(op >> 0);
op = fetch();
edge.x.lo.c.i = n16(op >> 48);
edge.x.lo.c.f = n16(op >> 32);
edge.x.lo.s.i = n16(op >> 16);
edge.x.lo.s.f = n16(op >> 0);
op = fetch();
edge.x.hi.c.i = n16(op >> 48);
edge.x.hi.c.f = n16(op >> 32);
edge.x.hi.s.i = n16(op >> 16);
edge.x.hi.s.f = n16(op >> 0);
op = fetch();
edge.x.md.c.i = n16(op >> 48);
edge.x.md.c.f = n16(op >> 32);
edge.x.md.s.i = n16(op >> 16);
edge.x.md.s.f = n16(op >> 0);
};
auto fetchShade = [&](u64 op) {
op = fetch();
shade.r.c.i = n16(op >> 48);
shade.g.c.i = n16(op >> 32);
shade.b.c.i = n16(op >> 16);
shade.a.c.i = n16(op >> 0);
op = fetch();
shade.r.x.i = n16(op >> 48);
shade.g.x.i = n16(op >> 32);
shade.b.x.i = n16(op >> 16);
shade.a.x.i = n16(op >> 0);
op = fetch();
shade.r.c.f = n16(op >> 48);
shade.g.c.f = n16(op >> 32);
shade.b.c.f = n16(op >> 16);
shade.a.c.f = n16(op >> 0);
op = fetch();
shade.r.x.f = n16(op >> 48);
shade.g.x.f = n16(op >> 32);
shade.b.x.f = n16(op >> 16);
shade.a.x.f = n16(op >> 0);
op = fetch();
shade.r.e.i = n16(op >> 48);
shade.g.e.i = n16(op >> 32);
shade.b.e.i = n16(op >> 16);
shade.a.e.i = n16(op >> 0);
op = fetch();
shade.r.y.i = n16(op >> 48);
shade.g.y.i = n16(op >> 32);
shade.b.y.i = n16(op >> 16);
shade.a.y.i = n16(op >> 0);
op = fetch();
shade.r.e.f = n16(op >> 48);
shade.g.e.f = n16(op >> 32);
shade.b.e.f = n16(op >> 16);
shade.a.e.f = n16(op >> 0);
op = fetch();
shade.r.y.f = n16(op >> 48);
shade.g.y.f = n16(op >> 32);
shade.b.y.f = n16(op >> 16);
shade.a.y.f = n16(op >> 0);
};
auto fetchTexture = [&](u64 op) {
op = fetch();
texture.s.c.i = n16(op >> 48);
texture.t.c.i = n16(op >> 32);
texture.w.c.i = n16(op >> 16);
op = fetch();
texture.s.x.i = n16(op >> 48);
texture.t.x.i = n16(op >> 32);
texture.w.x.i = n16(op >> 16);
op = fetch();
texture.s.c.f = n16(op >> 48);
texture.t.c.f = n16(op >> 32);
texture.w.c.f = n16(op >> 16);
op = fetch();
texture.s.x.f = n16(op >> 48);
texture.t.x.f = n16(op >> 32);
texture.w.x.f = n16(op >> 16);
op = fetch();
texture.s.e.i = n16(op >> 48);
texture.t.e.i = n16(op >> 32);
texture.w.e.i = n16(op >> 16);
op = fetch();
texture.s.y.i = n16(op >> 48);
texture.t.y.i = n16(op >> 32);
texture.w.y.i = n16(op >> 16);
op = fetch();
texture.s.e.f = n16(op >> 48);
texture.t.e.f = n16(op >> 32);
texture.w.e.f = n16(op >> 16);
op = fetch();
texture.s.y.f = n16(op >> 48);
texture.t.y.f = n16(op >> 32);
texture.w.y.f = n16(op >> 16);
};
auto fetchZBuffer = [&](u64 op) {
op = fetch();
zbuffer.d.i = n16(op >> 48);
zbuffer.d.f = n16(op >> 32);
zbuffer.x.i = n16(op >> 16);
zbuffer.x.f = n16(op >> 0);
op = fetch();
zbuffer.e.i = n16(op >> 48);
zbuffer.e.f = n16(op >> 32);
zbuffer.y.i = n16(op >> 16);
zbuffer.y.f = n16(op >> 0);
};
auto fetchRectangle = [&](u64 op) {
rectangle.x.lo = n12(op >> 44);
rectangle.y.lo = n12(op >> 32);
rectangle.tile = n3 (op >> 24);
rectangle.x.hi = n12(op >> 12);
rectangle.y.hi = n12(op >> 0);
op = fetch();
rectangle.s.i = n16(op >> 48);
rectangle.t.i = n16(op >> 32);
rectangle.s.f = n16(op >> 16);
rectangle.t.f = n16(op >> 0);
};
while(command.current < command.end) {
u64 op = fetch();
if(debugger.tracer.command->enabled()) {
debugger.command({hex(op, 16L), " ", commandNames(op >> 56 & 0x3f, "Invalid")});
}
switch(op >> 56 & 0x3f) {
case 0x00: {
noOperation();
} break;
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07: {
invalidOperation();
} break;
case 0x08: {
fetchEdge(op);
unshadedTriangle();
} break;
case 0x09: {
fetchEdge(op);
fetchZBuffer(op);
unshadedZbufferTriangle();
} break;
case 0x0a: {
fetchEdge(op);
fetchTexture(op);
textureTriangle();
} break;
case 0x0b: {
fetchEdge(op);
fetchTexture(op);
fetchZBuffer(op);
textureZbufferTriangle();
} break;
case 0x0c: {
fetchEdge(op);
fetchShade(op);
shadedTriangle();
} break;
case 0x0d: {
fetchEdge(op);
fetchShade(op);
fetchZBuffer(op);
shadedZbufferTriangle();
} break;
case 0x0e: {
fetchEdge(op);
fetchShade(op);
fetchTexture(op);
shadedTextureTriangle();
} break;
case 0x0f: {
fetchEdge(op);
fetchShade(op);
fetchTexture(op);
fetchZBuffer(op);
shadedTextureZbufferTriangle();
} break;
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:
case 0x20:
case 0x21:
case 0x22:
case 0x23: {
invalidOperation();
} break;
case 0x24: {
fetchRectangle(op);
textureRectangle();
} break;
case 0x25: {
fetchRectangle(op);
textureRectangleFlip();
} break;
case 0x26: {
syncLoad();
} break;
case 0x27: {
syncPipe();
} break;
case 0x28: {
syncTile();
} break;
case 0x29: {
syncFull();
} break;
case 0x2a: {
key.g.width = n12(op >> 44);
key.b.width = n12(op >> 32);
key.g.center = n8 (op >> 24);
key.g.scale = n8 (op >> 16);
key.b.center = n8 (op >> 8);
key.b.scale = n8 (op >> 0);
setKeyGB();
} break;
case 0x2b: {
key.r.width = n12(op >> 16);
key.r.center = n8 (op >> 8);
key.r.scale = n8 (op >> 0);
setKeyR();
} break;
case 0x2c: {
  //set convert: six coefficients K0..K5 are packed back to back at a 9-bit
  //stride (shifts 45, 36, 27, 18, 9, 0), so every one of them is 9 bits wide
  convert.k[0] = n9(op >> 45);
  convert.k[1] = n9(op >> 36);
  convert.k[2] = n9(op >> 27);
  convert.k[3] = n9(op >> 18);
  convert.k[4] = n9(op >>  9);
  //K5 occupies bits 8..0 like its siblings; extracting it as n8 discarded bit 8
  convert.k[5] = n9(op >>  0);
  setConvert();
} break;
case 0x2d: {
scissor.x.hi = n12(op >> 44);
scissor.y.hi = n12(op >> 32);
scissor.field = n1 (op >> 25);
scissor.odd = n1 (op >> 24);
scissor.x.lo = n12(op >> 12);
scissor.y.lo = n12(op >> 0);
setScissor();
} break;
case 0x2e: {
primitiveDepth.z = n16(op >> 16);
primitiveDepth.deltaZ = n16(op >> 0);
setPrimitiveDepth();
} break;
case 0x2f: {
other.atomicPrimitive = n1(op >> 55);
other.reserved1 = n1(op >> 54);
other.cycleType = n2(op >> 52);
other.perspective = n1(op >> 51);
other.detailTexture = n1(op >> 50);
other.sharpenTexture = n1(op >> 49);
other.lodTexture = n1(op >> 48);
other.tlut = n1(op >> 47);
other.tlutType = n1(op >> 46);
other.sampleType = n1(op >> 45);
other.midTexel = n1(op >> 44);
other.bilerp[0] = n1(op >> 43);
other.bilerp[1] = n1(op >> 42);
other.convertOne = n1(op >> 41);
other.colorKey = n1(op >> 40);
other.colorDitherMode = n2(op >> 38);
other.alphaDitherMode = n2(op >> 36);
other.reserved2 = n4(op >> 32);
other.blend1a[0] = n2(op >> 30);
other.blend1a[1] = n2(op >> 28);
other.blend1b[0] = n2(op >> 26);
other.blend1b[1] = n2(op >> 24);
other.blend2a[0] = n2(op >> 22);
other.blend2a[1] = n2(op >> 20);
other.blend2b[0] = n2(op >> 18);
other.blend2b[1] = n2(op >> 16);
other.reserved3 = n1(op >> 15);
other.forceBlend = n1(op >> 14);
other.alphaCoverage = n1(op >> 13);
other.coverageXalpha = n1(op >> 12);
other.zMode = n2(op >> 10);
other.coverageMode = n2(op >> 8);
other.colorOnCoverage = n1(op >> 7);
other.imageRead = n1(op >> 6);
other.zUpdate = n1(op >> 5);
other.zCompare = n1(op >> 4);
other.antialias = n1(op >> 3);
other.zSource = n1(op >> 2);
other.ditherAlpha = n1(op >> 1);
other.alphaCompare = n1(op >> 0);
setOtherModes();
} break;
case 0x30: {
tlut.s.lo = n12(op >> 44);
tlut.t.lo = n12(op >> 32);
tlut.index = n3 (op >> 24);
tlut.s.hi = n12(op >> 12);
tlut.t.hi = n12(op >> 0);
loadTLUT();
} break;
case 0x31: {
invalidOperation();
} break;
case 0x32: {
tileSize.s.lo = n12(op >> 44);
tileSize.t.lo = n12(op >> 32);
tileSize.index = n3 (op >> 24);
tileSize.s.hi = n12(op >> 12);
tileSize.t.hi = n12(op >> 0);
setTileSize();
} break;
case 0x33: {
load_.block.s.lo = n12(op >> 44);
load_.block.t.lo = n12(op >> 32);
load_.block.index = n3 (op >> 24);
load_.block.s.hi = n12(op >> 12);
load_.block.t.hi = n12(op >> 0);
loadBlock();
} break;
case 0x34: {
load_.tile.s.lo = n12(op >> 44);
load_.tile.t.lo = n12(op >> 32);
load_.tile.index = n3 (op >> 24);
load_.tile.s.hi = n12(op >> 12);
load_.tile.t.hi = n12(op >> 0);
loadTile();
} break;
case 0x35: {
tile.format = n3(op >> 53);
tile.size = n2(op >> 51);
tile.line = n9(op >> 41);
tile.address = n9(op >> 32);
tile.index = n3(op >> 24);
tile.palette = n4(op >> 20);
tile.t.clamp = n1(op >> 19);
tile.t.mirror = n1(op >> 18);
tile.t.mask = n4(op >> 14);
tile.t.shift = n4(op >> 10);
tile.s.clamp = n1(op >> 9);
tile.s.mirror = n1(op >> 8);
tile.s.mask = n4(op >> 4);
tile.s.shift = n4(op >> 0);
setTile();
} break;
case 0x36: {
fillRectangle_.x.lo = n12(op >> 44);
fillRectangle_.y.lo = n12(op >> 32);
fillRectangle_.x.hi = n12(op >> 12);
fillRectangle_.y.hi = n12(op >> 0);
fillRectangle();
} break;
case 0x37: {
set.fill.color = n32(op >> 0);
setFillColor();
} break;
case 0x38: {
fog.red = n8(op >> 24);
fog.green = n8(op >> 16);
fog.blue = n8(op >> 8);
fog.alpha = n8(op >> 0);
setFogColor();
} break;
case 0x39: {
blend.red = n8(op >> 24);
blend.green = n8(op >> 16);
blend.blue = n8(op >> 8);
blend.alpha = n8(op >> 0);
setBlendColor();
} break;
case 0x3a: {
primitive.minimum = n4(op >> 40);
primitive.fraction = n8(op >> 32);
primitive.red = n8(op >> 24);
primitive.green = n8(op >> 16);
primitive.blue = n8(op >> 8);
primitive.alpha = n8(op >> 0);
setPrimitiveColor();
} break;
case 0x3b: {
environment.red = n8(op >> 24);
environment.green = n8(op >> 16);
environment.blue = n8(op >> 8);
environment.alpha = n8(op >> 0);
setEnvironmentColor();
} break;
case 0x3c: {
combine.sba.color[0] = n4(op >> 52);
combine.mul.color[0] = n5(op >> 47);
combine.sba.alpha[0] = n3(op >> 44);
combine.mul.alpha[0] = n3(op >> 41);
combine.sba.color[1] = n4(op >> 37);
combine.mul.color[1] = n5(op >> 32);
combine.sbb.color[0] = n4(op >> 28);
combine.sbb.color[1] = n4(op >> 24);
combine.sba.alpha[1] = n3(op >> 21);
combine.mul.alpha[1] = n3(op >> 18);
combine.add.color[0] = n3(op >> 15);
combine.sbb.alpha[0] = n3(op >> 12);
combine.add.alpha[0] = n3(op >> 9);
combine.add.color[1] = n3(op >> 6);
combine.sbb.alpha[1] = n3(op >> 3);
combine.add.alpha[1] = n3(op >> 0);
setCombineMode();
} break;
case 0x3d: {
set.texture.format = n3 (op >> 53);
set.texture.size = n2 (op >> 51);
set.texture.width = n10(op >> 32);
set.texture.dramAddress = n26(op >> 0);
setTextureImage();
} break;
case 0x3e: {
set.mask.dramAddress = n26(op >> 0);
setMaskImage();
} break;
case 0x3f: {
set.color.format = n3 (op >> 53);
set.color.size = n2 (op >> 51);
set.color.width = n10(op >> 32);
set.color.dramAddress = n26(op >> 0);
setColorImage();
} break;
}
}
}
//0x00

View File

@ -1,4 +1,4 @@
auto RDRAM::readWord(u32 address, u32& cycles) -> u32 {
auto RDRAM::readWord(u32 address, Thread& thread) -> u32 {
u32 chipID = address >> 13 & 3;
auto& chip = chips[chipID];
address = (address & 0x3ff) >> 2;
@ -63,7 +63,7 @@ auto RDRAM::readWord(u32 address, u32& cycles) -> u32 {
return data;
}
auto RDRAM::writeWord(u32 address, u32 data, u32& cycles) -> void {
auto RDRAM::writeWord(u32 address, u32 data, Thread& thread) -> void {
u32 chipID = address >> 13 & 3;
auto& chip = chips[chipID];
address = (address & 0x3ff) >> 2;

View File

@ -2,7 +2,20 @@
struct RDRAM : Memory::RCP<RDRAM> {
Node::Object node;
Memory::Writable ram;
struct Writable : public Memory::Writable {
template<u32 Size>
auto read(u32 address) -> u64 {
if (address >= size) return 0;
return Memory::Writable::read<Size>(address);
}
template<u32 Size>
auto write(u32 address, u64 value) -> void {
if (address >= size) return;
Memory::Writable::write<Size>(address, value);
}
} ram;
struct Debugger {
//debugger.cpp
@ -24,8 +37,8 @@ struct RDRAM : Memory::RCP<RDRAM> {
auto power(bool reset) -> void;
//io.cpp
auto readWord(u32 address, u32& cycles) -> u32;
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
auto readWord(u32 address, Thread& thread) -> u32;
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
//serialization.cpp
auto serialize(serializer&) -> void;

View File

@ -1,4 +1,4 @@
auto RI::readWord(u32 address, u32& cycles) -> u32 {
auto RI::readWord(u32 address, Thread& thread) -> u32 {
address = (address & 0xfffff) >> 2;
n32 data = 0;
@ -30,7 +30,7 @@ auto RI::readWord(u32 address, u32& cycles) -> u32 {
//after a cold boot on real hardware with no expansion pak and using the
//CIC-NUS-6102 IPL3, upon reaching the test ROM's entry point the count
//register was measured to be ~0x1184000.
cpu.step(17'641'000);
cpu.step(17'641'000 * 2);
}
}
@ -58,7 +58,7 @@ auto RI::readWord(u32 address, u32& cycles) -> u32 {
return data;
}
auto RI::writeWord(u32 address, u32 data_, u32& cycles) -> void {
auto RI::writeWord(u32 address, u32 data_, Thread& thread) -> void {
address = (address & 0xfffff) >> 2;
n32 data = data_;

View File

@ -19,8 +19,8 @@ struct RI : Memory::RCP<RI> {
auto power(bool reset) -> void;
//io.cpp
auto readWord(u32 address, u32& cycles) -> u32;
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
auto readWord(u32 address, Thread& thread) -> u32;
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
//serialization.cpp
auto serialize(serializer&) -> void;

View File

@ -0,0 +1,386 @@
//helper macros for the decoder tables below; all of them are #undef'd again
//at the end of this file

//jp: forward opcode `id` to the sub-decoder decoder<name>(instruction)
#define jp(id, name, ...) case id: return decoder##name(instruction)
//op: start from an empty OpInfo, evaluate the annotation expressions passed as
//extra arguments, and return the result. `name` is never expanded; it only
//labels the table row for the reader
#define op(id, name, ...) case id: { OpInfo info = {}; __VA_ARGS__; return info; }
//5-bit register number fields of the instruction word
#define RD (instruction >> 11 & 31)
#define RT (instruction >> 16 & 31)
#define RS (instruction >> 21 & 31)
#define VD (instruction >> 6 & 31)
#define VS (instruction >> 11 & 31)
#define VT (instruction >> 16 & 31)
//indices used with VCUse/VCDef/VCRef for the vector control registers
#define VCO 0
#define VCC 1
#define VCE 2
//scalar register n is read / written: sets bit n of info.r.use / info.r.def
#define RUse(n) info.r.use |= 1 << n
#define RDef(n) info.r.def |= 1 << n
//expands to a no-op, so these destinations are not recorded in info.r.def
#define RDefB(n) (void)0 //bypassable
//vector register n is read / written: sets bit n of info.v.use / info.v.def
#define VUse(n) info.v.use |= 1 << n
#define VDef(n) info.v.def |= 1 << n
//marks all eight registers of the 8-aligned group containing n
#define VGUse(n) info.v.use |= 0xff << (n & ~7)
#define VGDef(n) info.v.def |= 0xff << (n & ~7)
//vector control register (n & 3) is read / written; VCRef marks both
#define VCUse(n) info.vc.use |= 1 << (n & 3)
#define VCDef(n) info.vc.def |= 1 << (n & 3)
#define VCRef(n) VCUse(n), VCDef(n)
//records bit n in info.vfake instead of info.v.use
#define VFRef(n) info.vfake |= 1 << n
//flag annotations: each ORs the matching OpInfo flag into info.flags
#define Load info.flags |= OpInfo::Load
#define Store info.flags |= OpInfo::Store
#define Branch info.flags |= OpInfo::Branch
#define Vector info.flags |= OpInfo::Vector
#define VNopGroup info.flags |= OpInfo::VNopGroup
//top-level decoder: classifies an instruction word by its primary opcode
//(bits 31..26) and returns an OpInfo listing the registers it uses/defines
//and its Load/Store/Branch flags. it does not execute anything.
//jp rows delegate to the matching sub-decoder; INVALID rows carry no
//annotations and therefore return an all-zero OpInfo.
auto RSP::decoderEXECUTE(u32 instruction) const -> OpInfo {
switch(instruction >> 26) {
jp(0x00, SPECIAL);
jp(0x01, REGIMM);
op(0x02, J, Branch);
op(0x03, JAL, Branch);
op(0x04, BEQ, RUse(RS), RUse(RT), Branch);
op(0x05, BNE, RUse(RS), RUse(RT), Branch);
op(0x06, BLEZ, RUse(RS), Branch);
op(0x07, BGTZ, RUse(RS), Branch);
//immediate ALU operations: the destination is annotated with RDefB, which expands to nothing
op(0x08, ADDI, RDefB(RT), RUse(RS));
op(0x09, ADDIU, RDefB(RT), RUse(RS));
op(0x0a, SLTI, RDefB(RT), RUse(RS));
op(0x0b, SLTIU, RDefB(RT), RUse(RS));
op(0x0c, ANDI, RDefB(RT), RUse(RS));
op(0x0d, ORI, RDefB(RT), RUse(RS));
op(0x0e, XORI, RDefB(RT), RUse(RS));
op(0x0f, LUI, RDefB(RT));
jp(0x10, SCC);
op(0x11, INVALID); //COP1
jp(0x12, VU);
op(0x13, INVALID); //COP3
op(0x14, INVALID); //BEQL
op(0x15, INVALID); //BNEL
op(0x16, INVALID); //BLEZL
op(0x17, INVALID); //BGTZL
op(0x18, INVALID); //DADDI
op(0x19, INVALID); //DADDIU
op(0x1a, INVALID); //LDL
op(0x1b, INVALID); //LDR
op(0x1c, INVALID);
op(0x1d, INVALID);
op(0x1e, INVALID);
op(0x1f, INVALID);
//scalar loads record their destination with RDef, so it appears in info.r.def
op(0x20, LB, RDef(RT), RUse(RS), Load);
op(0x21, LH, RDef(RT), RUse(RS), Load);
op(0x22, INVALID); //LWL
op(0x23, LW, RDef(RT), RUse(RS), Load);
op(0x24, LBU, RDef(RT), RUse(RS), Load);
op(0x25, LHU, RDef(RT), RUse(RS), Load);
op(0x26, INVALID); //LWR
op(0x27, LWU, RDef(RT), RUse(RS), Load);
op(0x28, SB, RUse(RT), RUse(RS), Store);
op(0x29, SH, RUse(RT), RUse(RS), Store);
op(0x2a, INVALID); //SWL
op(0x2b, SW, RUse(RT), RUse(RS), Store);
op(0x2c, INVALID); //SDL
op(0x2d, INVALID); //SDR
op(0x2e, INVALID); //SWR
op(0x2f, INVALID); //CACHE
op(0x30, INVALID); //LL
op(0x31, INVALID); //LWC1
jp(0x32, LWC2);
op(0x33, INVALID); //LWC3
op(0x34, INVALID); //LLD
op(0x35, INVALID); //LDC1
op(0x36, INVALID); //LDC2
op(0x37, INVALID); //LD
op(0x38, INVALID); //SC
op(0x39, INVALID); //SWC1
jp(0x3a, SWC2);
op(0x3b, INVALID); //SWC3
op(0x3c, INVALID); //SCD
op(0x3d, INVALID); //SDC1
op(0x3e, INVALID); //SDC2
op(0x3f, INVALID); //SD
}
//all 64 opcode values have a row above; this is only a fallback
return {};
}
//sub-decoder for primary opcode 0x00: selects on the function field
//(bits 5..0) and returns the OpInfo for that operation.
//ALU destinations are annotated with RDefB (a no-op), so only source
//registers and the Branch flag end up in the result.
auto RSP::decoderSPECIAL(u32 instruction) const -> OpInfo {
switch(instruction & 0x3f) {
op(0x00, SLL, RDefB(RD), RUse(RT));
op(0x01, INVALID);
op(0x02, SRL, RDefB(RD), RUse(RT));
op(0x03, SRA, RDefB(RD), RUse(RT));
op(0x04, SLLV, RDefB(RD), RUse(RT), RUse(RS));
op(0x05, INVALID);
op(0x06, SRLV, RDefB(RD), RUse(RT), RUse(RS));
op(0x07, SRAV, RDefB(RD), RUse(RT), RUse(RS));
op(0x08, JR, RUse(RS), Branch);
op(0x09, JALR, RDefB(RD), RUse(RS), Branch);
op(0x0a, INVALID);
op(0x0b, INVALID);
op(0x0c, INVALID); //SYSCALL
op(0x0d, BREAK, Branch);
op(0x0e, INVALID);
op(0x0f, INVALID); //SYNC
op(0x10, INVALID); //MFHI
op(0x11, INVALID); //MTHI
op(0x12, INVALID); //MFLO
op(0x13, INVALID); //MTLO
op(0x14, INVALID); //DSLLV
op(0x15, INVALID);
op(0x16, INVALID); //DSRLV
op(0x17, INVALID); //DSRAV
op(0x18, INVALID); //MULT
op(0x19, INVALID); //MULTU
op(0x1a, INVALID); //DIV
op(0x1b, INVALID); //DIVU
op(0x1c, INVALID); //DMULT
op(0x1d, INVALID); //DMULTU
op(0x1e, INVALID); //DDIV
op(0x1f, INVALID); //DDIVU
//0x20 and 0x22 are listed under the same names as 0x21 and 0x23 and carry identical annotations
op(0x20, ADDU, RDefB(RD), RUse(RS), RUse(RT)); //ADD
op(0x21, ADDU, RDefB(RD), RUse(RS), RUse(RT));
op(0x22, SUBU, RDefB(RD), RUse(RS), RUse(RT)); //SUB
op(0x23, SUBU, RDefB(RD), RUse(RS), RUse(RT));
op(0x24, AND, RDefB(RD), RUse(RS), RUse(RT));
op(0x25, OR, RDefB(RD), RUse(RS), RUse(RT));
op(0x26, XOR, RDefB(RD), RUse(RS), RUse(RT));
op(0x27, NOR, RDefB(RD), RUse(RS), RUse(RT));
op(0x28, INVALID);
op(0x29, INVALID);
op(0x2a, SLT, RDefB(RD), RUse(RS), RUse(RT));
op(0x2b, SLTU, RDefB(RD), RUse(RS), RUse(RT));
op(0x2c, INVALID); //DADD
op(0x2d, INVALID); //DADDU
op(0x2e, INVALID); //DSUB
op(0x2f, INVALID); //DSUBU
op(0x30, INVALID); //TGE
op(0x31, INVALID); //TGEU
op(0x32, INVALID); //TLT
op(0x33, INVALID); //TLTU
op(0x34, INVALID); //TEQ
op(0x35, INVALID);
op(0x36, INVALID); //TNE
op(0x37, INVALID);
op(0x38, INVALID); //DSLL
op(0x39, INVALID);
op(0x3a, INVALID); //DSRL
op(0x3b, INVALID); //DSRA
op(0x3c, INVALID); //DSLL32
op(0x3d, INVALID);
op(0x3e, INVALID); //DSRL32
op(0x3f, INVALID); //DSRA32
}
//all 64 function values have a row above; this is only a fallback
return {};
}
//sub-decoder for primary opcode 0x01: selects on bits 20..16 of the
//instruction. only the four branch rows carry annotations (RS is read and
//the Branch flag is set); every other row returns an all-zero OpInfo.
auto RSP::decoderREGIMM(u32 instruction) const -> OpInfo {
switch(instruction >> 16 & 0x1f) {
op(0x00, BLTZ, RUse(RS), Branch);
op(0x01, BGEZ, RUse(RS), Branch);
op(0x02, INVALID); //BLTZL
op(0x03, INVALID); //BGEZL
op(0x04, INVALID);
op(0x05, INVALID);
op(0x06, INVALID);
op(0x07, INVALID);
op(0x08, INVALID); //TGEI
op(0x09, INVALID); //TGEIU
op(0x0a, INVALID); //TLTI
op(0x0b, INVALID); //TLTIU
op(0x0c, INVALID); //TEQI
op(0x0d, INVALID);
op(0x0e, INVALID); //TNEI
op(0x0f, INVALID);
op(0x10, BLTZAL, RUse(RS), Branch);
op(0x11, BGEZAL, RUse(RS), Branch);
op(0x12, INVALID); //BLTZALL
op(0x13, INVALID); //BGEZALL
op(0x14, INVALID);
op(0x15, INVALID);
op(0x16, INVALID);
op(0x17, INVALID);
op(0x18, INVALID);
op(0x19, INVALID);
op(0x1a, INVALID);
op(0x1b, INVALID);
op(0x1c, INVALID);
op(0x1d, INVALID);
op(0x1e, INVALID);
op(0x1f, INVALID);
}
//all 32 selector values have a row above; this is only a fallback
return {};
}
//sub-decoder for primary opcode 0x10: selects on bits 25..21 of the
//instruction. MFC0 and MTC0 are flagged as both Load and Store; all other
//listed rows return an all-zero OpInfo.
auto RSP::decoderSCC(u32 instruction) const -> OpInfo {
switch(instruction >> 21 & 0x1f) {
op(0x00, MFC0, RDef(RT), Load, Store);
op(0x01, INVALID); //DMFC0
op(0x02, INVALID); //CFC0
op(0x03, INVALID);
op(0x04, MTC0, RUse(RT), Load, Store);
op(0x05, INVALID); //DMTC0
op(0x06, INVALID); //CTC0
op(0x07, INVALID);
op(0x08, INVALID); //BC0
op(0x09, INVALID);
op(0x0a, INVALID);
op(0x0b, INVALID);
op(0x0c, INVALID);
op(0x0d, INVALID);
op(0x0e, INVALID);
op(0x0f, INVALID);
}
//selector values 0x10..0x1f have no row and end up here with an all-zero OpInfo
return {};
}
//sub-decoder for primary opcode 0x12. decoding happens in two stages:
//first bits 25..21 are checked for the register-transfer rows (0x00..0x0f);
//every row of that switch returns, so the second switch is only reached for
//selector values 0x10..0x1f, where the function field (bits 5..0) picks the
//vector operation. rows in the second switch all set the Vector flag.
auto RSP::decoderVU(u32 instruction) const -> OpInfo {
switch(instruction >> 21 & 0x1f) {
op(0x00, MFC2, RDef(RT), VUse(VS), Load, Store);
op(0x01, INVALID); //DMFC2
op(0x02, CFC2, RDef(RT), VCUse(RD), Load, Store);
op(0x03, INVALID);
op(0x04, MTC2, RUse(RT), VDef(VS), Load, Store, VNopGroup);
op(0x05, INVALID); //DMTC2
op(0x06, CTC2, RUse(RT), VCDef(RD), Load, Store);
op(0x07, INVALID);
op(0x08, INVALID); //BC2
op(0x09, INVALID);
op(0x0a, INVALID);
op(0x0b, INVALID);
op(0x0c, INVALID);
op(0x0d, INVALID);
op(0x0e, INVALID);
op(0x0f, INVALID);
}
//vector operations, selected by the function field
switch(instruction & 0x3f) {
op(0x00, VMULF, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x01, VMULU, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x02, VRNDP, VDef(VD), VUse(VT), Vector);
op(0x03, VMULQ, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x04, VMUDL, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x05, VMUDM, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x06, VMUDN, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x07, VMUDH, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x08, VMACF, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x09, VMACU, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x0a, VRNDN, VDef(VD), VUse(VT), Vector);
op(0x0b, VMACQ, VDef(VD), Vector);
op(0x0c, VMADL, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x0d, VMADM, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x0e, VMADN, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x0f, VMADH, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x10, VADD, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), Vector);
op(0x11, VSUB, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), Vector);
op(0x12, VZERO, Vector); //VSUT
op(0x13, VABS, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), Vector);
op(0x14, VADDC, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), Vector);
op(0x15, VSUBC, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), Vector);
op(0x16, VZERO, Vector); //VADDB
op(0x17, VZERO, Vector); //VSUBB
op(0x18, VZERO, Vector); //VACCB
op(0x19, VZERO, Vector); //VSUCB
op(0x1a, VZERO, Vector); //VSAD
op(0x1b, VZERO, Vector); //VSAC
op(0x1c, VZERO, Vector); //VSUM
op(0x1d, VSAR, VDef(VD), Vector);
op(0x1e, VZERO, Vector);
op(0x1f, VZERO, Vector);
op(0x20, VLT, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), Vector);
op(0x21, VEQ, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), Vector);
op(0x22, VNE, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), Vector);
op(0x23, VGE, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), Vector);
op(0x24, VCL, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), VCRef(VCE), Vector);
op(0x25, VCH, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), VCRef(VCE), Vector);
op(0x26, VCR, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), VCRef(VCE), Vector);
op(0x27, VMRG, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), Vector);
op(0x28, VAND, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x29, VNAND, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x2a, VOR, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x2b, VNOR, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x2c, VXOR, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x2d, VNXOR, VDef(VD), VUse(VS), VUse(VT), Vector);
op(0x2e, VZERO, Vector);
op(0x2f, VZERO, Vector);
//from here the VS field is recorded in info.vfake (VFRef) rather than info.v.use
op(0x30, VRCP, VDef(VD), VFRef(VS), VUse(VT), Vector);
op(0x31, VRCPL, VDef(VD), VFRef(VS), VUse(VT), Vector);
op(0x32, VRCPH, VDef(VD), VFRef(VS), VUse(VT), Vector);
op(0x33, VMOV, VDef(VD), VFRef(VS), VUse(VT), Vector);
op(0x34, VRSQ, VDef(VD), VFRef(VS), VUse(VT), Vector);
op(0x35, VRSQL, VDef(VD), VFRef(VS), VUse(VT), Vector);
op(0x36, VRSQH, VDef(VD), VFRef(VS), VUse(VT), Vector);
op(0x37, VNOP, VFRef(VD), Vector, VNopGroup);
op(0x38, VZERO, Vector); //VEXTT
op(0x39, VZERO, Vector); //VEXTQ
op(0x3a, VZERO, Vector); //VEXTN
op(0x3b, VZERO, Vector);
op(0x3c, VZERO, Vector); //VINST
op(0x3d, VZERO, Vector); //VINSQ
op(0x3e, VZERO, Vector); //VINSN
op(0x3f, VNOP, Vector); //VNULL
}
//all 64 function values have a row above; this is only a fallback
return {};
}
//sub-decoder for primary opcode 0x32 (vector loads): selects on bits 15..11.
//each row defines vector register VT, reads scalar register RS and sets the
//Load flag. LTV instead marks the whole 8-register group containing VT as
//defined (VGDef) and additionally sets VNopGroup.
auto RSP::decoderLWC2(u32 instruction) const -> OpInfo {
switch(instruction >> 11 & 0x1f) {
op(0x00, LBV, VDef(VT), RUse(RS), Load);
op(0x01, LSV, VDef(VT), RUse(RS), Load);
op(0x02, LLV, VDef(VT), RUse(RS), Load);
op(0x03, LDV, VDef(VT), RUse(RS), Load);
op(0x04, LQV, VDef(VT), RUse(RS), Load);
op(0x05, LRV, VDef(VT), RUse(RS), Load);
op(0x06, LPV, VDef(VT), RUse(RS), Load);
op(0x07, LUV, VDef(VT), RUse(RS), Load);
op(0x08, LHV, VDef(VT), RUse(RS), Load);
op(0x09, LFV, VDef(VT), RUse(RS), Load);
//op(0x0a, LWV, VDef(VT), RUse(RS), Load); //not present on N64 RSP
op(0x0b, LTV, VGDef(VT), RUse(RS), Load, VNopGroup);
}
//selector 0x0a and 0x0c..0x1f have no row and return an all-zero OpInfo
return {};
}
//sub-decoder for primary opcode 0x3a (vector stores): selects on bits 15..11.
//each row reads vector register VT and scalar register RS and sets the Store
//flag. STV instead marks the whole 8-register group containing VT as read (VGUse).
auto RSP::decoderSWC2(u32 instruction) const -> OpInfo {
switch(instruction >> 11 & 0x1f) {
op(0x00, SBV, VUse(VT), RUse(RS), Store);
op(0x01, SSV, VUse(VT), RUse(RS), Store);
op(0x02, SLV, VUse(VT), RUse(RS), Store);
op(0x03, SDV, VUse(VT), RUse(RS), Store);
op(0x04, SQV, VUse(VT), RUse(RS), Store);
op(0x05, SRV, VUse(VT), RUse(RS), Store);
op(0x06, SPV, VUse(VT), RUse(RS), Store);
op(0x07, SUV, VUse(VT), RUse(RS), Store);
op(0x08, SHV, VUse(VT), RUse(RS), Store);
op(0x09, SFV, VUse(VT), RUse(RS), Store);
op(0x0a, SWV, VUse(VT), RUse(RS), Store);
op(0x0b, STV, VGUse(VT), RUse(RS), Store);
}
//selector values 0x0c..0x1f have no row and return an all-zero OpInfo
return {};
}
#undef RUse
#undef RDef
#undef RDefB
#undef VUse
#undef VDef
#undef VGUse
#undef VGDef
#undef VCUse
#undef VCDef
#undef VCRef
#undef VFRef
#undef Load
#undef Store
#undef Branch
#undef Vector
#undef VNopGroup
#undef VCO
#undef VCC
#undef VCE
#undef RD
#undef RT
#undef RS
#undef VD
#undef VS
#undef VT
#undef jp
#undef op

View File

@ -490,10 +490,10 @@ auto RSP::Disassembler::sccRegisterName(u32 index) const -> string {
}
auto RSP::Disassembler::sccRegisterValue(u32 index) const -> string {
u32 value = 0; u32 cycles;
if(index <= 6) value = rsp.readWord((index & 7) << 2, cycles);
u32 value = 0; Thread thread;
if(index <= 6) value = rsp.readWord((index & 7) << 2, thread);
if(index == 7) value = self.status.semaphore; //rsp.readSCC(7) has side-effects
if(index >= 8) value = rdp.readWord((index & 7) << 2, cycles);
if(index >= 8) value = rdp.readWord((index & 7) << 2, thread);
if(showValues) return {sccRegisterName(index), hint("{$", hex(value, 8L), "}")};
return sccRegisterName(index);
}

View File

@ -1,13 +1,9 @@
auto RSP::MFC0(r32& rt, u8 rd) -> void {
u32 cycles = 0;
if((rd & 8) == 0) rt.u32 = Nintendo64::rsp.ioRead ((rd & 7) << 2);
if((rd & 8) != 0) rt.u32 = Nintendo64::rdp.readWord((rd & 7) << 2, cycles);
step(cycles);
if((rd & 8) != 0) rt.u32 = Nintendo64::rdp.readWord((rd & 7) << 2, *this);
}
auto RSP::MTC0(cr32& rt, u8 rd) -> void {
u32 cycles = 0;
if((rd & 8) == 0) Nintendo64::rsp.ioWrite ((rd & 7) << 2, rt.u32);
if((rd & 8) != 0) Nintendo64::rdp.writeWord((rd & 7) << 2, rt.u32, cycles);
step(cycles);
if((rd & 8) != 0) Nintendo64::rdp.writeWord((rd & 7) << 2, rt.u32, *this);
}

View File

@ -6,7 +6,7 @@
#define VS vpu.r[VSn]
#define VT vpu.r[VTn]
#define jp(id, name, ...) case id: return decoder##name(__VA_ARGS__)
#define jp(id, name, ...) case id: return interpreter##name(__VA_ARGS__)
#define op(id, name, ...) case id: return name(__VA_ARGS__)
#define br(id, name, ...) case id: return name(__VA_ARGS__)
#define vu(id, name, ...) case id: \
@ -41,7 +41,7 @@
#define IMMu16 u16(OP)
#define IMMu26 (OP & 0x03ff'ffff)
auto RSP::decoderEXECUTE() -> void {
auto RSP::interpreterEXECUTE() -> void {
switch(OP >> 26) {
jp(0x00, SPECIAL);
jp(0x01, REGIMM);
@ -110,7 +110,7 @@ auto RSP::decoderEXECUTE() -> void {
}
}
auto RSP::decoderSPECIAL() -> void {
auto RSP::interpreterSPECIAL() -> void {
switch(OP & 0x3f) {
op(0x00, SLL, RD, RT, SA);
op(0x01, INVALID);
@ -179,7 +179,7 @@ auto RSP::decoderSPECIAL() -> void {
}
}
auto RSP::decoderREGIMM() -> void {
auto RSP::interpreterREGIMM() -> void {
switch(OP >> 16 & 0x1f) {
br(0x00, BLTZ, RS, IMMi16);
br(0x01, BGEZ, RS, IMMi16);
@ -216,7 +216,7 @@ auto RSP::decoderREGIMM() -> void {
}
}
auto RSP::decoderSCC() -> void {
auto RSP::interpreterSCC() -> void {
switch(OP >> 21 & 0x1f) {
op(0x00, MFC0, RT, RDn);
op(0x01, INVALID); //DMFC0
@ -237,7 +237,7 @@ auto RSP::decoderSCC() -> void {
}
}
auto RSP::decoderVU() -> void {
auto RSP::interpreterVU() -> void {
#define E (OP >> 7 & 15)
switch(OP >> 21 & 0x1f) {
vu(0x00, MFC2, RT, VS);
@ -331,7 +331,7 @@ auto RSP::decoderVU() -> void {
#undef DE
}
auto RSP::decoderLWC2() -> void {
auto RSP::interpreterLWC2() -> void {
#define E (OP >> 7 & 15)
#define IMMi7 i7(OP)
switch(OP >> 11 & 0x1f) {
@ -352,7 +352,7 @@ auto RSP::decoderLWC2() -> void {
#undef IMMi7
}
auto RSP::decoderSWC2() -> void {
auto RSP::interpreterSWC2() -> void {
#define E (OP >> 7 & 15)
#define IMMi7 i7(OP)
switch(OP >> 11 & 0x1f) {
@ -390,6 +390,7 @@ auto RSP::INVALID() -> void {
#undef jp
#undef op
#undef br
#undef vu
#undef OP
#undef RD

View File

@ -1,4 +1,4 @@
auto RSP::readWord(u32 address, u32& cycles) -> u32 {
auto RSP::readWord(u32 address, Thread& thread) -> u32 {
if(address <= 0x0403'ffff) {
if(address & 0x1000) return imem.read<Word>(address);
else return dmem.read<Word>(address);
@ -67,7 +67,7 @@ auto RSP::ioRead(u32 address) -> u32 {
return data;
}
auto RSP::writeWord(u32 address, u32 data, u32& cycles) -> void {
auto RSP::writeWord(u32 address, u32 data, Thread& thread) -> void {
if(address <= 0x0403'ffff) {
if(address & 0x1000) return recompiler.invalidate(address & 0xfff), imem.write<Word>(address, data);
else return dmem.write<Word>(address, data);
@ -156,7 +156,7 @@ auto RSP::ioWrite(u32 address, u32 data_) -> void {
debugger.ioSCC(Write, address, data);
}
auto RSP::Status::readWord(u32 address, u32& cycles) -> u32 {
auto RSP::Status::readWord(u32 address, Thread& thread) -> u32 {
address = (address & 0x7ffff) >> 2;
n32 data;
@ -177,7 +177,7 @@ auto RSP::Status::readWord(u32 address, u32& cycles) -> u32 {
return data;
}
auto RSP::Status::writeWord(u32 address, u32 data_, u32& cycles) -> void {
auto RSP::Status::writeWord(u32 address, u32 data_, Thread& thread) -> void {
address = (address & 0x7ffff) >> 2;
n32 data = data_;

View File

@ -38,6 +38,7 @@ auto RSP::Recompiler::block(u12 address) -> Block* {
auto size = measure(address);
auto hashcode = hash(address, size);
hashcode ^= self.pipeline.hash();
BlockHashPair pair;
pair.hashcode = hashcode;
@ -66,14 +67,35 @@ auto RSP::Recompiler::emit(u12 address) -> Block* {
reset();
}
pipeline = self.pipeline;
auto block = (Block*)allocator.acquire(sizeof(Block));
beginFunction(3);
u12 start = address;
bool hasBranched = 0;
while(true) {
pipeline.begin();
u32 instruction = self.imem.read<Word>(address);
OpInfo op0 = self.decoderEXECUTE(instruction);
pipeline.issue(op0);
bool branched = emitEXECUTE(instruction);
if(!pipeline.singleIssue && !branched && u12(address + 4) != start) {
u32 instruction = self.imem.read<Word>(address + 4);
OpInfo op1 = self.decoderEXECUTE(instruction);
if(RSP::canDualIssue(op0, op1)) {
mov32(reg(1), imm(0));
call(&RSP::instructionEpilogue);
address += 4;
pipeline.issue(op1);
branched = emitEXECUTE(instruction);
}
}
pipeline.end();
mov32(reg(1), imm(pipeline.clocks));
call(&RSP::instructionEpilogue);
address += 4;
if(hasBranched || address == start) break;
@ -82,9 +104,13 @@ auto RSP::Recompiler::emit(u12 address) -> Block* {
}
jumpEpilog();
//reset clocks to zero every time block is executed
pipeline.clocks = 0;
memory::jitprotect(false);
block->code = endFunction();
block->size = address - start;
block->pipeline = pipeline;
//print(hex(PC, 8L), " ", instructions, " ", size(), "\n");
return block;

View File

@ -3,6 +3,7 @@
namespace ares::Nintendo64 {
RSP rsp;
#include "decoder.cpp"
#include "dma.cpp"
#include "io.cpp"
#include "interpreter.cpp"
@ -29,12 +30,10 @@ auto RSP::unload() -> void {
}
auto RSP::main() -> void {
if(status.halted) return step(128);
instruction();
}
auto RSP::step(u32 clocks) -> void {
Thread::clock += clocks;
while(Thread::clock < 0) {
if(status.halted) return step(128);
instruction();
}
}
auto RSP::instruction() -> void {
@ -44,18 +43,40 @@ auto RSP::instruction() -> void {
}
if constexpr(Accuracy::RSP::Interpreter) {
pipeline.begin();
pipeline.address = ipu.pc;
pipeline.instruction = imem.read<Word>(pipeline.address);
OpInfo op0 = decoderEXECUTE(pipeline.instruction);
pipeline.issue(op0);
debugger.instruction();
decoderEXECUTE();
instructionEpilogue();
step(3);
interpreterEXECUTE();
if(!pipeline.singleIssue && !op0.branch()) {
u32 instruction = imem.read<Word>(ipu.pc + 4);
OpInfo op1 = decoderEXECUTE(instruction);
if(canDualIssue(op0, op1)) {
instructionEpilogue(0);
pipeline.address = ipu.pc;
pipeline.instruction = instruction;
pipeline.issue(op1);
debugger.instruction();
interpreterEXECUTE();
}
}
pipeline.end();
instructionEpilogue(0);
}
//this handles all stepping for the interpreter
//with the recompiler, it only steps for taken branch stalls
step(pipeline.clocks);
}
auto RSP::instructionEpilogue() -> s32 {
auto RSP::instructionEpilogue(u32 clocks) -> s32 {
if constexpr(Accuracy::RSP::Recompiler) {
step(3);
step(clocks);
}
ipu.r[0].u32 = 0;
@ -63,7 +84,12 @@ auto RSP::instructionEpilogue() -> s32 {
switch(branch.state) {
case Branch::Step: ipu.pc += 4; return status.halted;
case Branch::Take: ipu.pc += 4; branch.delaySlot(); return status.halted;
case Branch::DelaySlot: ipu.pc = branch.pc; branch.reset(); return 1;
case Branch::DelaySlot:
ipu.pc = branch.pc;
branch.reset();
pipeline.stall();
if(branch.pc & 4) pipeline.singleIssue = 1;
return 1;
}
unreachable;
@ -116,7 +142,9 @@ auto RSP::power(bool reset) -> void {
if constexpr(Accuracy::RSP::Recompiler) {
auto buffer = ares::Memory::FixedAllocator::get().tryAcquire(4_MiB);
memory::jitprotect(false);
recompiler.allocator.resize(4_MiB, bump_allocator::executable | bump_allocator::zero_fill, buffer);
memory::jitprotect(true);
recompiler.reset();
}

View File

@ -30,16 +30,132 @@ struct RSP : Thread, Memory::RCP<RSP> {
auto unload() -> void;
auto main() -> void;
auto step(u32 clocks) -> void;
auto instruction() -> void;
auto instructionEpilogue() -> s32;
auto instructionEpilogue(u32 clocks) -> s32;
auto power(bool reset) -> void;
//static description of one decoded instruction: which registers it reads and
//writes (as bitmasks, one bit per register) plus a set of classification flags
struct OpInfo {
  //bit values stored in `flags`
  enum : u32 {
    Load      = 0x01,
    Store     = 0x02,
    Branch    = 0x04,
    Vector    = 0x08,
    VNopGroup = 0x10,  //dual issue conflicts with VNOP
  };

  u32 flags;
  u32 vfake;  //only affects dual issue logic

  //use/def masks for scalar registers (r), vector registers (v)
  //and vector control registers (vc)
  struct {
    u32 use;
    u32 def;
  } r, v, vc;

  //flag queries
  auto load()   const -> bool { return (flags & Load)   != 0; }
  auto store()  const -> bool { return (flags & Store)  != 0; }
  auto branch() const -> bool { return (flags & Branch) != 0; }
  auto vector() const -> bool { return (flags & Vector) != 0; }
};
//decides whether op1 may issue in the same cycle as the preceding op0.
//returns true only when none of the conflicts below applies.
static auto canDualIssue(const OpInfo& op0, const OpInfo& op1) -> bool {
  //must be one SU and one VU
  if(op0.vector() == op1.vector()) return false;

  //second op cannot read/write vector registers written by the first
  if(op0.v.def & (op1.v.use | op1.v.def)) return false;

  //the same logic applies to vector control registers
  if(op0.vc.def & (op1.vc.use | op1.vc.def)) return false;

  //certain instructions conflict due to "fake" uses from misinterpreted fields
  //such false conflicts only occur with VNOP if the preceding instruction is MTC2 or LTV
  const bool firstInGroup   = (op0.flags & OpInfo::VNopGroup) != 0;
  const bool secondInGroup  = (op1.flags & OpInfo::VNopGroup) != 0;
  const bool fakeUseApplies = firstInGroup || !secondInGroup;
  if(fakeUseApplies && (op0.v.def & op1.vfake)) return false;

  return true;
}
//tracks what the last three issue slots loaded/wrote so that register and
//store hazards of the slot currently being issued can be turned into stall
//clocks. usage visible in this struct: begin() -> issue() one or more ops ->
//end(); `clocks` then holds the cost accumulated since begin().
struct Pipeline {
u32 address;
u32 instruction;
u32 clocks; //accumulated clocks; reset by begin(), +3 per end() and per stall()
u1 singleIssue; //set from current.branch at the end of every slot
//what one already-issued slot did
struct Stage {
u1 load;
u32 rWrite;
u32 vWrite;
} previous[3]; //[0] is the most recent slot, [2] the oldest
//the slot being assembled by issue(); cleared by end()
struct : Stage {
u1 store;
u1 branch;
u32 rRead;
u32 vRead;
} current;
//CRC32 over singleIssue and the load/rWrite/vWrite fields of the three
//previous stages (each 32-bit mask fed one byte at a time, low byte first).
//address, instruction, clocks and `current` are not part of the hash.
auto hash() const -> u32 {
Hash::CRC32 hash;
hash.input(u8(singleIssue));
for(auto& p : previous) {
hash.input(u8(p.load));
for(auto n : range(4)) hash.input(u8(p.rWrite >> n * 8));
for(auto n : range(4)) hash.input(u8(p.vWrite >> n * 8));
}
return hash.value();
}
//starts a new slot: only the clock counter is reset, history is kept
auto begin() -> void {
clocks = 0;
}
//finishes the current slot: applies register-read and store hazards (which
//may insert stalls), then pushes `current` into the history and charges 3 clocks
auto end() -> void {
readGPR(current.rRead);
readVR(current.vRead);
if(current.store) store();
singleIssue = current.branch;
previous[2] = previous[1];
previous[1] = previous[0];
previous[0] = current;
current = {};
clocks += 3;
}
//inserts one empty slot into the history and charges 3 clocks
auto stall() -> void {
previous[2] = previous[1];
previous[1] = previous[0];
previous[0] = {};
clocks += 3;
}
//merges one decoded op into the current slot; may be called more than once
//per slot since every field is OR-accumulated
auto issue(const OpInfo& op) -> void {
current.rRead |= op.r.use;
current.rWrite |= op.r.def & ~1; //zero register can't be written
current.vRead |= op.v.use;
current.vWrite |= op.v.def;
current.load |= op.load();
current.store |= op.store();
current.branch |= op.branch();
}
private:
//scalar read hazard: 2 stalls if the most recent slot wrote one of the
//registers in `mask`, otherwise 1 stall if the slot before it did
auto readGPR(u32 mask) -> Pipeline& {
if(mask & previous[0].rWrite) {
stall(), stall();
} else if(mask & previous[1].rWrite) {
stall();
}
return *this;
}
//vector read hazard: 3, 2 or 1 stalls depending on whether the writer is
//found in previous[0], previous[1] or previous[2] (first match wins)
auto readVR(u32 mask) -> Pipeline& {
if(mask & previous[0].vWrite) {
stall(), stall(), stall();
} else if(mask & previous[1].vWrite) {
stall(), stall();
} else if(mask & previous[2].vWrite) {
stall();
}
return *this;
}
//store hazard: stall until previous[1] is no longer a load. each stall
//shifts an empty stage in at previous[0], so this loops at most twice
auto store() -> void {
while(previous[1].load) {
stall();
}
}
} pipeline;
//dma.cpp
@ -47,8 +163,8 @@ struct RSP : Thread, Memory::RCP<RSP> {
auto dmaTransferStep() -> void;
//io.cpp
auto readWord(u32 address, u32& cycles) -> u32;
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
auto readWord(u32 address, Thread& thread) -> u32;
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
auto ioRead(u32 address) -> u32;
auto ioWrite(u32 address, u32 data) -> void;
@ -80,8 +196,8 @@ struct RSP : Thread, Memory::RCP<RSP> {
Status(RSP& self) : self(self) {}
//io.cpp
auto readWord(u32 address, u32& cycles) -> u32;
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
auto readWord(u32 address, Thread& thread) -> u32;
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
n1 semaphore;
n1 halted = 1;
@ -316,13 +432,22 @@ struct RSP : Thread, Memory::RCP<RSP> {
u16 inverseSquareRoots[512];
//decoder.cpp
auto decoderEXECUTE() -> void;
auto decoderSPECIAL() -> void;
auto decoderREGIMM() -> void;
auto decoderSCC() -> void;
auto decoderVU() -> void;
auto decoderLWC2() -> void;
auto decoderSWC2() -> void;
auto decoderEXECUTE(u32 instruction) const -> OpInfo;
auto decoderSPECIAL(u32 instruction) const -> OpInfo;
auto decoderREGIMM(u32 instruction) const -> OpInfo;
auto decoderSCC(u32 instruction) const -> OpInfo;
auto decoderVU(u32 instruction) const -> OpInfo;
auto decoderLWC2(u32 instruction) const -> OpInfo;
auto decoderSWC2(u32 instruction) const -> OpInfo;
//interpreter.cpp
auto interpreterEXECUTE() -> void;
auto interpreterSPECIAL() -> void;
auto interpreterREGIMM() -> void;
auto interpreterSCC() -> void;
auto interpreterVU() -> void;
auto interpreterLWC2() -> void;
auto interpreterSWC2() -> void;
auto INVALID() -> void;
@ -333,11 +458,13 @@ struct RSP : Thread, Memory::RCP<RSP> {
struct Block {
auto execute(RSP& self) -> void {
self.pipeline = pipeline; //must be updated first so instructionEpilog() can handle taken branch
((void (*)(RSP*, IPU*, VU*))code)(&self, &self.ipu, &self.vpu);
}
u8* code;
u12 size;
Pipeline pipeline; //state at *end* of block excepting taken branch stall
};
struct BlockHashPair {
@ -385,6 +512,7 @@ struct RSP : Thread, Memory::RCP<RSP> {
return s <= e ? smask & emask : smask | emask;
}
Pipeline pipeline;
bump_allocator allocator;
array<Block*[1024]> context;
hashset<BlockHashPair> blocks;

View File

@ -5,6 +5,12 @@ auto RSP::serialize(serializer& s) -> void {
s(pipeline.address);
s(pipeline.instruction);
s(pipeline.singleIssue);
for(auto& p : pipeline.previous) {
s(p.load);
s(p.rWrite);
s(p.vWrite);
}
s(dma.pending);
s(dma.current);

View File

@ -1,4 +1,4 @@
auto SI::readWord(u32 address, u32& cycles) -> u32 {
auto SI::readWord(u32 address, Thread& thread) -> u32 {
if(address <= 0x048f'ffff) return ioRead(address);
if (unlikely(io.ioBusy)) {
@ -54,7 +54,7 @@ auto SI::ioRead(u32 address) -> u32 {
return data;
}
auto SI::writeWord(u32 address, u32 data, u32& cycles) -> void {
auto SI::writeWord(u32 address, u32 data, Thread& thread) -> void {
if(address <= 0x048f'ffff) return ioWrite(address, data);
if(io.ioBusy) return;

View File

@ -25,8 +25,8 @@ struct SI : Memory::RCP<SI> {
//io.cpp
auto ioRead(u32 address) -> u32;
auto ioWrite(u32 address, u32 data) -> void;
auto readWord(u32 address, u32& cycles) -> u32;
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
auto readWord(u32 address, Thread& thread) -> u32;
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
auto writeFinished() -> void;
auto writeForceFinish() -> void;

View File

@ -1,4 +1,4 @@
static const string SerializerVersion = "v131";
static const string SerializerVersion = "v133.1";
auto System::serialize(bool synchronize) -> serializer {
serializer s;

View File

@ -18,6 +18,17 @@ auto load(Node::System& node, string name) -> bool {
}
//Applies a named option. Only Vulkan-related names are recognised, and only
//when built with VULKAN; every call returns true regardless of the name.
auto option(string name, string value) -> bool {
  #if defined(VULKAN)
  if(name == "Enable GPU acceleration") {
    vulkan.enable = value.boolean();
  } else if(name == "Quality") {
    //unrecognised quality strings leave the current upscale untouched
    if(value == "SD") {
      vulkan.internalUpscale = 1;
    } else if(value == "HD") {
      vulkan.internalUpscale = 2;
    } else if(value == "UHD") {
      vulkan.internalUpscale = 4;
    }
  } else if(name == "Supersampling") {
    vulkan.supersampleScanout = value.boolean();
  } else if(name == "Disable Video Interface Processing") {
    vulkan.disableVideoInterfaceProcessing = value.boolean();
  } else if(name == "Weave Deinterlacing") {
    vulkan.weaveDeinterlacing = value.boolean();
  }

  //derived settings are recomputed on every call:
  //supersampling is forced off at 1x, and the output scale drops to 1 while supersampling
  if(vulkan.internalUpscale == 1) {
    vulkan.supersampleScanout = false;
  }
  vulkan.outputUpscale = vulkan.supersampleScanout ? 1 : vulkan.internalUpscale;
  #endif
  return true;
}
@ -38,8 +49,7 @@ auto System::game() -> string {
}
auto System::run() -> void {
while(!vi.refreshed) cpu.main();
vi.refreshed = false;
cpu.main();
}
auto System::load(Node::System& root, string name) -> bool {
@ -57,9 +67,11 @@ auto System::load(Node::System& root, string name) -> bool {
if(name.find("NTSC")) {
information.region = Region::NTSC;
information.videoFrequency = 48'681'812;
}
if(name.find("PAL")) {
information.region = Region::PAL;
information.videoFrequency = 49'656'530;
}
node = Node::System::create(information.name);
@ -90,6 +102,9 @@ auto System::load(Node::System& root, string name) -> bool {
rsp.load(node);
rdp.load(node);
if(_DD()) dd.load(node);
#if defined(VULKAN)
vulkan.load(node);
#endif
return true;
}
@ -97,6 +112,9 @@ auto System::unload() -> void {
if(!node) return;
save();
if(vi.screen) vi.screen->quit(); //stop video thread
#if defined(VULKAN)
vulkan.unload();
#endif
cartridgeSlot.unload();
controllerPort1.unload();
controllerPort2.unload();

View File

@ -8,6 +8,7 @@ struct System {
auto region() const -> Region { return information.region; }
auto _DD() const -> bool { return information.dd; }
auto frequency() const -> u32 { return information.frequency; }
auto videoFrequency() const -> u32 { return information.videoFrequency; }
//system.cpp
auto game() -> string;
@ -26,6 +27,7 @@ private:
string name = "Nintendo 64";
Region region = Region::NTSC;
u32 frequency = 93'750'000 * 2;
u32 videoFrequency = 48'681'812;
bool dd = false;
} information;

View File

@ -1,4 +1,4 @@
auto VI::readWord(u32 address, u32& cycles) -> u32 {
auto VI::readWord(u32 address, Thread& thread) -> u32 {
address = (address & 0xfffff) >> 2;
n32 data;
@ -96,7 +96,7 @@ auto VI::readWord(u32 address, u32& cycles) -> u32 {
return data;
}
auto VI::writeWord(u32 address, u32 data_, u32& cycles) -> void {
auto VI::writeWord(u32 address, u32 data_, Thread& thread) -> void {
address = (address & 0xfffff) >> 2;
n32 data = data_;

View File

@ -11,7 +11,6 @@ VI vi;
bool BobDeinterlace = false;
bool FastVI = false;
u32* OutFrameBuffer;
auto VI::load(Node::Object parent) -> void {
node = parent->append<Node::Object>("VI");
@ -19,6 +18,12 @@ auto VI::load(Node::Object parent) -> void {
u32 width = 640;
u32 height = 576;
#if defined(VULKAN)
if (vulkan.enable) {
width *= vulkan.outputUpscale;
height *= vulkan.outputUpscale;
}
#endif
screen = node->append<Node::Video::Screen>("Screen", width, height);
screen->setRefresh({&VI::refresh, this});
screen->colors((1 << 24) + (1 << 15), [&](n32 color) -> n64 {
@ -36,8 +41,19 @@ auto VI::load(Node::Object parent) -> void {
return a << 48 | r << 32 | g << 16 | b << 0;
}
});
#if defined(VULKAN)
if(vulkan.enable) {
screen->setSize(vulkan.outputUpscale * 640, vulkan.outputUpscale * 480);
if(!vulkan.supersampleScanout) {
screen->setScale(1.0 / vulkan.outputUpscale, 1.0 / vulkan.outputUpscale);
}
} else {
screen->setSize(640, 480);
}
#else
screen->setSize(640, 480);
#endif
debugger.load(node);
}
@ -50,30 +66,125 @@ auto VI::unload() -> void {
}
auto VI::main() -> void {
//field is not compared
if(io.vcounter << 1 == io.coincidence) {
mi.raise(MI::IRQ::VI);
while(Thread::clock < 0) {
if(++io.vcounter >= (Region::NTSC() ? 262 : 312) + io.field) {
io.vcounter = 0;
io.field = io.field + 1 & io.serrate;
#if defined(VULKAN)
if (vulkan.enable) {
gpuOutputValid = vulkan.scanoutAsync(io.field);
vulkan.frame();
}
#endif
angrylion::UpdateScreen(FastVI);
angrylion::FinalizeFrame(BobDeinterlace);
refreshed = true;
#if false
screen->frame();
#endif
}
//field is not compared
if(io.vcounter << 1 == io.coincidence) {
mi.raise(MI::IRQ::VI);
}
if(Region::NTSC()) step(system.frequency() / 60 / 262);
if(Region::PAL ()) step(system.frequency() / 50 / 312);
}
if(++io.vcounter >= (Region::NTSC() ? 262 : 312) + io.field) {
io.vcounter = 0;
io.field = io.field + 1 & io.serrate;
angrylion::UpdateScreen(FastVI);
refresh();
}
if(Region::NTSC()) step(system.frequency() / 60 / 262);
if(Region::PAL ()) step(system.frequency() / 50 / 312);
}
//Advances this thread's clock by `clocks`.
auto VI::step(u32 clocks) -> void {
  Thread::clock = Thread::clock + clocks;
}
//Fills the screen's pixel buffer for one frame.
//With VULKAN built in, enabled, and a valid GPU scanout, the mapped RGBA
//scanout is copied; otherwise the frame is sampled from RDRAM using the VI
//registers in `io`.
auto VI::refresh() -> void {
  angrylion::FinalizeFrame(BobDeinterlace);
  refreshed = true;

  #if defined(VULKAN)
  if(vulkan.enable && gpuOutputValid) {
    const u8* rgba = nullptr;
    u32 width = 0, height = 0;
    vulkan.mapScanoutRead(rgba, width, height);
    if(rgba) {
      screen->setViewport(0, 0, width, height);
      for(u32 y : range(height)) {
        u32 y_fix = y;
        // When weave interlacing is active, we need to fix the order of interleaved lines for the image output
        // but only when the VI is set to interlace and we don't use supersampling (causes severe bugs)
        // Otherwise proceed as normal
        if(io.serrate == 1 && vulkan.weaveDeinterlacing && !vulkan.supersampleScanout) {
          y_fix = (y % 2 == 0) ? y + 1 : y - 1;  // Swap each even/odd line
          // With an odd height the last (even) row has no partner; y + 1 would
          // address a row past the mapped buffer, so keep the row in place.
          if(y_fix >= height) y_fix = y;
        }
        auto source = rgba + width * y_fix * sizeof(u32);
        auto target = screen->pixels(1).data() + y * vulkan.outputUpscale * 640;
        for(u32 x : range(width)) {
          //pack source bytes 0,1,2 into bits 16-23, 8-15, 0-7; byte 3 is ignored
          target[x] = source[x * 4 + 0] << 16 | source[x * 4 + 1] << 8 | source[x * 4 + 2] << 0;
        }
      }
    } else {
      //no scanout data: show a single zero pixel
      screen->setViewport(0, 0, 1, 1);
      screen->pixels(1).data()[0] = 0;
    }
    vulkan.unmapScanoutRead();
    vulkan.endScanout();
    return;
  }
  #endif

  if(io.serrate == 0) screen->setProgressive(0);
  if(io.serrate == 1) screen->setInterlace(!io.field);

  //visible window per region; both regions use 640 columns
  u32 hscan_start = Region::NTSC() ? 108 : 128;
  u32 vscan_start = Region::NTSC() ?  34 :  44;
  u32 hscan_len   = Region::NTSC() ? 640 : 640;
  u32 vscan_len   = Region::NTSC() ? 480 : 576;
  u32 hscan_stop  = hscan_start + hscan_len;
  u32 vscan_stop  = vscan_start + vscan_len;
  screen->setViewport(0, 0, hscan_len, vscan_len);

  //active area requested by the VI registers, clamped to the visible window
  i32 dy0 = vi.io.vstart;
  i32 dy1 = vi.io.vend; if (dy1 < dy0) dy1 = vscan_stop;  //vend before vstart: run to the bottom of the window
  i32 dx0 = vi.io.hstart;
  i32 dx1 = vi.io.hend;
  dy0 = max(vscan_start, dy0);
  dy1 = min(vscan_stop,  dy1);
  dx0 = max(hscan_start, dx0);
  dx1 = min(hscan_stop,  dx1);

  // Undocumented VI guard-band "hardware bug" (match parallel-RDP)
  if(dx0 >= hscan_start) dx0 += 8;
  if(dx1 <  hscan_stop)  dx1 -= 7;

  u32 pitch = vi.io.width;

  if(vi.io.colorDepth == 2) {
    //15bpp
    u32 y0 = vi.io.ysubpixel + vi.io.yscale * (dy0 - vi.io.vstart);
    for(i32 dy = dy0; dy < dy1; dy++) {
      //when interlaced (serrate set), only lines whose parity equals !field are drawn
      if(!io.serrate || (dy & 1) == !io.field) {
        //y0 >> 11 is the source row; 2 bytes per pixel
        u32 address = vi.io.dramAddress + (y0 >> 11) * pitch * 2;
        auto line = screen->pixels(1).data() + (dy - vscan_start) * hscan_len;
        u32 x0 = vi.io.xsubpixel + vi.io.xscale * (dx0 - vi.io.hstart);
        for(i32 dx = dx0; dx < dx1; dx++) {
          u16 data = rdram.ram.read<Half>(address + (x0 >> 10) * 2);
          //lowest bit of the halfword is dropped and bit 24 is set
          //NOTE(review): bit 24 presumably selects the 15-bit colour range - confirm against VI::load
          line[dx - hscan_start] = 1 << 24 | data >> 1;
          x0 += vi.io.xscale;
        }
      }
      y0 += vi.io.yscale;
    }
  }

  if(vi.io.colorDepth == 3) {
    //24bpp
    u32 y0 = vi.io.ysubpixel + vi.io.yscale * (dy0 - vi.io.vstart);
    for(i32 dy = dy0; dy < dy1; dy++) {
      if(!io.serrate || (dy & 1) == !io.field) {
        //y0 >> 11 is the source row; 4 bytes per pixel
        u32 address = vi.io.dramAddress + (y0 >> 11) * pitch * 4;
        auto line = screen->pixels(1).data() + (dy - vscan_start) * hscan_len;
        u32 x0 = vi.io.xsubpixel + vi.io.xscale * (dx0 - vi.io.hstart);
        for(i32 dx = dx0; dx < dx1; dx++) {
          u32 data = rdram.ram.read<Word>(address + (x0 >> 10) * 4);
          //lowest byte of the word is discarded
          line[dx - hscan_start] = data >> 8;
          x0 += vi.io.xscale;
        }
      }
      y0 += vi.io.yscale;
    }
  }
}
auto VI::power(bool reset) -> void {
@ -81,6 +192,10 @@ auto VI::power(bool reset) -> void {
screen->power();
io = {};
refreshed = false;
#if defined(VULKAN)
gpuOutputValid = false;
#endif
}
}

View File

@ -19,13 +19,12 @@ struct VI : Thread, Memory::RCP<VI> {
auto unload() -> void;
auto main() -> void;
auto step(u32 clocks) -> void;
auto refresh() -> void;
auto power(bool reset) -> void;
//io.cpp
auto readWord(u32 address, u32& cycles) -> u32;
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
auto readWord(u32 address, Thread& thread) -> u32;
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
//serialization.cpp
auto serialize(serializer&) -> void;