diff --git a/Assets/dll/ares64.wbx.gz b/Assets/dll/ares64.wbx.gz index 8c244c044c..8228b014a8 100644 Binary files a/Assets/dll/ares64.wbx.gz and b/Assets/dll/ares64.wbx.gz differ diff --git a/waterbox/ares64/ares/ares/ares/ares.hpp b/waterbox/ares64/ares/ares/ares/ares.hpp index c6dbd1638f..705ac53ddf 100644 --- a/waterbox/ares64/ares/ares/ares/ares.hpp +++ b/waterbox/ares64/ares/ares/ares/ares.hpp @@ -40,7 +40,7 @@ using namespace nall; namespace ares { static const string Name = "ares"; - static const string Version = "128"; + static const string Version = "129"; static const string Copyright = "ares team, Near"; static const string License = "ISC"; static const string LicenseURI = "https://opensource.org/licenses/ISC"; @@ -49,7 +49,7 @@ namespace ares { //incremented only when serialization format changes static const u32 SerializerSignature = 0x31545342; //"BST1" (little-endian) - static const string SerializerVersion = "128"; + static const string SerializerVersion = "129"; namespace VFS { using Pak = shared_pointer; diff --git a/waterbox/ares64/ares/ares/ares/node/debugger/tracer/instruction.hpp b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/instruction.hpp index 1ec6f171d1..3c7142da8f 100644 --- a/waterbox/ares64/ares/ares/ares/node/debugger/tracer/instruction.hpp +++ b/waterbox/ares64/ares/ares/ares/node/debugger/tracer/instruction.hpp @@ -28,7 +28,7 @@ struct Instruction : Tracer { } auto address(u64 address) -> bool { - address &= (u128(1) << _addressBits) - 1; //mask upper bits of address + address &= ~0ull >> (64 - _addressBits); //mask upper bits of address _address = address; /*address >>= _addressMask; //clip unneeded alignment bits (to reduce _masks size) @@ -57,7 +57,7 @@ struct Instruction : Tracer { //call when writing to executable RAM to support self-modifying code. auto invalidate(u64 address) -> void { /*if(unlikely(_mask && updateMasks())) { - address &= (u128(1) << _addressBits) - 1; + address &= ~0ull >> (64 - _addressBits); address >>= _addressMask; _masks[address >> 3] &= ~(1 << (address & 7)); }*/ @@ -72,6 +72,7 @@ struct Instruction : Tracer { ); _omitted = 0; } + string output{ _component, " ", hex(_address, _addressBits + 3 >> 2), " ", @@ -103,7 +104,7 @@ struct Instruction : Tracer { protected: auto updateMasks() -> bool { - auto size = 1ull << _addressBits >> _addressMask >> 3; + auto size = 1ull << (_addressBits - _addressMask - 3); if(!_mask || !size) return _masks.reset(), false; if(_masks.size() == size) return true; _masks.reset(); diff --git a/waterbox/ares64/ares/ares/ares/scheduler/thread.cpp b/waterbox/ares64/ares/ares/ares/scheduler/thread.cpp index 01abd89d82..79419be879 100644 --- a/waterbox/ares64/ares/ares/ares/scheduler/thread.cpp +++ b/waterbox/ares64/ares/ares/ares/scheduler/thread.cpp @@ -84,7 +84,8 @@ inline auto Thread::synchronize() -> void { template inline auto Thread::synchronize(Thread& thread, P&&... p) -> void { //switching to another thread does not guarantee it will catch up before switching back. - while(thread.clock() < clock()) { + //make sure not to switch to threads that were destroyed during synchronization + while(thread.clock() < clock() && thread.handle()) { //disable synchronization for auxiliary threads during scheduler synchronization. //synchronization can begin inside of this while loop. if(scheduler.synchronizing()) break; diff --git a/waterbox/ares64/ares/ares/n64/accuracy.hpp b/waterbox/ares64/ares/ares/n64/accuracy.hpp index a07c845355..f5c54e1be2 100644 --- a/waterbox/ares64/ares/ares/n64/accuracy.hpp +++ b/waterbox/ares64/ares/ares/n64/accuracy.hpp @@ -15,7 +15,7 @@ struct Accuracy { static constexpr bool Recompiler = !Interpreter; //VU instructions - static constexpr bool SISD = 0 | Reference | !Architecture::amd64 | !Architecture::sse41; + static constexpr bool SISD = 0;// | Reference | !Architecture::amd64 | !Architecture::sse41; static constexpr bool SIMD = !SISD; }; diff --git a/waterbox/ares64/ares/ares/n64/cpu/context.cpp b/waterbox/ares64/ares/ares/n64/cpu/context.cpp index 467b6acf1b..770cfd76a5 100644 --- a/waterbox/ares64/ares/ares/n64/cpu/context.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/context.cpp @@ -18,7 +18,8 @@ auto CPU::Context::setMode() -> void { break; } - if(bits == 32 || bits == 64) { + if(bits == 32) { + physMask = 0x1fff'ffff; segment[0] = Segment::Mapped; segment[1] = Segment::Mapped; segment[2] = Segment::Mapped; @@ -47,6 +48,7 @@ auto CPU::Context::setMode() -> void { } if(bits == 64) { + physMask = 0x7fff'ffff; for(auto n : range(8)) switch(mode) { case Mode::Kernel: diff --git a/waterbox/ares64/ares/ares/n64/cpu/cpu.hpp b/waterbox/ares64/ares/ares/n64/cpu/cpu.hpp index d3424072e5..5a7a9ac0be 100644 --- a/waterbox/ares64/ares/ares/n64/cpu/cpu.hpp +++ b/waterbox/ares64/ares/ares/n64/cpu/cpu.hpp @@ -95,6 +95,7 @@ struct CPU : Thread { auto setMode() -> void; bool endian; + u64 physMask; u32 mode; u32 bits; u32 segment[8]; //512_MiB chunks @@ -167,9 +168,8 @@ struct CPU : Thread { }; //tlb.cpp - auto load(u32 address) -> Match; - auto store(u32 address) -> Match; - auto exception(u32 address) -> void; + auto load(u64 vaddr) -> Match; + auto store(u64 vaddr) -> Match; struct Entry { //scc-tlb.cpp @@ -195,20 +195,20 @@ struct CPU : Thread { } tlb{*this}; //memory.cpp - auto kernelSegment32(u32 address) const -> Context::Segment; - auto supervisorSegment32(u32 address) const -> Context::Segment; - auto userSegment32(u32 address) const -> Context::Segment; + auto kernelSegment32(u32 vaddr) const -> Context::Segment; + auto supervisorSegment32(u32 vaddr) const -> Context::Segment; + auto userSegment32(u32 vaddr) const -> Context::Segment; - auto kernelSegment64(u64 address) const -> Context::Segment; - auto supervisorSegment64(u64 address) const -> Context::Segment; - auto userSegment64(u64 address) const -> Context::Segment; + auto kernelSegment64(u64 vaddr) const -> Context::Segment; + auto supervisorSegment64(u64 vaddr) const -> Context::Segment; + auto userSegment64(u64 vaddr) const -> Context::Segment; - auto segment(u64 address) -> Context::Segment; - auto devirtualize(u64 address) -> maybe; - auto fetch(u64 address) -> u32; - template auto read(u64 address) -> maybe; - template auto write(u64 address, u64 data) -> bool; - auto addressException(u64 address) -> void; + auto segment(u64 vaddr) -> Context::Segment; + auto devirtualize(u64 vaddr) -> maybe; + auto fetch(u64 vaddr) -> u32; + template auto read(u64 vaddr) -> maybe; + template auto write(u64 vaddr, u64 data) -> bool; + auto addressException(u64 vaddr) -> void; //serialization.cpp auto serialize(serializer&) -> void; @@ -522,6 +522,9 @@ struct CPU : Thread { //30: Error Exception Program Counter n64 epcError; + + //other + n64 latch; } scc; //interpreter-scc.cpp diff --git a/waterbox/ares64/ares/ares/n64/cpu/dcache.cpp b/waterbox/ares64/ares/ares/n64/cpu/dcache.cpp index 403172ccd2..4ec2409691 100644 --- a/waterbox/ares64/ares/ares/n64/cpu/dcache.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/dcache.cpp @@ -1,12 +1,12 @@ auto CPU::DataCache::Line::hit(u32 address) const -> bool { - return valid && tag == (address & ~0xe000'0fff); + return valid && tag == (address & ~0x0000'0fff); } template auto CPU::DataCache::Line::fill(u32 address, u64 data) -> void { cpu.step(40); valid = 1; dirty = 1; - tag = address & ~0xe000'0fff; + tag = address & ~0x0000'0fff; //read words according to critical doubleword first scheme switch(address & 8) { case 0: @@ -34,7 +34,7 @@ auto CPU::DataCache::Line::fill(u32 address) -> void { cpu.step(40); valid = 1; dirty = 0; - tag = address & ~0xe000'0fff; + tag = address & ~0x0000'0fff; //read words according to critical doubleword first scheme switch(address & 8) { case 0: diff --git a/waterbox/ares64/ares/ares/n64/cpu/icache.cpp b/waterbox/ares64/ares/ares/n64/cpu/icache.cpp index 9c5d9218f3..b8115c374b 100644 --- a/waterbox/ares64/ares/ares/n64/cpu/icache.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/icache.cpp @@ -1,11 +1,11 @@ auto CPU::InstructionCache::Line::hit(u32 address) const -> bool { - return valid && tag == (address & ~0xe000'0fff); + return valid && tag == (address & ~0x0000'0fff); } auto CPU::InstructionCache::Line::fill(u32 address) -> void { cpu.step(48); valid = 1; - tag = address & ~0xe000'0fff; + tag = address & ~0x0000'0fff; words[0] = bus.read(tag | index | 0x00); words[1] = bus.read(tag | index | 0x04); words[2] = bus.read(tag | index | 0x08); @@ -42,7 +42,7 @@ auto CPU::InstructionCache::step(u32 address) -> void { if(!line.hit(address)) { cpu.step(48); line.valid = 1; - line.tag = address & ~0xe000'0fff; + line.tag = address & ~0x0000'0fff; } else { cpu.step(2); } diff --git a/waterbox/ares64/ares/ares/n64/cpu/interpreter-ipu.cpp b/waterbox/ares64/ares/ares/n64/cpu/interpreter-ipu.cpp index 3b7271fb58..9137f9a768 100644 --- a/waterbox/ares64/ares/ares/n64/cpu/interpreter-ipu.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/interpreter-ipu.cpp @@ -119,8 +119,8 @@ auto CPU::BREAK() -> void { } auto CPU::CACHE(u8 operation, cr64& rs, s16 imm) -> void { - u32 address = rs.u64 + imm; - if (auto phys = devirtualize(address)) address = *phys; + u32 address; + if (auto phys = devirtualize(rs.u64 + imm)) address = *phys; else return; switch(operation) { @@ -394,88 +394,88 @@ auto CPU::LD(r64& rt, cr64& rs, s16 imm) -> void { auto CPU::LDL(r64& rt, cr64& rs, s16 imm) -> void { if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); - u64 address = rs.u64 + imm; + u64 vaddr = rs.u64 + imm; u64 data = rt.u64; if(context.littleEndian()) - switch(address & 7) { + switch(vaddr & 7) { case 0: data &= 0x00ffffffffffffffull; - if(auto byte = read(address & ~7 | 7)) data |= byte() << 56; else return; + if(auto byte = read(vaddr & ~7 | 7)) data |= byte() << 56; else return; break; case 1: data &= 0x0000ffffffffffffull; - if(auto half = read(address & ~7 | 6)) data |= half() << 48; else return; + if(auto half = read(vaddr & ~7 | 6)) data |= half() << 48; else return; break; case 2: data &= 0x000000ffffffffffull; - if(auto byte = read(address & ~7 | 5)) data |= byte() << 56; else return; - if(auto half = read(address & ~7 | 6)) data |= half() << 40; else return; + if(auto byte = read(vaddr & ~7 | 5)) data |= byte() << 56; else return; + if(auto half = read(vaddr & ~7 | 6)) data |= half() << 40; else return; break; case 3: data &= 0x00000000ffffffffull; - if(auto word = read(address & ~7 | 4)) data |= word() << 32; else return; + if(auto word = read(vaddr & ~7 | 4)) data |= word() << 32; else return; break; case 4: data &= 0x0000000000ffffffull; - if(auto byte = read(address & ~7 | 3)) data |= byte() << 56; else return; - if(auto word = read(address & ~7 | 4)) data |= word() << 24; else return; + if(auto byte = read(vaddr & ~7 | 3)) data |= byte() << 56; else return; + if(auto word = read(vaddr & ~7 | 4)) data |= word() << 24; else return; break; case 5: data &= 0x000000000000ffffull; - if(auto half = read(address & ~7 | 2)) data |= half() << 48; else return; - if(auto word = read(address & ~7 | 4)) data |= word() << 16; else return; + if(auto half = read(vaddr & ~7 | 2)) data |= half() << 48; else return; + if(auto word = read(vaddr & ~7 | 4)) data |= word() << 16; else return; break; case 6: data &= 0x00000000000000ffull; - if(auto byte = read(address & ~7 | 1)) data |= byte() << 56; else return; - if(auto half = read(address & ~7 | 2)) data |= half() << 40; else return; - if(auto word = read(address & ~7 | 4)) data |= word() << 8; else return; + if(auto byte = read(vaddr & ~7 | 1)) data |= byte() << 56; else return; + if(auto half = read(vaddr & ~7 | 2)) data |= half() << 40; else return; + if(auto word = read(vaddr & ~7 | 4)) data |= word() << 8; else return; break; case 7: data &= 0x0000000000000000ull; - if(auto dual = read(address & ~7 | 0)) data |= dual() << 0; else return; + if(auto dual = read(vaddr & ~7 | 0)) data |= dual() << 0; else return; break; } if(context.bigEndian()) - switch(address & 7) { + switch(vaddr & 7) { case 0: data &= 0x0000000000000000ull; - if(auto dual = read(address & ~7 | 0)) data |= dual() << 0; else return; + if(auto dual = read(vaddr & ~7 | 0)) data |= dual() << 0; else return; break; case 1: data &= 0x00000000000000ffull; - if(auto byte = read(address & ~7 | 1)) data |= byte() << 56; else return; - if(auto half = read(address & ~7 | 2)) data |= half() << 40; else return; - if(auto word = read(address & ~7 | 4)) data |= word() << 8; else return; + if(auto byte = read(vaddr & ~7 | 1)) data |= byte() << 56; else return; + if(auto half = read(vaddr & ~7 | 2)) data |= half() << 40; else return; + if(auto word = read(vaddr & ~7 | 4)) data |= word() << 8; else return; break; case 2: data &= 0x000000000000ffffull; - if(auto half = read(address & ~7 | 2)) data |= half() << 48; else return; - if(auto word = read(address & ~7 | 4)) data |= word() << 16; else return; + if(auto half = read(vaddr & ~7 | 2)) data |= half() << 48; else return; + if(auto word = read(vaddr & ~7 | 4)) data |= word() << 16; else return; break; case 3: data &= 0x0000000000ffffffull; - if(auto byte = read(address & ~7 | 3)) data |= byte() << 56; else return; - if(auto word = read(address & ~7 | 4)) data |= word() << 24; else return; + if(auto byte = read(vaddr & ~7 | 3)) data |= byte() << 56; else return; + if(auto word = read(vaddr & ~7 | 4)) data |= word() << 24; else return; break; case 4: data &= 0x00000000ffffffffull; - if(auto word = read(address & ~7 | 4)) data |= word() << 32; else return; + if(auto word = read(vaddr & ~7 | 4)) data |= word() << 32; else return; break; case 5: data &= 0x000000ffffffffffull; - if(auto byte = read(address & ~7 | 5)) data |= byte() << 56; else return; - if(auto half = read(address & ~7 | 6)) data |= half() << 40; else return; + if(auto byte = read(vaddr & ~7 | 5)) data |= byte() << 56; else return; + if(auto half = read(vaddr & ~7 | 6)) data |= half() << 40; else return; break; case 6: data &= 0x0000ffffffffffffull; - if(auto half = read(address & ~7 | 6)) data |= half() << 48; else return; + if(auto half = read(vaddr & ~7 | 6)) data |= half() << 48; else return; break; case 7: data &= 0x00ffffffffffffffull; - if(auto byte = read(address & ~7 | 7)) data |= byte() << 56; else return; + if(auto byte = read(vaddr & ~7 | 7)) data |= byte() << 56; else return; break; } @@ -484,88 +484,88 @@ auto CPU::LDL(r64& rt, cr64& rs, s16 imm) -> void { auto CPU::LDR(r64& rt, cr64& rs, s16 imm) -> void { if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); - u64 address = rs.u64 + imm; + u64 vaddr = rs.u64 + imm; u64 data = rt.u64; if(context.littleEndian()) - switch(address & 7) { + switch(vaddr & 7) { case 0: data &= 0x0000000000000000ull; - if(auto dual = read(address & ~7 | 0)) data |= dual() << 0; else return; + if(auto dual = read(vaddr & ~7 | 0)) data |= dual() << 0; else return; break; case 1: data &= 0xff00000000000000ull; - if(auto word = read(address & ~7 | 0)) data |= word() << 24; else return; - if(auto half = read(address & ~7 | 4)) data |= half() << 8; else return; - if(auto byte = read(address & ~7 | 6)) data |= byte() << 0; else return; + if(auto word = read(vaddr & ~7 | 0)) data |= word() << 24; else return; + if(auto half = read(vaddr & ~7 | 4)) data |= half() << 8; else return; + if(auto byte = read(vaddr & ~7 | 6)) data |= byte() << 0; else return; break; case 2: data &= 0xffff000000000000ull; - if(auto word = read(address & ~7 | 0)) data |= word() << 16; else return; - if(auto half = read(address & ~7 | 4)) data |= half() << 0; else return; + if(auto word = read(vaddr & ~7 | 0)) data |= word() << 16; else return; + if(auto half = read(vaddr & ~7 | 4)) data |= half() << 0; else return; break; case 3: data &= 0xffffff0000000000ull; - if(auto word = read(address & ~7 | 0)) data |= word() << 8; else return; - if(auto byte = read(address & ~7 | 4)) data |= byte() << 0; else return; + if(auto word = read(vaddr & ~7 | 0)) data |= word() << 8; else return; + if(auto byte = read(vaddr & ~7 | 4)) data |= byte() << 0; else return; break; case 4: data &= 0xffffffff00000000ull; - if(auto word = read(address & ~7 | 0)) data |= word() << 0; else return; + if(auto word = read(vaddr & ~7 | 0)) data |= word() << 0; else return; break; case 5: data &= 0xffffffffff000000ull; - if(auto half = read(address & ~7 | 0)) data |= half() << 8; else return; - if(auto byte = read(address & ~7 | 2)) data |= byte() << 0; else return; + if(auto half = read(vaddr & ~7 | 0)) data |= half() << 8; else return; + if(auto byte = read(vaddr & ~7 | 2)) data |= byte() << 0; else return; break; case 6: data &= 0xffffffffffff0000ull; - if(auto half = read(address & ~7 | 0)) data |= half() << 0; else return; + if(auto half = read(vaddr & ~7 | 0)) data |= half() << 0; else return; break; case 7: data &= 0xffffffffffffff00ull; - if(auto byte = read(address & ~7 | 0)) data |= byte() << 0; else return; + if(auto byte = read(vaddr & ~7 | 0)) data |= byte() << 0; else return; break; } if(context.bigEndian()) - switch(address & 7) { + switch(vaddr & 7) { case 0: data &= 0xffffffffffffff00ull; - if(auto byte = read(address & ~7 | 0)) data |= byte() << 0; else return; + if(auto byte = read(vaddr & ~7 | 0)) data |= byte() << 0; else return; break; case 1: data &= 0xffffffffffff0000ull; - if(auto half = read(address & ~7 | 0)) data |= half() << 0; else return; + if(auto half = read(vaddr & ~7 | 0)) data |= half() << 0; else return; break; case 2: data &= 0xffffffffff000000ull; - if(auto half = read(address & ~7 | 0)) data |= half() << 8; else return; - if(auto byte = read(address & ~7 | 2)) data |= byte() << 0; else return; + if(auto half = read(vaddr & ~7 | 0)) data |= half() << 8; else return; + if(auto byte = read(vaddr & ~7 | 2)) data |= byte() << 0; else return; break; case 3: data &= 0xffffffff00000000ull; - if(auto word = read(address & ~7 | 0)) data |= word() << 0; else return; + if(auto word = read(vaddr & ~7 | 0)) data |= word() << 0; else return; break; case 4: data &= 0xffffff0000000000ull; - if(auto word = read(address & ~7 | 0)) data |= word() << 8; else return; - if(auto byte = read(address & ~7 | 4)) data |= byte() << 0; else return; + if(auto word = read(vaddr & ~7 | 0)) data |= word() << 8; else return; + if(auto byte = read(vaddr & ~7 | 4)) data |= byte() << 0; else return; break; case 5: data &= 0xffff000000000000ull; - if(auto word = read(address & ~7 | 0)) data |= word() << 16; else return; - if(auto half = read(address & ~7 | 4)) data |= half() << 0; else return; + if(auto word = read(vaddr & ~7 | 0)) data |= word() << 16; else return; + if(auto half = read(vaddr & ~7 | 4)) data |= half() << 0; else return; break; case 6: data &= 0xff00000000000000ull; - if(auto word = read(address & ~7 | 0)) data |= word() << 24; else return; - if(auto half = read(address & ~7 | 4)) data |= half() << 8; else return; - if(auto byte = read(address & ~7 | 6)) data |= byte() << 0; else return; + if(auto word = read(vaddr & ~7 | 0)) data |= word() << 24; else return; + if(auto half = read(vaddr & ~7 | 4)) data |= half() << 8; else return; + if(auto byte = read(vaddr & ~7 | 6)) data |= byte() << 0; else return; break; case 7: data &= 0x0000000000000000ull; - if(auto dual = read(address & ~7 | 0)) data |= dual() << 0; else return; + if(auto dual = read(vaddr & ~7 | 0)) data |= dual() << 0; else return; break; } @@ -582,9 +582,9 @@ auto CPU::LHU(r64& rt, cr64& rs, s16 imm) -> void { auto CPU::LL(r64& rt, cr64& rs, s16 imm) -> void { if(auto address = devirtualize(rs.u64 + imm)) { - if (auto data = read(*address)) { + if (auto data = read(rs.u64 + imm)) { rt.u64 = s32(*data); - scc.ll = (*address & 0x1fff'ffff) >> 4; + scc.ll = *address >> 4; scc.llbit = 1; } } @@ -593,9 +593,9 @@ auto CPU::LL(r64& rt, cr64& rs, s16 imm) -> void { auto CPU::LLD(r64& rt, cr64& rs, s16 imm) -> void { if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); if(auto address = devirtualize(rs.u64 + imm)) { - if (auto data = read(*address)) { + if (auto data = read(rs.u64 + imm)) { rt.u64 = *data; - scc.ll = (*address & 0x1fff'ffff) >> 4; + scc.ll = *address >> 4; scc.llbit = 1; } } @@ -610,110 +610,119 @@ auto CPU::LW(r64& rt, cr64& rs, s16 imm) -> void { } auto CPU::LWL(r64& rt, cr64& rs, s16 imm) -> void { - u64 address = rs.u64 + imm; + u64 vaddr = rs.u64 + imm; u32 data = rt.u32; + auto mem = read(vaddr & ~3); + if (!mem) return; if(context.littleEndian()) - switch(address & 3) { + switch(vaddr & 3) { case 0: data &= 0x00ffffff; - if(auto byte = read(address & ~3 | 3)) data |= byte() << 24; else return; + *mem <<= 24; break; case 1: data &= 0x0000ffff; - if(auto half = read(address & ~3 | 2)) data |= half() << 16; else return; + *mem <<= 16; break; case 2: data &= 0x000000ff; - if(auto byte = read(address & ~3 | 1)) data |= byte() << 24; else return; - if(auto half = read(address & ~3 | 2)) data |= half() << 8; else return; + *mem <<= 8; break; case 3: data &= 0x00000000; - if(auto word = read(address & ~3 | 0)) data |= word() << 0; else return; + *mem <<= 0; break; } if(context.bigEndian()) - switch(address & 3) { + switch(vaddr & 3) { case 0: data &= 0x00000000; - if(auto word = read(address & ~3 | 0)) data |= word() << 0; else return; + *mem <<= 0; break; case 1: data &= 0x000000ff; - if(auto byte = read(address & ~3 | 1)) data |= byte() << 24; else return; - if(auto half = read(address & ~3 | 2)) data |= half() << 8; else return; + *mem <<= 8; break; case 2: data &= 0x0000ffff; - if(auto half = read(address & ~3 | 2)) data |= half() << 16; else return; + *mem <<= 16; break; case 3: data &= 0x00ffffff; - if(auto byte = read(address & ~3 | 3)) data |= byte() << 24; else return; + *mem <<= 24; break; } + data |= *mem; rt.s64 = (s32)data; } auto CPU::LWR(r64& rt, cr64& rs, s16 imm) -> void { - u64 address = rs.u64 + imm; + u64 vaddr = rs.u64 + imm; u32 data = rt.u32; + auto mem = read(vaddr & ~3); + if (!mem) return; if(context.littleEndian()) - switch(address & 3) { + switch(vaddr & 3) { case 0: data &= 0x00000000; - if(auto word = read(address & ~3 | 0)) data |= word() << 0; else return; + *mem >>= 0; + data |= *mem; rt.s64 = (s32)data; break; case 1: data &= 0xff000000; - if(auto half = read(address & ~3 | 0)) data |= half() << 8; else return; - if(auto byte = read(address & ~3 | 2)) data |= byte() << 0; else return; + *mem >>= 8; + data |= *mem; if(context.bits == 32) rt.u32 = data; if(context.bits == 64) rt.s64 = (s32)data; break; case 2: data &= 0xffff0000; - if(auto half = read(address & ~3 | 0)) data |= half() << 0; else return; + *mem >>= 16; + data |= *mem; if(context.bits == 32) rt.u32 = data; if(context.bits == 64) rt.s64 = (s32)data; break; case 3: data &= 0xffffff00; - if(auto byte = read(address & ~3 | 0)) data |= byte() << 0; else return; + *mem >>= 24; + data |= *mem; if(context.bits == 32) rt.u32 = data; if(context.bits == 64) rt.s64 = (s32)data; break; } if(context.bigEndian()) - switch(address & 3) { + switch(vaddr & 3) { case 0: data &= 0xffffff00; - if(auto byte = read(address & ~3 | 0)) data |= byte() << 0; else return; + *mem >>= 24; + data |= *mem; if(context.bits == 32) rt.u32 = data; if(context.bits == 64) rt.s64 = (s32)data; break; case 1: data &= 0xffff0000; - if(auto half = read(address & ~3 | 0)) data |= half() << 0; else return; + *mem >>= 16; + data |= *mem; if(context.bits == 32) rt.u32 = data; if(context.bits == 64) rt.s64 = (s32)data; break; case 2: data &= 0xff000000; - if(auto half = read(address & ~3 | 0)) data |= half() << 8; else return; - if(auto byte = read(address & ~3 | 2)) data |= byte() << 0; else return; + *mem >>= 8; + data |= *mem; if(context.bits == 32) rt.u32 = data; if(context.bits == 64) rt.s64 = (s32)data; break; case 3: data &= 0x00000000; - if(auto word = read(address & ~3 | 0)) data |= word() << 0; else return; + *mem >>= 0; + data |= *mem; rt.s64 = (s32)data; break; } @@ -770,25 +779,21 @@ auto CPU::SB(cr64& rt, cr64& rs, s16 imm) -> void { } auto CPU::SC(r64& rt, cr64& rs, s16 imm) -> void { - if(auto address = devirtualize(rs.u64 + imm)) { - if(scc.llbit) { - scc.llbit = 0; - rt.u64 = write(*address, rt.u32); - } else { - rt.u64 = 0; - } + if(scc.llbit) { + scc.llbit = 0; + rt.u64 = write(rs.u64 + imm, rt.u32); + } else { + rt.u64 = 0; } } auto CPU::SCD(r64& rt, cr64& rs, s16 imm) -> void { if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); - if(auto address = devirtualize(rs.u64 + imm)) { - if(scc.llbit) { - scc.llbit = 0; - rt.u64 = write(*address, rt.u64); - } else { - rt.u64 = 0; - } + if(scc.llbit) { + scc.llbit = 0; + rt.u64 = write(rs.u64 + imm, rt.u64); + } else { + rt.u64 = 0; } } @@ -799,144 +804,144 @@ auto CPU::SD(cr64& rt, cr64& rs, s16 imm) -> void { auto CPU::SDL(cr64& rt, cr64& rs, s16 imm) -> void { if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); - u64 address = rs.u64 + imm; + u64 vaddr = rs.u64 + imm; u64 data = rt.u64; if(context.littleEndian()) - switch(address & 7) { + switch(vaddr & 7) { case 0: - if(!write(address & ~7 | 7, data >> 56)) return; + if(!write(vaddr & ~7 | 7, data >> 56)) return; break; case 1: - if(!write(address & ~7 | 6, data >> 48)) return; + if(!write(vaddr & ~7 | 6, data >> 48)) return; break; case 2: - if(!write(address & ~7 | 5, data >> 56)) return; - if(!write(address & ~7 | 6, data >> 40)) return; + if(!write(vaddr & ~7 | 5, data >> 56)) return; + if(!write(vaddr & ~7 | 6, data >> 40)) return; break; case 3: - if(!write(address & ~7 | 4, data >> 32)) return; + if(!write(vaddr & ~7 | 4, data >> 32)) return; break; case 4: - if(!write(address & ~7 | 3, data >> 56)) return; - if(!write(address & ~7 | 4, data >> 24)) return; + if(!write(vaddr & ~7 | 3, data >> 56)) return; + if(!write(vaddr & ~7 | 4, data >> 24)) return; break; case 5: - if(!write(address & ~7 | 2, data >> 48)) return; - if(!write(address & ~7 | 4, data >> 16)) return; + if(!write(vaddr & ~7 | 2, data >> 48)) return; + if(!write(vaddr & ~7 | 4, data >> 16)) return; break; case 6: - if(!write(address & ~7 | 1, data >> 56)) return; - if(!write(address & ~7 | 2, data >> 40)) return; - if(!write(address & ~7 | 4, data >> 8)) return; + if(!write(vaddr & ~7 | 1, data >> 56)) return; + if(!write(vaddr & ~7 | 2, data >> 40)) return; + if(!write(vaddr & ~7 | 4, data >> 8)) return; break; case 7: - if(!write(address & ~7 | 0, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 0)) return; break; } if(context.bigEndian()) - switch(address & 7) { + switch(vaddr & 7) { case 0: - if(!write(address & ~7 | 0, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 0)) return; break; case 1: - if(!write(address & ~7 | 1, data >> 56)) return; - if(!write(address & ~7 | 2, data >> 40)) return; - if(!write(address & ~7 | 4, data >> 8)) return; + if(!write(vaddr & ~7 | 1, data >> 56)) return; + if(!write(vaddr & ~7 | 2, data >> 40)) return; + if(!write(vaddr & ~7 | 4, data >> 8)) return; break; case 2: - if(!write(address & ~7 | 2, data >> 48)) return; - if(!write(address & ~7 | 4, data >> 16)) return; + if(!write(vaddr & ~7 | 2, data >> 48)) return; + if(!write(vaddr & ~7 | 4, data >> 16)) return; break; case 3: - if(!write(address & ~7 | 3, data >> 56)) return; - if(!write(address & ~7 | 4, data >> 24)) return; + if(!write(vaddr & ~7 | 3, data >> 56)) return; + if(!write(vaddr & ~7 | 4, data >> 24)) return; break; case 4: - if(!write(address & ~7 | 4, data >> 32)) return; + if(!write(vaddr & ~7 | 4, data >> 32)) return; break; case 5: - if(!write(address & ~7 | 5, data >> 56)) return; - if(!write(address & ~7 | 6, data >> 40)) return; + if(!write(vaddr & ~7 | 5, data >> 56)) return; + if(!write(vaddr & ~7 | 6, data >> 40)) return; break; case 6: - if(!write(address & ~7 | 6, data >> 48)) return; + if(!write(vaddr & ~7 | 6, data >> 48)) return; break; case 7: - if(!write(address & ~7 | 7, data >> 56)) return; + if(!write(vaddr & ~7 | 7, data >> 56)) return; break; } } auto CPU::SDR(cr64& rt, cr64& rs, s16 imm) -> void { if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction(); - u64 address = rs.u64 + imm; + u64 vaddr = rs.u64 + imm; u64 data = rt.u64; if(context.littleEndian()) - switch(address & 7) { + switch(vaddr & 7) { case 0: - if(!write(address & ~7 | 0, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 0)) return; break; case 1: - if(!write(address & ~7 | 0, data >> 24)) return; - if(!write(address & ~7 | 4, data >> 8)) return; - if(!write(address & ~7 | 6, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 24)) return; + if(!write(vaddr & ~7 | 4, data >> 8)) return; + if(!write(vaddr & ~7 | 6, data >> 0)) return; break; case 2: - if(!write(address & ~7 | 0, data >> 16)) return; - if(!write(address & ~7 | 4, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 16)) return; + if(!write(vaddr & ~7 | 4, data >> 0)) return; break; case 3: - if(!write(address & ~7 | 0, data >> 8)) return; - if(!write(address & ~7 | 4, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 8)) return; + if(!write(vaddr & ~7 | 4, data >> 0)) return; break; case 4: - if(!write(address & ~7 | 0, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 0)) return; break; case 5: - if(!write(address & ~7 | 0, data >> 8)) return; - if(!write(address & ~7 | 2, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 8)) return; + if(!write(vaddr & ~7 | 2, data >> 0)) return; break; case 6: - if(!write(address & ~7 | 0, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 0)) return; break; case 7: - if(!write(address & ~7 | 0, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 0)) return; break; } if(context.bigEndian()) - switch(address & 7) { + switch(vaddr & 7) { case 0: - if(!write(address & ~7 | 0, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 0)) return; break; case 1: - if(!write(address & ~7 | 0, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 0)) return; break; case 2: - if(!write(address & ~7 | 0, data >> 8)) return; - if(!write(address & ~7 | 2, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 8)) return; + if(!write(vaddr & ~7 | 2, data >> 0)) return; break; case 3: - if(!write(address & ~7 | 0, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 0)) return; break; case 4: - if(!write(address & ~7 | 0, data >> 8)) return; - if(!write(address & ~7 | 4, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 8)) return; + if(!write(vaddr & ~7 | 4, data >> 0)) return; break; case 5: - if(!write(address & ~7 | 0, data >> 16)) return; - if(!write(address & ~7 | 4, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 16)) return; + if(!write(vaddr & ~7 | 4, data >> 0)) return; break; case 6: - if(!write(address & ~7 | 0, data >> 24)) return; - if(!write(address & ~7 | 4, data >> 8)) return; - if(!write(address & ~7 | 6, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 24)) return; + if(!write(vaddr & ~7 | 4, data >> 8)) return; + if(!write(vaddr & ~7 | 6, data >> 0)) return; break; case 7: - if(!write(address & ~7 | 0, data >> 0)) return; + if(!write(vaddr & ~7 | 0, data >> 0)) return; break; } } @@ -999,79 +1004,79 @@ auto CPU::SW(cr64& rt, cr64& rs, s16 imm) -> void { } auto CPU::SWL(cr64& rt, cr64& rs, s16 imm) -> void { - u64 address = rs.u64 + imm; + u64 vaddr = rs.u64 + imm; u32 data = rt.u32; if(context.littleEndian()) - switch(address & 3) { + switch(vaddr & 3) { case 0: - if(!write(address & ~3 | 3, data >> 24)) return; + if(!write(vaddr & ~3 | 3, data >> 24)) return; break; case 1: - if(!write(address & ~3 | 2, data >> 16)) return; + if(!write(vaddr & ~3 | 2, data >> 16)) return; break; case 2: - if(!write(address & ~3 | 1, data >> 24)) return; - if(!write(address & ~3 | 2, data >> 8)) return; + if(!write(vaddr & ~3 | 1, data >> 24)) return; + if(!write(vaddr & ~3 | 2, data >> 8)) return; break; case 3: - if(!write(address & ~3 | 0, data >> 0)) return; + if(!write(vaddr & ~3 | 0, data >> 0)) return; break; } if(context.bigEndian()) - switch(address & 3) { + switch(vaddr & 3) { case 0: - if(!write(address & ~3 | 0, data >> 0)) return; + if(!write(vaddr & ~3 | 0, data >> 0)) return; break; case 1: - if(!write(address & ~3 | 1, data >> 24)) return; - if(!write(address & ~3 | 2, data >> 8)) return; + if(!write(vaddr & ~3 | 1, data >> 24)) return; + if(!write(vaddr & ~3 | 2, data >> 8)) return; break; case 2: - if(!write(address & ~3 | 2, data >> 16)) return; + if(!write(vaddr & ~3 | 2, data >> 16)) return; break; case 3: - if(!write(address & ~3 | 3, data >> 24)) return; + if(!write(vaddr & ~3 | 3, data >> 24)) return; break; } } auto CPU::SWR(cr64& rt, cr64& rs, s16 imm) -> void { - u64 address = rs.u64 + imm; + u64 vaddr = rs.u64 + imm; u32 data = rt.u32; if(context.littleEndian()) - switch(address & 3) { + switch(vaddr & 3) { case 0: - if(!write(address & ~3 | 0, data >> 0)) return; + if(!write(vaddr & ~3 | 0, data >> 0)) return; break; case 1: - if(!write(address & ~3 | 0, data >> 8)) return; - if(!write(address & ~3 | 2, data >> 0)) return; + if(!write(vaddr & ~3 | 0, data >> 8)) return; + if(!write(vaddr & ~3 | 2, data >> 0)) return; break; case 2: - if(!write(address & ~3 | 0, data >> 0)) return; + if(!write(vaddr & ~3 | 0, data >> 0)) return; break; case 3: - if(!write(address & ~3 | 0, data >> 0)) return; + if(!write(vaddr & ~3 | 0, data >> 0)) return; break; } if(context.bigEndian()) - switch(address & 3) { + switch(vaddr & 3) { case 0: - if(!write(address & ~3 | 0, data >> 0)) return; + if(!write(vaddr & ~3 | 0, data >> 0)) return; break; case 1: - if(!write(address & ~3 | 0, data >> 0)) return; + if(!write(vaddr & ~3 | 0, data >> 0)) return; break; case 2: - if(!write(address & ~3 | 0, data >> 8)) return; - if(!write(address & ~3 | 2, data >> 0)) return; + if(!write(vaddr & ~3 | 0, data >> 8)) return; + if(!write(vaddr & ~3 | 2, data >> 0)) return; break; case 3: - if(!write(address & ~3 | 0, data >> 0)) return; + if(!write(vaddr & ~3 | 0, data >> 0)) return; break; } } diff --git a/waterbox/ares64/ares/ares/n64/cpu/interpreter-scc.cpp b/waterbox/ares64/ares/ares/n64/cpu/interpreter-scc.cpp index b7c6c35c60..277a571452 100644 --- a/waterbox/ares64/ares/ares/n64/cpu/interpreter-scc.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/interpreter-scc.cpp @@ -126,11 +126,15 @@ auto CPU::getControlRegister(n5 index) -> u64 { case 30: //error exception program counter data = scc.epcError; break; + default: + data = scc.latch; + break; } return data; } auto CPU::setControlRegister(n5 index, n64 data) -> void { + scc.latch = data; //read-only variables are defined but commented out for documentation purposes switch(index) { case 0: //index @@ -320,6 +324,7 @@ auto CPU::TLBP() -> void { auto& entry = tlb.entry[index]; auto mask = ~entry.pageMask & ~0x1fff; if((entry.virtualAddress & mask) != (scc.tlb.virtualAddress & mask)) continue; + if(entry.region != scc.tlb.region) continue; if(!entry.global[0] || !entry.global[1]) { if(entry.addressSpaceID != scc.tlb.addressSpaceID) continue; } diff --git a/waterbox/ares64/ares/ares/n64/cpu/memory.cpp b/waterbox/ares64/ares/ares/n64/cpu/memory.cpp index c5fb04357e..5efa72943a 100644 --- a/waterbox/ares64/ares/ares/n64/cpu/memory.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/memory.cpp @@ -1,223 +1,225 @@ //32-bit segments -auto CPU::kernelSegment32(u32 address) const -> Context::Segment { - if(address <= 0x7fff'ffff) return Context::Segment::Mapped; //kuseg - if(address <= 0x9fff'ffff) return Context::Segment::Cached; //kseg0 - if(address <= 0xbfff'ffff) return Context::Segment::Direct; //kseg1 - if(address <= 0xdfff'ffff) return Context::Segment::Mapped; //ksseg - if(address <= 0xffff'ffff) return Context::Segment::Mapped; //kseg3 +auto CPU::kernelSegment32(u32 vaddr) const -> Context::Segment { + if(vaddr <= 0x7fff'ffff) return Context::Segment::Mapped; //kuseg + if(vaddr <= 0x9fff'ffff) return Context::Segment::Cached; //kseg0 + if(vaddr <= 0xbfff'ffff) return Context::Segment::Direct; //kseg1 + if(vaddr <= 0xdfff'ffff) return Context::Segment::Mapped; //ksseg + if(vaddr <= 0xffff'ffff) return Context::Segment::Mapped; //kseg3 unreachable; } -auto CPU::supervisorSegment32(u32 address) const -> Context::Segment { - if(address <= 0x7fff'ffff) return Context::Segment::Mapped; //suseg - if(address <= 0xbfff'ffff) return Context::Segment::Unused; - if(address <= 0xdfff'ffff) return Context::Segment::Mapped; //sseg - if(address <= 0xffff'ffff) return Context::Segment::Unused; +auto CPU::supervisorSegment32(u32 vaddr) const -> Context::Segment { + if(vaddr <= 0x7fff'ffff) return Context::Segment::Mapped; //suseg + if(vaddr <= 0xbfff'ffff) return Context::Segment::Unused; + if(vaddr <= 0xdfff'ffff) return Context::Segment::Mapped; //sseg + if(vaddr <= 0xffff'ffff) return Context::Segment::Unused; unreachable; } -auto CPU::userSegment32(u32 address) const -> Context::Segment { - if(address <= 0x7fff'ffff) return Context::Segment::Mapped; //useg - if(address <= 0xffff'ffff) return Context::Segment::Unused; +auto CPU::userSegment32(u32 vaddr) const -> Context::Segment { + if(vaddr <= 0x7fff'ffff) return Context::Segment::Mapped; //useg + if(vaddr <= 0xffff'ffff) return Context::Segment::Unused; unreachable; } //64-bit segments -auto CPU::kernelSegment64(u64 address) const -> Context::Segment { - if(address <= 0x0000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xkuseg - if(address <= 0x3fff'ffff'ffff'ffffull) return Context::Segment::Unused; - if(address <= 0x4000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xksseg - if(address <= 0x7fff'ffff'ffff'ffffull) return Context::Segment::Unused; - if(address <= 0x8000'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* - if(address <= 0x87ff'ffff'ffff'ffffull) return Context::Segment::Unused; - if(address <= 0x8800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* - if(address <= 0x8fff'ffff'ffff'ffffull) return Context::Segment::Unused; - if(address <= 0x9000'0000'ffff'ffffull) return Context::Segment::Direct; //xkphys* - if(address <= 0x97ff'ffff'ffff'ffffull) return Context::Segment::Unused; - if(address <= 0x9800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* - if(address <= 0x9fff'ffff'ffff'ffffull) return Context::Segment::Unused; - if(address <= 0xa000'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* - if(address <= 0xa7ff'ffff'ffff'ffffull) return Context::Segment::Unused; - if(address <= 0xa800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* - if(address <= 0xafff'ffff'ffff'ffffull) return Context::Segment::Unused; - if(address <= 0xb000'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* - if(address <= 0xb7ff'ffff'ffff'ffffull) return Context::Segment::Unused; - if(address <= 0xb800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* - if(address <= 0xbfff'ffff'ffff'ffffull) return Context::Segment::Unused; - if(address <= 0xc000'00ff'7fff'ffffull) return Context::Segment::Mapped; //xkseg - if(address <= 0xffff'ffff'7fff'ffffull) return Context::Segment::Unused; - if(address <= 0xffff'ffff'9fff'ffffull) return Context::Segment::Cached; //ckseg0 - if(address <= 0xffff'ffff'bfff'ffffull) return Context::Segment::Direct; //ckseg1 - if(address <= 0xffff'ffff'dfff'ffffull) return Context::Segment::Mapped; //ckseg2 - if(address <= 0xffff'ffff'ffff'ffffull) return Context::Segment::Mapped; //ckseg3 +auto CPU::kernelSegment64(u64 vaddr) const -> Context::Segment { + if(vaddr <= 0x0000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xkuseg + if(vaddr <= 0x3fff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0x4000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xksseg + if(vaddr <= 0x7fff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0x8000'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(vaddr <= 0x87ff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0x8800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(vaddr <= 0x8fff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0x9000'0000'ffff'ffffull) return Context::Segment::Direct; //xkphys* + if(vaddr <= 0x97ff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0x9800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(vaddr <= 0x9fff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0xa000'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(vaddr <= 0xa7ff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0xa800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(vaddr <= 0xafff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0xb000'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(vaddr <= 0xb7ff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0xb800'0000'ffff'ffffull) return Context::Segment::Cached; //xkphys* + if(vaddr <= 0xbfff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0xc000'00ff'7fff'ffffull) return Context::Segment::Mapped; //xkseg + if(vaddr <= 0xffff'ffff'7fff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0xffff'ffff'9fff'ffffull) return Context::Segment::Cached; //ckseg0 + if(vaddr <= 0xffff'ffff'bfff'ffffull) return Context::Segment::Direct; //ckseg1 + if(vaddr <= 0xffff'ffff'dfff'ffffull) return Context::Segment::Mapped; //ckseg2 + if(vaddr <= 0xffff'ffff'ffff'ffffull) return Context::Segment::Mapped; //ckseg3 unreachable; } -auto CPU::supervisorSegment64(u64 address) const -> Context::Segment { - if(address <= 0x0000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xsuseg - if(address <= 0x3fff'ffff'ffff'ffffull) return Context::Segment::Unused; - if(address <= 0x4000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xsseg - if(address <= 0xffff'ffff'bfff'ffffull) return Context::Segment::Unused; - if(address <= 0xffff'ffff'dfff'ffffull) return Context::Segment::Mapped; //csseg - if(address <= 0xffff'ffff'ffff'ffffull) return Context::Segment::Unused; +auto CPU::supervisorSegment64(u64 vaddr) const -> Context::Segment { + if(vaddr <= 0x0000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xsuseg + if(vaddr <= 0x3fff'ffff'ffff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0x4000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xsseg + if(vaddr <= 0xffff'ffff'bfff'ffffull) return Context::Segment::Unused; + if(vaddr <= 0xffff'ffff'dfff'ffffull) return Context::Segment::Mapped; //csseg + if(vaddr <= 0xffff'ffff'ffff'ffffull) return Context::Segment::Unused; unreachable; } -auto CPU::userSegment64(u64 address) const -> Context::Segment { - if(address <= 0x0000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xuseg - if(address <= 0xffff'ffff'ffff'ffffull) return Context::Segment::Unused; +auto CPU::userSegment64(u64 vaddr) const -> Context::Segment { + if(vaddr <= 0x0000'00ff'ffff'ffffull) return Context::Segment::Mapped; //xuseg + if(vaddr <= 0xffff'ffff'ffff'ffffull) return Context::Segment::Unused; unreachable; } // -auto CPU::segment(u64 address) -> Context::Segment { - auto segment = context.segment[address >> 29 & 7]; +auto CPU::segment(u64 vaddr) -> Context::Segment { + auto segment = context.segment[vaddr >> 29 & 7]; if(likely(context.bits == 32)) return (Context::Segment)segment; switch(segment) { case Context::Segment::Kernel64: - return kernelSegment64(address); + return kernelSegment64(vaddr); case Context::Segment::Supervisor64: - return supervisorSegment64(address); + return supervisorSegment64(vaddr); case Context::Segment::User64: - return userSegment64(address); + return userSegment64(vaddr); } unreachable; } -auto CPU::devirtualize(u64 address) -> maybe { - switch(context.segment[address >> 29 & 7]) { +auto CPU::devirtualize(u64 vaddr) -> maybe { + switch(segment(vaddr)) { case Context::Segment::Unused: - addressException(address); + addressException(vaddr); exception.addressLoad(); return nothing; case Context::Segment::Mapped: - if(auto match = tlb.load(address)) return match.address; - tlb.exception(address); + if(auto match = tlb.load(vaddr)) return match.address & context.physMask; + addressException(vaddr); return nothing; case Context::Segment::Cached: case Context::Segment::Direct: - return address; + return vaddr & context.physMask; } unreachable; } -auto CPU::fetch(u64 address) -> u32 { - switch(segment(address)) { +auto CPU::fetch(u64 vaddr) -> u32 { + switch(segment(vaddr)) { case Context::Segment::Unused: step(1); - addressException(address); + addressException(vaddr); exception.addressLoad(); return 0; //nop case Context::Segment::Mapped: - if(auto match = tlb.load(address)) { - if(match.cache) return icache.fetch(match.address); + if(auto match = tlb.load(vaddr)) { + if(match.cache) return icache.fetch(match.address & context.physMask); step(1); - return bus.read(match.address); + return bus.read(match.address & context.physMask); } step(1); - tlb.exception(address); + addressException(vaddr); return 0; //nop case Context::Segment::Cached: - return icache.fetch(address); + return icache.fetch(vaddr & context.physMask); case Context::Segment::Direct: step(1); - return bus.read(address); + return bus.read(vaddr & context.physMask); } unreachable; } template -auto CPU::read(u64 address) -> maybe { +auto CPU::read(u64 vaddr) -> maybe { if constexpr(Accuracy::CPU::AddressErrors) { - if(unlikely(address & Size - 1)) { + if(unlikely(vaddr & Size - 1)) { step(1); - addressException(address); + addressException(vaddr); exception.addressLoad(); return nothing; } - if (context.bits == 32 && unlikely((s32)address != address)) { + if (context.bits == 32 && unlikely((s32)vaddr != vaddr)) { step(1); - addressException(address); + addressException(vaddr); exception.addressLoad(); return nothing; } } - switch(segment(address)) { + switch(segment(vaddr)) { case Context::Segment::Unused: step(1); - addressException(address); + addressException(vaddr); exception.addressLoad(); return nothing; case Context::Segment::Mapped: - if(auto match = tlb.load(address)) { - if(match.cache) return dcache.read(match.address); + if(auto match = tlb.load(vaddr)) { + if(match.cache) return dcache.read(match.address & context.physMask); step(1); - return bus.read(match.address); + return bus.read(match.address & context.physMask); } step(1); - tlb.exception(address); + addressException(vaddr); return nothing; case Context::Segment::Cached: - return dcache.read(address); + return dcache.read(vaddr & context.physMask); case Context::Segment::Direct: step(1); - return bus.read(address); + return bus.read(vaddr & context.physMask); } unreachable; } template -auto CPU::write(u64 address, u64 data) -> bool { +auto CPU::write(u64 vaddr, u64 data) -> bool { if constexpr(Accuracy::CPU::AddressErrors) { - if(unlikely(address & Size - 1)) { + if(unlikely(vaddr & Size - 1)) { step(1); - addressException(address); + addressException(vaddr); exception.addressStore(); return false; } - if (context.bits == 32 && unlikely((s32)address != address)) { + if (context.bits == 32 && unlikely((s32)vaddr != vaddr)) { step(1); - addressException(address); + addressException(vaddr); exception.addressStore(); return false; } } - switch(segment(address)) { + switch(segment(vaddr)) { case Context::Segment::Unused: step(1); - addressException(address); + addressException(vaddr); exception.addressStore(); return false; case Context::Segment::Mapped: - if(auto match = tlb.store(address)) { - if(match.cache) return dcache.write(match.address, data), true; + if(auto match = tlb.store(vaddr)) { + if(match.cache) return dcache.write(match.address & context.physMask, data), true; step(1); - return bus.write(match.address, data), true; + return bus.write(match.address & context.physMask, data), true; } step(1); - tlb.exception(address); + addressException(vaddr); return false; case Context::Segment::Cached: - return dcache.write(address, data), true; + return dcache.write(vaddr & context.physMask, data), true; case Context::Segment::Direct: step(1); - return bus.write(address, data), true; + return bus.write(vaddr & context.physMask, data), true; } unreachable; } -auto CPU::addressException(u64 address) -> void { - scc.badVirtualAddress = address; - scc.context.badVirtualAddress = address >> 13; - scc.xcontext.badVirtualAddress = address >> 13; - scc.xcontext.region = address >> 62; +auto CPU::addressException(u64 vaddr) -> void { + scc.badVirtualAddress = vaddr; + scc.tlb.virtualAddress.bit(13,39) = vaddr >> 13; + scc.tlb.region = vaddr >> 62; + scc.context.badVirtualAddress = vaddr >> 13; + scc.xcontext.badVirtualAddress = vaddr >> 13; + scc.xcontext.region = vaddr >> 62; } diff --git a/waterbox/ares64/ares/ares/n64/cpu/serialization.cpp b/waterbox/ares64/ares/ares/n64/cpu/serialization.cpp index 7eda649b49..8c6978e7a5 100644 --- a/waterbox/ares64/ares/ares/n64/cpu/serialization.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/serialization.cpp @@ -8,6 +8,7 @@ auto CPU::serialize(serializer& s) -> void { s(branch.state); s(context.endian); + s(context.physMask); s(context.mode); s(context.bits); s(context.segment); diff --git a/waterbox/ares64/ares/ares/n64/cpu/tlb.cpp b/waterbox/ares64/ares/ares/n64/cpu/tlb.cpp index dd0658c25e..512a841e41 100644 --- a/waterbox/ares64/ares/ares/n64/cpu/tlb.cpp +++ b/waterbox/ares64/ares/ares/n64/cpu/tlb.cpp @@ -1,61 +1,54 @@ -//the N64 TLB is 32-bit only: only the 64-bit XTLB exception vector is used. -auto CPU::TLB::load(u32 address) -> Match { +auto CPU::TLB::load(u64 vaddr) -> Match { for(auto& entry : this->entry) { if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue; - if((u32)(address & entry.addressMaskHi) != (u32)entry.virtualAddress) continue; - bool lo = address & entry.addressSelect; + if((vaddr & entry.addressMaskHi) != entry.virtualAddress) continue; + if(vaddr >> 62 != entry.region) continue; + bool lo = vaddr & entry.addressSelect; if(!entry.valid[lo]) { - exception(address); - self.debugger.tlbLoadInvalid(address); + self.addressException(vaddr); + self.debugger.tlbLoadInvalid(vaddr); self.exception.tlbLoadInvalid(); return {false}; } - physicalAddress = entry.physicalAddress[lo] + (address & entry.addressMaskLo); - self.debugger.tlbLoad(address, physicalAddress); + physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo); + self.debugger.tlbLoad(vaddr, physicalAddress); return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress}; } - exception(address); - self.debugger.tlbLoadMiss(address); + self.addressException(vaddr); + self.debugger.tlbLoadMiss(vaddr); self.exception.tlbLoadMiss(); return {false}; } -auto CPU::TLB::store(u32 address) -> Match { +auto CPU::TLB::store(u64 vaddr) -> Match { for(auto& entry : this->entry) { if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue; - if((u32)(address & entry.addressMaskHi) != (u32)entry.virtualAddress) continue; - bool lo = address & entry.addressSelect; + if((vaddr & entry.addressMaskHi) != entry.virtualAddress) continue; + if(vaddr >> 62 != entry.region) continue; + bool lo = vaddr & entry.addressSelect; if(!entry.valid[lo]) { - exception(address); - self.debugger.tlbStoreInvalid(address); + self.addressException(vaddr); + self.debugger.tlbStoreInvalid(vaddr); self.exception.tlbStoreInvalid(); return {false}; } if(!entry.dirty[lo]) { - exception(address); - self.debugger.tlbModification(address); + self.addressException(vaddr); + self.debugger.tlbModification(vaddr); self.exception.tlbModification(); return {false}; } - physicalAddress = entry.physicalAddress[lo] + (address & entry.addressMaskLo); - self.debugger.tlbStore(address, physicalAddress); + physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo); + self.debugger.tlbStore(vaddr, physicalAddress); return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress}; } - exception(address); - self.debugger.tlbStoreMiss(address); + self.addressException(vaddr); + self.debugger.tlbStoreMiss(vaddr); self.exception.tlbStoreMiss(); return {false}; } -auto CPU::TLB::exception(u32 address) -> void { - self.scc.badVirtualAddress = address; - self.scc.tlb.virtualAddress.bit(13,39) = address >> 13; - self.scc.context.badVirtualAddress = address >> 13; - self.scc.xcontext.badVirtualAddress = address >> 13; - self.scc.xcontext.region = 0; -} - auto CPU::TLB::Entry::synchronize() -> void { pageMask = pageMask & (0b101010101010 << 13); pageMask |= pageMask >> 1; diff --git a/waterbox/ares64/ares/ares/n64/rdp/io.cpp b/waterbox/ares64/ares/ares/n64/rdp/io.cpp index 2f9ec6b7b9..da372a4f40 100644 --- a/waterbox/ares64/ares/ares/n64/rdp/io.cpp +++ b/waterbox/ares64/ares/ares/n64/rdp/io.cpp @@ -22,14 +22,14 @@ auto RDP::readWord(u32 address) -> u32 { data.bit( 0) = command.source; data.bit( 1) = command.freeze; data.bit( 2) = command.flush; - data.bit( 3) = 0; //start gclk? + data.bit( 3) = command.startGclk; data.bit( 4) = command.tmemBusy > 0; data.bit( 5) = command.pipeBusy > 0; data.bit( 6) = command.bufferBusy > 0; data.bit( 7) = command.ready; data.bit( 8) = 0; //DMA busy - data.bit( 9) = 0; //end valid - data.bit(10) = 0; //start valid + data.bit( 9) = command.endValid; + data.bit(10) = command.startValid; } if(address == 4) { @@ -62,18 +62,18 @@ auto RDP::writeWord(u32 address, u32 data_) -> void { if(address == 0) { //DPC_START - command.start = data.bit(0,23) & ~7; - command.current = command.start; + if(!command.startValid) command.start = data.bit(0,23) & ~7; + command.startValid = 1; } if(address == 1) { //DPC_END command.end = data.bit(0,23) & ~7; - if(command.end > command.current) { - command.freeze = 0; - render(); - command.ready = 1; + if(command.startValid) { + command.current = command.start; + command.startValid = 0; } + flushCommands(); } if(address == 2) { @@ -84,8 +84,8 @@ auto RDP::writeWord(u32 address, u32 data_) -> void { //DPC_STATUS if(data.bit(0)) command.source = 0; if(data.bit(1)) command.source = 1; - if(data.bit(2)) command.freeze = 0; - //if(data.bit(3)) command.freeze = 1; + if(data.bit(2)) command.freeze = 0, flushCommands(); + if(data.bit(3)) command.freeze = 1; if(data.bit(4)) command.flush = 0; if(data.bit(5)) command.flush = 1; if(data.bit(6)) command.tmemBusy = 0; @@ -172,3 +172,11 @@ auto RDP::IO::writeWord(u32 address, u32 data_) -> void { self.debugger.ioDPS(Write, address, data); } + +auto RDP::flushCommands() -> void { + if(command.freeze) return; + command.pipeBusy = 1; + command.startGclk = 1; + if(command.end > command.current) render(); + command.ready = 1; +} diff --git a/waterbox/ares64/ares/ares/n64/rdp/rdp.hpp b/waterbox/ares64/ares/ares/n64/rdp/rdp.hpp index cba1869201..bdf0228062 100644 --- a/waterbox/ares64/ares/ares/n64/rdp/rdp.hpp +++ b/waterbox/ares64/ares/ares/n64/rdp/rdp.hpp @@ -67,6 +67,7 @@ struct RDP : Thread, Memory::IO { //io.cpp auto readWord(u32 address) -> u32; auto writeWord(u32 address, u32 data) -> void; + auto flushCommands() -> void; //serialization.cpp auto serialize(serializer&) -> void; @@ -82,6 +83,9 @@ struct RDP : Thread, Memory::IO { n1 source; //0 = RDRAM, 1 = DMEM n1 freeze; n1 flush; + n1 startValid; + n1 endValid; + n1 startGclk; n1 ready = 1; } command; diff --git a/waterbox/ares64/ares/ares/n64/rdp/render.cpp b/waterbox/ares64/ares/ares/n64/rdp/render.cpp index ca4d92f469..0abeabf0ec 100644 --- a/waterbox/ares64/ares/ares/n64/rdp/render.cpp +++ b/waterbox/ares64/ares/ares/n64/rdp/render.cpp @@ -616,6 +616,8 @@ auto RDP::syncTile() -> void { //0x29 auto RDP::syncFull() -> void { mi.raise(MI::IRQ::DP); + command.pipeBusy = 0; + command.startGclk = 0; } //0x2a diff --git a/waterbox/ares64/ares/ares/n64/rsp/interpreter-vpu.cpp b/waterbox/ares64/ares/ares/n64/rsp/interpreter-vpu.cpp index 9cae254f1e..72def4e527 100644 --- a/waterbox/ares64/ares/ares/n64/rsp/interpreter-vpu.cpp +++ b/waterbox/ares64/ares/ares/n64/rsp/interpreter-vpu.cpp @@ -301,13 +301,57 @@ auto RSP::SDV(cr128& vt, cr32& rs, s8 imm) -> void { template auto RSP::SFV(cr128& vt, cr32& rs, s8 imm) -> void { auto address = rs.u32 + imm * 16; - auto start = e >> 1; - auto end = start + 4; - auto base = address & 15; - address &= ~15; - for(u32 offset = start; offset < end; offset++) { - dmem.write(address + (base & 15), vt.element(offset & 7) >> 7); - base += 4; + auto base = address & 7; + address &= ~7; + switch (e) { + case 0: case 15: + dmem.write(address + (base + 0 & 15), vt.element(0) >> 7); + dmem.write(address + (base + 4 & 15), vt.element(1) >> 7); + dmem.write(address + (base + 8 & 15), vt.element(2) >> 7); + dmem.write(address + (base + 12 & 15), vt.element(3) >> 7); + break; + case 1: + dmem.write(address + (base + 0 & 15), vt.element(6) >> 7); + dmem.write(address + (base + 4 & 15), vt.element(7) >> 7); + dmem.write(address + (base + 8 & 15), vt.element(4) >> 7); + dmem.write(address + (base + 12 & 15), vt.element(5) >> 7); + break; + case 4: + dmem.write(address + (base + 0 & 15), vt.element(1) >> 7); + dmem.write(address + (base + 4 & 15), vt.element(2) >> 7); + dmem.write(address + (base + 8 & 15), vt.element(3) >> 7); + dmem.write(address + (base + 12 & 15), vt.element(0) >> 7); + break; + case 5: + dmem.write(address + (base + 0 & 15), vt.element(7) >> 7); + dmem.write(address + (base + 4 & 15), vt.element(4) >> 7); + dmem.write(address + (base + 8 & 15), vt.element(5) >> 7); + dmem.write(address + (base + 12 & 15), vt.element(6) >> 7); + break; + case 8: + dmem.write(address + (base + 0 & 15), vt.element(4) >> 7); + dmem.write(address + (base + 4 & 15), vt.element(5) >> 7); + dmem.write(address + (base + 8 & 15), vt.element(6) >> 7); + dmem.write(address + (base + 12 & 15), vt.element(7) >> 7); + break; + case 11: + dmem.write(address + (base + 0 & 15), vt.element(3) >> 7); + dmem.write(address + (base + 4 & 15), vt.element(0) >> 7); + dmem.write(address + (base + 8 & 15), vt.element(1) >> 7); + dmem.write(address + (base + 12 & 15), vt.element(2) >> 7); + break; + case 12: + dmem.write(address + (base + 0 & 15), vt.element(5) >> 7); + dmem.write(address + (base + 4 & 15), vt.element(6) >> 7); + dmem.write(address + (base + 8 & 15), vt.element(7) >> 7); + dmem.write(address + (base + 12 & 15), vt.element(4) >> 7); + break; + default: + dmem.write(address + (base + 0 & 15), 0); + dmem.write(address + (base + 4 & 15), 0); + dmem.write(address + (base + 8 & 15), 0); + dmem.write(address + (base + 12 & 15), 0); + break; } } @@ -382,14 +426,14 @@ auto RSP::SSV(cr128& vt, cr32& rs, s8 imm) -> void { template auto RSP::STV(u8 vt, cr32& rs, s8 imm) -> void { auto address = rs.u32 + imm * 16; - auto start = vt; - auto end = min(32, start + 8); - auto element = 8 - (e >> 1); - auto base = (address & 15) + (element << 1); - address &= ~15; + auto start = vt & ~7; + auto end = start + 8; + auto element = 16 - (e & ~1); + auto base = (address & 7) - (e & ~1); + address &= ~7; for(u32 offset = start; offset < end; offset++) { - dmem.writeUnaligned(address + (base & 15), vpu.r[offset].element(element++ & 7)); - base += 2; + dmem.write(address + (base++ & 15), vpu.r[offset].byte(element++ & 15)); + dmem.write(address + (base++ & 15), vpu.r[offset].byte(element++ & 15)); } } @@ -425,15 +469,21 @@ auto RSP::VABS(r128& vd, cr128& vs, cr128& vt) -> void { r128 vte = vt(e); for(u32 n : range(8)) { if(vs.s16(n) < 0) { - if(vte.s16(n) == -32768) vte.s16(n) = -32767; - ACCL.s16(n) = -vte.s16(n); + if(vte.s16(n) == -32768) { + ACCL.s16(n) = -32768; + vd.s16(n) = 32767; + } else { + ACCL.s16(n) = -vte.s16(n); + vd.s16(n) = -vte.s16(n); + } } else if(vs.s16(n) > 0) { ACCL.s16(n) = +vte.s16(n); + vd.s16(n) = +vte.s16(n); } else { ACCL.s16(n) = 0; + vd.s16(n) = 0; } } - vd = ACCL; } if constexpr(Accuracy::RSP::SIMD) { @@ -581,10 +631,14 @@ auto RSP::VCL(r128& vd, cr128& vs, cr128& vt) -> void { if(VCOL.get(n)) { if(VCOH.get(n)) { ACCL.u16(n) = VCCL.get(n) ? -vte.u16(n) : vs.u16(n); - } else if(VCE.get(n)) { - ACCL.u16(n) = VCCL.set(n, vs.u16(n) + vte.u16(n) <= 0xffff) ? -vte.u16(n) : vs.u16(n); } else { - ACCL.u16(n) = VCCL.set(n, vs.u16(n) + vte.u16(n) == 0) ? -vte.u16(n) : vs.u16(n); + u16 sum = vs.u16(n) + vte.u16(n); + bool carry = (vs.u16(n) + vte.u16(n)) != sum; + if(VCE.get(n)) { + ACCL.u16(n) = VCCL.set(n, (!sum || !carry)) ? -vte.u16(n) : vs.u16(n); + } else { + ACCL.u16(n) = VCCL.set(n, (!sum && !carry)) ? -vte.u16(n) : vs.u16(n); + } } } else { if(VCOH.get(n)) { @@ -679,12 +733,11 @@ auto RSP::VEQ(r128& vd, cr128& vs, cr128& vt) -> void { if constexpr(Accuracy::RSP::SISD) { cr128 vte = vt(e); for(u32 n : range(8)) { - ACCL.u16(n) = VCCL.set(n, !VCE.get(n) && vs.u16(n) == vte.u16(n)) ? vs.u16(n) : vte.u16(n); + ACCL.u16(n) = VCCL.set(n, !VCOH.get(n) && vs.u16(n) == vte.u16(n)) ? vs.u16(n) : vte.u16(n); } VCCH = zero; //unverified VCOL = zero; VCOH = zero; - VCE = zero; vd = ACCL; } @@ -707,12 +760,11 @@ auto RSP::VGE(r128& vd, cr128& vs, cr128& vt) -> void { if constexpr(Accuracy::RSP::SISD) { cr128 vte = vt(e); for(u32 n : range(8)) { - ACCL.u16(n) = VCCL.set(n, vs.s16(n) > vte.s16(n) || (vs.s16(n) == vte.s16(n) && (!VCOL.get(n) || VCE.get(n)))) ? vs.u16(n) : vte.u16(n); + ACCL.u16(n) = VCCL.set(n, vs.s16(n) > vte.s16(n) || (vs.s16(n) == vte.s16(n) && (!VCOL.get(n) || !VCOH.get(n)))) ? vs.u16(n) : vte.u16(n); } VCCH = zero; //unverified VCOL = zero; VCOH = zero; - VCE = zero; vd = ACCL; } @@ -738,12 +790,11 @@ auto RSP::VLT(r128& vd, cr128& vs, cr128& vt) -> void { if constexpr(Accuracy::RSP::SISD) { cr128 vte = vt(e); for(u32 n : range(8)) { - ACCL.u16(n) = VCCL.set(n, vs.s16(n) < vte.s16(n) || (vs.s16(n) == vte.s16(n) && VCOL.get(n) && !VCE.get(n))) ? vs.u16(n) : vte.u16(n); + ACCL.u16(n) = VCCL.set(n, vs.s16(n) < vte.s16(n) || (vs.s16(n) == vte.s16(n) && VCOL.get(n) && VCOH.get(n))) ? vs.u16(n) : vte.u16(n); } - VCCH = zero; //unverified + VCCH = zero; VCOL = zero; VCOH = zero; - VCE = zero; vd = ACCL; } @@ -769,7 +820,7 @@ auto RSP::VMACF(r128& vd, cr128& vs, cr128& vt) -> void { if constexpr(Accuracy::RSP::SISD) { cr128 vte = vt(e); for(u32 n : range(8)) { - accumulatorSet(n, accumulatorGet(n) + vs.s16(n) * vte.s16(n) * 2); + accumulatorSet(n, accumulatorGet(n) + (s64)vs.s16(n) * (s64)vte.s16(n) * 2); if constexpr(U == 0) { vd.u16(n) = accumulatorSaturate(n, 1, 0x8000, 0x7fff); } @@ -1099,9 +1150,9 @@ auto RSP::VMULF(r128& vd, cr128& vs, cr128& vt) -> void { if constexpr(Accuracy::RSP::SISD) { cr128 vte = vt(e); for(u32 n : range(8)) { - accumulatorSet(n, vs.s16(n) * vte.s16(n) * 2 + 0x8000); + accumulatorSet(n, (s64)vs.s16(n) * (s64)vte.s16(n) * 2 + 0x8000); if constexpr(U == 0) { - vd.u16(n) = ACCM.u16(n); + vd.u16(n) = accumulatorSaturate(n, 1, 0x8000, 0x7fff); } if constexpr(U == 1) { vd.u16(n) = ACCH.s16(n) < 0 ? 0x0000 : (ACCH.s16(n) ^ ACCM.s16(n)) < 0 ? 0xffff : ACCM.u16(n); @@ -1175,12 +1226,11 @@ auto RSP::VNE(r128& vd, cr128& vs, cr128& vt) -> void { if constexpr(Accuracy::RSP::SISD) { cr128 vte = vt(e); for(u32 n : range(8)) { - ACCL.u16(n) = VCCL.set(n, vs.u16(n) != vte.u16(n) || VCE.get(n)) ? vs.u16(n) : vte.u16(n); + ACCL.u16(n) = VCCL.set(n, vs.u16(n) != vte.u16(n) || VCOH.get(n)) ? vs.u16(n) : vte.u16(n); } VCCH = zero; //unverified VCOL = zero; VCOH = zero; - VCE = zero; vd = ACCL; } diff --git a/waterbox/ares64/ares/thirdparty/angrylion-rdp b/waterbox/ares64/ares/thirdparty/angrylion-rdp index 53dfbd3e6c..fd4173287f 160000 --- a/waterbox/ares64/ares/thirdparty/angrylion-rdp +++ b/waterbox/ares64/ares/thirdparty/angrylion-rdp @@ -1 +1 @@ -Subproject commit 53dfbd3e6c385bf1898de53d7ff176de6c4cffa1 +Subproject commit fd4173287f22271908b82148470e4e1e34707228