merge ares upstream, resolves #3710
This commit is contained in:
parent
8260a59bcf
commit
2e75645cbd
Binary file not shown.
Binary file not shown.
|
@ -184,7 +184,6 @@ static inline SaveType DetectSaveType(u8* rom)
|
|||
if (id == "NCR") ret = EEPROM512;
|
||||
if (id == "NEA") ret = EEPROM512;
|
||||
if (id == "NPW") ret = EEPROM512;
|
||||
if (id == "NPM") ret = EEPROM512;
|
||||
if (id == "NPY") ret = EEPROM512;
|
||||
if (id == "NPT") ret = EEPROM512;
|
||||
if (id == "NRA") ret = EEPROM512;
|
||||
|
@ -194,7 +193,6 @@ static inline SaveType DetectSaveType(u8* rom)
|
|||
if (id == "NK2") ret = EEPROM512;
|
||||
if (id == "NSV") ret = EEPROM512;
|
||||
if (id == "NFX") ret = EEPROM512;
|
||||
if (id == "NFP") ret = EEPROM512;
|
||||
if (id == "NS6") ret = EEPROM512;
|
||||
if (id == "NNA") ret = EEPROM512;
|
||||
if (id == "NRS") ret = EEPROM512;
|
||||
|
@ -202,7 +200,6 @@ static inline SaveType DetectSaveType(u8* rom)
|
|||
if (id == "NSC") ret = EEPROM512;
|
||||
if (id == "NSA") ret = EEPROM512;
|
||||
if (id == "NB6") ret = EEPROM512;
|
||||
if (id == "NSM") ret = EEPROM512;
|
||||
if (id == "NSS") ret = EEPROM512;
|
||||
if (id == "NTX") ret = EEPROM512;
|
||||
if (id == "NT6") ret = EEPROM512;
|
||||
|
@ -215,12 +212,13 @@ static inline SaveType DetectSaveType(u8* rom)
|
|||
if (id == "NIR") ret = EEPROM512;
|
||||
if (id == "NVL") ret = EEPROM512;
|
||||
if (id == "NVY") ret = EEPROM512;
|
||||
if (id == "NWR") ret = EEPROM512;
|
||||
if (id == "NWC") ret = EEPROM512;
|
||||
if (id == "NAD") ret = EEPROM512;
|
||||
if (id == "NWU") ret = EEPROM512;
|
||||
if (id == "NYK") ret = EEPROM512;
|
||||
if (id == "NMZ") ret = EEPROM512;
|
||||
if (id == "NSM") ret = EEPROM512;
|
||||
if (id == "NWR") ret = EEPROM512;
|
||||
if (id == "NDK" && region_code == 'J') ret = EEPROM512;
|
||||
if (id == "NWT" && region_code == 'J') ret = EEPROM512;
|
||||
|
||||
|
@ -236,7 +234,6 @@ static inline SaveType DetectSaveType(u8* rom)
|
|||
if (id == "NMX") ret = EEPROM2KB;
|
||||
if (id == "NGC") ret = EEPROM2KB;
|
||||
if (id == "NIM") ret = EEPROM2KB;
|
||||
if (id == "NK4") ret = EEPROM2KB;
|
||||
if (id == "NNB") ret = EEPROM2KB;
|
||||
if (id == "NMV") ret = EEPROM2KB;
|
||||
if (id == "NM8") ret = EEPROM2KB;
|
||||
|
@ -248,16 +245,17 @@ static inline SaveType DetectSaveType(u8* rom)
|
|||
if (id == "NR7") ret = EEPROM2KB;
|
||||
if (id == "NEP") ret = EEPROM2KB;
|
||||
if (id == "NYS") ret = EEPROM2KB;
|
||||
if (id == "NK4") ret = EEPROM2KB;
|
||||
if (id == "ND3" && region_code == 'J') ret = EEPROM2KB;
|
||||
if (id == "ND4" && region_code == 'J') ret = EEPROM2KB;
|
||||
|
||||
if (id == "NTE") ret = SRAM32KB;
|
||||
if (id == "NVB") ret = SRAM32KB;
|
||||
if (id == "NB5") ret = SRAM32KB;
|
||||
if (id == "CFZ") ret = SRAM32KB;
|
||||
if (id == "NFZ") ret = SRAM32KB;
|
||||
if (id == "NSI") ret = SRAM32KB;
|
||||
if (id == "NG6") ret = SRAM32KB;
|
||||
if (id == "N3H") ret = SRAM32KB;
|
||||
if (id == "NGP") ret = SRAM32KB;
|
||||
if (id == "NYW") ret = SRAM32KB;
|
||||
if (id == "NHY") ret = SRAM32KB;
|
||||
|
@ -278,7 +276,7 @@ static inline SaveType DetectSaveType(u8* rom)
|
|||
if (id == "NUM") ret = SRAM32KB;
|
||||
if (id == "NOB") ret = SRAM32KB;
|
||||
if (id == "CPS") ret = SRAM32KB;
|
||||
if (id == "NB5") ret = SRAM32KB;
|
||||
if (id == "NPM") ret = SRAM32KB;
|
||||
if (id == "NRE") ret = SRAM32KB;
|
||||
if (id == "NAL") ret = SRAM32KB;
|
||||
if (id == "NT3") ret = SRAM32KB;
|
||||
|
@ -288,6 +286,7 @@ static inline SaveType DetectSaveType(u8* rom)
|
|||
if (id == "NWL") ret = SRAM32KB;
|
||||
if (id == "NW2") ret = SRAM32KB;
|
||||
if (id == "NWX") ret = SRAM32KB;
|
||||
if (id == "N3H" && region_code == 'J') ret = SRAM32KB;
|
||||
if (id == "NK4" && region_code == 'J' && revision < 2) ret = SRAM32KB;
|
||||
|
||||
if (id == "CDZ") ret = SRAM96KB;
|
||||
|
@ -697,7 +696,7 @@ static u8 PeekFunc(u64 address)
|
|||
}
|
||||
}
|
||||
|
||||
u32 unused = 0;
|
||||
ares::Nintendo64::Thread unused;
|
||||
return ares::Nintendo64::bus.read<ares::Nintendo64::Byte>(addr, unused);
|
||||
}
|
||||
|
||||
|
@ -705,7 +704,7 @@ static void SysBusAccess(u8* buffer, u64 address, u64 count, bool write)
|
|||
{
|
||||
if (write)
|
||||
{
|
||||
u32 unused = 0;
|
||||
ares::Nintendo64::Thread unused;
|
||||
while (count--)
|
||||
ares::Nintendo64::bus.write<ares::Nintendo64::Byte>(address++, *buffer++, unused);
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ using namespace nall::primitives;
|
|||
|
||||
namespace ares {
|
||||
static const string Name = "ares";
|
||||
static const string Version = "132";
|
||||
static const string Version = "133";
|
||||
static const string Copyright = "ares team, Near";
|
||||
static const string License = "ISC";
|
||||
static const string LicenseURI = "https://opensource.org/licenses/ISC";
|
||||
|
|
|
@ -2,12 +2,15 @@
|
|||
|
||||
namespace ares::Memory {
|
||||
|
||||
constexpr u32 fixedBufferSize = 8_MiB;
|
||||
|
||||
#if defined(PLATFORM_MACOS)
|
||||
//stub for unsupported platforms
|
||||
//dynamic allocation for unsupported platforms
|
||||
//resizes the allocator to fixedBufferSize with the bump_allocator::executable flag;
//unlike the fixedBuffer variant in the other preprocessor branch, no backing buffer is passed here
//NOTE(review): presumably the bump allocator then obtains its own memory - confirm against bump_allocator::resize
FixedAllocator::FixedAllocator() {
|
||||
_allocator.resize(fixedBufferSize, bump_allocator::executable);
|
||||
}
|
||||
#else
|
||||
alignas(4096) u8 fixedBuffer[8_MiB];
|
||||
alignas(4096) u8 fixedBuffer[fixedBufferSize];
|
||||
|
||||
FixedAllocator::FixedAllocator() {
|
||||
_allocator.resize(sizeof(fixedBuffer), 0, fixedBuffer);
|
||||
|
|
|
@ -2,9 +2,15 @@ struct Rumble : Input {
|
|||
DeclareClass(Rumble, "input.rumble")
|
||||
using Input::Input;
|
||||
|
||||
auto enable() const -> bool { return _enable; }
|
||||
auto setEnable(bool enable) -> void { _enable = enable; }
|
||||
auto weakValue() const -> u16 { return _weak; }
|
||||
auto strongValue() const -> u16 { return _strong; }
|
||||
|
||||
auto setValues(u16 weak, u16 strong) -> void { _weak = weak; _strong = strong; }
|
||||
|
||||
// For systems with binary motors
|
||||
auto enable() const -> bool { return _weak > 0 || _strong > 0; }
|
||||
auto setEnable(bool enable) -> void { _weak = enable ? 65535 : 0; _strong = enable ? 65535 : 0; }
|
||||
protected:
|
||||
bool _enable = 0;
|
||||
u16 _weak = 0;
|
||||
u16 _strong = 0;
|
||||
};
|
||||
|
|
|
@ -25,10 +25,12 @@ auto AI::unload() -> void {
|
|||
}
|
||||
|
||||
auto AI::main() -> void {
|
||||
f64 left = 0, right = 0;
|
||||
sample(left, right);
|
||||
stream->frame(left, right);
|
||||
step(dac.period);
|
||||
while(Thread::clock < 0) {
|
||||
f64 left = 0, right = 0;
|
||||
sample(left, right);
|
||||
stream->frame(left, right);
|
||||
step(dac.period);
|
||||
}
|
||||
}
|
||||
|
||||
auto AI::sample(f64& left, f64& right) -> void {
|
||||
|
@ -55,10 +57,6 @@ auto AI::sample(f64& left, f64& right) -> void {
|
|||
}
|
||||
}
|
||||
|
||||
auto AI::step(u32 clocks) -> void {
|
||||
Thread::clock += clocks;
|
||||
}
|
||||
|
||||
auto AI::power(bool reset) -> void {
|
||||
Thread::reset();
|
||||
|
||||
|
|
|
@ -19,12 +19,11 @@ struct AI : Thread, Memory::RCP<AI> {
|
|||
auto unload() -> void;
|
||||
auto main() -> void;
|
||||
auto sample(f64& left, f64& right) -> void;
|
||||
auto step(u32 clocks) -> void;
|
||||
auto power(bool reset) -> void;
|
||||
|
||||
//io.cpp
|
||||
auto readWord(u32 address, u32& cycles) -> u32;
|
||||
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
|
||||
auto readWord(u32 address, Thread& thread) -> u32;
|
||||
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
|
||||
|
||||
//serialization.cpp
|
||||
auto serialize(serializer&) -> void;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
auto AI::readWord(u32 address, u32& cycles) -> u32 {
|
||||
auto AI::readWord(u32 address, Thread& thread) -> u32 {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data;
|
||||
|
||||
|
@ -21,7 +21,7 @@ auto AI::readWord(u32 address, u32& cycles) -> u32 {
|
|||
return data;
|
||||
}
|
||||
|
||||
auto AI::writeWord(u32 address, u32 data_, u32& cycles) -> void {
|
||||
auto AI::writeWord(u32 address, u32 data_, Thread& thread) -> void {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data = data_;
|
||||
|
||||
|
@ -56,7 +56,7 @@ auto AI::writeWord(u32 address, u32 data_, u32& cycles) -> void {
|
|||
//AI_DACRATE
|
||||
auto frequency = dac.frequency;
|
||||
io.dacRate = data.bit(0,13);
|
||||
dac.frequency = max(1, system.frequency() / 4 / (io.dacRate + 1)) * 1.037;
|
||||
dac.frequency = max(1, system.videoFrequency() / (io.dacRate + 1));
|
||||
dac.period = system.frequency() / dac.frequency;
|
||||
if(frequency != dac.frequency) stream->setFrequency(dac.frequency);
|
||||
}
|
||||
|
|
|
@ -51,7 +51,6 @@ auto CIC::cmdChallenge() -> void {
|
|||
fifo.write(0); // write 0 bit
|
||||
for (auto i : range(30)) fifo.writeNibble(data[i]);
|
||||
state = Run;
|
||||
printf("CIC challenge complete %d\n", fifo.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -238,46 +238,62 @@ auto Gamepad::read() -> n32 {
|
|||
platform->input(start);
|
||||
|
||||
#if false
|
||||
//scale {-32768 ... +32767} to {-85 ... +85}
|
||||
auto ax = x->value() * 85.0 / 32767.0;
|
||||
auto ay = y->value() * 85.0 / 32767.0;
|
||||
auto cardinalMax = 85.0;
|
||||
auto diagonalMax = 69.0;
|
||||
auto innerDeadzone = 7.0; // default should remain 7 (~8.2% of 85) as the deadzone is axial in nature and fights cardinalMax
|
||||
auto outerDeadzoneRadiusMax = 2.0 / sqrt(2.0) * (diagonalMax / cardinalMax * (cardinalMax - innerDeadzone) + innerDeadzone); //from linear scaling equation, substitute outerDeadzoneRadiusMax*sqrt(2)/2 for lengthAbsoluteX and set diagonalMax as the result then solve for outerDeadzoneRadiusMax
|
||||
|
||||
//create inner axial dead-zone in range {-7 ... +7} and scale from it up to outer circular dead-zone of radius 85
|
||||
//scale {-32768 ... +32767} to {-outerDeadzoneRadiusMax ... +outerDeadzoneRadiusMax}
|
||||
auto ax = x->value() * outerDeadzoneRadiusMax / 32767.0;
|
||||
auto ay = y->value() * outerDeadzoneRadiusMax / 32767.0;
|
||||
|
||||
//create inner axial dead-zone in range {-innerDeadzone ... +innerDeadzone} and scale from it up to outer circular dead-zone of radius outerDeadzoneRadiusMax
|
||||
auto length = sqrt(ax * ax + ay * ay);
|
||||
if(length <= 85.0) {
|
||||
if(length <= outerDeadzoneRadiusMax) {
|
||||
auto lengthAbsoluteX = abs(ax);
|
||||
auto lengthAbsoluteY = abs(ay);
|
||||
if(lengthAbsoluteX <= 7.0) {
|
||||
if(lengthAbsoluteX <= innerDeadzone) {
|
||||
lengthAbsoluteX = 0.0;
|
||||
} else {
|
||||
lengthAbsoluteX = (lengthAbsoluteX - 7.0) * 85.0 / (85.0 - 7.0) / lengthAbsoluteX;
|
||||
lengthAbsoluteX = (lengthAbsoluteX - innerDeadzone) * cardinalMax / (cardinalMax - innerDeadzone) / lengthAbsoluteX;
|
||||
}
|
||||
ax *= lengthAbsoluteX;
|
||||
if(lengthAbsoluteY <= 7.0) {
|
||||
if(lengthAbsoluteY <= innerDeadzone) {
|
||||
lengthAbsoluteY = 0.0;
|
||||
} else {
|
||||
lengthAbsoluteY = (lengthAbsoluteY - 7.0) * 85.0 / (85.0 - 7.0) / lengthAbsoluteY;
|
||||
lengthAbsoluteY = (lengthAbsoluteY - innerDeadzone) * cardinalMax / (cardinalMax - innerDeadzone) / lengthAbsoluteY;
|
||||
}
|
||||
ay *= lengthAbsoluteY;
|
||||
} else {
|
||||
length = 85.0 / length;
|
||||
length = outerDeadzoneRadiusMax / length;
|
||||
ax *= length;
|
||||
ay *= length;
|
||||
}
|
||||
|
||||
//bound diagonals to an octagonal range {-69 ... +69}
|
||||
|
||||
//bound diagonals to an octagonal range {-diagonalMax ... +diagonalMax}
|
||||
if(ax != 0.0 && ay != 0.0) {
|
||||
auto slope = ay / ax;
|
||||
auto edgex = copysign(85.0 / (abs(slope) + 16.0 / 69.0), ax);
|
||||
auto edgey = copysign(min(abs(edgex * slope), 85.0 / (1.0 / abs(slope) + 16.0 / 69.0)), ay);
|
||||
auto edgex = copysign(cardinalMax / (abs(slope) + (cardinalMax - diagonalMax) / diagonalMax), ax);
|
||||
auto edgey = copysign(min(abs(edgex * slope), cardinalMax / (1.0 / abs(slope) + (cardinalMax - diagonalMax) / diagonalMax)), ay);
|
||||
edgex = edgey / slope;
|
||||
|
||||
auto scale = sqrt(edgex * edgex + edgey * edgey) / 85.0;
|
||||
ax *= scale;
|
||||
ay *= scale;
|
||||
length = sqrt(ax * ax + ay * ay);
|
||||
auto distanceToEdge = sqrt(edgex * edgex + edgey * edgey);
|
||||
if(length > distanceToEdge) {
|
||||
ax = edgex;
|
||||
ay = edgey;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
//keep cardinal input within positive and negative bounds of cardinalMax
|
||||
if(abs(ax) > cardinalMax) ax = copysign(cardinalMax, ax);
|
||||
if(abs(ay) > cardinalMax) ay = copysign(cardinalMax, ay);
|
||||
|
||||
//add epsilon to counteract floating point precision error
|
||||
ax = copysign(abs(ax) + 1e-09, ax);
|
||||
ay = copysign(abs(ay) + 1e-09, ay);
|
||||
#endif
|
||||
|
||||
n32 data;
|
||||
data.byte(0) = y->value();
|
||||
data.byte(1) = x->value();
|
||||
|
@ -297,7 +313,7 @@ auto Gamepad::read() -> n32 {
|
|||
data.bit(29) = z->value();
|
||||
data.bit(30) = b->value();
|
||||
data.bit(31) = a->value();
|
||||
|
||||
|
||||
//when L+R+Start are pressed: the X/Y axes are zeroed, RST is set, and Start is cleared
|
||||
if(l->value() && r->value() && start->value()) {
|
||||
data.byte(0) = 0; //Y-Axis
|
||||
|
|
|
@ -30,16 +30,16 @@ auto CPU::unload() -> void {
|
|||
}
|
||||
|
||||
auto CPU::main() -> void {
|
||||
instruction();
|
||||
synchronize();
|
||||
}
|
||||
while(!vi.refreshed) {
|
||||
instruction();
|
||||
synchronize();
|
||||
}
|
||||
|
||||
auto CPU::step(u32 clocks) -> void {
|
||||
Thread::clock += clocks;
|
||||
vi.refreshed = false;
|
||||
}
|
||||
|
||||
auto CPU::synchronize() -> void {
|
||||
auto clocks = Thread::clock * 2;
|
||||
auto clocks = Thread::clock;
|
||||
Thread::clock = 0;
|
||||
|
||||
vi.clock -= clocks;
|
||||
|
@ -47,11 +47,11 @@ auto CPU::synchronize() -> void {
|
|||
rsp.clock -= clocks;
|
||||
rdp.clock -= clocks;
|
||||
pif.clock -= clocks;
|
||||
while( vi.clock < 0) vi.main();
|
||||
while( ai.clock < 0) ai.main();
|
||||
while(rsp.clock < 0) rsp.main();
|
||||
while(rdp.clock < 0) rdp.main();
|
||||
while(pif.clock < 0) pif.main();
|
||||
vi.main();
|
||||
ai.main();
|
||||
rsp.main();
|
||||
rdp.main();
|
||||
pif.main();
|
||||
|
||||
queue.step(clocks, [](u32 event) {
|
||||
switch(event) {
|
||||
|
@ -81,19 +81,30 @@ auto CPU::instruction() -> void {
|
|||
if(auto interrupts = scc.cause.interruptPending & scc.status.interruptMask) {
|
||||
if(scc.status.interruptEnable && !scc.status.exceptionLevel && !scc.status.errorLevel) {
|
||||
debugger.interrupt(scc.cause.interruptPending);
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
return exception.interrupt();
|
||||
}
|
||||
}
|
||||
if (scc.nmiPending) {
|
||||
debugger.nmi();
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
return exception.nmi();
|
||||
}
|
||||
|
||||
if constexpr(Accuracy::CPU::Recompiler) {
|
||||
// Fast path: attempt to lookup previously compiled blocks with devirtualizeFast
|
||||
// and fastFetchBlock, this skips exception handling, error checking, and
|
||||
// code emitting pathways for maximum lookup performance.
|
||||
// As memory writes cause recompiler block invalidation, this shouldn't be detectable.
|
||||
if (auto address = devirtualizeFast(ipu.pc)) {
|
||||
if(auto block = recompiler.fastFetchBlock(address)) {
|
||||
block->execute(*this);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (auto address = devirtualize(ipu.pc)) {
|
||||
auto block = recompiler.block(*address);
|
||||
auto block = recompiler.block(ipu.pc, *address);
|
||||
block->execute(*this);
|
||||
}
|
||||
}
|
||||
|
@ -111,7 +122,8 @@ auto CPU::instruction() -> void {
|
|||
|
||||
auto CPU::instructionEpilogue() -> s32 {
|
||||
if constexpr(Accuracy::CPU::Recompiler) {
|
||||
icache.step(ipu.pc); //simulates timings without performing actual icache loads
|
||||
//simulates timings without performing actual icache loads
|
||||
icache.step(ipu.pc, devirtualizeFast(ipu.pc));
|
||||
}
|
||||
|
||||
ipu.r[0].u64 = 0;
|
||||
|
@ -156,7 +168,9 @@ auto CPU::power(bool reset) -> void {
|
|||
|
||||
if constexpr(Accuracy::CPU::Recompiler) {
|
||||
auto buffer = ares::Memory::FixedAllocator::get().tryAcquire(4_MiB);
|
||||
memory::jitprotect(false);
|
||||
recompiler.allocator.resize(4_MiB, bump_allocator::executable | bump_allocator::zero_fill, buffer);
|
||||
memory::jitprotect(true);
|
||||
recompiler.reset();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,7 +33,6 @@ struct CPU : Thread {
|
|||
auto unload() -> void;
|
||||
|
||||
auto main() -> void;
|
||||
auto step(u32 clocks) -> void;
|
||||
auto synchronize() -> void;
|
||||
|
||||
auto instruction() -> void;
|
||||
|
@ -106,27 +105,27 @@ struct CPU : Thread {
|
|||
struct InstructionCache {
|
||||
CPU& self;
|
||||
struct Line;
|
||||
auto line(u32 address) -> Line& { return lines[address >> 5 & 0x1ff]; }
|
||||
auto line(u32 vaddr) -> Line& { return lines[vaddr >> 5 & 0x1ff]; }
|
||||
|
||||
//used by the recompiler to simulate instruction cache fetch timing
|
||||
auto step(u32 address) -> void {
|
||||
auto& line = this->line(address);
|
||||
auto step(u32 vaddr, u32 address) -> void {
|
||||
auto& line = this->line(vaddr);
|
||||
if(!line.hit(address)) {
|
||||
self.step(48);
|
||||
self.step(48 * 2);
|
||||
line.valid = 1;
|
||||
line.tag = address & ~0x0000'0fff;
|
||||
} else {
|
||||
self.step(2);
|
||||
self.step(1 * 2);
|
||||
}
|
||||
}
|
||||
|
||||
//used by the interpreter to fully emulate the instruction cache
|
||||
auto fetch(u32 address, CPU& cpu) -> u32 {
|
||||
auto& line = this->line(address);
|
||||
auto fetch(u32 vaddr, u32 address, CPU& cpu) -> u32 {
|
||||
auto& line = this->line(vaddr);
|
||||
if(!line.hit(address)) {
|
||||
line.fill(address, cpu);
|
||||
} else {
|
||||
cpu.step(2);
|
||||
cpu.step(1 * 2);
|
||||
}
|
||||
return line.read(address);
|
||||
}
|
||||
|
@ -145,7 +144,7 @@ struct CPU : Thread {
|
|||
struct Line {
|
||||
auto hit(u32 address) const -> bool { return valid && tag == (address & ~0x0000'0fff); }
|
||||
auto fill(u32 address, CPU& cpu) -> void {
|
||||
cpu.step(48);
|
||||
cpu.step(48 * 2);
|
||||
valid = 1;
|
||||
tag = address & ~0x0000'0fff;
|
||||
words[0] = cpu.busRead<Word>(tag | index | 0x00);
|
||||
|
@ -159,7 +158,7 @@ struct CPU : Thread {
|
|||
}
|
||||
|
||||
auto writeBack(CPU& cpu) -> void {
|
||||
cpu.step(48);
|
||||
cpu.step(48 * 2);
|
||||
cpu.busWrite<Word>(tag | index | 0x00, words[0]);
|
||||
cpu.busWrite<Word>(tag | index | 0x04, words[1]);
|
||||
cpu.busWrite<Word>(tag | index | 0x08, words[2]);
|
||||
|
@ -182,9 +181,9 @@ struct CPU : Thread {
|
|||
//dcache.cpp
|
||||
struct DataCache {
|
||||
struct Line;
|
||||
auto line(u32 address) -> Line&;
|
||||
template<u32 Size> auto read(u32 address) -> u64;
|
||||
template<u32 Size> auto write(u32 address, u64 data) -> void;
|
||||
auto line(u32 vaddr) -> Line&;
|
||||
template<u32 Size> auto read(u32 vaddr, u32 address) -> u64;
|
||||
template<u32 Size> auto write(u32 vaddr, u32 address, u64 data) -> void;
|
||||
auto power(bool reset) -> void;
|
||||
|
||||
//8KB
|
||||
|
@ -222,10 +221,6 @@ struct CPU : Thread {
|
|||
u32 address;
|
||||
};
|
||||
|
||||
//tlb.cpp
|
||||
auto load(u64 vaddr) -> Match;
|
||||
auto store(u64 vaddr) -> Match;
|
||||
|
||||
struct Entry {
|
||||
//scc-tlb.cpp
|
||||
auto synchronize() -> void;
|
||||
|
@ -239,13 +234,49 @@ struct CPU : Thread {
|
|||
n40 virtualAddress;
|
||||
n8 addressSpaceID;
|
||||
n2 region;
|
||||
//internal:
|
||||
//internal:
|
||||
n1 globals;
|
||||
n40 addressMaskHi;
|
||||
n40 addressMaskLo;
|
||||
n40 addressSelect;
|
||||
} entry[TLB::Entries];
|
||||
|
||||
//tlb.cpp
|
||||
auto load(u64 vaddr) -> Match;
|
||||
auto load(u64 vaddr, const Entry& entry) -> Match;
|
||||
auto loadFast(u64 vaddr) -> Match;
|
||||
auto store(u64 vaddr) -> Match;
|
||||
auto store(u64 vaddr, const Entry& entry) -> Match;
|
||||
|
||||
struct TlbCache { ;
|
||||
static constexpr int entries = 4;
|
||||
|
||||
struct CachedTlbEntry {
|
||||
const Entry *entry;
|
||||
int frequency;
|
||||
} entry[entries];
|
||||
|
||||
void insert(const Entry& entry) {
|
||||
this->entry[refresh()].entry = &entry;
|
||||
}
|
||||
|
||||
int refresh() {
|
||||
CachedTlbEntry* leastUsed = &entry[0];
|
||||
int index = 0;
|
||||
|
||||
for(auto n = 0; n < entries; n++) {
|
||||
if(entry[n].frequency < leastUsed->frequency) {
|
||||
index = n;
|
||||
leastUsed = &entry[n];
|
||||
}
|
||||
}
|
||||
|
||||
leastUsed->entry = nullptr;
|
||||
leastUsed->frequency = 0;
|
||||
return index;
|
||||
}
|
||||
} tlbCache;
|
||||
|
||||
u32 physicalAddress;
|
||||
} tlb{*this};
|
||||
|
||||
|
@ -260,11 +291,13 @@ struct CPU : Thread {
|
|||
|
||||
auto segment(u64 vaddr) -> Context::Segment;
|
||||
auto devirtualize(u64 vaddr) -> maybe<u64>;
|
||||
alwaysinline auto devirtualizeFast(u64 vaddr) -> u64;
|
||||
|
||||
auto fetch(u64 vaddr) -> maybe<u32>;
|
||||
template<u32 Size> auto busWrite(u32 address, u64 data) -> void;
|
||||
template<u32 Size> auto busRead(u32 address) -> u64;
|
||||
template<u32 Size> auto read(u64 vaddr) -> maybe<u64>;
|
||||
template<u32 Size> auto write(u64 vaddr, u64 data) -> bool;
|
||||
template<u32 Size> auto write(u64 vaddr, u64 data, bool alignedError=true) -> bool;
|
||||
template<u32 Size> auto vaddrAlignedError(u64 vaddr, bool write) -> bool;
|
||||
auto addressException(u64 vaddr) -> void;
|
||||
|
||||
|
@ -861,9 +894,10 @@ struct CPU : Thread {
|
|||
}
|
||||
|
||||
auto pool(u32 address) -> Pool*;
|
||||
auto block(u32 address) -> Block*;
|
||||
auto block(u32 vaddr, u32 address) -> Block*;
|
||||
auto fastFetchBlock(u32 address) -> Block*;
|
||||
|
||||
auto emit(u32 address) -> Block*;
|
||||
auto emit(u32 vaddr, u32 address) -> Block*;
|
||||
auto emitEXECUTE(u32 instruction) -> bool;
|
||||
auto emitSPECIAL(u32 instruction) -> bool;
|
||||
auto emitREGIMM(u32 instruction) -> bool;
|
||||
|
@ -906,6 +940,11 @@ struct CPU : Thread {
|
|||
u32 address;
|
||||
u32 instruction;
|
||||
} disassembler{*this};
|
||||
|
||||
//pair of 64-bit base values kept as a single cached record
//NOTE(review): presumably the virtual (vbase) and physical (pbase) bases remembered by devirtualizeFast() - confirm against its definition
struct DevirtualizeCache {
|
||||
uint64_t vbase;
|
||||
uint64_t pbase;
|
||||
} devirtualizeCache;
|
||||
};
|
||||
|
||||
extern CPU cpu;
|
||||
|
|
|
@ -3,7 +3,7 @@ auto CPU::DataCache::Line::hit(u32 address) const -> bool {
|
|||
}
|
||||
|
||||
template<u32 Size> auto CPU::DataCache::Line::fill(u32 address, u64 data) -> void {
|
||||
cpu.step(40);
|
||||
cpu.step(40 * 2);
|
||||
valid = 1;
|
||||
dirty = 1;
|
||||
tag = address & ~0x0000'0fff;
|
||||
|
@ -31,7 +31,7 @@ template<u32 Size> auto CPU::DataCache::Line::fill(u32 address, u64 data) -> voi
|
|||
}
|
||||
|
||||
auto CPU::DataCache::Line::fill(u32 address) -> void {
|
||||
cpu.step(40);
|
||||
cpu.step(40 * 2);
|
||||
valid = 1;
|
||||
dirty = 0;
|
||||
tag = address & ~0x0000'0fff;
|
||||
|
@ -53,7 +53,7 @@ auto CPU::DataCache::Line::fill(u32 address) -> void {
|
|||
}
|
||||
|
||||
auto CPU::DataCache::Line::writeBack() -> void {
|
||||
cpu.step(40);
|
||||
cpu.step(40 * 2);
|
||||
dirty = 0;
|
||||
cpu.busWrite<Word>(tag | index | 0x0, words[0]);
|
||||
cpu.busWrite<Word>(tag | index | 0x4, words[1]);
|
||||
|
@ -61,8 +61,8 @@ auto CPU::DataCache::Line::writeBack() -> void {
|
|||
cpu.busWrite<Word>(tag | index | 0xc, words[3]);
|
||||
}
|
||||
|
||||
auto CPU::DataCache::line(u32 address) -> Line& {
|
||||
return lines[address >> 4 & 0x1ff];
|
||||
auto CPU::DataCache::line(u32 vaddr) -> Line& {
|
||||
return lines[vaddr >> 4 & 0x1ff];
|
||||
}
|
||||
|
||||
template<u32 Size>
|
||||
|
@ -90,25 +90,25 @@ auto CPU::DataCache::Line::write(u32 address, u64 data) -> void {
|
|||
}
|
||||
|
||||
template<u32 Size>
|
||||
auto CPU::DataCache::read(u32 address) -> u64 {
|
||||
auto& line = this->line(address);
|
||||
auto CPU::DataCache::read(u32 vaddr, u32 address) -> u64 {
|
||||
auto& line = this->line(vaddr);
|
||||
if(!line.hit(address)) {
|
||||
if(line.valid && line.dirty) line.writeBack();
|
||||
line.fill(address);
|
||||
} else {
|
||||
cpu.step(1);
|
||||
cpu.step(1 * 2);
|
||||
}
|
||||
return line.read<Size>(address);
|
||||
}
|
||||
|
||||
template<u32 Size>
|
||||
auto CPU::DataCache::write(u32 address, u64 data) -> void {
|
||||
auto& line = this->line(address);
|
||||
auto CPU::DataCache::write(u32 vaddr, u32 address, u64 data) -> void {
|
||||
auto& line = this->line(vaddr);
|
||||
if(!line.hit(address)) {
|
||||
if(line.valid && line.dirty) line.writeBack();
|
||||
return line.fill<Size>(address, data);
|
||||
} else {
|
||||
cpu.step(1);
|
||||
cpu.step(1 * 2);
|
||||
}
|
||||
line.write<Size>(address, data);
|
||||
}
|
||||
|
|
|
@ -421,6 +421,7 @@ auto CPU::FADD_S(u8 fd, u8 fs, u8 ft) -> void {
|
|||
CHECK_FPE(f32, ffd, FS(f32) + FT(f32));
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f32) = ffd;
|
||||
step((3 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FADD_D(u8 fd, u8 fs, u8 ft) -> void {
|
||||
|
@ -431,6 +432,7 @@ auto CPU::FADD_D(u8 fd, u8 fs, u8 ft) -> void {
|
|||
CHECK_FPE(f64, ffd, ffs + fft);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f64) = ffd;
|
||||
step((3 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCEIL_L_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -439,6 +441,7 @@ auto CPU::FCEIL_L_S(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s64>(ffs)) return;
|
||||
CHECK_FPE(s64, ffd, roundCeil<s64>(ffs));
|
||||
FD(s64) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCEIL_L_D(u8 fd, u8 fs) -> void {
|
||||
|
@ -447,6 +450,7 @@ auto CPU::FCEIL_L_D(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s64>(ffs)) return;
|
||||
CHECK_FPE(s64, ffd, roundCeil<s64>(ffs));
|
||||
FD(s64) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCEIL_W_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -455,6 +459,7 @@ auto CPU::FCEIL_W_S(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s32>(ffs)) return;
|
||||
CHECK_FPE_CONV(s32, ffd, roundCeil<s32>(ffs));
|
||||
FD(s32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCEIL_W_D(u8 fd, u8 fs) -> void {
|
||||
|
@ -463,6 +468,7 @@ auto CPU::FCEIL_W_D(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s32>(ffs)) return;
|
||||
CHECK_FPE_CONV(s32, ffd, roundCeil<s32>(ffs));
|
||||
FD(s32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
#define XORDERED(type, value, quiet) \
|
||||
|
@ -652,6 +658,7 @@ auto CPU::FCVT_S_D(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE(f32, ffd, (f32)ffs);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f32) = ffd;
|
||||
step((2 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCVT_S_W(u8 fd, u8 fs) -> void {
|
||||
|
@ -660,6 +667,7 @@ auto CPU::FCVT_S_W(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE(f32, ffd, ffs);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCVT_S_L(u8 fd, u8 fs) -> void {
|
||||
|
@ -672,6 +680,7 @@ auto CPU::FCVT_S_L(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE(f32, ffd, (f32)ffs);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCVT_D_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -694,6 +703,7 @@ auto CPU::FCVT_D_W(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE(f64, ffd, (f64)ffs);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f64) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCVT_D_L(u8 fd, u8 fs) -> void {
|
||||
|
@ -706,6 +716,7 @@ auto CPU::FCVT_D_L(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE(f64, ffd, (f64)ffs);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f64) = ffs;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCVT_L_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -714,6 +725,7 @@ auto CPU::FCVT_L_S(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s64>(ffs)) return;
|
||||
CHECK_FPE(s64, ffd, roundCurrent<s64>(ffs));
|
||||
FD(s64) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCVT_L_D(u8 fd, u8 fs) -> void {
|
||||
|
@ -722,6 +734,7 @@ auto CPU::FCVT_L_D(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s64>(ffs)) return;
|
||||
CHECK_FPE(s64, ffd, roundCurrent<s64>(ffs));
|
||||
FD(s64) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCVT_W_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -730,6 +743,7 @@ auto CPU::FCVT_W_S(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s32>(ffs)) return;
|
||||
CHECK_FPE_CONV(s32, ffd, roundCurrent<s32>(ffs));
|
||||
FD(s32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FCVT_W_D(u8 fd, u8 fs) -> void {
|
||||
|
@ -738,6 +752,7 @@ auto CPU::FCVT_W_D(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s32>(ffs)) return;
|
||||
CHECK_FPE_CONV(s32, ffd, roundCurrent<s32>(ffs));
|
||||
FD(s32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FDIV_S(u8 fd, u8 fs, u8 ft) -> void {
|
||||
|
@ -748,6 +763,7 @@ auto CPU::FDIV_S(u8 fd, u8 fs, u8 ft) -> void {
|
|||
CHECK_FPE(f32, ffd, ffs / fft);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f32) = ffd;
|
||||
step((29 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FDIV_D(u8 fd, u8 fs, u8 ft) -> void {
|
||||
|
@ -758,6 +774,7 @@ auto CPU::FDIV_D(u8 fd, u8 fs, u8 ft) -> void {
|
|||
CHECK_FPE(f64, ffd, ffs / fft);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f64) = ffd;
|
||||
step((58 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FFLOOR_L_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -766,6 +783,7 @@ auto CPU::FFLOOR_L_S(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s64>(ffs)) return;
|
||||
CHECK_FPE(s64, ffd, roundFloor<s64>(ffs));
|
||||
FD(s64) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FFLOOR_L_D(u8 fd, u8 fs) -> void {
|
||||
|
@ -774,6 +792,7 @@ auto CPU::FFLOOR_L_D(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s64>(ffs)) return;
|
||||
CHECK_FPE(s64, ffd, roundFloor<s64>(ffs));
|
||||
FD(s64) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FFLOOR_W_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -782,6 +801,7 @@ auto CPU::FFLOOR_W_S(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s32>(ffs)) return;
|
||||
CHECK_FPE_CONV(s32, ffd, roundFloor<s32>(ffs));
|
||||
FD(s32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FFLOOR_W_D(u8 fd, u8 fs) -> void {
|
||||
|
@ -790,6 +810,7 @@ auto CPU::FFLOOR_W_D(u8 fd, u8 fs) -> void {
|
|||
if(!fpuCheckInputConv<s32>(ffs)) return;
|
||||
CHECK_FPE_CONV(s32, ffd, roundFloor<s32>(ffs));
|
||||
FD(s32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FMOV_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -810,6 +831,7 @@ auto CPU::FMUL_S(u8 fd, u8 fs, u8 ft) -> void {
|
|||
CHECK_FPE(f32, ffd, ffs * fft);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FMUL_D(u8 fd, u8 fs, u8 ft) -> void {
|
||||
|
@ -820,6 +842,7 @@ auto CPU::FMUL_D(u8 fd, u8 fs, u8 ft) -> void {
|
|||
CHECK_FPE(f64, ffd, ffs * fft);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f64) = ffd;
|
||||
step((8 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FNEG_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -847,6 +870,7 @@ auto CPU::FROUND_L_S(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE(s64, ffd, roundNearest<s64>(ffs));
|
||||
if(ffd != ffs && fpeInexact()) return exception.floatingPoint();
|
||||
FD(s64) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FROUND_L_D(u8 fd, u8 fs) -> void {
|
||||
|
@ -856,6 +880,7 @@ auto CPU::FROUND_L_D(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE(s64, ffd, roundNearest<s64>(ffs));
|
||||
if(ffd != ffs && fpeInexact()) return exception.floatingPoint();
|
||||
FD(s64) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FROUND_W_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -865,6 +890,7 @@ auto CPU::FROUND_W_S(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE_CONV(s32, ffd, roundNearest<s32>(ffs));
|
||||
if(ffd != ffs && fpeInexact()) return exception.floatingPoint();
|
||||
FD(s32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FROUND_W_D(u8 fd, u8 fs) -> void {
|
||||
|
@ -874,6 +900,7 @@ auto CPU::FROUND_W_D(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE_CONV(s32, ffd, roundNearest<s32>(ffs));
|
||||
if(ffd != ffs && fpeInexact()) return exception.floatingPoint();
|
||||
FD(s32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FSQRT_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -883,6 +910,7 @@ auto CPU::FSQRT_S(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE(f32, ffd, squareRoot(ffs));
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f32) = ffd;
|
||||
step((29 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FSQRT_D(u8 fd, u8 fs) -> void {
|
||||
|
@ -892,6 +920,7 @@ auto CPU::FSQRT_D(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE(f64, ffd, squareRoot(ffs));
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f64) = ffd;
|
||||
step((58 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FSUB_S(u8 fd, u8 fs, u8 ft) -> void {
|
||||
|
@ -902,6 +931,7 @@ auto CPU::FSUB_S(u8 fd, u8 fs, u8 ft) -> void {
|
|||
CHECK_FPE(f32, ffd, ffs - fft);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f32) = ffd;
|
||||
step((3 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FSUB_D(u8 fd, u8 fs, u8 ft) -> void {
|
||||
|
@ -912,6 +942,7 @@ auto CPU::FSUB_D(u8 fd, u8 fs, u8 ft) -> void {
|
|||
CHECK_FPE(f64, ffd, ffs - fft);
|
||||
if(!fpuCheckOutput(ffd)) return;
|
||||
FD(f64) = ffd;
|
||||
step((3 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FTRUNC_L_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -921,6 +952,7 @@ auto CPU::FTRUNC_L_S(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE(s64, ffd, roundTrunc<s64>(ffs));
|
||||
if((f32)ffd != ffs && fpeInexact()) return exception.floatingPoint();
|
||||
FD(s64) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FTRUNC_L_D(u8 fd, u8 fs) -> void {
|
||||
|
@ -930,6 +962,7 @@ auto CPU::FTRUNC_L_D(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE(s64, ffd, roundTrunc<s64>(ffs));
|
||||
if((f64)ffd != ffs && fpeInexact()) return exception.floatingPoint();
|
||||
FD(s64) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FTRUNC_W_S(u8 fd, u8 fs) -> void {
|
||||
|
@ -939,6 +972,7 @@ auto CPU::FTRUNC_W_S(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE_CONV(s32, ffd, roundTrunc<s32>(ffs));
|
||||
if((f32)ffd != ffs && fpeInexact()) return exception.floatingPoint();
|
||||
FD(s32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::FTRUNC_W_D(u8 fd, u8 fs) -> void {
|
||||
|
@ -948,6 +982,7 @@ auto CPU::FTRUNC_W_D(u8 fd, u8 fs) -> void {
|
|||
CHECK_FPE_CONV(s32, ffd, roundTrunc<s32>(ffs));
|
||||
if((f64)ffd != ffs && fpeInexact()) return exception.floatingPoint();
|
||||
FD(s32) = ffd;
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::LDC1(u8 ft, cr64& rs, s16 imm) -> void {
|
||||
|
|
|
@ -269,7 +269,7 @@ auto CPU::DDIV(cr64& rs, cr64& rt) -> void {
|
|||
LO.u64 = rs.s64 < 0 ? +1 : -1;
|
||||
HI.u64 = rs.s64;
|
||||
}
|
||||
step(69);
|
||||
step((69 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::DDIVU(cr64& rs, cr64& rt) -> void {
|
||||
|
@ -281,7 +281,7 @@ auto CPU::DDIVU(cr64& rs, cr64& rt) -> void {
|
|||
LO.u64 = -1;
|
||||
HI.u64 = rs.u64;
|
||||
}
|
||||
step(69);
|
||||
step((69 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::DIV(cr64& rs, cr64& rt) -> void {
|
||||
|
@ -294,7 +294,7 @@ auto CPU::DIV(cr64& rs, cr64& rt) -> void {
|
|||
LO.u64 = rs.s32 < 0 ? +1 : -1;
|
||||
HI.u64 = rs.s32;
|
||||
}
|
||||
step(37);
|
||||
step((37 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::DIVU(cr64& rs, cr64& rt) -> void {
|
||||
|
@ -306,7 +306,7 @@ auto CPU::DIVU(cr64& rs, cr64& rt) -> void {
|
|||
LO.u64 = -1;
|
||||
HI.u64 = rs.s32;
|
||||
}
|
||||
step(37);
|
||||
step((37 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::DMULT(cr64& rs, cr64& rt) -> void {
|
||||
|
@ -329,7 +329,7 @@ auto CPU::DMULT(cr64& rs, cr64& rt) -> void {
|
|||
LO.u64 = result >> 0;
|
||||
HI.u64 = result >> 64;
|
||||
#endif
|
||||
step(8);
|
||||
step((8 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::DMULTU(cr64& rs, cr64& rt) -> void {
|
||||
|
@ -346,7 +346,7 @@ auto CPU::DMULTU(cr64& rs, cr64& rt) -> void {
|
|||
LO.u64 = result >> 0;
|
||||
HI.u64 = result >> 64;
|
||||
#endif
|
||||
step(8);
|
||||
step((8 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::DSLL(r64& rd, cr64& rt, u8 sa) -> void {
|
||||
|
@ -786,14 +786,14 @@ auto CPU::MULT(cr64& rs, cr64& rt) -> void {
|
|||
u64 result = s64(rs.s32) * s64(rt.s32);
|
||||
LO.u64 = s32(result >> 0);
|
||||
HI.u64 = s32(result >> 32);
|
||||
step(5);
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::MULTU(cr64& rs, cr64& rt) -> void {
|
||||
u64 result = u64(rs.u32) * u64(rt.u32);
|
||||
LO.u64 = s32(result >> 0);
|
||||
HI.u64 = s32(result >> 32);
|
||||
step(5);
|
||||
step((5 - 1) * 2);
|
||||
}
|
||||
|
||||
auto CPU::NOR(r64& rd, cr64& rs, cr64& rt) -> void {
|
||||
|
@ -1061,17 +1061,17 @@ auto CPU::SWL(cr64& rt, cr64& rs, s16 imm) -> void {
|
|||
if(context.bigEndian())
|
||||
switch(vaddr & 3) {
|
||||
case 0:
|
||||
if(!write<Word>(vaddr & ~3 | 0, data >> 0)) return;
|
||||
if(!write<Word>(vaddr + 0, data >> 0)) return;
|
||||
break;
|
||||
case 1:
|
||||
if(!write<Byte>(vaddr & ~3 | 1, data >> 24)) return;
|
||||
if(!write<Half>(vaddr & ~3 | 2, data >> 8)) return;
|
||||
if(!write<Byte>(vaddr + 0, data >> 24)) return;
|
||||
if(!write<Half>(vaddr + 1, data >> 8)) return;
|
||||
break;
|
||||
case 2:
|
||||
if(!write<Half>(vaddr & ~3 | 2, data >> 16)) return;
|
||||
if(!write<Half>(vaddr + 0, data >> 16)) return;
|
||||
break;
|
||||
case 3:
|
||||
if(!write<Byte>(vaddr & ~3 | 3, data >> 24)) return;
|
||||
if(!write<Byte>(vaddr + 0, data >> 24)) return;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1100,17 +1100,17 @@ auto CPU::SWR(cr64& rt, cr64& rs, s16 imm) -> void {
|
|||
if(context.bigEndian())
|
||||
switch(vaddr & 3) {
|
||||
case 0:
|
||||
if(!write<Byte>(vaddr & ~3 | 0, data >> 0)) return;
|
||||
if(!write<Byte>(vaddr + 0, data >> 0, false)) return;
|
||||
break;
|
||||
case 1:
|
||||
if(!write<Half>(vaddr & ~3 | 0, data >> 0)) return;
|
||||
if(!write<Half>(vaddr + 0, data >> 0, false)) return;
|
||||
break;
|
||||
case 2:
|
||||
if(!write<Half>(vaddr & ~3 | 0, data >> 8)) return;
|
||||
if(!write<Byte>(vaddr & ~3 | 2, data >> 0)) return;
|
||||
if(!write<Byte>(vaddr + 0, data >> 0, false)) return;
|
||||
if(!write<Half>(vaddr - 2, data >> 8, false)) return;
|
||||
break;
|
||||
case 3:
|
||||
if(!write<Word>(vaddr & ~3 | 0, data >> 0)) return;
|
||||
if(!write<Word>(vaddr + 0, data >> 0, false)) return;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -347,6 +347,7 @@ auto CPU::TLBWI() -> void {
|
|||
if(!scc.status.enable.coprocessor0) return exception.coprocessor0();
|
||||
}
|
||||
if(scc.index.tlbEntry >= TLB::Entries) return;
|
||||
devirtualizeCache = {};
|
||||
tlb.entry[scc.index.tlbEntry] = scc.tlb;
|
||||
tlb.entry[scc.index.tlbEntry].synchronize();
|
||||
debugger.tlbWrite(scc.index.tlbEntry);
|
||||
|
@ -358,6 +359,7 @@ auto CPU::TLBWR() -> void {
|
|||
}
|
||||
u8 index = getControlRandom();
|
||||
if(index >= TLB::Entries) return;
|
||||
devirtualizeCache = {};
|
||||
tlb.entry[index] = scc.tlb;
|
||||
tlb.entry[index].synchronize();
|
||||
debugger.tlbWrite(index);
|
||||
|
|
|
@ -109,46 +109,73 @@ auto CPU::devirtualize(u64 vaddr) -> maybe<u64> {
|
|||
unreachable;
|
||||
}
|
||||
|
||||
// Fast(er) version of devirtualize for icache lookups
|
||||
// avoids handling unmapped regions/exceptions as these should have already
|
||||
// been handled by instruction fetch, also ignores tlb match failure
|
||||
//
// Translates vaddr to a physical address using a one-entry page cache
// (devirtualizeCache.vbase / devirtualizeCache.pbase). Never raises an
// exception itself; when no translation is found the result is 0.
auto CPU::devirtualizeFast(u64 vaddr) -> u64 {
|
||||
// Assume address space is mapped into pages that are 4kb in size
|
||||
// If we have a cached physical address for this page, use it
|
||||
// This cache is purged on any writes to the TLB so should never become stale
|
||||
auto vbase = vaddr >> 12;
|
||||
// A cached pbase of 0 is treated as "no entry", so a page whose cached
// physical address is 0 always takes the slow path below.
if(devirtualizeCache.vbase == vbase && devirtualizeCache.pbase) {
|
||||
auto offset = vaddr & 0xfff;
|
||||
// pbase was stored with the in-page offset of the address that filled the
// cache, so strip the low 12 bits before adding this address's offset.
return (devirtualizeCache.pbase & ~0xfff) + offset;
|
||||
}
|
||||
|
||||
// Cache the physical address of this page for the next call
|
||||
devirtualizeCache.vbase = vaddr >> 12;
|
||||
|
||||
// Every branch below both stores the translated address in
// devirtualizeCache.pbase and returns it.
switch(segment(vaddr)) {
|
||||
case Context::Segment::Mapped: {
|
||||
// loadFast's result is used without checking whether it matched; the
// address of a failed lookup is cached and returned as-is.
auto match = tlb.loadFast(vaddr);
|
||||
return devirtualizeCache.pbase = match.address & context.physMask;
|
||||
}
|
||||
case Context::Segment::Cached:
|
||||
case Context::Segment::Direct:
|
||||
return devirtualizeCache.pbase = vaddr & 0x1fff'ffff;
|
||||
case Context::Segment::Cached32:
|
||||
case Context::Segment::Direct32:
|
||||
return devirtualizeCache.pbase = vaddr & 0xffff'ffff;
|
||||
}
|
||||
// Any other segment: no translation; pbase = 0 also marks the cache as empty.
return devirtualizeCache.pbase = 0;
|
||||
}
|
||||
|
||||
template<u32 Size>
|
||||
inline auto CPU::busWrite(u32 address, u64 data) -> void {
|
||||
u32 cycles = 0;
|
||||
bus.write<Size>(address, data, cycles);
|
||||
step(cycles);
|
||||
bus.write<Size>(address, data, *this);
|
||||
}
|
||||
|
||||
template<u32 Size>
|
||||
inline auto CPU::busRead(u32 address) -> u64 {
|
||||
u32 cycles = 0; u64 data;
|
||||
data = bus.read<Size>(address, cycles);
|
||||
return step(cycles), data;
|
||||
return bus.read<Size>(address, *this);
|
||||
}
|
||||
|
||||
auto CPU::fetch(u64 vaddr) -> maybe<u32> {
|
||||
if(vaddrAlignedError<Word>(vaddr, false)) return nothing;
|
||||
switch(segment(vaddr)) {
|
||||
case Context::Segment::Unused:
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
addressException(vaddr);
|
||||
exception.addressLoad();
|
||||
return nothing;
|
||||
case Context::Segment::Mapped:
|
||||
if(auto match = tlb.load(vaddr)) {
|
||||
if(match.cache) return icache.fetch(match.address & context.physMask, cpu);
|
||||
step(1);
|
||||
if(match.cache) return icache.fetch(vaddr, match.address & context.physMask, cpu);
|
||||
step(1 * 2);
|
||||
return busRead<Word>(match.address & context.physMask);
|
||||
}
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
addressException(vaddr);
|
||||
return nothing;
|
||||
case Context::Segment::Cached:
|
||||
return icache.fetch(vaddr & 0x1fff'ffff, cpu);
|
||||
return icache.fetch(vaddr, vaddr & 0x1fff'ffff, cpu);
|
||||
case Context::Segment::Cached32:
|
||||
return icache.fetch(vaddr & 0xffff'ffff, cpu);
|
||||
return icache.fetch(vaddr, vaddr & 0xffff'ffff, cpu);
|
||||
case Context::Segment::Direct:
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
return busRead<Word>(vaddr & 0x1fff'ffff);
|
||||
case Context::Segment::Direct32:
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
return busRead<Word>(vaddr & 0xffff'ffff);
|
||||
}
|
||||
|
||||
|
@ -160,28 +187,28 @@ auto CPU::read(u64 vaddr) -> maybe<u64> {
|
|||
if(vaddrAlignedError<Size>(vaddr, false)) return nothing;
|
||||
switch(segment(vaddr)) {
|
||||
case Context::Segment::Unused:
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
addressException(vaddr);
|
||||
exception.addressLoad();
|
||||
return nothing;
|
||||
case Context::Segment::Mapped:
|
||||
if(auto match = tlb.load(vaddr)) {
|
||||
if(match.cache) return dcache.read<Size>(match.address & context.physMask);
|
||||
step(1);
|
||||
if(match.cache) return dcache.read<Size>(vaddr, match.address & context.physMask);
|
||||
step(1 * 2);
|
||||
return busRead<Size>(match.address & context.physMask);
|
||||
}
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
addressException(vaddr);
|
||||
return nothing;
|
||||
case Context::Segment::Cached:
|
||||
return dcache.read<Size>(vaddr & 0x1fff'ffff);
|
||||
return dcache.read<Size>(vaddr, vaddr & 0x1fff'ffff);
|
||||
case Context::Segment::Cached32:
|
||||
return dcache.read<Size>(vaddr & 0xffff'ffff);
|
||||
return dcache.read<Size>(vaddr, vaddr & 0xffff'ffff);
|
||||
case Context::Segment::Direct:
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
return busRead<Size>(vaddr & 0x1fff'ffff);
|
||||
case Context::Segment::Direct32:
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
return busRead<Size>(vaddr & 0xffff'ffff);
|
||||
}
|
||||
|
||||
|
@ -189,32 +216,33 @@ auto CPU::read(u64 vaddr) -> maybe<u64> {
|
|||
}
|
||||
|
||||
template<u32 Size>
|
||||
auto CPU::write(u64 vaddr, u64 data) -> bool {
|
||||
if(vaddrAlignedError<Size>(vaddr, true)) return false;
|
||||
auto CPU::write(u64 vaddr0, u64 data, bool alignedError) -> bool {
|
||||
if(alignedError && vaddrAlignedError<Size>(vaddr0, true)) return false;
|
||||
u64 vaddr = vaddr0 & ~((u64)Size - 1);
|
||||
switch(segment(vaddr)) {
|
||||
case Context::Segment::Unused:
|
||||
step(1);
|
||||
addressException(vaddr);
|
||||
step(1 * 2);
|
||||
addressException(vaddr0);
|
||||
exception.addressStore();
|
||||
return false;
|
||||
case Context::Segment::Mapped:
|
||||
if(auto match = tlb.store(vaddr)) {
|
||||
if(match.cache) return dcache.write<Size>(match.address & context.physMask, data), true;
|
||||
step(1);
|
||||
if(match.cache) return dcache.write<Size>(vaddr, match.address & context.physMask, data), true;
|
||||
step(1 * 2);
|
||||
return busWrite<Size>(match.address & context.physMask, data), true;
|
||||
}
|
||||
step(1);
|
||||
addressException(vaddr);
|
||||
step(1 * 2);
|
||||
addressException(vaddr0);
|
||||
return false;
|
||||
case Context::Segment::Cached:
|
||||
return dcache.write<Size>(vaddr & 0x1fff'ffff, data), true;
|
||||
return dcache.write<Size>(vaddr, vaddr & 0x1fff'ffff, data), true;
|
||||
case Context::Segment::Cached32:
|
||||
return dcache.write<Size>(vaddr & 0xffff'ffff, data), true;
|
||||
return dcache.write<Size>(vaddr, vaddr & 0xffff'ffff, data), true;
|
||||
case Context::Segment::Direct:
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
return busWrite<Size>(vaddr & 0x1fff'ffff, data), true;
|
||||
case Context::Segment::Direct32:
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
return busWrite<Size>(vaddr & 0xffff'ffff, data), true;
|
||||
}
|
||||
|
||||
|
@ -225,14 +253,14 @@ template<u32 Size>
|
|||
auto CPU::vaddrAlignedError(u64 vaddr, bool write) -> bool {
|
||||
if constexpr(Accuracy::CPU::AddressErrors) {
|
||||
if(unlikely(vaddr & Size - 1)) {
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
addressException(vaddr);
|
||||
if(write) exception.addressStore();
|
||||
else exception.addressLoad();
|
||||
return true;
|
||||
}
|
||||
if (context.bits == 32 && unlikely((s32)vaddr != vaddr)) {
|
||||
step(1);
|
||||
step(1 * 2);
|
||||
addressException(vaddr);
|
||||
if(write) exception.addressStore();
|
||||
else exception.addressLoad();
|
||||
|
|
|
@ -4,15 +4,21 @@ auto CPU::Recompiler::pool(u32 address) -> Pool* {
|
|||
return pool;
|
||||
}
|
||||
|
||||
auto CPU::Recompiler::block(u32 address) -> Block* {
|
||||
auto CPU::Recompiler::block(u32 vaddr, u32 address) -> Block* {
|
||||
if(auto block = pool(address)->blocks[address >> 2 & 0x3f]) return block;
|
||||
auto block = emit(address);
|
||||
auto block = emit(vaddr, address);
|
||||
pool(address)->blocks[address >> 2 & 0x3f] = block;
|
||||
memory::jitprotect(true);
|
||||
return block;
|
||||
}
|
||||
|
||||
auto CPU::Recompiler::emit(u32 address) -> Block* {
|
||||
// Looks up the block slot for `address` without emitting anything.
// Returns nullptr when the pool covering the address has not been allocated;
// otherwise returns whatever pointer the slot currently holds.
// NOTE(review): the slot itself may presumably be null if no block has been
// emitted there yet - callers should check the result.
auto CPU::Recompiler::fastFetchBlock(u32 address) -> Block* {
|
||||
// Pool index: (address >> 8) masked to 21 bits (>> binds tighter than &).
auto& pool = pools[address >> 8 & 0x1fffff];
|
||||
// Block index within the pool: (address >> 2) masked to 6 bits, i.e. 64 slots.
if(pool) return pool->blocks[address >> 2 & 0x3f];
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto CPU::Recompiler::emit(u32 vaddr, u32 address) -> Block* {
|
||||
if(unlikely(allocator.available() < 1_MiB)) {
|
||||
print("CPU allocator flush\n");
|
||||
memory::jitprotect(false);
|
||||
|
@ -24,17 +30,19 @@ auto CPU::Recompiler::emit(u32 address) -> Block* {
|
|||
auto block = (Block*)allocator.acquire(sizeof(Block));
|
||||
beginFunction(3);
|
||||
|
||||
u32 memCycles;
|
||||
Thread thread;
|
||||
bool hasBranched = 0;
|
||||
while(true) {
|
||||
u32 instruction = bus.read<Word>(address, memCycles);
|
||||
u32 instruction = bus.read<Word>(address, thread);
|
||||
bool branched = emitEXECUTE(instruction);
|
||||
if(unlikely(instruction == 0x1000'ffff)) {
|
||||
if(unlikely(instruction == 0x1000'ffff //beq 0,0,<pc>
|
||||
|| instruction == (2 << 26 | vaddr >> 2 & 0x3ff'ffff))) { //j <pc>
|
||||
//accelerate idle loops
|
||||
mov32(reg(1), imm(64));
|
||||
mov32(reg(1), imm(64 * 2));
|
||||
call(&CPU::step);
|
||||
}
|
||||
call(&CPU::instructionEpilogue);
|
||||
vaddr += 4;
|
||||
address += 4;
|
||||
if(hasBranched || (address & 0xfc) == 0) break; //block boundary
|
||||
hasBranched = branched;
|
||||
|
|
|
@ -1,48 +1,104 @@
|
|||
|
||||
// Performs a load translation of vaddr through one specific TLB entry.
// This helper does not test whether the entry matches vaddr (address space
// ID, virtual address, region); it only indexes the entry and checks validity.
// Returns {false} after raising a TLB-load-invalid exception when the selected
// half of the entry is not valid; otherwise returns
// {true, cacheable flag, physical address}.
// NOTE(review): because the exception is raised in here, a caller that treats
// a false Match as "keep searching" could raise again for the same access -
// confirm against the callers.
auto CPU::TLB::load(u64 vaddr, const Entry& entry) -> Match {
|
||||
// Index (0 or 1) into the entry's per-page arrays, chosen by the select bit.
bool lo = vaddr & entry.addressSelect;
|
||||
if(!entry.valid[lo]) {
|
||||
self.addressException(vaddr);
|
||||
self.debugger.tlbLoadInvalid(vaddr);
|
||||
self.exception.tlbLoadInvalid();
|
||||
return {false};
|
||||
}
|
||||
// physicalAddress is not declared in this function (presumably a TLB member).
physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo);
|
||||
self.debugger.tlbLoad(vaddr, physicalAddress);
|
||||
// Second field is true unless cacheAlgorithm is 2
// (presumably the "uncached" encoding - TODO confirm).
return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress};
|
||||
}
|
||||
|
||||
auto CPU::TLB::load(u64 vaddr) -> Match {
|
||||
for(auto& entry : this->tlbCache.entry) {
|
||||
if(!entry.entry) continue;
|
||||
if(!entry.entry->globals && entry.entry->addressSpaceID != self.scc.tlb.addressSpaceID) continue;
|
||||
if((vaddr & entry.entry->addressMaskHi) != entry.entry->virtualAddress) continue;
|
||||
if(vaddr >> 62 != entry.entry->region) continue;
|
||||
if(auto match = load(vaddr, *entry.entry)) {
|
||||
entry.frequency++;
|
||||
return match;
|
||||
}
|
||||
}
|
||||
|
||||
for(auto& entry : this->entry) {
|
||||
if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue;
|
||||
if((vaddr & entry.addressMaskHi) != entry.virtualAddress) continue;
|
||||
if(vaddr >> 62 != entry.region) continue;
|
||||
bool lo = vaddr & entry.addressSelect;
|
||||
if(!entry.valid[lo]) {
|
||||
self.addressException(vaddr);
|
||||
self.debugger.tlbLoadInvalid(vaddr);
|
||||
self.exception.tlbLoadInvalid();
|
||||
return {false};
|
||||
if(auto match = load(vaddr, entry)) {
|
||||
this->tlbCache.insert(entry);
|
||||
return match;
|
||||
}
|
||||
physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo);
|
||||
self.debugger.tlbLoad(vaddr, physicalAddress);
|
||||
return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress};
|
||||
}
|
||||
|
||||
self.addressException(vaddr);
|
||||
self.debugger.tlbLoadMiss(vaddr);
|
||||
self.exception.tlbLoadMiss();
|
||||
return {false};
|
||||
}
|
||||
|
||||
auto CPU::TLB::store(u64 vaddr) -> Match {
|
||||
// Fast(er) version of load for recompiler icache lookups
|
||||
// avoids exceptions/debug checks
|
||||
auto CPU::TLB::loadFast(u64 vaddr) -> Match {
|
||||
for(auto& entry : this->entry) {
|
||||
if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue;
|
||||
if((vaddr & entry.addressMaskHi) != entry.virtualAddress) continue;
|
||||
if(vaddr >> 62 != entry.region) continue;
|
||||
bool lo = vaddr & entry.addressSelect;
|
||||
if(!entry.valid[lo]) {
|
||||
self.addressException(vaddr);
|
||||
self.debugger.tlbStoreInvalid(vaddr);
|
||||
self.exception.tlbStoreInvalid();
|
||||
return {false};
|
||||
}
|
||||
if(!entry.dirty[lo]) {
|
||||
self.addressException(vaddr);
|
||||
self.debugger.tlbModification(vaddr);
|
||||
self.exception.tlbModification();
|
||||
return {false};
|
||||
}
|
||||
if(!entry.valid[lo]) return { false, 0, 0 };
|
||||
physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo);
|
||||
self.debugger.tlbStore(vaddr, physicalAddress);
|
||||
return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress};
|
||||
}
|
||||
|
||||
return {false, 0, 0};
|
||||
}
|
||||
|
||||
// Performs a store translation of vaddr through one specific TLB entry.
// This helper does not test whether the entry matches vaddr; it only indexes
// the entry and checks its valid and dirty flags.
// Returns {false} after raising a TLB-store-invalid exception (entry half not
// valid) or a TLB-modification exception (entry half not dirty); otherwise
// returns {true, cacheable flag, physical address}.
auto CPU::TLB::store(u64 vaddr, const Entry& entry) -> Match {
|
||||
// Index (0 or 1) into the entry's per-page arrays, chosen by the select bit.
bool lo = vaddr & entry.addressSelect;
|
||||
if(!entry.valid[lo]) {
|
||||
self.addressException(vaddr);
|
||||
self.debugger.tlbStoreInvalid(vaddr);
|
||||
self.exception.tlbStoreInvalid();
|
||||
return {false};
|
||||
}
|
||||
// Validity is checked first; a valid entry whose dirty flag is clear rejects
// the store with a modification exception.
if(!entry.dirty[lo]) {
|
||||
self.addressException(vaddr);
|
||||
self.debugger.tlbModification(vaddr);
|
||||
self.exception.tlbModification();
|
||||
return {false};
|
||||
}
|
||||
// physicalAddress is not declared in this function (presumably a TLB member).
physicalAddress = entry.physicalAddress[lo] + (vaddr & entry.addressMaskLo);
|
||||
self.debugger.tlbStore(vaddr, physicalAddress);
|
||||
// Second field is true unless cacheAlgorithm is 2
// (presumably the "uncached" encoding - TODO confirm).
return {true, entry.cacheAlgorithm[lo] != 2, physicalAddress};
|
||||
}
|
||||
|
||||
auto CPU::TLB::store(u64 vaddr) -> Match {
|
||||
for(auto& entry : this->tlbCache.entry) {
|
||||
if(!entry.entry) continue;
|
||||
if(!entry.entry->globals && entry.entry->addressSpaceID != self.scc.tlb.addressSpaceID) continue;
|
||||
if((vaddr & entry.entry->addressMaskHi) != entry.entry->virtualAddress) continue;
|
||||
if(vaddr >> 62 != entry.entry->region) continue;
|
||||
|
||||
if(auto match = store(vaddr, *entry.entry)) {
|
||||
entry.frequency++;
|
||||
return match;
|
||||
}
|
||||
}
|
||||
|
||||
for(auto& entry : this->entry) {
|
||||
if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue;
|
||||
if((vaddr & entry.addressMaskHi) != entry.virtualAddress) continue;
|
||||
if(vaddr >> 62 != entry.region) continue;
|
||||
|
||||
if(auto match = store(vaddr, entry)) {
|
||||
this->tlbCache.insert(entry);
|
||||
return match;
|
||||
}
|
||||
}
|
||||
|
||||
self.addressException(vaddr);
|
||||
self.debugger.tlbStoreMiss(vaddr);
|
||||
self.exception.tlbStoreMiss();
|
||||
|
|
|
@ -1,30 +1,30 @@
|
|||
template<u32 Size>
|
||||
inline auto Bus::read(u32 address, u32& cycles) -> u64 {
|
||||
inline auto Bus::read(u32 address, Thread& thread) -> u64 {
|
||||
static constexpr u64 unmapped = 0;
|
||||
address &= 0x1fff'ffff - (Size - 1);
|
||||
|
||||
if(address <= 0x007f'ffff) return rdram.ram.read<Size>(address);
|
||||
if(address <= 0x03ef'ffff) return unmapped;
|
||||
if(address <= 0x03ff'ffff) return rdram.read<Size>(address, cycles);
|
||||
if(address <= 0x0407'ffff) return rsp.read<Size>(address, cycles);
|
||||
if(address <= 0x040f'ffff) return rsp.status.read<Size>(address, cycles);
|
||||
if(address <= 0x041f'ffff) return rdp.read<Size>(address, cycles);
|
||||
if(address <= 0x042f'ffff) return rdp.io.read<Size>(address, cycles);
|
||||
if(address <= 0x043f'ffff) return mi.read<Size>(address, cycles);
|
||||
if(address <= 0x044f'ffff) return vi.read<Size>(address, cycles);
|
||||
if(address <= 0x045f'ffff) return ai.read<Size>(address, cycles);
|
||||
if(address <= 0x046f'ffff) return pi.read<Size>(address, cycles);
|
||||
if(address <= 0x047f'ffff) return ri.read<Size>(address, cycles);
|
||||
if(address <= 0x048f'ffff) return si.read<Size>(address, cycles);
|
||||
if(address <= 0x03ff'ffff) return rdram.read<Size>(address, thread);
|
||||
if(address <= 0x0407'ffff) return rsp.read<Size>(address, thread);
|
||||
if(address <= 0x040f'ffff) return rsp.status.read<Size>(address, thread);
|
||||
if(address <= 0x041f'ffff) return rdp.read<Size>(address, thread);
|
||||
if(address <= 0x042f'ffff) return rdp.io.read<Size>(address, thread);
|
||||
if(address <= 0x043f'ffff) return mi.read<Size>(address, thread);
|
||||
if(address <= 0x044f'ffff) return vi.read<Size>(address, thread);
|
||||
if(address <= 0x045f'ffff) return ai.read<Size>(address, thread);
|
||||
if(address <= 0x046f'ffff) return pi.read<Size>(address, thread);
|
||||
if(address <= 0x047f'ffff) return ri.read<Size>(address, thread);
|
||||
if(address <= 0x048f'ffff) return si.read<Size>(address, thread);
|
||||
if(address <= 0x04ff'ffff) return unmapped;
|
||||
if(address <= 0x1fbf'ffff) return pi.read<Size>(address, cycles);
|
||||
if(address <= 0x1fcf'ffff) return si.read<Size>(address, cycles);
|
||||
if(address <= 0x7fff'ffff) return pi.read<Size>(address, cycles);
|
||||
if(address <= 0x1fbf'ffff) return pi.read<Size>(address, thread);
|
||||
if(address <= 0x1fcf'ffff) return si.read<Size>(address, thread);
|
||||
if(address <= 0x7fff'ffff) return pi.read<Size>(address, thread);
|
||||
return unmapped;
|
||||
}
|
||||
|
||||
template<u32 Size>
|
||||
inline auto Bus::write(u32 address, u64 data, u32& cycles) -> void {
|
||||
inline auto Bus::write(u32 address, u64 data, Thread& thread) -> void {
|
||||
address &= 0x1fff'ffff - (Size - 1);
|
||||
if constexpr(Accuracy::CPU::Recompiler) {
|
||||
cpu.recompiler.invalidate(address + 0); if constexpr(Size == Dual)
|
||||
|
@ -33,20 +33,20 @@ inline auto Bus::write(u32 address, u64 data, u32& cycles) -> void {
|
|||
|
||||
if(address <= 0x007f'ffff) return rdram.ram.write<Size>(address, data);
|
||||
if(address <= 0x03ef'ffff) return;
|
||||
if(address <= 0x03ff'ffff) return rdram.write<Size>(address, data, cycles);
|
||||
if(address <= 0x0407'ffff) return rsp.write<Size>(address, data, cycles);
|
||||
if(address <= 0x040f'ffff) return rsp.status.write<Size>(address, data, cycles);
|
||||
if(address <= 0x041f'ffff) return rdp.write<Size>(address, data, cycles);
|
||||
if(address <= 0x042f'ffff) return rdp.io.write<Size>(address, data, cycles);
|
||||
if(address <= 0x043f'ffff) return mi.write<Size>(address, data, cycles);
|
||||
if(address <= 0x044f'ffff) return vi.write<Size>(address, data, cycles);
|
||||
if(address <= 0x045f'ffff) return ai.write<Size>(address, data, cycles);
|
||||
if(address <= 0x046f'ffff) return pi.write<Size>(address, data, cycles);
|
||||
if(address <= 0x047f'ffff) return ri.write<Size>(address, data, cycles);
|
||||
if(address <= 0x048f'ffff) return si.write<Size>(address, data, cycles);
|
||||
if(address <= 0x03ff'ffff) return rdram.write<Size>(address, data, thread);
|
||||
if(address <= 0x0407'ffff) return rsp.write<Size>(address, data, thread);
|
||||
if(address <= 0x040f'ffff) return rsp.status.write<Size>(address, data, thread);
|
||||
if(address <= 0x041f'ffff) return rdp.write<Size>(address, data, thread);
|
||||
if(address <= 0x042f'ffff) return rdp.io.write<Size>(address, data, thread);
|
||||
if(address <= 0x043f'ffff) return mi.write<Size>(address, data, thread);
|
||||
if(address <= 0x044f'ffff) return vi.write<Size>(address, data, thread);
|
||||
if(address <= 0x045f'ffff) return ai.write<Size>(address, data, thread);
|
||||
if(address <= 0x046f'ffff) return pi.write<Size>(address, data, thread);
|
||||
if(address <= 0x047f'ffff) return ri.write<Size>(address, data, thread);
|
||||
if(address <= 0x048f'ffff) return si.write<Size>(address, data, thread);
|
||||
if(address <= 0x04ff'ffff) return;
|
||||
if(address <= 0x1fbf'ffff) return pi.write<Size>(address, data, cycles);
|
||||
if(address <= 0x1fcf'ffff) return si.write<Size>(address, data, cycles);
|
||||
if(address <= 0x7fff'ffff) return pi.write<Size>(address, data, cycles);
|
||||
if(address <= 0x1fbf'ffff) return pi.write<Size>(address, data, thread);
|
||||
if(address <= 0x1fcf'ffff) return si.write<Size>(address, data, thread);
|
||||
if(address <= 0x7fff'ffff) return pi.write<Size>(address, data, thread);
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -4,10 +4,10 @@ struct RCP { //A device which is part of RCP
|
|||
const u32 DefaultWriteCycles = 0; //not implemented until we implement the CPU write queue
|
||||
|
||||
template<u32 Size>
|
||||
auto read(u32 address, u32& cycles) -> u64 {
|
||||
cycles = DefaultReadCycles;
|
||||
auto read(u32 address, Thread& thread) -> u64 {
|
||||
thread.step(DefaultReadCycles * 2);
|
||||
if constexpr(Size == Byte) {
|
||||
auto data = ((T*)this)->readWord(address, cycles);
|
||||
auto data = ((T*)this)->readWord(address, thread);
|
||||
switch(address & 3) {
|
||||
case 0: return data >> 24;
|
||||
case 1: return data >> 16;
|
||||
|
@ -16,44 +16,44 @@ struct RCP { //A device which is part of RCP
|
|||
}
|
||||
}
|
||||
if constexpr(Size == Half) {
|
||||
auto data = ((T*)this)->readWord(address, cycles);
|
||||
auto data = ((T*)this)->readWord(address, thread);
|
||||
switch(address & 2) {
|
||||
case 0: return data >> 16;
|
||||
case 2: return data >> 0;
|
||||
}
|
||||
}
|
||||
if constexpr(Size == Word) {
|
||||
return ((T*)this)->readWord(address, cycles);
|
||||
return ((T*)this)->readWord(address, thread);
|
||||
}
|
||||
if constexpr(Size == Dual) {
|
||||
u64 data = ((T*)this)->readWord(address, cycles);
|
||||
return data << 32 | ((T*)this)->readWord(address + 4, cycles);
|
||||
u64 data = ((T*)this)->readWord(address, thread);
|
||||
return data << 32 | ((T*)this)->readWord(address + 4, thread);
|
||||
}
|
||||
unreachable;
|
||||
}
|
||||
|
||||
template<u32 Size>
|
||||
auto write(u32 address, u64 data, u32& cycles) -> void {
|
||||
cycles = DefaultWriteCycles;
|
||||
auto write(u32 address, u64 data, Thread& thread) -> void {
|
||||
thread.step(DefaultWriteCycles * 2);
|
||||
if constexpr(Size == Byte) {
|
||||
switch(address & 3) {
|
||||
case 0: return ((T*)this)->writeWord(address, data << 24, cycles);
|
||||
case 1: return ((T*)this)->writeWord(address, data << 16, cycles);
|
||||
case 2: return ((T*)this)->writeWord(address, data << 8, cycles);
|
||||
case 3: return ((T*)this)->writeWord(address, data << 0, cycles);
|
||||
case 0: return ((T*)this)->writeWord(address, data << 24, thread);
|
||||
case 1: return ((T*)this)->writeWord(address, data << 16, thread);
|
||||
case 2: return ((T*)this)->writeWord(address, data << 8, thread);
|
||||
case 3: return ((T*)this)->writeWord(address, data << 0, thread);
|
||||
}
|
||||
}
|
||||
if constexpr(Size == Half) {
|
||||
switch(address & 2) {
|
||||
case 0: return ((T*)this)->writeWord(address, data << 16, cycles);
|
||||
case 2: return ((T*)this)->writeWord(address, data << 0, cycles);
|
||||
case 0: return ((T*)this)->writeWord(address, data << 16, thread);
|
||||
case 2: return ((T*)this)->writeWord(address, data << 0, thread);
|
||||
}
|
||||
}
|
||||
if constexpr(Size == Word) {
|
||||
((T*)this)->writeWord(address, data, cycles);
|
||||
((T*)this)->writeWord(address, data, thread);
|
||||
}
|
||||
if constexpr(Size == Dual) {
|
||||
((T*)this)->writeWord(address, data >> 32, cycles);
|
||||
((T*)this)->writeWord(address, data >> 32, thread);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -31,8 +31,8 @@ namespace Memory {
|
|||
|
||||
struct Bus {
|
||||
//bus.hpp
|
||||
template<u32 Size> auto read(u32 address, u32& cycles) -> u64;
|
||||
template<u32 Size> auto write(u32 address, u64 data, u32& cycles) -> void;
|
||||
template<u32 Size> auto read(u32 address, Thread& thread) -> u64;
|
||||
template<u32 Size> auto write(u32 address, u64 data, Thread& thread) -> void;
|
||||
};
|
||||
|
||||
extern Bus bus;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
auto MI::readWord(u32 address, u32& cycles) -> u32 {
|
||||
auto MI::readWord(u32 address, Thread& thread) -> u32 {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data;
|
||||
|
||||
|
@ -42,7 +42,7 @@ auto MI::readWord(u32 address, u32& cycles) -> u32 {
|
|||
return data;
|
||||
}
|
||||
|
||||
auto MI::writeWord(u32 address, u32 data_, u32& cycles) -> void {
|
||||
auto MI::writeWord(u32 address, u32 data_, Thread& thread) -> void {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data = data_;
|
||||
|
||||
|
|
|
@ -27,8 +27,8 @@ struct MI : Memory::RCP<MI> {
|
|||
auto power(bool reset) -> void;
|
||||
|
||||
//io.cpp
|
||||
auto readWord(u32 address, u32& cycles) -> u32;
|
||||
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
|
||||
auto readWord(u32 address, Thread& thread) -> u32;
|
||||
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
|
||||
|
||||
//serialization.cpp
|
||||
auto serialize(serializer&) -> void;
|
||||
|
|
|
@ -19,6 +19,10 @@ using v128 = __m128i;
|
|||
using v128 = __m128i;
|
||||
#endif
|
||||
|
||||
#if defined(VULKAN)
|
||||
#include <n64/vulkan/vulkan.hpp>
|
||||
#endif
|
||||
|
||||
namespace ares::Nintendo64 {
|
||||
auto enumerate() -> vector<string>;
|
||||
auto load(Node::System& node, string name) -> bool;
|
||||
|
@ -39,6 +43,10 @@ namespace ares::Nintendo64 {
|
|||
clock = 0;
|
||||
}
|
||||
|
||||
auto step(u32 clocks) -> void {
|
||||
clock += clocks;
|
||||
}
|
||||
|
||||
auto serialize(serializer& s) -> void {
|
||||
s(clock);
|
||||
}
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
inline auto PI::readWord(u32 address, u32& cycles) -> u32 {
|
||||
inline auto PI::readWord(u32 address, Thread& thread) -> u32 {
|
||||
if(address <= 0x046f'ffff) return ioRead(address);
|
||||
|
||||
if (unlikely(io.ioBusy)) {
|
||||
cycles += writeForceFinish();
|
||||
thread.step(writeForceFinish() * 2);
|
||||
return io.busLatch;
|
||||
}
|
||||
cycles += 250;
|
||||
thread.step(250 * 2);
|
||||
return busRead<Word>(address);
|
||||
}
|
||||
|
||||
|
@ -51,7 +51,7 @@ inline auto PI::busRead(u32 address) -> u32 {
|
|||
return unmapped; //accesses here actually lock out the RCP
|
||||
}
|
||||
|
||||
inline auto PI::writeWord(u32 address, u32 data, u32& cycles) -> void {
|
||||
inline auto PI::writeWord(u32 address, u32 data, Thread& thread) -> void {
|
||||
if(address <= 0x046f'ffff) return ioWrite(address, data);
|
||||
|
||||
if(io.ioBusy) return;
|
||||
|
|
|
@ -52,3 +52,50 @@ auto PI::dmaFinished() -> void {
|
|||
io.interrupt = 1;
|
||||
mi.raise(MI::IRQ::PI);
|
||||
}
|
||||
|
||||
//estimates how long the PI DMA currently described by the io registers takes.
//  read:    true selects io.readLength, false selects io.writeLength
//  returns: the accumulated cycle estimate multiplied by 3
//           (presumably a conversion into the units queue.insert() expects -- TODO confirm)
auto PI::dmaDuration(bool read) -> u32 {
  //keep the byte count in a plain 32-bit integer. ioWrite stores the length registers through
  //n24(...), so a type deduced with auto would presumably be a 24-bit natural; rounding the
  //largest lengths (0xfffffe, 0xffffff) up to 0x1000000 would then wrap around to zero and the
  //longest possible transfer would be given an almost-zero duration.
  u32 len = read ? io.readLength : io.writeLength;
  len = (len | 1) + 1;  //set bit 0, then add one: always an even count of at least 2

  //the top byte of io.pbusAddress selects which timing set (bsd1 or bsd2) applies
  BSD bsd;
  switch (io.pbusAddress.bit(24,31)) {
    case 0x05:               bsd = bsd2; break;
    case range8(0x08, 0x0F): bsd = bsd2; break;
    default:                 bsd = bsd1; break;
  }

  //page geometry derived from the selected timing set
  auto pageShift = bsd.pageSize + 2;
  auto pageSize  = 1 << pageShift;
  auto pageMask  = pageSize - 1;

  //first address touched, and the address of the last 16-bit unit touched
  auto pbusFirst = io.pbusAddress;
  auto pbusLast  = io.pbusAddress + len - 2;

  auto pbusFirstPage = pbusFirst >> pageShift;
  auto pbusLastPage  = pbusLast  >> pageShift;
  auto pbusPages     = pbusLastPage - pbusFirstPage + 1;
  auto numBuffers    = 0;  //number of full 128-byte buffers
  auto partialBytes  = 0;  //bytes that do not belong to a full buffer

  if (pbusFirstPage == pbusLastPage) {
    //the whole transfer stays inside one page
    if (len == 128) numBuffers = 1;
    else            partialBytes = len;
  } else {
    //the transfer spans pages: account for the first and last page separately
    bool fullFirst = (pbusFirst & pageMask) == 0;
    bool fullLast  = ((pbusLast + 2) & pageMask) == 0;

    if (fullFirst) numBuffers++;
    else           partialBytes += pageSize - (pbusFirst & pageMask);
    if (fullLast)  numBuffers++;
    else           partialBytes += (pbusLast & pageMask) + 2;

    //every page strictly between the first and the last is covered completely
    if (pbusFirstPage + 1 < pbusLastPage)
      numBuffers += (pbusPages - 2) * pageSize / 128;
  }

  u32 cycles = 0;
  cycles += (14 + bsd.latency + 1) * pbusPages;                            //per-page cost
  cycles += (bsd.pulseWidth + 1 + bsd.releaseDuration + 1) * len / 2;      //per-16-bit-unit cost
  cycles += numBuffers * 28;                                               //per-full-buffer cost
  cycles += partialBytes * 1;                                              //per-partial-byte cost
  return cycles * 3;
}
|
||||
|
|
|
@ -42,12 +42,12 @@ auto PI::ioRead(u32 address) -> u32 {
|
|||
|
||||
if(address == 7) {
|
||||
//PI_BSD_DOM1_PGS
|
||||
data.bit(0,7) = bsd1.pageSize;
|
||||
data.bit(0,3) = bsd1.pageSize;
|
||||
}
|
||||
|
||||
if(address == 8) {
|
||||
//PI_BSD_DOM1_RLS
|
||||
data.bit(0,7) = bsd1.releaseDuration;
|
||||
data.bit(0,1) = bsd1.releaseDuration;
|
||||
}
|
||||
|
||||
if(address == 9) {
|
||||
|
@ -98,7 +98,7 @@ auto PI::ioWrite(u32 address, u32 data_) -> void {
|
|||
//PI_READ_LENGTH
|
||||
io.readLength = n24(data);
|
||||
io.dmaBusy = 1;
|
||||
queue.insert(Queue::PI_DMA_Read, io.readLength * 36);
|
||||
queue.insert(Queue::PI_DMA_Read, dmaDuration(true));
|
||||
dmaRead();
|
||||
}
|
||||
|
||||
|
@ -106,7 +106,7 @@ auto PI::ioWrite(u32 address, u32 data_) -> void {
|
|||
//PI_WRITE_LENGTH
|
||||
io.writeLength = n24(data);
|
||||
io.dmaBusy = 1;
|
||||
queue.insert(Queue::PI_DMA_Write, io.writeLength * 36);
|
||||
queue.insert(Queue::PI_DMA_Write, dmaDuration(false));
|
||||
dmaWrite();
|
||||
}
|
||||
|
||||
|
@ -136,12 +136,12 @@ auto PI::ioWrite(u32 address, u32 data_) -> void {
|
|||
|
||||
if(address == 7) {
|
||||
//PI_BSD_DOM1_PGS
|
||||
bsd1.pageSize = data.bit(0,7);
|
||||
bsd1.pageSize = data.bit(0,3);
|
||||
}
|
||||
|
||||
if(address == 8) {
|
||||
//PI_BSD_DOM1_RLS
|
||||
bsd1.releaseDuration = data.bit(0,7);
|
||||
bsd1.releaseDuration = data.bit(0,1);
|
||||
}
|
||||
|
||||
if(address == 9) {
|
||||
|
|
|
@ -22,14 +22,15 @@ struct PI : Memory::RCP<PI> {
|
|||
auto dmaRead() -> void;
|
||||
auto dmaWrite() -> void;
|
||||
auto dmaFinished() -> void;
|
||||
auto dmaDuration(bool read) -> u32;
|
||||
|
||||
//io.cpp
|
||||
auto ioRead(u32 address) -> u32;
|
||||
auto ioWrite(u32 address, u32 data) -> void;
|
||||
|
||||
//bus.hpp
|
||||
auto readWord(u32 address, u32& cycles) -> u32;
|
||||
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
|
||||
auto readWord(u32 address, Thread& thread) -> u32;
|
||||
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
|
||||
auto writeFinished() -> void;
|
||||
auto writeForceFinish() -> u32;
|
||||
template <u32 Size>
|
||||
|
@ -55,8 +56,8 @@ struct PI : Memory::RCP<PI> {
|
|||
struct BSD {
|
||||
n8 latency;
|
||||
n8 pulseWidth;
|
||||
n8 pageSize;
|
||||
n8 releaseDuration;
|
||||
n4 pageSize;
|
||||
n2 releaseDuration;
|
||||
} bsd1, bsd2;
|
||||
};
|
||||
|
||||
|
|
|
@ -30,11 +30,6 @@ auto PIF::descramble(n4 *buf, int size) -> void {
|
|||
for(int i=size-1; i>0; i--) buf[i] -= buf[i-1] + 1;
|
||||
}
|
||||
|
||||
//counts the boot timeout down while it is still positive and advances the thread clock
auto PIF::step(u32 clocks) -> void {
  if(intram.bootTimeout > 0) {
    intram.bootTimeout -= clocks;
  }
  Thread::clock += clocks;
}
|
||||
|
||||
//returns the byte stored at offset 0x3f of the PIF RAM
auto PIF::ramReadCommand() -> u8 {
  constexpr auto commandOffset = 0x3f;
  return ram.read<Byte>(commandOffset);
}
|
||||
|
@ -264,7 +259,9 @@ auto PIF::challenge() -> void {
|
|||
}
|
||||
|
||||
auto PIF::mainHLE() -> void {
|
||||
step(10240*8);
|
||||
constexpr u32 clocks = 10240 * 8;
|
||||
step(clocks);
|
||||
if(intram.bootTimeout > 0) intram.bootTimeout -= clocks;
|
||||
|
||||
if(likely(state == Run)) {
|
||||
//cicCompare()
|
||||
|
|
|
@ -24,7 +24,9 @@ auto PIF::unload() -> void {
|
|||
}
|
||||
|
||||
auto PIF::main() -> void {
|
||||
while(Thread::clock < 0) {
|
||||
mainHLE();
|
||||
}
|
||||
}
|
||||
|
||||
auto PIF::power(bool reset) -> void {
|
||||
|
|
|
@ -35,7 +35,6 @@ struct PIF : Thread, Memory::SI<PIF> {
|
|||
} intram;
|
||||
|
||||
//pif.cpp
|
||||
auto step(u32 clocks) -> void;
|
||||
auto load(Node::Object) -> void;
|
||||
auto unload() -> void;
|
||||
auto main() -> void;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
auto RDP::readWord(u32 address, u32& cycles) -> u32 {
|
||||
auto RDP::readWord(u32 address, Thread& thread) -> u32 {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data;
|
||||
|
||||
|
@ -34,7 +34,7 @@ auto RDP::readWord(u32 address, u32& cycles) -> u32 {
|
|||
|
||||
if(address == 4) {
|
||||
//DPC_CLOCK
|
||||
data.bit(0,23) = command.clock;
|
||||
data.bit(0,23) = command.clock - (Thread::clock - thread.clock) / 3;
|
||||
}
|
||||
|
||||
if(address == 5) {
|
||||
|
@ -56,7 +56,7 @@ auto RDP::readWord(u32 address, u32& cycles) -> u32 {
|
|||
return data;
|
||||
}
|
||||
|
||||
auto RDP::writeWord(u32 address, u32 data_, u32& cycles) -> void {
|
||||
auto RDP::writeWord(u32 address, u32 data_, Thread& thread) -> void {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data = data_;
|
||||
|
||||
|
@ -91,7 +91,7 @@ auto RDP::writeWord(u32 address, u32 data_, u32& cycles) -> void {
|
|||
if(data.bit(6) && !command.crashed) command.tmemBusy = 0;
|
||||
if(data.bit(7) && !command.crashed) command.pipeBusy = 0;
|
||||
if(data.bit(8) && !command.crashed) command.bufferBusy = 0;
|
||||
if(data.bit(9)) command.clock = 0;
|
||||
if(data.bit(9)) command.clock = (Thread::clock - thread.clock) / 3;
|
||||
}
|
||||
|
||||
if(address == 4) {
|
||||
|
@ -113,7 +113,7 @@ auto RDP::writeWord(u32 address, u32 data_, u32& cycles) -> void {
|
|||
debugger.ioDPC(Write, address, data);
|
||||
}
|
||||
|
||||
auto RDP::IO::readWord(u32 address, u32& cycles) -> u32 {
|
||||
auto RDP::IO::readWord(u32 address, Thread& thread) -> u32 {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data;
|
||||
|
||||
|
@ -144,7 +144,7 @@ auto RDP::IO::readWord(u32 address, u32& cycles) -> u32 {
|
|||
return data;
|
||||
}
|
||||
|
||||
auto RDP::IO::writeWord(u32 address, u32 data_, u32& cycles) -> void {
|
||||
auto RDP::IO::writeWord(u32 address, u32 data_, Thread& thread) -> void {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data = data_;
|
||||
|
||||
|
|
|
@ -31,11 +31,11 @@ auto RDP::crash(const char *reason) -> void {
|
|||
}
|
||||
|
||||
auto RDP::main() -> void {
|
||||
step(system.frequency());
|
||||
}
|
||||
|
||||
auto RDP::step(u32 clocks) -> void {
|
||||
Thread::clock += clocks;
|
||||
const u32 clocks = system.frequency();
|
||||
while(Thread::clock < 0) {
|
||||
step(clocks);
|
||||
command.clock += clocks / 3;
|
||||
}
|
||||
}
|
||||
|
||||
auto RDP::power(bool reset) -> void {
|
||||
|
|
|
@ -21,7 +21,6 @@ struct RDP : Thread, Memory::RCP<RDP> {
|
|||
auto unload() -> void;
|
||||
|
||||
auto main() -> void;
|
||||
auto step(u32 clocks) -> void;
|
||||
auto power(bool reset) -> void;
|
||||
auto crash(const char *reason) -> void;
|
||||
|
||||
|
@ -66,8 +65,8 @@ struct RDP : Thread, Memory::RCP<RDP> {
|
|||
auto setColorImage() -> void;
|
||||
|
||||
//io.cpp
|
||||
auto readWord(u32 address, u32& cycles) -> u32;
|
||||
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
|
||||
auto readWord(u32 address, Thread& thread) -> u32;
|
||||
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
|
||||
auto flushCommands() -> void;
|
||||
|
||||
//serialization.cpp
|
||||
|
@ -338,8 +337,8 @@ struct RDP : Thread, Memory::RCP<RDP> {
|
|||
IO(RDP& self) : self(self) {}
|
||||
|
||||
//io.cpp
|
||||
auto readWord(u32 address, u32& cycles) -> u32;
|
||||
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
|
||||
auto readWord(u32 address, Thread& thread) -> u32;
|
||||
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
|
||||
|
||||
struct BIST {
|
||||
n1 check;
|
||||
|
|
|
@ -45,8 +45,513 @@ static const vector<string> commandNames = {
|
|||
};
|
||||
|
||||
//processes the pending RDP command list.
//
//when built with VULKAN and vulkan.enable is set, vulkan.render() is tried first; if it returns
//true the function is done (a non-null vulkan.crashed() message is forwarded to crash()).
//in every other case the list is handed to angrylion and marked as fully consumed.
//
//note: a per-command software decoder used to follow the angrylion call. it sat behind an
//unconditional return, so none of it could ever execute; it has been removed as dead code.
auto RDP::render() -> void {
  #if defined(VULKAN)
  if(vulkan.enable && vulkan.render()) {
    const char *msg = vulkan.crashed();
    if(msg) crash(msg);
    return;
  }
  #endif

  angrylion::ProcessRDPList();
  command.current = command.end;  //nothing is left to process in the list
}
|
||||
|
||||
//0x00
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
auto RDRAM::readWord(u32 address, u32& cycles) -> u32 {
|
||||
auto RDRAM::readWord(u32 address, Thread& thread) -> u32 {
|
||||
u32 chipID = address >> 13 & 3;
|
||||
auto& chip = chips[chipID];
|
||||
address = (address & 0x3ff) >> 2;
|
||||
|
@ -63,7 +63,7 @@ auto RDRAM::readWord(u32 address, u32& cycles) -> u32 {
|
|||
return data;
|
||||
}
|
||||
|
||||
auto RDRAM::writeWord(u32 address, u32 data, u32& cycles) -> void {
|
||||
auto RDRAM::writeWord(u32 address, u32 data, Thread& thread) -> void {
|
||||
u32 chipID = address >> 13 & 3;
|
||||
auto& chip = chips[chipID];
|
||||
address = (address & 0x3ff) >> 2;
|
||||
|
|
|
@ -2,7 +2,20 @@
|
|||
|
||||
struct RDRAM : Memory::RCP<RDRAM> {
|
||||
Node::Object node;
|
||||
Memory::Writable ram;
|
||||
|
||||
struct Writable : public Memory::Writable {
|
||||
template<u32 Size>
|
||||
auto read(u32 address) -> u64 {
|
||||
if (address >= size) return 0;
|
||||
return Memory::Writable::read<Size>(address);
|
||||
}
|
||||
|
||||
template<u32 Size>
|
||||
auto write(u32 address, u64 value) -> void {
|
||||
if (address >= size) return;
|
||||
Memory::Writable::write<Size>(address, value);
|
||||
}
|
||||
} ram;
|
||||
|
||||
struct Debugger {
|
||||
//debugger.cpp
|
||||
|
@ -24,8 +37,8 @@ struct RDRAM : Memory::RCP<RDRAM> {
|
|||
auto power(bool reset) -> void;
|
||||
|
||||
//io.cpp
|
||||
auto readWord(u32 address, u32& cycles) -> u32;
|
||||
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
|
||||
auto readWord(u32 address, Thread& thread) -> u32;
|
||||
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
|
||||
|
||||
//serialization.cpp
|
||||
auto serialize(serializer&) -> void;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
auto RI::readWord(u32 address, u32& cycles) -> u32 {
|
||||
auto RI::readWord(u32 address, Thread& thread) -> u32 {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data = 0;
|
||||
|
||||
|
@ -30,7 +30,7 @@ auto RI::readWord(u32 address, u32& cycles) -> u32 {
|
|||
//after a cold boot on real hardware with no expansion pak and using the
|
||||
//CIC-NUS-6102 IPL3, upon reaching the test ROM's entry point the count
|
||||
//register was measured to be ~0x1184000.
|
||||
cpu.step(17'641'000);
|
||||
cpu.step(17'641'000 * 2);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -58,7 +58,7 @@ auto RI::readWord(u32 address, u32& cycles) -> u32 {
|
|||
return data;
|
||||
}
|
||||
|
||||
auto RI::writeWord(u32 address, u32 data_, u32& cycles) -> void {
|
||||
auto RI::writeWord(u32 address, u32 data_, Thread& thread) -> void {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data = data_;
|
||||
|
||||
|
|
|
@ -19,8 +19,8 @@ struct RI : Memory::RCP<RI> {
|
|||
auto power(bool reset) -> void;
|
||||
|
||||
//io.cpp
|
||||
auto readWord(u32 address, u32& cycles) -> u32;
|
||||
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
|
||||
auto readWord(u32 address, Thread& thread) -> u32;
|
||||
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
|
||||
|
||||
//serialization.cpp
|
||||
auto serialize(serializer&) -> void;
|
||||
|
|
|
@ -0,0 +1,386 @@
|
|||
//helper macros for the decoder tables below.
//  jp(id, name)      -> case id: forward to decoder<name>(instruction)
//  op(id, name, ...) -> case id: build an empty OpInfo, apply the listed effects, return it
//the `name` argument of op() is not used by the expansion; it only labels the table row.
#define jp(id, name, ...) case id: return decoder##name(instruction)
#define op(id, name, ...) case id: { OpInfo info = {}; __VA_ARGS__; return info; }

//5-bit register fields extracted from the instruction word
#define RD (instruction >> 11 & 31)
#define RT (instruction >> 16 & 31)
#define RS (instruction >> 21 & 31)
#define VD (instruction >>  6 & 31)
#define VS (instruction >> 11 & 31)
#define VT (instruction >> 16 & 31)

//indices accepted by VCUse/VCDef/VCRef (presumably the vector control registers -- confirm)
#define VCO 0
#define VCC 1
#define VCE 2

//effect recorders. every parameter is parenthesized so that expression arguments keep their
//meaning, and every mask is built from an unsigned literal so that shifting into bit 31 never
//yields a negative value (which would sign-extend into a destination wider than 32 bits).
#define RUse(n)   info.r.use  |= 1u << (n)
#define RDef(n)   info.r.def  |= 1u << (n)
#define RDefB(n)  (void)0  //bypassable
#define VUse(n)   info.v.use  |= 1u << (n)
#define VDef(n)   info.v.def  |= 1u << (n)
#define VGUse(n)  info.v.use  |= 0xffu << ((n) & ~7)  //whole group of eight registers
#define VGDef(n)  info.v.def  |= 0xffu << ((n) & ~7)  //whole group of eight registers
#define VCUse(n)  info.vc.use |= 1u << ((n) & 3)
#define VCDef(n)  info.vc.def |= 1u << ((n) & 3)
#define VCRef(n)  VCUse(n), VCDef(n)
#define VFRef(n)  info.vfake  |= 1u << (n)

//flag recorders
#define Load      info.flags |= OpInfo::Load
#define Store     info.flags |= OpInfo::Store
#define Branch    info.flags |= OpInfo::Branch
#define Vector    info.flags |= OpInfo::Vector
#define VNopGroup info.flags |= OpInfo::VNopGroup
|
||||
|
||||
//top-level decoder table, indexed by the upper six bits of the instruction word.
//jp() rows hand the instruction to a secondary table; op() rows return an OpInfo carrying the
//listed register uses/definitions and flags. INVALID rows return an empty OpInfo.
auto RSP::decoderEXECUTE(u32 instruction) const -> OpInfo {
  const u32 opcode = instruction >> 26;
  switch(opcode) {
  jp(0x00, SPECIAL);
  jp(0x01, REGIMM);
  op(0x02, J,       Branch);
  op(0x03, JAL,     Branch);
  op(0x04, BEQ,     RUse(RS), RUse(RT), Branch);
  op(0x05, BNE,     RUse(RS), RUse(RT), Branch);
  op(0x06, BLEZ,    RUse(RS), Branch);
  op(0x07, BGTZ,    RUse(RS), Branch);
  op(0x08, ADDI,    RDefB(RT), RUse(RS));
  op(0x09, ADDIU,   RDefB(RT), RUse(RS));
  op(0x0a, SLTI,    RDefB(RT), RUse(RS));
  op(0x0b, SLTIU,   RDefB(RT), RUse(RS));
  op(0x0c, ANDI,    RDefB(RT), RUse(RS));
  op(0x0d, ORI,     RDefB(RT), RUse(RS));
  op(0x0e, XORI,    RDefB(RT), RUse(RS));
  op(0x0f, LUI,     RDefB(RT));
  jp(0x10, SCC);
  op(0x11, INVALID);  //COP1
  jp(0x12, VU);
  op(0x13, INVALID);  //COP3
  op(0x14, INVALID);  //BEQL
  op(0x15, INVALID);  //BNEL
  op(0x16, INVALID);  //BLEZL
  op(0x17, INVALID);  //BGTZL
  op(0x18, INVALID);  //DADDI
  op(0x19, INVALID);  //DADDIU
  op(0x1a, INVALID);  //LDL
  op(0x1b, INVALID);  //LDR
  op(0x1c, INVALID);
  op(0x1d, INVALID);
  op(0x1e, INVALID);
  op(0x1f, INVALID);
  op(0x20, LB,      RDef(RT), RUse(RS), Load);
  op(0x21, LH,      RDef(RT), RUse(RS), Load);
  op(0x22, INVALID);  //LWL
  op(0x23, LW,      RDef(RT), RUse(RS), Load);
  op(0x24, LBU,     RDef(RT), RUse(RS), Load);
  op(0x25, LHU,     RDef(RT), RUse(RS), Load);
  op(0x26, INVALID);  //LWR
  op(0x27, LWU,     RDef(RT), RUse(RS), Load);
  op(0x28, SB,      RUse(RT), RUse(RS), Store);
  op(0x29, SH,      RUse(RT), RUse(RS), Store);
  op(0x2a, INVALID);  //SWL
  op(0x2b, SW,      RUse(RT), RUse(RS), Store);
  op(0x2c, INVALID);  //SDL
  op(0x2d, INVALID);  //SDR
  op(0x2e, INVALID);  //SWR
  op(0x2f, INVALID);  //CACHE
  op(0x30, INVALID);  //LL
  op(0x31, INVALID);  //LWC1
  jp(0x32, LWC2);
  op(0x33, INVALID);  //LWC3
  op(0x34, INVALID);  //LLD
  op(0x35, INVALID);  //LDC1
  op(0x36, INVALID);  //LDC2
  op(0x37, INVALID);  //LD
  op(0x38, INVALID);  //SC
  op(0x39, INVALID);  //SWC1
  jp(0x3a, SWC2);
  op(0x3b, INVALID);  //SWC3
  op(0x3c, INVALID);  //SCD
  op(0x3d, INVALID);  //SDC1
  op(0x3e, INVALID);  //SDC2
  op(0x3f, INVALID);  //SD
  }
  return {};
}
|
||||
|
||||
//secondary decoder table, indexed by the low six bits of the instruction word.
//each op() row returns an OpInfo carrying the listed register uses and flags;
//INVALID rows return an empty OpInfo.
auto RSP::decoderSPECIAL(u32 instruction) const -> OpInfo {
  const u32 function = instruction & 0x3f;
  switch(function) {
  op(0x00, SLL,     RDefB(RD), RUse(RT));
  op(0x01, INVALID);
  op(0x02, SRL,     RDefB(RD), RUse(RT));
  op(0x03, SRA,     RDefB(RD), RUse(RT));
  op(0x04, SLLV,    RDefB(RD), RUse(RT), RUse(RS));
  op(0x05, INVALID);
  op(0x06, SRLV,    RDefB(RD), RUse(RT), RUse(RS));
  op(0x07, SRAV,    RDefB(RD), RUse(RT), RUse(RS));
  op(0x08, JR,      RUse(RS), Branch);
  op(0x09, JALR,    RDefB(RD), RUse(RS), Branch);
  op(0x0a, INVALID);
  op(0x0b, INVALID);
  op(0x0c, INVALID);  //SYSCALL
  op(0x0d, BREAK,   Branch);
  op(0x0e, INVALID);
  op(0x0f, INVALID);  //SYNC
  op(0x10, INVALID);  //MFHI
  op(0x11, INVALID);  //MTHI
  op(0x12, INVALID);  //MFLO
  op(0x13, INVALID);  //MTLO
  op(0x14, INVALID);  //DSLLV
  op(0x15, INVALID);
  op(0x16, INVALID);  //DSRLV
  op(0x17, INVALID);  //DSRAV
  op(0x18, INVALID);  //MULT
  op(0x19, INVALID);  //MULTU
  op(0x1a, INVALID);  //DIV
  op(0x1b, INVALID);  //DIVU
  op(0x1c, INVALID);  //DMULT
  op(0x1d, INVALID);  //DMULTU
  op(0x1e, INVALID);  //DDIV
  op(0x1f, INVALID);  //DDIVU
  op(0x20, ADDU,    RDefB(RD), RUse(RS), RUse(RT));  //ADD
  op(0x21, ADDU,    RDefB(RD), RUse(RS), RUse(RT));
  op(0x22, SUBU,    RDefB(RD), RUse(RS), RUse(RT));  //SUB
  op(0x23, SUBU,    RDefB(RD), RUse(RS), RUse(RT));
  op(0x24, AND,     RDefB(RD), RUse(RS), RUse(RT));
  op(0x25, OR,      RDefB(RD), RUse(RS), RUse(RT));
  op(0x26, XOR,     RDefB(RD), RUse(RS), RUse(RT));
  op(0x27, NOR,     RDefB(RD), RUse(RS), RUse(RT));
  op(0x28, INVALID);
  op(0x29, INVALID);
  op(0x2a, SLT,     RDefB(RD), RUse(RS), RUse(RT));
  op(0x2b, SLTU,    RDefB(RD), RUse(RS), RUse(RT));
  op(0x2c, INVALID);  //DADD
  op(0x2d, INVALID);  //DADDU
  op(0x2e, INVALID);  //DSUB
  op(0x2f, INVALID);  //DSUBU
  op(0x30, INVALID);  //TGE
  op(0x31, INVALID);  //TGEU
  op(0x32, INVALID);  //TLT
  op(0x33, INVALID);  //TLTU
  op(0x34, INVALID);  //TEQ
  op(0x35, INVALID);
  op(0x36, INVALID);  //TNE
  op(0x37, INVALID);
  op(0x38, INVALID);  //DSLL
  op(0x39, INVALID);
  op(0x3a, INVALID);  //DSRL
  op(0x3b, INVALID);  //DSRA
  op(0x3c, INVALID);  //DSLL32
  op(0x3d, INVALID);
  op(0x3e, INVALID);  //DSRL32
  op(0x3f, INVALID);  //DSRA32
  }
  return {};
}
|
||||
|
||||
//decodes the REGIMM instruction group into an OpInfo descriptor.
//the sub-opcode is taken from bits 16-20 of the instruction word.
//only BLTZ, BGEZ, BLTZAL and BGEZAL are mapped to real operations; each is described as
//reading RS and flagged as a branch. every other slot is mapped to INVALID.
//op() and the RUse()/Branch helpers are macros defined earlier in this file (outside this excerpt);
//presumably each op() line expands to a case that returns the resulting OpInfo.
//NOTE(review): on MIPS, BLTZAL/BGEZAL also write the link register, yet no RDef is listed
//for them here - confirm this is intentional.
auto RSP::decoderREGIMM(u32 instruction) const -> OpInfo {
|
||||
switch(instruction >> 16 & 0x1f) {
|
||||
op(0x00, BLTZ, RUse(RS), Branch);
|
||||
op(0x01, BGEZ, RUse(RS), Branch);
|
||||
op(0x02, INVALID); //BLTZL
|
||||
op(0x03, INVALID); //BGEZL
|
||||
op(0x04, INVALID);
|
||||
op(0x05, INVALID);
|
||||
op(0x06, INVALID);
|
||||
op(0x07, INVALID);
|
||||
op(0x08, INVALID); //TGEI
|
||||
op(0x09, INVALID); //TGEIU
|
||||
op(0x0a, INVALID); //TLTI
|
||||
op(0x0b, INVALID); //TLTIU
|
||||
op(0x0c, INVALID); //TEQI
|
||||
op(0x0d, INVALID);
|
||||
op(0x0e, INVALID); //TNEI
|
||||
op(0x0f, INVALID);
|
||||
op(0x10, BLTZAL, RUse(RS), Branch);
|
||||
op(0x11, BGEZAL, RUse(RS), Branch);
|
||||
op(0x12, INVALID); //BLTZALL
|
||||
op(0x13, INVALID); //BGEZALL
|
||||
op(0x14, INVALID);
|
||||
op(0x15, INVALID);
|
||||
op(0x16, INVALID);
|
||||
op(0x17, INVALID);
|
||||
op(0x18, INVALID);
|
||||
op(0x19, INVALID);
|
||||
op(0x1a, INVALID);
|
||||
op(0x1b, INVALID);
|
||||
op(0x1c, INVALID);
|
||||
op(0x1d, INVALID);
|
||||
op(0x1e, INVALID);
|
||||
op(0x1f, INVALID);
|
||||
}
|
||||
//fallback: a value-initialized OpInfo
return {};
|
||||
}
|
||||
|
||||
//decodes the system control coprocessor (COP0) instruction group into an OpInfo descriptor.
//the sub-opcode is taken from bits 21-25 of the instruction word.
//MFC0 is described as defining RT and MTC0 as using RT; both carry the Load and Store flags.
//only sub-opcodes 0x00-0x0f are listed; anything else falls out of the switch and yields
//a value-initialized OpInfo.
//op() and the RDef()/RUse() helpers are macros defined earlier in this file (outside this excerpt).
auto RSP::decoderSCC(u32 instruction) const -> OpInfo {
|
||||
switch(instruction >> 21 & 0x1f) {
|
||||
op(0x00, MFC0, RDef(RT), Load, Store);
|
||||
op(0x01, INVALID); //DMFC0
|
||||
op(0x02, INVALID); //CFC0
|
||||
op(0x03, INVALID);
|
||||
op(0x04, MTC0, RUse(RT), Load, Store);
|
||||
op(0x05, INVALID); //DMTC0
|
||||
op(0x06, INVALID); //CTC0
|
||||
op(0x07, INVALID);
|
||||
op(0x08, INVALID); //BC0
|
||||
op(0x09, INVALID);
|
||||
op(0x0a, INVALID);
|
||||
op(0x0b, INVALID);
|
||||
op(0x0c, INVALID);
|
||||
op(0x0d, INVALID);
|
||||
op(0x0e, INVALID);
|
||||
op(0x0f, INVALID);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
//decodes the vector unit (COP2) instruction group into an OpInfo descriptor.
//decoding happens in two stages:
// 1. bits 21-25 select the register-move forms (MFC2, CFC2, MTC2, CTC2), which carry the
//    Load and Store flags; MTC2 is additionally tagged VNopGroup.
// 2. sub-opcodes not listed in the first switch (0x10-0x1f) fall through to a second switch
//    on the low six bits, which selects the vector computational operations (flagged Vector).
//unimplemented/reserved vector opcodes are described as VZERO with no register uses or defs.
//op() and the register helper macros are defined earlier in this file (outside this excerpt);
//presumably each op() line expands to a case that returns the resulting OpInfo.
auto RSP::decoderVU(u32 instruction) const -> OpInfo {
|
||||
switch(instruction >> 21 & 0x1f) {
|
||||
op(0x00, MFC2, RDef(RT), VUse(VS), Load, Store);
|
||||
op(0x01, INVALID); //DMFC2
|
||||
op(0x02, CFC2, RDef(RT), VCUse(RD), Load, Store);
|
||||
op(0x03, INVALID);
|
||||
op(0x04, MTC2, RUse(RT), VDef(VS), Load, Store, VNopGroup);
|
||||
op(0x05, INVALID); //DMTC2
|
||||
op(0x06, CTC2, RUse(RT), VCDef(RD), Load, Store);
|
||||
op(0x07, INVALID);
|
||||
op(0x08, INVALID); //BC2
|
||||
op(0x09, INVALID);
|
||||
op(0x0a, INVALID);
|
||||
op(0x0b, INVALID);
|
||||
op(0x0c, INVALID);
|
||||
op(0x0d, INVALID);
|
||||
op(0x0e, INVALID);
|
||||
op(0x0f, INVALID);
|
||||
}
|
||||
|
||||
//vector computational operations, selected by the low six bits
switch(instruction & 0x3f) {
|
||||
op(0x00, VMULF, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x01, VMULU, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x02, VRNDP, VDef(VD), VUse(VT), Vector);
|
||||
op(0x03, VMULQ, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x04, VMUDL, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x05, VMUDM, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x06, VMUDN, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x07, VMUDH, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x08, VMACF, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x09, VMACU, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x0a, VRNDN, VDef(VD), VUse(VT), Vector);
|
||||
op(0x0b, VMACQ, VDef(VD), Vector);
|
||||
op(0x0c, VMADL, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x0d, VMADM, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x0e, VMADN, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x0f, VMADH, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x10, VADD, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), Vector);
|
||||
op(0x11, VSUB, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), Vector);
|
||||
op(0x12, VZERO, Vector); //VSUT
|
||||
op(0x13, VABS, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), Vector);
|
||||
op(0x14, VADDC, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), Vector);
|
||||
op(0x15, VSUBC, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), Vector);
|
||||
op(0x16, VZERO, Vector); //VADDB
|
||||
op(0x17, VZERO, Vector); //VSUBB
|
||||
op(0x18, VZERO, Vector); //VACCB
|
||||
op(0x19, VZERO, Vector); //VSUCB
|
||||
op(0x1a, VZERO, Vector); //VSAD
|
||||
op(0x1b, VZERO, Vector); //VSAC
|
||||
op(0x1c, VZERO, Vector); //VSUM
|
||||
op(0x1d, VSAR, VDef(VD), Vector);
|
||||
op(0x1e, VZERO, Vector);
|
||||
op(0x1f, VZERO, Vector);
|
||||
op(0x20, VLT, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), Vector);
|
||||
op(0x21, VEQ, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), Vector);
|
||||
op(0x22, VNE, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), Vector);
|
||||
op(0x23, VGE, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), Vector);
|
||||
op(0x24, VCL, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), VCRef(VCE), Vector);
|
||||
op(0x25, VCH, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), VCRef(VCE), Vector);
|
||||
op(0x26, VCR, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), VCRef(VCE), Vector);
|
||||
op(0x27, VMRG, VDef(VD), VUse(VS), VUse(VT), VCRef(VCO), VCRef(VCC), Vector);
|
||||
op(0x28, VAND, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x29, VNAND, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x2a, VOR, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x2b, VNOR, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x2c, VXOR, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x2d, VNXOR, VDef(VD), VUse(VS), VUse(VT), Vector);
|
||||
op(0x2e, VZERO, Vector);
|
||||
op(0x2f, VZERO, Vector);
|
||||
op(0x30, VRCP, VDef(VD), VFRef(VS), VUse(VT), Vector);
|
||||
op(0x31, VRCPL, VDef(VD), VFRef(VS), VUse(VT), Vector);
|
||||
op(0x32, VRCPH, VDef(VD), VFRef(VS), VUse(VT), Vector);
|
||||
op(0x33, VMOV, VDef(VD), VFRef(VS), VUse(VT), Vector);
|
||||
op(0x34, VRSQ, VDef(VD), VFRef(VS), VUse(VT), Vector);
|
||||
op(0x35, VRSQL, VDef(VD), VFRef(VS), VUse(VT), Vector);
|
||||
op(0x36, VRSQH, VDef(VD), VFRef(VS), VUse(VT), Vector);
|
||||
op(0x37, VNOP, VFRef(VD), Vector, VNopGroup);
|
||||
op(0x38, VZERO, Vector); //VEXTT
|
||||
op(0x39, VZERO, Vector); //VEXTQ
|
||||
op(0x3a, VZERO, Vector); //VEXTN
|
||||
op(0x3b, VZERO, Vector);
|
||||
op(0x3c, VZERO, Vector); //VINST
|
||||
op(0x3d, VZERO, Vector); //VINSQ
|
||||
op(0x3e, VZERO, Vector); //VINSN
|
||||
op(0x3f, VNOP, Vector); //VNULL
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
//decodes the LWC2 (vector load) instruction group into an OpInfo descriptor.
//the sub-opcode is taken from bits 11-15 of the instruction word.
//each load is described as defining VT, using RS and carrying the Load flag.
//LTV differs: it uses VGDef(VT) instead of VDef(VT) and is tagged VNopGroup.
//sub-opcode 0x0a (LWV) is deliberately left out, and any sub-opcode without a case
//falls out of the switch and yields a value-initialized OpInfo.
//op() and the register helper macros are defined earlier in this file (outside this excerpt).
auto RSP::decoderLWC2(u32 instruction) const -> OpInfo {
|
||||
switch(instruction >> 11 & 0x1f) {
|
||||
op(0x00, LBV, VDef(VT), RUse(RS), Load);
|
||||
op(0x01, LSV, VDef(VT), RUse(RS), Load);
|
||||
op(0x02, LLV, VDef(VT), RUse(RS), Load);
|
||||
op(0x03, LDV, VDef(VT), RUse(RS), Load);
|
||||
op(0x04, LQV, VDef(VT), RUse(RS), Load);
|
||||
op(0x05, LRV, VDef(VT), RUse(RS), Load);
|
||||
op(0x06, LPV, VDef(VT), RUse(RS), Load);
|
||||
op(0x07, LUV, VDef(VT), RUse(RS), Load);
|
||||
op(0x08, LHV, VDef(VT), RUse(RS), Load);
|
||||
op(0x09, LFV, VDef(VT), RUse(RS), Load);
|
||||
//op(0x0a, LWV, VDef(VT), RUse(RS), Load); //not present on N64 RSP
|
||||
op(0x0b, LTV, VGDef(VT), RUse(RS), Load, VNopGroup);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
//decodes the SWC2 (vector store) instruction group into an OpInfo descriptor.
//the sub-opcode is taken from bits 11-15 of the instruction word.
//each store is described as using VT and RS and carrying the Store flag.
//STV differs: it uses VGUse(VT) instead of VUse(VT).
//unlike the load group, sub-opcode 0x0a (SWV) is present here.
//any sub-opcode without a case falls out of the switch and yields a value-initialized OpInfo.
//op() and the register helper macros are defined earlier in this file (outside this excerpt).
auto RSP::decoderSWC2(u32 instruction) const -> OpInfo {
|
||||
switch(instruction >> 11 & 0x1f) {
|
||||
op(0x00, SBV, VUse(VT), RUse(RS), Store);
|
||||
op(0x01, SSV, VUse(VT), RUse(RS), Store);
|
||||
op(0x02, SLV, VUse(VT), RUse(RS), Store);
|
||||
op(0x03, SDV, VUse(VT), RUse(RS), Store);
|
||||
op(0x04, SQV, VUse(VT), RUse(RS), Store);
|
||||
op(0x05, SRV, VUse(VT), RUse(RS), Store);
|
||||
op(0x06, SPV, VUse(VT), RUse(RS), Store);
|
||||
op(0x07, SUV, VUse(VT), RUse(RS), Store);
|
||||
op(0x08, SHV, VUse(VT), RUse(RS), Store);
|
||||
op(0x09, SFV, VUse(VT), RUse(RS), Store);
|
||||
op(0x0a, SWV, VUse(VT), RUse(RS), Store);
|
||||
op(0x0b, STV, VGUse(VT), RUse(RS), Store);
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
#undef RUse
|
||||
#undef RDef
|
||||
#undef RDefB
|
||||
#undef VUse
|
||||
#undef VDef
|
||||
#undef VGUse
|
||||
#undef VGDef
|
||||
#undef VCUse
|
||||
#undef VCDef
|
||||
#undef VCRef
|
||||
#undef VFRef
|
||||
#undef Load
|
||||
#undef Store
|
||||
#undef Branch
|
||||
#undef Vector
|
||||
#undef VNopGroup
|
||||
|
||||
#undef VCO
|
||||
#undef VCC
|
||||
#undef VCE
|
||||
|
||||
#undef RD
|
||||
#undef RT
|
||||
#undef RS
|
||||
#undef VD
|
||||
#undef VS
|
||||
#undef VT
|
||||
|
||||
#undef jp
|
||||
#undef op
|
|
@ -490,10 +490,10 @@ auto RSP::Disassembler::sccRegisterName(u32 index) const -> string {
|
|||
}
|
||||
|
||||
auto RSP::Disassembler::sccRegisterValue(u32 index) const -> string {
|
||||
u32 value = 0; u32 cycles;
|
||||
if(index <= 6) value = rsp.readWord((index & 7) << 2, cycles);
|
||||
u32 value = 0; Thread thread;
|
||||
if(index <= 6) value = rsp.readWord((index & 7) << 2, thread);
|
||||
if(index == 7) value = self.status.semaphore; //rsp.readSCC(7) has side-effects
|
||||
if(index >= 8) value = rdp.readWord((index & 7) << 2, cycles);
|
||||
if(index >= 8) value = rdp.readWord((index & 7) << 2, thread);
|
||||
if(showValues) return {sccRegisterName(index), hint("{$", hex(value, 8L), "}")};
|
||||
return sccRegisterName(index);
|
||||
}
|
||||
|
|
|
@ -1,13 +1,9 @@
|
|||
auto RSP::MFC0(r32& rt, u8 rd) -> void {
|
||||
u32 cycles = 0;
|
||||
if((rd & 8) == 0) rt.u32 = Nintendo64::rsp.ioRead ((rd & 7) << 2);
|
||||
if((rd & 8) != 0) rt.u32 = Nintendo64::rdp.readWord((rd & 7) << 2, cycles);
|
||||
step(cycles);
|
||||
if((rd & 8) != 0) rt.u32 = Nintendo64::rdp.readWord((rd & 7) << 2, *this);
|
||||
}
|
||||
|
||||
auto RSP::MTC0(cr32& rt, u8 rd) -> void {
|
||||
u32 cycles = 0;
|
||||
if((rd & 8) == 0) Nintendo64::rsp.ioWrite ((rd & 7) << 2, rt.u32);
|
||||
if((rd & 8) != 0) Nintendo64::rdp.writeWord((rd & 7) << 2, rt.u32, cycles);
|
||||
step(cycles);
|
||||
if((rd & 8) != 0) Nintendo64::rdp.writeWord((rd & 7) << 2, rt.u32, *this);
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#define VS vpu.r[VSn]
|
||||
#define VT vpu.r[VTn]
|
||||
|
||||
#define jp(id, name, ...) case id: return decoder##name(__VA_ARGS__)
|
||||
#define jp(id, name, ...) case id: return interpreter##name(__VA_ARGS__)
|
||||
#define op(id, name, ...) case id: return name(__VA_ARGS__)
|
||||
#define br(id, name, ...) case id: return name(__VA_ARGS__)
|
||||
#define vu(id, name, ...) case id: \
|
||||
|
@ -41,7 +41,7 @@
|
|||
#define IMMu16 u16(OP)
|
||||
#define IMMu26 (OP & 0x03ff'ffff)
|
||||
|
||||
auto RSP::decoderEXECUTE() -> void {
|
||||
auto RSP::interpreterEXECUTE() -> void {
|
||||
switch(OP >> 26) {
|
||||
jp(0x00, SPECIAL);
|
||||
jp(0x01, REGIMM);
|
||||
|
@ -110,7 +110,7 @@ auto RSP::decoderEXECUTE() -> void {
|
|||
}
|
||||
}
|
||||
|
||||
auto RSP::decoderSPECIAL() -> void {
|
||||
auto RSP::interpreterSPECIAL() -> void {
|
||||
switch(OP & 0x3f) {
|
||||
op(0x00, SLL, RD, RT, SA);
|
||||
op(0x01, INVALID);
|
||||
|
@ -179,7 +179,7 @@ auto RSP::decoderSPECIAL() -> void {
|
|||
}
|
||||
}
|
||||
|
||||
auto RSP::decoderREGIMM() -> void {
|
||||
auto RSP::interpreterREGIMM() -> void {
|
||||
switch(OP >> 16 & 0x1f) {
|
||||
br(0x00, BLTZ, RS, IMMi16);
|
||||
br(0x01, BGEZ, RS, IMMi16);
|
||||
|
@ -216,7 +216,7 @@ auto RSP::decoderREGIMM() -> void {
|
|||
}
|
||||
}
|
||||
|
||||
auto RSP::decoderSCC() -> void {
|
||||
auto RSP::interpreterSCC() -> void {
|
||||
switch(OP >> 21 & 0x1f) {
|
||||
op(0x00, MFC0, RT, RDn);
|
||||
op(0x01, INVALID); //DMFC0
|
||||
|
@ -237,7 +237,7 @@ auto RSP::decoderSCC() -> void {
|
|||
}
|
||||
}
|
||||
|
||||
auto RSP::decoderVU() -> void {
|
||||
auto RSP::interpreterVU() -> void {
|
||||
#define E (OP >> 7 & 15)
|
||||
switch(OP >> 21 & 0x1f) {
|
||||
vu(0x00, MFC2, RT, VS);
|
||||
|
@ -331,7 +331,7 @@ auto RSP::decoderVU() -> void {
|
|||
#undef DE
|
||||
}
|
||||
|
||||
auto RSP::decoderLWC2() -> void {
|
||||
auto RSP::interpreterLWC2() -> void {
|
||||
#define E (OP >> 7 & 15)
|
||||
#define IMMi7 i7(OP)
|
||||
switch(OP >> 11 & 0x1f) {
|
||||
|
@ -352,7 +352,7 @@ auto RSP::decoderLWC2() -> void {
|
|||
#undef IMMi7
|
||||
}
|
||||
|
||||
auto RSP::decoderSWC2() -> void {
|
||||
auto RSP::interpreterSWC2() -> void {
|
||||
#define E (OP >> 7 & 15)
|
||||
#define IMMi7 i7(OP)
|
||||
switch(OP >> 11 & 0x1f) {
|
||||
|
@ -390,6 +390,7 @@ auto RSP::INVALID() -> void {
|
|||
#undef jp
|
||||
#undef op
|
||||
#undef br
|
||||
#undef vu
|
||||
|
||||
#undef OP
|
||||
#undef RD
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
auto RSP::readWord(u32 address, u32& cycles) -> u32 {
|
||||
auto RSP::readWord(u32 address, Thread& thread) -> u32 {
|
||||
if(address <= 0x0403'ffff) {
|
||||
if(address & 0x1000) return imem.read<Word>(address);
|
||||
else return dmem.read<Word>(address);
|
||||
|
@ -67,7 +67,7 @@ auto RSP::ioRead(u32 address) -> u32 {
|
|||
return data;
|
||||
}
|
||||
|
||||
auto RSP::writeWord(u32 address, u32 data, u32& cycles) -> void {
|
||||
auto RSP::writeWord(u32 address, u32 data, Thread& thread) -> void {
|
||||
if(address <= 0x0403'ffff) {
|
||||
if(address & 0x1000) return recompiler.invalidate(address & 0xfff), imem.write<Word>(address, data);
|
||||
else return dmem.write<Word>(address, data);
|
||||
|
@ -156,7 +156,7 @@ auto RSP::ioWrite(u32 address, u32 data_) -> void {
|
|||
debugger.ioSCC(Write, address, data);
|
||||
}
|
||||
|
||||
auto RSP::Status::readWord(u32 address, u32& cycles) -> u32 {
|
||||
auto RSP::Status::readWord(u32 address, Thread& thread) -> u32 {
|
||||
address = (address & 0x7ffff) >> 2;
|
||||
n32 data;
|
||||
|
||||
|
@ -177,7 +177,7 @@ auto RSP::Status::readWord(u32 address, u32& cycles) -> u32 {
|
|||
return data;
|
||||
}
|
||||
|
||||
auto RSP::Status::writeWord(u32 address, u32 data_, u32& cycles) -> void {
|
||||
auto RSP::Status::writeWord(u32 address, u32 data_, Thread& thread) -> void {
|
||||
address = (address & 0x7ffff) >> 2;
|
||||
n32 data = data_;
|
||||
|
||||
|
|
|
@ -38,6 +38,7 @@ auto RSP::Recompiler::block(u12 address) -> Block* {
|
|||
|
||||
auto size = measure(address);
|
||||
auto hashcode = hash(address, size);
|
||||
hashcode ^= self.pipeline.hash();
|
||||
|
||||
BlockHashPair pair;
|
||||
pair.hashcode = hashcode;
|
||||
|
@ -66,14 +67,35 @@ auto RSP::Recompiler::emit(u12 address) -> Block* {
|
|||
reset();
|
||||
}
|
||||
|
||||
pipeline = self.pipeline;
|
||||
|
||||
auto block = (Block*)allocator.acquire(sizeof(Block));
|
||||
beginFunction(3);
|
||||
|
||||
u12 start = address;
|
||||
bool hasBranched = 0;
|
||||
while(true) {
|
||||
pipeline.begin();
|
||||
u32 instruction = self.imem.read<Word>(address);
|
||||
OpInfo op0 = self.decoderEXECUTE(instruction);
|
||||
pipeline.issue(op0);
|
||||
bool branched = emitEXECUTE(instruction);
|
||||
|
||||
if(!pipeline.singleIssue && !branched && u12(address + 4) != start) {
|
||||
u32 instruction = self.imem.read<Word>(address + 4);
|
||||
OpInfo op1 = self.decoderEXECUTE(instruction);
|
||||
|
||||
if(RSP::canDualIssue(op0, op1)) {
|
||||
mov32(reg(1), imm(0));
|
||||
call(&RSP::instructionEpilogue);
|
||||
address += 4;
|
||||
pipeline.issue(op1);
|
||||
branched = emitEXECUTE(instruction);
|
||||
}
|
||||
}
|
||||
|
||||
pipeline.end();
|
||||
mov32(reg(1), imm(pipeline.clocks));
|
||||
call(&RSP::instructionEpilogue);
|
||||
address += 4;
|
||||
if(hasBranched || address == start) break;
|
||||
|
@ -82,9 +104,13 @@ auto RSP::Recompiler::emit(u12 address) -> Block* {
|
|||
}
|
||||
jumpEpilog();
|
||||
|
||||
//reset clocks to zero every time block is executed
|
||||
pipeline.clocks = 0;
|
||||
|
||||
memory::jitprotect(false);
|
||||
block->code = endFunction();
|
||||
block->size = address - start;
|
||||
block->pipeline = pipeline;
|
||||
|
||||
//print(hex(PC, 8L), " ", instructions, " ", size(), "\n");
|
||||
return block;
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
namespace ares::Nintendo64 {
|
||||
|
||||
RSP rsp;
|
||||
#include "decoder.cpp"
|
||||
#include "dma.cpp"
|
||||
#include "io.cpp"
|
||||
#include "interpreter.cpp"
|
||||
|
@ -29,12 +30,10 @@ auto RSP::unload() -> void {
|
|||
}
|
||||
|
||||
auto RSP::main() -> void {
|
||||
if(status.halted) return step(128);
|
||||
instruction();
|
||||
}
|
||||
|
||||
auto RSP::step(u32 clocks) -> void {
|
||||
Thread::clock += clocks;
|
||||
while(Thread::clock < 0) {
|
||||
if(status.halted) return step(128);
|
||||
instruction();
|
||||
}
|
||||
}
|
||||
|
||||
auto RSP::instruction() -> void {
|
||||
|
@ -44,18 +43,40 @@ auto RSP::instruction() -> void {
|
|||
}
|
||||
|
||||
if constexpr(Accuracy::RSP::Interpreter) {
|
||||
pipeline.begin();
|
||||
pipeline.address = ipu.pc;
|
||||
pipeline.instruction = imem.read<Word>(pipeline.address);
|
||||
OpInfo op0 = decoderEXECUTE(pipeline.instruction);
|
||||
pipeline.issue(op0);
|
||||
debugger.instruction();
|
||||
decoderEXECUTE();
|
||||
instructionEpilogue();
|
||||
step(3);
|
||||
interpreterEXECUTE();
|
||||
|
||||
if(!pipeline.singleIssue && !op0.branch()) {
|
||||
u32 instruction = imem.read<Word>(ipu.pc + 4);
|
||||
OpInfo op1 = decoderEXECUTE(instruction);
|
||||
|
||||
if(canDualIssue(op0, op1)) {
|
||||
instructionEpilogue(0);
|
||||
pipeline.address = ipu.pc;
|
||||
pipeline.instruction = instruction;
|
||||
pipeline.issue(op1);
|
||||
debugger.instruction();
|
||||
interpreterEXECUTE();
|
||||
}
|
||||
}
|
||||
|
||||
pipeline.end();
|
||||
instructionEpilogue(0);
|
||||
}
|
||||
|
||||
//this handles all stepping for the interpreter
|
||||
//with the recompiler, it only steps for taken branch stalls
|
||||
step(pipeline.clocks);
|
||||
}
|
||||
|
||||
auto RSP::instructionEpilogue() -> s32 {
|
||||
auto RSP::instructionEpilogue(u32 clocks) -> s32 {
|
||||
if constexpr(Accuracy::RSP::Recompiler) {
|
||||
step(3);
|
||||
step(clocks);
|
||||
}
|
||||
|
||||
ipu.r[0].u32 = 0;
|
||||
|
@ -63,7 +84,12 @@ auto RSP::instructionEpilogue() -> s32 {
|
|||
switch(branch.state) {
|
||||
case Branch::Step: ipu.pc += 4; return status.halted;
|
||||
case Branch::Take: ipu.pc += 4; branch.delaySlot(); return status.halted;
|
||||
case Branch::DelaySlot: ipu.pc = branch.pc; branch.reset(); return 1;
|
||||
case Branch::DelaySlot:
|
||||
ipu.pc = branch.pc;
|
||||
branch.reset();
|
||||
pipeline.stall();
|
||||
if(branch.pc & 4) pipeline.singleIssue = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
unreachable;
|
||||
|
@ -116,7 +142,9 @@ auto RSP::power(bool reset) -> void {
|
|||
|
||||
if constexpr(Accuracy::RSP::Recompiler) {
|
||||
auto buffer = ares::Memory::FixedAllocator::get().tryAcquire(4_MiB);
|
||||
memory::jitprotect(false);
|
||||
recompiler.allocator.resize(4_MiB, bump_allocator::executable | bump_allocator::zero_fill, buffer);
|
||||
memory::jitprotect(true);
|
||||
recompiler.reset();
|
||||
}
|
||||
|
||||
|
|
|
@ -30,16 +30,132 @@ struct RSP : Thread, Memory::RCP<RSP> {
|
|||
auto unload() -> void;
|
||||
|
||||
auto main() -> void;
|
||||
auto step(u32 clocks) -> void;
|
||||
|
||||
auto instruction() -> void;
|
||||
auto instructionEpilogue() -> s32;
|
||||
auto instructionEpilogue(u32 clocks) -> s32;
|
||||
|
||||
auto power(bool reset) -> void;
|
||||
|
||||
//static description of one decoded instruction, produced by the decoder*() functions.
//it records which registers the instruction reads/writes and a few classification flags;
//the pipeline model and canDualIssue() consume it.
struct OpInfo {
|
||||
//bit values stored in the flags member
enum : u32 {
|
||||
Load = 1 << 0,
|
||||
Store = 1 << 1,
|
||||
Branch = 1 << 2,
|
||||
Vector = 1 << 3,
|
||||
VNopGroup = 1 << 4, //dual issue conflicts with VNOP
|
||||
};
|
||||
|
||||
u32 flags;
|
||||
u32 vfake; //only affects dual issue logic
|
||||
//use/def masks for three register files: r, v and vc.
//presumably one bit per register (scalar, vector, vector control) - confirm against the
//RUse/VUse/VCUse macros, which are defined outside this excerpt
struct {
|
||||
u32 use, def;
|
||||
} r, v, vc;
|
||||
|
||||
//flag predicates: true when the corresponding bit is set in flags
auto load() const -> bool { return flags & Load; }
|
||||
auto store() const -> bool { return flags & Store; }
|
||||
auto branch() const -> bool { return flags & Branch; }
|
||||
auto vector() const -> bool { return flags & Vector; }
|
||||
};
|
||||
|
||||
//returns true when op1 may be issued together with the preceding instruction op0.
//op0: descriptor of the first (earlier) instruction
//op1: descriptor of the instruction that immediately follows it
//all of the conditions below must hold. the last clause rejects the pair when op0 defines a
//vector register present in op1.vfake, and it applies when op0 is tagged VNopGroup or op1 is
//not tagged VNopGroup.
static auto canDualIssue(const OpInfo& op0, const OpInfo& op1) -> bool {
|
||||
return op0.vector() != op1.vector() //must be one SU and one VU
|
||||
&& !(op0.v.def & (op1.v.use | op1.v.def)) //second op cannot read/write vector registers written by the first
|
||||
&& !(op0.vc.def & (op1.vc.use | op1.vc.def)) //the same logic applies to vector control registers
|
||||
//certain instructions conflict due to "fake" uses from misinterpreted fields
|
||||
//such false conflicts only occur with VNOP if the preceding instruction is MTC2 or LTV
|
||||
&& !(((op0.flags | ~op1.flags) & OpInfo::VNopGroup) && (op0.v.def & op1.vfake));
|
||||
}
|
||||
|
||||
//timing model of the RSP pipeline.
//it tracks what the instruction(s) being issued read and write ("current") and what the three
//preceding issue slots wrote ("previous"), and accumulates the resulting cost in "clocks".
//typical sequence as used by callers: begin(), issue() once or twice, end(), then read clocks.
struct Pipeline {
|
||||
u32 address;
|
||||
u32 instruction;
|
||||
u32 clocks; //accumulated cost; zeroed by begin(), +3 per end() and per stall()
|
||||
u1 singleIssue; //set by end() when the slot just completed contained a branch
|
||||
|
||||
//what one earlier issue slot did: whether it loaded, and which registers it wrote
struct Stage {
|
||||
u1 load;
|
||||
u32 rWrite;
|
||||
u32 vWrite;
|
||||
} previous[3]; //index 0 is the most recent slot
|
||||
|
||||
//the slot currently being assembled by issue()
struct : Stage {
|
||||
u1 store;
|
||||
u1 branch;
|
||||
u32 rRead;
|
||||
u32 vRead;
|
||||
} current;
|
||||
|
||||
//CRC32 over singleIssue and the three previous stages (load flag plus the bytes of
//rWrite and vWrite, low byte first). address, instruction, clocks and current are not hashed.
auto hash() const -> u32 {
|
||||
Hash::CRC32 hash;
|
||||
hash.input(u8(singleIssue));
|
||||
for(auto& p : previous) {
|
||||
hash.input(u8(p.load));
|
||||
for(auto n : range(4)) hash.input(u8(p.rWrite >> n * 8));
|
||||
for(auto n : range(4)) hash.input(u8(p.vWrite >> n * 8));
|
||||
}
|
||||
return hash.value();
|
||||
}
|
||||
|
||||
//starts timing a new issue slot: resets the accumulated clocks
auto begin() -> void {
|
||||
clocks = 0;
|
||||
}
|
||||
|
||||
//completes the current issue slot: applies register-read and store hazards (each may add
//stalls), records whether the slot branched, shifts the slot into the history and charges
//3 clocks for the slot itself
auto end() -> void {
|
||||
readGPR(current.rRead);
|
||||
readVR(current.vRead);
|
||||
if(current.store) store();
|
||||
singleIssue = current.branch;
|
||||
|
||||
previous[2] = previous[1];
|
||||
previous[1] = previous[0];
|
||||
previous[0] = current;
|
||||
current = {};
|
||||
clocks += 3;
|
||||
}
|
||||
|
||||
//inserts one empty slot into the history (ageing the previous stages) and charges 3 clocks
auto stall() -> void {
|
||||
previous[2] = previous[1];
|
||||
previous[1] = previous[0];
|
||||
previous[0] = {};
|
||||
clocks += 3;
|
||||
}
|
||||
|
||||
//merges one instruction's register usage and flags into the current slot;
//may be called twice per slot when two instructions are dual-issued
auto issue(const OpInfo& op) -> void {
|
||||
current.rRead |= op.r.use;
|
||||
current.rWrite |= op.r.def & ~1; //zero register can't be written
|
||||
current.vRead |= op.v.use;
|
||||
current.vWrite |= op.v.def;
|
||||
current.load |= op.load();
|
||||
current.store |= op.store();
|
||||
current.branch |= op.branch();
|
||||
}
|
||||
|
||||
private:
|
||||
//scalar read hazard: two stalls if a register in mask was written by the most recent slot,
//one stall if it was written by the slot before that
auto readGPR(u32 mask) -> Pipeline& {
|
||||
if(mask & previous[0].rWrite) {
|
||||
stall(), stall();
|
||||
} else if(mask & previous[1].rWrite) {
|
||||
stall();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
//vector read hazard: three, two or one stall(s) depending on whether a register in mask
//was written one, two or three slots ago
auto readVR(u32 mask) -> Pipeline& {
|
||||
if(mask & previous[0].vWrite) {
|
||||
stall(), stall(), stall();
|
||||
} else if(mask & previous[1].vWrite) {
|
||||
stall(), stall();
|
||||
} else if(mask & previous[2].vWrite) {
|
||||
stall();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
//store hazard: stalls for as long as the slot two back is a load.
//each stall shifts an empty slot in, so this loop runs at most twice
auto store() -> void {
|
||||
while(previous[1].load) {
|
||||
stall();
|
||||
}
|
||||
}
|
||||
} pipeline;
|
||||
|
||||
//dma.cpp
|
||||
|
@ -47,8 +163,8 @@ struct RSP : Thread, Memory::RCP<RSP> {
|
|||
auto dmaTransferStep() -> void;
|
||||
|
||||
//io.cpp
|
||||
auto readWord(u32 address, u32& cycles) -> u32;
|
||||
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
|
||||
auto readWord(u32 address, Thread& thread) -> u32;
|
||||
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
|
||||
auto ioRead(u32 address) -> u32;
|
||||
auto ioWrite(u32 address, u32 data) -> void;
|
||||
|
||||
|
@ -80,8 +196,8 @@ struct RSP : Thread, Memory::RCP<RSP> {
|
|||
Status(RSP& self) : self(self) {}
|
||||
|
||||
//io.cpp
|
||||
auto readWord(u32 address, u32& cycles) -> u32;
|
||||
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
|
||||
auto readWord(u32 address, Thread& thread) -> u32;
|
||||
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
|
||||
|
||||
n1 semaphore;
|
||||
n1 halted = 1;
|
||||
|
@ -316,13 +432,22 @@ struct RSP : Thread, Memory::RCP<RSP> {
|
|||
u16 inverseSquareRoots[512];
|
||||
|
||||
//decoder.cpp
|
||||
auto decoderEXECUTE() -> void;
|
||||
auto decoderSPECIAL() -> void;
|
||||
auto decoderREGIMM() -> void;
|
||||
auto decoderSCC() -> void;
|
||||
auto decoderVU() -> void;
|
||||
auto decoderLWC2() -> void;
|
||||
auto decoderSWC2() -> void;
|
||||
auto decoderEXECUTE(u32 instruction) const -> OpInfo;
|
||||
auto decoderSPECIAL(u32 instruction) const -> OpInfo;
|
||||
auto decoderREGIMM(u32 instruction) const -> OpInfo;
|
||||
auto decoderSCC(u32 instruction) const -> OpInfo;
|
||||
auto decoderVU(u32 instruction) const -> OpInfo;
|
||||
auto decoderLWC2(u32 instruction) const -> OpInfo;
|
||||
auto decoderSWC2(u32 instruction) const -> OpInfo;
|
||||
|
||||
//interpreter.cpp
|
||||
auto interpreterEXECUTE() -> void;
|
||||
auto interpreterSPECIAL() -> void;
|
||||
auto interpreterREGIMM() -> void;
|
||||
auto interpreterSCC() -> void;
|
||||
auto interpreterVU() -> void;
|
||||
auto interpreterLWC2() -> void;
|
||||
auto interpreterSWC2() -> void;
|
||||
|
||||
auto INVALID() -> void;
|
||||
|
||||
|
@ -333,11 +458,13 @@ struct RSP : Thread, Memory::RCP<RSP> {
|
|||
|
||||
//one recompiled block of RSP code together with the pipeline state recorded for it.
struct Block {
|
||||
//runs the block: installs the block's recorded pipeline state into the RSP, then calls the
//generated code as a function taking the RSP, its IPU and its VU by pointer
auto execute(RSP& self) -> void {
|
||||
self.pipeline = pipeline; //must be updated first so instructionEpilogue() can handle taken branch
|
||||
((void (*)(RSP*, IPU*, VU*))code)(&self, &self.ipu, &self.vpu);
|
||||
}
|
||||
|
||||
u8* code; //entry point of the generated code
|
||||
u12 size;
|
||||
Pipeline pipeline; //state at *end* of block excepting taken branch stall
|
||||
};
|
||||
|
||||
struct BlockHashPair {
|
||||
|
@ -385,6 +512,7 @@ struct RSP : Thread, Memory::RCP<RSP> {
|
|||
return s <= e ? smask & emask : smask | emask;
|
||||
}
|
||||
|
||||
Pipeline pipeline;
|
||||
bump_allocator allocator;
|
||||
array<Block*[1024]> context;
|
||||
hashset<BlockHashPair> blocks;
|
||||
|
|
|
@ -5,6 +5,12 @@ auto RSP::serialize(serializer& s) -> void {
|
|||
|
||||
s(pipeline.address);
|
||||
s(pipeline.instruction);
|
||||
s(pipeline.singleIssue);
|
||||
for(auto& p : pipeline.previous) {
|
||||
s(p.load);
|
||||
s(p.rWrite);
|
||||
s(p.vWrite);
|
||||
}
|
||||
|
||||
s(dma.pending);
|
||||
s(dma.current);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
auto SI::readWord(u32 address, u32& cycles) -> u32 {
|
||||
auto SI::readWord(u32 address, Thread& thread) -> u32 {
|
||||
if(address <= 0x048f'ffff) return ioRead(address);
|
||||
|
||||
if (unlikely(io.ioBusy)) {
|
||||
|
@ -54,7 +54,7 @@ auto SI::ioRead(u32 address) -> u32 {
|
|||
return data;
|
||||
}
|
||||
|
||||
auto SI::writeWord(u32 address, u32 data, u32& cycles) -> void {
|
||||
auto SI::writeWord(u32 address, u32 data, Thread& thread) -> void {
|
||||
if(address <= 0x048f'ffff) return ioWrite(address, data);
|
||||
|
||||
if(io.ioBusy) return;
|
||||
|
|
|
@ -25,8 +25,8 @@ struct SI : Memory::RCP<SI> {
|
|||
//io.cpp
|
||||
auto ioRead(u32 address) -> u32;
|
||||
auto ioWrite(u32 address, u32 data) -> void;
|
||||
auto readWord(u32 address, u32& cycles) -> u32;
|
||||
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
|
||||
auto readWord(u32 address, Thread& thread) -> u32;
|
||||
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
|
||||
auto writeFinished() -> void;
|
||||
auto writeForceFinish() -> void;
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
static const string SerializerVersion = "v131";
|
||||
static const string SerializerVersion = "v133.1";
|
||||
|
||||
auto System::serialize(bool synchronize) -> serializer {
|
||||
serializer s;
|
||||
|
|
|
@ -18,6 +18,17 @@ auto load(Node::System& node, string name) -> bool {
|
|||
}
|
||||
|
||||
//applies one named configuration option.
//name:  option identifier; value: its textual value.
//when built with VULKAN, the recognized names update the corresponding vulkan settings;
//names (and "Quality" values) that match none of the tests below are silently ignored.
//without VULKAN the function does nothing. it always returns true.
auto option(string name, string value) -> bool {
|
||||
#if defined(VULKAN)
|
||||
if(name == "Enable GPU acceleration") vulkan.enable = value.boolean();
|
||||
if(name == "Quality" && value == "SD" ) vulkan.internalUpscale = 1;
|
||||
if(name == "Quality" && value == "HD" ) vulkan.internalUpscale = 2;
|
||||
if(name == "Quality" && value == "UHD") vulkan.internalUpscale = 4;
|
||||
if(name == "Supersampling") vulkan.supersampleScanout = value.boolean();
|
||||
if(name == "Disable Video Interface Processing") vulkan.disableVideoInterfaceProcessing = value.boolean();
|
||||
if(name == "Weave Deinterlacing") vulkan.weaveDeinterlacing = value.boolean();
|
||||
//derived settings are recomputed on every call, whichever option was just changed:
//supersampling is forced off at 1x internal upscale, and the output upscale is 1 when
//supersampling is on, otherwise it equals the internal upscale
if(vulkan.internalUpscale == 1) vulkan.supersampleScanout = false;
|
||||
vulkan.outputUpscale = vulkan.supersampleScanout ? 1 : vulkan.internalUpscale;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -38,8 +49,7 @@ auto System::game() -> string {
|
|||
}
|
||||
|
||||
auto System::run() -> void {
|
||||
while(!vi.refreshed) cpu.main();
|
||||
vi.refreshed = false;
|
||||
cpu.main();
|
||||
}
|
||||
|
||||
auto System::load(Node::System& root, string name) -> bool {
|
||||
|
@ -57,9 +67,11 @@ auto System::load(Node::System& root, string name) -> bool {
|
|||
|
||||
if(name.find("NTSC")) {
|
||||
information.region = Region::NTSC;
|
||||
information.videoFrequency = 48'681'812;
|
||||
}
|
||||
if(name.find("PAL")) {
|
||||
information.region = Region::PAL;
|
||||
information.videoFrequency = 49'656'530;
|
||||
}
|
||||
|
||||
node = Node::System::create(information.name);
|
||||
|
@ -90,6 +102,9 @@ auto System::load(Node::System& root, string name) -> bool {
|
|||
rsp.load(node);
|
||||
rdp.load(node);
|
||||
if(_DD()) dd.load(node);
|
||||
#if defined(VULKAN)
|
||||
vulkan.load(node);
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -97,6 +112,9 @@ auto System::unload() -> void {
|
|||
if(!node) return;
|
||||
save();
|
||||
if(vi.screen) vi.screen->quit(); //stop video thread
|
||||
#if defined(VULKAN)
|
||||
vulkan.unload();
|
||||
#endif
|
||||
cartridgeSlot.unload();
|
||||
controllerPort1.unload();
|
||||
controllerPort2.unload();
|
||||
|
|
|
@ -8,6 +8,7 @@ struct System {
|
|||
auto region() const -> Region { return information.region; }
|
||||
auto _DD() const -> bool { return information.dd; }
|
||||
auto frequency() const -> u32 { return information.frequency; }
|
||||
auto videoFrequency() const -> u32 { return information.videoFrequency; }
|
||||
|
||||
//system.cpp
|
||||
auto game() -> string;
|
||||
|
@ -26,6 +27,7 @@ private:
|
|||
string name = "Nintendo 64";
|
||||
Region region = Region::NTSC;
|
||||
u32 frequency = 93'750'000 * 2;
|
||||
u32 videoFrequency = 48'681'812;
|
||||
bool dd = false;
|
||||
} information;
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
auto VI::readWord(u32 address, u32& cycles) -> u32 {
|
||||
auto VI::readWord(u32 address, Thread& thread) -> u32 {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data;
|
||||
|
||||
|
@ -96,7 +96,7 @@ auto VI::readWord(u32 address, u32& cycles) -> u32 {
|
|||
return data;
|
||||
}
|
||||
|
||||
auto VI::writeWord(u32 address, u32 data_, u32& cycles) -> void {
|
||||
auto VI::writeWord(u32 address, u32 data_, Thread& thread) -> void {
|
||||
address = (address & 0xfffff) >> 2;
|
||||
n32 data = data_;
|
||||
|
||||
|
|
|
@ -11,7 +11,6 @@ VI vi;
|
|||
|
||||
bool BobDeinterlace = false;
|
||||
bool FastVI = false;
|
||||
u32* OutFrameBuffer;
|
||||
|
||||
auto VI::load(Node::Object parent) -> void {
|
||||
node = parent->append<Node::Object>("VI");
|
||||
|
@ -19,6 +18,12 @@ auto VI::load(Node::Object parent) -> void {
|
|||
u32 width = 640;
|
||||
u32 height = 576;
|
||||
|
||||
#if defined(VULKAN)
|
||||
if (vulkan.enable) {
|
||||
width *= vulkan.outputUpscale;
|
||||
height *= vulkan.outputUpscale;
|
||||
}
|
||||
#endif
|
||||
screen = node->append<Node::Video::Screen>("Screen", width, height);
|
||||
screen->setRefresh({&VI::refresh, this});
|
||||
screen->colors((1 << 24) + (1 << 15), [&](n32 color) -> n64 {
|
||||
|
@ -36,8 +41,19 @@ auto VI::load(Node::Object parent) -> void {
|
|||
return a << 48 | r << 32 | g << 16 | b << 0;
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
#if defined(VULKAN)
|
||||
if(vulkan.enable) {
|
||||
screen->setSize(vulkan.outputUpscale * 640, vulkan.outputUpscale * 480);
|
||||
if(!vulkan.supersampleScanout) {
|
||||
screen->setScale(1.0 / vulkan.outputUpscale, 1.0 / vulkan.outputUpscale);
|
||||
}
|
||||
} else {
|
||||
screen->setSize(640, 480);
|
||||
}
|
||||
#else
|
||||
screen->setSize(640, 480);
|
||||
#endif
|
||||
|
||||
debugger.load(node);
|
||||
}
|
||||
|
@ -50,30 +66,125 @@ auto VI::unload() -> void {
|
|||
}
|
||||
|
||||
auto VI::main() -> void {
|
||||
//field is not compared
|
||||
if(io.vcounter << 1 == io.coincidence) {
|
||||
mi.raise(MI::IRQ::VI);
|
||||
while(Thread::clock < 0) {
|
||||
if(++io.vcounter >= (Region::NTSC() ? 262 : 312) + io.field) {
|
||||
io.vcounter = 0;
|
||||
io.field = io.field + 1 & io.serrate;
|
||||
#if defined(VULKAN)
|
||||
if (vulkan.enable) {
|
||||
gpuOutputValid = vulkan.scanoutAsync(io.field);
|
||||
vulkan.frame();
|
||||
}
|
||||
#endif
|
||||
angrylion::UpdateScreen(FastVI);
|
||||
angrylion::FinalizeFrame(BobDeinterlace);
|
||||
refreshed = true;
|
||||
#if false
|
||||
screen->frame();
|
||||
#endif
|
||||
}
|
||||
|
||||
//field is not compared
|
||||
if(io.vcounter << 1 == io.coincidence) {
|
||||
mi.raise(MI::IRQ::VI);
|
||||
}
|
||||
|
||||
if(Region::NTSC()) step(system.frequency() / 60 / 262);
|
||||
if(Region::PAL ()) step(system.frequency() / 50 / 312);
|
||||
}
|
||||
|
||||
if(++io.vcounter >= (Region::NTSC() ? 262 : 312) + io.field) {
|
||||
io.vcounter = 0;
|
||||
io.field = io.field + 1 & io.serrate;
|
||||
|
||||
angrylion::UpdateScreen(FastVI);
|
||||
refresh();
|
||||
}
|
||||
|
||||
if(Region::NTSC()) step(system.frequency() / 60 / 262);
|
||||
if(Region::PAL ()) step(system.frequency() / 50 / 312);
|
||||
}
|
||||
|
||||
auto VI::step(u32 clocks) -> void {
|
||||
Thread::clock += clocks;
|
||||
}
|
||||
|
||||
auto VI::refresh() -> void {
|
||||
angrylion::FinalizeFrame(BobDeinterlace);
|
||||
refreshed = true;
|
||||
#if defined(VULKAN)
|
||||
if(vulkan.enable && gpuOutputValid) {
|
||||
const u8* rgba = nullptr;
|
||||
u32 width = 0, height = 0;
|
||||
vulkan.mapScanoutRead(rgba, width, height);
|
||||
if(rgba) {
|
||||
screen->setViewport(0, 0, width, height);
|
||||
for(u32 y : range(height)) {
|
||||
u32 y_fix = y;
|
||||
// When weave interlacing is active, we need to fix the order of interleaved lines for the image output
|
||||
// but only when the VI is set to interlace and we don't use supersampling (which causes severe bugs)
|
||||
// Otherwise proceed as normal
|
||||
if(io.serrate == 1 && vulkan.weaveDeinterlacing && !vulkan.supersampleScanout) y_fix = (y % 2 == 0)? y+1 : y-1; // Swap each even/odd line
|
||||
auto source = rgba + width * y_fix * sizeof(u32);
|
||||
auto target = screen->pixels(1).data() + y * vulkan.outputUpscale * 640;
|
||||
for(u32 x : range(width)) {
|
||||
target[x] = source[x * 4 + 0] << 16 | source[x * 4 + 1] << 8 | source[x * 4 + 2] << 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
screen->setViewport(0, 0, 1, 1);
|
||||
screen->pixels(1).data()[0] = 0;
|
||||
}
|
||||
vulkan.unmapScanoutRead();
|
||||
vulkan.endScanout();
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if(io.serrate == 0) screen->setProgressive(0);
|
||||
if(io.serrate == 1) screen->setInterlace(!io.field);
|
||||
|
||||
u32 hscan_start = Region::NTSC() ? 108 : 128;
|
||||
u32 vscan_start = Region::NTSC() ? 34 : 44;
|
||||
u32 hscan_len = Region::NTSC() ? 640 : 640;
|
||||
u32 vscan_len = Region::NTSC() ? 480 : 576;
|
||||
u32 hscan_stop = hscan_start + hscan_len;
|
||||
u32 vscan_stop = vscan_start + vscan_len;
|
||||
screen->setViewport(0, 0, hscan_len, vscan_len);
|
||||
|
||||
i32 dy0 = vi.io.vstart;
|
||||
i32 dy1 = vi.io.vend; if (dy1 < dy0) dy1 = vscan_stop;
|
||||
i32 dx0 = vi.io.hstart;
|
||||
i32 dx1 = vi.io.hend;
|
||||
|
||||
dy0 = max(vscan_start, dy0);
|
||||
dy1 = min(vscan_stop, dy1);
|
||||
dx0 = max(hscan_start, dx0);
|
||||
dx1 = min(hscan_stop, dx1);
|
||||
|
||||
// Undocumented VI guard-band "hardware bug" (match parallel-RDP)
|
||||
if(dx0 >= hscan_start) dx0 += 8;
|
||||
if(dx1 < hscan_stop) dx1 -= 7;
|
||||
|
||||
u32 pitch = vi.io.width;
|
||||
if(vi.io.colorDepth == 2) {
|
||||
//15bpp
|
||||
u32 y0 = vi.io.ysubpixel + vi.io.yscale * (dy0 - vi.io.vstart);
|
||||
for(i32 dy = dy0; dy < dy1; dy++) {
|
||||
if(!io.serrate || (dy & 1) == !io.field) {
|
||||
u32 address = vi.io.dramAddress + (y0 >> 11) * pitch * 2;
|
||||
auto line = screen->pixels(1).data() + (dy - vscan_start) * hscan_len;
|
||||
u32 x0 = vi.io.xsubpixel + vi.io.xscale * (dx0 - vi.io.hstart);
|
||||
for(i32 dx = dx0; dx < dx1; dx++) {
|
||||
u16 data = rdram.ram.read<Half>(address + (x0 >> 10) * 2);
|
||||
line[dx - hscan_start] = 1 << 24 | data >> 1;
|
||||
x0 += vi.io.xscale;
|
||||
}
|
||||
}
|
||||
y0 += vi.io.yscale;
|
||||
}
|
||||
}
|
||||
|
||||
if(vi.io.colorDepth == 3) {
|
||||
//24bpp
|
||||
u32 y0 = vi.io.ysubpixel + vi.io.yscale * (dy0 - vi.io.vstart);
|
||||
for(i32 dy = dy0; dy < dy1; dy++) {
|
||||
if(!io.serrate || (dy & 1) == !io.field) {
|
||||
u32 address = vi.io.dramAddress + (y0 >> 11) * pitch * 4;
|
||||
auto line = screen->pixels(1).data() + (dy - vscan_start) * hscan_len;
|
||||
u32 x0 = vi.io.xsubpixel + vi.io.xscale * (dx0 - vi.io.hstart);
|
||||
for(i32 dx = dx0; dx < dx1; dx++) {
|
||||
u32 data = rdram.ram.read<Word>(address + (x0 >> 10) * 4);
|
||||
line[dx - hscan_start] = data >> 8;
|
||||
x0 += vi.io.xscale;
|
||||
}
|
||||
}
|
||||
y0 += vi.io.yscale;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto VI::power(bool reset) -> void {
|
||||
|
@ -81,6 +192,10 @@ auto VI::power(bool reset) -> void {
|
|||
screen->power();
|
||||
io = {};
|
||||
refreshed = false;
|
||||
|
||||
#if defined(VULKAN)
|
||||
gpuOutputValid = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -19,13 +19,12 @@ struct VI : Thread, Memory::RCP<VI> {
|
|||
auto unload() -> void;
|
||||
|
||||
auto main() -> void;
|
||||
auto step(u32 clocks) -> void;
|
||||
auto refresh() -> void;
|
||||
auto power(bool reset) -> void;
|
||||
|
||||
//io.cpp
|
||||
auto readWord(u32 address, u32& cycles) -> u32;
|
||||
auto writeWord(u32 address, u32 data, u32& cycles) -> void;
|
||||
auto readWord(u32 address, Thread& thread) -> u32;
|
||||
auto writeWord(u32 address, u32 data, Thread& thread) -> void;
|
||||
|
||||
//serialization.cpp
|
||||
auto serialize(serializer&) -> void;
|
||||
|
|
Loading…
Reference in New Issue