merge aresv128

This commit is contained in:
CasualPokePlayer 2022-05-26 22:26:56 -07:00
parent 48cfc79885
commit 009bff30d7
117 changed files with 14467 additions and 6568 deletions

Binary file not shown.

View File

@ -8,7 +8,7 @@ using BizHawk.Emulation.Cores.Waterbox;
namespace BizHawk.Emulation.Cores.Consoles.Nintendo.Ares64
{
[PortedCore(CoreNames.Ares64, "ares team, Near", "v127", "https://ares-emulator.github.io/")]
[PortedCore(CoreNames.Ares64, "ares team, Near", "v128", "https://ares-emulator.github.io/")]
[ServiceNotApplicable(new[] { typeof(IDriveLight), })]
public partial class Ares64 : WaterboxCore, IRegionable
{

View File

@ -648,8 +648,8 @@ EXPORT void GetMemoryAreas(MemoryArea *m)
int i = 0;
ADD_MEMORY_DOMAIN(rdram.ram, "RDRAM", MEMORYAREA_FLAGS_PRIMARY);
ADD_MEMORY_DOMAIN(cartridge.rom, "ROM", 0);
ADD_MEMORY_DOMAIN(pi.rom, "PI ROM", 0);
ADD_MEMORY_DOMAIN(pi.ram, "PI RAM", 0);
ADD_MEMORY_DOMAIN(pif.rom, "PIF ROM", 0);
ADD_MEMORY_DOMAIN(pif.ram, "PIF RAM", 0);
ADD_MEMORY_DOMAIN(rsp.dmem, "RSP DMEM", 0);
ADD_MEMORY_DOMAIN(rsp.imem, "RSP IMEM", 0);
ADD_MEMORY_DOMAIN(cartridge.ram, "SRAM", MEMORYAREA_FLAGS_ONEFILLED | MEMORYAREA_FLAGS_SAVERAMMABLE);

View File

@ -32,6 +32,7 @@ SRCS_N64 = \
$(ARES_PATH)/n64/vi/vi.cpp \
$(ARES_PATH)/n64/ai/ai.cpp \
$(ARES_PATH)/n64/pi/pi.cpp \
$(ARES_PATH)/n64/pif/pif.cpp \
$(ARES_PATH)/n64/ri/ri.cpp \
$(ARES_PATH)/n64/si/si.cpp \
$(ARES_PATH)/n64/rdram/rdram.cpp \

View File

@ -76,3 +76,73 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------------
----------------------------------------------------------------------
libchdr
Copyright Romain Tisserand
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the <organization> nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------------
----------------------------------------------------------------------
LZMA SDK is placed in the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or distribute
the original LZMA SDK code, either in source code form or as a compiled binary,
for any purpose, commercial or non-commercial, and by any means.
----------------------------------------------------------------------
----------------------------------------------------------------------
zlib
(C) 1995-2017 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu
If you use the zlib library in a product, we would appreciate *not* receiving
lengthy legal documents to sign. The sources are provided for free but without
warranty of any kind. The library has been entirely written by Jean-loup
Gailly and Mark Adler; it does not include third-party code.
If you redistribute modified sources, we would appreciate that you include in
the file ChangeLog history information documenting your changes. Please read
the FAQ for more information on the distribution of modified source versions.
----------------------------------------------------------------------

View File

@ -40,16 +40,16 @@ using namespace nall;
namespace ares {
static const string Name = "ares";
static const string Version = "126";
static const string Version = "128";
static const string Copyright = "ares team, Near";
static const string License = "ISC";
static const string LicenseURI = "https://opensource.org/licenses/ISC";
static const string Website = "ares-emulator.github.io";
static const string WebsiteURI = "https://ares-emulator.github.io";
static const string Website = "ares-emu.net";
static const string WebsiteURI = "https://ares-emu.net/";
//incremented only when serialization format changes
static const u32 SerializerSignature = 0x31545342; //"BST1" (little-endian)
static const string SerializerVersion = "125";
static const string SerializerVersion = "128";
namespace VFS {
using Pak = shared_pointer<vfs::directory>;

View File

@ -27,7 +27,7 @@ struct Instruction : Tracer {
for(auto& history : _history) history = ~0;
}
auto address(u32 address) -> bool {
auto address(u64 address) -> bool {
address &= (1ull << _addressBits) - 1; //mask upper bits of address
_address = address;
address >>= _addressMask; //clip unneeded alignment bits (to reduce _masks size)
@ -55,7 +55,7 @@ struct Instruction : Tracer {
//mark an already-executed address as not executed yet for trace masking.
//call when writing to executable RAM to support self-modifying code.
auto invalidate(u32 address) -> void {
auto invalidate(u64 address) -> void {
if(unlikely(_mask && updateMasks())) {
address &= (1ull << _addressBits) - 1;
address >>= _addressMask;

View File

@ -1,6 +1,10 @@
build := stable
nall.path := ../../../nall
include $(nall.path)/GNUmakefile
all:
sourcery resource.bml resource.cpp resource.hpp
clean:
rm resource.cpp
rm resource.hpp
$(call delete,resource.cpp)
$(call delete,resource.hpp)

View File

@ -7,7 +7,7 @@ struct Accuracy {
static constexpr bool Recompiler = !Interpreter;
//exceptions when the CPU accesses unaligned memory addresses
static constexpr bool AddressErrors = 0 | Reference;
static constexpr bool AddressErrors = 1 | Reference;
};
struct RSP {
@ -15,7 +15,7 @@ struct Accuracy {
static constexpr bool Recompiler = !Interpreter;
//VU instructions
static constexpr bool SISD = 0 | Reference | !Architecture::amd64;
static constexpr bool SISD = 0 | Reference | !Architecture::amd64 | !Architecture::sse41;
static constexpr bool SIMD = !SISD;
};

View File

@ -32,13 +32,15 @@ auto AI::main() -> void {
auto AI::sample() -> void {
if(io.dmaCount == 0) return stream->frame(0.0, 0.0);
io.dmaAddress[0].bit(13,23) += io.dmaAddressCarry;
auto data = rdram.ram.read<Word>(io.dmaAddress[0]);
auto left = s16(data >> 16);
auto right = s16(data >> 0);
stream->frame(left / 32768.0, right / 32768.0);
io.dmaAddress[0] += 4;
io.dmaLength [0] -= 4;
io.dmaAddress[0].bit(0,12) += 4;
io.dmaAddressCarry = io.dmaAddress[0].bit(0,12) == 0;
io.dmaLength[0] -= 4;
if(!io.dmaLength[0]) {
mi.raise(MI::IRQ::AI);
if(--io.dmaCount) {

View File

@ -36,6 +36,7 @@ struct AI : Thread, Memory::IO<AI> {
struct IO {
n1 dmaEnable;
n24 dmaAddress[2];
n1 dmaAddressCarry;
n18 dmaLength[2];
n2 dmaCount;
n14 dacRate;

View File

@ -6,6 +6,7 @@ auto AI::serialize(serializer& s) -> void {
s(io.dmaEnable);
s(io.dmaAddress);
s(io.dmaAddressCarry);
s(io.dmaLength);
s(io.dmaCount);
s(io.dacRate);

View File

@ -1,9 +1,9 @@
struct Cartridge {
Node::Peripheral node;
VFS::Pak pak;
Memory::Readable rom;
Memory::Writable ram;
Memory::Writable eeprom;
Memory::Readable16 rom;
Memory::Writable16 ram;
Memory::Writable16 eeprom;
struct Flash : Memory::Writable {
template<u32 Size>
auto read(u32 address) -> u64 {

View File

@ -3,6 +3,7 @@ struct Controller {
virtual ~Controller() = default;
virtual auto save() -> void {}
virtual auto comm(n8 send, n8 recv, n8 input[], n8 output[]) -> n2 { return 1; }
virtual auto read() -> n32 { return 0; }
virtual auto serialize(serializer&) -> void {}
};

View File

@ -101,6 +101,118 @@ auto Gamepad::rumble(bool enable) -> void {
platform->input(motor);
}
//processes one command sent to this gamepad.
//input[0] selects the command, send/recv are the byte counts of the request
//and reply, and the reply bytes are stored into output[].
//returns a two-bit status: bit 0 is set when a reply was produced, bit 1 is
//set when a controller-state read asked for more than four bytes.
auto Gamepad::comm(n8 send, n8 recv, n8 input[], n8 output[]) -> n2 {
  b1 valid = 0;
  b1 over = 0;

  //pak address taken from input[1..2] with the low five bits cleared
  auto pakAddress = [&]() -> u32 {
    return (input[1] << 8 | input[2] << 0) & ~31;
  };

  //the low five bits of input[2] must equal the checksum of the address
  auto checksumMatches = [&](u32 address) -> bool {
    return pif.addressCRC(address) == (n5)input[2];
  };

  //stores the data checksum after the recv - 1 payload bytes of a pak read
  auto finishRead = [&]() -> void {
    output[recv - 1] = pif.dataCRC({&output[0], recv - 1});
    valid = 1;
  };

  //answers a pak write with the checksum of the send - 3 payload bytes
  auto finishWrite = [&]() -> void {
    output[0] = pif.dataCRC({&input[3], send - 3});
    valid = 1;
  };

  //status
  if(input[0] == 0x00 || input[0] == 0xff) {
    const bool pakPresent = ram || motor || transferPak;
    output[0] = 0x05;  //0x05 = gamepad; 0x02 = mouse
    output[1] = 0x00;
    //0x01 = pak present; 0x02 = nothing present in controller slot
    output[2] = pakPresent ? 0x01 : 0x02;
    valid = 1;
  }

  //read controller state
  if(input[0] == 0x01) {
    u32 data = read();
    output[0] = data >> 24;
    output[1] = data >> 16;
    output[2] = data >>  8;
    output[3] = data >>  0;
    over = !(recv <= 4);
    valid = 1;
  }

  //read pak
  if(input[0] == 0x02 && send >= 3 && recv >= 1) {
    //controller pak
    if(ram) {
      u32 address = pakAddress();
      if(checksumMatches(address)) {
        for(u32 index : range(recv - 1)) output[index] = ram.read<Byte>(address++);
        finishRead();
      }
    }

    //rumble pak: every payload byte reads back as 0x80
    if(motor) {
      u32 address = pakAddress();
      if(checksumMatches(address)) {
        for(u32 index : range(recv - 1)) output[index] = 0x80;
        finishRead();
      }
    }

    //transfer pak
    if(transferPak) {
      u32 address = pakAddress();
      if(checksumMatches(address)) {
        for(u32 index : range(recv - 1)) output[index] = transferPak.read(address++);
        finishRead();
      }
    }
  }

  //write pak
  if(input[0] == 0x03 && send >= 3 && recv >= 1) {
    //controller pak
    if(ram) {
      u32 address = pakAddress();
      if(checksumMatches(address)) {
        for(u32 index : range(send - 3)) ram.write<Byte>(address++, input[3 + index]);
        finishWrite();
      }
    }

    //rumble pak: bit 0 of the first payload byte switches the motor
    if(motor) {
      u32 address = pakAddress();
      if(checksumMatches(address)) {
        finishWrite();
        rumble(input[3] & 1);
      }
    }

    //transfer pak
    if(transferPak) {
      u32 address = pakAddress();
      if(checksumMatches(address)) {
        for(u32 index : range(send - 3)) transferPak.write(address++, input[3 + index]);
        finishWrite();
      }
    }
  }

  n2 status = 0;
  status.bit(0) = valid;
  status.bit(1) = over;
  return status;
}
bool RestrictAnalogRange;
auto Gamepad::read() -> n32 {
@ -121,35 +233,6 @@ auto Gamepad::read() -> n32 {
platform->input(z);
platform->input(start);
/*
//scale {-32768 ... +32767} to {-84 ... +84}
auto ax = x->value() * 85.0 / 32767.0;
auto ay = y->value() * 85.0 / 32767.0;
//create scaled circular dead-zone in range {-15 ... +15}
auto length = sqrt(ax * ax + ay * ay);
if(length < 16.0) {
length = 0.0;
} else if(length > 85.0) {
length = 85.0 / length;
} else {
length = (length - 16.0) * 85.0 / 69.0 / length;
}
ax *= length;
ay *= length;
//bound diagonals to an octagonal range {-68 ... +68}
if(ax != 0.0 && ay != 0.0) {
auto slope = ay / ax;
auto edgex = copysign(85.0 / (abs(slope) + 16.0 / 69.0), ax);
auto edgey = copysign(min(abs(edgex * slope), 85.0 / (1.0 / abs(slope) + 16.0 / 69.0)), ay);
edgex = edgey / slope;
auto scale = sqrt(edgex * edgex + edgey * edgey) / 85.0;
ax *= scale;
ay *= scale;
}
*/
auto ax = x->value() * 1.0;
auto ay = y->value() * 1.0;
@ -158,14 +241,24 @@ auto Gamepad::read() -> n32 {
ax = ax * 85.0 / 127.0;
ay = ay * 85.0 / 127.0;
//create scaled circular dead-zone in range {-15 ... +15}
//create square dead-zone in range {-7 ... +7}
auto lengthAbsoluteX = abs (ax);
auto lengthAbsoluteY = abs (ay);
if (lengthAbsoluteX < 7.0) {
lengthAbsoluteX = 0.0;
ax *= lengthAbsoluteX;
}
if (lengthAbsoluteY < 7.0) {
lengthAbsoluteY = 0.0;
ay *= lengthAbsoluteY;
}
//create outer circular dead-zone in ranges {-inf ... -85} and {+85 ... +inf} and scale between the two dead-zones according to the two-dimensional length
auto length = sqrt(ax * ax + ay * ay);
if(length < 16.0) {
length = 0.0;
} else if(length > 85.0) {
if(length > 85.0) {
length = 85.0 / length;
} else {
length = (length - 16.0) * 85.0 / 69.0 / length;
length = (length - 7.0) * 85.0 / (85.0 - 7.0) / length;
}
ax *= length;
ay *= length;

View File

@ -32,6 +32,7 @@ struct Gamepad : Controller {
auto connect() -> void;
auto disconnect() -> void;
auto rumble(bool enable) -> void;
auto comm(n8 send, n8 recv, n8 input[], n8 output[]) -> n2 override;
auto read() -> n32 override;
auto formatControllerPak() -> void;
auto serialize(serializer&) -> void override;

View File

@ -7,6 +7,42 @@ Mouse::Mouse(Node::Port parent) {
lclick = node->append<Node::Input::Button>("Left Click");
}
//empty destructor body; no explicit teardown is performed here
Mouse::~Mouse() {
}
//processes one command sent to this mouse.
//input[0] selects the command and the reply bytes are stored into output[].
//returns a two-bit status: bit 0 is set when a reply was produced, bit 1 is
//set when a state read asked for more than four bytes.
auto Mouse::comm(n8 send, n8 recv, n8 input[], n8 output[]) -> n2 {
  b1 valid = 0;
  b1 over = 0;

  //status
  if(input[0] == 0x00 || input[0] == 0xff) {
    output[0] = 0x02;  //0x05 = gamepad; 0x02 = mouse
    output[1] = 0x00;
    output[2] = 0x02;  //0x02 = nothing present in controller slot
    valid = 1;
  }

  //read controller state: the 32-bit value is emitted most significant byte first
  if(input[0] == 0x01) {
    u32 data = read();
    output[0] = data >> 24;
    output[1] = data >> 16;
    output[2] = data >>  8;
    output[3] = data >>  0;
    over = !(recv <= 4);
    valid = 1;
  }

  n2 status = 0;
  status.bit(0) = valid;
  status.bit(1) = over;
  return status;
}
auto Mouse::read() -> n32 {
platform->input(x);
platform->input(y);

View File

@ -5,5 +5,8 @@ struct Mouse : Controller {
Node::Input::Button lclick;
Mouse(Node::Port);
~Mouse();
auto comm(n8 send, n8 recv, n8 input[], n8 output[]) -> n2 override;
auto read() -> n32 override;
};

View File

@ -26,7 +26,7 @@ auto ControllerPort::save() -> void {
auto ControllerPort::allocate(string name) -> Node::Peripheral {
if(name == "Gamepad") device = new Gamepad(port);
if(name == "Mouse") device = new Mouse(port);
if(name == "Mouse" ) device = new Mouse(port);
if(device) return device->node;
return {};
}

View File

@ -52,9 +52,10 @@ auto CPU::synchronize() -> void {
queue.step(clocks, [](u32 event) {
switch(event) {
case Queue::RSP_DMA: return rsp.dmaTransfer();
case Queue::PI_DMA_Read: return pi.dmaRead();
case Queue::PI_DMA_Write: return pi.dmaWrite();
case Queue::RSP_DMA: return rsp.dmaTransferStep();
case Queue::PI_DMA_Read: return pi.dmaFinished();
case Queue::PI_DMA_Write: return pi.dmaFinished();
case Queue::PI_BUS_Write: return pi.writeFinished();
case Queue::SI_DMA_Read: return si.dmaRead();
case Queue::SI_DMA_Write: return si.dmaWrite();
}
@ -98,14 +99,12 @@ auto CPU::instructionEpilogue() -> s32 {
ipu.r[0].u64 = 0;
if(--scc.random.index < scc.wired.index) {
scc.random.index = 31;
}
switch(branch.state) {
case Branch::Step: ipu.pc += 4; return 0;
case Branch::Take: ipu.pc += 4; branch.delaySlot(); return 0;
case Branch::DelaySlot: ipu.pc = branch.pc; branch.reset(); return 1;
case Branch::Take: ipu.pc += 4; branch.delaySlot(true); return 0;
case Branch::NotTaken: ipu.pc += 4; branch.delaySlot(false); return 0;
case Branch::DelaySlotTaken: ipu.pc = branch.pc; branch.reset(); return 1;
case Branch::DelaySlotNotTaken: ipu.pc += 4; branch.reset(); return 0;
case Branch::Exception: branch.reset(); return 1;
case Branch::Discard: ipu.pc += 8; branch.reset(); return 1;
}
@ -124,13 +123,13 @@ auto CPU::power(bool reset) -> void {
for(auto& segment : context.segment) segment = Context::Segment::Unused;
icache.power(reset);
dcache.power(reset);
for(auto& entry : tlb.entry) entry = {};
for(auto& entry : tlb.entry) entry = {}, entry.synchronize();
tlb.physicalAddress = 0;
for(auto& r : ipu.r) r.u64 = 0;
ipu.lo.u64 = 0;
ipu.hi.u64 = 0;
ipu.r[29].u64 = u32(0xa400'1ff0); //stack pointer
ipu.pc = u32(0xbfc0'0000);
ipu.r[29].u64 = 0xffff'ffff'a400'1ff0ull; //stack pointer
ipu.pc = 0xffff'ffff'bfc0'0000ull;
scc = {};
for(auto& r : fpu.r) r.u64 = 0;
fpu.csr = {};

View File

@ -41,7 +41,7 @@ struct CPU : Thread {
auto power(bool reset) -> void;
struct Pipeline {
u32 address;
u64 address;
u32 instruction;
struct InstructionCache {
@ -61,12 +61,14 @@ struct CPU : Thread {
} pipeline;
struct Branch {
enum : u32 { Step, Take, DelaySlot, Exception, Discard };
enum : u32 { Step, Take, NotTaken, DelaySlotTaken, DelaySlotNotTaken, Exception, Discard };
auto inDelaySlot() const -> bool { return state == DelaySlot; }
auto inDelaySlot() const -> bool { return state == DelaySlotTaken || state == DelaySlotNotTaken; }
auto inDelaySlotTaken() const -> bool { return state == DelaySlotTaken; }
auto reset() -> void { state = Step; }
auto take(u32 address) -> void { state = Take; pc = address; }
auto delaySlot() -> void { state = DelaySlot; }
auto take(u64 address) -> void { state = Take; pc = address; }
auto notTaken() -> void { state = NotTaken; }
auto delaySlot(bool taken) -> void { state = taken ? DelaySlotTaken : DelaySlotNotTaken; }
auto exception() -> void { state = Exception; }
auto discard() -> void { state = Discard; }
@ -177,17 +179,16 @@ struct CPU : Thread {
n1 valid[2];
n1 dirty[2];
n3 cacheAlgorithm[2];
n32 physicalAddress[2];
n36 physicalAddress[2];
n32 pageMask;
n40 virtualAddress;
n8 addressSpaceID;
n2 region;
//internal:
n1 globals;
n32 addressMaskHi;
n32 addressMaskLo;
n32 addressSelect;
n40 addressCompare;
n40 addressMaskHi;
n40 addressMaskLo;
n40 addressSelect;
} entry[TLB::Entries];
u32 physicalAddress;
@ -207,6 +208,7 @@ struct CPU : Thread {
auto fetch(u64 address) -> u32;
template<u32 Size> auto read(u64 address) -> maybe<u64>;
template<u32 Size> auto write(u64 address, u64 data) -> bool;
auto addressException(u64 address) -> void;
//serialization.cpp
auto serialize(serializer&) -> void;
@ -403,12 +405,7 @@ struct CPU : Thread {
n1 probeFailure;
} index;
//1
struct Random {
n5 index = 31;
n1 unused;
} random;
//1: Random
//2: EntryLo0
//3: EntryLo1
//5: PageMask
@ -423,8 +420,7 @@ struct CPU : Thread {
//6
struct Wired {
n5 index;
n1 unused;
n6 index;
} wired;
//8
@ -487,11 +483,11 @@ struct CPU : Thread {
n2 cu; //reserved
n1 bigEndian = 1;
n2 sysadWritebackPattern;
n2 systemClockRatio = 6;
n3 systemClockRatio = 7;
} configuration;
//17: Load Linked Address
n64 ll;
n32 ll;
n1 llbit;
//18
@ -531,6 +527,7 @@ struct CPU : Thread {
//interpreter-scc.cpp
auto getControlRegister(n5) -> u64;
auto setControlRegister(n5, n64) -> void;
auto getControlRandom() -> u8;
auto DMFC0(r64& rt, u8 rd) -> void;
auto DMTC0(cr64& rt, u8 rd) -> void;
@ -707,6 +704,11 @@ struct CPU : Thread {
//clears the pool entry covering the given address.
//address bits 8..28 select the entry, so each entry spans 256 bytes.
auto invalidate(u32 address) -> void {
  const u32 slot = address >> 8 & 0x1fffff;
  pools[slot] = nullptr;
}
//invalidates every pool touched by the byte range [address, address + length).
//pools are walked in 256-byte steps; the final byte is invalidated separately
//because an unaligned range can spill into one more pool than the steps visit.
auto invalidateRange(u32 address, u32 length) -> void {
  //an empty range touches nothing; without this guard address + length - 1
  //would wrap to address - 1 and drop the pool of an unrelated preceding block
  if(length == 0) return;
  for(u32 offset = 0; offset < length; offset += 256) {
    invalidate(address + offset);
  }
  invalidate(address + length - 1);
}
auto pool(u32 address) -> Pool*;
auto block(u32 address) -> Block*;

View File

@ -1,12 +1,12 @@
auto CPU::DataCache::Line::hit(u32 address) const -> bool {
return valid && tag == (address & ~0xfff);
return valid && tag == (address & ~0xe000'0fff);
}
template<u32 Size> auto CPU::DataCache::Line::fill(u32 address, u64 data) -> void {
cpu.step(40);
valid = 1;
dirty = 1;
tag = address & ~0xfff;
tag = address & ~0xe000'0fff;
//read words according to critical doubleword first scheme
switch(address & 8) {
case 0:
@ -34,7 +34,7 @@ auto CPU::DataCache::Line::fill(u32 address) -> void {
cpu.step(40);
valid = 1;
dirty = 0;
tag = address & ~0xfff;
tag = address & ~0xe000'0fff;
//read words according to critical doubleword first scheme
switch(address & 8) {
case 0:

View File

@ -1,6 +1,6 @@
auto CPU::Debugger::load(Node::Object parent) -> void {
tracer.instruction = parent->append<Node::Debugger::Tracer::Instruction>("Instruction", "CPU");
tracer.instruction->setAddressBits(32, 2);
tracer.instruction->setAddressBits(64, 2);
tracer.exception = parent->append<Node::Debugger::Tracer::Notification>("Exception", "CPU");
tracer.interrupt = parent->append<Node::Debugger::Tracer::Notification>("Interrupt", "CPU");
@ -16,7 +16,7 @@ auto CPU::Debugger::unload() -> void {
auto CPU::Debugger::instruction() -> void {
if(unlikely(tracer.instruction->enabled())) {
u32 address = cpu.pipeline.address;
u64 address = cpu.pipeline.address;
u32 instruction = cpu.pipeline.instruction;
if(tracer.instruction->address(address)) {
cpu.disassembler.showColors = 0;
@ -72,6 +72,7 @@ auto CPU::Debugger::tlbWrite(u32 index) -> void {
auto entry = cpu.tlb.entry[index & 31];
tracer.tlb->notify({"write: ", index, " {"});
tracer.tlb->notify({" global: ", entry.global[0], ",", entry.global[1]});
tracer.tlb->notify({" valid: ", entry.valid[0], ",", entry.valid[1]});
tracer.tlb->notify({" physical address: 0x", hex(entry.physicalAddress[0]), ",0x", hex(entry.physicalAddress[1])});
tracer.tlb->notify({" page mask: 0x", hex(entry.pageMask)});
tracer.tlb->notify({" virtual address: 0x", hex(entry.virtualAddress)});

View File

@ -1,11 +1,11 @@
auto CPU::InstructionCache::Line::hit(u32 address) const -> bool {
return valid && tag == (address & ~0xfff);
return valid && tag == (address & ~0xe000'0fff);
}
auto CPU::InstructionCache::Line::fill(u32 address) -> void {
cpu.step(48);
valid = 1;
tag = address & ~0xfff;
tag = address & ~0xe000'0fff;
words[0] = bus.read<Word>(tag | index | 0x00);
words[1] = bus.read<Word>(tag | index | 0x04);
words[2] = bus.read<Word>(tag | index | 0x08);
@ -42,7 +42,7 @@ auto CPU::InstructionCache::step(u32 address) -> void {
if(!line.hit(address)) {
cpu.step(48);
line.valid = 1;
line.tag = address & ~0xfff;
line.tag = address & ~0xe000'0fff;
} else {
cpu.step(2);
}

View File

@ -134,6 +134,7 @@ auto CPU::BC1(bool value, bool likely, s16 imm) -> void {
if(!scc.status.enable.coprocessor1) return exception.coprocessor1();
if(CF == value) branch.take(ipu.pc + 4 + (imm << 2));
else if(likely) branch.discard();
else branch.notTaken();
}
auto CPU::CFC1(r64& rt, u8 rd) -> void {
@ -554,12 +555,12 @@ auto CPU::FTRUNC_W_D(u8 fd, u8 fs) -> void {
auto CPU::LDC1(u8 ft, cr64& rs, s16 imm) -> void {
if(!scc.status.enable.coprocessor1) return exception.coprocessor1();
if(auto data = read<Dual>(rs.u32 + imm)) FT(u64) = *data;
if(auto data = read<Dual>(rs.u64 + imm)) FT(u64) = *data;
}
auto CPU::LWC1(u8 ft, cr64& rs, s16 imm) -> void {
if(!scc.status.enable.coprocessor1) return exception.coprocessor1();
if(auto data = read<Word>(rs.u32 + imm)) FT(u32) = *data;
if(auto data = read<Word>(rs.u64 + imm)) FT(u32) = *data;
}
auto CPU::MFC1(r64& rt, u8 fs) -> void {
@ -574,12 +575,12 @@ auto CPU::MTC1(cr64& rt, u8 fs) -> void {
auto CPU::SDC1(u8 ft, cr64& rs, s16 imm) -> void {
if(!scc.status.enable.coprocessor1) return exception.coprocessor1();
write<Dual>(rs.u32 + imm, FT(u64));
write<Dual>(rs.u64 + imm, FT(u64));
}
auto CPU::SWC1(u8 ft, cr64& rs, s16 imm) -> void {
if(!scc.status.enable.coprocessor1) return exception.coprocessor1();
write<Word>(rs.u32 + imm, FT(u32));
write<Word>(rs.u64 + imm, FT(u32));
}
#undef CF

View File

@ -31,6 +31,7 @@ auto CPU::ANDI(r64& rt, cr64& rs, u16 imm) -> void {
//branch on equal: compares the full 64-bit registers.
//a failed comparison is still reported to the branch state via notTaken().
auto CPU::BEQ(cr64& rs, cr64& rt, s16 imm) -> void {
  const bool equal = rs.u64 == rt.u64;
  if(!equal) return branch.notTaken();
  branch.take(PC + 4 + (imm << 2));
}
auto CPU::BEQL(cr64& rs, cr64& rt, s16 imm) -> void {
@ -40,17 +41,20 @@ auto CPU::BEQL(cr64& rs, cr64& rt, s16 imm) -> void {
//branch on greater than or equal to zero (signed 64-bit test).
//a failed test is still reported to the branch state via notTaken().
auto CPU::BGEZ(cr64& rs, s16 imm) -> void {
  const bool nonNegative = rs.s64 >= 0;
  if(!nonNegative) return branch.notTaken();
  branch.take(PC + 4 + (imm << 2));
}
auto CPU::BGEZAL(cr64& rs, s16 imm) -> void {
RA.u64 = s32(PC + 8);
bool inDelaySlot = branch.inDelaySlot();
if(rs.s64 >= 0) branch.take(PC + 4 + (imm << 2));
else branch.notTaken();
RA.u64 = s32(inDelaySlot ? branch.pc+4 : PC+8);
}
auto CPU::BGEZALL(cr64& rs, s16 imm) -> void {
RA.u64 = s32(PC + 8);
if(rs.s64 >= 0) branch.take(PC + 4 + (imm << 2));
else branch.discard();
RA.u64 = s32(PC + 8);
}
auto CPU::BGEZL(cr64& rs, s16 imm) -> void {
@ -60,6 +64,7 @@ auto CPU::BGEZL(cr64& rs, s16 imm) -> void {
//branch on greater than zero (signed 64-bit test).
//a failed test is still reported to the branch state via notTaken().
auto CPU::BGTZ(cr64& rs, s16 imm) -> void {
  const bool positive = rs.s64 > 0;
  if(!positive) return branch.notTaken();
  branch.take(PC + 4 + (imm << 2));
}
auto CPU::BGTZL(cr64& rs, s16 imm) -> void {
@ -69,6 +74,7 @@ auto CPU::BGTZL(cr64& rs, s16 imm) -> void {
//branch on less than or equal to zero (signed 64-bit test).
//a failed test is still reported to the branch state via notTaken().
auto CPU::BLEZ(cr64& rs, s16 imm) -> void {
  const bool nonPositive = rs.s64 <= 0;
  if(!nonPositive) return branch.notTaken();
  branch.take(PC + 4 + (imm << 2));
}
auto CPU::BLEZL(cr64& rs, s16 imm) -> void {
@ -78,11 +84,13 @@ auto CPU::BLEZL(cr64& rs, s16 imm) -> void {
//branch on less than zero (signed 64-bit test).
//a failed test is still reported to the branch state via notTaken().
auto CPU::BLTZ(cr64& rs, s16 imm) -> void {
  const bool negative = rs.s64 < 0;
  if(!negative) return branch.notTaken();
  branch.take(PC + 4 + (imm << 2));
}
//branch on less than zero and link.
//RA is written unconditionally, and before rs is tested, so the order of the
//two statements below matters when rs refers to RA.
auto CPU::BLTZAL(cr64& rs, s16 imm) -> void {
  RA.u64 = s32(PC + 8);
  const bool negative = rs.s64 < 0;
  if(!negative) return branch.notTaken();
  branch.take(PC + 4 + (imm << 2));
}
auto CPU::BLTZALL(cr64& rs, s16 imm) -> void {
@ -98,6 +106,7 @@ auto CPU::BLTZL(cr64& rs, s16 imm) -> void {
//branch on not equal: compares the full 64-bit registers.
//a failed comparison is still reported to the branch state via notTaken().
auto CPU::BNE(cr64& rs, cr64& rt, s16 imm) -> void {
  const bool different = rs.u64 != rt.u64;
  if(!different) return branch.notTaken();
  branch.take(PC + 4 + (imm << 2));
}
auto CPU::BNEL(cr64& rs, cr64& rt, s16 imm) -> void {
@ -111,6 +120,8 @@ auto CPU::BREAK() -> void {
auto CPU::CACHE(u8 operation, cr64& rs, s16 imm) -> void {
u32 address = rs.u64 + imm;
if (auto phys = devirtualize(address)) address = *phys;
else return;
switch(operation) {
@ -346,34 +357,39 @@ auto CPU::DSUBU(r64& rd, cr64& rs, cr64& rt) -> void {
}
auto CPU::J(u32 imm) -> void {
branch.take((PC + 4 & 0xf000'0000) | (imm << 2));
if (branch.inDelaySlotTaken()) return;
branch.take((PC + 4 & 0xffff'ffff'f000'0000) | (imm << 2));
}
auto CPU::JAL(u32 imm) -> void {
RA.u64 = s32(PC + 8);
branch.take((PC + 4 & 0xf000'0000) | (imm << 2));
RA.u64 = branch.inDelaySlotTaken() ? branch.pc+4 : PC+8;
if (!branch.inDelaySlotTaken()) branch.take((PC + 4 & 0xffff'ffff'f000'0000) | (imm << 2));
else if (!branch.inDelaySlot()) branch.notTaken();
}
auto CPU::JALR(r64& rd, cr64& rs) -> void {
rd.u64 = s32(PC + 8);
branch.take(rs.u32);
u64 tgt = rs.u64;
rd.u64 = branch.inDelaySlotTaken() ? branch.pc+4 : PC+8;
if (!branch.inDelaySlotTaken()) branch.take(tgt);
else if (!branch.inDelaySlot()) branch.notTaken();
}
auto CPU::JR(cr64& rs) -> void {
branch.take(rs.u32);
if (!branch.inDelaySlotTaken()) branch.take(rs.u64);
else if (!branch.inDelaySlot()) branch.notTaken();
}
auto CPU::LB(r64& rt, cr64& rs, s16 imm) -> void {
if(auto data = read<Byte>(rs.u32 + imm)) rt.u64 = s8(*data);
if(auto data = read<Byte>(rs.u64 + imm)) rt.u64 = s8(*data);
}
auto CPU::LBU(r64& rt, cr64& rs, s16 imm) -> void {
if(auto data = read<Byte>(rs.u32 + imm)) rt.u64 = u8(*data);
if(auto data = read<Byte>(rs.u64 + imm)) rt.u64 = u8(*data);
}
auto CPU::LD(r64& rt, cr64& rs, s16 imm) -> void {
if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction();
if(auto data = read<Dual>(rs.u32 + imm)) rt.u64 = *data;
if(auto data = read<Dual>(rs.u64 + imm)) rt.u64 = *data;
}
auto CPU::LDL(r64& rt, cr64& rs, s16 imm) -> void {
@ -557,27 +573,31 @@ auto CPU::LDR(r64& rt, cr64& rs, s16 imm) -> void {
}
auto CPU::LH(r64& rt, cr64& rs, s16 imm) -> void {
if(auto data = read<Half>(rs.u32 + imm)) rt.u64 = s16(*data);
if(auto data = read<Half>(rs.u64 + imm)) rt.u64 = s16(*data);
}
auto CPU::LHU(r64& rt, cr64& rs, s16 imm) -> void {
if(auto data = read<Half>(rs.u32 + imm)) rt.u64 = u16(*data);
if(auto data = read<Half>(rs.u64 + imm)) rt.u64 = u16(*data);
}
auto CPU::LL(r64& rt, cr64& rs, s16 imm) -> void {
if(auto data = read<Word>(rs.u32 + imm)) {
rt.u64 = s32(*data);
scc.ll = tlb.physicalAddress >> 4;
scc.llbit = 1;
if(auto address = devirtualize(rs.u64 + imm)) {
if (auto data = read<Word>(*address)) {
rt.u64 = s32(*data);
scc.ll = (*address & 0x1fff'ffff) >> 4;
scc.llbit = 1;
}
}
}
auto CPU::LLD(r64& rt, cr64& rs, s16 imm) -> void {
if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction();
if(auto data = read<Dual>(rs.u32 + imm)) {
rt.u64 = *data;
scc.ll = tlb.physicalAddress >> 4;
scc.llbit = 1;
if(auto address = devirtualize(rs.u64 + imm)) {
if (auto data = read<Dual>(*address)) {
rt.u64 = *data;
scc.ll = (*address & 0x1fff'ffff) >> 4;
scc.llbit = 1;
}
}
}
@ -586,7 +606,7 @@ auto CPU::LUI(r64& rt, u16 imm) -> void {
}
auto CPU::LW(r64& rt, cr64& rs, s16 imm) -> void {
if(auto data = read<Word>(rs.u32 + imm)) rt.u64 = s32(*data);
if(auto data = read<Word>(rs.u64 + imm)) rt.u64 = s32(*data);
}
auto CPU::LWL(r64& rt, cr64& rs, s16 imm) -> void {
@ -700,7 +720,7 @@ auto CPU::LWR(r64& rt, cr64& rs, s16 imm) -> void {
}
auto CPU::LWU(r64& rt, cr64& rs, s16 imm) -> void {
if(auto data = read<Word>(rs.u32 + imm)) rt.u64 = u32(*data);
if(auto data = read<Word>(rs.u64 + imm)) rt.u64 = u32(*data);
}
auto CPU::MFHI(r64& rd) -> void {
@ -746,31 +766,35 @@ auto CPU::ORI(r64& rt, cr64& rs, u16 imm) -> void {
}
auto CPU::SB(cr64& rt, cr64& rs, s16 imm) -> void {
write<Byte>(rs.u32 + imm, rt.u32);
write<Byte>(rs.u64 + imm, rt.u32);
}
auto CPU::SC(r64& rt, cr64& rs, s16 imm) -> void {
if(scc.llbit) {
scc.llbit = 0;
rt.u64 = write<Word>(rs.u32 + imm, rt.u32);
} else {
rt.u64 = 0;
if(auto address = devirtualize(rs.u64 + imm)) {
if(scc.llbit) {
scc.llbit = 0;
rt.u64 = write<Word>(*address, rt.u32);
} else {
rt.u64 = 0;
}
}
}
auto CPU::SCD(r64& rt, cr64& rs, s16 imm) -> void {
if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction();
if(scc.llbit) {
scc.llbit = 0;
rt.u64 = write<Dual>(rs.u32 + imm, rt.u64);
} else {
rt.u64 = 0;
if(auto address = devirtualize(rs.u64 + imm)) {
if(scc.llbit) {
scc.llbit = 0;
rt.u64 = write<Dual>(*address, rt.u64);
} else {
rt.u64 = 0;
}
}
}
auto CPU::SD(cr64& rt, cr64& rs, s16 imm) -> void {
if(!context.kernelMode() && context.bits == 32) return exception.reservedInstruction();
write<Dual>(rs.u32 + imm, rt.u64);
write<Dual>(rs.u64 + imm, rt.u64);
}
auto CPU::SDL(cr64& rt, cr64& rs, s16 imm) -> void {
@ -918,7 +942,7 @@ auto CPU::SDR(cr64& rt, cr64& rs, s16 imm) -> void {
}
auto CPU::SH(cr64& rt, cr64& rs, s16 imm) -> void {
write<Half>(rs.u32 + imm, rt.u32);
write<Half>(rs.u64 + imm, rt.u32);
}
auto CPU::SLL(r64& rd, cr64& rt, u8 sa) -> void {
@ -971,7 +995,7 @@ auto CPU::SUBU(r64& rd, cr64& rs, cr64& rt) -> void {
}
auto CPU::SW(cr64& rt, cr64& rs, s16 imm) -> void {
write<Word>(rs.u32 + imm, rt.u32);
write<Word>(rs.u64 + imm, rt.u32);
}
auto CPU::SWL(cr64& rt, cr64& rs, s16 imm) -> void {

View File

@ -6,8 +6,7 @@ auto CPU::getControlRegister(n5 index) -> u64 {
data.bit(31) = scc.index.probeFailure;
break;
case 1: //random
data.bit(0,4) = scc.random.index;
data.bit(5) = scc.random.unused;
data.bit(0,5) = getControlRandom();
break;
case 2: //entrylo0
data.bit(0) = scc.tlb.global[0];
@ -31,8 +30,7 @@ auto CPU::getControlRegister(n5 index) -> u64 {
data.bit(13,24) = scc.tlb.pageMask.bit(13,24);
break;
case 6: //wired
data.bit(0,4) = scc.wired.index;
data.bit(5) = scc.wired.unused;
data.bit(0,5) = scc.wired.index;
break;
case 8: //badvaddr
data = scc.badVirtualAddress;
@ -90,7 +88,9 @@ auto CPU::getControlRegister(n5 index) -> u64 {
case 16: //configuration
data.bit( 0, 1) = scc.configuration.coherencyAlgorithmKSEG0;
data.bit( 2, 3) = scc.configuration.cu;
data.bit( 4,14) = 0b11001000110;
data.bit(15) = scc.configuration.bigEndian;
data.bit(16,23) = 0b00000110;
data.bit(24,27) = scc.configuration.sysadWritebackPattern;
data.bit(28,30) = scc.configuration.systemClockRatio;
break;
@ -138,8 +138,6 @@ auto CPU::setControlRegister(n5 index, n64 data) -> void {
scc.index.probeFailure = data.bit(31);
break;
case 1: //random
//scc.random.index = data.bit(0,4);
scc.random.unused = data.bit(5);
break;
case 2: //entrylo0
scc.tlb.global[0] = data.bit(0);
@ -147,7 +145,6 @@ auto CPU::setControlRegister(n5 index, n64 data) -> void {
scc.tlb.dirty[0] = data.bit(2);
scc.tlb.cacheAlgorithm[0] = data.bit(3, 5);
scc.tlb.physicalAddress[0].bit(12,35) = data.bit(6,29);
scc.tlb.synchronize();
break;
case 3: //entrylo1
scc.tlb.global[1] = data.bit(0);
@ -155,20 +152,15 @@ auto CPU::setControlRegister(n5 index, n64 data) -> void {
scc.tlb.dirty[1] = data.bit(2);
scc.tlb.cacheAlgorithm[1] = data.bit(3, 5);
scc.tlb.physicalAddress[1].bit(12,35) = data.bit(6,29);
scc.tlb.synchronize();
break;
case 4: //context
scc.context.badVirtualAddress = data.bit( 4,22);
scc.context.pageTableEntryBase = data.bit(23,63);
break;
case 5: //pagemask
scc.tlb.pageMask.bit(13,24) = data.bit(13,24);
scc.tlb.synchronize();
break;
case 6: //wired
scc.wired.index = data.bit(0,4);
scc.wired.unused = data.bit(5);
scc.random.index = 31;
scc.wired.index = data.bit(0,5);
break;
case 8: //badvaddr
//scc.badVirtualAddress = data; //read-only
@ -180,7 +172,6 @@ auto CPU::setControlRegister(n5 index, n64 data) -> void {
scc.tlb.addressSpaceID = data.bit( 0, 7);
scc.tlb.virtualAddress.bit(13,39) = data.bit(13,39);
scc.tlb.region = data.bit(62,63);
scc.tlb.synchronize();
break;
case 11: //compare
scc.compare = data.bit(0,31) << 1;
@ -238,7 +229,7 @@ auto CPU::setControlRegister(n5 index, n64 data) -> void {
context.setMode();
break;
case 17: //load linked address
scc.ll = data;
scc.ll.bit(0,31) = data.bit(0,31);
break;
case 18: //watchlo
scc.watchLo.trapOnWrite = data.bit(0);
@ -249,8 +240,6 @@ auto CPU::setControlRegister(n5 index, n64 data) -> void {
scc.watchHi.physicalAddressExtended = data.bit(0,3);
break;
case 20: //xcontext
scc.xcontext.badVirtualAddress = data.bit( 4,30);
scc.xcontext.region = data.bit(31,32);
scc.xcontext.pageTableEntryBase = data.bit(33,63);
break;
case 26: //parity error
@ -270,6 +259,11 @@ auto CPU::setControlRegister(n5 index, n64 data) -> void {
}
}
//Produces the value reported for the COP0 Random register.
//While Wired is at most 31 the result lies in [Wired, 31];
//a larger Wired value yields an arbitrary 6-bit number instead.
auto CPU::getControlRandom() -> u8 {
  if(scc.wired.index <= 31) {
    return random() % (32 - scc.wired.index) + scc.wired.index;
  }
  return (n6)random();
}
auto CPU::DMFC0(r64& rt, u8 rd) -> void {
if(!context.kernelMode()) {
if(!scc.status.enable.coprocessor0) return exception.coprocessor0();
@ -349,6 +343,7 @@ auto CPU::TLBWI() -> void {
}
if(scc.index.tlbEntry >= TLB::Entries) return;
tlb.entry[scc.index.tlbEntry] = scc.tlb;
tlb.entry[scc.index.tlbEntry].synchronize();
debugger.tlbWrite(scc.index.tlbEntry);
}
@ -356,7 +351,9 @@ auto CPU::TLBWR() -> void {
if(!context.kernelMode()) {
if(!scc.status.enable.coprocessor0) return exception.coprocessor0();
}
if(scc.random.index >= TLB::Entries) return;
tlb.entry[scc.random.index] = scc.tlb;
debugger.tlbWrite(scc.random.index);
u8 index = getControlRandom();
if(index >= TLB::Entries) return;
tlb.entry[index] = scc.tlb;
tlb.entry[index].synchronize();
debugger.tlbWrite(index);
}

View File

@ -75,8 +75,8 @@ auto CPU::userSegment64(u64 address) const -> Context::Segment {
auto CPU::segment(u64 address) -> Context::Segment {
auto segment = context.segment[address >> 29 & 7];
//if(likely(context.bits == 32))
return (Context::Segment)segment;
if(likely(context.bits == 32))
return (Context::Segment)segment;
switch(segment) {
case Context::Segment::Kernel64:
return kernelSegment64(address);
@ -91,6 +91,7 @@ auto CPU::segment(u64 address) -> Context::Segment {
auto CPU::devirtualize(u64 address) -> maybe<u64> {
switch(context.segment[address >> 29 & 7]) {
case Context::Segment::Unused:
addressException(address);
exception.addressLoad();
return nothing;
case Context::Segment::Mapped:
@ -108,6 +109,7 @@ auto CPU::fetch(u64 address) -> u32 {
switch(segment(address)) {
case Context::Segment::Unused:
step(1);
addressException(address);
exception.addressLoad();
return 0; //nop
case Context::Segment::Mapped:
@ -134,14 +136,22 @@ auto CPU::read(u64 address) -> maybe<u64> {
if constexpr(Accuracy::CPU::AddressErrors) {
if(unlikely(address & Size - 1)) {
step(1);
addressException(address);
exception.addressLoad();
return nothing;
}
if (context.bits == 32 && unlikely((s32)address != address)) {
step(1);
addressException(address);
exception.addressLoad();
return nothing;
}
}
switch(segment(address)) {
case Context::Segment::Unused:
step(1);
addressException(address);
exception.addressLoad();
return nothing;
case Context::Segment::Mapped:
@ -168,6 +178,13 @@ auto CPU::write(u64 address, u64 data) -> bool {
if constexpr(Accuracy::CPU::AddressErrors) {
if(unlikely(address & Size - 1)) {
step(1);
addressException(address);
exception.addressStore();
return false;
}
if (context.bits == 32 && unlikely((s32)address != address)) {
step(1);
addressException(address);
exception.addressStore();
return false;
}
@ -176,6 +193,7 @@ auto CPU::write(u64 address, u64 data) -> bool {
switch(segment(address)) {
case Context::Segment::Unused:
step(1);
addressException(address);
exception.addressStore();
return false;
case Context::Segment::Mapped:
@ -196,3 +214,10 @@ auto CPU::write(u64 address, u64 data) -> bool {
unreachable;
}
//Latches a faulting virtual address into the COP0 state:
//BadVAddr receives the whole address, Context and XContext receive the address
//shifted right by 13, and XContext.region receives the address shifted right by 62.
auto CPU::addressException(u64 address) -> void {
  const u64 shifted13 = address >> 13;
  const u64 shifted62 = address >> 62;

  scc.badVirtualAddress          = address;
  scc.context.badVirtualAddress  = shifted13;
  scc.xcontext.badVirtualAddress = shifted13;
  scc.xcontext.region            = shifted62;
}

View File

@ -41,7 +41,6 @@ auto CPU::serialize(serializer& s) -> void {
s(e.addressMaskHi);
s(e.addressMaskLo);
s(e.addressSelect);
s(e.addressCompare);
}
s(tlb.physicalAddress);
@ -52,8 +51,6 @@ auto CPU::serialize(serializer& s) -> void {
s(scc.index.tlbEntry);
s(scc.index.probeFailure);
s(scc.random.index);
s(scc.random.unused);
s(scc.tlb.global);
s(scc.tlb.valid);
s(scc.tlb.dirty);
@ -67,11 +64,9 @@ auto CPU::serialize(serializer& s) -> void {
s(scc.tlb.addressMaskHi);
s(scc.tlb.addressMaskLo);
s(scc.tlb.addressSelect);
s(scc.tlb.addressCompare);
s(scc.context.badVirtualAddress);
s(scc.context.pageTableEntryBase);
s(scc.wired.index);
s(scc.wired.unused);
s(scc.badVirtualAddress);
s(scc.count);
s(scc.compare);

View File

@ -2,8 +2,8 @@
auto CPU::TLB::load(u32 address) -> Match {
for(auto& entry : this->entry) {
if(!entry.globals || entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue;
if((address & entry.addressMaskHi) != (u32)entry.addressCompare) continue;
if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue;
if((u32)(address & entry.addressMaskHi) != (u32)entry.virtualAddress) continue;
bool lo = address & entry.addressSelect;
if(!entry.valid[lo]) {
exception(address);
@ -23,8 +23,8 @@ auto CPU::TLB::load(u32 address) -> Match {
auto CPU::TLB::store(u32 address) -> Match {
for(auto& entry : this->entry) {
if(!entry.globals || entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue;
if((address & entry.addressMaskHi) != (u32)entry.addressCompare) continue;
if(!entry.globals && entry.addressSpaceID != self.scc.tlb.addressSpaceID) continue;
if((u32)(address & entry.addressMaskHi) != (u32)entry.virtualAddress) continue;
bool lo = address & entry.addressSelect;
if(!entry.valid[lo]) {
exception(address);
@ -57,9 +57,14 @@ auto CPU::TLB::exception(u32 address) -> void {
}
auto CPU::TLB::Entry::synchronize() -> void {
pageMask = pageMask & (0b101010101010 << 13);
pageMask |= pageMask >> 1;
globals = global[0] && global[1];
addressMaskHi = ~(pageMask | 0x1fff);
addressMaskHi = ~(n40)(pageMask | 0x1fff);
addressMaskLo = (pageMask | 0x1fff) >> 1;
addressSelect = addressMaskLo + 1;
addressCompare = virtualAddress & addressMaskHi;
physicalAddress[0] &= 0xffff'ffff;
physicalAddress[1] &= 0xffff'ffff;
virtualAddress &= addressMaskHi;
global[0] = global[1] = globals;
}

View File

@ -6,9 +6,6 @@ inline auto Bus::read(u32 address) -> u64 {
if(address <= 0x007f'ffff) return rdram.ram.read<Size>(address);
if(address <= 0x03ef'ffff) return unmapped;
if(address <= 0x03ff'ffff) return rdram.read<Size>(address);
if(address <= 0x0400'0fff) return rsp.dmem.read<Size>(address);
if(address <= 0x0400'1fff) return rsp.imem.read<Size>(address);
if(address <= 0x0403'ffff) return unmapped;
if(address <= 0x0407'ffff) return rsp.read<Size>(address);
if(address <= 0x040f'ffff) return rsp.status.read<Size>(address);
if(address <= 0x041f'ffff) return rdp.read<Size>(address);
@ -20,44 +17,23 @@ inline auto Bus::read(u32 address) -> u64 {
if(address <= 0x047f'ffff) return ri.read<Size>(address);
if(address <= 0x048f'ffff) return si.read<Size>(address);
if(address <= 0x04ff'ffff) return unmapped;
if(address <= 0x0500'03ff) return dd.c2s.read<Size>(address);
if(address <= 0x0500'04ff) return dd.ds.read<Size>(address);
if(address <= 0x0500'057f) return dd.read<Size>(address);
if(address <= 0x0500'05bf) return dd.ms.read<Size>(address);
if(address <= 0x05ff'ffff) return unmapped;
if(address <= 0x063f'ffff) return dd.iplrom.read<Size>(address);
if(address <= 0x07ff'ffff) return unmapped;
if(address <= 0x0fff'ffff) {
if(cartridge.ram ) return cartridge.ram.read<Size>(address);
if(cartridge.flash) return cartridge.flash.read<Size>(address);
return unmapped;
}
if(address <= 0x1fbf'ffff) {
if(address >= 0x13ff'0000 && address <= 0x13ff'ffff) {
return cartridge.isviewer.read<Size>(address);
}
return cartridge.rom.read<Size>(address);
}
if(address <= 0x1fc0'07bf) {
if(pi.io.romLockout) return unmapped;
return pi.rom.read<Size>(address);
}
if(address <= 0x1fc0'07ff) return pi.ram.read<Size>(address);
if(address <= 0x1fbf'ffff) return pi.read<Size>(address);
if(address <= 0x1fcf'ffff) return pif.read<Size>(address);
if(address <= 0x7fff'ffff) return pi.read<Size>(address);
return unmapped;
}
template<u32 Size>
inline auto Bus::write(u32 address, u64 data) -> void {
address &= 0x1fff'ffff - (Size - 1);
cpu.recompiler.invalidate(address + 0); if constexpr(Size == Dual)
cpu.recompiler.invalidate(address + 4);
if constexpr(Accuracy::CPU::Recompiler) {
cpu.recompiler.invalidate(address + 0); if constexpr(Size == Dual)
cpu.recompiler.invalidate(address + 4);
}
if(address <= 0x007f'ffff) return rdram.ram.write<Size>(address, data);
if(address <= 0x03ef'ffff) return;
if(address <= 0x03ff'ffff) return rdram.write<Size>(address, data);
if(address <= 0x0400'0fff) return rsp.dmem.write<Size>(address, data);
if(address <= 0x0400'1fff) return rsp.recompiler.invalidate(), rsp.imem.write<Size>(address, data);
if(address <= 0x0403'ffff) return;
if(address <= 0x0407'ffff) return rsp.write<Size>(address, data);
if(address <= 0x040f'ffff) return rsp.status.write<Size>(address, data);
if(address <= 0x041f'ffff) return rdp.write<Size>(address, data);
@ -69,28 +45,8 @@ inline auto Bus::write(u32 address, u64 data) -> void {
if(address <= 0x047f'ffff) return ri.write<Size>(address, data);
if(address <= 0x048f'ffff) return si.write<Size>(address, data);
if(address <= 0x04ff'ffff) return;
if(address <= 0x0500'03ff) return dd.c2s.write<Size>(address, data);
if(address <= 0x0500'04ff) return dd.ds.write<Size>(address, data);
if(address <= 0x0500'057f) return dd.write<Size>(address, data);
if(address <= 0x0500'05bf) return dd.ms.write<Size>(address, data);
if(address <= 0x05ff'ffff) return;
if(address <= 0x063f'ffff) return dd.iplrom.write<Size>(address, data);
if(address <= 0x07ff'ffff) return;
if(address <= 0x0fff'ffff) {
if(cartridge.ram ) return cartridge.ram.write<Size>(address, data);
if(cartridge.flash) return cartridge.flash.write<Size>(address, data);
return;
}
if(address <= 0x1fbf'ffff) {
if(address >= 0x13ff'0000 && address <= 0x13ff'ffff) {
cartridge.isviewer.write<Size>(address, data);
}
return cartridge.rom.write<Size>(address, data);
}
if(address <= 0x1fc0'07bf) {
if(pi.io.romLockout) return;
return pi.rom.write<Size>(address, data);
}
if(address <= 0x1fc0'07ff) return pi.ram.write<Size>(address, data);
if(address <= 0x1fbf'ffff) return pi.write<Size>(address, data);
if(address <= 0x1fcf'ffff) return pif.write<Size>(address, data);
if(address <= 0x7fff'ffff) return pi.write<Size>(address, data);
return;
}

View File

@ -48,8 +48,7 @@ struct IO {
((T*)this)->writeWord(address, data);
}
if constexpr(Size == Dual) {
((T*)this)->writeWord(address + 0, data >> 32);
((T*)this)->writeWord(address + 4, data >> 0);
((T*)this)->writeWord(address, data >> 32);
}
}
};

View File

@ -2,6 +2,31 @@ namespace Memory {
#include "lsb/readable.hpp"
#include "lsb/writable.hpp"
#include "io.hpp"
//Read-only memory whose wide accesses are assembled from narrower ones:
//a Dual read is built from two Word reads and a Word read from two Half reads,
//with the lower address supplying the upper part of the result.
//Any other size is forwarded to Memory::Readable unchanged.
struct Readable16 : Memory::Readable {
  template<u32 Size>
  auto read(u32 address) -> u64 {
    if constexpr(Size == Dual) {
      const u64 upper = read<Word>(address);
      const u64 lower = read<Word>(address + 4);
      return (upper << 32) | lower;
    } else if constexpr(Size == Word) {
      const u64 upper = read<Half>(address);
      const u64 lower = read<Half>(address + 2);
      return (upper << 16) | lower;
    } else {
      return Memory::Readable::read<Size>(address);
    }
  }
};
//Writable memory whose wide accesses are split into narrower ones:
//Dual accesses become two Word accesses and Word accesses become two Half accesses,
//with the upper part of the value living at the lower address.
//Any other size is forwarded to Memory::Writable unchanged.
struct Writable16 : Memory::Writable {
  template<u32 Size>
  auto read(u32 address) -> u64 {
    if constexpr(Size == Dual) {
      const u64 upper = read<Word>(address);
      const u64 lower = read<Word>(address + 4);
      return (upper << 32) | lower;
    } else if constexpr(Size == Word) {
      const u64 upper = read<Half>(address);
      const u64 lower = read<Half>(address + 2);
      return (upper << 16) | lower;
    } else {
      return Memory::Writable::read<Size>(address);
    }
  }

  template<u32 Size>
  auto write(u32 address, u64 data) -> void {
    if constexpr(Size == Dual) {
      //upper word first, then lower word
      write<Word>(address, data >> 32);
      write<Word>(address + 4, data);
    } else if constexpr(Size == Word) {
      //upper half first, then lower half
      write<Half>(address, data >> 16);
      write<Half>(address + 2, data);
    } else {
      Memory::Writable::write<Size>(address, data);
    }
  }
};
}
struct Bus {

View File

@ -45,6 +45,7 @@ namespace ares::Nintendo64 {
RSP_DMA,
PI_DMA_Read,
PI_DMA_Write,
PI_BUS_Write,
SI_DMA_Read,
SI_DMA_Write,
};
@ -61,11 +62,13 @@ namespace ares::Nintendo64 {
#include <n64/vi/vi.hpp>
#include <n64/ai/ai.hpp>
#include <n64/pi/pi.hpp>
#include <n64/pif/pif.hpp>
#include <n64/ri/ri.hpp>
#include <n64/si/si.hpp>
#include <n64/rdram/rdram.hpp>
#include <n64/cpu/cpu.hpp>
#include <n64/rdp/rdp.hpp>
#include <n64/rsp/rsp.hpp>
#include <n64/rdp/rdp.hpp>
#include <n64/memory/bus.hpp>
#include <n64/pi/bus.hpp>
}

View File

@ -0,0 +1,76 @@
//32-bit read entry point for the PI.
//Addresses up to 0x046f'ffff go to the PI registers (ioRead); everything above goes to the PI bus.
//If a bus write is still flagged busy, it is completed immediately and the latched value is returned.
inline auto PI::readWord(u32 address) -> u32 {
  if(address > 0x046f'ffff) {
    if(unlikely(io.ioBusy)) {
      //technically, we should wait until Queue::PI_BUS_Write
      writeForceFinish();
      return io.busLatch;
    }
    return busRead<Word>(address);
  }
  return ioRead(address);
}
//Reads a Half or Word from the PI bus.
//The address is tested against ascending upper bounds, so every test implicitly starts
//where the previous one ended; the order of the checks must not be changed.
//Ranges without a handler return 0 (unmapped).
template <u32 Size>
inline auto PI::busRead(u32 address) -> u32 {
static_assert(Size == Half || Size == Word); //PI bus will do 32-bit (CPU) or 16-bit (DMA) only
static constexpr u32 unmapped = 0;
if(address <= 0x04ff'ffff) return unmapped; //Address range not memory mapped, only accessible via DMA
//0x0500'0000 - 0x063f'ffff: handlers on the dd object
if(address <= 0x0500'03ff) return dd.c2s.read<Size>(address);
if(address <= 0x0500'04ff) return dd.ds.read<Size>(address);
if(address <= 0x0500'057f) return dd.read<Size>(address);
if(address <= 0x0500'05bf) return dd.ms.read<Size>(address);
if(address <= 0x05ff'ffff) return unmapped;
if(address <= 0x063f'ffff) return dd.iplrom.read<Size>(address);
if(address <= 0x07ff'ffff) return unmapped;
//0x0800'0000 - 0x0fff'ffff: cartridge.ram is preferred over cartridge.flash when both test true
if(address <= 0x0fff'ffff) {
if(cartridge.ram ) return cartridge.ram.read<Size>(address);
if(cartridge.flash) return cartridge.flash.read<Size>(address);
return unmapped;
}
//0x1000'0000 - 0x13fe'ffff: cartridge ROM; the final 64KiB up to 0x13ff'ffff goes to cartridge.isviewer
if(address <= 0x13fe'ffff) return cartridge.rom.read<Size>(address);
if(address <= 0x13ff'ffff) return cartridge.isviewer.read<Size>(address);
if(address <= 0x7fff'ffff) return unmapped;
return unmapped; //accesses here actually lock out the RCP
}
//32-bit write entry point for the PI.
//Addresses up to 0x046f'ffff go to the PI registers (ioWrite).
//Bus writes are dropped while a previous one is still flagged busy; otherwise the PI is marked busy,
//the value is latched, a Queue::PI_BUS_Write event is scheduled 400 units ahead,
//and the write is forwarded to the bus.
inline auto PI::writeWord(u32 address, u32 data) -> void {
  if(address > 0x046f'ffff) {
    if(!io.ioBusy) {
      io.ioBusy = 1;
      io.busLatch = data;
      queue.insert(Queue::PI_BUS_Write, 400);
      busWrite<Word>(address, data);
    }
    return;
  }
  ioWrite(address, data);
}
//Writes a Half or Word to the PI bus.
//The address is tested against ascending upper bounds, so every test implicitly starts
//where the previous one ended; the order of the checks must not be changed.
//Writes to ranges without a handler are silently ignored.
template <u32 Size>
inline auto PI::busWrite(u32 address, u32 data) -> void {
static_assert(Size == Half || Size == Word); //PI bus will do 32-bit (CPU) or 16-bit (DMA) only
if(address <= 0x04ff'ffff) return; //Address range not memory mapped, only accessible via DMA
//0x0500'0000 - 0x063f'ffff: handlers on the dd object
if(address <= 0x0500'03ff) return dd.c2s.write<Size>(address, data);
if(address <= 0x0500'04ff) return dd.ds.write<Size>(address, data);
if(address <= 0x0500'057f) return dd.write<Size>(address, data);
if(address <= 0x0500'05bf) return dd.ms.write<Size>(address, data);
if(address <= 0x05ff'ffff) return;
if(address <= 0x063f'ffff) return dd.iplrom.write<Size>(address, data);
if(address <= 0x07ff'ffff) return;
//0x0800'0000 - 0x0fff'ffff: cartridge.ram is preferred over cartridge.flash when both test true
if(address <= 0x0fff'ffff) {
if(cartridge.ram ) return cartridge.ram.write<Size>(address, data);
if(cartridge.flash) return cartridge.flash.write<Size>(address, data);
return;
}
if(address <= 0x13fe'ffff) return cartridge.rom.write<Size>(address, data);
//0x13ff'0000 - 0x13ff'ffff: the busy state is cleared before the write is forwarded to cartridge.isviewer
if(address <= 0x13ff'ffff) {
writeForceFinish(); //Debugging channel for homebrew, be gentle
return cartridge.isviewer.write<Size>(address, data);
}
if(address <= 0x7fff'ffff) return;
}
//Clears the PI bus busy flag.
//NOTE(review): presumably called when the Queue::PI_BUS_Write event fires - confirm against the queue dispatcher.
inline auto PI::writeFinished() -> void {
io.ioBusy = 0;
}
//Ends a pending bus write early: clears the busy flag and removes the
//scheduled Queue::PI_BUS_Write event from the queue.
inline auto PI::writeForceFinish() -> void {
io.ioBusy = 0;
queue.remove(Queue::PI_BUS_Write);
}

View File

@ -1,13 +1,4 @@
auto PI::Debugger::load(Node::Object parent) -> void {
memory.ram = parent->append<Node::Debugger::Memory>("PI RAM");
memory.ram->setSize(64);
memory.ram->setRead([&](u32 address) -> u8 {
return pi.ram.read<Byte>(address);
});
memory.ram->setWrite([&](u32 address, u8 data) -> void {
return pi.ram.write<Byte>(address, data);
});
tracer.io = parent->append<Node::Debugger::Tracer::Notification>("I/O", "PI");
}

View File

@ -1,12 +1,9 @@
auto PI::dmaRead() -> void {
io.readLength = (io.readLength | 1) + 1;
for(u32 address = 0; address < io.readLength; address += 2) {
u16 data = bus.read<Half>(io.dramAddress + address);
bus.write<Half>(io.pbusAddress + address, data);
u16 data = rdram.ram.read<Half>(io.dramAddress + address);
busWrite<Half>(io.pbusAddress + address, data);
}
io.dmaBusy = 0;
io.interrupt = 1;
mi.raise(MI::IRQ::PI);
}
auto PI::dmaWrite() -> void {
@ -28,7 +25,7 @@ auto PI::dmaWrite() -> void {
i32 rom_len = (cur_len + 1) & ~1;
for (u32 i = 0; i < rom_len; i += 2) {
u16 data = bus.read<Half>(io.pbusAddress);
u16 data = busRead<Half>(io.pbusAddress);
mem[i + 0] = data >> 8;
mem[i + 1] = data & 0xFF;
io.pbusAddress += 2;
@ -39,13 +36,18 @@ auto PI::dmaWrite() -> void {
cur_len = max(cur_len-misalign, 0);
}
if constexpr(Accuracy::CPU::Recompiler) {
cpu.recompiler.invalidateRange(io.dramAddress, io.dramAddress + cur_len);
}
for (u32 i = 0; i < cur_len; i++)
bus.write<Byte>(io.dramAddress++, mem[i]);
rdram.ram.write<Byte>(io.dramAddress++, mem[i]);
io.dramAddress = (io.dramAddress + 7) & ~7;
first_block = false;
}
}
auto PI::dmaFinished() -> void {
io.dmaBusy = 0;
io.interrupt = 1;
mi.raise(MI::IRQ::PI);

View File

@ -1,4 +1,4 @@
auto PI::readWord(u32 address) -> u32 {
auto PI::ioRead(u32 address) -> u32 {
address = (address & 0xfffff) >> 2;
n32 data;
@ -74,7 +74,7 @@ auto PI::readWord(u32 address) -> u32 {
return data;
}
auto PI::writeWord(u32 address, u32 data_) -> void {
auto PI::ioWrite(u32 address, u32 data_) -> void {
address = (address & 0xfffff) >> 2;
n32 data = data_;
@ -91,21 +91,23 @@ auto PI::writeWord(u32 address, u32 data_) -> void {
if(address == 1) {
//PI_PBUS_ADDRESS
io.pbusAddress = n29(data) & ~1;
io.pbusAddress = n32(data) & ~1;
}
if(address == 2) {
//PI_READ_LENGTH
io.readLength = n24(data);
io.dmaBusy = 1;
queue.insert(Queue::PI_DMA_Read, io.readLength * 9);
dmaRead();
queue.insert(Queue::PI_DMA_Read, io.readLength * 36);
}
if(address == 3) {
//PI_WRITE_LENGTH
io.writeLength = n24(data);
io.dmaBusy = 1;
queue.insert(Queue::PI_DMA_Write, io.writeLength * 9);
dmaWrite();
queue.insert(Queue::PI_DMA_Write, io.writeLength * 36);
}
if(address == 4) {

View File

@ -10,47 +10,19 @@ PI pi;
auto PI::load(Node::Object parent) -> void {
node = parent->append<Node::Object>("PI");
rom.allocate(0x7c0);
ram.allocate(0x040);
debugger.load(node);
}
auto PI::unload() -> void {
debugger = {};
rom.reset();
ram.reset();
node.reset();
}
auto PI::power(bool reset) -> void {
string pifrom = cartridge.region() == "NTSC" ? "pif.ntsc.rom" : "pif.pal.rom";
if(auto fp = system.pak->read(pifrom)) {
rom.load(fp);
}
ram.fill();
io = {};
bsd1 = {};
bsd2 = {};
//write CIC seeds into PIF RAM so that cartridge checksum function passes
string cic = cartridge.cic();
n8 seed = 0x3f;
n1 version = 0;
if(cic == "CIC-NUS-6101" || cic == "CIC-NUS-7102") seed = 0x3f, version = 1;
if(cic == "CIC-NUS-6102" || cic == "CIC-NUS-7101") seed = 0x3f;
if(cic == "CIC-NUS-6103" || cic == "CIC-NUS-7103") seed = 0x78;
if(cic == "CIC-NUS-6105" || cic == "CIC-NUS-7105") seed = 0x91;
if(cic == "CIC-NUS-6106" || cic == "CIC-NUS-7106") seed = 0x85;
n32 data;
data.bit(0, 7) = 0x3f; //CIC IPL2 seed
data.bit(8,15) = seed; //CIC IPL3 seed
data.bit(17) = reset; //osResetType (0 = power; 1 = reset (NMI))
data.bit(18) = version; //osVersion
data.bit(19) = 0; //osRomType (0 = Gamepak; 1 = 64DD)
ram.write<Word>(0x24, data);
}
}

View File

@ -2,18 +2,12 @@
struct PI : Memory::IO<PI> {
Node::Object node;
Memory::Readable rom;
Memory::Writable ram;
struct Debugger {
//debugger.cpp
auto load(Node::Object) -> void;
auto io(bool mode, u32 address, u32 data) -> void;
struct Memory {
Node::Debugger::Memory ram;
} memory;
struct Tracer {
Node::Debugger::Tracer::Notification io;
} tracer;
@ -27,11 +21,22 @@ struct PI : Memory::IO<PI> {
//dma.cpp
auto dmaRead() -> void;
auto dmaWrite() -> void;
auto dmaFinished() -> void;
//io.cpp
auto ioRead(u32 address) -> u32;
auto ioWrite(u32 address, u32 data) -> void;
//bus.hpp
auto readWord(u32 address) -> u32;
auto writeWord(u32 address, u32 data) -> void;
auto writeFinished() -> void;
auto writeForceFinish() -> void;
template <u32 Size>
auto busRead(u32 address) -> u32;
template <u32 Size>
auto busWrite(u32 address, u32 data) -> void;
//serialization.cpp
auto serialize(serializer&) -> void;
@ -44,7 +49,7 @@ struct PI : Memory::IO<PI> {
n32 pbusAddress;
n32 readLength;
n32 writeLength;
n1 romLockout;
n32 busLatch;
} io;
struct BSD {

View File

@ -1,6 +1,4 @@
auto PI::serialize(serializer& s) -> void {
s(ram);
s(io.dmaBusy);
s(io.ioBusy);
s(io.error);
@ -9,7 +7,7 @@ auto PI::serialize(serializer& s) -> void {
s(io.pbusAddress);
s(io.readLength);
s(io.writeLength);
s(io.romLockout);
s(io.busLatch);
s(bsd1.latency);
s(bsd1.pulseWidth);

View File

@ -0,0 +1,10 @@
//Registers a 64-byte "PIF RAM" debugger memory node under parent.
//Its accessors read and write the global pif.ram one byte at a time.
auto PIF::Debugger::load(Node::Object parent) -> void {
  auto& ramNode = memory.ram;
  ramNode = parent->append<Node::Debugger::Memory>("PIF RAM");
  ramNode->setSize(64);

  ramNode->setRead([&](u32 offset) -> u8 {
    return pif.ram.read<Byte>(offset);
  });

  ramNode->setWrite([&](u32 offset, u8 value) -> void {
    pif.ram.write<Byte>(offset, value);
  });
}

View File

@ -0,0 +1,17 @@
//32-bit read from the PIF window; only the low 11 address bits are decoded.
//Offsets 0x000-0x7bf address the ROM (reads return 0 once io.romLockout is set),
//offsets 0x7c0-0x7ff address the RAM.
auto PIF::readWord(u32 address) -> u32 {
  const u32 offset = address & 0x7ff;
  if(offset > 0x7bf) return ram.read<Word>(offset);
  if(io.romLockout) return 0;
  return rom.read<Word>(offset);
}
//32-bit write to the PIF window; only the low 11 address bits are decoded.
//Offsets 0x000-0x7bf are forwarded to the ROM object unless io.romLockout is set,
//offsets 0x7c0-0x7ff are written to the RAM.
auto PIF::writeWord(u32 address, u32 data) -> void {
  const u32 offset = address & 0x7ff;
  if(offset > 0x7bf) return ram.write<Word>(offset, data);
  if(!io.romLockout) rom.write<Word>(offset, data);
}

View File

@ -0,0 +1,305 @@
#include <n64/n64.hpp>
namespace ares::Nintendo64 {
PIF pif;
#include "io.cpp"
#include "debugger.cpp"
#include "serialization.cpp"
//Creates the "PIF" node under parent, allocates the ROM (0x7c0 bytes) and RAM (0x40 bytes)
//buffers, and attaches the debugger to the new node.
auto PIF::load(Node::Object parent) -> void {
  constexpr auto romSize = 0x7c0;
  constexpr auto ramSize = 0x040;

  node = parent->append<Node::Object>("PIF");
  rom.allocate(romSize);
  ram.allocate(ramSize);
  debugger.load(node);
}
//Undoes load(): resets the debugger state, then the ROM and RAM buffers, and finally the node.
auto PIF::unload() -> void {
debugger = {};
rom.reset();
ram.reset();
node.reset();
}
//Computes a 5-bit CRC over the 16 bits of address, most significant bit first.
//Each step shifts the next address bit into the register and, when the bit shifted out of
//position 4 was set, folds in the polynomial 0x15.
//The feedback variable must not be called `xor`: that is a reserved alternative token in
//standard C++ and only compiles when operator names are disabled.
auto PIF::addressCRC(u16 address) const -> n5 {
  n5 crc = 0;
  for(u32 i : range(16)) {
    n5 feedback = crc & 0x10 ? 0x15 : 0x00;  //sampled before the shift discards bit 4
    crc <<= 1;
    if(address & 0x8000) crc |= 1;
    address <<= 1;
    crc ^= feedback;
  }
  return crc;
}
//Computes an 8-bit CRC (polynomial 0x85) over the first 32 bytes of data,
//most significant bit of each byte first. A 33rd pass shifts in eight zero bits to flush the register.
//data must provide at least 32 elements.
//The feedback variable must not be called `xor`: that is a reserved alternative token in
//standard C++ and only compiles when operator names are disabled.
auto PIF::dataCRC(array_view<u8> data) const -> n8 {
  n8 crc = 0;
  for(u32 i : range(33)) {
    for(u32 j : reverse(range(8))) {
      n8 feedback = crc & 0x80 ? 0x85 : 0x00;  //sampled before the shift discards bit 7
      crc <<= 1;
      if(i < 32) {
        if(data[i] & 1 << j) crc |= 1;
      }
      crc ^= feedback;
    }
  }
  return crc;
}
//Executes the commands requested through the control byte at PIF RAM offset 0x3f.
//Each set bit selects one action. Handled bits are cleared, except bit 0, which is left set on purpose
//(see the todo below). The resulting byte is written back to 0x3f last, i.e. also after a RAM clear.
auto PIF::run() -> void {
  auto flags = ram.read<Byte>(0x3f);

  //controller polling
  if(flags & 0x01) {
    //todo: this flag is supposed to be cleared, but doing so breaks inputs
    //flags &= ~0x01;
    scan();
  }

  //CIC-NUS-6105 challenge/response
  if(flags & 0x02) {
    flags &= ~0x02;
    challenge();
  }

  //unknown purpose
  if(flags & 0x04) {
    flags &= ~0x04;
    //log tag names this function (it previously still said "[SI::main]")
    debug(unimplemented, "[PIF::run] flags & 0x04");
  }

  //must be sent within 5s of the console booting, or SM5 will lock the N64
  if(flags & 0x08) {
    flags &= ~0x08;
  }

  //PIF ROM lockout
  if(flags & 0x10) {
    flags &= ~0x10;
    io.romLockout = 1;
  }

  //initialization
  if(flags & 0x20) {
    flags &= ~0x20;
    flags |= 0x80;  //set completion flag
  }

  //clear PIF RAM
  if(flags & 0x40) {
    flags &= ~0x40;
    ram.fill();
  }

  ram.write<Byte>(0x3f, flags);
}
//Walks the command packets stored in PIF RAM and executes them in place.
//Packet layout as parsed here: a send-length byte, a receive-length byte, `send` input bytes,
//then `recv` bytes that are overwritten with the response. Only the low 6 bits of each length are used.
//Special send bytes: 0x00 skips a channel, 0xfd and 0xff are skipped, 0xfe ends processing.
//Channels 0-3 are forwarded to the controller ports; channels 4 and above are handled here as
//cartridge EEPROM / RTC commands. Failed packets get 0x80 (not valid) or 0x40 (over) set in the
//receive-length byte.
auto PIF::scan() -> void {
  ControllerPort* controllers[4] = {
    &controllerPort1,
    &controllerPort2,
    &controllerPort3,
    &controllerPort4,
  };

  //set to 1 to dump PIF RAM before and after processing
  static constexpr bool Debug = 0;

  if constexpr(Debug) {
    print("{\n");
    for(u32 y : range(8)) {
      print("  ");
      for(u32 x : range(8)) {
        print(hex(ram.read<Byte>(y * 8 + x), 2L), " ");
      }
      print("\n");
    }
    print("}\n");
  }

  n3 channel = 0;  //0-5
  for(u32 offset = 0; offset < 64;) {
    n8 send = ram.read<Byte>(offset++);
    if(send == 0x00) { channel++; continue; }
    if(send == 0xfd) continue;  //channel reset
    if(send == 0xfe) break;     //end of packets
    if(send == 0xff) continue;  //alignment padding
    n8 recvOffset = offset;     //remembered so the status bits can be patched in afterwards
    n8 recv = ram.read<Byte>(offset++);
    if(recv == 0xfe) break;     //end of packets

    //clear flags from lengths
    send &= 0x3f;
    recv &= 0x3f;

    n8 input[64];
    for(u32 index : range(send)) {
      input[index] = ram.read<Byte>(offset++);
    }
    n8 output[64];
    b1 valid = 0;
    b1 over = 0;

    //controller port communication
    if (channel < 4 && controllers[channel]->device) {
      n2 status = controllers[channel]->device->comm(send, recv, input, output);
      valid = status.bit(0);
      over = status.bit(1);
    }

    if (channel >= 4) {
      //status
      if(input[0] == 0x00 || input[0] == 0xff) {
        //cartridge EEPROM (4kbit)
        if(cartridge.eeprom.size == 512) {
          output[0] = 0x00;
          output[1] = 0x80;
          output[2] = 0x00;
          valid = 1;
        }

        //cartridge EEPROM (16kbit)
        if(cartridge.eeprom.size == 2048) {
          output[0] = 0x00;
          output[1] = 0xc0;
          output[2] = 0x00;
          valid = 1;
        }
      }

      //read EEPROM
      if(input[0] == 0x04 && send >= 2) {
        u32 address = input[1] * 8;  //input[1] selects an 8-byte block
        for(u32 index : range(recv)) {
          output[index] = cartridge.eeprom.read<Byte>(address++);
        }
        valid = 1;
      }

      //write EEPROM
      if(input[0] == 0x05 && send >= 2 && recv >= 1) {
        u32 address = input[1] * 8;  //input[1] selects an 8-byte block
        for(u32 index : range(send - 2)) {
          cartridge.eeprom.write<Byte>(address++, input[2 + index]);
        }
        output[0] = 0x00;
        valid = 1;
      }

      //log tags below name this function (they previously still said "[SI::main]")

      //RTC status
      if(input[0] == 0x06) {
        debug(unimplemented, "[PIF::scan] RTC status");
      }

      //RTC read
      if(input[0] == 0x07) {
        debug(unimplemented, "[PIF::scan] RTC read");
      }

      //RTC write
      if(input[0] == 0x08) {
        debug(unimplemented, "[PIF::scan] RTC write");
      }
    }

    if(!valid) {
      ram.write<Byte>(recvOffset, 0x80 | recv & 0x3f);
    }
    if(over) {
      ram.write<Byte>(recvOffset, 0x40 | recv & 0x3f);
    }
    for(u32 index : range(recv)) {
      ram.write<Byte>(offset++, output[index]);
    }
    channel++;
  }

  if constexpr(Debug) {
    print("[\n");
    for(u32 y : range(8)) {
      print("  ");
      for(u32 x : range(8)) {
        print(hex(ram.read<Byte>(y * 8 + x), 2L), " ");
      }
      print("\n");
    }
    print("]\n");
  }
}
//CIC-NUS-6105 anti-piracy challenge/response
//Reads the 15 challenge bytes stored at PIF RAM 0x30-0x3e, transforms them one nibble at a time,
//and overwrites the same 15 bytes with the response.
auto PIF::challenge() -> void {
//next-key table indexed by (sel << 4 | data): entries 0-15 are used while sel = 0, entries 16-31 while sel = 1
static n4 lut[32] = {
0x4, 0x7, 0xa, 0x7, 0xe, 0x5, 0xe, 0x1,
0xc, 0xf, 0x8, 0xf, 0x6, 0x3, 0x6, 0x9,
0x4, 0x1, 0xa, 0x7, 0xe, 0x5, 0xe, 0x1,
0xc, 0x9, 0x8, 0x5, 0x6, 0x3, 0xc, 0x9,
};
n4 challenge[30];
n4 response[30];
//15 bytes -> 30 nibbles
//(high nibble first; NOTE(review): relies on n4 keeping only the low 4 bits of the assigned value - confirm)
for(u32 address : range(15)) {
auto data = ram.read<Byte>(0x30 + address);
challenge[address << 1 | 0] = data >> 4;
challenge[address << 1 | 1] = data >> 0;
}
//running state: key feeds into every response nibble, sel picks the half of lut used for the next key
n4 key = 0xb;
n1 sel = 0;
for(u32 address : range(30)) {
//response nibble; the sum is stored in an n4
n4 data = key + 5 * challenge[address];
response[address] = data;
key = lut[sel << 4 | data];
//derive the next sel from the response nibble: mod starts as bit 3, mag as the low 3 bits
n1 mod = data >> 3;
n3 mag = data >> 0;
if(mod) mag = ~mag;
if(mag % 3 != 1) mod = !mod;
//while sel is set, four specific nibble values override the computed mod
if(sel) {
if(data == 0x1 || data == 0x9) mod = 1;
if(data == 0xb || data == 0xe) mod = 0;
}
sel = mod;
}
//30 nibbles -> 15 bytes
for(u32 address : range(15)) {
n8 data = 0;
data |= response[address << 1 | 0] << 4;
data |= response[address << 1 | 1] << 0;
ram.write<Byte>(0x30 + address, data);
}
}
auto PIF::power(bool reset) -> void {
string pifrom = cartridge.region() == "NTSC" ? "pif.ntsc.rom" : "pif.pal.rom";
if(auto fp = system.pak->read(pifrom)) {
rom.load(fp);
}
ram.fill();
io = {};
//write CIC seeds into PIF RAM so that cartridge checksum function passes
string cic = cartridge.cic();
n8 seed = 0x3f;
n1 version = 0;
if(cic == "CIC-NUS-6101" || cic == "CIC-NUS-7102") seed = 0x3f, version = 1;
if(cic == "CIC-NUS-6102" || cic == "CIC-NUS-7101") seed = 0x3f;
if(cic == "CIC-NUS-6103" || cic == "CIC-NUS-7103") seed = 0x78;
if(cic == "CIC-NUS-6105" || cic == "CIC-NUS-7105") seed = 0x91;
if(cic == "CIC-NUS-6106" || cic == "CIC-NUS-7106") seed = 0x85;
n32 data;
data.bit(0, 7) = 0x3f; //CIC IPL2 seed
data.bit(8,15) = seed; //CIC IPL3 seed
data.bit(17) = reset; //osResetType (0 = power; 1 = reset (NMI))
data.bit(18) = version; //osVersion
data.bit(19) = 0; //osRomType (0 = Gamepak; 1 = 64DD)
ram.write<Word>(0x24, data);
}
}

View File

@ -0,0 +1,41 @@
//PIF-NUS
//Owns the PIF ROM and RAM and exposes them through word-sized accessors
//(readWord/writeWord, wrapped by the Memory::IO<PIF> base).
struct PIF : Memory::IO<PIF> {
Node::Object node;
//boot ROM: allocated as 0x7c0 bytes in load()
Memory::Readable rom;
//RAM: allocated as 0x40 bytes in load(); offset 0x3f holds the control byte processed by run()
Memory::Writable ram;
struct Debugger {
//debugger.cpp
auto load(Node::Object) -> void;
//NOTE(review): no definition of io() is visible in pif/debugger.cpp - confirm it is defined or unused
auto io(bool mode, u32 address, u32 data) -> void;
struct Memory {
//debugger view of the 64-byte PIF RAM
Node::Debugger::Memory ram;
} memory;
} debugger;
//pif.cpp
auto load(Node::Object) -> void;
auto unload() -> void;
//5-bit CRC over a 16-bit address
auto addressCRC(u16 address) const -> n5;
//8-bit CRC over the first 32 bytes of data
auto dataCRC(array_view<u8> data) const -> n8;
//executes the commands flagged in RAM offset 0x3f
auto run() -> void;
//processes the command packets stored in RAM
auto scan() -> void;
//CIC-NUS-6105 challenge/response over RAM 0x30-0x3e
auto challenge() -> void;
auto power(bool reset) -> void;
//io.cpp
auto readWord(u32 address) -> u32;
auto writeWord(u32 address, u32 data) -> void;
//serialization.cpp
auto serialize(serializer&) -> void;
struct IO {
//once set, ROM reads return 0 and ROM writes are ignored
n1 romLockout;
} io;
};
extern PIF pif;

View File

@ -0,0 +1,5 @@
//Saves or restores the PIF state through s: the RAM contents followed by the ROM lockout flag.
//The ROM itself is not serialized here. The order of the calls defines the save-state layout.
auto PIF::serialize(serializer& s) -> void {
s(ram);
s(io.romLockout);
}

View File

@ -1,27 +1,43 @@
auto RSP::dmaTransfer() -> void {
if(dma.requests.empty()) return;
auto request = *dma.requests.read();
auto region = !request.pbusRegion ? 0x0400'0000 : 0x0400'1000;
if(request.type == DMA::Request::Type::Read) {
for(u32 block : range(request.count)) {
for(u32 offset = 0; offset < request.length; offset += 4) {
u32 data = bus.read<Word>(request.dramAddress + offset);
bus.write<Word>(region + request.pbusAddress + offset, data);
}
request.pbusAddress += request.length;
request.dramAddress += request.length + request.skip;
}
}
if(request.type == DMA::Request::Type::Write) {
for(u32 block : range(request.count)) {
for(u32 offset = 0; offset < request.length; offset += 4) {
u32 data = bus.read<Word>(region + request.pbusAddress + offset);
bus.write<Word>(request.dramAddress + offset, data);
}
request.pbusAddress += request.length;
request.dramAddress += request.length + request.skip;
}
auto RSP::dmaTransferStart(void) -> void {
if(dma.busy.any()) return;
if(dma.full.any()) {
dma.current = dma.pending;
dma.busy = dma.full;
dma.full = {0,0};
queue.insert(Queue::RSP_DMA, (dma.current.length+8) / 8 * 3);
}
}
auto RSP::dmaTransferStep() -> void {
auto& region = !dma.current.pbusRegion ? dmem : imem;
if(dma.busy.read) {
if constexpr(Accuracy::RSP::Recompiler) {
if(dma.current.pbusRegion) recompiler.invalidate();
}
for(u32 i = 0; i <= dma.current.length; i += 8) {
u64 data = rdram.ram.read<Dual>(dma.current.dramAddress);
region.write<Dual>(dma.current.pbusAddress, data);
dma.current.dramAddress += 8;
dma.current.pbusAddress += 8;
}
}
if(dma.busy.write) {
for(u32 i = 0; i <= dma.current.length; i += 8) {
u64 data = region.read<Dual>(dma.current.pbusAddress);
rdram.ram.write<Dual>(dma.current.dramAddress, data);
dma.current.dramAddress += 8;
dma.current.pbusAddress += 8;
}
}
if(dma.current.count) {
dma.current.count -= 1;
dma.current.dramAddress += dma.current.skip;
queue.insert(Queue::RSP_DMA, (dma.current.length+8) / 8 * 3);
} else {
dma.busy = {0,0};
dma.current.length = 0xFF8;
dmaTransferStart();
}
}

View File

@ -26,8 +26,8 @@ auto RSP::BGEZ(cr32& rs, s16 imm) -> void {
}
auto RSP::BGEZAL(cr32& rs, s16 imm) -> void {
RA.u32 = s32(PC + 8);
if(rs.s32 >= 0) branch.take(PC + 4 + (imm << 2));
RA.u32 = u12(PC + 8);
}
auto RSP::BGTZ(cr32& rs, s16 imm) -> void {
@ -43,8 +43,8 @@ auto RSP::BLTZ(cr32& rs, s16 imm) -> void {
}
auto RSP::BLTZAL(cr32& rs, s16 imm) -> void {
RA.u32 = s32(PC + 8);
if(rs.s32 < 0) branch.take(PC + 4 + (imm << 2));
RA.u32 = u12(PC + 8);
}
auto RSP::BNE(cr32& rs, cr32& rt, s16 imm) -> void {
@ -58,17 +58,17 @@ auto RSP::BREAK() -> void {
}
auto RSP::J(u32 imm) -> void {
branch.take((PC + 4 & 0xf000'0000) | (imm << 2));
branch.take(imm << 2);
}
auto RSP::JAL(u32 imm) -> void {
RA.u32 = s32(PC + 8);
branch.take((PC + 4 & 0xf000'0000) | (imm << 2));
RA.u32 = u12(PC + 8);
branch.take(imm << 2);
}
auto RSP::JALR(r32& rd, cr32& rs) -> void {
rd.u32 = s32(PC + 8);
branch.take(rs.u32);
rd.u32 = u12(PC + 8);
}
auto RSP::JR(cr32& rs) -> void {
@ -96,7 +96,11 @@ auto RSP::LUI(r32& rt, u16 imm) -> void {
}
auto RSP::LW(r32& rt, cr32& rs, s16 imm) -> void {
rt.u32 = s32(dmem.readUnaligned<Word>(rs.u32 + imm));
rt.u32 = dmem.readUnaligned<Word>(rs.u32 + imm);
}
//LWU Rt,Rs,i16: loads a word from DMEM at Rs + imm via the unaligned accessor and stores it in Rt.
auto RSP::LWU(r32& rt, cr32& rs, s16 imm) -> void {
  const u32 address = rs.u32 + imm;
  rt.u32 = dmem.readUnaligned<Word>(address);
}
auto RSP::NOR(r32& rd, cr32& rs, cr32& rt) -> void {

View File

@ -1,9 +1,9 @@
auto RSP::MFC0(r32& rt, u8 rd) -> void {
if((rd & 8) == 0) rt.u32 = Nintendo64::rsp.readWord((rd & 7) << 2);
if((rd & 8) == 0) rt.u32 = Nintendo64::rsp.ioRead ((rd & 7) << 2);
if((rd & 8) != 0) rt.u32 = Nintendo64::rdp.readWord((rd & 7) << 2);
}
auto RSP::MTC0(cr32& rt, u8 rd) -> void {
if((rd & 8) == 0) Nintendo64::rsp.writeWord((rd & 7) << 2, rt.u32);
if((rd & 8) == 0) Nintendo64::rsp.ioWrite ((rd & 7) << 2, rt.u32);
if((rd & 8) != 0) Nintendo64::rdp.writeWord((rd & 7) << 2, rt.u32);
}

View File

@ -148,7 +148,7 @@ template<u8 e>
auto RSP::LDV(r128& vt, cr32& rs, s8 imm) -> void {
auto address = rs.u32 + imm * 8;
auto start = e;
auto end = start + 8;
auto end = min(start + 8, 16);
for(u32 offset = start; offset < end; offset++) {
vt.byte(offset & 15) = dmem.read<Byte>(address++);
}
@ -157,19 +157,27 @@ auto RSP::LDV(r128& vt, cr32& rs, s8 imm) -> void {
template<u8 e>
auto RSP::LFV(r128& vt, cr32& rs, s8 imm) -> void {
auto address = rs.u32 + imm * 16;
auto start = e >> 1;
auto end = start + 4;
auto index = (address & 7) - e;
address &= ~7;
auto start = e;
auto end = min(start + 8, 16);
r128 tmp;
for(u32 offset = 0; offset < 4; offset++) {
tmp.element(offset + 0) = dmem.read<Byte>(address + (index + offset * 4 + 0 & 15)) << 7;
tmp.element(offset + 4) = dmem.read<Byte>(address + (index + offset * 4 + 8 & 15)) << 7;
}
for(u32 offset = start; offset < end; offset++) {
vt.element(offset & 7) = dmem.read<Byte>(address) << 7;
address += 4;
vt.byte(offset) = tmp.byte(offset);
}
}
template<u8 e>
auto RSP::LHV(r128& vt, cr32& rs, s8 imm) -> void {
auto address = rs.u32 + imm * 16;
auto index = (address & 7) - e;
address &= ~7;
for(u32 offset = 0; offset < 8; offset++) {
vt.element(offset) = dmem.read<Byte>(address + (16 - e + offset * 2 & 15)) << 7;
vt.element(offset) = dmem.read<Byte>(address + (index + offset * 2 & 15)) << 7;
}
}
@ -177,7 +185,7 @@ template<u8 e>
auto RSP::LLV(r128& vt, cr32& rs, s8 imm) -> void {
auto address = rs.u32 + imm * 4;
auto start = e;
auto end = start + 4;
auto end = min(start + 4, 16);
for(u32 offset = start; offset < end; offset++) {
vt.byte(offset & 15) = dmem.read<Byte>(address++);
}
@ -186,8 +194,10 @@ auto RSP::LLV(r128& vt, cr32& rs, s8 imm) -> void {
template<u8 e>
auto RSP::LPV(r128& vt, cr32& rs, s8 imm) -> void {
auto address = rs.u32 + imm * 8;
auto index = (address & 7) - e;
address &= ~7;
for(u32 offset = 0; offset < 8; offset++) {
vt.element(offset) = dmem.read<Byte>(address + (16 - e + offset & 15)) << 8;
vt.element(offset) = dmem.read<Byte>(address + (index + offset & 15)) << 8;
}
}
@ -195,7 +205,7 @@ template<u8 e>
auto RSP::LQV(r128& vt, cr32& rs, s8 imm) -> void {
auto address = rs.u32 + imm * 16;
auto start = e;
auto end = 16 - (address & 15);
auto end = min(16 + e - (address & 15), 16);
for(u32 offset = start; offset < end; offset++) {
vt.byte(offset & 15) = dmem.read<Byte>(address++);
}
@ -216,7 +226,7 @@ template<u8 e>
auto RSP::LSV(r128& vt, cr32& rs, s8 imm) -> void {
auto address = rs.u32 + imm * 2;
auto start = e;
auto end = start + 2;
auto end = min(start + 2, 16);
for(u32 offset = start; offset < end; offset++) {
vt.byte(offset & 15) = dmem.read<Byte>(address++);
}
@ -225,21 +235,26 @@ auto RSP::LSV(r128& vt, cr32& rs, s8 imm) -> void {
template<u8 e>
auto RSP::LTV(u8 vt, cr32& rs, s8 imm) -> void {
auto address = rs.u32 + imm * 16;
auto start = vt;
auto end = min(32, start + 8);
address = (address + 8 & ~15) + (e & 1);
for(u32 offset = start; offset < end; offset++) {
auto byte = (8 - (e >> 1) + (offset - start)) << 1;
vpu.r[offset].byte(byte + 0 & 15) = dmem.read<Byte>(address++);
vpu.r[offset].byte(byte + 1 & 15) = dmem.read<Byte>(address++);
auto begin = address & ~7;
address = begin + ((e + (address & 8)) & 15);
auto vtbase = vt & ~7;
auto vtoff = e >> 1;
for (u32 i = 0; i < 8; i++) {
vpu.r[vtbase + vtoff].byte(i * 2 + 0) = dmem.read<Byte>(address++);
if (address == begin + 16) address = begin;
vpu.r[vtbase + vtoff].byte(i * 2 + 1) = dmem.read<Byte>(address++);
if (address == begin + 16) address = begin;
vtoff = vtoff + 1 & 7;
}
}
template<u8 e>
auto RSP::LUV(r128& vt, cr32& rs, s8 imm) -> void {
auto address = rs.u32 + imm * 8;
auto index = (address & 7) - e;
address &= ~7;
for(u32 offset = 0; offset < 8; offset++) {
vt.element(offset) = dmem.read<Byte>(address + (16 - e + offset & 15)) << 7;
vt.element(offset) = dmem.read<Byte>(address + (index + offset & 15)) << 7;
}
}
@ -263,8 +278,8 @@ auto RSP::MFC2(r32& rt, cr128& vs) -> void {
template<u8 e>
auto RSP::MTC2(cr32& rt, r128& vs) -> void {
vs.byte(e + 0 & 15) = rt.u32 >> 8;
vs.byte(e + 1 & 15) = rt.u32 >> 0;
vs.byte(e + 0) = rt.u32 >> 8;
if (e != 15) vs.byte(e + 1) = rt.u32 >> 0;
}
template<u8 e>
@ -808,10 +823,9 @@ auto RSP::VMACQ(r128& vd) -> void {
for(u32 n : range(8)) {
s32 product = ACCH.element(n) << 16 | ACCM.element(n) << 0;
if(product < 0 && !(product & 1 << 5)) product += 32;
else if(product > 0 && !(product & 1 << 5)) product -= 32;
else if(product >= 32 && !(product & 1 << 5)) product -= 32;
ACCH.element(n) = product >> 16;
ACCM.element(n) = product >> 0;
ACCL.element(n) = 0;
vd.element(n) = sclamp<16>(product >> 1) & ~15;
}
}
@ -959,17 +973,11 @@ auto RSP::VMADN(r128& vd, cr128& vs, cr128& vt) -> void {
}
}
template<u8 E>
template<u8 e>
auto RSP::VMOV(r128& vd, u8 de, cr128& vt) -> void {
u8 e = E;
switch(e) {
case 0x0 ... 0x1: e = e & 0b000 | de & 0b111; break; //hardware glitch
case 0x2 ... 0x3: e = e & 0b001 | de & 0b110; break; //hardware glitch
case 0x4 ... 0x7: e = e & 0b011 | de & 0b100; break; //hardware glitch
case 0x8 ... 0xf: e = e & 0b111 | de & 0b000; break; //normal behavior
}
vd.u16(de) = vt.u16(e);
ACCL = vt(e);
cr128 vte = vt(e);
vd.u16(de) = vte.u16(de);
ACCL = vte;
}
template<u8 e>
@ -1293,12 +1301,12 @@ auto RSP::VRND(r128& vd, u8 vs, cr128& vt) -> void {
acc |= ACCM.element(n); acc <<= 16;
acc |= ACCL.element(n); acc <<= 16;
acc >>= 16;
if(D == 0 && acc < 0) acc += product;
if(D == 1 && acc >= 0) acc += product;
if(D == 0 && acc < 0) acc = sclip<48>(acc + product);
if(D == 1 && acc >= 0) acc = sclip<48>(acc + product);
ACCH.element(n) = acc >> 32;
ACCM.element(n) = acc >> 16;
ACCL.element(n) = acc >> 0;
vd.element(n) = acc >> 16;
vd.element(n) = sclamp<16>(acc >> 16);
}
}
@ -1417,6 +1425,26 @@ auto RSP::VXOR(r128& vd, cr128& vs, cr128& vt) -> void {
}
}
//Shared handler for the VU opcodes that the decoder and recompiler route here (the
//"broken"/unassigned encodings such as VSUT, VADDB, VEXTT, VINST, ...).
//Effect: the low accumulator lanes receive the 16-bit lane-wise sum vs + vt(e), and
//every lane of vd is cleared to zero.
template<u8 e>
auto RSP::VZERO(r128& vd, cr128& vs, cr128& vt) -> void {
  if constexpr(Accuracy::RSP::SISD) {
    cr128 vte = vt(e);
    for(u32 n : range(8)) {
      s32 result = vs.s16(n) + vte.s16(n);
      ACCL.s16(n) = result;  //only the low 16 bits of the sum are kept
      vd.s16(n) = 0;
    }
  }

  if constexpr(Accuracy::RSP::SIMD) {
#if defined(ARCHITECTURE_AMD64)
    r128 vte = vt(e);
    ACCL = _mm_add_epi16(vs, vte);
    //zero vd without reading it first
    vd = _mm_setzero_si128();
#endif
  }
}
#undef ACCH
#undef ACCM
#undef ACCL

View File

@ -81,7 +81,7 @@ auto RSP::decoderEXECUTE() -> void {
op(0x24, LBU, RT, RS, IMMi16);
op(0x25, LHU, RT, RS, IMMi16);
op(0x26, INVALID); //LWR
op(0x27, INVALID); //LWU
op(0x27, LWU, RT, RS, IMMi16);
op(0x28, SB, RT, RS, IMMi16);
op(0x29, SH, RT, RS, IMMi16);
op(0x2a, INVALID); //SWL
@ -279,20 +279,20 @@ auto RSP::decoderVU() -> void {
vu(0x0f, VMADH, VD, VS, VT);
vu(0x10, VADD, VD, VS, VT);
vu(0x11, VSUB, VD, VS, VT);
op(0x12, INVALID);
vu(0x12, VZERO, VD, VS, VT); //VSUT
vu(0x13, VABS, VD, VS, VT);
vu(0x14, VADDC, VD, VS, VT);
vu(0x15, VSUBC, VD, VS, VT);
op(0x16, INVALID);
op(0x17, INVALID);
op(0x18, INVALID);
op(0x19, INVALID);
op(0x1a, INVALID);
op(0x1b, INVALID);
op(0x1c, INVALID);
vu(0x16, VZERO, VD, VS, VT); //VADDB
vu(0x17, VZERO, VD, VS, VT); //VSUBB
vu(0x18, VZERO, VD, VS, VT); //VACCB
vu(0x19, VZERO, VD, VS, VT); //VSUCB
vu(0x1a, VZERO, VD, VS, VT); //VSAD
vu(0x1b, VZERO, VD, VS, VT); //VSAC
vu(0x1c, VZERO, VD, VS, VT); //VSUM
vu(0x1d, VSAR, VD, VS);
op(0x1e, INVALID);
op(0x1f, INVALID);
vu(0x1e, VZERO, VD, VS, VT);
vu(0x1f, VZERO, VD, VS, VT);
vu(0x20, VLT, VD, VS, VT);
vu(0x21, VEQ, VD, VS, VT);
vu(0x22, VNE, VD, VS, VT);
@ -307,8 +307,8 @@ auto RSP::decoderVU() -> void {
vu(0x2b, VNOR, VD, VS, VT);
vu(0x2c, VXOR, VD, VS, VT);
vu(0x2d, VNXOR, VD, VS, VT);
op(0x2e, INVALID);
op(0x2f, INVALID);
vu(0x2e, VZERO, VD, VS, VT);
vu(0x2f, VZERO, VD, VS, VT);
vu(0x30, VRCP, VD, DE, VT);
vu(0x31, VRCPL, VD, DE, VT);
vu(0x32, VRCPH, VD, DE, VT);
@ -317,14 +317,14 @@ auto RSP::decoderVU() -> void {
vu(0x35, VRSQL, VD, DE, VT);
vu(0x36, VRSQH, VD, DE, VT);
op(0x37, VNOP);
op(0x38, INVALID);
op(0x39, INVALID);
op(0x3a, INVALID);
op(0x3b, INVALID);
op(0x3c, INVALID);
op(0x3d, INVALID);
op(0x3e, INVALID);
op(0x3f, INVALID);
vu(0x38, VZERO, VD, VS, VT); //VEXTT
vu(0x39, VZERO, VD, VS, VT); //VEXTQ
vu(0x3a, VZERO, VD, VS, VT); //VEXTN
vu(0x3b, VZERO, VD, VS, VT);
vu(0x3c, VZERO, VD, VS, VT); //VINST
vu(0x3d, VZERO, VD, VS, VT); //VINSQ
vu(0x3e, VZERO, VD, VS, VT); //VINSN
op(0x3f, VNOP); //VNULL
}
#undef E
#undef DE

View File

@ -1,38 +1,39 @@
//Bus-facing 32-bit read. Addresses up to 0x0403'ffff hit RSP memory (bit 12 selects
//IMEM over DMEM); anything above is forwarded to the register handler ioRead().
auto RSP::readWord(u32 address) -> u32 {
  if(address > 0x0403'ffff) return ioRead(address);
  return (address & 0x1000) ? imem.read<Word>(address) : dmem.read<Word>(address);
}
auto RSP::ioRead(u32 address) -> u32 {
address = (address & 0x3ffff) >> 2;
n32 data;
if(address == 0) {
//SP_PBUS_ADDRESS
data.bit( 0,11) = dma.pbusAddress;
data.bit(12) = dma.pbusRegion;
data.bit( 0,11) = dma.current.pbusAddress;
data.bit(12) = dma.current.pbusRegion;
}
if(address == 1) {
//SP_DRAM_ADDRESS
data.bit(0,23) = dma.dramAddress;
data.bit(0,23) = dma.current.dramAddress;
}
if(address == 2) {
//SP_READ_LENGTH
data.bit( 0,11) = dma.read.length;
data.bit(12,19) = dma.read.count;
data.bit(20,31) = dma.read.skip;
}
if(address == 3) {
//SP_WRITE_LENGTH
data.bit( 0,11) = dma.write.length;
data.bit(12,19) = dma.write.count;
data.bit(20,31) = dma.write.skip;
if(address == 2 || address == 3) {
//SP_READ_LENGTH or SP_WRITE_LENGTH
data.bit( 0,11) = dma.current.length;
data.bit(12,19) = dma.current.count;
data.bit(20,31) = dma.current.skip;
}
if(address == 4) {
//SP_STATUS
data.bit( 0) = status.halted;
data.bit( 1) = status.broken;
data.bit( 2) = !dma.requests.empty();
data.bit( 3) = dma.requests.full();
data.bit( 2) = dma.busy.any();
data.bit( 3) = dma.full.any();
data.bit( 4) = status.full;
data.bit( 5) = status.singleStep;
data.bit( 6) = status.interruptOnBreak;
@ -48,12 +49,12 @@ auto RSP::readWord(u32 address) -> u32 {
if(address == 5) {
//SP_DMA_FULL
data.bit(0) = dma.requests.full();
data.bit(0) = dma.full.any();
}
if(address == 6) {
//SP_DMA_BUSY
data.bit(0) = !dma.requests.empty();
data.bit(0) = dma.busy.any();
}
if(address == 7) {
@ -66,86 +67,77 @@ auto RSP::readWord(u32 address) -> u32 {
return data;
}
auto RSP::writeWord(u32 address, u32 data_) -> void {
//Bus-facing 32-bit write. Addresses up to 0x0403'ffff hit RSP memory (bit 12 selects
//IMEM over DMEM); anything above is forwarded to the register handler ioWrite().
auto RSP::writeWord(u32 address, u32 data) -> void {
  if(address > 0x0403'ffff) return ioWrite(address, data);

  if(address & 0x1000) {
    //IMEM contents are changing, so invalidate recompiled code before the store
    recompiler.invalidate();
    imem.write<Word>(address, data);
  } else {
    dmem.write<Word>(address, data);
  }
}
auto RSP::ioWrite(u32 address, u32 data_) -> void {
address = (address & 0x3ffff) >> 2;
n32 data = data_;
if(address == 0) {
//SP_PBUS_ADDRESS
dma.pbusAddress = data.bit( 0,11);
dma.pbusRegion = data.bit(12);
dma.pending.pbusAddress.bit(3,11) = data.bit( 3,11);
dma.pending.pbusRegion = data.bit(12);
}
if(address == 1) {
//SP_DRAM_ADDRESS
dma.dramAddress = data.bit(0,23);
dma.pending.dramAddress.bit(3,23) = data.bit(3,23);
}
if(address == 2) {
//SP_READ_LENGTH
dma.read.length = data.bit( 0,11);
dma.read.count = data.bit(12,19);
dma.read.skip = data.bit(20,31);
if(!dma.requests.full()) {
DMA::Request request;
request.type = DMA::Request::Type::Read;
request.pbusRegion = dma.pbusRegion;
request.pbusAddress = dma.pbusAddress & ~7;
request.dramAddress = dma.dramAddress & ~7;
request.length = 1 + (dma.read.length | 7);
request.count = 1 + (dma.read.count);
request.skip = dma.read.skip & ~7;
dma.requests.write(request);
queue.insert(Queue::RSP_DMA, request.length * request.count / 4);
}
dma.pending.length.bit(3,11) = data.bit( 3,11);
dma.pending.count = data.bit(12,19);
dma.pending.skip.bit(3,11) = data.bit(23,31);
dma.full.read = 1;
dma.full.write = 0;
dmaTransferStart();
}
if(address == 3) {
//SP_WRITE_LENGTH
dma.write.length = data.bit( 0,11);
dma.write.count = data.bit(12,19);
dma.write.skip = data.bit(20,31);
if(!dma.requests.full()) {
DMA::Request request;
request.type = DMA::Request::Type::Write;
request.pbusRegion = dma.pbusRegion;
request.pbusAddress = dma.pbusAddress & ~7;
request.dramAddress = dma.dramAddress & ~7;
request.length = 1 + (dma.write.length | 7);
request.count = 1 + (dma.write.count);
request.skip = dma.write.skip & ~7;
dma.requests.write(request);
queue.insert(Queue::RSP_DMA, request.length * request.count / 4);
}
dma.pending.length.bit(3,11) = data.bit( 3,11);
dma.pending.count = data.bit(12,19);
dma.pending.skip.bit(3,11) = data.bit(23,31);
dma.full.write = 1;
dma.full.read = 0;
dmaTransferStart();
}
if(address == 4) {
//SP_STATUS
if(data.bit( 0)) status.halted = 0;
if(data.bit( 1)) status.halted = 1;
if(data.bit( 0) && !data.bit( 1)) status.halted = 0;
if(data.bit( 1) && !data.bit( 0)) status.halted = 1;
if(data.bit( 2)) status.broken = 0;
if(data.bit( 3)) mi.lower(MI::IRQ::SP);
if(data.bit( 4)) mi.raise(MI::IRQ::SP);
if(data.bit( 5)) status.singleStep = 0;
if(data.bit( 6)) status.singleStep = 1;
if(data.bit( 7)) status.interruptOnBreak = 0;
if(data.bit( 8)) status.interruptOnBreak = 1;
if(data.bit( 9)) status.signal[0] = 0;
if(data.bit(10)) status.signal[0] = 1;
if(data.bit(11)) status.signal[1] = 0;
if(data.bit(12)) status.signal[1] = 1;
if(data.bit(13)) status.signal[2] = 0;
if(data.bit(14)) status.signal[2] = 1;
if(data.bit(15)) status.signal[3] = 0;
if(data.bit(16)) status.signal[3] = 1;
if(data.bit(17)) status.signal[4] = 0;
if(data.bit(18)) status.signal[4] = 1;
if(data.bit(19)) status.signal[5] = 0;
if(data.bit(20)) status.signal[5] = 1;
if(data.bit(21)) status.signal[6] = 0;
if(data.bit(22)) status.signal[6] = 1;
if(data.bit(23)) status.signal[7] = 0;
if(data.bit(24)) status.signal[7] = 1;
if(data.bit( 3) && !data.bit( 4)) mi.lower(MI::IRQ::SP);
if(data.bit( 4) && !data.bit( 3)) mi.raise(MI::IRQ::SP);
if(data.bit( 5) && !data.bit( 6)) status.singleStep = 0;
if(data.bit( 6) && !data.bit( 5)) status.singleStep = 1;
if(data.bit( 7) && !data.bit( 8)) status.interruptOnBreak = 0;
if(data.bit( 8) && !data.bit( 7)) status.interruptOnBreak = 1;
if(data.bit( 9) && !data.bit(10)) status.signal[0] = 0;
if(data.bit(10) && !data.bit( 9)) status.signal[0] = 1;
if(data.bit(11) && !data.bit(12)) status.signal[1] = 0;
if(data.bit(12) && !data.bit(11)) status.signal[1] = 1;
if(data.bit(13) && !data.bit(14)) status.signal[2] = 0;
if(data.bit(14) && !data.bit(13)) status.signal[2] = 1;
if(data.bit(15) && !data.bit(16)) status.signal[3] = 0;
if(data.bit(16) && !data.bit(15)) status.signal[3] = 1;
if(data.bit(17) && !data.bit(18)) status.signal[4] = 0;
if(data.bit(18) && !data.bit(17)) status.signal[4] = 1;
if(data.bit(19) && !data.bit(20)) status.signal[5] = 0;
if(data.bit(20) && !data.bit(19)) status.signal[5] = 1;
if(data.bit(21) && !data.bit(22)) status.signal[6] = 0;
if(data.bit(22) && !data.bit(21)) status.signal[6] = 1;
if(data.bit(23) && !data.bit(24)) status.signal[7] = 0;
if(data.bit(24) && !data.bit(23)) status.signal[7] = 1;
}
if(address == 5) {
@ -158,7 +150,7 @@ auto RSP::writeWord(u32 address, u32 data_) -> void {
if(address == 7) {
//SP_SEMAPHORE
if(!data.bit(0)) status.semaphore = 0;
status.semaphore = 0;
}
debugger.ioSCC(Write, address, data);

View File

@ -272,7 +272,16 @@ auto RSP::Recompiler::emitEXECUTE(u32 instruction) -> bool {
}
//INVALID
case 0x26 ... 0x27: {
case 0x26: {
return 0;
}
//LWU Rt,Rs,i16
case 0x27: {
lea(reg(1), Rt);
lea(reg(2), Rs);
mov32(reg(3), imm(i16));
call(&RSP::LWU);
return 0;
}
@ -795,9 +804,13 @@ auto RSP::Recompiler::emitVU(u32 instruction) -> bool {
return 0;
}
//INVALID
//VSUT (broken)
case 0x12: {
return 0;
lea(reg(1), Vd);
lea(reg(2), Vs);
lea(reg(3), Vt);
callvu(&RSP::VZERO);
return 0;
}
//VABS Vd,Vs,Vt(e)
@ -827,9 +840,13 @@ auto RSP::Recompiler::emitVU(u32 instruction) -> bool {
return 0;
}
//INVALID
//Broken opcodes: VADDB, VSUBB, VACCB, VSUCB, VSAD, VSAC, VSUM
case 0x16 ... 0x1c: {
return 0;
lea(reg(1), Vd);
lea(reg(2), Vs);
lea(reg(3), Vt);
callvu(&RSP::VZERO);
return 0;
}
//VSAR Vd,Vs,E
@ -840,9 +857,13 @@ auto RSP::Recompiler::emitVU(u32 instruction) -> bool {
return 0;
}
//INVALID
//Invalid opcodes
case 0x1e ... 0x1f: {
return 0;
lea(reg(1), Vd);
lea(reg(2), Vs);
lea(reg(3), Vt);
callvu(&RSP::VZERO);
return 0;
}
//VLT Vd,Vs,Vt(e)
@ -973,6 +994,10 @@ auto RSP::Recompiler::emitVU(u32 instruction) -> bool {
//INVALID
case 0x2e ... 0x2f: {
lea(reg(1), Vd);
lea(reg(2), Vs);
lea(reg(3), Vt);
callvu(&RSP::VZERO);
return 0;
}
@ -1042,10 +1067,39 @@ auto RSP::Recompiler::emitVU(u32 instruction) -> bool {
//VNOP
case 0x37: {
call(&RSP::VNOP);
return 0;
}
//Broken opcodes: VEXTT, VEXTQ, VEXTN
case 0x38 ... 0x3a: {
lea(reg(1), Vd);
lea(reg(2), Vs);
lea(reg(3), Vt);
callvu(&RSP::VZERO);
return 0;
}
//INVALID
case 0x38 ... 0x3f: {
case 0x3b: {
lea(reg(1), Vd);
lea(reg(2), Vs);
lea(reg(3), Vt);
callvu(&RSP::VZERO);
return 0;
}
//Broken opcodes: VINST, VINSQ, VINSN
case 0x3c ... 0x3e: {
lea(reg(1), Vd);
lea(reg(2), Vs);
lea(reg(3), Vt);
callvu(&RSP::VZERO);
return 0;
}
//VNULL
case 0x3f: {
call(&RSP::VNOP);
return 0;
}

View File

@ -119,6 +119,10 @@ auto RSP::power(bool reset) -> void {
recompiler.allocator.resize(64_MiB, bump_allocator::executable | bump_allocator::zero_fill, buffer);
recompiler.reset();
}
if constexpr(Accuracy::RSP::SISD) {
platform->status("RSP vectorization disabled (no SSE 4.1 support)");
}
}
}

View File

@ -43,39 +43,36 @@ struct RSP : Thread, Memory::IO<RSP> {
} pipeline;
//dma.cpp
auto dmaTransfer() -> void;
auto dmaTransferStart() -> void;
auto dmaTransferStep() -> void;
//io.cpp
auto readWord(u32 address) -> u32;
auto writeWord(u32 address, u32 data) -> void;
auto ioRead(u32 address) -> u32;
auto ioWrite(u32 address, u32 data) -> void;
//serialization.cpp
auto serialize(serializer&) -> void;
struct DMA {
n1 pbusRegion;
n12 pbusAddress;
n24 dramAddress;
struct Transfer {
n12 length;
n12 skip;
n8 count;
} read, write;
struct Request {
//serialization.cpp
auto serialize(serializer&) -> void;
enum class Type : u32 { Read, Write } type;
struct Regs {
n1 pbusRegion;
n12 pbusAddress;
n24 dramAddress;
n16 length;
n16 skip;
n16 count;
};
nall::queue<Request[2]> requests;
n12 length;
n12 skip;
n8 count;
auto serialize(serializer&) -> void;
} pending, current;
struct Status {
n1 read;
n1 write;
auto any() -> n1 { return read | write; }
} busy, full;
} dma;
struct Status : Memory::IO<Status> {
@ -111,7 +108,7 @@ struct RSP : Thread, Memory::IO<RSP> {
};
r32 r[32];
u32 pc;
u12 pc;
} ipu;
struct Branch {
@ -119,10 +116,10 @@ struct RSP : Thread, Memory::IO<RSP> {
auto inDelaySlot() const -> bool { return state == DelaySlot; }
auto reset() -> void { state = Step; }
auto take(u32 address) -> void { state = Take; pc = address; }
auto take(u12 address) -> void { state = Take; pc = address; }
auto delaySlot() -> void { state = DelaySlot; }
u64 pc = 0;
u12 pc = 0;
u32 state = Step;
} branch;
@ -150,6 +147,7 @@ struct RSP : Thread, Memory::IO<RSP> {
auto LHU(r32& rt, cr32& rs, s16 imm) -> void;
auto LUI(r32& rt, u16 imm) -> void;
auto LW(r32& rt, cr32& rs, s16 imm) -> void;
auto LWU(r32& rt, cr32& rs, s16 imm) -> void;
auto NOR(r32& rd, cr32& rs, cr32& rt) -> void;
auto OR(r32& rd, cr32& rs, cr32& rt) -> void;
auto ORI(r32& rt, cr32& rs, u16 imm) -> void;
@ -308,6 +306,7 @@ struct RSP : Thread, Memory::IO<RSP> {
template<u8 e> auto VSUB(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VSUBC(r128& vd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VXOR(r128& rd, cr128& vs, cr128& vt) -> void;
template<u8 e> auto VZERO(r128& rd, cr128& vs, cr128& vt) -> void;
//unserialized:
u16 reciprocals[512];

View File

@ -6,16 +6,12 @@ auto RSP::serialize(serializer& s) -> void {
s(pipeline.address);
s(pipeline.instruction);
s(dma.pbusRegion);
s(dma.pbusAddress);
s(dma.dramAddress);
s(dma.read.length);
s(dma.read.skip);
s(dma.read.count);
s(dma.write.length);
s(dma.write.skip);
s(dma.write.count);
s(dma.requests);
s(dma.pending);
s(dma.current);
s(dma.busy.read);
s(dma.busy.write);
s(dma.full.read);
s(dma.full.write);
s(status.semaphore);
s(status.halted);
@ -49,8 +45,7 @@ auto RSP::serialize(serializer& s) -> void {
}
}
auto RSP::DMA::Request::serialize(serializer& s) -> void {
s((u32&)type);
auto RSP::DMA::Regs::serialize(serializer& s) -> void {
s(pbusRegion);
s(pbusAddress);
s(dramAddress);

View File

@ -1,8 +1,8 @@
auto SI::dmaRead() -> void {
run();
for(u32 offset = 0; offset < 64; offset += 2) {
u16 data = bus.read<Half>(io.readAddress + offset);
bus.write<Half>(io.dramAddress + offset, data);
pif.run();
for(u32 offset = 0; offset < 64; offset += 4) {
u32 data = pif.readWord(io.readAddress + offset);
rdram.ram.write<Word>(io.dramAddress + offset, data);
}
io.dmaBusy = 0;
io.interrupt = 1;
@ -10,12 +10,12 @@ auto SI::dmaRead() -> void {
}
auto SI::dmaWrite() -> void {
for(u32 offset = 0; offset < 64; offset += 2) {
u16 data = bus.read<Half>(io.dramAddress + offset);
bus.write<Half>(io.writeAddress + offset, data);
for(u32 offset = 0; offset < 64; offset += 4) {
u32 data = rdram.ram.read<Word>(io.dramAddress + offset);
pif.writeWord(io.writeAddress + offset, data);
}
io.dmaBusy = 0;
io.interrupt = 1;
mi.raise(MI::IRQ::SI);
run();
pif.run();
}

View File

@ -26,366 +26,6 @@ auto SI::unload() -> void {
node.reset();
}
//Computes a 5-bit CRC over a 16-bit pak address, feeding the address in MSB first.
//scan() compares the result against the low 5 bits of the second address byte sent
//by the game and ignores the pak access when they differ.
auto SI::addressCRC(u16 address) const -> n5 {
  n5 crc = 0;
  for(u32 i : range(16)) {
    //term folded in when the top bit of the 5-bit register is set before the shift
    //(named `feedback` because `xor` is a reserved alternative operator token in ISO C++)
    n5 feedback = crc & 0x10 ? 0x15 : 0x00;
    crc <<= 1;
    if(address & 0x8000) crc |= 1;
    address <<= 1;
    crc ^= feedback;
  }
  return crc;
}
//Computes an 8-bit CRC over the first 32 bytes of `data`, MSB first within each byte,
//followed by eight extra shift steps (i == 32) that feed in no data bits.
//NOTE(review): bytes 0..31 are read regardless of the view's length - confirm callers
//always pass at least 32 valid bytes, or that array_view tolerates the overrun.
auto SI::dataCRC(array_view<u8> data) const -> n8 {
  n8 crc = 0;
  for(u32 i : range(33)) {
    for(u32 j : reverse(range(8))) {
      //term folded in when the top bit of the 8-bit register is set before the shift
      //(named `feedback` because `xor` is a reserved alternative operator token in ISO C++)
      n8 feedback = crc & 0x80 ? 0x85 : 0x00;
      crc <<= 1;
      if(i < 32) {
        if(data[i] & 1 << j) crc |= 1;
      }
      crc ^= feedback;
    }
  }
  return crc;
}
//Services the command bits stored in the last byte of PIF RAM (offset 0x3f).
//Every recognised bit except the polling bit (0x01) is cleared as it is handled, and
//the updated byte is written back once all bits have been processed.
auto SI::run() -> void {
  auto flags = pi.ram.read<Byte>(0x3f);

  //reports whether a command bit is set, clearing it in the local copy when it is
  auto acknowledge = [&](int mask) -> bool {
    if(!(flags & mask)) return false;
    flags &= ~mask;
    return true;
  };

  //controller polling
  //todo: this flag is supposed to be cleared, but doing so breaks inputs
  if(flags & 0x01) scan();

  //CIC-NUS-6105 challenge/response
  if(acknowledge(0x02)) challenge();

  //unknown purpose
  if(acknowledge(0x04)) {
    debug(unimplemented, "[SI::main] flags & 0x04");
  }

  //must be sent within 5s of the console booting, or SM5 will lock the N64
  acknowledge(0x08);

  //PIF ROM lockout
  if(acknowledge(0x10)) pi.io.romLockout = 1;

  //initialization: acknowledged by setting the completion flag
  if(acknowledge(0x20)) flags |= 0x80;

  //clear PIF RAM
  if(acknowledge(0x40)) pi.ram.fill();

  pi.ram.write<Byte>(0x3f, flags);
}
//Walks the command list stored in the 64 bytes of PIF RAM and writes each response
//back in place, directly after the command bytes it answers.
//
//Framing, as handled below:
//  0x00        skip: advance to the next channel
//  0xfd        channel reset (ignored)
//  0xfe        end of packets
//  0xff        alignment padding
//  otherwise   <send length> <recv length> <send bytes...> <recv bytes...>
//Only the low 6 bits of each length are used. When a command produces no valid
//response, bit 7 is set in the stored recv-length byte; "read controller state" also
//rewrites that byte, setting bit 6 when more than 4 bytes were requested.
//
//Channels 0-3 map to the four controller ports; channels 4 and up are answered by the
//cartridge EEPROM. Pak commands are only honoured on channels 0-3, because `channel`
//can advance past the end of the four-entry controller table.
auto SI::scan() -> void {
  ControllerPort* controllers[4] = {
    &controllerPort1,
    &controllerPort2,
    &controllerPort3,
    &controllerPort4,
  };

  static constexpr bool Debug = 0;
  if constexpr(Debug) {
    print("{\n");
    for(u32 y : range(8)) {
      print(" ");
      for(u32 x : range(8)) {
        print(hex(pi.ram.read<Byte>(y * 8 + x), 2L), " ");
      }
      print("\n");
    }
    print("}\n");
  }

  n3 channel = 0; //0-5
  for(u32 offset = 0; offset < 64;) {
    n8 send = pi.ram.read<Byte>(offset++);
    if(send == 0x00) { channel++; continue; }
    if(send == 0xfd) continue; //channel reset
    if(send == 0xfe) break; //end of packets
    if(send == 0xff) continue; //alignment padding
    n8 recvOffset = offset;  //remembered so status flags can be written into the recv-length byte
    n8 recv = pi.ram.read<Byte>(offset++);
    if(recv == 0xfe) break; //end of packets

    //clear flags from lengths
    send &= 0x3f;
    recv &= 0x3f;

    n8 input[64];
    for(u32 index : range(send)) {
      input[index] = pi.ram.read<Byte>(offset++);
    }
    n8 output[64];
    b1 valid = 0;

    //status
    if(input[0] == 0x00 || input[0] == 0xff) {
      //controller
      if(channel < 4) {
        if(auto& device = controllers[channel]->device) {
          if(auto gamepad = dynamic_cast<Gamepad*>(device.data())) {
            output[0] = 0x05; //0x05 = gamepad
            output[1] = 0x00;
            output[2] = 0x02; //0x02 = nothing present in controller slot
            if(gamepad->ram || gamepad->motor || gamepad->transferPak) {
              output[2] = 0x01; //0x01 = pak present
            }
          }
          if(dynamic_cast<Mouse*>(device.data())) {
            output[0] = 0x02; //0x02 = mouse
            output[1] = 0x00;
            output[2] = 0x00;
          }
          valid = 1;
        }
      }

      //cartridge EEPROM (4kbit)
      if(channel >= 4 && cartridge.eeprom.size == 512) {
        output[0] = 0x00;
        output[1] = 0x80;
        output[2] = 0x00;
        valid = 1;
      }

      //cartridge EEPROM (16kbit)
      if(channel >= 4 && cartridge.eeprom.size == 2048) {
        output[0] = 0x00;
        output[1] = 0xc0;
        output[2] = 0x00;
        valid = 1;
      }
    }

    //read controller state
    if(input[0] == 0x01) {
      if(channel < 4 && controllers[channel]->device) {
        u32 data = controllers[channel]->device->read();
        output[0] = data >> 24;
        output[1] = data >> 16;
        output[2] = data >> 8;
        output[3] = data >> 0;
        if(recv <= 4) {
          pi.ram.write<Byte>(recvOffset, 0x00 | recv & 0x3f);
        } else {
          pi.ram.write<Byte>(recvOffset, 0x40 | recv & 0x3f);
        }
        valid = 1;
      }
    }

    //read pak
    //(channel < 4 keeps the controllers[] lookup in bounds; other channels get no response)
    if(channel < 4 && input[0] == 0x02 && send >= 3 && recv >= 1) {
      if(auto& device = controllers[channel]->device) {
        if(auto gamepad = dynamic_cast<Gamepad*>(device.data())) {
          //controller pak
          if(auto& ram = gamepad->ram) {
            u32 address = (input[1] << 8 | input[2] << 0) & ~31;
            if(addressCRC(address) == (n5)input[2]) {
              for(u32 index : range(recv - 1)) {
                output[index] = ram.read<Byte>(address++);
              }
              output[recv - 1] = dataCRC({&output[0], recv - 1});
              valid = 1;
            }
          }

          //rumble pak
          if(gamepad->motor) {
            u32 address = (input[1] << 8 | input[2] << 0) & ~31;
            if(addressCRC(address) == (n5)input[2]) {
              for(u32 index : range(recv - 1)) {
                output[index] = 0x80;
              }
              output[recv - 1] = dataCRC({&output[0], recv - 1});
              valid = 1;
            }
          }

          //transfer pak
          if(auto& transferPak = gamepad->transferPak) {
            u32 address = (input[1] << 8 | input[2] << 0) & ~31;
            if(addressCRC(address) == (n5)input[2]) {
              for(u32 index : range(recv - 1)) {
                output[index] = transferPak.read(address++);
              }
              output[recv - 1] = dataCRC({&output[0], recv - 1});
              valid = 1;
            }
          }
        }
      }
    }

    //write pak
    //(channel < 4 keeps the controllers[] lookup in bounds; other channels get no response)
    if(channel < 4 && input[0] == 0x03 && send >= 3 && recv >= 1) {
      if(auto& device = controllers[channel]->device) {
        if(auto gamepad = dynamic_cast<Gamepad*>(device.data())) {
          //controller pak
          if(auto& ram = gamepad->ram) {
            u32 address = (input[1] << 8 | input[2] << 0) & ~31;
            if(addressCRC(address) == (n5)input[2]) {
              for(u32 index : range(send - 3)) {
                ram.write<Byte>(address++, input[3 + index]);
              }
              output[0] = dataCRC({&input[3], send - 3});
              valid = 1;
            }
          }

          //rumble pak
          if(gamepad->motor) {
            u32 address = (input[1] << 8 | input[2] << 0) & ~31;
            if(addressCRC(address) == (n5)input[2]) {
              output[0] = dataCRC({&input[3], send - 3});
              valid = 1;
              gamepad->rumble(input[3] & 1);
            }
          }

          //transfer pak
          if(auto& transferPak = gamepad->transferPak) {
            u32 address = (input[1] << 8 | input[2] << 0) & ~31;
            if(addressCRC(address) == (n5)input[2]) {
              for(u32 index : range(send - 3)) {
                transferPak.write(address++, input[3 + index]);
              }
              output[0] = dataCRC({&input[3], send - 3});
              valid = 1;
            }
          }
        }
      }
    }

    //read EEPROM
    if(input[0] == 0x04 && send >= 2) {
      u32 address = input[1] * 8;  //input[1] is a block number; each block is 8 bytes
      for(u32 index : range(recv)) {
        output[index] = cartridge.eeprom.read<Byte>(address++);
      }
      valid = 1;
    }

    //write EEPROM
    if(input[0] == 0x05 && send >= 2 && recv >= 1) {
      u32 address = input[1] * 8;  //input[1] is a block number; each block is 8 bytes
      for(u32 index : range(send - 2)) {
        cartridge.eeprom.write<Byte>(address++, input[2 + index]);
      }
      output[0] = 0x00;
      valid = 1;
    }

    //RTC status
    if(input[0] == 0x06) {
      debug(unimplemented, "[SI::main] RTC status");
    }

    //RTC read
    if(input[0] == 0x07) {
      debug(unimplemented, "[SI::main] RTC read");
    }

    //RTC write
    if(input[0] == 0x08) {
      debug(unimplemented, "[SI::main] RTC write");
    }

    //no handler produced a response: flag the error in the recv-length byte
    if(!valid) {
      pi.ram.write<Byte>(recvOffset, 0x80 | recv & 0x3f);
    }
    //the response bytes are stored regardless of validity, directly after the command bytes
    for(u32 index : range(recv)) {
      pi.ram.write<Byte>(offset++, output[index]);
    }
    channel++;
  }

  if constexpr(Debug) {
    print("[\n");
    for(u32 y : range(8)) {
      print(" ");
      for(u32 x : range(8)) {
        print(hex(pi.ram.read<Byte>(y * 8 + x), 2L), " ");
      }
      print("\n");
    }
    print("]\n");
  }
}
//CIC-NUS-6105 anti-piracy challenge/response
//Reads the 15-byte challenge stored at PIF RAM offsets 0x30-0x3e, transforms it one
//nibble at a time, and overwrites the same 15 bytes with the response.
//NOTE(review): n4/n3/n1 are presumably fixed-width naturals that truncate on
//assignment; the arithmetic below appears to rely on that wrap-around - confirm.
auto SI::challenge() -> void {
//next-key table, indexed by (sel << 4 | data): two banks of 16 entries chosen by `sel`
static n4 lut[32] = {
0x4, 0x7, 0xa, 0x7, 0xe, 0x5, 0xe, 0x1,
0xc, 0xf, 0x8, 0xf, 0x6, 0x3, 0x6, 0x9,
0x4, 0x1, 0xa, 0x7, 0xe, 0x5, 0xe, 0x1,
0xc, 0x9, 0x8, 0x5, 0x6, 0x3, 0xc, 0x9,
};
n4 challenge[30];
n4 response[30];
//15 bytes -> 30 nibbles
//(high nibble first, then low nibble)
for(u32 address : range(15)) {
auto data = pi.ram.read<Byte>(0x30 + address);
challenge[address << 1 | 0] = data >> 4;
challenge[address << 1 | 1] = data >> 0;
}
//running state: `key` starts at 0xb and `sel` (the table bank) starts at 0
n4 key = 0xb;
n1 sel = 0;
for(u32 address : range(30)) {
//the response nibble is key + 5 * challenge nibble, stored in a 4-bit value
n4 data = key + 5 * challenge[address];
response[address] = data;
//the next key comes from the bank selected by the current `sel`
key = lut[sel << 4 | data];
//derive the next `sel`: start from bit 3 of data, with the low bits as a magnitude
n1 mod = data >> 3;
n3 mag = data >> 0;
if(mod) mag = ~mag;
if(mag % 3 != 1) mod = !mod;
//when the second bank was in use, these data values force the next bank
if(sel) {
if(data == 0x1 || data == 0x9) mod = 1;
if(data == 0xb || data == 0xe) mod = 0;
}
sel = mod;
}
//30 nibbles -> 15 bytes
//(written back over the challenge at the same offsets)
for(u32 address : range(15)) {
n8 data = 0;
data |= response[address << 1 | 0] << 4;
data |= response[address << 1 | 1] << 0;
pi.ram.write<Byte>(0x30 + address, data);
}
}
//Power-on / reset hook: value-initializes the SI register block.
//The `reset` argument is not consulted; both cases are treated the same here.
auto SI::power(bool reset) -> void {
io = {};
}

View File

@ -16,11 +16,6 @@ struct SI : Memory::IO<SI> {
//si.cpp
auto load(Node::Object) -> void;
auto unload() -> void;
auto addressCRC(u16 address) const -> n5;
auto dataCRC(array_view<u8> data) const -> n8;
auto run() -> void;
auto scan() -> void;
auto challenge() -> void;
auto power(bool reset) -> void;
//dma.cpp

View File

@ -46,6 +46,7 @@ auto System::serialize(serializer& s, bool synchronize) -> void {
s(vi);
s(ai);
s(pi);
s(pif);
s(ri);
s(si);
s(cpu);

View File

@ -42,7 +42,7 @@ auto System::game() -> string {
auto System::run() -> void {
while(!vi.refreshed) cpu.main();
vi.refreshed = false;
if (!pi.io.romLockout) si.run();
if (!pif.io.romLockout) pif.run();
}
auto System::load(Node::System& root, string name) -> bool {
@ -80,6 +80,7 @@ auto System::load(Node::System& root, string name) -> bool {
vi.load(node);
ai.load(node);
pi.load(node);
pif.load(node);
ri.load(node);
si.load(node);
cpu.load(node);
@ -108,6 +109,7 @@ auto System::unload() -> void {
vi.unload();
ai.unload();
pi.unload();
pif.unload();
ri.unload();
si.unload();
cpu.unload();
@ -143,6 +145,7 @@ auto System::power(bool reset) -> void {
vi.power(reset);
ai.power(reset);
pi.power(reset);
pif.power(reset);
ri.power(reset);
si.power(reset);
cpu.power(reset);

View File

@ -40,7 +40,7 @@ auto VI::load(Node::Object parent) -> void {
return a << 48 | r << 32 | g << 16 | b << 0;
}
});
#if defined(VULKAN)
if(vulkan.enable) {
screen->setSize(vulkan.outputUpscale * 640, vulkan.outputUpscale * 480);
@ -153,12 +153,6 @@ auto VI::refresh() -> void {
u32 height = vi.io.yscale <= 0x400 ? 239 : 478;
screen->setViewport(0, 0, width, height);
if(vi.io.colorDepth == 0 || io.dramAddress == 0 || (signed)(vi.io.hend - vi.io.hstart) <= 0 || vi.io.hstart >= 640) {
//blank screen
memory::fill<u32>(screen->pixels(1).data(), 640 * 576);
return;
}
if(vi.io.colorDepth == 2) {
//15bpp
for(u32 y : range(height)) {

View File

@ -39,10 +39,17 @@ endif
# common commands
ifeq ($(shell echo ^^),^)
# cmd
delete = $(info Deleting $1 ...) @del /q $(subst /,\,$1)
rdelete = $(info Deleting $1 ...) @del /s /q $(subst /,\,$1) && if exist $(subst /,\,$1) (rmdir /s /q $(subst /,\,$1))
fixpath = $(subst /,\\,$1)
mkdir = @if not exist $(call fixpath,$1) (mkdir $(call fixpath,$1))
copy = @copy $(call fixpath,$1) $(call fixpath,$2)
rcopy = @xcopy /e /q /y $(call fixpath,$1) $(call fixpath,$2)
delete = $(info Deleting $1 ...) @del /q $(call fixpath,$1)
rdelete = $(info Deleting $1 ...) @if exist $(call fixpath,$1) (rmdir /s /q $(call fixpath,$1))
else
# sh
mkdir = @mkdir -p $1
copy = @cp $1 $2
rcopy = @cp -R $1 $2
delete = $(info Deleting $1 ...) @rm -f $1
rdelete = $(info Deleting $1 ...) @rm -rf $1
endif
@ -92,16 +99,12 @@ ifeq ($(build),debug)
symbols = true
flags += -Og -DBUILD_DEBUG
else ifeq ($(build),stable)
lto = true
flags += -O1 -DBUILD_STABLE
else ifeq ($(build),minified)
lto = true
flags += -Os -DBUILD_MINIFIED
else ifeq ($(build),release)
lto = true
flags += -O2 -DBUILD_RELEASE
else ifeq ($(build),optimized)
lto = true
flags += -O3 -fomit-frame-pointer -DBUILD_OPTIMIZED
else
$(error unrecognized build type.)
@ -211,6 +214,12 @@ nall.verbose:
$(info Compiling $(subst ../,,$<) ...)
@$(call compile)
$(object.path):
$(call mkdir,$(object.path))
$(output.path):
$(call mkdir,$(output.path))
# function compile([arguments])
compile = \
$(strip \

View File

@ -0,0 +1,247 @@
#pragma once
#include <nall/file.hpp>
#include <nall/maybe.hpp>
#include <nall/string.hpp>
//#include <libchdr/chd.h>
namespace nall::Decode {
//Describes the track/index layout of a CHD disc image and provides sector reads.
//The libchdr handle (chd_file*) is currently commented out, so only the layout
//bookkeeping types are live.
struct CHD {
  ~CHD();

  //One index inside a track. lba and end are inclusive sector numbers; negative
  //values mean "not set" (sectorCount() reports 0 while end is negative).
  struct Index {
    auto sectorCount() const -> u32;

    u8 number = 0xff;  //00-99
    s32 lba = -1;      //first sector of the index
    s32 end = -1;      //last sector of the index (inclusive)
    s32 chd_lba = -1;  //presumably the matching sector position inside the CHD data; -1 when unset
  };

  //One track: its type string and the indices it is made of.
  struct Track {
    auto sectorCount() const -> u32;

    u8 number = 0xff;  //01-99
    string type;
    vector<Index> indices;
    maybe<s32> pregap;
    maybe<s32> postgap;
  };

  auto load(const string& location) -> bool;
  auto read(u32 sector) -> vector<u8>;
  auto sectorCount() const -> u32;

  vector<Track> tracks;

private:
  file_buffer fp;
  //chd_file* chd = nullptr;
  const int chd_sector_size = 2352 + 96;
  size_t chd_hunk_size = 0;  //initialized so it never holds an indeterminate value
  vector<u8> chd_hunk_buffer;
  int chd_current_hunk = -1;
};
//Destructor. Currently does nothing: the chd_close() call below is commented out.
inline CHD::~CHD() {
/*if (chd != nullptr) {
chd_close(chd);
}*/
}
//Opens the file at location for reading and, as currently written, always returns false.
//If the file cannot be opened, a message is printed first. If it can be opened, the
//function still returns false because the libchdr-based parsing below is commented out.
//NOTE(review): in the disabled code, the old-metadata fallback path's sscanf fills only
//track_no/type/subtype/frames, yet pregap_frames, pgtype and postgap_frames are read
//afterwards without having been set on that path — worth fixing before re-enabling.
inline auto CHD::load(const string& location) -> bool {
fp = file::open(location, file::mode::read);
if(!fp) {
print("CHD: Failed to open ", location, "\n");
return false;
}
//parsing is disabled: report failure unconditionally
return false;
/*chd_error err = chd_open_file(fp.handle(), CHD_OPEN_READ, nullptr, &chd);
if (err != CHDERR_NONE) {
print("CHD: Failed to open ", location, ": ", chd_error_string(err), "\n");
return false;
}
const chd_header* header = chd_get_header(chd);
chd_hunk_size = header->hunkbytes;
if ((chd_hunk_size % chd_sector_size) != 0) {
print("CHD: hunk size (", chd_hunk_size, ") is not a multiple of ", chd_sector_size, "\n");
return false;
}
chd_hunk_buffer.resize(chd_hunk_size);
u32 disc_lba = 0;
u32 chd_lba = 0;
// Fetch track structure
while(true) {
char metadata[256];
char type[256];
char subtype[256];
char pgtype[256];
char pgsub[256];
u32 metadata_size;
int track_no;
int frames;
int pregap_frames;
int postgap_frames;
// First, attempt to fetch CDROMv2 metadata
err = chd_get_metadata(chd, CDROM_TRACK_METADATA2_TAG, tracks.size(), metadata, sizeof(metadata), &metadata_size, nullptr, nullptr);
if (err == CHDERR_NONE) {
if (std::sscanf(metadata, CDROM_TRACK_METADATA2_FORMAT, &track_no, type, subtype, &frames, &pregap_frames, pgtype, pgsub, &postgap_frames) != 8) {
print("CHD: Invalid track v2 metadata: ", metadata, "\n");
return false;
}
} else {
// That failed, so try to fetch CDROM (old) metadata
err = chd_get_metadata(chd, CDROM_TRACK_METADATA_TAG, tracks.size(), metadata, sizeof(metadata), &metadata_size, nullptr, nullptr);
if (err != CHDERR_NONE) {
// Both meta-data types failed to fetch, so assume there are no further tracks
break;
}
if (std::sscanf(metadata, CDROM_TRACK_METADATA_FORMAT, &track_no, type, subtype, &frames) != 4) {
print("CHD: Invalid track metadata: ", metadata, "\n");
return false;
}
}
// We currently only support RAW and audio tracks; log an error and exit if we see anything different
auto typeStr = string{type};
if (!(typeStr.find("_RAW") || typeStr.find("AUDIO"))) {
print("CHD: Unsupported track type: ", type, "\n");
return false;
}
// Ensure two second pregap is present
const bool pregap_in_file = (pregap_frames > 0 && pgtype[0] == 'V');
if (pregap_frames <= 0 && typeStr != "AUDIO") {
pregap_frames = 2 * 75;
}
// Add the new track
Track track;
track.number = track_no;
track.type = type;
track.pregap = pregap_frames;
track.postgap = postgap_frames;
// Pregap
if (pregap_frames > 0) {
Index index;
index.number = 0;
index.lba = disc_lba;
index.end = disc_lba + pregap_frames - 1;
if (pregap_in_file) {
if (pregap_frames > frames) {
print("CHD: pregap length ", pregap_frames, " exceeds track length ", frames, "\n");
return false;
}
index.chd_lba = chd_lba;
chd_lba += pregap_frames;
frames -= pregap_frames;
}
disc_lba += pregap_frames;
track.indices.append(index);
}
// index1 = track data
{
Index index;
index.number = 1;
index.lba = disc_lba;
index.end = disc_lba + frames - 1;
index.chd_lba = chd_lba;
track.indices.append(index);
disc_lba += frames;
chd_lba += frames;
// chdman pads each track to a 4-frame boundary
chd_lba = (chd_lba + 3) / 4 * 4;
}
// index2 = postgap
if (postgap_frames > 0) {
Index index;
index.number = 2;
index.lba = disc_lba;
index.end = disc_lba + postgap_frames - 1;
track.indices.append(index);
disc_lba += postgap_frames;
}
tracks.append(track);
}
return true;*/
}
//Returns the data for the given disc sector.
//As currently written this always returns an empty vector: the lookup and hunk-read
//logic below is commented out, so the sector argument is not used.
inline auto CHD::read(u32 sector) -> vector<u8> {
// Convert LBA in CD-ROM to LBA in CHD
/*for(auto& track : tracks) {
for(auto& index : track.indices) {
if (sector >= index.lba && sector <= index.end) {
auto chd_lba = (sector - index.lba) + index.chd_lba;
vector<u8> output;
output.resize(2352);
int hunk = (chd_lba * chd_sector_size) / chd_hunk_size;
int offset = (chd_lba * chd_sector_size) % chd_hunk_size;
if (hunk != chd_current_hunk) {
chd_read(chd, hunk, chd_hunk_buffer.data());
chd_current_hunk = hunk;
}
// Audio data is in big-endian, so we need to byteswap
if (track.type == "AUDIO") {
u8* src_ptr = chd_hunk_buffer.data() + offset;
u8* dst_ptr = output.data();
const int value_count = 2352 / sizeof(uint16_t);
for (int i = 0; i < value_count; i++) {
u16 value;
memcpy(&value, src_ptr, sizeof(value));
value = (value << 8) | (value >> 8);
memcpy(dst_ptr, &value, sizeof(value));
src_ptr += sizeof(value);
dst_ptr += sizeof(value);
}
} else {
std::copy(chd_hunk_buffer.data() + offset, chd_hunk_buffer.data() + offset + 2352, output.data());
}
return output;
}
}
}
print("CHD: Attempting to read from unmapped sector ", sector, "\n");*/
return {};
}
//Total number of sectors on the disc: the sum of every track's sector count.
inline auto CHD::sectorCount() const -> u32 {
  u32 total = 0;
  for(const auto& entry : tracks) {
    total += entry.sectorCount();
  }
  return total;
}
//Number of sectors in this track: the sum of the sector counts of its indices.
inline auto CHD::Track::sectorCount() const -> u32 {
  u32 total = 0;
  for(const auto& entry : indices) {
    total += entry.sectorCount();
  }
  return total;
}
//Number of sectors covered by this index (lba through end, inclusive).
//An index whose end is negative (unset) covers no sectors.
inline auto CHD::Index::sectorCount() const -> u32 {
  if(end >= 0) {
    return end - lba + 1;
  }
  return 0;
}
}

View File

@ -154,6 +154,10 @@ struct file_buffer {
fileOffset = seekOffset;
}
//Returns the stored FILE* as-is (no check is made here that a file is open).
auto handle() const -> FILE* {
return fileHandle;
}
auto offset() const -> u64 {
if(!fileHandle) return 0;
return fileOffset;

View File

@ -173,6 +173,7 @@ namespace nall {
struct Architecture {
static constexpr bool x86 = 1;
static constexpr bool amd64 = 0;
static constexpr bool sse41 = 0;
static constexpr bool arm64 = 0;
static constexpr bool arm32 = 0;
static constexpr bool ppc64 = 0;
@ -183,6 +184,11 @@ namespace nall {
struct Architecture {
static constexpr bool x86 = 0;
static constexpr bool amd64 = 1;
#ifdef __SSE4_1__
static constexpr bool sse41 = 1;
#else
static constexpr bool sse41 = 0;
#endif
static constexpr bool arm64 = 0;
static constexpr bool arm32 = 0;
static constexpr bool ppc64 = 0;
@ -193,6 +199,7 @@ namespace nall {
struct Architecture {
static constexpr bool x86 = 0;
static constexpr bool amd64 = 0;
static constexpr bool sse41 = 0;
static constexpr bool arm64 = 1;
static constexpr bool arm32 = 0;
static constexpr bool ppc64 = 0;
@ -203,6 +210,7 @@ namespace nall {
struct Architecture {
static constexpr bool x86 = 0;
static constexpr bool amd64 = 0;
static constexpr bool sse41 = 0;
static constexpr bool arm64 = 0;
static constexpr bool arm32 = 1;
static constexpr bool ppc64 = 0;
@ -213,6 +221,7 @@ namespace nall {
struct Architecture {
static constexpr bool x86 = 0;
static constexpr bool amd64 = 0;
static constexpr bool sse41 = 0;
static constexpr bool arm64 = 0;
static constexpr bool arm32 = 0;
static constexpr bool ppc64 = 1;
@ -223,6 +232,7 @@ namespace nall {
struct Architecture {
static constexpr bool x86 = 0;
static constexpr bool amd64 = 0;
static constexpr bool sse41 = 0;
static constexpr bool arm64 = 0;
static constexpr bool arm32 = 0;
static constexpr bool ppc64 = 0;

View File

@ -72,7 +72,24 @@ inline auto user() -> string {
// /home/username/Desktop/
// c:/users/username/Desktop/
inline auto desktop(string_view name = {}) -> string {
return {user(), "Desktop/", name};
#if defined(PLATFORM_WINDOWS)
wchar_t path[PATH_MAX] = L"";
SHGetFolderPathW(nullptr, CSIDL_DESKTOP | CSIDL_FLAG_CREATE, nullptr, 0, path);
string result = (const char*)utf8_t(path);
result.transform("\\", "/");
#elif defined(PLATFORM_MACOS)
string result = {user(), "Desktop/"};
#else
string result;
if(const char *env = getenv("XDG_DESKTOP_DIR")) {
result = string(env);
} else {
result = {user(), "Desktop/"};
}
#endif
if(!result) result = ".";
if(!result.endsWith("/")) result.append("/");
return result.append(name);
}
//todo: MacOS uses the same location for userData() and userSettings()

View File

@ -163,7 +163,7 @@ inline auto spinloop() -> void {
usleep(1);
}
#if defined(PLATFORM_MACOS)
#if defined(PLATFORM_MACOS) && !defined(MSG_NOSIGNAL)
#define MSG_NOSIGNAL 0
#endif

View File

@ -83,8 +83,8 @@ struct priority_queue<T[Size]> {
}
auto remove(const T& event) -> void {
for(auto& entry : heap) {
if(entry.event == event) entry.valid = false;
for(u32 i = 0; i < size; i++) {
if(heap[i].event == event) heap[i].valid = false;
}
}

View File

@ -60,14 +60,10 @@ inline auto within(s64 offset, s64 length, s64 min, s64 max) -> bool {
static_assert(lo <= hi);
static constexpr s64 range = hi - lo + 1;
s64 lhs = (offset - lo) % range;
s64 rhs = (offset + length - 1) % range;
s64 rhs = (lhs + length - 1) % range;
min = (min - lo) % range;
max = (max - lo) % range;
if(rhs < lhs) {
return lhs <= max || rhs >= min;
} else {
return max >= lhs && min <= rhs;
}
return lhs >= min && lhs <= max || rhs >= min && rhs <= max;
}
//returns index of target within {offset ... offset+length-1} in range {lo ... hi}

View File

@ -53,8 +53,4 @@
struct mem : public op_base {
mem(sreg base, sljit_sw offset) : op_base(SLJIT_MEM1(base.fst), offset) {}
};
struct unused : public op_base {
unused() : op_base(SLJIT_UNUSED, 0) {}
};
//};

View File

@ -24,30 +24,31 @@
template<typename C, typename V, typename... P>
alwaysinline auto call(V (C::*function)(P...)) {
static_assert(sizeof...(P) <= 3);
sljit_s32 type = SLJIT_ARG1(SW);
if constexpr(sizeof...(P) >= 1) type |= SLJIT_ARG2(SW);
if constexpr(sizeof...(P) >= 2) type |= SLJIT_ARG3(SW);
if constexpr(sizeof...(P) >= 3) type |= SLJIT_ARG4(SW);
if constexpr(!std::is_void_v<V>) type |= SLJIT_RET(SW);
sljit_s32 type = SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 1);
if constexpr(sizeof...(P) >= 1) type |= SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 2);
if constexpr(sizeof...(P) >= 2) type |= SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 3);
if constexpr(sizeof...(P) >= 3) type |= SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 4);
if constexpr(!std::is_void_v<V>) type |= SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S0, 0);
sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_OFFSET(imm64{function}.data));
sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_ADDR(imm64{function}.data));
}
template<typename C, typename R, typename... P>
alwaysinline auto call(auto (C::*function)(P...) -> R, C* object) {
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, imm64{object}.data);
sljit_s32 type = SLJIT_ARG1(SW);
if constexpr(!std::is_void_v<R>) type |= SLJIT_RET(SW);
sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_OFFSET(imm64{function}.data));
sljit_s32 type = SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 1);
if constexpr(!std::is_void_v<R>) type |= SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W);
sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_ADDR(imm64{function}.data));
}
template<typename C, typename R, typename... P, typename P0>
alwaysinline auto call(auto (C::*function)(P...) -> R, C* object, P0 p0) {
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, imm64{object}.data);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64{p0}.data);
sljit_s32 type = SLJIT_ARG1(SW) | SLJIT_ARG2(SW);
if constexpr(!std::is_void_v<R>) type |= SLJIT_RET(SW);
sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_OFFSET(imm64{function}.data));
sljit_s32 type = SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 1)
| SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 2);
if constexpr(!std::is_void_v<R>) type |= SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W);
sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_ADDR(imm64{function}.data));
}
template<typename C, typename R, typename... P, typename P0, typename P1>
@ -55,9 +56,11 @@
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, imm64{object}.data);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64{p0}.data);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, imm64{p1}.data);
sljit_s32 type = SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW);
if constexpr(!std::is_void_v<R>) type |= SLJIT_RET(SW);
sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_OFFSET(imm64{function}.data));
sljit_s32 type = SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 1)
| SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 2)
| SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 3);
if constexpr(!std::is_void_v<R>) type |= SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W);
sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_ADDR(imm64{function}.data));
}
template<typename C, typename R, typename... P, typename P0, typename P1, typename P2>
@ -66,8 +69,11 @@
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, imm64{p0}.data);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, imm64{p1}.data);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, imm64{p2}.data);
sljit_s32 type = SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW);
if constexpr(!std::is_void_v<R>) type |= SLJIT_RET(SW);
sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_OFFSET(imm64{function}.data));
sljit_s32 type = SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 1)
| SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 2)
| SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 3)
| SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 4);
if constexpr(!std::is_void_v<R>) type |= SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W);
sljit_emit_icall(compiler, SLJIT_CALL, type, SLJIT_IMM, SLJIT_FUNC_ADDR(imm64{function}.data));
}
//};

View File

@ -32,8 +32,6 @@
OP1(mov64_s32, MOV_S32)
OP1(not32, NOT32)
OP1(not64, NOT)
OP1(neg32, NEG32)
OP1(neg64, NEG)
#undef OP1
//2 operand instructions
@ -77,11 +75,10 @@
#define OPC(name, op) \
template<typename T, typename U> \
auto name(T x, U y, sljit_s32 flags) { \
sljit_emit_op2(compiler, \
SLJIT_##op | flags, \
SLJIT_UNUSED, 0, \
x.fst, x.snd, \
y.fst, y.snd); \
sljit_emit_op2u(compiler, \
SLJIT_##op | flags, \
x.fst, x.snd, \
y.fst, y.snd); \
}
OPC(cmp32, SUB32)
@ -93,7 +90,7 @@
template<typename T, typename U>
auto cmp32_jump(T x, U y, sljit_s32 flags) -> sljit_jump* {
return sljit_emit_cmp(compiler,
SLJIT_I32_OP | flags,
SLJIT_32 | flags,
x.fst, x.snd,
y.fst, y.snd);
}

View File

@ -14,13 +14,13 @@ namespace nall::recompiler {
compiler = sljit_create_compiler(nullptr, &allocator);
sljit_s32 options = 0;
if(args >= 1) options |= SLJIT_ARG1(SW);
if(args >= 2) options |= SLJIT_ARG2(SW);
if(args >= 3) options |= SLJIT_ARG3(SW);
if(args >= 1) options |= SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 1);
if(args >= 2) options |= SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 2);
if(args >= 3) options |= SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_W, 3);
sljit_emit_enter(compiler, 0, options, 4, 3, 0, 0, 0);
sljit_jump* entry = sljit_emit_jump(compiler, SLJIT_JUMP);
epilogue = sljit_emit_label(compiler);
sljit_emit_return(compiler, SLJIT_UNUSED, 0, 0);
sljit_emit_return_void(compiler);
sljit_set_label(entry, sljit_emit_label(compiler));
}
@ -34,7 +34,7 @@ namespace nall::recompiler {
}
auto testJumpEpilog() {
sljit_set_label(sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL | SLJIT_I32_OP, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0), epilogue);
sljit_set_label(sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL | SLJIT_32, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0), epilogue);
}
auto jumpEpilog() {

View File

@ -5,14 +5,16 @@
#include <nall/file.hpp>
#include <nall/string.hpp>
#include <nall/decode/cue.hpp>
#include <nall/decode/chd.hpp>
#include <nall/decode/wav.hpp>
namespace nall::vfs {
struct cdrom : file {
static auto open(const string& cueLocation) -> shared_pointer<cdrom> {
static auto open(const string& location) -> shared_pointer<cdrom> {
auto instance = shared_pointer<cdrom>{new cdrom};
if(instance->load(cueLocation)) return instance;
if(location.iendsWith(".cue") && instance->loadCue(location)) return instance;
if(location.iendsWith(".chd") && instance->loadChd(location)) return instance;
return {};
}
@ -44,7 +46,7 @@ struct cdrom : file {
}
private:
auto load(const string& cueLocation) -> bool {
auto loadCue(const string& cueLocation) -> bool {
Decode::CUE cuesheet;
if(!cuesheet.load(cueLocation)) return false;
@ -166,6 +168,75 @@ private:
return true;
}
//Builds the in-memory disc image (_image) from a CHD file.
//Returns false if the CHD decoder fails to load the file, true otherwise.
//Each image sector occupies 2448 bytes: 2352 bytes of sector data followed by
//96 bytes of subchannel data.
auto loadChd(const string& location) -> bool {
Decode::CHD chd;
if(!chd.load(location)) return false;
CD::Session session;
//lead-in occupies the sectors immediately before LBA 0
session.leadIn.lba = -LeadInSectors;
session.leadIn.end = -1;
//lbaIndex ends up as the sector after the end of the last index processed
s32 lbaIndex = 0;
for(auto& track : chd.tracks) {
//control is 0b0000 for tracks typed "AUDIO" and 0b0100 for everything else
//NOTE(review): track.number / index.number are used as array subscripts without a
//range check here — confirm the decoder guarantees they are within bounds
session.tracks[track.number].control = track.type == "AUDIO" ? 0b0000 : 0b0100;
for(auto& index : track.indices) {
session.tracks[track.number].indices[index.number].lba = index.lba;
session.tracks[track.number].indices[index.number].end = index.end;
lbaIndex = session.tracks[track.number].indices[index.number].end + 1;
}
}
//lead-out starts right after the last index
session.leadOut.lba = lbaIndex;
session.leadOut.end = lbaIndex + LeadOutSectors - 1;
// determine track and index ranges
session.firstTrack = 0xff;
for(u32 track : range(100)) {
if(!session.tracks[track]) continue;
//firstTrack is only assigned once (0xff marks "not found yet")
if(session.firstTrack > 99) session.firstTrack = track;
// find first index
for(u32 indexID : range(100)) {
auto& index = session.tracks[track].indices[indexID];
if(index) { session.tracks[track].firstIndex = indexID; break; }
}
// find last index
for(u32 indexID : reverse(range(100))) {
auto& index = session.tracks[track].indices[indexID];
if(index) { session.tracks[track].lastIndex = indexID; break; }
}
session.lastTrack = track;
}
//allocate room for lead-in, all track sectors and lead-out
_image.resize(2448 * (LeadInSectors + lbaIndex + LeadOutSectors));
//lba is a running sector counter passed to chd.read(); it advances once per sector copied
s32 lba = 0;
for(auto& track : chd.tracks) {
for(auto& index : track.indices) {
for(s32 sector : range(index.sectorCount())) {
auto target = _image.data() + 2448ull * (LeadInSectors + index.lba + sector);
auto sectorData = chd.read(lba);
//presumably copies at most 2352 bytes of whatever chd.read() returned — confirm memory::copy semantics
memory::copy(target, 2352, sectorData.data(), sectorData.size());
lba++;
}
}
}
//generate subchannel data for the whole disc, lead-in and lead-out included
auto subchannel = session.encode(LeadInSectors + session.leadOut.end + 1);
//optional overlay: a ".sub" file next to the image is copied over the generated
//subchannel data, starting after the lead-in and track 1 pregap
if(auto overlay = nall::file::read({Location::notsuffix(location), ".sub"})) {
auto target = subchannel.data() + 96 * (LeadInSectors + Track1Pregap);
auto length = (s64)subchannel.size() - 96 * (LeadInSectors + Track1Pregap);
memory::copy(target, length, overlay.data(), overlay.size());
}
//place each sector's 96 subchannel bytes after its 2352 data bytes
for(u64 sector : range(size() / 2448)) {
auto source = subchannel.data() + sector * 96;
auto target = _image.data() + sector * 2448 + 2352;
memory::copy(target, source, 96);
}
return true;
}
vector<u8> _image;
u64 _offset = 0;

View File

@ -1,5 +1,40 @@
This file is the short summary of the API changes:
21.04.2022 - Non-backward compatible
Floating point comparison types are renamed.
01.03.2022 - Non-backward compatible
Remove SLJIT_NEG. Instead, subtraction from
immediate 0 is preferred.
31.01.2022 - Non-backward compatible
The SLJIT_CURRENT_FLAGS_ADD_SUB option is
split into SLJIT_CURRENT_FLAGS_ADD and
SLJIT_CURRENT_FLAGS_SUB.
27.02.2022 - Non-backward compatible
The SLJIT_F64_ALIGNMENT option is removed.
17.02.2022 - Non-backward compatible
Many floating point operations may destroy flags.
06.02.2022 - Non-backward compatible
The SLJIT_FUNC_OFFSET macro is renamed to SLJIT_FUNC_ADDR.
Furthermore a new SLJIT_FUNC_UADDR macro is added which
returns with an unsigned address.
01.02.2022 - Non-backward compatible
Rework function argument list descriptor macros used by
sljit_emit_enter, sljit_set_context, sljit_emit_call,
and sljit_emit_icall functions.
25.01.2022 - Non-backward compatible
Change SLJIT_I32_OP and SLJIT_F32_OP to SLJIT_32.
24.01.2022 - Non-backward compatible
The SLJIT_UNUSED value is replaced by sljit_emit_op2u and
sljit_emit_return_void functions.
27.05.2021 - Non-backward compatible
The comparison types with the 32 suffix are removed from the
project. The sljit_set_current_flags has a new flag which

View File

@ -0,0 +1,25 @@
/*
* Stack-less Just-In-Time compiler
*
* Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

View File

@ -16,7 +16,7 @@ EXTRA_LDFLAGS=
endif
CPPFLAGS = $(EXTRA_CPPFLAGS) -Isljit_src
CFLAGS += -O2 -Wall
CFLAGS += -O2 -Wall -Wextra -Wconversion -Wsign-compare -Werror
REGEX_CFLAGS += $(CFLAGS) -fshort-wchar
LDFLAGS = $(EXTRA_LDFLAGS)

View File

@ -9,7 +9,7 @@ with SLJIT. Further details can be found in sljitLir.h.
SLJIT is a platform independent assembler which
- provides access to common CPU features
- can be easily ported to wide-spread CPU
architectures (e.g. x86, ARM, POWER, MIPS, SPARC)
architectures (e.g. x86, ARM, POWER, MIPS, SPARC, s390x)
The key challenge of this project is finding a common
subset of CPU features which
@ -46,12 +46,12 @@ instruction.
For example, the following code snippet
is a valid instruction sequence:
sljit_emit_op1(compiler, SLJIT_IMOV,
sljit_emit_op1(compiler, SLJIT_MOV32,
SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0);
// An int32_t value is loaded into SLJIT_R0
sljit_emit_op1(compiler, SLJIT_INEG,
sljit_emit_op1(compiler, SLJIT_NOT32,
SLJIT_R0, 0, SLJIT_R0, 0);
// the int32_t value in SLJIT_R0 is negated
// the int32_t value in SLJIT_R0 is bit inverted
// and the type of the result is still int32_t
The next code snippet is not allowed:
@ -59,9 +59,9 @@ The next code snippet is not allowed:
sljit_emit_op1(compiler, SLJIT_MOV,
SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0);
// An intptr_t value is loaded into SLJIT_R0
sljit_emit_op1(compiler, SLJIT_INEG,
sljit_emit_op1(compiler, SLJIT_NOT32,
SLJIT_R0, 0, SLJIT_R0, 0);
// The result of SLJIT_INEG instruction
// The result of the SLJIT_NOT32 instruction
// is undefined. Even crash is possible
// (e.g. on MIPS-64).
@ -71,7 +71,7 @@ register regardless its previous value:
sljit_emit_op1(compiler, SLJIT_MOV,
SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R1), 0);
// An intptr_t value is loaded into SLJIT_R0
sljit_emit_op1(compiler, SLJIT_IMOV,
sljit_emit_op1(compiler, SLJIT_MOV32,
SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R2), 0);
// From now on SLJIT_R0 contains an int32_t
// value. The previous value is discarded.

View File

@ -33,7 +33,7 @@ static int array_access(long *arr, long narr)
/* Create a SLJIT compiler */
struct sljit_compiler *C = sljit_create_compiler(NULL, NULL);
sljit_emit_enter(C, 0, SLJIT_ARG1(SW), 1, 3, 0, 0, 0);
sljit_emit_enter(C, 0, SLJIT_ARGS2(W, P, W), 1, 3, 0, 0, 0);
/* opt arg R S FR FS local_size */
sljit_emit_op2(C, SLJIT_XOR, SLJIT_S2, 0, SLJIT_S2, 0, SLJIT_S2, 0); // S2 = 0
sljit_emit_op1(C, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, narr); // S1 = narr
@ -41,7 +41,7 @@ static int array_access(long *arr, long narr)
struct sljit_jump *out = sljit_emit_cmp(C, SLJIT_GREATER_EQUAL, SLJIT_S2, 0, SLJIT_S1, 0); // S2 >= a --> jump out
sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_S2), SLJIT_WORD_SHIFT);// R0 = (long *)S0[S2];
sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); // print_num(R0);
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); // print_num(R0);
sljit_emit_op2(C, SLJIT_ADD, SLJIT_S2, 0, SLJIT_S2, 0, SLJIT_IMM, 1); // S2 += 1
sljit_set_label(sljit_emit_jump(C, SLJIT_JUMP), loopstart); // jump loopstart

View File

@ -148,13 +148,13 @@ static void *compile(FILE *src, unsigned long *lcode)
int SP = SLJIT_S0; /* bf SP */
int CELLS = SLJIT_S1; /* bf array */
sljit_emit_enter(C, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 2, 2, 0, 0, 0); /* opt arg R S FR FS local_size */
sljit_emit_enter(C, 0, SLJIT_ARGS2(VOID, W, W), 2, 2, 0, 0, 0); /* opt arg R S FR FS local_size */
sljit_emit_op2(C, SLJIT_XOR, SP, 0, SP, 0, SP, 0); /* SP = 0 */
sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, BF_CELL_SIZE);
sljit_emit_op1(C, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 1);
sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(my_alloc));/* calloc(BF_CELL_SIZE, 1) => R0 */
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS2(P, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(my_alloc));/* calloc(BF_CELL_SIZE, 1) => R0 */
end = sljit_emit_cmp(C, SLJIT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0); /* R0 == 0 --> jump end */
@ -176,10 +176,10 @@ static void *compile(FILE *src, unsigned long *lcode)
break;
case '.':
sljit_emit_op1(C, SLJIT_MOV_U8, SLJIT_R0, 0, SLJIT_MEM2(CELLS, SP), 0); /* R0 = CELLS[SP] */
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(my_putchar)); /* putchar(R0) */
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(my_putchar)); /* putchar(R0) */
break;
case ',':
sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(my_getchar)); /* R0 = getchar() */
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS0(W), SLJIT_IMM, SLJIT_FUNC_ADDR(my_getchar)); /* R0 = getchar() */
sljit_emit_op1(C, SLJIT_MOV_U8, SLJIT_MEM2(CELLS, SP), 0, SLJIT_R0, 0); /* CELLS[SP] = R0 */
break;
case '[':
@ -210,10 +210,10 @@ static void *compile(FILE *src, unsigned long *lcode)
}
sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, CELLS, 0);
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(my_free)); /* free(CELLS) */
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(P, P), SLJIT_IMM, SLJIT_FUNC_ADDR(my_free)); /* free(CELLS) */
sljit_set_label(end, sljit_emit_label(C));
sljit_emit_return(C, SLJIT_UNUSED, 0, 0);
sljit_emit_return_void(C);
code = sljit_generate_code(C);
if (lcode)

View File

@ -29,7 +29,7 @@ static int branch(long a, long b, long c)
struct sljit_compiler *C = sljit_create_compiler(NULL, NULL);
/* 3 arg, 1 temp reg, 3 save reg */
sljit_emit_enter(C, 0, SLJIT_ARG1(SW)|SLJIT_ARG2(SW)|SLJIT_ARG3(SW), 3, 3, 0, 0, 0);
sljit_emit_enter(C, 0, SLJIT_ARGS3(W, W, W, W), 1, 3, 0, 0, 0);
/* R0 = a & 1, S0 is argument a */
sljit_emit_op2(C, SLJIT_AND, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1);

View File

@ -15,7 +15,7 @@ static int add3(long a, long b, long c)
struct sljit_compiler *C = sljit_create_compiler(NULL, NULL);
/* Start a context(function entry), have 3 arguments, discuss later */
sljit_emit_enter(C, 0, SLJIT_ARG1(SW)|SLJIT_ARG2(SW)|SLJIT_ARG3(SW), 3, 3, 0, 0, 0);
sljit_emit_enter(C, 0, SLJIT_ARGS3(W, W, W, W), 1, 3, 0, 0, 0);
/* The first argument of the function is in register SLJIT_S0, the 2nd in SLJIT_S1, etc. */
/* R0 = first */

View File

@ -34,7 +34,7 @@ static int func_call(long a, long b, long c)
/* Create a SLJIT compiler */
struct sljit_compiler *C = sljit_create_compiler(NULL, NULL);
sljit_emit_enter(C, 0, SLJIT_ARG1(SW)|SLJIT_ARG2(SW)|SLJIT_ARG3(SW), 3, 3, 0, 0, 0);
sljit_emit_enter(C, 0, SLJIT_ARGS3(W, W, W, W), 3, 3, 0, 0, 0);
/* a & 1 --> R0 */
sljit_emit_op2(C, SLJIT_AND, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1);
@ -43,7 +43,7 @@ static int func_call(long a, long b, long c)
/* R0 = S1; print_num(R0) */
sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S1, 0);
sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num));
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num));
/* jump out */
out = sljit_emit_jump(C, SLJIT_JUMP);
@ -52,7 +52,7 @@ static int func_call(long a, long b, long c)
/* R0 = c; print_num(R0); */
sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S2, 0);
sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num));
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num));
/* out: */
sljit_set_label(out, sljit_emit_label(C));

View File

@ -32,7 +32,7 @@ static int loop(long a, long b)
struct sljit_compiler *C = sljit_create_compiler(NULL, NULL);
/* 2 arg, 2 temp reg, 2 saved reg */
sljit_emit_enter(C, 0, SLJIT_ARG1(SW)|SLJIT_ARG2(SW), 2, 2, 0, 0, 0);
sljit_emit_enter(C, 0, SLJIT_ARGS2(W, W, W), 2, 2, 0, 0, 0);
/* R1 = 0 */
sljit_emit_op2(C, SLJIT_XOR, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_R1, 0);

View File

@ -92,8 +92,8 @@ static int add3(sljit_sw a, sljit_sw b, sljit_sw c)<br>
/* Create a SLJIT compiler */<br>
struct sljit_compiler *C = sljit_create_compiler();<br>
<br>
/* Start a context(function entry), have 3 arguments, discuss later */<br>
sljit_emit_enter(C, 0, 3, 1, 3, 0, 0, 0);<br>
/* Start a context(function entry), has 3 arguments, discuss later */<br>
sljit_emit_enter(C, 0, SLJIT_ARGS3(W, W, W, W), 1, 3, 0, 0, 0);<br>
<br>
/* The first arguments of function is register SLJIT_S0, 2nd, SLJIT_S1, etc. */<br>
/* R0 = first */<br>
@ -158,8 +158,8 @@ Fortunately, SLJIT have done the most for us, SLJIT_S[0-9] represent those 'safe
registers, SLJIT_R[0-9] however, only for 'temporary used'.<br>
<br>
When a function starts, SLJIT moves the function arguments to the S0, S1, S2 registers; it
means function arguments are always 'safe' in the context, the limit of using stack for
storing arguments make SLJIT support only 3 arguments max.<br>
means function arguments are always 'safe' in the context; a maximum of 4
arguments is supported by SLJIT.<br>
<br>
Sljit_emit_opX is easy to understand, in SLJIT a data value is represented by 2
parameters, it can be a register, an In-memory data, or an immediate number.<br>
@ -215,7 +215,7 @@ static int branch(sljit_sw a, sljit_sw b, sljit_sw c)<br>
struct sljit_compiler *C = sljit_create_compiler();<br>
<br>
/* 3 arg, 1 temp reg, 3 save reg */<br>
sljit_emit_enter(C, 0, 3, 1, 3, 0, 0, 0);<br>
sljit_emit_enter(C, 0, SLJIT_ARGS3(W, W, W, W), 1, 3, 0, 0, 0);<br>
<br>
/* R0 = a & 1, S0 is argument a */<br>
sljit_emit_op2(C, SLJIT_AND, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1);<br>
@ -327,7 +327,7 @@ sljit_sw func(sljit_sw a, sljit_sw b)<br>
<br>
<ul>
/* 2 arg, 2 temp reg, 2 saved reg */<br>
sljit_emit_enter(C, 0, 2, 2, 2, 0, 0, 0);<br>
sljit_emit_enter(C, 0, SLJIT_ARGS2(W, W, W), 2, 2, 0, 0, 0);<br>
<br>
/* R0 = 0 */<br>
sljit_emit_op2(C, SLJIT_XOR, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_R1, 0);<br>
@ -364,13 +364,13 @@ chapter can be found in the attachment.<br>
<h2>Call external function</h2>
It's easy to call an external function in SLJIT, we use sljit_emit_ijump with SLJIT_CALL*
It's easy to call an external function in SLJIT, we use sljit_emit_icall with SLJIT_CALL
operation to do so.<br>
<br>
SLJIT_CALL[N] is use to call a function with N arguments, SLJIT has only SLJIT_CALL0,
CALL1, CALL2, CALL3, which means you can call a function with 3 arguments in max(that
disappoint me, no chance to call fwrite in SLJIT), the arguments for the callee function
are passed from SLJIT_R0, R1 and R2. Keep in mind to maintain those 'temp registers'.<br>
SLJIT_CALL is used to call a function with N arguments; the number of arguments
and the return type are defined in the third parameter of sljit_emit_icall,
just as is done for SLJIT-defined functions.<br>
The arguments for the callee function are passed from SLJIT_R0, R1 and R2. Keep in mind to maintain those 'temp registers'.<br>
<br>
Assume that we have an external function:<br>
<ul>
@ -384,7 +384,7 @@ JIT code to call print_num(S1):
/* R0 = S1; */<br>
sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_S1, 0);<br>
/* print_num(R0) */<br>
sljit_emit_ijump(C, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num));<br>
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num));<br>
</ul>
</div>
<br>
@ -407,11 +407,10 @@ struct point_st {<br>
int y;<br>
short z;<br>
char d;<br>
char e;<br>
</ul>
};<br>
<br>
sljit_emit_op1(C, SLJIT_MOV_SI, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0),<br>
sljit_emit_op1(C, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0),<br>
<ul>
SLJIT_OFFSETOF(struct point_st, y));<br>
</ul>
@ -420,15 +419,15 @@ SLJIT_OFFSETOF(struct point_st, y));<br>
In this case, SLJIT_S0 is the address of the point_st structure, offset of member 'y'
is determined in compile time, the important MOV operation always comes with a
'signed/size' postfix, like this one _SI means 'signed 32bits integer', the postfix
'signed/size' postfix; here, _S32 means 'signed 32-bit integer'. The postfix
list:<br>
<ul>
<b>UB</b> = unsigned byte (8 bit)<br>
<b>SB</b> = signed byte (8 bit)<br>
<b>UH</b> = unsigned half (16 bit)<br>
<b>SH</b> = signed half (16 bit)<br>
<b>UI</b> = unsigned int (32 bit)<br>
<b>SI</b> = signed int (32 bit)<br>
<b>U8</b> = unsigned byte (8 bit)<br>
<b>S8</b> = signed byte (8 bit)<br>
<b>U16</b> = unsigned half (16 bit)<br>
<b>S16</b> = signed half (16 bit)<br>
<b>U32</b> = unsigned int (32 bit)<br>
<b>S32</b> = signed int (32 bit)<br>
<b>P</b> = pointer (sljit_p) size<br>
</ul>
@ -451,9 +450,9 @@ WORD S0[];<br>
R0 = S0[S2]<br>
</ul>
<br>
The array S0 is declared to be WORD, which will be sizeof(sljit_sw) in length.
Sljit use a 'shift' for length representation: (0 for single byte, 1 for 2
bytes, 2 for 4 bytes, 3 for 8bytes)<br>
The array S0 is declared to be WORD (using SLJIT_WORD_SHIFT), which will be sizeof(sljit_sw) in length.
SLJIT uses a 'shift' for length representation (0 for a single byte, 1 for 2
bytes, 2 for 4 bytes, 3 for 8 bytes).<br>
<br>
The file array_access.c demonstrates an array-print example, which should be easy
to understand.<br>
@ -468,7 +467,7 @@ is the only choice.<br>
<div style='font-family:Courier New;font-size:11px'>
<ul>
/* reserved space in stack for sljit_sw arr[3] */<br>
sljit_emit_enter(C, 0, 3, 2, 3, 0, 0, 3 * sizeof(sljit_sw));<br>
sljit_emit_enter(C, 0, SLJIT_ARGS3(W, W, W, W), 2, 3, 0, 0, 3 * sizeof(sljit_sw));<br>
/* opt arg R S FR FS local_size */<br>
<br>
/* arr[0] = S0, SLJIT_SP is the init address of local var */<br>
@ -481,7 +480,7 @@ is the only choice.<br>
/* R0 = arr; in fact SLJIT_SP is the address of arr, but can't do so in SLJIT */<br>
sljit_get_local_base(C, SLJIT_R0, 0, 0); /* get the address of local variables */<br>
sljit_emit_op1(C, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3); /* R1 = 3; */<br>
sljit_emit_ijump(C, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(print_arr));<br>
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS2(W, P, W), SLJIT_IMM, SLJIT_FUNC_ADDR(print_arr));<br>
sljit_emit_return(C, SLJIT_MOV, SLJIT_R0, 0);<br>
</ul>
</div>
@ -573,7 +572,8 @@ with GCC -O2<br>
Err... Ok, the optimization here may be weak, or, optimization there is crazy... :-)<br>
<table width="100%" cellspacing=0 cellpadding=0>
<tr><td align=right>By wenxichang#163.com, 2015.5.10</td></tr></table>
<tr><td align=right>Originally by wenxichang#163.com, 2015.5.10</td></tr>
</table>
</td><td width=20 class="main"></td></tr>
<tr height=20><td width=20 class="main"></td><td width=720 class="main"></td><td width=20 class="main"></td></tr>

View File

@ -44,20 +44,20 @@ static int struct_access()
/* Create a SLJIT compiler */
struct sljit_compiler *C = sljit_create_compiler(NULL, NULL);
sljit_emit_enter(C, 0, SLJIT_ARG1(SW), 1, 1, 0, 0, 0);
sljit_emit_enter(C, 0, SLJIT_ARGS1(W, W), 1, 1, 0, 0, 0);
/* opt arg R S FR FS local_size */
sljit_emit_op1(C, SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, x)); // S0->x --> R0
sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW)|SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); // print_num(R0);
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); // print_num(R0);
sljit_emit_op1(C, SLJIT_MOV_S32, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, y)); // S0->y --> R0
sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW)|SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); // print_num(R0);
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); // print_num(R0);
sljit_emit_op1(C, SLJIT_MOV_S16, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, z)); // S0->z --> R0
sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW)|SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); // print_num(R0);
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); // print_num(R0);
sljit_emit_op1(C, SLJIT_MOV_S8, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, d)); // S0->z --> R0
sljit_emit_icall(C, SLJIT_CALL, SLJIT_RET(SW)|SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_num)); // print_num(R0);
sljit_emit_op1(C, SLJIT_MOV_S8, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, d)); // S0->d --> R0
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS1(W, P), SLJIT_IMM, SLJIT_FUNC_ADDR(print_num)); // print_num(R0);
sljit_emit_return(C, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(struct point_st, x)); // return S0->x

View File

@ -36,7 +36,7 @@ static int temp_var(long a, long b, long c)
struct sljit_compiler *C = sljit_create_compiler(NULL, NULL);
/* reserved space in stack for long arr[3] */
sljit_emit_enter(C, 0, SLJIT_ARG1(SW)|SLJIT_ARG2(SW)|SLJIT_ARG3(SW), 3, 3, 0, 0, 3 * sizeof(long));
sljit_emit_enter(C, 0, SLJIT_ARGS3(W, W, W, W), 2, 3, 0, 0, 3 * sizeof(long));
/* opt arg R S FR FS local_size */
/* arr[0] = S0, SLJIT_SP is the init address of local var */
@ -49,7 +49,7 @@ static int temp_var(long a, long b, long c)
/* R0 = arr; in fact SLJIT_SP is the address of arr, but can't do so in SLJIT */
sljit_get_local_base(C, SLJIT_R0, 0, 0); /* get the address of local variables */
sljit_emit_op1(C, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 3); /* R1 = 3; */
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARG1(SW)|SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(print_arr));
sljit_emit_icall(C, SLJIT_CALL, SLJIT_ARGS2(W, P, W), SLJIT_IMM, SLJIT_FUNC_ADDR(print_arr));
sljit_emit_return(C, SLJIT_MOV, SLJIT_R0, 0);
/* Generate machine code */

View File

@ -151,8 +151,8 @@ struct stack_fragment {
struct stack {
struct stack_fragment *first;
struct stack_fragment *last;
int index;
int count;
sljit_uw index;
sljit_uw count;
};
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
@ -165,7 +165,7 @@ static void stack_check(struct stack *stack)
if (!stack)
return;
SLJIT_ASSERT(stack->index >= 0 && stack->index < STACK_FRAGMENT_SIZE);
SLJIT_ASSERT(stack->index < STACK_FRAGMENT_SIZE);
if (stack->first == NULL) {
SLJIT_ASSERT(stack->first == NULL && stack->last == NULL);
@ -178,8 +178,6 @@ static void stack_check(struct stack *stack)
SLJIT_ASSERT(stack->index == STACK_FRAGMENT_SIZE - 1 && stack->count == 0);
found = 1;
}
else
SLJIT_ASSERT(stack->index >= 0 && stack->count >= 0);
SLJIT_ASSERT(stack->first->data.prev == NULL);
curr = stack->first;
@ -289,19 +287,19 @@ static SLJIT_INLINE void stack_clone(struct stack *src, struct stack *dst)
static int stack_push_copy(struct stack *stack, int items, int length)
{
struct stack_fragment *frag1;
int ind1;
struct stack_fragment *frag2;
int ind2;
int counter;
sljit_uw ind1, ind2;
sljit_uw counter;
SLJIT_ASSERT(stack->count >= length && items <= length && items > 0);
SLJIT_ASSERT(stack->count >= (sljit_uw)length && items <= length && items > 0);
/* Allocate the necessary elements. */
counter = items;
counter = (sljit_uw)items;
frag1 = stack->last;
ind1 = stack->index;
while (counter > 0) {
if (stack->index + counter >= STACK_FRAGMENT_SIZE) {
SLJIT_ASSERT(counter >= STACK_FRAGMENT_SIZE - stack->index - 1 + 1);
counter -= STACK_FRAGMENT_SIZE - stack->index - 1 + 1;
stack->index = 0;
if (!stack->last->data.next) {
@ -322,22 +320,26 @@ static int stack_push_copy(struct stack *stack, int items, int length)
frag2 = stack->last;
ind2 = stack->index;
while (length > 0) {
frag2->items[ind2--] = frag1->items[ind1--];
if (ind1 < 0) {
ind1 = STACK_FRAGMENT_SIZE - 1;
frag2->items[ind2] = frag1->items[ind1];
if (ind1 == 0) {
ind1 = STACK_FRAGMENT_SIZE;
frag1 = frag1->data.prev;
}
if (ind2 < 0) {
ind2 = STACK_FRAGMENT_SIZE - 1;
if (ind2 == 0) {
ind2 = STACK_FRAGMENT_SIZE;
frag2 = frag2->data.prev;
}
ind1--;
ind2--;
length--;
}
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
stack_check(stack);
#endif
stack->count += items;
stack->count += (sljit_uw)items;
return 0;
}
@ -378,7 +380,7 @@ struct compiler_common {
/* REGEX_ flags. */
int flags;
/* Encoded size of the dfa representation. */
sljit_sw dfa_size;
sljit_uw dfa_size;
/* Number of terms. */
sljit_sw terms_size;
/* Number of state descriptors for one term (same as machine->no_states). */
@ -455,7 +457,7 @@ static int iterate(struct stack *stack, int min, int max)
SLJIT_ASSERT(depth > 0);
depth--;
if (depth == 0)
count = it.count;
count = (int)it.count;
break;
case type_select:
@ -466,7 +468,7 @@ static int iterate(struct stack *stack, int min, int max)
default:
SLJIT_ASSERT(item->type != type_begin && item->type != type_end);
if (depth == 0)
count = it.count;
count = (int)it.count;
len++;
break;
}
@ -483,7 +485,7 @@ static int iterate(struct stack *stack, int min, int max)
return len;
}
count = stack->count - count;
count = (int)stack->count - count;
/* Put an open bracket before the sequence. */
if (stack_push_copy(stack, 1, count))
@ -554,7 +556,7 @@ static int iterate(struct stack *stack, int min, int max)
return len;
}
static int parse_iterator(const regex_char_t *regex_string, int length, struct stack *stack, sljit_sw *dfa_size, int begin)
static int parse_iterator(const regex_char_t *regex_string, int length, struct stack *stack, sljit_uw *dfa_size, int begin)
{
/* We only know that *regex_string == { . */
int val1, val2;
@ -578,7 +580,7 @@ static int parse_iterator(const regex_char_t *regex_string, int length, struct s
regex_string = decode_number(regex_string, length, &val1);
if (val1 < 0)
return -2;
length -= regex_string - from;
length -= (int)(regex_string - from);
if (length == 0)
return -2;
@ -592,7 +594,7 @@ static int parse_iterator(const regex_char_t *regex_string, int length, struct s
if (stack_push(stack, type_id, val1))
return -1;
(*dfa_size)++;
return (regex_string - base_from) + 1;
return (int)(regex_string - base_from) + 1;
}
else {
if (*regex_string != ',')
@ -614,7 +616,7 @@ static int parse_iterator(const regex_char_t *regex_string, int length, struct s
else {
from = regex_string;
regex_string = decode_number(regex_string, length, &val2);
length -= regex_string - from;
length -= (int)(regex_string - from);
if (val2 < 0 || length == 0 || *regex_string != '}' || val2 < val1)
return -2;
if (val2 == 0) {
@ -629,7 +631,7 @@ static int parse_iterator(const regex_char_t *regex_string, int length, struct s
val1 = iterate(stack, val1, val2);
if (val1 < 0)
return -1;
*dfa_size += val1;
*dfa_size += (sljit_uw)val1;
}
else if (val1 == 0 && val2 == 0) {
if (stack_push(stack, type_asterisk, 0))
@ -650,14 +652,14 @@ static int parse_iterator(const regex_char_t *regex_string, int length, struct s
val1 = iterate(stack, 0, 0);
if (val1 < 0)
return -1;
*dfa_size -= val1;
*dfa_size -= (sljit_uw)val1;
SLJIT_ASSERT(*dfa_size >= 2);
}
else {
/* Ignore. */
SLJIT_ASSERT(val1 == 1 && val2 == 1);
}
return regex_string - base_from;
return (int)(regex_string - base_from);
}
static int parse_char_range(const regex_char_t *regex_string, int length, struct compiler_common *compiler_common)
@ -756,7 +758,7 @@ static int parse_char_range(const regex_char_t *regex_string, int length, struct
if (stack_push(stack, type_rng_end, 0))
return -1;
return regex_string - base_from;
return (int)(regex_string - base_from);
}
static int parse(const regex_char_t *regex_string, int length, struct compiler_common *compiler_common)
@ -950,13 +952,13 @@ static struct stack_item* handle_iteratives(struct stack_item *transitions_ptr,
switch (item->type) {
case type_asterisk:
SLJIT_ASSERT(transitions[item->value].type == type_branch);
transitions[item->value].value = transitions_ptr - transitions;
transitions[item->value].value = (int)(transitions_ptr - transitions);
PUT_TRANSITION(type_branch, item->value + 1);
break;
case type_plus_sign:
SLJIT_ASSERT(transitions[item->value].type == type_branch);
transitions[item->value].value = transitions_ptr - transitions;
transitions[item->value].value = (int)(transitions_ptr - transitions);
break;
case type_qestion_mark:
@ -1004,7 +1006,7 @@ static int generate_transitions(struct compiler_common *compiler_common)
case type_close_br:
if (item->type == type_end)
*--transitions_ptr = *item;
if (stack_push(depth, type_close_br, transitions_ptr - compiler_common->dfa_transitions))
if (stack_push(depth, type_close_br, (int)(transitions_ptr - compiler_common->dfa_transitions)))
return REGEX_MEMORY_ERROR;
break;
@ -1014,13 +1016,13 @@ static int generate_transitions(struct compiler_common *compiler_common)
SLJIT_ASSERT(compiler_common->dfa_transitions[item->value].type == type_jump);
PUT_TRANSITION(type_branch, item->value + 1);
PUT_TRANSITION(type_jump, item->value);
item->value = transitions_ptr - compiler_common->dfa_transitions;
item->value = (int)(transitions_ptr - compiler_common->dfa_transitions);
}
else {
SLJIT_ASSERT(item->type == type_close_br);
item->type = type_select;
PUT_TRANSITION(type_jump, item->value);
item->value = transitions_ptr - compiler_common->dfa_transitions;
item->value = (int)(transitions_ptr - compiler_common->dfa_transitions);
}
break;
@ -1029,7 +1031,7 @@ static int generate_transitions(struct compiler_common *compiler_common)
case type_qestion_mark:
if (item->type != type_qestion_mark)
PUT_TRANSITION(type_branch, 0);
if (stack_push(depth, item->type, transitions_ptr - compiler_common->dfa_transitions))
if (stack_push(depth, item->type, (int)(transitions_ptr - compiler_common->dfa_transitions)))
return REGEX_MEMORY_ERROR;
break;
@ -1185,14 +1187,14 @@ static int generate_search_states(struct compiler_common *compiler_common)
break;
case type_char:
search_states_ptr->type = compiler_common->terms_size++;
search_states_ptr->type = (int)compiler_common->terms_size++;
break;
case type_newline:
if (transitions_ptr->value)
search_states_ptr->type = 1;
else
search_states_ptr->type = compiler_common->terms_size++;
search_states_ptr->type = (int)compiler_common->terms_size++;
SLJIT_ASSERT(search_states_ptr->type == 1 || search_states_ptr->type == 2);
break;
@ -1203,13 +1205,13 @@ static int generate_search_states(struct compiler_common *compiler_common)
break;
case type_rng_start:
search_states_ptr->type = compiler_common->terms_size;
search_states_ptr->type = (int)compiler_common->terms_size;
rng_start = search_states_ptr;
break;
case type_rng_end:
search_states_ptr->type = compiler_common->terms_size++;
/* Ok, this is a blunt over estimation :) */
search_states_ptr->type = (int)compiler_common->terms_size++;
/* This is an over estimation. */
if (compiler_common->longest_range_size < search_states_ptr - rng_start)
compiler_common->longest_range_size = search_states_ptr - rng_start;
break;
@ -1280,8 +1282,8 @@ static int trace_transitions(int from, struct compiler_common *compiler_common)
/* Code generator */
/* --------------------------------------------------------------------- */
#define TERM_OFFSET_OF(index, offs) (((index) * no_states + (offs)) * sizeof(sljit_sw))
#define TERM_REL_OFFSET_OF(base, offs) ((base) + ((offs) * sizeof(sljit_sw)))
#define TERM_OFFSET_OF(index, offs) (((index) * no_states + (offs)) * (sljit_sw)sizeof(sljit_sw))
#define TERM_REL_OFFSET_OF(base, offs) ((base) + ((offs) * (sljit_sw)sizeof(sljit_sw)))
#define EMIT_OP1(type, arg1, arg2, arg3, arg4) \
CHECK(sljit_emit_op1(compiler, type, arg1, arg2, arg3, arg4))
@ -1289,6 +1291,9 @@ static int trace_transitions(int from, struct compiler_common *compiler_common)
#define EMIT_OP2(type, arg1, arg2, arg3, arg4, arg5, arg6) \
CHECK(sljit_emit_op2(compiler, type, arg1, arg2, arg3, arg4, arg5, arg6))
#define EMIT_OP2U(type, arg1, arg2, arg3, arg4) \
CHECK(sljit_emit_op2u(compiler, type, arg1, arg2, arg3, arg4))
#define EMIT_LABEL(label) \
label = sljit_emit_label(compiler); \
CHECK(!label)
@ -1314,7 +1319,7 @@ static int compile_uncond_tran(struct compiler_common *compiler_common, int reg)
struct stack_item *search_states = compiler_common->search_states;
int flags = compiler_common->flags;
sljit_sw no_states = compiler_common->no_states;
sljit_uw head = 0;
sljit_sw head = 0;
sljit_sw offset, value;
if (reg != R_CURR_STATE || !(compiler_common->flags & REGEX_FAKE_MATCH_BEGIN)) {
@ -1401,8 +1406,8 @@ static int compile_cond_tran(struct compiler_common *compiler_common, sljit_sw c
if (!(flags & REGEX_ID_CHECK)) {
if (!(flags & REGEX_MATCH_BEGIN)) {
/* Check whether item is inserted. */
EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0);
EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + (sljit_sw)sizeof(sljit_sw), SLJIT_IMM, -1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + (sljit_sw)sizeof(sljit_sw), R_NEXT_HEAD, 0);
if (offset > 0) {
EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset);
}
@ -1412,19 +1417,19 @@ static int compile_cond_tran(struct compiler_common *compiler_common, sljit_sw c
EMIT_LABEL(label1);
sljit_set_label(jump1, label1);
EMIT_CMP(jump1, SLJIT_LESS_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);
EMIT_CMP(jump1, SLJIT_LESS_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * (sljit_sw)sizeof(sljit_sw), R_TEMP, 0);
EMIT_LABEL(label1);
sljit_set_label(jump2, label1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * (sljit_sw)sizeof(sljit_sw), R_TEMP, 0);
EMIT_LABEL(label1);
sljit_set_label(jump1, label1);
}
else {
/* Check whether item is inserted. */
EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0);
EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + (sljit_sw)sizeof(sljit_sw), SLJIT_IMM, -1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + (sljit_sw)sizeof(sljit_sw), R_NEXT_HEAD, 0);
if (offset > 0) {
EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset);
}
@ -1437,8 +1442,8 @@ static int compile_cond_tran(struct compiler_common *compiler_common, sljit_sw c
EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 2));
/* Check whether item is inserted. */
EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0);
EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + (sljit_sw)sizeof(sljit_sw), SLJIT_IMM, -1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + (sljit_sw)sizeof(sljit_sw), R_NEXT_HEAD, 0);
if (offset > 0) {
EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset);
}
@ -1448,7 +1453,7 @@ static int compile_cond_tran(struct compiler_common *compiler_common, sljit_sw c
EMIT_LABEL(label1);
sljit_set_label(jump1, label1);
EMIT_OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);
EMIT_OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * (sljit_sw)sizeof(sljit_sw), R_TEMP, 0);
EMIT_JUMP(jump1, SLJIT_LESS);
EMIT_JUMP(jump3, SLJIT_NOT_EQUAL); /* Greater. */
@ -1462,7 +1467,7 @@ static int compile_cond_tran(struct compiler_common *compiler_common, sljit_sw c
sljit_set_label(jump4, label1);
}
EMIT_OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, SLJIT_MEM1(R_NEXT_STATE), offset + 3 * sizeof(sljit_sw), R_TEMP, 0);
EMIT_OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + 3 * (sljit_sw)sizeof(sljit_sw), R_TEMP, 0);
EMIT_JUMP(jump4, SLJIT_GREATER_EQUAL);
EMIT_JUMP(jump5, SLJIT_JUMP);
@ -1470,7 +1475,7 @@ static int compile_cond_tran(struct compiler_common *compiler_common, sljit_sw c
EMIT_LABEL(label1);
sljit_set_label(jump3, label1);
sljit_set_label(jump2, label1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * (sljit_sw)sizeof(sljit_sw), R_TEMP, 0);
EMIT_OP1(SLJIT_MOV, R_TEMP, 0, SLJIT_MEM1(R_CURR_STATE), TERM_OFFSET_OF(curr_index, 3));
if (search_states[value].value > 0) {
@ -1483,7 +1488,7 @@ static int compile_cond_tran(struct compiler_common *compiler_common, sljit_sw c
EMIT_LABEL(label1);
sljit_set_label(jump5, label1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 3 * sizeof(sljit_sw), R_TEMP, 0);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 3 * (sljit_sw)sizeof(sljit_sw), R_TEMP, 0);
/* Exit. */
EMIT_LABEL(label1);
@ -1502,8 +1507,8 @@ static int compile_cond_tran(struct compiler_common *compiler_common, sljit_sw c
}
/* Check whether item is inserted. */
EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), SLJIT_IMM, -1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + sizeof(sljit_sw), R_NEXT_HEAD, 0);
EMIT_CMP(jump1, SLJIT_NOT_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + (sljit_sw)sizeof(sljit_sw), SLJIT_IMM, -1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + (sljit_sw)sizeof(sljit_sw), R_NEXT_HEAD, 0);
if (offset > 0) {
EMIT_OP1(SLJIT_MOV, R_NEXT_HEAD, 0, SLJIT_IMM, offset);
}
@ -1513,11 +1518,11 @@ static int compile_cond_tran(struct compiler_common *compiler_common, sljit_sw c
EMIT_LABEL(label1);
sljit_set_label(jump1, label1);
EMIT_CMP(jump1, SLJIT_GREATER_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);
EMIT_CMP(jump1, SLJIT_GREATER_EQUAL, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * (sljit_sw)sizeof(sljit_sw), R_TEMP, 0);
EMIT_LABEL(label1);
sljit_set_label(jump2, label1);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * sizeof(sljit_sw), R_TEMP, 0);
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(R_NEXT_STATE), offset + 2 * (sljit_sw)sizeof(sljit_sw), R_TEMP, 0);
EMIT_LABEL(label1);
sljit_set_label(jump1, label1);
@ -1798,7 +1803,7 @@ static sljit_sw compile_range_check(struct compiler_common *compiler_common, slj
/* Main compiler */
/* --------------------------------------------------------------------- */
#define TERM_OFFSET_OF(ind, offs) (((ind) * compiler_common.no_states + (offs)) * sizeof(sljit_sw))
#define TERM_OFFSET_OF(ind, offs) (((ind) * compiler_common.no_states + (offs)) * (sljit_sw)sizeof(sljit_sw))
#define EMIT_OP1(type, arg1, arg2, arg3, arg4) \
CHECK(sljit_emit_op1(compiler_common.compiler, type, arg1, arg2, arg3, arg4))
@ -1904,14 +1909,14 @@ struct regex_machine* regex_compile(const regex_char_t *regex_string, int length
BEGIN_GUARD
compiler_common.machine = (struct regex_machine*)SLJIT_MALLOC(sizeof(struct regex_machine) + (compiler_common.terms_size - 1) * sizeof(sljit_uw), NULL);
compiler_common.machine = (struct regex_machine*)SLJIT_MALLOC(sizeof(struct regex_machine) + (sljit_uw)(compiler_common.terms_size - 1) * sizeof(sljit_uw), NULL);
CHECK(!compiler_common.machine);
compiler_common.compiler = sljit_create_compiler(NULL, NULL);
CHECK(!compiler_common.compiler);
if (compiler_common.longest_range_size > 0) {
compiler_common.range_jump_list = (struct sljit_jump**)SLJIT_MALLOC(sizeof(struct sljit_jump*) * compiler_common.longest_range_size, NULL);
compiler_common.range_jump_list = (struct sljit_jump**)SLJIT_MALLOC(sizeof(struct sljit_jump*) * (sljit_uw)compiler_common.longest_range_size, NULL);
CHECK(!compiler_common.range_jump_list);
}
@ -1961,7 +1966,7 @@ struct regex_machine* regex_compile(const regex_char_t *regex_string, int length
}
/* Step 4.1: Generate entry. */
CHECK(sljit_emit_enter(compiler_common.compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), 5, 5, 0, 0, 0));
CHECK(sljit_emit_enter(compiler_common.compiler, 0, SLJIT_ARGS3(VOID, P, P, 32), 5, 5, 0, 0, 0));
/* Copy arguments to their place. */
EMIT_OP1(SLJIT_MOV, R_REGEX_MATCH, 0, SLJIT_S0, 0);
@ -2183,9 +2188,9 @@ struct regex_machine* regex_compile(const regex_char_t *regex_string, int length
sljit_set_label(best_match_found_jump, label);
if (fast_forward_jump)
sljit_set_label(fast_forward_jump, label);
CHECK(sljit_emit_return(compiler_common.compiler, SLJIT_UNUSED, 0, 0));
CHECK(sljit_emit_return_void(compiler_common.compiler));
for (ind = 1; ind < compiler_common.dfa_size - 1; ind++) {
for (ind = 1; ind < (sljit_sw)compiler_common.dfa_size - 1; ind++) {
if (compiler_common.search_states[ind].type >= 0) {
SLJIT_ASSERT(compiler_common.search_states[ind].type < compiler_common.terms_size);
EMIT_LABEL(label);
@ -2204,7 +2209,7 @@ struct regex_machine* regex_compile(const regex_char_t *regex_string, int length
CHECK(compile_newline_check(&compiler_common, ind));
}
CHECK(trace_transitions(ind, &compiler_common));
CHECK(trace_transitions((int)ind, &compiler_common));
#ifdef REGEX_MATCH_VERBOSE
if (compiler_common.flags & REGEX_MATCH_VERBOSE)
printf("(%3d): ", compiler_common.search_states[ind].type);
@ -2230,10 +2235,10 @@ struct regex_machine* regex_compile(const regex_char_t *regex_string, int length
}
}
if (ind == compiler_common.dfa_size - 1) {
if (ind == (sljit_sw)compiler_common.dfa_size - 1) {
/* Generate an init stub function. */
EMIT_LABEL(label);
CHECK(sljit_emit_enter(compiler_common.compiler, 0, SLJIT_ARG1(SW) | SLJIT_ARG2(SW), 3, 3, 0, 0, 0));
CHECK(sljit_emit_enter(compiler_common.compiler, 0, SLJIT_ARGS2(W, P, P), 3, 3, 0, 0, 0));
if (empty_match_id == -1) {
EMIT_OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), SLJIT_OFFSETOF(struct regex_match, best_begin), SLJIT_IMM, -1);
@ -2329,7 +2334,7 @@ struct regex_match* regex_begin_match(struct regex_machine *machine)
sljit_sw *end;
sljit_sw *entry_addrs;
struct regex_match *match = (struct regex_match*)SLJIT_MALLOC(sizeof(struct regex_match) + (machine->size * 2 - 1) * sizeof(sljit_sw), NULL);
struct regex_match *match = (struct regex_match*)SLJIT_MALLOC(sizeof(struct regex_match) + (sljit_uw)(machine->size * 2 - 1) * sizeof(sljit_sw), NULL);
if (!match)
return NULL;
@ -2406,7 +2411,7 @@ void regex_reset_match(struct regex_match *match)
current = match->head;
current_ptr = match->current;
do {
ind = (current / sizeof(sljit_sw)) + 1;
ind = (current / (sljit_sw)sizeof(sljit_sw)) + 1;
current = current_ptr[ind];
current_ptr[ind] = -1;
} while (current != 0);
@ -2429,26 +2434,26 @@ int regex_get_result(struct regex_match *match, int *end, int *id)
int flags = match->machine->flags;
sljit_sw no_states;
*end = match->best_end;
*id = match->best_id;
*end = (int)match->best_end;
*id = (int)match->best_id;
if (!(flags & (REGEX_MATCH_END | REGEX_FAKE_MATCH_END)))
return match->best_begin;
return (int)match->best_begin;
if (flags & REGEX_FAKE_MATCH_END) {
SLJIT_ASSERT(!(flags & (REGEX_MATCH_BEGIN | REGEX_MATCH_END)));
if (match->best_begin != -1)
return match->best_begin;
return (int)match->best_begin;
no_states = match->machine->no_states;
if (match->current[no_states + 1] == -1)
return -1;
if (flags & REGEX_ID_CHECK)
*id = match->current[no_states + 3];
*id = (int)match->current[no_states + 3];
if (!(flags & REGEX_FAKE_MATCH_BEGIN))
*end = match->index - 1;
*end = (int)match->index - 1;
else
*end = match->index - 2;
return match->current[no_states + 2];
*end = (int)match->index - 2;
return (int)match->current[no_states + 2];
}
else {
/* Check the status of the last code. */
@ -2457,36 +2462,36 @@ int regex_get_result(struct regex_match *match, int *end, int *id)
if (!(flags & REGEX_ID_CHECK)) {
if (match->current[1] == -1)
return -1;
*end = match->index - 1;
return match->current[2];
*end = (int)match->index - 1;
return (int)match->current[2];
}
if (match->current[1] == -1)
return -1;
*end = match->index - 1;
*id = match->current[3];
return match->current[2];
*end = (int)match->index - 1;
*id = (int)match->current[3];
return (int)match->current[2];
}
/* Shortcut is possible in this case. */
if (!(flags & REGEX_ID_CHECK)) {
if (match->current[1] == -1 || match->head == -1)
return -1;
*end = match->index - 1;
*end = (int)match->index - 1;
return 0;
}
if (match->current[1] == -1 || match->head == -1)
return -1;
*end = match->index - 1;
*id = match->current[2];
*end = (int)match->index - 1;
*id = (int)match->current[2];
return 0;
}
}
int regex_is_match_finished(struct regex_match *match)
{
return match->fast_quit;
return (int)match->fast_quit;
}
#ifdef REGEX_MATCH_VERBOSE
@ -2567,10 +2572,10 @@ void regex_continue_match_debug(struct regex_match *match, const regex_char_t *i
current = match->head;
ptr = match->current;
while (current != 0) {
SLJIT_ASSERT(current >= 0 && current < len * sizeof(sljit_sw));
SLJIT_ASSERT((current % (no_states * sizeof(sljit_sw))) == 0);
SLJIT_ASSERT(current >= 0 && current < len * (sljit_sw)sizeof(sljit_sw));
SLJIT_ASSERT((current % (no_states * (sljit_sw)sizeof(sljit_sw))) == 0);
SLJIT_ASSERT(count > 0);
current = ptr[(current / sizeof(sljit_sw)) + 1];
current = ptr[(current / (sljit_sw)sizeof(sljit_sw)) + 1];
count--;
}
SLJIT_ASSERT(count == 0);

View File

@ -62,7 +62,7 @@ void verbose_test(regex_char_t *pattern, regex_char_t *string)
ptr++;
printf("Start test '%s' matches to '%s'\n", pattern, string);
machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);
machine = regex_compile(pattern, (int)(ptr - pattern), REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);
if (error) {
printf("WARNING: Error %d\n", error);
@ -84,7 +84,7 @@ void verbose_test(regex_char_t *pattern, regex_char_t *string)
while (*ptr)
ptr++;
regex_continue_match_debug(match, string, ptr - string);
regex_continue_match_debug(match, string, (int)(ptr - string));
begin = regex_get_result(match, &end, &id);
printf("Math returns: %3d->%3d [%3d]\n", begin, end, id);
@ -104,7 +104,7 @@ struct test_case {
const regex_char_t *string; /* NULL : end of tests. */
};
void run_tests(struct test_case* test, int verbose, int silent)
static void run_tests(struct test_case* test, int verbose, int silent)
{
int error;
const regex_char_t *ptr;
@ -129,7 +129,7 @@ void run_tests(struct test_case* test, int verbose, int silent)
while (*ptr)
ptr++;
machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error);
machine = regex_compile(test->pattern, (int)(ptr - test->pattern), test->flags, &error);
if (error) {
if (!verbose)
@ -164,7 +164,7 @@ void run_tests(struct test_case* test, int verbose, int silent)
regex_free_machine(machine);
return;
}
regex_continue_match_debug(match, test->string, ptr - test->string);
regex_continue_match_debug(match, test->string, (int)(ptr - test->string));
begin = regex_get_result(match, &end, &id);
finished = regex_is_match_finished(match);
@ -183,7 +183,7 @@ void run_tests(struct test_case* test, int verbose, int silent)
#endif
regex_reset_match(match);
regex_continue_match(match, test->string, ptr - test->string);
regex_continue_match(match, test->string, (int)(ptr - test->string));
begin = regex_get_result(match, &end, &id);
finished = regex_is_match_finished(match);
regex_free_match(match);

View File

@ -60,7 +60,7 @@ extern "C" {
SLJIT_LITTLE_ENDIAN : little endian architecture
SLJIT_BIG_ENDIAN : big endian architecture
SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information
SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information
Constants:
SLJIT_NUMBER_OF_REGISTERS : number of available registers
@ -98,6 +98,8 @@ extern "C" {
+ (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
+ (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
+ (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
+ (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \
+ (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
+ (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
+ (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
+ (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \
@ -115,6 +117,8 @@ extern "C" {
&& !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
&& !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \
&& !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
&& !(defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \
&& !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
&& !(defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \
&& !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
&& !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) \
@ -156,7 +160,11 @@ extern "C" {
#define SLJIT_CONFIG_MIPS_32 1
#elif defined(__mips64)
#define SLJIT_CONFIG_MIPS_64 1
#elif defined(__sparc__) || defined(__sparc)
#elif defined (__riscv_xlen) && (__riscv_xlen == 32)
#define SLJIT_CONFIG_RISCV_32 1
#elif defined (__riscv_xlen) && (__riscv_xlen == 64)
#define SLJIT_CONFIG_RISCV_64 1
#elif (defined(__sparc__) || defined(__sparc)) && !defined(_LP64)
#define SLJIT_CONFIG_SPARC_32 1
#elif defined(__s390x__)
#define SLJIT_CONFIG_S390X 1
@ -205,6 +213,8 @@ extern "C" {
#define SLJIT_CONFIG_PPC 1
#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
#define SLJIT_CONFIG_MIPS 1
#elif (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
#define SLJIT_CONFIG_RISCV 1
#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) || (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64)
#define SLJIT_CONFIG_SPARC 1
#endif
@ -274,9 +284,13 @@ extern "C" {
#ifndef SLJIT_INLINE
/* Inline functions. Some old compilers do not support them. */
#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510
#ifdef __SUNPRO_C
#if __SUNPRO_C < 0x560
#define SLJIT_INLINE
#else
#define SLJIT_INLINE inline
#endif /* __SUNPRO_C */
#else
#define SLJIT_INLINE __inline
#endif
#endif /* !SLJIT_INLINE */
@ -319,18 +333,42 @@ extern "C" {
/* Instruction cache flush. */
/****************************/
/*
* TODO:
*
* clang >= 15 could be safe to enable below
* older versions are known to abort in some targets
* https://github.com/PhilipHazel/pcre2/issues/92
*
* beware some vendors (ex: Microsoft, Apple) are known to have
* removed the code to support this builtin even if the call for
* __has_builtin reports it is available.
*
* make sure linking doesn't fail because __clear_cache() is
* missing before changing it or add an exception so that the
* system provided method that should be defined below is used
* instead.
*/
#if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin)
#if __has_builtin(__builtin___clear_cache)
#if __has_builtin(__builtin___clear_cache) && !defined(__clang__)
/*
* https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248
* https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811
* gcc's clear_cache builtin for power and sparc are broken
*/
#if !defined(SLJIT_CONFIG_PPC) && !defined(SLJIT_CONFIG_SPARC_32)
#define SLJIT_CACHE_FLUSH(from, to) \
__builtin___clear_cache((char*)(from), (char*)(to))
#endif
#endif /* __has_builtin(__builtin___clear_cache) */
#endif /* gcc >= 10 */
#endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */
#ifndef SLJIT_CACHE_FLUSH
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
/* Not required to implement on archs with unified caches. */
#define SLJIT_CACHE_FLUSH(from, to)
@ -340,9 +378,9 @@ extern "C" {
/* Supported by all macs since Mac OS 10.5.
However, it does not work on non-jailbroken iOS devices,
although the compilation is successful. */
#include <libkern/OSCacheControl.h>
#define SLJIT_CACHE_FLUSH(from, to) \
sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from))
sys_icache_invalidate((void*)(from), (size_t)((char*)(to) - (char*)(from)))
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
@ -351,18 +389,6 @@ extern "C" {
ppc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
#define SLJIT_CACHE_FLUSH(from, to) \
__builtin___clear_cache((char*)(from), (char*)(to))
#elif defined __ANDROID__
/* Android lacks __clear_cache; instead, cacheflush should be used. */
#define SLJIT_CACHE_FLUSH(from, to) \
cacheflush((long)(from), (long)(to), 0)
#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */
@ -370,14 +396,26 @@ extern "C" {
sparc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
#elif defined _WIN32
#elif defined(_WIN32)
#define SLJIT_CACHE_FLUSH(from, to) \
FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from))
FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from))
#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__)
#define SLJIT_CACHE_FLUSH(from, to) \
__builtin___clear_cache((char*)(from), (char*)(to))
#elif defined __ANDROID__
/* Android ARMv7 with gcc lacks __clear_cache; use cacheflush instead. */
#include <sys/cachectl.h>
#define SLJIT_CACHE_FLUSH(from, to) \
cacheflush((long)(from), (long)(to), 0)
#else
/* Calls __ARM_NR_cacheflush on ARM-Linux. */
/* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. */
#define SLJIT_CACHE_FLUSH(from, to) \
__clear_cache((char*)(from), (char*)(to))
@ -413,6 +451,7 @@ typedef long int sljit_sw;
&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
&& !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
&& !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \
&& !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \
&& !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#define SLJIT_32BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 2
@ -532,13 +571,12 @@ typedef double sljit_f64;
#ifndef SLJIT_UNALIGNED
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
|| (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|| (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
|| (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \
|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
|| (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \
|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \
|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#define SLJIT_UNALIGNED 1
#endif
@ -645,18 +683,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 9
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
#define SLJIT_PREF_SHIFT_REG SLJIT_R2
#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#define SLJIT_NUMBER_OF_REGISTERS 13
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15
#ifndef _WIN64
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
#define SLJIT_LOCALS_OFFSET_BASE 0
#else /* _WIN64 */
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10
#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw))
#endif /* !_WIN64 */
#define SLJIT_PREF_SHIFT_REG SLJIT_R3
@ -664,31 +707,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
#define SLJIT_NUMBER_OF_REGISTERS 26
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
#define SLJIT_LOCALS_OFFSET_BASE 0
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw))
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
#define SLJIT_NUMBER_OF_REGISTERS 23
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX)
#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw))
#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw))
#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
/* Add +1 for double alignment. */
#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * sizeof(sljit_sw))
#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * (sljit_s32)sizeof(sljit_sw))
#else
#define SLJIT_LOCALS_OFFSET_BASE (3 * sizeof(sljit_sw))
#define SLJIT_LOCALS_OFFSET_BASE (3 * (sljit_s32)sizeof(sljit_sw))
#endif /* SLJIT_CONFIG_PPC_64 || _AIX */
#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
@ -696,19 +747,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_REGISTERS 21
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw))
#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw))
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 13
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 6
#else
#define SLJIT_LOCALS_OFFSET_BASE 0
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 29
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
#endif
#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
#define SLJIT_NUMBER_OF_REGISTERS 23
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 12
#define SLJIT_LOCALS_OFFSET_BASE 0
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC)
#define SLJIT_NUMBER_OF_REGISTERS 18
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
/* saved registers (16), return struct pointer (1), space for 6 argument words (1),
4th double arg (2), double alignment (1). */
#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * sizeof(sljit_sw))
#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * (sljit_s32)sizeof(sljit_sw))
#endif
#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
@ -736,12 +801,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
#define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE
#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
#define SLJIT_NUMBER_OF_REGISTERS 0
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
#define SLJIT_LOCALS_OFFSET_BASE 0
#endif
@ -751,13 +820,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \
(SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS)
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64)
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1
#else
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
#endif
#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \
(SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS)
@ -765,9 +827,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
/* CPU status flags management. */
/********************************/
#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \
|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
|| (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \
|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \
|| (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#define SLJIT_HAS_STATUS_FLAGS_STATE 1

View File

@ -66,7 +66,7 @@
/* --------------------------------------------------------------------- */
/* 64 KByte. */
#define CHUNK_SIZE 0x10000
#define CHUNK_SIZE (sljit_uw)0x10000u
/*
alloc_chunk / free_chunk :
@ -112,7 +112,7 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
static SLJIT_INLINE int get_map_jit_flag()
{
sljit_sw page_size;
size_t page_size;
void *ptr;
struct utsname name;
static int map_jit_flag = -1;
@ -235,7 +235,7 @@ struct free_block {
#define AS_FREE_BLOCK(base, offset) \
((struct free_block*)(((sljit_u8*)base) + offset))
#define MEM_START(base) ((void*)(((sljit_u8*)base) + sizeof(struct block_header)))
#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7)
#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7)
static struct free_block* free_blocks;
static sljit_uw allocated_size;

File diff suppressed because it is too large Load Diff

View File

@ -163,13 +163,6 @@ extern "C" {
is not available at all.
*/
/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1
or sljit_emit_op2 operations the result is discarded. Some status
flags must be set when the destination is SLJIT_UNUSED, because the
operation would have no effect otherwise. Other SLJIT operations do
not support SLJIT_UNUSED as a destination operand. */
#define SLJIT_UNUSED 0
/* Scratch registers. */
#define SLJIT_R0 1
#define SLJIT_R1 2
@ -231,9 +224,6 @@ extern "C" {
value. The FR and FS register sets overlap in the same way as R
and S register sets. See above. */
/* Note: SLJIT_UNUSED as destination is not valid for floating point
operations, since they cannot be used for setting flags. */
/* Floating point scratch registers. */
#define SLJIT_FR0 1
#define SLJIT_FR1 2
@ -263,39 +253,38 @@ extern "C" {
/* Argument type definitions */
/* --------------------------------------------------------------------- */
/* Argument type definitions.
Used by SLJIT_[DEF_]ARGx and SLJIT_[DEF]_RET macros. */
#define SLJIT_ARG_TYPE_VOID 0
#define SLJIT_ARG_TYPE_SW 1
#define SLJIT_ARG_TYPE_UW 2
#define SLJIT_ARG_TYPE_S32 3
#define SLJIT_ARG_TYPE_U32 4
#define SLJIT_ARG_TYPE_F32 5
#define SLJIT_ARG_TYPE_F64 6
/* The following argument type definitions are used by sljit_emit_enter,
sljit_set_context, sljit_emit_call, and sljit_emit_icall functions.
The following return type definitions are used by sljit_emit_call
and sljit_emit_icall functions.
When a function is called, the first integer argument must be placed
in SLJIT_R0, the second in SLJIT_R1, and so on. Similarly the first
floating point argument must be placed in SLJIT_FR0, the second in
SLJIT_FR1, and so on.
As for sljit_emit_call and sljit_emit_icall, the first integer argument
must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on.
Similarly the first floating point argument must be placed into SLJIT_FR0,
the second one into SLJIT_FR1, and so on.
As for sljit_emit_enter, the integer arguments can be stored in scratch
or saved registers. The first integer argument without _R postfix is
stored in SLJIT_S0, the next one in SLJIT_S1, and so on. The integer
arguments with _R postfix are placed into scratch registers. The index
of the scratch register is the count of the previous integer arguments
starting from SLJIT_R0. The floating point arguments are always placed
into SLJIT_FR0, SLJIT_FR1, and so on.
Note: if a function is called by sljit_emit_call/sljit_emit_icall and
an argument is stored in a scratch register by sljit_emit_enter,
that argument uses the same scratch register index for both
integer and floating point arguments.
Example function definition:
sljit_f32 SLJIT_FUNC example_c_callback(sljit_sw arg_a,
sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a,
sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d);
Argument type definition:
SLJIT_DEF_RET(SLJIT_ARG_TYPE_F32)
| SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F64)
| SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_U32) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F32)
SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_F32)
| SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_P, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F64, 2)
| SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4)
Short form of argument type definition:
SLJIT_RET(F32) | SLJIT_ARG1(SW) | SLJIT_ARG2(F64)
| SLJIT_ARG3(S32) | SLJIT_ARG4(F32)
SLJIT_ARGS4(32, P, F64, 32, F32)
Argument passing:
arg_a must be placed in SLJIT_R0
@ -303,34 +292,73 @@ extern "C" {
arg_b must be placed in SLJIT_FR0
arg_d must be placed in SLJIT_FR1
Note:
The SLJIT_ARG_TYPE_VOID type is only supported by
SLJIT_DEF_RET, and SLJIT_ARG_TYPE_VOID is also the
default value when SLJIT_DEF_RET is not specified. */
#define SLJIT_DEF_SHIFT 4
#define SLJIT_DEF_RET(type) (type)
#define SLJIT_DEF_ARG1(type) ((type) << SLJIT_DEF_SHIFT)
#define SLJIT_DEF_ARG2(type) ((type) << (2 * SLJIT_DEF_SHIFT))
#define SLJIT_DEF_ARG3(type) ((type) << (3 * SLJIT_DEF_SHIFT))
#define SLJIT_DEF_ARG4(type) ((type) << (4 * SLJIT_DEF_SHIFT))
Examples for argument processing by sljit_emit_enter:
SLJIT_ARGS4(VOID, P, 32_R, F32, W)
Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1
/* Short form of the macros above.
SLJIT_ARGS4(VOID, W, W_R, W, W_R)
Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3
For example the following definition:
SLJIT_DEF_RET(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_F32)
SLJIT_ARGS4(VOID, F64, W, F32, W_R)
Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1
Note: it is recommended to pass the scratch arguments first
followed by the saved arguments:
SLJIT_ARGS4(VOID, W_R, W_R, W, W)
Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1
*/
/* The following flag is only allowed for the integer arguments of
sljit_emit_enter. When the flag is set, the integer argument is
stored in a scratch register instead of a saved register. */
#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8
/* Void result, can only be used by SLJIT_ARG_RETURN. */
#define SLJIT_ARG_TYPE_VOID 0
/* Machine word sized integer argument or result. */
#define SLJIT_ARG_TYPE_W 1
#define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG)
/* 32 bit integer argument or result. */
#define SLJIT_ARG_TYPE_32 2
#define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG)
/* Pointer sized integer argument or result. */
#define SLJIT_ARG_TYPE_P 3
#define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG)
/* 64 bit floating point argument or result. */
#define SLJIT_ARG_TYPE_F64 4
/* 32 bit floating point argument or result. */
#define SLJIT_ARG_TYPE_F32 5
#define SLJIT_ARG_SHIFT 4
#define SLJIT_ARG_RETURN(type) (type)
#define SLJIT_ARG_VALUE(type, idx) ((type) << ((idx) * SLJIT_ARG_SHIFT))
/* Simplified argument list definitions.
The following definition:
SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 1)
can be shortened to:
SLJIT_RET(SW) | SLJIT_ARG1(F32)
SLJIT_ARGS1(W, F32)
*/
Note:
The VOID type is only supported by SLJIT_RET, and
VOID is also the default value when SLJIT_RET is
not specified. */
#define SLJIT_RET(type) SLJIT_DEF_RET(SLJIT_ARG_TYPE_ ## type)
#define SLJIT_ARG1(type) SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_ ## type)
#define SLJIT_ARG2(type) SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_ ## type)
#define SLJIT_ARG3(type) SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_ ## type)
#define SLJIT_ARG4(type) SLJIT_DEF_ARG4(SLJIT_ARG_TYPE_ ## type)
#define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type
#define SLJIT_ARGS0(ret) \
SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret))
#define SLJIT_ARGS1(ret, arg1) \
(SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1))
#define SLJIT_ARGS2(ret, arg1, arg2) \
(SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2))
#define SLJIT_ARGS3(ret, arg1, arg2, arg3) \
(SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3))
#define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \
(SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4))
/* --------------------------------------------------------------------- */
/* Main structures and functions */
@ -408,7 +436,7 @@ struct sljit_compiler {
/* Code size. */
sljit_uw size;
/* Relative offset of the executable mapping from the writable mapping. */
sljit_uw executable_offset;
sljit_sw executable_offset;
/* Executable size for statistical purposes. */
sljit_uw executable_size;
@ -417,17 +445,13 @@ struct sljit_compiler {
#endif
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_s32 args;
sljit_s32 args_size;
sljit_s32 locals_offset;
sljit_s32 saveds_offset;
sljit_s32 stack_tmp_size;
sljit_s32 scratches_offset;
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
sljit_s32 mode32;
#ifdef _WIN64
sljit_s32 locals_offset;
#endif
#endif
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
@ -444,10 +468,14 @@ struct sljit_compiler {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Temporary fields. */
sljit_uw shift_imm;
#endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */
#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)
sljit_uw args_size;
#endif
#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
sljit_sw imm;
sljit_u32 imm;
#endif
#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
@ -456,6 +484,15 @@ struct sljit_compiler {
sljit_sw cache_argw;
#endif
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
sljit_uw args_size;
#endif
#if (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
sljit_s32 cache_arg;
sljit_sw cache_argw;
#endif
#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
sljit_s32 delay_slot;
sljit_s32 cache_arg;
@ -476,7 +513,9 @@ struct sljit_compiler {
/* Flags specified by the last arithmetic instruction.
It contains the type of the variable flag. */
sljit_s32 last_flags;
/* Local size passed to the functions. */
/* Return value type set by entry functions. */
sljit_s32 last_return;
/* Local size passed to entry functions. */
sljit_s32 logical_local_size;
#endif
@ -600,6 +639,20 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type);
/* If type is between SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL,
sljit_cmp_info returns one, if the cpu supports the passed floating
point comparison type.
If type is SLJIT_UNORDERED or SLJIT_ORDERED, sljit_cmp_info returns
one, if the cpu supports checking the unordered comparison result
regardless of the comparison type passed to the comparison instruction.
The returned value is always one, if there is at least one type between
SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL where sljit_cmp_info
returns with a zero value.
Otherwise it returns zero. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type);
/* Instruction generation. Returns with any error code. If there is no
error, they return with SLJIT_SUCCESS. */
@ -615,38 +668,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
available options are listed before sljit_emit_enter.
The function argument list is the combination of SLJIT_ARGx
(SLJIT_DEF_ARG1) macros. Currently maximum 3 SW / UW
(SLJIT_ARG_TYPE_SW / LJIT_ARG_TYPE_UW) arguments are supported.
The first argument goes to SLJIT_S0, the second goes to SLJIT_S1
and so on. The register set used by the function must be declared
as well. The number of scratch and saved registers used by the
function must be passed to sljit_emit_enter. Only R registers
between R0 and "scratches" argument can be used later. E.g. if
"scratches" is set to 2, the scratch register set will be limited
to SLJIT_R0 and SLJIT_R1. The S registers and the floating point
registers ("fscratches" and "fsaveds") are specified in a similar
manner. The sljit_emit_enter is also capable of allocating a stack
space for local variables. The "local_size" argument contains the
size in bytes of this local area and its staring address is stored
(SLJIT_DEF_ARG1) macros. Currently maximum 4 arguments are
supported. The first integer argument is loaded into SLJIT_S0,
the second one is loaded into SLJIT_S1, and so on. Similarly,
the first floating point argument is loaded into SLJIT_FR0,
the second one is loaded into SLJIT_FR1, and so on. Furthermore
the register set used by the function must be declared as well.
The number of scratch and saved registers used by the function
must be passed to sljit_emit_enter. Only R registers between R0
and "scratches" argument can be used later. E.g. if "scratches"
is set to 2, the scratch register set will be limited to SLJIT_R0
and SLJIT_R1. The S registers and the floating point registers
("fscratches" and "fsaveds") are specified in a similar manner.
The sljit_emit_enter is also capable of allocating a stack space
for local variables. The "local_size" argument contains the size
in bytes of this local area and its starting address is stored
in SLJIT_SP. The memory area between SLJIT_SP (inclusive) and
SLJIT_SP + local_size (exclusive) can be modified freely until
the function returns. The stack space is not initialized.
Note: the following conditions must be met:
0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS
0 <= saveds <= SLJIT_NUMBER_OF_REGISTERS
0 <= saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS
scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS
0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
0 <= fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
0 <= fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS
fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
Note: the compiler can use saved registers as scratch registers,
but the opposite is not supported
Note: every call of sljit_emit_enter and sljit_set_context
overwrites the previous context.
*/
/* The absolute address returned by sljit_get_local_base with
offset 0 is aligned to sljit_f64. Otherwise it is aligned to sljit_sw. */
#define SLJIT_F64_ALIGNMENT 0x00000001
/* The SLJIT_S0/SLJIT_S1 registers are not saved / restored on function
enter / return. Instead, these registers can be used to pass / return
data (such as global / local context pointers) across function calls.
This is an sljit specific (non ABI compatible) function call extension
so both the caller and called function must be compiled by sljit. */
#define SLJIT_ENTER_KEEP_S0 0x00000001
#define SLJIT_ENTER_KEEP_S0_S1 0x00000002
/* The compiled function uses cdecl calling
* convention instead of SLJIT_FUNC. */
#define SLJIT_ENTER_CDECL 0x00000004
/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */
#define SLJIT_MAX_LOCAL_SIZE 65536
@ -657,7 +723,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
/* The machine code has a context (which contains the local stack space size,
number of used registers, etc.) which is initialized by sljit_emit_enter. Several
functions (like sljit_emit_return) requres this context to be able to generate
functions (such as sljit_emit_return) require this context to be able to generate
the appropriate code. However, some code fragments (like inline cache) may have
no normal entry point so their context is unknown for the compiler. Their context
can be provided to the compiler by the sljit_set_context function.
@ -669,11 +735,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size);
/* Return from machine code. The op argument can be SLJIT_UNUSED which means the
function does not return with anything or any opcode between SLJIT_MOV and
SLJIT_MOV_P (see sljit_emit_op1). As for src and srcw they must be 0 if op
is SLJIT_UNUSED, otherwise see below the description about source and
destination arguments. */
/* Return from machine code. The sljit_emit_return_void function does not return with
any value. The sljit_emit_return function returns with a single value which stores
the result of a data move instruction. The instruction is specified by the op
argument, and must be between SLJIT_MOV and SLJIT_MOV_P (see sljit_emit_op1). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler);
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src, sljit_sw srcw);
@ -766,7 +833,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
#define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8))
#define SLJIT_IMM 0x40
/* Set 32 bit operation mode (I) on 64 bit CPUs. This option is ignored on
/* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on
32 bit CPUs. When this option is set for an arithmetic operation, only
the lower 32 bit of the input registers are used, and the CPU status
flags are set according to the 32 bit result. Although the higher 32 bit
@ -774,12 +841,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
be defined by the CPU architecture (e.g. MIPS). To satisfy these CPU
requirements all source registers must be the result of those operations
where this option was also set. Memory loads read 32 bit values rather
than 64 bit ones. In other words 32 bit and 64 bit operations cannot
be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source
register can hold any 32 or 64 bit value, and it is converted to a 32 bit
compatible format first. This conversion is free (no instructions are
emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit
value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension).
than 64 bit ones. In other words 32 bit and 64 bit operations cannot be
mixed. The only exception is SLJIT_MOV32 whose source register can hold
any 32 or 64 bit value, and it is converted to a 32 bit compatible format
first. This conversion is free (no instructions are emitted) on most CPUs.
A 32 bit value can also be converted to a 64 bit value by SLJIT_MOV_S32
(sign extension) or SLJIT_MOV_U32 (zero extension).
As for floating-point operations, this option sets 32 bit single
precision mode. Similar to the integer operations, all register arguments
must be the result of those operations where this option was also set.
Note: memory addressing always uses 64 bit values on 64 bit systems so
the result of a 32 bit operation must not be used with SLJIT_MEMx
@ -788,22 +859,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
This option is part of the instruction name, so there is no need to
manually set it. E.g:
SLJIT_ADD32 == (SLJIT_ADD | SLJIT_I32_OP) */
#define SLJIT_I32_OP 0x100
/* Set F32 (single) precision mode for floating-point computation. This
option is similar to SLJIT_I32_OP, it just applies to floating point
registers. When this option is passed, the CPU performs 32 bit floating
point operations, rather than 64 bit one. Similar to SLJIT_I32_OP, all
register arguments must be the result of those operations where this
option was also set.
This option is part of the instruction name, so there is no need to
manually set it. E.g:
SLJIT_MOV_F32 = (SLJIT_MOV_F64 | SLJIT_F32_OP)
*/
#define SLJIT_F32_OP SLJIT_I32_OP
SLJIT_ADD32 == (SLJIT_ADD | SLJIT_32) */
#define SLJIT_32 0x100
/* Many CPUs (x86, ARM, PPC) have status flags which can be set according
to the result of an operation. Other CPUs (MIPS) do not have status
@ -887,7 +944,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
Note: if SLJIT_R1 is 0, the behaviour is undefined. */
#define SLJIT_DIVMOD_UW (SLJIT_OP0_BASE + 4)
#define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_I32_OP)
#define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_32)
/* Flags: - (may destroy flags)
Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
@ -895,13 +952,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00),
the behaviour is undefined. */
#define SLJIT_DIVMOD_SW (SLJIT_OP0_BASE + 5)
#define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_I32_OP)
#define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_32)
/* Flags: - (may destroy flags)
Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
Note: if SLJIT_R1 is 0, the behaviour is undefined. */
#define SLJIT_DIV_UW (SLJIT_OP0_BASE + 6)
#define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_I32_OP)
#define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_32)
/* Flags: - (may destroy flags)
Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
@ -909,7 +966,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00),
the behaviour is undefined. */
#define SLJIT_DIV_SW (SLJIT_OP0_BASE + 7)
#define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_I32_OP)
#define SLJIT_DIV_S32 (SLJIT_DIV_SW | SLJIT_32)
/* Flags: - (does not modify flags)
ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64
when Intel Control-flow Enforcement Technology (CET) is enabled.
@ -941,16 +998,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
#define SLJIT_MOV (SLJIT_OP1_BASE + 0)
/* Flags: - (does not modify flags) */
#define SLJIT_MOV_U8 (SLJIT_OP1_BASE + 1)
#define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_I32_OP)
#define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_32)
/* Flags: - (does not modify flags) */
#define SLJIT_MOV_S8 (SLJIT_OP1_BASE + 2)
#define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_I32_OP)
#define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_32)
/* Flags: - (does not modify flags) */
#define SLJIT_MOV_U16 (SLJIT_OP1_BASE + 3)
#define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_I32_OP)
#define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_32)
/* Flags: - (does not modify flags) */
#define SLJIT_MOV_S16 (SLJIT_OP1_BASE + 4)
#define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_I32_OP)
#define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_32)
/* Flags: - (does not modify flags)
Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */
#define SLJIT_MOV_U32 (SLJIT_OP1_BASE + 5)
@ -958,25 +1015,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */
#define SLJIT_MOV_S32 (SLJIT_OP1_BASE + 6)
/* Flags: - (does not modify flags) */
#define SLJIT_MOV32 (SLJIT_MOV_S32 | SLJIT_I32_OP)
#define SLJIT_MOV32 (SLJIT_OP1_BASE + 7)
/* Flags: - (does not modify flags)
Note: load a pointer sized data, useful on x32 (a 32 bit mode on x86-64
where all x64 features are available, e.g. 16 register) or similar
compiling modes */
#define SLJIT_MOV_P (SLJIT_OP1_BASE + 7)
#define SLJIT_MOV_P (SLJIT_OP1_BASE + 8)
/* Flags: Z
Note: immediate source argument is not supported */
#define SLJIT_NOT (SLJIT_OP1_BASE + 8)
#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_I32_OP)
/* Flags: Z | OVERFLOW
Note: immediate source argument is not supported */
#define SLJIT_NEG (SLJIT_OP1_BASE + 9)
#define SLJIT_NEG32 (SLJIT_NEG | SLJIT_I32_OP)
#define SLJIT_NOT (SLJIT_OP1_BASE + 9)
#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_32)
/* Count leading zeroes
Flags: - (may destroy flags)
Note: immediate source argument is not supported */
#define SLJIT_CLZ (SLJIT_OP1_BASE + 10)
#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_I32_OP)
#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
@ -987,58 +1040,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
/* Flags: Z | OVERFLOW | CARRY */
#define SLJIT_ADD (SLJIT_OP2_BASE + 0)
#define SLJIT_ADD32 (SLJIT_ADD | SLJIT_I32_OP)
#define SLJIT_ADD32 (SLJIT_ADD | SLJIT_32)
/* Flags: CARRY */
#define SLJIT_ADDC (SLJIT_OP2_BASE + 1)
#define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_I32_OP)
#define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_32)
/* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL
SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER
SIG_LESS_EQUAL | CARRY */
#define SLJIT_SUB (SLJIT_OP2_BASE + 2)
#define SLJIT_SUB32 (SLJIT_SUB | SLJIT_I32_OP)
#define SLJIT_SUB32 (SLJIT_SUB | SLJIT_32)
/* Flags: CARRY */
#define SLJIT_SUBC (SLJIT_OP2_BASE + 3)
#define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_I32_OP)
#define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_32)
/* Note: integer mul
Flags: OVERFLOW */
#define SLJIT_MUL (SLJIT_OP2_BASE + 4)
#define SLJIT_MUL32 (SLJIT_MUL | SLJIT_I32_OP)
#define SLJIT_MUL32 (SLJIT_MUL | SLJIT_32)
/* Flags: Z */
#define SLJIT_AND (SLJIT_OP2_BASE + 5)
#define SLJIT_AND32 (SLJIT_AND | SLJIT_I32_OP)
#define SLJIT_AND32 (SLJIT_AND | SLJIT_32)
/* Flags: Z */
#define SLJIT_OR (SLJIT_OP2_BASE + 6)
#define SLJIT_OR32 (SLJIT_OR | SLJIT_I32_OP)
#define SLJIT_OR32 (SLJIT_OR | SLJIT_32)
/* Flags: Z */
#define SLJIT_XOR (SLJIT_OP2_BASE + 7)
#define SLJIT_XOR32 (SLJIT_XOR | SLJIT_I32_OP)
#define SLJIT_XOR32 (SLJIT_XOR | SLJIT_32)
/* Flags: Z
Let bit_length be the length of the shift operation: 32 or 64.
If src2 is immediate, src2w is masked by (bit_length - 1).
Otherwise, if the content of src2 is outside the range from 0
to bit_length - 1, the result is undefined. */
#define SLJIT_SHL (SLJIT_OP2_BASE + 8)
#define SLJIT_SHL32 (SLJIT_SHL | SLJIT_I32_OP)
#define SLJIT_SHL32 (SLJIT_SHL | SLJIT_32)
/* Flags: Z
Let bit_length be the length of the shift operation: 32 or 64.
If src2 is immediate, src2w is masked by (bit_length - 1).
Otherwise, if the content of src2 is outside the range from 0
to bit_length - 1, the result is undefined. */
#define SLJIT_LSHR (SLJIT_OP2_BASE + 9)
#define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_I32_OP)
#define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_32)
/* Flags: Z
Let bit_length be the length of the shift operation: 32 or 64.
If src2 is immediate, src2w is masked by (bit_length - 1).
Otherwise, if the content of src2 is outside the range from 0
to bit_length - 1, the result is undefined. */
#define SLJIT_ASHR (SLJIT_OP2_BASE + 10)
#define SLJIT_ASHR32 (SLJIT_ASHR | SLJIT_I32_OP)
#define SLJIT_ASHR32 (SLJIT_ASHR | SLJIT_32)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w);
/* The sljit_emit_op2u function is the same as sljit_emit_op2 except the result is discarded. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w);
/* Starting index of opcodes for sljit_emit_op_src. */
#define SLJIT_OP_SRC_BASE 128
@ -1082,35 +1141,35 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
/* Flags: - (does not modify flags) */
#define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0)
#define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_F32_OP)
#define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_32)
/* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE]
SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int
Rounding mode when the destination is W or I: round towards zero. */
/* Flags: - (does not modify flags) */
/* Flags: - (may destroy flags) */
#define SLJIT_CONV_F64_FROM_F32 (SLJIT_FOP1_BASE + 1)
#define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_F32_OP)
/* Flags: - (does not modify flags) */
#define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_32)
/* Flags: - (may destroy flags) */
#define SLJIT_CONV_SW_FROM_F64 (SLJIT_FOP1_BASE + 2)
#define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_F32_OP)
/* Flags: - (does not modify flags) */
#define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_32)
/* Flags: - (may destroy flags) */
#define SLJIT_CONV_S32_FROM_F64 (SLJIT_FOP1_BASE + 3)
#define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_F32_OP)
/* Flags: - (does not modify flags) */
#define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_32)
/* Flags: - (may destroy flags) */
#define SLJIT_CONV_F64_FROM_SW (SLJIT_FOP1_BASE + 4)
#define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_F32_OP)
/* Flags: - (does not modify flags) */
#define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_32)
/* Flags: - (may destroy flags) */
#define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5)
#define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_F32_OP)
#define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_32)
/* Note: dst is the left and src is the right operand for SLJIT_CMP_F64 / SLJIT_CMP_F32.
Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */
#define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 6)
#define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_F32_OP)
/* Flags: - (does not modify flags) */
#define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_32)
/* Flags: - (may destroy flags) */
#define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 7)
#define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_F32_OP)
/* Flags: - (does not modify flags) */
#define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_32)
/* Flags: - (may destroy flags) */
#define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 8)
#define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_F32_OP)
#define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_32)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
@ -1119,18 +1178,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
/* Starting index of opcodes for sljit_emit_fop2. */
#define SLJIT_FOP2_BASE 192
/* Flags: - (does not modify flags) */
/* Flags: - (may destroy flags) */
#define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0)
#define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_F32_OP)
/* Flags: - (does not modify flags) */
#define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_32)
/* Flags: - (may destroy flags) */
#define SLJIT_SUB_F64 (SLJIT_FOP2_BASE + 1)
#define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_F32_OP)
/* Flags: - (does not modify flags) */
#define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_32)
/* Flags: - (may destroy flags) */
#define SLJIT_MUL_F64 (SLJIT_FOP2_BASE + 2)
#define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_F32_OP)
/* Flags: - (does not modify flags) */
#define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_32)
/* Flags: - (may destroy flags) */
#define SLJIT_DIV_F64 (SLJIT_FOP2_BASE + 3)
#define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_F32_OP)
#define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_32)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 dst, sljit_sw dstw,
@ -1170,47 +1229,87 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
#define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW)
#define SLJIT_NOT_OVERFLOW 11
/* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */
#define SLJIT_SET_CARRY SLJIT_SET(12)
/* Unlike other flags, sljit_emit_jump may destroy this flag. */
#define SLJIT_CARRY 12
#define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY)
#define SLJIT_NOT_CARRY 13
/* Floating point comparison types. */
#define SLJIT_EQUAL_F64 14
#define SLJIT_EQUAL_F32 (SLJIT_EQUAL_F64 | SLJIT_F32_OP)
#define SLJIT_SET_EQUAL_F SLJIT_SET(SLJIT_EQUAL_F64)
#define SLJIT_NOT_EQUAL_F64 15
#define SLJIT_NOT_EQUAL_F32 (SLJIT_NOT_EQUAL_F64 | SLJIT_F32_OP)
#define SLJIT_SET_NOT_EQUAL_F SLJIT_SET(SLJIT_NOT_EQUAL_F64)
#define SLJIT_LESS_F64 16
#define SLJIT_LESS_F32 (SLJIT_LESS_F64 | SLJIT_F32_OP)
#define SLJIT_SET_LESS_F SLJIT_SET(SLJIT_LESS_F64)
#define SLJIT_GREATER_EQUAL_F64 17
#define SLJIT_GREATER_EQUAL_F32 (SLJIT_GREATER_EQUAL_F64 | SLJIT_F32_OP)
#define SLJIT_SET_GREATER_EQUAL_F SLJIT_SET(SLJIT_GREATER_EQUAL_F64)
#define SLJIT_GREATER_F64 18
#define SLJIT_GREATER_F32 (SLJIT_GREATER_F64 | SLJIT_F32_OP)
#define SLJIT_SET_GREATER_F SLJIT_SET(SLJIT_GREATER_F64)
#define SLJIT_LESS_EQUAL_F64 19
#define SLJIT_LESS_EQUAL_F32 (SLJIT_LESS_EQUAL_F64 | SLJIT_F32_OP)
#define SLJIT_SET_LESS_EQUAL_F SLJIT_SET(SLJIT_LESS_EQUAL_F64)
#define SLJIT_UNORDERED_F64 20
#define SLJIT_UNORDERED_F32 (SLJIT_UNORDERED_F64 | SLJIT_F32_OP)
#define SLJIT_SET_UNORDERED_F SLJIT_SET(SLJIT_UNORDERED_F64)
#define SLJIT_ORDERED_F64 21
#define SLJIT_ORDERED_F32 (SLJIT_ORDERED_F64 | SLJIT_F32_OP)
#define SLJIT_SET_ORDERED_F SLJIT_SET(SLJIT_ORDERED_F64)
/* Basic floating point comparison types.
Note: when the comparison result is unordered, their behaviour is unspecified. */
#define SLJIT_F_EQUAL 14
#define SLJIT_SET_F_EQUAL SLJIT_SET(SLJIT_F_EQUAL)
#define SLJIT_F_NOT_EQUAL 15
#define SLJIT_SET_F_NOT_EQUAL SLJIT_SET(SLJIT_F_NOT_EQUAL)
#define SLJIT_F_LESS 16
#define SLJIT_SET_F_LESS SLJIT_SET(SLJIT_F_LESS)
#define SLJIT_F_GREATER_EQUAL 17
#define SLJIT_SET_F_GREATER_EQUAL SLJIT_SET(SLJIT_F_GREATER_EQUAL)
#define SLJIT_F_GREATER 18
#define SLJIT_SET_F_GREATER SLJIT_SET(SLJIT_F_GREATER)
#define SLJIT_F_LESS_EQUAL 19
#define SLJIT_SET_F_LESS_EQUAL SLJIT_SET(SLJIT_F_LESS_EQUAL)
/* Jumps when either argument contains a NaN value. */
#define SLJIT_UNORDERED 20
#define SLJIT_SET_UNORDERED SLJIT_SET(SLJIT_UNORDERED)
/* Jumps when neither argument contains a NaN value. */
#define SLJIT_ORDERED 21
#define SLJIT_SET_ORDERED SLJIT_SET(SLJIT_ORDERED)
/* Ordered / unordered floating point comparison types.
Note: each comparison type has an ordered and unordered form. Some
architectures support only one of them (see: sljit_cmp_info). */
#define SLJIT_ORDERED_EQUAL 22
#define SLJIT_SET_ORDERED_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL)
#define SLJIT_UNORDERED_OR_NOT_EQUAL 23
#define SLJIT_SET_UNORDERED_OR_NOT_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_NOT_EQUAL)
#define SLJIT_ORDERED_LESS 24
#define SLJIT_SET_ORDERED_LESS SLJIT_SET(SLJIT_ORDERED_LESS)
#define SLJIT_UNORDERED_OR_GREATER_EQUAL 25
#define SLJIT_SET_UNORDERED_OR_GREATER_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_GREATER_EQUAL)
#define SLJIT_ORDERED_GREATER 26
#define SLJIT_SET_ORDERED_GREATER SLJIT_SET(SLJIT_ORDERED_GREATER)
#define SLJIT_UNORDERED_OR_LESS_EQUAL 27
#define SLJIT_SET_UNORDERED_OR_LESS_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_LESS_EQUAL)
#define SLJIT_UNORDERED_OR_EQUAL 28
#define SLJIT_SET_UNORDERED_OR_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL)
#define SLJIT_ORDERED_NOT_EQUAL 29
#define SLJIT_SET_ORDERED_NOT_EQUAL SLJIT_SET(SLJIT_ORDERED_NOT_EQUAL)
#define SLJIT_UNORDERED_OR_LESS 30
#define SLJIT_SET_UNORDERED_OR_LESS SLJIT_SET(SLJIT_UNORDERED_OR_LESS)
#define SLJIT_ORDERED_GREATER_EQUAL 31
#define SLJIT_SET_ORDERED_GREATER_EQUAL SLJIT_SET(SLJIT_ORDERED_GREATER_EQUAL)
#define SLJIT_UNORDERED_OR_GREATER 32
#define SLJIT_SET_UNORDERED_OR_GREATER SLJIT_SET(SLJIT_UNORDERED_OR_GREATER)
#define SLJIT_ORDERED_LESS_EQUAL 33
#define SLJIT_SET_ORDERED_LESS_EQUAL SLJIT_SET(SLJIT_ORDERED_LESS_EQUAL)
/* Unconditional jump types. */
#define SLJIT_JUMP 22
#define SLJIT_JUMP 34
/* Fast calling method. See sljit_emit_fast_enter / SLJIT_FAST_RETURN. */
#define SLJIT_FAST_CALL 23
#define SLJIT_FAST_CALL 35
/* Called function must be declared with the SLJIT_FUNC attribute. */
#define SLJIT_CALL 24
#define SLJIT_CALL 36
/* Called function must be declared with cdecl attribute.
This is the default attribute for C functions. */
#define SLJIT_CALL_CDECL 25
#define SLJIT_CALL_CDECL 37
/* The target can be changed during runtime (see: sljit_set_jump_addr). */
#define SLJIT_REWRITABLE_JUMP 0x1000
/* When this flag is passed, the execution of the current function ends and
the called function returns to the caller of the current function. The
stack usage is reduced before the call, but it is not necessarily reduced
to zero. In the latter case the compiler needs to allocate space for some
arguments and the return register must be kept as well.
This feature is highly experimental and not supported on SPARC platform
at the moment. */
#define SLJIT_CALL_RETURN 0x2000
/* Emit a jump instruction. The destination is not set, only the type of the jump.
type must be between SLJIT_EQUAL and SLJIT_FAST_CALL
@ -1221,15 +1320,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
/* Emit a C compiler (ABI) compatible function call.
type must be SLJIT_CALL or SLJIT_CALL_CDECL
type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP and SLJIT_CALL_RETURN
arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros
Flags: destroy all flags. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types);
/* Basic arithmetic comparison. In most architectures it is implemented as
an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting
appropriate flags) followed by a sljit_emit_jump. However some
a compare operation followed by a sljit_emit_jump. However, some
architectures (e.g. ARM64 or MIPS) may employ special optimizations here.
It is suggested to use this comparison form when appropriate.
type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL
@ -1245,7 +1343,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
sljit_emit_jump. However some architectures (i.e: MIPS) may employ
special optimizations here. It is suggested to use this comparison form
when appropriate.
type must be between SLJIT_EQUAL_F64 and SLJIT_ORDERED_F32
type must be between SLJIT_F_EQUAL and SLJIT_ORDERED_LESS_EQUAL
type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
Flags: destroy flags.
Note: if either operand is NaN, the behaviour is undefined for
@ -1271,13 +1369,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
Direct form: set src to SLJIT_IMM() and srcw to the address
Indirect form: any other valid addressing mode
type must be SLJIT_CALL or SLJIT_CALL_CDECL
type can be combined (or'ed) with SLJIT_CALL_RETURN
arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros
Flags: destroy all flags. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw);
/* Perform the operation using the conditional flags as the second argument.
Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_F64. The value
Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL. The value
represented by the type is 1, if the condition represented by the type
is fulfilled, and 0 otherwise.
@ -1296,9 +1395,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
if the condition is satisfied. Unlike other arithmetic operations this
instruction does not support memory access.
type must be between SLJIT_EQUAL and SLJIT_ORDERED_F64
type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL
dst_reg must be a valid register and it can be combined
with SLJIT_I32_OP to perform a 32 bit arithmetic operation
with SLJIT_32 to perform a 32 bit arithmetic operation
src must be register or immediate (SLJIT_IMM)
Flags: - (does not modify flags) */
@ -1454,26 +1553,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_st
#if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
/* Get the entry address of a given function. */
#define SLJIT_FUNC_OFFSET(func_name) ((sljit_sw)func_name)
/* Get the entry address of a given function (signed, unsigned result). */
#define SLJIT_FUNC_ADDR(func_name) ((sljit_sw)func_name)
#define SLJIT_FUNC_UADDR(func_name) ((sljit_uw)func_name)
#else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
/* All JIT related code should be placed in the same context (library, binary, etc.). */
#define SLJIT_FUNC_OFFSET(func_name) (*(sljit_sw*)(void*)func_name)
/* Get the entry address of a given function (signed, unsigned result). */
#define SLJIT_FUNC_ADDR(func_name) (*(sljit_sw*)(void*)func_name)
#define SLJIT_FUNC_UADDR(func_name) (*(sljit_uw*)(void*)func_name)
/* For powerpc64, the function pointers point to a context descriptor. */
struct sljit_function_context {
sljit_sw addr;
sljit_sw r2;
sljit_sw r11;
sljit_uw addr;
sljit_uw r2;
sljit_uw r11;
};
/* Fill the context arguments using the addr and the function.
If func_ptr is NULL, it will not be set to the address of context
If addr is NULL, the function address also comes from the func pointer. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func);
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func);
#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
@ -1516,17 +1618,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_s32 size);
void *instruction, sljit_u32 size);
/* Flags were set by a 32 bit operation. */
#define SLJIT_CURRENT_FLAGS_I32_OP SLJIT_I32_OP
#define SLJIT_CURRENT_FLAGS_32 SLJIT_32
/* Flags were set by an ADD, ADDC, SUB, SUBC, or NEG operation. */
#define SLJIT_CURRENT_FLAGS_ADD_SUB 0x01
/* Flags were set by an ADD or ADDC operation. */
#define SLJIT_CURRENT_FLAGS_ADD 0x01
/* Flags were set by a SUB, SUBC, or NEG operation. */
#define SLJIT_CURRENT_FLAGS_SUB 0x02
/* Flags were set by a SUB with unused destination.
Must be combined with SLJIT_CURRENT_FLAGS_ADD_SUB. */
#define SLJIT_CURRENT_FLAGS_COMPARE 0x02
/* Flags were set by sljit_emit_op2u with SLJIT_SUB opcode.
Must be combined with SLJIT_CURRENT_FLAGS_SUB. */
#define SLJIT_CURRENT_FLAGS_COMPARE 0x04
/* Define the currently available CPU status flags. It is usually used after
an sljit_emit_label or sljit_emit_op_custom operations to define which CPU

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -38,384 +38,6 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS;
}
/* Emits a two-operand logical instruction.
   op_imm:  instruction encoding used when SRC2_IMM is set in flags; src2 is
            then encoded with IMM(src2).
   op_norm: instruction encoding used otherwise; src2 is then a register.
   When the operation requests the zero flag (op & SLJIT_SET_Z), the
   instruction is first emitted with EQUAL_FLAG as its destination. It is
   emitted again with dst as destination unless UNUSED_DEST is set in flags.
   The macro expands in place and uses compiler, op, flags, dst, src1 and
   src2 from the enclosing scope.
   NOTE(review): FAIL_IF is defined elsewhere; presumably it returns the
   push_inst error from the enclosing function - confirm. */
#define EMIT_LOGICAL(op_imm, op_norm) \
if (flags & SRC2_IMM) { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
} \
else { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
}
/* Emits a shift instruction.
   op_imm: instruction encoding used when SRC2_IMM is set in flags; the shift
           amount is then encoded with SH_IMM(src2) and the shifted value is
           src1 (placed in the T field).
   op_v:   instruction encoding used otherwise; the shift amount is then the
           register src2 (S field) and the shifted value is src1 (T field).
   When the operation requests the zero flag (op & SLJIT_SET_Z), the
   instruction is first emitted with EQUAL_FLAG as its destination. It is
   emitted again with dst as destination unless UNUSED_DEST is set in flags.
   The macro expands in place and uses compiler, op, flags, dst, src1 and
   src2 from the enclosing scope.
   NOTE(review): FAIL_IF is defined elsewhere; presumably it returns the
   push_inst error from the enclosing function - confirm. */
#define EMIT_SHIFT(op_imm, op_v) \
if (flags & SRC2_IMM) { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
} \
else { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \
}
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
sljit_s32 is_overflow, is_carry, is_handled;
switch (GET_OPCODE(op)) {
case SLJIT_MOV:
case SLJIT_MOV_U32:
case SLJIT_MOV_S32:
case SLJIT_MOV_P:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if (dst != src2)
return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst));
return SLJIT_SUCCESS;
case SLJIT_MOV_U8:
case SLJIT_MOV_S8:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S8) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
#endif /* SLJIT_MIPS_REV >= 1 */
}
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
}
else {
SLJIT_ASSERT(dst == src2);
}
return SLJIT_SUCCESS;
case SLJIT_MOV_U16:
case SLJIT_MOV_S16:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S16) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
#endif /* SLJIT_MIPS_REV >= 1 */
}
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
}
else {
SLJIT_ASSERT(dst == src2);
}
return SLJIT_SUCCESS;
case SLJIT_NOT:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
return SLJIT_SUCCESS;
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst)));
#else /* SLJIT_MIPS_REV < 1 */
if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
}
/* Nearly all instructions are unmovable in the following sequence. */
FAIL_IF(push_inst(compiler, ADDU | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
/* Check zero. */
FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(dst) | IMM(-1), DR(dst)));
/* Loop for searching the highest bit. */
FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst)));
FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
#endif /* SLJIT_MIPS_REV >= 1 */
return SLJIT_SUCCESS;
case SLJIT_ADD:
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_overflow) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
if (is_overflow || is_carry) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
else {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
}
}
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
}
else {
if (is_overflow)
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
}
/* a + b >= a | b (otherwise, the carry should be set to 1). */
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
if (!is_overflow)
return SLJIT_SUCCESS;
FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
case SLJIT_ADDC:
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_carry) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
else {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
}
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst)));
} else {
if (is_carry)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
if (!is_carry)
return SLJIT_SUCCESS;
/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
/* Set carry flag. */
return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
case SLJIT_SUB:
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
is_handled = 0;
if (flags & SRC2_IMM) {
if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
is_handled = 1;
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
is_handled = 1;
}
}
if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
is_handled = 1;
if (flags & SRC2_IMM) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL)
{
FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL)
{
FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
}
}
if (is_handled) {
if (flags & SRC2_IMM) {
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
return push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst));
}
else {
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
return push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst));
}
return SLJIT_SUCCESS;
}
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_overflow) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
if (is_overflow)
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (!is_overflow)
return SLJIT_SUCCESS;
FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
case SLJIT_SUBC:
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_carry)
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS;
case SLJIT_MUL:
SLJIT_ASSERT(!(flags & SRC2_IMM));
if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
#endif /* SLJIT_MIPS_REV >= 1 */
}
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
FAIL_IF(push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)));
FAIL_IF(push_inst(compiler, MUH | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
#else /* SLJIT_MIPS_REV < 6 */
FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
#endif /* SLJIT_MIPS_REV >= 6 */
FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
return push_inst(compiler, SUBU | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
case SLJIT_AND:
EMIT_LOGICAL(ANDI, AND);
return SLJIT_SUCCESS;
case SLJIT_OR:
EMIT_LOGICAL(ORI, OR);
return SLJIT_SUCCESS;
case SLJIT_XOR:
EMIT_LOGICAL(XORI, XOR);
return SLJIT_SUCCESS;
case SLJIT_SHL:
EMIT_SHIFT(SLL, SLLV);
return SLJIT_SUCCESS;
case SLJIT_LSHR:
EMIT_SHIFT(SRL, SRLV);
return SLJIT_SUCCESS;
case SLJIT_ASHR:
EMIT_SHIFT(SRA, SRAV);
return SLJIT_SUCCESS;
}
SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
{
FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst)));
@ -438,92 +60,119 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
/* Rewrites a previously emitted constant by delegating to sljit_set_jump_addr.
 *
 * addr              - location of the constant-loading sequence to patch
 * new_constant      - new value; converted explicitly to sljit_uw because
 *                     sljit_set_jump_addr takes its target as an unsigned word
 * executable_offset - forwarded unchanged to sljit_set_jump_addr
 *
 * The patch is performed exactly once; a second identical call would only
 * repeat the same rewrite. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr)
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr, sljit_u32 *extra_space)
{
sljit_s32 stack_offset = 0;
sljit_s32 arg_count = 0;
sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
sljit_u32 offset = 0;
sljit_s32 float_arg_count = 0;
sljit_s32 word_arg_count = 0;
sljit_s32 types = 0;
sljit_s32 arg_count_save, types_save;
sljit_ins prev_ins = NOP;
sljit_ins ins = NOP;
sljit_u8 offsets[4];
sljit_u8 *offsets_ptr = offsets;
SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
arg_types >>= SLJIT_DEF_SHIFT;
arg_types >>= SLJIT_ARG_SHIFT;
/* See ABI description in sljit_emit_enter. */
while (arg_types) {
types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
*offsets_ptr = (sljit_u8)offset;
switch (arg_types & SLJIT_DEF_MASK) {
case SLJIT_ARG_TYPE_F32:
offsets[arg_count] = (sljit_u8)stack_offset;
switch (arg_types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64:
if (offset & 0x7) {
offset += sizeof(sljit_sw);
*offsets_ptr = (sljit_u8)offset;
}
if (word_arg_count == 0 && arg_count <= 1)
offsets[arg_count] = 254 + arg_count;
if (word_arg_count == 0 && float_arg_count <= 1)
*offsets_ptr = (sljit_u8)(254 + float_arg_count);
stack_offset += sizeof(sljit_f32);
arg_count++;
offset += sizeof(sljit_f64);
float_arg_count++;
break;
case SLJIT_ARG_TYPE_F64:
if (stack_offset & 0x7)
stack_offset += sizeof(sljit_sw);
offsets[arg_count] = (sljit_u8)stack_offset;
case SLJIT_ARG_TYPE_F32:
if (word_arg_count == 0 && float_arg_count <= 1)
*offsets_ptr = (sljit_u8)(254 + float_arg_count);
if (word_arg_count == 0 && arg_count <= 1)
offsets[arg_count] = 254 + arg_count;
stack_offset += sizeof(sljit_f64);
arg_count++;
offset += sizeof(sljit_f32);
float_arg_count++;
break;
default:
offsets[arg_count] = (sljit_u8)stack_offset;
stack_offset += sizeof(sljit_sw);
arg_count++;
offset += sizeof(sljit_sw);
word_arg_count++;
break;
}
arg_types >>= SLJIT_DEF_SHIFT;
arg_types >>= SLJIT_ARG_SHIFT;
offsets_ptr++;
}
/* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */
if (stack_offset > 16)
FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP)));
/* Stack is aligned to 16 bytes. */
SLJIT_ASSERT(offset <= 8 * sizeof(sljit_sw));
types_save = types;
arg_count_save = arg_count;
if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
if (is_tail_call) {
offset = (offset + sizeof(sljit_sw) + 15) & ~(sljit_uw)0xf;
FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset, &prev_ins));
*extra_space = offset;
} else {
FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP)));
*extra_space = 16;
}
} else {
if (is_tail_call)
FAIL_IF(emit_stack_frame_release(compiler, 0, &prev_ins));
*extra_space = 0;
}
while (types) {
switch (types & SLJIT_DEF_MASK) {
case SLJIT_ARG_TYPE_F32:
arg_count--;
if (offsets[arg_count] < 254)
ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]);
--offsets_ptr;
switch (types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64:
if (*offsets_ptr < 4 * sizeof (sljit_sw)) {
if (prev_ins != NOP)
FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
/* Must be preceded by at least one other argument,
* and its starting offset must be 8 because of alignment. */
SLJIT_ASSERT((*offsets_ptr >> 2) == 2);
prev_ins = MFC1 | TA(6) | FS(float_arg_count) | (1 << 11);
ins = MFC1 | TA(7) | FS(float_arg_count);
} else if (*offsets_ptr < 254)
ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr);
else if (*offsets_ptr == 254)
ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
float_arg_count--;
break;
case SLJIT_ARG_TYPE_F64:
arg_count--;
if (offsets[arg_count] < 254)
ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]);
case SLJIT_ARG_TYPE_F32:
if (*offsets_ptr < 4 * sizeof (sljit_sw))
ins = MFC1 | TA(4 + (*offsets_ptr >> 2)) | FS(float_arg_count);
else if (*offsets_ptr < 254)
ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr);
else if (*offsets_ptr == 254)
ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
float_arg_count--;
break;
default:
if (offsets[arg_count - 1] >= 16)
ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(offsets[arg_count - 1]);
else if (arg_count != word_arg_count)
ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2));
else if (arg_count == 1)
if (*offsets_ptr >= 4 * sizeof (sljit_sw))
ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(*offsets_ptr);
else if ((*offsets_ptr >> 2) != word_arg_count - 1)
ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (*offsets_ptr >> 2));
else if (*offsets_ptr == 0)
ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4);
arg_count--;
word_arg_count--;
break;
}
@ -535,45 +184,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
ins = NOP;
}
types >>= SLJIT_DEF_SHIFT;
}
types = types_save;
arg_count = arg_count_save;
while (types) {
switch (types & SLJIT_DEF_MASK) {
case SLJIT_ARG_TYPE_F32:
arg_count--;
if (offsets[arg_count] == 254)
ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
else if (offsets[arg_count] < 16)
ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]);
break;
case SLJIT_ARG_TYPE_F64:
arg_count--;
if (offsets[arg_count] == 254)
ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
else if (offsets[arg_count] < 16) {
if (prev_ins != NOP)
FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
prev_ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]);
ins = LW | S(SLJIT_SP) | TA(5 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count] + sizeof(sljit_sw));
}
break;
default:
arg_count--;
break;
}
if (ins != NOP) {
if (prev_ins != NOP)
FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
prev_ins = ins;
ins = NOP;
}
types >>= SLJIT_DEF_SHIFT;
types >>= SLJIT_ARG_SHIFT;
}
*ins_ptr = prev_ins;
@ -581,41 +192,11 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
return SLJIT_SUCCESS;
}
/* Emits the stack pointer adjustment that follows a call, if one is needed.
 *
 * The argument list encoded in arg_types is walked (the lowest
 * SLJIT_DEF_SHIFT bits are skipped first) and the number of bytes the
 * arguments occupy is accumulated: f32 and word arguments add their own
 * size, f64 arguments are first aligned to 8 bytes. When the total exceeds
 * 16 bytes a single "ADDIU sp, sp, 16" is pushed; otherwise nothing is
 * emitted.
 *
 * Returns SLJIT_SUCCESS, or the result of push_inst for the ADDIU. */
static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_s32 arg_bytes = 0;
	sljit_s32 arg_kind;

	for (arg_types >>= SLJIT_DEF_SHIFT; arg_types != 0; arg_types >>= SLJIT_DEF_SHIFT) {
		arg_kind = arg_types & SLJIT_DEF_MASK;

		if (arg_kind == SLJIT_ARG_TYPE_F32) {
			arg_bytes += sizeof(sljit_f32);
		} else if (arg_kind == SLJIT_ARG_TYPE_F64) {
			/* Doubles start on an 8 byte boundary. */
			if ((arg_bytes & 0x7) != 0)
				arg_bytes += sizeof(sljit_sw);
			arg_bytes += sizeof(sljit_f64);
		} else {
			arg_bytes += sizeof(sljit_sw);
		}
	}

	/* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */
	if (arg_bytes <= 16)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(16), DR(SLJIT_SP));
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 arg_types)
{
struct sljit_jump *jump;
sljit_u32 extra_space = (sljit_u32)type;
sljit_ins ins;
CHECK_ERROR_PTR();
@ -624,21 +205,35 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
PTR_FAIL_IF(!jump);
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
type &= 0xff;
PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins));
PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space));
SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0));
if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) {
jump->flags |= IS_JAL | IS_CALL;
PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
} else
PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
jump->flags |= IS_JAL | IS_CALL;
PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
jump->addr = compiler->size;
PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
PTR_FAIL_IF(post_call_with_args(compiler, arg_types));
/* Maximum number of instructions required for generating a constant. */
compiler->size += 2;
if (extra_space == 0)
return jump;
if (type & SLJIT_CALL_RETURN)
PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG,
SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw))));
if (type & SLJIT_CALL_RETURN)
PTR_FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
PTR_FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space),
(type & SLJIT_CALL_RETURN) ? UNMOVABLE_INS : DR(SLJIT_SP)));
return jump;
}
@ -646,6 +241,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
sljit_s32 arg_types,
sljit_s32 src, sljit_sw srcw)
{
sljit_u32 extra_space = (sljit_u32)type;
sljit_ins ins;
CHECK_ERROR();
@ -662,10 +258,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
}
FAIL_IF(call_with_args(compiler, arg_types, &ins));
FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space));
/* Register input. */
FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
if (!(type & SLJIT_CALL_RETURN) || extra_space > 0)
FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
else
FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
return post_call_with_args(compiler, arg_types);
if (extra_space == 0)
return SLJIT_SUCCESS;
if (type & SLJIT_CALL_RETURN)
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG,
SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw))));
if (type & SLJIT_CALL_RETURN)
FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space),
(type & SLJIT_CALL_RETURN) ? UNMOVABLE_INS : DR(SLJIT_SP));
}

View File

@ -46,9 +46,9 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
}
/* Zero extended number. */
uimm = imm;
uimm = (sljit_uw)imm;
if (imm < 0) {
uimm = ~imm;
uimm = ~(sljit_uw)imm;
inv = 1;
}
@ -118,400 +118,6 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar);
}
/* Chooses between two opcodes based on the `op` variable of the enclosing
 * scope: yields `a` when SLJIT_I32_OP is not set in `op`, otherwise `b`.
 * Call sites pass the pair as (64-bit form, 32-bit form), e.g.
 * SELECT_OP(DADDU, ADDU). */
#define SELECT_OP(a, b) \
(!(op & SLJIT_I32_OP) ? a : b)
/* Emits a two-operand logical instruction.
 *
 * Relies on `compiler`, `op`, `flags`, `dst`, `src1` and `src2` from the
 * enclosing scope. When SRC2_IMM is set in `flags`, `src2` is encoded as an
 * immediate using `op_imm`; otherwise it is encoded as a register using
 * `op_norm`. Up to two instructions are pushed:
 *   - one writing EQUAL_FLAG, when SLJIT_SET_Z is set in `op`;
 *   - one writing `dst`, unless UNUSED_DEST is set in `flags`.
 * Each push is wrapped in FAIL_IF (presumably an early return from the
 * enclosing function on failure - confirm against its definition). */
#define EMIT_LOGICAL(op_imm, op_norm) \
if (flags & SRC2_IMM) { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \
} \
else { \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \
}
/* Emits a shift instruction, selecting the opcode from the operand kind and
 * the operation width.
 *
 * Relies on `compiler`, `op`, `flags`, `dst`, `src1`, `src2` and the scratch
 * variable `ins` from the enclosing scope; both `ins` and (for large
 * immediates) `src2` are modified.
 *
 * Immediate shift amount (SRC2_IMM set):
 *   - src2 >= 32: only valid without SLJIT_I32_OP (asserted); `op_dimm32` is
 *     used and 32 is subtracted from src2 before it is encoded;
 *   - otherwise: `op_imm` with SLJIT_I32_OP, `op_dimm` without it.
 * Register shift amount: `op_v` with SLJIT_I32_OP, `op_dv` without it.
 *
 * As with EMIT_LOGICAL, one instruction writes EQUAL_FLAG when SLJIT_SET_Z is
 * requested and one writes `dst` unless UNUSED_DEST is set. */
#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \
if (flags & SRC2_IMM) { \
if (src2 >= 32) { \
SLJIT_ASSERT(!(op & SLJIT_I32_OP)); \
ins = op_dimm32; \
src2 -= 32; \
} \
else \
ins = (op & SLJIT_I32_OP) ? op_imm : op_dimm; \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
} \
else { \
ins = (op & SLJIT_I32_OP) ? op_v : op_dv; \
if (op & SLJIT_SET_Z) \
FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
if (!(flags & UNUSED_DEST)) \
FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \
}
/* Emits the instruction sequence for one operation, selected by
 * GET_OPCODE(op). This variant uses SELECT_OP to pick the doubleword opcode
 * (DADDU, DSUBU, ...) unless SLJIT_I32_OP is set in op.
 *
 * compiler - compiler state that receives the instructions via push_inst
 * op       - opcode plus flag requests (SLJIT_SET_Z, GET_FLAG_TYPE(op),
 *            SLJIT_I32_OP)
 * flags    - operand description bits used here: SRC2_IMM (src2 is an
 *            immediate), UNUSED_DEST (dst need not be written), REG_DEST and
 *            REG2_SOURCE
 * dst      - destination register
 * src1     - first source register (asserted to be TMP_REG1 for the
 *            single-operand opcodes)
 * src2     - second source register, or an immediate value when SRC2_IMM is set
 *
 * Flag results are materialised in registers: the value tested for zero goes
 * to EQUAL_FLAG when SLJIT_SET_Z is requested, while carry, overflow and
 * comparison results go to OTHER_FLAG. EQUAL_FLAG and TMP_REG1 are also used
 * as scratch registers by some sequences.
 *
 * Returns SLJIT_SUCCESS or the status of the last push_inst call. FAIL_IF
 * presumably returns early when an intermediate push fails - confirm against
 * its definition. */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
/* Scratch opcode holder assigned inside EMIT_SHIFT. */
sljit_ins ins;
sljit_s32 is_overflow, is_carry, is_handled;
switch (GET_OPCODE(op)) {
/* Plain move: dst = src2 + (register 0); skipped when both are the same register. */
case SLJIT_MOV:
case SLJIT_MOV_P:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if (dst != src2)
return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst));
return SLJIT_SUCCESS;
/* 8 bit move: only the register-to-register form emits code. The signed
   form shifts left and then arithmetically right with the "32" shift opcodes
   and an amount of 24; the unsigned form masks with 0xff. */
case SLJIT_MOV_U8:
case SLJIT_MOV_S8:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S8) {
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst));
}
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
}
else {
SLJIT_ASSERT(dst == src2);
}
return SLJIT_SUCCESS;
/* 16 bit move: same scheme as the 8 bit case with a shift amount of 16 and a
   mask of 0xffff. */
case SLJIT_MOV_U16:
case SLJIT_MOV_S16:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
if (op == SLJIT_MOV_S16) {
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst));
}
return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
}
else {
SLJIT_ASSERT(dst == src2);
}
return SLJIT_SUCCESS;
/* Unsigned 32 bit move: DSLL32 followed by a logical DSRL32, both with
   amount 0 (presumably clearing the upper half of the register - see the
   MIPS64 definition of the "32" shift forms). */
case SLJIT_MOV_U32:
SLJIT_ASSERT(!(op & SLJIT_I32_OP));
FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst)));
return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));
/* Signed 32 bit move: a single SLL by 0 (presumably relying on the 32 bit
   shift sign-extending its result on MIPS64 - confirm against the ISA manual). */
case SLJIT_MOV_S32:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst));
/* Bitwise not: NOR of src2 with itself. */
case SLJIT_NOT:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst)));
return SLJIT_SUCCESS;
/* Count leading zeros: uses the (D)CLZ instruction when SLJIT_MIPS_REV >= 1,
   otherwise an explicit shift-and-count loop. */
case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst)));
#else /* SLJIT_MIPS_REV < 1 */
/* Only the flag is needed: shift right logically, then flip bit 0 of the
   result with XORI 1. */
if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) {
FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG));
return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG);
}
/* Nearly all instructions are unmovable in the following sequence. */
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
/* Check zero. */
FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
/* dst is preloaded with the operand width (32 or 64). */
FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_I32_OP) ? 32 : 64), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst)));
/* Loop for searching the highest bit. */
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS));
FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS));
#endif /* SLJIT_MIPS_REV >= 1 */
return SLJIT_SUCCESS;
/* Addition. Carry is derived by comparing the sum against (src1 | src2);
   overflow additionally combines the carry with the operand and result bits. */
case SLJIT_ADD:
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_overflow) {
/* EQUAL_FLAG = src1 for a non-negative immediate, ~src1 for a negative one. */
if (src2 >= 0)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
if (is_overflow || is_carry) {
/* OTHER_FLAG = src1 | src2; a negative immediate is first loaded into
   OTHER_FLAG with (D)ADDIU and then OR-ed with src1. */
if (src2 >= 0)
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
else {
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
}
}
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
}
else {
if (is_overflow)
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
/* a + b >= a | b (otherwise, the carry should be set to 1). */
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
if (!is_overflow)
return SLJIT_SUCCESS;
/* Overflow: the carry is shifted left into TMP_REG1, XOR-ed with EQUAL_FLAG
   and with dst, and the result is shifted back down into OTHER_FLAG. */
FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
/* EQUAL_FLAG was used as scratch above, so the zero test value is reloaded from dst. */
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
/* Addition with carry-in: OTHER_FLAG is read as the incoming carry and added
   to the sum. EQUAL_FLAG is used as scratch for the carry of the first add. */
case SLJIT_ADDC:
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_carry) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
else {
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
}
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst)));
} else {
if (is_carry)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* Add the incoming carry. */
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
if (!is_carry)
return SLJIT_SUCCESS;
/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
/* Set carry flag. */
return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
/* Subtraction. Comparison flag types are handled first with SLT* style
   instructions; carry and overflow follow further below. */
case SLJIT_SUB:
/* The immediate paths below encode -src2; SIMM_MIN is moved to TMP_REG2 and
   treated as a register operand instead (presumably because its negation
   does not fit the immediate field - confirm against SIMM_MIN/SIMM_MAX). */
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
is_handled = 0;
/* Less / greater-or-equal against an immediate: compare directly with SLTIU or SLTI. */
if (flags & SRC2_IMM) {
if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
is_handled = 1;
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
is_handled = 1;
}
}
/* Remaining comparison types need src2 in a register; the greater /
   less-or-equal forms swap the operands of the set-on-less-than instruction. */
if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
is_handled = 1;
if (flags & SRC2_IMM) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL)
{
FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) {
FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
}
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL)
{
FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG));
}
}
/* With the comparison flag done, only the zero flag and the difference
   itself remain to be emitted. */
if (is_handled) {
if (flags & SRC2_IMM) {
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
return push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst));
}
else {
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (!(flags & UNUSED_DEST))
return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst));
}
return SLJIT_SUCCESS;
}
/* Carry (borrow) and overflow variants: the borrow is src1 < src2 (unsigned). */
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_overflow) {
if (src2 >= 0)
FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
else
FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG));
}
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
if (is_overflow)
FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
else if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
if (is_overflow || is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG));
/* dst may be the same as src1 or src2. */
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
if (!is_overflow)
return SLJIT_SUCCESS;
/* Overflow extraction: same instruction sequence as in the SLJIT_ADD case. */
FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG));
if (op & SLJIT_SET_Z)
FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG));
return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG);
/* Subtraction with borrow-in: OTHER_FLAG is read as the incoming borrow and
   subtracted from the difference. */
case SLJIT_SUBC:
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
src2 = TMP_REG2;
flags &= ~SRC2_IMM;
}
is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY);
if (flags & SRC2_IMM) {
if (is_carry)
FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)));
}
else {
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
/* dst may be the same as src1 or src2. */
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)));
}
/* TMP_REG1 = borrow produced by subtracting the incoming borrow (dst < OTHER_FLAG). */
if (is_carry)
FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1)));
FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst)));
/* Outgoing borrow = borrow of either subtraction step. */
return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS;
/* Multiplication. Without an overflow request only the low part of the
   product is produced; the instruction choice depends on SLJIT_MIPS_REV. */
case SLJIT_MUL:
SLJIT_ASSERT(!(flags & SRC2_IMM));
if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) {
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst));
#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
if (op & SLJIT_I32_OP)
return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
#else /* SLJIT_MIPS_REV < 1 */
FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
return push_inst(compiler, MFLO | D(dst), DR(dst));
#endif /* SLJIT_MIPS_REV >= 6 */
}
/* Overflow check: the high part of the product goes to EQUAL_FLAG, the low
   part to dst. OTHER_FLAG = high part - (dst arithmetically shifted right),
   which is non-zero when the high part differs from that shifted value. */
#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
FAIL_IF(push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)));
FAIL_IF(push_inst(compiler, SELECT_OP(DMUH, MUH) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
#else /* SLJIT_MIPS_REV < 6 */
FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
#endif /* SLJIT_MIPS_REV >= 6 */
FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
/* Logical operations: immediate or register form chosen by EMIT_LOGICAL. */
case SLJIT_AND:
EMIT_LOGICAL(ANDI, AND);
return SLJIT_SUCCESS;
case SLJIT_OR:
EMIT_LOGICAL(ORI, OR);
return SLJIT_SUCCESS;
case SLJIT_XOR:
EMIT_LOGICAL(XORI, XOR);
return SLJIT_SUCCESS;
/* Shifts: EMIT_SHIFT picks among the doubleword, doubleword+32, word and
   variable-amount opcodes passed here. */
case SLJIT_SHL:
EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV);
return SLJIT_SUCCESS;
case SLJIT_LSHR:
EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV);
return SLJIT_SUCCESS;
case SLJIT_ASHR:
EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV);
return SLJIT_SUCCESS;
}
/* Every supported opcode returns from inside the switch. */
SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
}
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value)
{
FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst)));
@ -528,10 +134,10 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
SLJIT_UNUSED_ARG(executable_offset);
SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 0);
inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff);
inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff);
inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff);
inst[5] = (inst[5] & 0xffff0000) | (new_target & 0xffff);
inst[0] = (inst[0] & 0xffff0000) | ((sljit_ins)(new_target >> 48) & 0xffff);
inst[1] = (inst[1] & 0xffff0000) | ((sljit_ins)(new_target >> 32) & 0xffff);
inst[3] = (inst[3] & 0xffff0000) | ((sljit_ins)(new_target >> 16) & 0xffff);
inst[5] = (inst[5] & 0xffff0000) | ((sljit_ins)new_target & 0xffff);
SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 1);
inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
SLJIT_CACHE_FLUSH(inst, inst + 6);
@ -539,7 +145,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
/* Forwards to sljit_set_jump_addr(), passing new_constant as the value to
   store for addr; executable_offset is handed through unchanged. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
/* NOTE(review): the two calls below differ only in the explicit (sljit_uw)
   cast. They look like the before/after lines of a diff hunk whose +/-
   markers were lost; presumably only the cast form is intended - confirm. */
sljit_set_jump_addr(addr, new_constant, executable_offset);
/* new_constant is declared sljit_sw; this call converts it to sljit_uw explicitly. */
sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr)
@ -548,19 +154,19 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
sljit_s32 word_arg_count = 0;
sljit_s32 float_arg_count = 0;
sljit_s32 types = 0;
sljit_ins prev_ins = NOP;
sljit_ins prev_ins = *ins_ptr;
sljit_ins ins = NOP;
SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
arg_types >>= SLJIT_DEF_SHIFT;
arg_types >>= SLJIT_ARG_SHIFT;
while (arg_types) {
types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
switch (arg_types & SLJIT_DEF_MASK) {
case SLJIT_ARG_TYPE_F32:
switch (arg_types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64:
case SLJIT_ARG_TYPE_F32:
arg_count++;
float_arg_count++;
break;
@ -570,19 +176,11 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
break;
}
arg_types >>= SLJIT_DEF_SHIFT;
arg_types >>= SLJIT_ARG_SHIFT;
}
while (types) {
switch (types & SLJIT_DEF_MASK) {
case SLJIT_ARG_TYPE_F32:
if (arg_count != float_arg_count)
ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count);
else if (arg_count == 1)
ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
arg_count--;
float_arg_count--;
break;
switch (types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64:
if (arg_count != float_arg_count)
ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count);
@ -591,6 +189,14 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
arg_count--;
float_arg_count--;
break;
case SLJIT_ARG_TYPE_F32:
if (arg_count != float_arg_count)
ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count);
else if (arg_count == 1)
ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
arg_count--;
float_arg_count--;
break;
default:
if (arg_count != word_arg_count)
ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count);
@ -608,7 +214,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
ins = NOP;
}
types >>= SLJIT_DEF_SHIFT;
types >>= SLJIT_ARG_SHIFT;
}
*ins_ptr = prev_ins;
@ -620,7 +226,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
sljit_s32 arg_types)
{
struct sljit_jump *jump;
sljit_ins ins;
sljit_ins ins = NOP;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
@ -628,19 +234,25 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
PTR_FAIL_IF(!jump);
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
type &= 0xff;
if (type & SLJIT_CALL_RETURN)
PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins));
SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0));
if (!(type & SLJIT_CALL_RETURN)) {
jump->flags |= IS_JAL | IS_CALL;
PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
} else
PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
jump->flags |= IS_JAL | IS_CALL;
PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
jump->addr = compiler->size;
PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
/* Maximum number of instructions required for generating a constant. */
compiler->size += 6;
return jump;
}
@ -648,7 +260,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
sljit_s32 arg_types,
sljit_s32 src, sljit_sw srcw)
{
sljit_ins ins;
sljit_ins ins = NOP;
CHECK_ERROR();
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
@ -664,9 +276,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
}
if (type & SLJIT_CALL_RETURN)
FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
FAIL_IF(call_with_args(compiler, arg_types, &ins));
/* Register input. */
FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
if (!(type & SLJIT_CALL_RETURN))
FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
else
FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
return push_inst(compiler, ins, UNMOVABLE_INS);
}

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More